diff options
-rw-r--r-- | doc-src/scripting/inlinescripts.html | 12 | ||||
-rw-r--r-- | examples/nonblocking.py | 8 | ||||
-rw-r--r-- | examples/stub.py | 7 | ||||
-rw-r--r-- | libmproxy/proxy.py | 163 | ||||
-rw-r--r-- | libmproxy/script.py | 26 | ||||
-rw-r--r-- | test/scripts/all.py | 4 | ||||
-rw-r--r-- | test/scripts/concurrent_decorator.py | 32 | ||||
-rw-r--r-- | test/scripts/concurrent_decorator_err.py | 5 | ||||
-rw-r--r-- | test/test_dump.py | 4 | ||||
-rw-r--r-- | test/test_flow.py | 6 | ||||
-rw-r--r-- | test/test_script.py | 53 | ||||
-rw-r--r-- | test/tutils.py | 19 |
12 files changed, 254 insertions, 85 deletions
diff --git a/doc-src/scripting/inlinescripts.html b/doc-src/scripting/inlinescripts.html index c9e188fc..7ab1c101 100644 --- a/doc-src/scripting/inlinescripts.html +++ b/doc-src/scripting/inlinescripts.html @@ -36,6 +36,11 @@ Called when a client initiates a connection to the proxy. Note that a connection can correspond to multiple HTTP requests. +### serverconnect(ScriptContext, ServerConnection) + +Called when the proxy initiates a connection to the target server. Note that +a connection can correspond to multiple HTTP requests. + ### request(ScriptContext, Flow) Called when a client request has been received. The __Flow__ object is @@ -123,6 +128,13 @@ using pydoc (which is installed with Python by default), like this: </pre> +## Running scripts in parallel + +We have a single flow primitive, so when a script is handling something, other requests block. +While that's a very desirable behaviour under some circumstances, scripts can be run threaded by using the <code>libmproxy.script.concurrent</code> decorator. + +$!example("examples/nonblocking.py")!$ + ## Running scripts on saved flows Sometimes, we want to run a script on __Flow__ objects that are already diff --git a/examples/nonblocking.py b/examples/nonblocking.py new file mode 100644 index 00000000..ed8cc7c9 --- /dev/null +++ b/examples/nonblocking.py @@ -0,0 +1,8 @@ +import time +from libmproxy.script import concurrent + +@concurrent +def request(context, flow): + print "handle request: %s%s" % (flow.request.host, flow.request.path) + time.sleep(5) + print "start request: %s%s" % (flow.request.host, flow.request.path)
\ No newline at end of file diff --git a/examples/stub.py b/examples/stub.py index 42b2935a..78cbfcf2 100644 --- a/examples/stub.py +++ b/examples/stub.py @@ -14,6 +14,13 @@ def clientconnect(ctx, client_connect): """ ctx.log("clientconnect") +def serverconnect(ctx, server_connection): + """ + Called when the proxy initiates a connection to the target server. Note that a + connection can correspond to multiple HTTP requests + """ + ctx.log("serverconnect") + def request(ctx, flow): """ Called when a client request has been received. diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py index 77c8828a..609ffb62 100644 --- a/libmproxy/proxy.py +++ b/libmproxy/proxy.py @@ -151,7 +151,7 @@ class ProxyHandler(tcp.BaseHandler): if not sni: sni = host conn_info = (scheme, host, port, sni) - if sc and (conn_info != sc.conn_info or request and sc.require_request): + if sc and (conn_info != sc.conn_info or (request and sc.require_request)): sc.terminate() self.server_conn = None self.log( @@ -353,6 +353,15 @@ class ProxyHandler(tcp.BaseHandler): line = fp.readline() return line + def read_request(self, client_conn): + self.rfile.reset_timestamps() + if self.config.transparent_proxy: + return self.read_request_transparent(client_conn) + elif self.config.reverse_proxy: + return self.read_request_reverse(client_conn) + else: + return self.read_request_proxy(client_conn) + def read_request_transparent(self, client_conn): orig = self.config.transparent_proxy["resolver"].original_addr(self.connection) if not orig: @@ -365,95 +374,99 @@ class ProxyHandler(tcp.BaseHandler): else: scheme = "http" - return self._read_request_transparent(client_conn, scheme, host, port) + return self._read_request_origin_form(client_conn, scheme, host, port) + + def read_request_reverse(self, client_conn): + scheme, host, port = self.config.reverse_proxy + return self._read_request_origin_form(client_conn, scheme, host, port) + + def read_request_proxy(self, client_conn): + # Check for a CONNECT command. + if not self.proxy_connect_state: + line = self.get_line(self.rfile) + if line == "": + return None + self.proxy_connect_state = self._read_request_authority_form(line) + + # Check for an actual request + if self.proxy_connect_state: + host, port, _ = self.proxy_connect_state + return self._read_request_origin_form(client_conn, "https", host, port) + else: + # noinspection PyUnboundLocalVariable + return self._read_request_absolute_form(client_conn, line) + + def _read_request_authority_form(self, line): + """ + The authority-form of request-target is only used for CONNECT requests. + The CONNECT method is used to request a tunnel to the destination server. + This function sends a "200 Connection established" response to the client + and returns the host information that can be used to process further requests in origin-form. + An example authority-form request line would be: + CONNECT www.example.com:80 HTTP/1.1 + """ + connparts = http.parse_init_connect(line) + if connparts: + self.read_headers(authenticate=True) + # respond according to http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.2 + self.wfile.write( + 'HTTP/1.1 200 Connection established\r\n' + + ('Proxy-agent: %s\r\n'%self.server_version) + + '\r\n' + ) + self.wfile.flush() + return connparts - def _read_request_transparent(self, client_conn, scheme, host, port): + def _read_request_absolute_form(self, client_conn, line): """ - Read a transparent HTTP request. Transparent means that the client isn't aware of proxying. - In other words, the client request starts with - "GET /foo.html HTTP/1.1" - rather than - "CONNECT example.com:80 HTTP/1.1" + When making a request to a proxy (other than CONNECT or OPTIONS), + a client must send the target uri in absolute-form. + An example absolute-form request line would be: + GET http://www.example.com/foo.html HTTP/1.1 + """ + r = http.parse_init_proxy(line) + if not r: + raise ProxyError(400, "Bad HTTP request line: %s"%repr(line)) + method, scheme, host, port, path, httpversion = r + headers = self.read_headers(authenticate=True) + content = http.read_http_body_request( + self.rfile, self.wfile, headers, httpversion, self.config.body_size_limit + ) + return flow.Request( + client_conn, httpversion, host, port, scheme, method, path, headers, content, + self.rfile.first_byte_timestamp, utils.timestamp() + ) + + def _read_request_origin_form(self, client_conn, scheme, host, port): + """ + Read a HTTP request with regular (origin-form) request line. + An example origin-form request line would be: + GET /foo.html HTTP/1.1 + + The request destination is already known from one of the following sources: + 1) transparent proxy: destination provided by platform resolver + 2) reverse proxy: fixed destination + 3) regular proxy: known from CONNECT command. """ if scheme.lower() == "https" and not self.ssl_established: self.establish_ssl(client_conn, host, port) + line = self.get_line(self.rfile) if line == "": return None + r = http.parse_init_http(line) if not r: raise ProxyError(400, "Bad HTTP request line: %s"%repr(line)) method, path, httpversion = r headers = self.read_headers(authenticate=False) content = http.read_http_body_request( - self.rfile, self.wfile, headers, httpversion, self.config.body_size_limit - ) + self.rfile, self.wfile, headers, httpversion, self.config.body_size_limit + ) return flow.Request( - client_conn,httpversion, host, port, scheme, method, path, headers, content, - self.rfile.first_byte_timestamp, utils.timestamp() - ) - - def read_request_proxy(self, client_conn): - line = self.get_line(self.rfile) - if line == "": - return None - - if not self.proxy_connect_state: - connparts = http.parse_init_connect(line) - if connparts: - host, port, httpversion = connparts - headers = self.read_headers(authenticate=True) - self.wfile.write( - 'HTTP/1.1 200 Connection established\r\n' + - ('Proxy-agent: %s\r\n'%self.server_version) + - '\r\n' - ) - self.wfile.flush() - self.establish_ssl(client_conn, host, port) - self.proxy_connect_state = (host, port, httpversion) - line = self.rfile.readline(line) - - if self.proxy_connect_state: - r = http.parse_init_http(line) - if not r: - raise ProxyError(400, "Bad HTTP request line: %s"%repr(line)) - method, path, httpversion = r - headers = self.read_headers(authenticate=False) - - host, port, _ = self.proxy_connect_state - content = http.read_http_body_request( - self.rfile, self.wfile, headers, httpversion, self.config.body_size_limit - ) - return flow.Request( - client_conn, httpversion, host, port, "https", method, path, headers, content, - self.rfile.first_byte_timestamp, utils.timestamp() - ) - else: - r = http.parse_init_proxy(line) - if not r: - raise ProxyError(400, "Bad HTTP request line: %s"%repr(line)) - method, scheme, host, port, path, httpversion = r - headers = self.read_headers(authenticate=True) - content = http.read_http_body_request( - self.rfile, self.wfile, headers, httpversion, self.config.body_size_limit - ) - return flow.Request( - client_conn, httpversion, host, port, scheme, method, path, headers, content, - self.rfile.first_byte_timestamp, utils.timestamp() - ) - - def read_request_reverse(self, client_conn): - scheme, host, port = self.config.reverse_proxy - return self._read_request_transparent(client_conn, scheme, host, port) - - def read_request(self, client_conn): - self.rfile.reset_timestamps() - if self.config.transparent_proxy: - return self.read_request_transparent(client_conn) - elif self.config.reverse_proxy: - return self.read_request_reverse(client_conn) - else: - return self.read_request_proxy(client_conn) + client_conn, httpversion, host, port, scheme, method, path, headers, content, + self.rfile.first_byte_timestamp, utils.timestamp() + ) def read_headers(self, authenticate=False): headers = http.read_headers(self.rfile) diff --git a/libmproxy/script.py b/libmproxy/script.py index 623f2b92..f8a0d085 100644 --- a/libmproxy/script.py +++ b/libmproxy/script.py @@ -1,4 +1,5 @@ -import os, traceback +import os, traceback, threading +import controller class ScriptError(Exception): pass @@ -59,3 +60,26 @@ class Script: return (False, (v, traceback.format_exc(v))) else: return (False, None) + + +def _handle_concurrent_reply(fn, o, args=[], kwargs={}): + reply = o.reply + o.reply = controller.DummyReply() + + def run(): + fn(*args, **kwargs) + reply(o) + threading.Thread(target=run).start() + + +def concurrent(fn): + if fn.func_name in ["request", "response", "error"]: + def _concurrent(ctx, flow): + r = getattr(flow, fn.func_name) + _handle_concurrent_reply(fn, r, [ctx, flow]) + return _concurrent + elif fn.func_name in ["clientconnect", "serverconnect", "clientdisconnect"]: + def _concurrent(ctx, conn): + _handle_concurrent_reply(fn, conn, [ctx, conn]) + return _concurrent + raise NotImplementedError("Concurrent decorator not supported for this method.")
\ No newline at end of file diff --git a/test/scripts/all.py b/test/scripts/all.py index e6da7e51..7d30d757 100644 --- a/test/scripts/all.py +++ b/test/scripts/all.py @@ -3,6 +3,10 @@ def clientconnect(ctx, cc): ctx.log("XCLIENTCONNECT") log.append("clientconnect") +def serverconnect(ctx, cc): + ctx.log("XSERVERCONNECT") + log.append("serverconnect") + def request(ctx, r): ctx.log("XREQUEST") log.append("request") diff --git a/test/scripts/concurrent_decorator.py b/test/scripts/concurrent_decorator.py new file mode 100644 index 00000000..8e132006 --- /dev/null +++ b/test/scripts/concurrent_decorator.py @@ -0,0 +1,32 @@ +import time +from libmproxy.script import concurrent + + +@concurrent +def clientconnect(context, cc): + context.log("clientconnect") + + +@concurrent +def serverconnect(context, sc): + context.log("serverconnect") + + +@concurrent +def request(context, flow): + time.sleep(0.1) + + +@concurrent +def response(context, flow): + context.log("response") + + +@concurrent +def error(context, err): + context.log("error") + + +@concurrent +def clientdisconnect(context, dc): + context.log("clientdisconnect")
\ No newline at end of file diff --git a/test/scripts/concurrent_decorator_err.py b/test/scripts/concurrent_decorator_err.py new file mode 100644 index 00000000..78191315 --- /dev/null +++ b/test/scripts/concurrent_decorator_err.py @@ -0,0 +1,5 @@ +from libmproxy.script import concurrent + +@concurrent +def start(context, argv): + pass
\ No newline at end of file diff --git a/test/test_dump.py b/test/test_dump.py index 3b79c721..3d375f16 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -30,6 +30,9 @@ class TestDumpMaster: resp = tutils.tresp(req) resp.content = content m.handle_clientconnect(cc) + sc = proxy.ServerConnection(m.o, req.scheme, req.host, req.port, None) + sc.reply = mock.MagicMock() + m.handle_serverconnection(sc) m.handle_request(req) f = m.handle_response(resp) cd = flow.ClientDisconnect(cc) @@ -153,6 +156,7 @@ class TestDumpMaster: scripts=[[tutils.test_data.path("scripts/all.py")]], verbosity=0, eventlog=True ) assert "XCLIENTCONNECT" in ret + assert "XSERVERCONNECT" in ret assert "XREQUEST" in ret assert "XRESPONSE" in ret assert "XCLIENTDISCONNECT" in ret diff --git a/test/test_flow.py b/test/test_flow.py index 9844e0fd..c614960b 100644 --- a/test/test_flow.py +++ b/test/test_flow.py @@ -1,7 +1,7 @@ import Queue, time, os.path from cStringIO import StringIO import email.utils -from libmproxy import filt, flow, controller, utils, tnetstring +from libmproxy import filt, flow, controller, utils, tnetstring, proxy import tutils @@ -575,6 +575,10 @@ class TestFlowMaster: req = tutils.treq() fm.handle_clientconnect(req.client_conn) assert fm.scripts[0].ns["log"][-1] == "clientconnect" + sc = proxy.ServerConnection(None, req.scheme, req.host, req.port, None) + sc.reply = controller.DummyReply() + fm.handle_serverconnection(sc) + assert fm.scripts[0].ns["log"][-1] == "serverconnect" f = fm.handle_request(req) assert fm.scripts[0].ns["log"][-1] == "request" resp = tutils.tresp(req) diff --git a/test/test_script.py b/test/test_script.py index 9033c4fc..ad2296ef 100644 --- a/test/test_script.py +++ b/test/test_script.py @@ -2,6 +2,19 @@ from libmproxy import script, flow import tutils import shlex import os +import time + + +class TCounter: + count = 0 + + def __call__(self, *args, **kwargs): + self.count += 1 + + +class TScriptContext(TCounter): + def log(self, msg): + self.__call__() class TestScript: def test_simple(self): @@ -64,3 +77,43 @@ class TestScript: s.load ) + def test_concurrent(self): + s = flow.State() + fm = flow.FlowMaster(None, s) + fm.load_script([tutils.test_data.path("scripts/concurrent_decorator.py")]) + + reply = TCounter() + r1, r2 = tutils.treq(), tutils.treq() + r1.reply, r2.reply = reply, reply + t_start = time.time() + fm.handle_request(r1) + r1.reply() + fm.handle_request(r2) + r2.reply() + assert reply.count < 2 + assert (time.time() - t_start) < 0.09 + time.sleep(0.2) + assert reply.count == 2 + + def test_concurrent2(self): + ctx = TScriptContext() + s = script.Script([tutils.test_data.path("scripts/concurrent_decorator.py")], ctx) + s.load() + f = tutils.tflow_full() + f.error = tutils.terr(f.request) + f.reply = f.request.reply + + s.run("clientconnect", f) + s.run("serverconnect", f) + s.run("response", f) + s.run("error", f) + s.run("clientdisconnect", f) + time.sleep(0.1) + assert ctx.count == 5 + + def test_concurrent_err(self): + s = script.Script([tutils.test_data.path("scripts/concurrent_decorator_err.py")], TScriptContext()) + tutils.raises( + "decorator not supported for this method", + s.load + )
\ No newline at end of file diff --git a/test/tutils.py b/test/tutils.py index e42256ed..4cd7b7f8 100644 --- a/test/tutils.py +++ b/test/tutils.py @@ -33,6 +33,13 @@ def tresp(req=None): resp.reply = controller.DummyReply() return resp +def terr(req=None): + if not req: + req = treq() + err = flow.Error(req, "error") + err.reply = controller.DummyReply() + return err + def tflow(): r = treq() @@ -40,21 +47,17 @@ def tflow(): def tflow_full(): - r = treq() - f = flow.Flow(r) - f.response = tresp(r) + f = tflow() + f.response = tresp(f.request) return f def tflow_err(): - r = treq() - f = flow.Flow(r) - f.error = flow.Error(r, "error") - f.error.reply = controller.DummyReply() + f = tflow() + f.error = terr(f.request) return f - @contextmanager def tmpdir(*args, **kwargs): orig_workdir = os.getcwd() |