From 244ef243d75145a01d9029589de65be51299b3f3 Mon Sep 17 00:00:00 2001 From: Krzysztof Bielicki Date: Tue, 10 Mar 2015 10:44:06 +0100 Subject: [#514] Add support for ignoring payload params in multipart/form-data --- libmproxy/protocol/http.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 49310ec3..512cf75b 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -15,6 +15,7 @@ from ..proxy.connection import ServerConnection from .. import encoding, utils, controller, stateobject, proxy HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" +HDR_FORM_MULTIPART = "multipart/form-data" CONTENT_MISSING = 0 @@ -507,6 +508,19 @@ class HTTPRequest(HTTPMessage): """ self.headers["Host"] = [self.host] + def get_form(self): + """ + Retrieves the URL-encoded or multipart form data, returning an ODict object. + Returns an empty ODict if there is no data or the content-type + indicates non-form data. + """ + if self.content: + if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): + return self.get_form_urlencoded() + elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return self.get_form_multipart() + return ODict([]) + def get_form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. @@ -514,7 +528,12 @@ class HTTPRequest(HTTPMessage): indicates non-form data. 
""" if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return ODict(utils.urldecode(self.content)) + return ODict(utils.urldecode(self.content)) + return ODict([]) + + def get_form_multipart(self): + if self.content and self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return ODict(utils.multipartdecode(self.headers, self.content)) return ODict([]) def set_form_urlencoded(self, odict): -- cgit v1.2.3 From 02a61ea45dc1ca6d0c88b44adf83f68b791130e7 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 21 Mar 2015 22:49:51 +0100 Subject: structure components --- libmproxy/protocol/http.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 49310ec3..00086c21 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -119,6 +119,8 @@ class HTTPMessage(stateobject.StateObject): if short: if self.content: ret["contentLength"] = len(self.content) + elif self.content == CONTENT_MISSING: + ret["contentLength"] = None else: ret["contentLength"] = 0 return ret -- cgit v1.2.3 From 6852eb9d0af5cf7196da5cad2ccf3b036e348226 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 10 Apr 2015 14:59:38 +0200 Subject: fix #553 --- libmproxy/protocol/http.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index c763db4c..05657ea6 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -328,9 +328,22 @@ class HTTPRequest(HTTPMessage): ) @classmethod - def from_stream(cls, rfile, include_body=True, body_size_limit=None): + def from_stream(cls, rfile, include_body=True, body_size_limit=None, wfile=None): """ Parse an HTTP request from a file stream + + Args: + rfile (file): Input file to read from + include_body (bool): Read response body as well + 
body_size_limit (bool): Maximum body size + wfile (file): If specified, HTTP Expect headers are handled automatically. + by writing a HTTP 100 CONTINUE response to the stream. + + Returns: + HTTPRequest: The HTTP request + + Raises: + HttpError: If the input is invalid. """ httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end = ( None, None, None, None, None, None, None, None, None, None) @@ -385,6 +398,15 @@ class HTTPRequest(HTTPMessage): if headers is None: raise http.HttpError(400, "Invalid headers") + expect_header = headers.get_first("expect") + if expect_header and expect_header.lower() == "100-continue" and httpversion >= (1, 1): + wfile.write( + 'HTTP/1.1 100 Continue\r\n' + '\r\n' + ) + wfile.flush() + del headers['expect'] + if include_body: content = http.read_http_body(rfile, headers, body_size_limit, method, None, True) @@ -1062,7 +1084,8 @@ class HTTPHandler(ProtocolHandler): try: req = HTTPRequest.from_stream( self.c.client_conn.rfile, - body_size_limit=self.c.config.body_size_limit + body_size_limit=self.c.config.body_size_limit, + wfile=self.c.client_conn.wfile ) except tcp.NetLibError: # don't throw an error for disconnects that happen -- cgit v1.2.3 From ab2b98b988e6ec76d2c0f79dfe52d9ff4096619c Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 10 Apr 2015 19:35:42 +0200 Subject: fix pretty_host if no host is present --- libmproxy/protocol/http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 05657ea6..93065b47 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -631,8 +631,10 @@ class HTTPRequest(HTTPMessage): host = self.headers.get_first("host") if not host: host = self.host - host = host.encode("idna") - return host + if host: + return host.encode("idna") + else: + return None def pretty_url(self, hostheader): if self.form_out == 
"authority": # upstream proxy mode -- cgit v1.2.3 From bea0bd236a60e3e6c80027448e51b7802394304a Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 11:58:10 +1200 Subject: Housekeeping and cleanups - No output to stdout on load in examples - they muck up the test suite. - Use the odict module directly, rather than aliasing it. The small convenience this gives to scripters is not worth it. - Move the cookie tests from the flow test module to the protocol_http test module. --- libmproxy/protocol/http.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index c763db4c..496b71cc 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -8,7 +8,7 @@ from email.utils import parsedate_tz, formatdate, mktime_tz import threading from netlib import http, tcp, http_status import netlib.utils -from netlib.odict import ODict, ODictCaseless +from netlib import odict from .tcp import TCPHandler from .primitives import KILL, ProtocolHandler, Flow, Error from ..proxy.connection import ServerConnection @@ -45,7 +45,7 @@ def send_connect_request(conn, host, port, update_state=True): port, None, (1, 1), - ODictCaseless(), + odict.ODictCaseless(), "" ) conn.send(upstream_request.assemble()) @@ -100,7 +100,7 @@ class HTTPMessage(stateobject.StateObject): timestamp_end=None): self.httpversion = httpversion self.headers = headers - """@type: ODictCaseless""" + """@type: odict.ODictCaseless""" self.content = content self.timestamp_start = timestamp_start @@ -108,7 +108,7 @@ class HTTPMessage(stateobject.StateObject): _stateobject_attributes = dict( httpversion=tuple, - headers=ODictCaseless, + headers=odict.ODictCaseless, content=str, timestamp_start=float, timestamp_end=float @@ -242,7 +242,7 @@ class HTTPRequest(HTTPMessage): httpversion: HTTP version tuple, e.g. 
(1,1) - headers: ODictCaseless object + headers: odict.ODictCaseless object content: Content of the request, None, or CONTENT_MISSING if there is content associated, but not present. CONTENT_MISSING evaluates @@ -280,7 +280,7 @@ class HTTPRequest(HTTPMessage): timestamp_end=None, form_out=None ): - assert isinstance(headers, ODictCaseless) or not headers + assert isinstance(headers, odict.ODictCaseless) or not headers HTTPMessage.__init__( self, httpversion, @@ -521,7 +521,7 @@ class HTTPRequest(HTTPMessage): return self.get_form_urlencoded() elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): return self.get_form_multipart() - return ODict([]) + return odict.ODict([]) def get_form_urlencoded(self): """ @@ -530,13 +530,13 @@ class HTTPRequest(HTTPMessage): indicates non-form data. """ if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return ODict(utils.urldecode(self.content)) - return ODict([]) + return odict.ODict(utils.urldecode(self.content)) + return odict.ODict([]) def get_form_multipart(self): if self.content and self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): - return ODict(utils.multipartdecode(self.headers, self.content)) - return ODict([]) + return odict.ODict(utils.multipartdecode(self.headers, self.content)) + return odict.ODict([]) def set_form_urlencoded(self, odict): """ @@ -577,8 +577,8 @@ class HTTPRequest(HTTPMessage): """ _, _, _, _, query, _ = urlparse.urlparse(self.url) if query: - return ODict(utils.urldecode(query)) - return ODict([]) + return odict.ODict(utils.urldecode(query)) + return odict.ODict([]) def set_query(self, odict): """ @@ -697,7 +697,7 @@ class HTTPResponse(HTTPMessage): def __init__(self, httpversion, code, msg, headers, content, timestamp_start=None, timestamp_end=None): - assert isinstance(headers, ODictCaseless) or headers is None + assert isinstance(headers, odict.ODictCaseless) or headers is None HTTPMessage.__init__( self, httpversion, -- cgit v1.2.3 
From e17eacd8d77c78daa88d8f89ace990463378d98d Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 13:45:38 +1200 Subject: New get_cookie and set_cookie implementations for HTTPRequest --- libmproxy/protocol/http.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 94077e42..cd9458f2 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -6,7 +6,7 @@ import time import copy from email.utils import parsedate_tz, formatdate, mktime_tz import threading -from netlib import http, tcp, http_status +from netlib import http, tcp, http_status, http_cookies import netlib.utils from netlib import odict from .tcp import TCPHandler @@ -670,15 +670,22 @@ class HTTPRequest(HTTPMessage): self.scheme, self.host, self.port, self.path = parts def get_cookies(self): - cookie_headers = self.headers.get("cookie") - if not cookie_headers: - return None + """ - cookies = [] - for header in cookie_headers: - pairs = [pair.partition("=") for pair in header.split(';')] - cookies.extend((pair[0], (pair[2], {})) for pair in pairs) - return dict(cookies) + Returns a possibly empty netlib.odict.ODict object. + """ + ret = odict.ODict() + for i in self.headers["cookie"]: + ret.extend(http_cookies.parse_cookie_header(i)) + return ret + + def set_cookies(self, odict): + """ + Takes an netlib.odict.ODict object. Over-writes any existing Cookie + headers. 
+ """ + v = http_cookies.format_cookie_header(odict) + self.headers["Cookie"] = [v] def replace(self, pattern, repl, *args, **kwargs): """ -- cgit v1.2.3 From ab7e2857cc9095c4cee8ca9b569c16516aa520ba Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 15:14:36 +1200 Subject: New get_cookies for HttpResponse --- libmproxy/protocol/http.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index cd9458f2..da8eaa01 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -902,20 +902,21 @@ class HTTPResponse(HTTPMessage): self.headers["set-cookie"] = c def get_cookies(self): - cookie_headers = self.headers.get("set-cookie") - if not cookie_headers: - return None + """ + Get the contents of all Set-Cookie headers. - cookies = [] - for header in cookie_headers: - pairs = [pair.partition("=") for pair in header.split(';')] - cookie_name = pairs[0][0] # the key of the first key/value pairs - cookie_value = pairs[0][2] # the value of the first key/value pairs - cookie_parameters = { - key.strip().lower(): value.strip() for key, sep, value in pairs[1:] - } - cookies.append((cookie_name, (cookie_value, cookie_parameters))) - return dict(cookies) + Returns a possibly empty ODict, where keys are cookie name strings, + and values are [value, attr] lists. Value is a string, and attr is + an ODictCaseless containing cookie attributes. Within attrs, unary + attributes (e.g. HTTPOnly) are indicated by a Null value. 
+ """ + ret = [] + for header in self.headers["set-cookie"]: + v = http_cookies.parse_set_cookie_header(header) + if v: + name, value, attrs = v + ret.append([name, [value, attrs]]) + return odict.ODict(ret) class HTTPFlow(Flow): -- cgit v1.2.3 From c335c2b5330865ccab176c6213db63151383a142 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 16:23:51 +1200 Subject: Add set_cookies method to HTTPResponse --- libmproxy/protocol/http.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index da8eaa01..eb7749ea 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -918,6 +918,25 @@ class HTTPResponse(HTTPMessage): ret.append([name, [value, attrs]]) return odict.ODict(ret) + def set_cookies(self, odict): + """ + Set the Set-Cookie headers on this response, over-writing existing + headers. + + Accepts an ODict of the same format as that returned by get_cookies. 
+ """ + values = [] + for i in odict.lst: + values.append( + http_cookies.format_set_cookie_header( + i[0], + i[1][0], + i[1][1] + ) + ) + self.headers["Set-Cookie"] = values + + class HTTPFlow(Flow): """ -- cgit v1.2.3 From 5ff430312440d9ed95a003a2d3afc018669dfb6f Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 15 Apr 2015 10:29:57 +1200 Subject: Adjust for ODict interface change --- libmproxy/protocol/http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index eb7749ea..da6b8aa6 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -469,7 +469,7 @@ class HTTPRequest(HTTPMessage): if self.content or self.content == "": headers["Content-Length"] = [str(len(self.content))] - return str(headers) + return headers.format() def _assemble_head(self, form=None): return "%s\r\n%s\r\n" % ( @@ -823,7 +823,7 @@ class HTTPResponse(HTTPMessage): if self.content or self.content == "": headers["Content-Length"] = [str(len(self.content))] - return str(headers) + return headers.format() def _assemble_head(self, preserve_transfer_encoding=False): return '%s\r\n%s\r\n' % ( -- cgit v1.2.3 From ddf458b330bf9fe200cb1dbc3ddb5ae1a5d2102a Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 21 Apr 2015 11:05:37 +1200 Subject: HTTP request reading moves to netlib --- libmproxy/protocol/http.py | 169 +++++++++++++++++---------------------------- 1 file changed, 64 insertions(+), 105 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index da6b8aa6..e5ec53c3 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -23,19 +23,6 @@ class KillSignal(Exception): pass -def get_line(fp): - """ - Get a line, possibly preceded by a blank. 
- """ - line = fp.readline() - if line == "\r\n" or line == "\n": - # Possible leftover from previous message - line = fp.readline() - if line == "": - raise tcp.NetLibDisconnect() - return line - - def send_connect_request(conn, host, port, update_state=True): upstream_request = HTTPRequest( "authority", @@ -349,79 +336,26 @@ class HTTPRequest(HTTPMessage): None, None, None, None, None, None, None, None, None, None) timestamp_start = utils.timestamp() - if hasattr(rfile, "reset_timestamps"): rfile.reset_timestamps() - request_line = get_line(rfile) - - if hasattr(rfile, "first_byte_timestamp"): - # more accurate timestamp_start - timestamp_start = rfile.first_byte_timestamp - - request_line_parts = http.parse_init(request_line) - if not request_line_parts: - raise http.HttpError( - 400, - "Bad HTTP request line: %s" % repr(request_line) - ) - method, path, httpversion = request_line_parts - - if path == '*' or path.startswith("/"): - form_in = "relative" - if not netlib.utils.isascii(path): - raise http.HttpError( - 400, - "Bad HTTP request line: %s" % repr(request_line) - ) - elif method.upper() == 'CONNECT': - form_in = "authority" - r = http.parse_init_connect(request_line) - if not r: - raise http.HttpError( - 400, - "Bad HTTP request line: %s" % repr(request_line) - ) - host, port, _ = r - path = None - else: - form_in = "absolute" - r = http.parse_init_proxy(request_line) - if not r: - raise http.HttpError( - 400, - "Bad HTTP request line: %s" % repr(request_line) - ) - _, scheme, host, port, path, _ = r - - headers = http.read_headers(rfile) - if headers is None: - raise http.HttpError(400, "Invalid headers") - - expect_header = headers.get_first("expect") - if expect_header and expect_header.lower() == "100-continue" and httpversion >= (1, 1): - wfile.write( - 'HTTP/1.1 100 Continue\r\n' - '\r\n' - ) - wfile.flush() - del headers['expect'] - - if include_body: - content = http.read_http_body(rfile, headers, body_size_limit, - method, None, True) - 
timestamp_end = utils.timestamp() - + req = http.read_request( + rfile, + include_body = include_body, + body_size_limit = body_size_limit, + wfile = wfile + ) + timestamp_end = utils.timestamp() return HTTPRequest( - form_in, - method, - scheme, - host, - port, - path, - httpversion, - headers, - content, + req.form_in, + req.method, + req.scheme, + req.host, + req.port, + req.path, + req.httpversion, + req.headers, + req.content, timestamp_start, timestamp_end ) @@ -1377,7 +1311,8 @@ class HTTPHandler(ProtocolHandler): ) if needs_server_change: - # force create new connection to the proxy server to reset state + # force create new connection to the proxy server to reset + # state self.live.change_server(self.c.server_conn.address, force=True) if ssl: send_connect_request( @@ -1387,8 +1322,9 @@ class HTTPHandler(ProtocolHandler): ) self.c.establish_ssl(server=True) else: - # If we're not in upstream mode, we just want to update the host and - # possibly establish TLS. This is a no op if the addresses match. + # If we're not in upstream mode, we just want to update the host + # and possibly establish TLS. This is a no op if the addresses + # match. self.live.change_server(address, ssl=ssl) flow.server_conn = self.c.server_conn @@ -1396,8 +1332,8 @@ class HTTPHandler(ProtocolHandler): def send_response_to_client(self, flow): if not flow.response.stream: # no streaming: - # we already received the full response from the server and can send - # it to the client straight away. + # we already received the full response from the server and can + # send it to the client straight away. self.c.client_conn.send(flow.response.assemble()) else: # streaming: @@ -1435,8 +1371,10 @@ class HTTPHandler(ProtocolHandler): flow.response.code) == -1) if close_connection: if flow.request.form_in == "authority" and flow.response.code == 200: - # Workaround for https://github.com/mitmproxy/mitmproxy/issues/313: - # Some proxies (e.g. 
Charles) send a CONNECT response with HTTP/1.0 and no Content-Length header + # Workaround for + # https://github.com/mitmproxy/mitmproxy/issues/313: Some + # proxies (e.g. Charles) send a CONNECT response with HTTP/1.0 + # and no Content-Length header pass else: return True @@ -1458,14 +1396,16 @@ class HTTPHandler(ProtocolHandler): self.expected_form_out = "relative" self.skip_authentication = True - # In practice, nobody issues a CONNECT request to send unencrypted HTTP requests afterwards. - # If we don't delegate to TCP mode, we should always negotiate a SSL connection. + # In practice, nobody issues a CONNECT request to send unencrypted + # HTTP requests afterwards. If we don't delegate to TCP mode, we + # should always negotiate a SSL connection. # - # FIXME: - # Turns out the previous statement isn't entirely true. Chrome on Windows CONNECTs to :80 - # if an explicit proxy is configured and a websocket connection should be established. - # We don't support websocket at the moment, so it fails anyway, but we should come up with - # a better solution to this if we start to support WebSockets. + # FIXME: Turns out the previous statement isn't entirely true. + # Chrome on Windows CONNECTs to :80 if an explicit proxy is + # configured and a websocket connection should be established. We + # don't support websocket at the moment, so it fails anyway, but we + # should come up with a better solution to this if we start to + # support WebSockets. should_establish_ssl = ( address.port in self.c.config.ssl_ports or @@ -1473,12 +1413,18 @@ class HTTPHandler(ProtocolHandler): ) if should_establish_ssl: - self.c.log("Received CONNECT request to SSL port. Upgrading to SSL...", "debug") + self.c.log( + "Received CONNECT request to SSL port. 
" + "Upgrading to SSL...", "debug" + ) self.c.establish_ssl(server=True, client=True) self.c.log("Upgrade to SSL completed.", "debug") if self.c.config.check_tcp(address): - self.c.log("Generic TCP mode for host: %s:%s" % address(), "info") + self.c.log( + "Generic TCP mode for host: %s:%s" % address(), + "info" + ) TCPHandler(self.c).handle_messages() return False @@ -1499,7 +1445,8 @@ class RequestReplayThread(threading.Thread): def __init__(self, config, flow, masterq, should_exit): """ - masterqueue can be a queue or None, if no scripthooks should be processed. + masterqueue can be a queue or None, if no scripthooks should be + processed. """ self.config, self.flow = config, flow if masterq: @@ -1525,12 +1472,17 @@ class RequestReplayThread(threading.Thread): if not self.flow.response: # In all modes, we directly connect to the server displayed if self.config.mode == "upstream": - server_address = self.config.mode.get_upstream_server(self.flow.client_conn)[2:] + server_address = self.config.mode.get_upstream_server( + self.flow.client_conn + )[2:] server = ServerConnection(server_address) server.connect() if r.scheme == "https": send_connect_request(server, r.host, r.port) - server.establish_ssl(self.config.clientcerts, sni=self.flow.server_conn.sni) + server.establish_ssl( + self.config.clientcerts, + sni=self.flow.server_conn.sni + ) r.form_out = "relative" else: r.form_out = "absolute" @@ -1539,12 +1491,18 @@ class RequestReplayThread(threading.Thread): server = ServerConnection(server_address) server.connect() if r.scheme == "https": - server.establish_ssl(self.config.clientcerts, sni=self.flow.server_conn.sni) + server.establish_ssl( + self.config.clientcerts, + sni=self.flow.server_conn.sni + ) r.form_out = "relative" server.send(r.assemble()) self.flow.server_conn = server - self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, - body_size_limit=self.config.body_size_limit) + self.flow.response = HTTPResponse.from_stream( + server.rfile, 
+ r.method, + body_size_limit=self.config.body_size_limit + ) if self.channel: response_reply = self.channel.ask("response", self.flow) if response_reply is None or response_reply == KILL: @@ -1554,7 +1512,8 @@ class RequestReplayThread(threading.Thread): if self.channel: self.channel.ask("error", self.flow) except KillSignal: - # KillSignal should only be raised if there's a channel in the first place. + # KillSignal should only be raised if there's a channel in the + # first place. self.channel.tell("log", proxy.Log("Connection killed", "info")) finally: r.form_out = form_out_backup -- cgit v1.2.3 From 37d731aacd4c7cf6d03836e50bf3eed0955eff47 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 21 Apr 2015 11:42:27 +1200 Subject: Missed some un-needed variable declarations --- libmproxy/protocol/http.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index e5ec53c3..852ce393 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -332,8 +332,7 @@ class HTTPRequest(HTTPMessage): Raises: HttpError: If the input is invalid. 
""" - httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end = ( - None, None, None, None, None, None, None, None, None, None) + timestamp_start, timestamp_end = None, None timestamp_start = utils.timestamp() if hasattr(rfile, "reset_timestamps"): @@ -871,7 +870,6 @@ class HTTPResponse(HTTPMessage): self.headers["Set-Cookie"] = values - class HTTPFlow(Flow): """ A HTTPFlow is a collection of objects representing a single HTTP -- cgit v1.2.3 From 1c26516b1822d82e3b701539591a1d22831e0a19 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Thu, 30 Apr 2015 12:18:01 +1200 Subject: pretty_size now lives in netlib.utils --- libmproxy/protocol/http.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 852ce393..8d703786 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -691,7 +691,10 @@ class HTTPResponse(HTTPMessage): return f def __repr__(self): - size = utils.pretty_size(len(self.content)) if self.content else "content missing" + if self.content: + size = netlib.utils.pretty_size(len(self.content)) + else: + size = "content missing" return "".format( code=self.code, msg=self.msg, -- cgit v1.2.3 From 202ede3c546b3d99acee14f3bcceefd500e53c70 Mon Sep 17 00:00:00 2001 From: Nick Raptis Date: Fri, 8 May 2015 10:58:05 +0300 Subject: Accurately timestamp start of request When building a request from a stream, try to get an accurate start timestamp from the Reader. 
This was already in the code and also used when building response objects, but was omitted in commit ddf458b330bf9fe200cb1dbc3ddb5ae1a5d2102a. Without this logic and when the client is reusing a connection to send requests, the timestamp_start of subsequent requests is early and equal to when the connection started read blocking --- libmproxy/protocol/http.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 8d703786..7d80e0d8 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -344,6 +344,11 @@ class HTTPRequest(HTTPMessage): body_size_limit = body_size_limit, wfile = wfile ) + + if hasattr(rfile, "first_byte_timestamp"): + # more accurate timestamp_start + timestamp_start = rfile.first_byte_timestamp + timestamp_end = utils.timestamp() return HTTPRequest( req.form_in, -- cgit v1.2.3 From cdff79fd4c03f644c25373bf0d15ef30e3644180 Mon Sep 17 00:00:00 2001 From: Doug Lethin Date: Fri, 15 May 2015 22:20:09 -0400 Subject: Make sure proxy returns the httpversion specified in the request rather than hardcoding to 1.1.
--- libmproxy/protocol/http.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 8d703786..2bfc313c 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1248,7 +1248,8 @@ class HTTPHandler(ProtocolHandler): flow.server_conn = self.c.server_conn self.c.establish_server_connection() self.c.client_conn.send( - 'HTTP/1.1 200 Connection established\r\n' + + ('HTTP/%s.%s 200 ' % (request.httpversion[0],request.httpversion[1])) + + 'Connection established\r\n' + 'Content-Length: 0\r\n' + ('Proxy-agent: %s\r\n' % self.c.config.server_version) + '\r\n' -- cgit v1.2.3 From a05a70d8168a07c92b2a3ecbbb1958d85532efe3 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sat, 30 May 2015 12:03:28 +1200 Subject: Add coding style check, reformat. --- libmproxy/protocol/http.py | 101 +++++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 26 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 324a188f..91e74567 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -305,7 +305,18 @@ class HTTPRequest(HTTPMessage): @classmethod def from_state(cls, state): - f = cls(None, None, None, None, None, None, None, None, None, None, None) + f = cls( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None) f.load_state(state) return f @@ -315,7 +326,12 @@ class HTTPRequest(HTTPMessage): ) @classmethod - def from_stream(cls, rfile, include_body=True, body_size_limit=None, wfile=None): + def from_stream( + cls, + rfile, + include_body=True, + body_size_limit=None, + wfile=None): """ Parse an HTTP request from a file stream @@ -403,7 +419,8 @@ class HTTPRequest(HTTPMessage): self.host, self.port)] - # If content is defined (i.e. not None or CONTENT_MISSING), we always add a content-length header. 
+ # If content is defined (i.e. not None or CONTENT_MISSING), we always + # add a content-length header. if self.content or self.content == "": headers["Content-Length"] = [str(len(self.content))] @@ -460,9 +477,9 @@ class HTTPRequest(HTTPMessage): decode appropriately. """ if self.headers["accept-encoding"]: - self.headers["accept-encoding"] = [', '.join( - e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0] - )] + self.headers["accept-encoding"] = [ + ', '.join( + e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])] def update_host_header(self): """ @@ -489,13 +506,22 @@ class HTTPRequest(HTTPMessage): Returns an empty ODict if there is no data or the content-type indicates non-form data. """ - if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return odict.ODict(utils.urldecode(self.content)) + if self.content and self.headers.in_any( + "content-type", + HDR_FORM_URLENCODED, + True): + return odict.ODict(utils.urldecode(self.content)) return odict.ODict([]) def get_form_multipart(self): - if self.content and self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): - return odict.ODict(utils.multipartdecode(self.headers, self.content)) + if self.content and self.headers.in_any( + "content-type", + HDR_FORM_MULTIPART, + True): + return odict.ODict( + utils.multipartdecode( + self.headers, + self.content)) return odict.ODict([]) def set_form_urlencoded(self, odict): @@ -664,8 +690,15 @@ class HTTPResponse(HTTPMessage): timestamp_end: Timestamp indicating when request transmission ended """ - def __init__(self, httpversion, code, msg, headers, content, timestamp_start=None, - timestamp_end=None): + def __init__( + self, + httpversion, + code, + msg, + headers, + content, + timestamp_start=None, + timestamp_end=None): assert isinstance(headers, odict.ODictCaseless) or headers is None HTTPMessage.__init__( self, @@ -710,7 +743,12 @@ class HTTPResponse(HTTPMessage): ) @classmethod - def 
from_stream(cls, rfile, request_method, include_body=True, body_size_limit=None): + def from_stream( + cls, + rfile, + request_method, + include_body=True, + body_size_limit=None): """ Parse an HTTP response from a file stream """ @@ -760,7 +798,8 @@ class HTTPResponse(HTTPMessage): if not preserve_transfer_encoding: del headers['Transfer-Encoding'] - # If content is defined (i.e. not None or CONTENT_MISSING), we always add a content-length header. + # If content is defined (i.e. not None or CONTENT_MISSING), we always + # add a content-length header. if self.content or self.content == "": headers["Content-Length"] = [str(len(self.content))] @@ -1008,7 +1047,7 @@ class HTTPHandler(ProtocolHandler): include_body=False ) break - except (tcp.NetLibError, http.HttpErrorConnClosed), v: + except (tcp.NetLibError, http.HttpErrorConnClosed) as v: self.c.log( "error in server communication: %s" % repr(v), level="debug" @@ -1079,7 +1118,8 @@ class HTTPHandler(ProtocolHandler): if request_reply is None or request_reply == KILL: raise KillSignal() - self.process_server_address(flow) # The inline script may have changed request.host + # The inline script may have changed request.host + self.process_server_address(flow) if isinstance(request_reply, HTTPResponse): flow.response = request_reply @@ -1090,7 +1130,9 @@ class HTTPHandler(ProtocolHandler): # we can safely set it as the final attribute value here. 
flow.server_conn = self.c.server_conn - self.c.log("response", "debug", [flow.response._assemble_first_line()]) + self.c.log( + "response", "debug", [ + flow.response._assemble_first_line()]) response_reply = self.c.channel.ask("response", flow) if response_reply is None or response_reply == KILL: raise KillSignal() @@ -1117,7 +1159,8 @@ class HTTPHandler(ProtocolHandler): } ) ) - if not self.process_connect_request((flow.request.host, flow.request.port)): + if not self.process_connect_request( + (flow.request.host, flow.request.port)): return False # If the user has changed the target server on this connection, @@ -1130,7 +1173,7 @@ class HTTPHandler(ProtocolHandler): http.HttpError, proxy.ProxyError, tcp.NetLibError, - ), e: + ) as e: self.handle_error(e, flow) except KillSignal: self.c.log("Connection killed", "info") @@ -1226,7 +1269,8 @@ class HTTPHandler(ProtocolHandler): # Determine .scheme, .host and .port attributes # For absolute-form requests, they are directly given in the request. # For authority-form requests, we only need to determine the request scheme. - # For relative-form requests, we need to determine host and port as well. + # For relative-form requests, we need to determine host and port as + # well. if not request.scheme: request.scheme = "https" if flow.server_conn and flow.server_conn.ssl_established else "http" if not request.host: @@ -1253,8 +1297,8 @@ class HTTPHandler(ProtocolHandler): flow.server_conn = self.c.server_conn self.c.establish_server_connection() self.c.client_conn.send( - ('HTTP/%s.%s 200 ' % (request.httpversion[0],request.httpversion[1])) + - 'Connection established\r\n' + + ('HTTP/%s.%s 200 ' % (request.httpversion[0], request.httpversion[1])) + + 'Connection established\r\n' + 'Content-Length: 0\r\n' + ('Proxy-agent: %s\r\n' % self.c.config.server_version) + '\r\n' @@ -1372,10 +1416,15 @@ class HTTPHandler(ProtocolHandler): semantics. Returns True, if so. 
""" close_connection = ( - http.connection_close(flow.request.httpversion, flow.request.headers) or - http.connection_close(flow.response.httpversion, flow.response.headers) or - http.expected_http_body_size(flow.response.headers, False, flow.request.method, - flow.response.code) == -1) + http.connection_close( + flow.request.httpversion, + flow.request.headers) or http.connection_close( + flow.response.httpversion, + flow.response.headers) or http.expected_http_body_size( + flow.response.headers, + False, + flow.request.method, + flow.response.code) == -1) if close_connection: if flow.request.form_in == "authority" and flow.response.code == 200: # Workaround for -- cgit v1.2.3 From 7890450b0c9d0cd95a2e5f507a9a8247702051be Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Fri, 12 Jun 2015 16:00:16 +1200 Subject: Handle invalid IDNA encoding in hostnames Fixes #622 --- libmproxy/protocol/http.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 91e74567..9c143386 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -584,11 +584,10 @@ class HTTPRequest(HTTPMessage): of the request, e.g. if an upstream proxy is in place If hostheader is set to True, the Host: header will be used as - additional (and preferred) data source. This is handy in transparent - mode, where only the ip of the destination is known, but not the - resolved name. This is disabled by default, as an attacker may spoof - the host header to confuse an analyst. - + additional (and preferred) data source. This is handy in + transparent mode, where only the IO of the destination is known, + but not the resolved name. This is disabled by default, as an + attacker may spoof the host header to confuse an analyst. 
""" host = None if hostheader: @@ -596,7 +595,10 @@ class HTTPRequest(HTTPMessage): if not host: host = self.host if host: - return host.encode("idna") + try: + return host.encode("idna") + except ValueError: + return host else: return None -- cgit v1.2.3 From 59ec291b6cff1dfa83b316401418b6308df93aac Mon Sep 17 00:00:00 2001 From: iroiro123 Date: Thu, 18 Jun 2015 23:53:27 +0900 Subject: HTTP Transparent Proxy --- libmproxy/protocol/http.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 9c143386..c7479b76 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1328,7 +1328,20 @@ class HTTPHandler(ProtocolHandler): # value at flow.server_conn self.c.set_server_address((request.host, request.port)) flow.server_conn = self.c.server_conn - + + elif request.form_in == "relative": + if self.c.config.mode == "httptransparent": + h = request.headers.get_first("host") + if h is None: + raise http.HttpError( + 400, + "Invalid request: No Host header" + ) + p = http.parse_url("http://" + h) + request.host, request.port = p[1], p[2] + self.c.set_server_address((request.host, request.port)) + flow.server_conn = self.c.server_conn + return None raise http.HttpError( 400, "Invalid HTTP request form (expected: %s, got: %s)" % ( -- cgit v1.2.3 From 378aa783243cf23d84a39d02dde5420beadc188b Mon Sep 17 00:00:00 2001 From: iroiro123 Date: Sat, 20 Jun 2015 21:43:50 +0900 Subject: Spoof mode --- libmproxy/protocol/http.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index c7479b76..61782698 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1330,19 +1330,22 @@ class HTTPHandler(ProtocolHandler): flow.server_conn = self.c.server_conn elif request.form_in == "relative": - if 
self.c.config.mode == "httptransparent": - h = request.headers.get_first("host") + if self.c.config.mode == "spoof": + # Host header + h = request.pretty_host(hostheader=True) if h is None: raise http.HttpError( 400, "Invalid request: No Host header" ) p = http.parse_url("http://" + h) - request.host, request.port = p[1], p[2] + request.host = p[1] + request.port = p[2] self.c.set_server_address((request.host, request.port)) flow.server_conn = self.c.server_conn return None + raise http.HttpError( 400, "Invalid HTTP request form (expected: %s, got: %s)" % ( self.expected_form_in, request.form_in -- cgit v1.2.3 From fd903673299c050b7b4137aabf6b9265df3d6233 Mon Sep 17 00:00:00 2001 From: iroiro123 Date: Sun, 21 Jun 2015 00:51:56 +0900 Subject: SSL Spoof mode --- libmproxy/protocol/http.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 61782698..11436b30 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1336,13 +1336,18 @@ class HTTPHandler(ProtocolHandler): if h is None: raise http.HttpError( 400, - "Invalid request: No Host header" + "Invalid request: No host information" ) p = http.parse_url("http://" + h) - request.host = p[1] - request.port = p[2] + request.scheme = p[0] + request.host = p[1] + request.port = p[2] self.c.set_server_address((request.host, request.port)) flow.server_conn = self.c.server_conn + + if self.c.config.mode == "sslspoof": + # SNI is processed in server.py + return None return None -- cgit v1.2.3 From fbb23b5c9fae6e402d84ddae3c3b8c218def366c Mon Sep 17 00:00:00 2001 From: iroiro123 Date: Tue, 23 Jun 2015 01:49:22 +0900 Subject: changed error handling (ssl spoof mode) --- libmproxy/protocol/http.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 
11436b30..9bce7206 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1347,7 +1347,12 @@ class HTTPHandler(ProtocolHandler): if self.c.config.mode == "sslspoof": # SNI is processed in server.py - return None + if not (flow.server_conn and flow.server_conn.ssl_established): + print ":::::::::::::::" + raise http.HttpError( + 400, + "Invalid request: No host information" + ) return None -- cgit v1.2.3