diff options
Diffstat (limited to 'libmproxy/protocol/http.py')
-rw-r--r-- | libmproxy/protocol/http.py | 234 |
1 files changed, 130 insertions, 104 deletions
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 3f9eecb3..90d8ff16 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -26,7 +26,7 @@ def get_line(fp): return line -def send_connect_request(conn, host, port): +def send_connect_request(conn, host, port, update_state=True): upstream_request = HTTPRequest("authority", "CONNECT", None, host, port, None, (1, 1), ODictCaseless(), "") conn.send(upstream_request._assemble()) @@ -36,6 +36,12 @@ def send_connect_request(conn, host, port): "Cannot establish SSL " + "connection with upstream proxy: \r\n" + str(resp._assemble())) + if update_state: + conn.state.append(("http", { + "state": "connect", + "host": host, + "port": port} + )) return resp @@ -405,7 +411,14 @@ class HTTPRequest(HTTPMessage): e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0] )] - def get_form_urlencoded(self): + def update_host_header(self): + """ + Update the host header to reflect the current target. + """ + self.headers["Host"] = [self.host] + + @property + def form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. Returns an empty ODict if there is no data or the content-type @@ -415,7 +428,8 @@ class HTTPRequest(HTTPMessage): return ODict(utils.urldecode(self.content)) return ODict([]) - def set_form_urlencoded(self, odict): + @form_urlencoded.setter + def form_urlencoded(self, odict): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. Note that this will destory the @@ -426,16 +440,18 @@ class HTTPRequest(HTTPMessage): self.headers["Content-Type"] = [HDR_FORM_URLENCODED] self.content = utils.urlencode(odict.lst) - def get_path_components(self, f): + @property + def path_components(self): """ Returns the path components of the URL as a list of strings. Components are unquoted. """ - _, _, path, _, _, _ = urlparse.urlparse(self.get_url(False, f)) + _, _, path, _, _, _ = urlparse.urlparse(self.url) return [urllib.unquote(i) for i in path.split("/") if i] - def set_path_components(self, lst, f): + @path_components.setter + def path_components(self, lst): """ Takes a list of strings, and sets the path component of the URL. @@ -443,32 +459,34 @@ class HTTPRequest(HTTPMessage): """ lst = [urllib.quote(i, safe="") for i in lst] path = "/" + "/".join(lst) - scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url(False, f)) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) + scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) + self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment]) - def get_query(self, f): + @property + def query(self): """ Gets the request query string. Returns an ODict object. """ - _, _, _, _, query, _ = urlparse.urlparse(self.get_url(False, f)) + _, _, _, _, query, _ = urlparse.urlparse(self.url) if query: return ODict(utils.urldecode(query)) return ODict([]) - def set_query(self, odict, f): + @query.setter + def query(self, odict): """ Takes an ODict object, and sets the request query string. """ - scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url(False, f)) + scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url) query = utils.urlencode(odict.lst) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) + self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment]) - def get_host(self, hostheader, flow): + def pretty_host(self, hostheader): """ Heuristic to get the host of the request. - Note that get_host() does not always return the TCP destination of the request, - e.g. on a transparently intercepted request to an unrelated HTTP proxy. + Note that pretty_host() does not always return the TCP destination of the request, + e.g. if an upstream proxy is in place If hostheader is set to True, the Host: header will be used as additional (and preferred) data source. This is handy in transparent mode, where only the ip of the destination is known, but not the @@ -478,54 +496,27 @@ class HTTPRequest(HTTPMessage): if hostheader: host = self.headers.get_first("host") if not host: - if self.host: - host = self.host - else: - for s in flow.server_conn.state: - if s[0] == "http" and s[1]["state"] == "connect": - host = s[1]["host"] - break - if not host: - host = flow.server_conn.address.host + host = self.host host = host.encode("idna") return host - def get_scheme(self, flow): - """ - Returns the request port, either from the request itself or from the flow's server connection - """ - if self.scheme: - return self.scheme - if self.form_out == "authority": # On SSLed connections, the original CONNECT request is still unencrypted. - return "http" - return "https" if flow.server_conn.ssl_established else "http" - - def get_port(self, flow): - """ - Returns the request port, either from the request itself or from the flow's server connection - """ - if self.port: - return self.port - for s in flow.server_conn.state: - if s[0] == "http" and s[1].get("state") == "connect": - return s[1]["port"] - return flow.server_conn.address.port + def pretty_url(self, hostheader): + if self.form_out == "authority": # upstream proxy mode + return "%s:%s" % (self.pretty_host(hostheader), self.port) + return utils.unparse_url(self.scheme, + self.pretty_host(hostheader), + self.port, + self.path).encode('ascii') - def get_url(self, hostheader, flow): + @property + def url(self): """ Returns a URL string, constructed from the Request's URL components. - - If hostheader is True, we use the value specified in the request - Host header to construct the URL. """ - if self.form_out == "authority": # upstream proxy mode - return "%s:%s" % (self.get_host(hostheader, flow), self.get_port(flow)) - return utils.unparse_url(self.get_scheme(flow), - self.get_host(hostheader, flow), - self.get_port(flow), - self.path).encode('ascii') + return self.pretty_url(False) - def set_url(self, url, flow): + @url.setter + def url(self, url): """ Parses a URL specification, and updates the Request's information accordingly. @@ -534,32 +525,11 @@ class HTTPRequest(HTTPMessage): """ parts = http.parse_url(url) if not parts: - return False - scheme, host, port, path = parts - is_ssl = (True if scheme == "https" else False) - - self.path = path + raise ValueError("Invalid URL: %s" % url) + self.scheme, self.host, self.port, self.path = parts - if host != self.get_host(False, flow) or port != self.get_port(flow): - if flow.live: - flow.live.change_server((host, port), ssl=is_ssl) - else: - # There's not live server connection, we're just changing the attributes here. - flow.server_conn = ServerConnection((host, port), - proxy.AddressPriority.MANUALLY_CHANGED) - flow.server_conn.ssl_established = is_ssl - - # If this is an absolute request, replace the attributes on the request object as well. - if self.host: - self.host = host - if self.port: - self.port = port - if self.scheme: - self.scheme = scheme - - return True - - def get_cookies(self): + @property + def cookies(self): cookie_headers = self.headers.get("cookie") if not cookie_headers: return None @@ -755,7 +725,8 @@ class HTTPResponse(HTTPMessage): if c: self.headers["set-cookie"] = c - def get_cookies(self): + @property + def cookies(self): cookie_headers = self.headers.get("set-cookie") if not cookie_headers: return None @@ -815,7 +786,7 @@ class HTTPFlow(Flow): s = "<HTTPFlow" for a in ("request", "response", "error", "client_conn", "server_conn"): if getattr(self, a, False): - s += "\r\n %s = {flow.%s}" % (a,a) + s += "\r\n %s = {flow.%s}" % (a, a) s += ">" return s.format(flow=self) @@ -950,8 +921,7 @@ class HTTPHandler(ProtocolHandler): # sent through to the Master. flow.request = req request_reply = self.c.channel.ask("request", flow) - self.determine_server_address(flow, flow.request) # The inline script may have changed request.host - flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow + self.process_server_address(flow) # The inline script may have changed request.host if request_reply is None or request_reply == KILL: return False @@ -1048,7 +1018,7 @@ class HTTPHandler(ProtocolHandler): def handle_server_reconnect(self, state): if state["state"] == "connect": - send_connect_request(self.c.server_conn, state["host"], state["port"]) + send_connect_request(self.c.server_conn, state["host"], state["port"], update_state=False) else: # pragma: nocover raise RuntimeError("Unknown State: %s" % state["state"]) @@ -1114,14 +1084,30 @@ class HTTPHandler(ProtocolHandler): if not self.skip_authentication: self.authenticate(request) + # Determine .scheme, .host and .port attributes + # For absolute-form requests, they are directly given in the request. + # For authority-form requests, we only need to determine the request scheme. + # For relative-form requests, we need to determine host and port as well. + if not request.scheme: + request.scheme = "https" if flow.server_conn and flow.server_conn.ssl_established else "http" + if not request.host: + # Host/Port Complication: In upstream mode, use the server we CONNECTed to, + # not the upstream proxy. + if flow.server_conn: + for s in flow.server_conn.state: + if s[0] == "http" and s[1]["state"] == "connect": + request.host, request.port = s[1]["host"], s[1]["port"] + if not request.host and flow.server_conn: + request.host, request.port = flow.server_conn.address.host, flow.server_conn.address.port + + # Now we can process the request. if request.form_in == "authority": if self.c.client_conn.ssl_established: raise http.HttpError(400, "Must not CONNECT on already encrypted connection") if self.expected_form_in == "absolute": - if not self.c.config.get_upstream_server: - self.c.set_server_address((request.host, request.port), - proxy.AddressPriority.FROM_PROTOCOL) + if not self.c.config.get_upstream_server: # Regular mode + self.c.set_server_address((request.host, request.port)) flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow self.c.establish_server_connection() self.c.client_conn.send( @@ -1140,24 +1126,63 @@ class HTTPHandler(ProtocolHandler): self.ssl_upgrade() self.skip_authentication = True return True - else: + else: # upstream proxy mode return None + else: + pass # CONNECT should never occur if we don't expect absolute-form requests + elif request.form_in == self.expected_form_in: + + request.form_out = self.expected_form_out + if request.form_in == "absolute": if request.scheme != "http": raise http.HttpError(400, "Invalid request scheme: %s" % request.scheme) - self.determine_server_address(flow, request) - request.form_out = self.expected_form_out + if request.form_out == "relative": + self.c.set_server_address((request.host, request.port)) + flow.server_conn = self.c.server_conn + + return None raise http.HttpError(400, "Invalid HTTP request form (expected: %s, got: %s)" % (self.expected_form_in, request.form_in)) - def determine_server_address(self, flow, request): - if request.form_in == "absolute": - self.c.set_server_address((request.host, request.port), - proxy.AddressPriority.FROM_PROTOCOL) - flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow + def process_server_address(self, flow): + # Depending on the proxy mode, server handling is entirely different + # We provide a mostly unified API to the user, which needs to be unfiddled here + # ( See also: https://github.com/mitmproxy/mitmproxy/issues/337 ) + address = netlib.tcp.Address((flow.request.host, flow.request.port)) + + ssl = (flow.request.scheme == "https") + + if self.c.config.http_form_in == self.c.config.http_form_out == "absolute": # Upstream Proxy mode + + # The connection to the upstream proxy may have a state we may need to take into account. + connected_to = None + for s in flow.server_conn.state: + if s[0] == "http" and s[1]["state"] == "connect": + connected_to = tcp.Address((s[1]["host"], s[1]["port"])) + + # We need to reconnect if the current flow either requires a (possibly impossible) + # change to the connection state, e.g. the host has changed but we already CONNECTed somewhere else. + needs_server_change = ( + ssl != self.c.server_conn.ssl_established + or + (connected_to and address != connected_to) # HTTP proxying is "stateless", CONNECT isn't. + ) + + if needs_server_change: + # force create new connection to the proxy server to reset state + self.live.change_server(self.c.server_conn.address, force=True) + if ssl: + send_connect_request(self.c.server_conn, address.host, address.port) + self.c.establish_ssl(server=True) + else: + # If we're not in upstream mode, we just want to update the host and possibly establish TLS. + self.live.change_server(address, ssl=ssl) # this is a no op if the addresses match. + + flow.server_conn = self.c.server_conn def authenticate(self, request): if self.c.config.authenticator: @@ -1183,7 +1208,9 @@ class RequestReplayThread(threading.Thread): r.form_out = self.config.http_form_out server_address, server_ssl = False, False - if self.config.get_upstream_server: + # If the flow is live, r.host is already the correct upstream server unless modified by a script. + # If modified by a script, we probably want to keep the modified destination. + if self.config.get_upstream_server and not self.flow.live: try: # this will fail in transparent mode upstream_info = self.config.get_upstream_server(self.flow.client_conn) @@ -1192,17 +1219,16 @@ class RequestReplayThread(threading.Thread): except proxy.ProxyError: pass if not server_address: - server_address = (r.get_host(False, self.flow), r.get_port(self.flow)) + server_address = (r.host, r.port) - server = ServerConnection(server_address, None) + server = ServerConnection(server_address) server.connect() - if server_ssl or r.get_scheme(self.flow) == "https": + if server_ssl or r.scheme == "https": if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode -> send CONNECT - send_connect_request(server, r.get_host(), r.get_port()) + send_connect_request(server, r.host, r.port) r.form_out = "relative" - server.establish_ssl(self.config.clientcerts, - self.flow.server_conn.sni) + server.establish_ssl(self.config.clientcerts, sni=r.host) server.send(r._assemble()) self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, body_size_limit=self.config.body_size_limit) |