diff options
author | Thomas Kriechbaumer <thomas@kriechbaumer.name> | 2015-08-01 10:39:14 +0200 |
---|---|---|
committer | Thomas Kriechbaumer <thomas@kriechbaumer.name> | 2015-08-01 12:40:40 +0200 |
commit | a837230320378d629ba9f25960b1dfd25c892ad9 (patch) | |
tree | eca444b3965abb294c6304ec41de2fbc307e240f /netlib/http/semantics.py | |
parent | 199f2a44fed6b5f1c6fada6c96b981dfab5fded2 (diff) | |
download | mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.tar.gz mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.tar.bz2 mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.zip |
move code from mitmproxy to netlib
Diffstat (limited to 'netlib/http/semantics.py')
-rw-r--r-- | netlib/http/semantics.py | 366 |
1 files changed, 290 insertions, 76 deletions
diff --git a/netlib/http/semantics.py b/netlib/http/semantics.py index 54bf83d2..e7ae2b5f 100644 --- a/netlib/http/semantics.py +++ b/netlib/http/semantics.py @@ -3,9 +3,15 @@ import binascii import collections import string import sys +import urllib import urlparse from .. import utils, odict +from . import cookies +from netlib import utils, encoding + +HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" +HDR_FORM_MULTIPART = "multipart/form-data" CONTENT_MISSING = 0 @@ -75,7 +81,240 @@ class Request(object): return False def __repr__(self): - return "Request(%s - %s, %s)" % (self.method, self.host, self.path) + # return "Request(%s - %s, %s)" % (self.method, self.host, self.path) + + return "<HTTPRequest: {0}>".format( + self.legacy_first_line()[:-9] + ) + + def legacy_first_line(self): + if self.form_in == "relative": + return '%s %s HTTP/%s.%s' % ( + self.method, + self.path, + self.httpversion[0], + self.httpversion[1], + ) + elif self.form_in == "authority": + return '%s %s:%s HTTP/%s.%s' % ( + self.method, + self.host, + self.port, + self.httpversion[0], + self.httpversion[1], + ) + elif self.form_in == "absolute": + return '%s %s://%s:%s%s HTTP/%s.%s' % ( + self.method, + self.scheme, + self.host, + self.port, + self.path, + self.httpversion[0], + self.httpversion[1], + ) + else: + raise http.HttpError(400, "Invalid request form") + + def anticache(self): + """ + Modifies this request to remove headers that might produce a cached + response. That is, we remove ETags and If-Modified-Since headers. + """ + delheaders = [ + "if-modified-since", + "if-none-match", + ] + for i in delheaders: + del self.headers[i] + + def anticomp(self): + """ + Modifies this request to remove headers that will compress the + resource's data. + """ + self.headers["accept-encoding"] = ["identity"] + + def constrain_encoding(self): + """ + Limits the permissible Accept-Encoding values, based on what we can + decode appropriately. + """ + if self.headers["accept-encoding"]: + self.headers["accept-encoding"] = [ + ', '.join( + e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])] + + def update_host_header(self): + """ + Update the host header to reflect the current target. + """ + self.headers["Host"] = [self.host] + + def get_form(self): + """ + Retrieves the URL-encoded or multipart form data, returning an ODict object. + Returns an empty ODict if there is no data or the content-type + indicates non-form data. + """ + if self.body: + if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): + return self.get_form_urlencoded() + elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return self.get_form_multipart() + return odict.ODict([]) + + def get_form_urlencoded(self): + """ + Retrieves the URL-encoded form data, returning an ODict object. + Returns an empty ODict if there is no data or the content-type + indicates non-form data. + """ + if self.body and self.headers.in_any( + "content-type", + HDR_FORM_URLENCODED, + True): + return odict.ODict(utils.urldecode(self.body)) + return odict.ODict([]) + + def get_form_multipart(self): + if self.body and self.headers.in_any( + "content-type", + HDR_FORM_MULTIPART, + True): + return odict.ODict( + utils.multipartdecode( + self.headers, + self.body)) + return odict.ODict([]) + + def set_form_urlencoded(self, odict): + """ + Sets the body to the URL-encoded form data, and adds the + appropriate content-type header. Note that this will destory the + existing body if there is one. + """ + # FIXME: If there's an existing content-type header indicating a + # url-encoded form, leave it alone. + self.headers["Content-Type"] = [HDR_FORM_URLENCODED] + self.body = utils.urlencode(odict.lst) + + def get_path_components(self): + """ + Returns the path components of the URL as a list of strings. + + Components are unquoted. + """ + _, _, path, _, _, _ = urlparse.urlparse(self.url) + return [urllib.unquote(i) for i in path.split("/") if i] + + def set_path_components(self, lst): + """ + Takes a list of strings, and sets the path component of the URL. + + Components are quoted. + """ + lst = [urllib.quote(i, safe="") for i in lst] + path = "/" + "/".join(lst) + scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) + self.url = urlparse.urlunparse( + [scheme, netloc, path, params, query, fragment] + ) + + def get_query(self): + """ + Gets the request query string. Returns an ODict object. + """ + _, _, _, _, query, _ = urlparse.urlparse(self.url) + if query: + return odict.ODict(utils.urldecode(query)) + return odict.ODict([]) + + def set_query(self, odict): + """ + Takes an ODict object, and sets the request query string. + """ + scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url) + query = utils.urlencode(odict.lst) + self.url = urlparse.urlunparse( + [scheme, netloc, path, params, query, fragment] + ) + + def pretty_host(self, hostheader): + """ + Heuristic to get the host of the request. + + Note that pretty_host() does not always return the TCP destination + of the request, e.g. if an upstream proxy is in place + + If hostheader is set to True, the Host: header will be used as + additional (and preferred) data source. This is handy in + transparent mode, where only the IO of the destination is known, + but not the resolved name. This is disabled by default, as an + attacker may spoof the host header to confuse an analyst. + """ + host = None + if hostheader: + host = self.headers.get_first("host") + if not host: + host = self.host + if host: + try: + return host.encode("idna") + except ValueError: + return host + else: + return None + + def pretty_url(self, hostheader): + if self.form_out == "authority": # upstream proxy mode + return "%s:%s" % (self.pretty_host(hostheader), self.port) + return utils.unparse_url(self.scheme, + self.pretty_host(hostheader), + self.port, + self.path).encode('ascii') + + def get_cookies(self): + """ + Returns a possibly empty netlib.odict.ODict object. + """ + ret = odict.ODict() + for i in self.headers["cookie"]: + ret.extend(cookies.parse_cookie_header(i)) + return ret + + def set_cookies(self, odict): + """ + Takes an netlib.odict.ODict object. Over-writes any existing Cookie + headers. + """ + v = cookies.format_cookie_header(odict) + self.headers["Cookie"] = [v] + + @property + def url(self): + """ + Returns a URL string, constructed from the Request's URL components. + """ + return utils.unparse_url( + self.scheme, + self.host, + self.port, + self.path + ).encode('ascii') + + @url.setter + def url(self, url): + """ + Parses a URL specification, and updates the Request's information + accordingly. + + Returns False if the URL was invalid, True if the request succeeded. + """ + parts = utils.parse_url(url) + if not parts: + raise ValueError("Invalid URL: %s" % url) + self.scheme, self.host, self.port, self.path = parts @property def content(self): @@ -139,7 +378,56 @@ class Response(object): return False def __repr__(self): - return "Response(%s - %s)" % (self.status_code, self.msg) + # return "Response(%s - %s)" % (self.status_code, self.msg) + + if self.body: + size = utils.pretty_size(len(self.body)) + else: + size = "content missing" + return "<HTTPResponse: {status_code} {msg} ({contenttype}, {size})>".format( + status_code=self.status_code, + msg=self.msg, + contenttype=self.headers.get_first( + "content-type", "unknown content type" + ), + size=size + ) + + + def get_cookies(self): + """ + Get the contents of all Set-Cookie headers. + + Returns a possibly empty ODict, where keys are cookie name strings, + and values are [value, attr] lists. Value is a string, and attr is + an ODictCaseless containing cookie attributes. Within attrs, unary + attributes (e.g. HTTPOnly) are indicated by a Null value. + """ + ret = [] + for header in self.headers["set-cookie"]: + v = cookies.parse_set_cookie_header(header) + if v: + name, value, attrs = v + ret.append([name, [value, attrs]]) + return odict.ODict(ret) + + def set_cookies(self, odict): + """ + Set the Set-Cookie headers on this response, over-writing existing + headers. + + Accepts an ODict of the same format as that returned by get_cookies. + """ + values = [] + for i in odict.lst: + values.append( + cookies.format_set_cookie_header( + i[0], + i[1][0], + i[1][1] + ) + ) + self.headers["Set-Cookie"] = values @property def content(self): @@ -160,77 +448,3 @@ class Response(object): def code(self, code): # TODO: remove deprecated setter self.status_code = code - - - -def is_valid_port(port): - if not 0 <= port <= 65535: - return False - return True - - -def is_valid_host(host): - try: - host.decode("idna") - except ValueError: - return False - if "\0" in host: - return None - return True - - -def parse_url(url): - """ - Returns a (scheme, host, port, path) tuple, or None on error. - - Checks that: - port is an integer 0-65535 - host is a valid IDNA-encoded hostname with no null-bytes - path is valid ASCII - """ - try: - scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) - except ValueError: - return None - if not scheme: - return None - if '@' in netloc: - # FIXME: Consider what to do with the discarded credentials here Most - # probably we should extend the signature to return these as a separate - # value. - _, netloc = string.rsplit(netloc, '@', maxsplit=1) - if ':' in netloc: - host, port = string.rsplit(netloc, ':', maxsplit=1) - try: - port = int(port) - except ValueError: - return None - else: - host = netloc - if scheme == "https": - port = 443 - else: - port = 80 - path = urlparse.urlunparse(('', '', path, params, query, fragment)) - if not path.startswith("/"): - path = "/" + path - if not is_valid_host(host): - return None - if not utils.isascii(path): - return None - if not is_valid_port(port): - return None - return scheme, host, port, path - - -def get_header_tokens(headers, key): - """ - Retrieve all tokens for a header key. A number of different headers - follow a pattern where each header line can containe comma-separated - tokens, and headers can be set multiple times. - """ - toks = [] - for i in headers[key]: - for j in i.split(","): - toks.append(j.strip()) - return toks |