aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http/semantics.py
diff options
context:
space:
mode:
author Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-08-01 10:39:14 +0200
committer Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-08-01 12:40:40 +0200
commit a837230320378d629ba9f25960b1dfd25c892ad9 (patch)
tree eca444b3965abb294c6304ec41de2fbc307e240f /netlib/http/semantics.py
parent 199f2a44fed6b5f1c6fada6c96b981dfab5fded2 (diff)
download mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.tar.gz
mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.tar.bz2
mitmproxy-a837230320378d629ba9f25960b1dfd25c892ad9.zip
move code from mitmproxy to netlib
Diffstat (limited to 'netlib/http/semantics.py')
-rw-r--r-- netlib/http/semantics.py 366
1 files changed, 290 insertions, 76 deletions
diff --git a/netlib/http/semantics.py b/netlib/http/semantics.py
index 54bf83d2..e7ae2b5f 100644
--- a/netlib/http/semantics.py
+++ b/netlib/http/semantics.py
@@ -3,9 +3,15 @@ import binascii
import collections
import string
import sys
+import urllib
import urlparse
from .. import utils, odict
+from . import cookies
+from netlib import utils, encoding
+
+HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
+HDR_FORM_MULTIPART = "multipart/form-data"
CONTENT_MISSING = 0
@@ -75,7 +81,240 @@ class Request(object):
return False
def __repr__(self):
- return "Request(%s - %s, %s)" % (self.method, self.host, self.path)
+ # return "Request(%s - %s, %s)" % (self.method, self.host, self.path)
+
+ return "<HTTPRequest: {0}>".format(
+ self.legacy_first_line()[:-9]
+ )
+
+ def legacy_first_line(self):
+ if self.form_in == "relative":
+ return '%s %s HTTP/%s.%s' % (
+ self.method,
+ self.path,
+ self.httpversion[0],
+ self.httpversion[1],
+ )
+ elif self.form_in == "authority":
+ return '%s %s:%s HTTP/%s.%s' % (
+ self.method,
+ self.host,
+ self.port,
+ self.httpversion[0],
+ self.httpversion[1],
+ )
+ elif self.form_in == "absolute":
+ return '%s %s://%s:%s%s HTTP/%s.%s' % (
+ self.method,
+ self.scheme,
+ self.host,
+ self.port,
+ self.path,
+ self.httpversion[0],
+ self.httpversion[1],
+ )
+ else:
+ raise http.HttpError(400, "Invalid request form")
+
+ def anticache(self):
+ """
+ Modifies this request to remove headers that might produce a cached
+ response. That is, we remove the If-None-Match and If-Modified-Since headers.
+ """
+ delheaders = [
+ "if-modified-since",
+ "if-none-match",
+ ]
+ for i in delheaders:
+ del self.headers[i]
+
+ def anticomp(self):
+ """
+ Modifies this request to remove headers that will compress the
+ resource's data.
+ """
+ self.headers["accept-encoding"] = ["identity"]
+
+ def constrain_encoding(self):
+ """
+ Limits the permissible Accept-Encoding values, based on what we can
+ decode appropriately.
+ """
+ if self.headers["accept-encoding"]:
+ self.headers["accept-encoding"] = [
+ ', '.join(
+ e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])]
+
+ def update_host_header(self):
+ """
+ Update the host header to reflect the current target.
+ """
+ self.headers["Host"] = [self.host]
+
+ def get_form(self):
+ """
+ Retrieves the URL-encoded or multipart form data, returning an ODict object.
+ Returns an empty ODict if there is no data or the content-type
+ indicates non-form data.
+ """
+ if self.body:
+ if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
+ return self.get_form_urlencoded()
+ elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True):
+ return self.get_form_multipart()
+ return odict.ODict([])
+
+ def get_form_urlencoded(self):
+ """
+ Retrieves the URL-encoded form data, returning an ODict object.
+ Returns an empty ODict if there is no data or the content-type
+ indicates non-form data.
+ """
+ if self.body and self.headers.in_any(
+ "content-type",
+ HDR_FORM_URLENCODED,
+ True):
+ return odict.ODict(utils.urldecode(self.body))
+ return odict.ODict([])
+
+ def get_form_multipart(self):
+ if self.body and self.headers.in_any(
+ "content-type",
+ HDR_FORM_MULTIPART,
+ True):
+ return odict.ODict(
+ utils.multipartdecode(
+ self.headers,
+ self.body))
+ return odict.ODict([])
+
+ def set_form_urlencoded(self, odict):
+ """
+ Sets the body to the URL-encoded form data, and adds the
+ appropriate content-type header. Note that this will destroy the
+ existing body if there is one.
+ """
+ # FIXME: If there's an existing content-type header indicating a
+ # url-encoded form, leave it alone.
+ self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
+ self.body = utils.urlencode(odict.lst)
+
+ def get_path_components(self):
+ """
+ Returns the path components of the URL as a list of strings.
+
+ Components are unquoted.
+ """
+ _, _, path, _, _, _ = urlparse.urlparse(self.url)
+ return [urllib.unquote(i) for i in path.split("/") if i]
+
+ def set_path_components(self, lst):
+ """
+ Takes a list of strings, and sets the path component of the URL.
+
+ Components are quoted.
+ """
+ lst = [urllib.quote(i, safe="") for i in lst]
+ path = "/" + "/".join(lst)
+ scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url)
+ self.url = urlparse.urlunparse(
+ [scheme, netloc, path, params, query, fragment]
+ )
+
+ def get_query(self):
+ """
+ Gets the request query string. Returns an ODict object.
+ """
+ _, _, _, _, query, _ = urlparse.urlparse(self.url)
+ if query:
+ return odict.ODict(utils.urldecode(query))
+ return odict.ODict([])
+
+ def set_query(self, odict):
+ """
+ Takes an ODict object, and sets the request query string.
+ """
+ scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url)
+ query = utils.urlencode(odict.lst)
+ self.url = urlparse.urlunparse(
+ [scheme, netloc, path, params, query, fragment]
+ )
+
+ def pretty_host(self, hostheader):
+ """
+ Heuristic to get the host of the request.
+
+ Note that pretty_host() does not always return the TCP destination
+ of the request, e.g. if an upstream proxy is in place.
+
+ If hostheader is set to True, the Host: header will be used as
+ additional (and preferred) data source. This is handy in
+ transparent mode, where only the IP of the destination is known,
+ but not the resolved name. This is disabled by default, as an
+ attacker may spoof the host header to confuse an analyst.
+ """
+ host = None
+ if hostheader:
+ host = self.headers.get_first("host")
+ if not host:
+ host = self.host
+ if host:
+ try:
+ return host.encode("idna")
+ except ValueError:
+ return host
+ else:
+ return None
+
+ def pretty_url(self, hostheader):
+ if self.form_out == "authority": # upstream proxy mode
+ return "%s:%s" % (self.pretty_host(hostheader), self.port)
+ return utils.unparse_url(self.scheme,
+ self.pretty_host(hostheader),
+ self.port,
+ self.path).encode('ascii')
+
+ def get_cookies(self):
+ """
+ Returns a possibly empty netlib.odict.ODict object.
+ """
+ ret = odict.ODict()
+ for i in self.headers["cookie"]:
+ ret.extend(cookies.parse_cookie_header(i))
+ return ret
+
+ def set_cookies(self, odict):
+ """
+ Takes a netlib.odict.ODict object. Over-writes any existing Cookie
+ headers.
+ """
+ v = cookies.format_cookie_header(odict)
+ self.headers["Cookie"] = [v]
+
+ @property
+ def url(self):
+ """
+ Returns a URL string, constructed from the Request's URL components.
+ """
+ return utils.unparse_url(
+ self.scheme,
+ self.host,
+ self.port,
+ self.path
+ ).encode('ascii')
+
+ @url.setter
+ def url(self, url):
+ """
+ Parses a URL specification, and updates the Request's information
+ accordingly.
+
+ Raises ValueError if the URL is invalid.
+ """
+ parts = utils.parse_url(url)
+ if not parts:
+ raise ValueError("Invalid URL: %s" % url)
+ self.scheme, self.host, self.port, self.path = parts
@property
def content(self):
@@ -139,7 +378,56 @@ class Response(object):
return False
def __repr__(self):
- return "Response(%s - %s)" % (self.status_code, self.msg)
+ # return "Response(%s - %s)" % (self.status_code, self.msg)
+
+ if self.body:
+ size = utils.pretty_size(len(self.body))
+ else:
+ size = "content missing"
+ return "<HTTPResponse: {status_code} {msg} ({contenttype}, {size})>".format(
+ status_code=self.status_code,
+ msg=self.msg,
+ contenttype=self.headers.get_first(
+ "content-type", "unknown content type"
+ ),
+ size=size
+ )
+
+
+ def get_cookies(self):
+ """
+ Get the contents of all Set-Cookie headers.
+
+ Returns a possibly empty ODict, where keys are cookie name strings,
+ and values are [value, attr] lists. Value is a string, and attr is
+ an ODictCaseless containing cookie attributes. Within attrs, unary
+ attributes (e.g. HTTPOnly) are indicated by a Null value.
+ """
+ ret = []
+ for header in self.headers["set-cookie"]:
+ v = cookies.parse_set_cookie_header(header)
+ if v:
+ name, value, attrs = v
+ ret.append([name, [value, attrs]])
+ return odict.ODict(ret)
+
+ def set_cookies(self, odict):
+ """
+ Set the Set-Cookie headers on this response, over-writing existing
+ headers.
+
+ Accepts an ODict of the same format as that returned by get_cookies.
+ """
+ values = []
+ for i in odict.lst:
+ values.append(
+ cookies.format_set_cookie_header(
+ i[0],
+ i[1][0],
+ i[1][1]
+ )
+ )
+ self.headers["Set-Cookie"] = values
@property
def content(self):
@@ -160,77 +448,3 @@ class Response(object):
def code(self, code):
# TODO: remove deprecated setter
self.status_code = code
-
-
-
-def is_valid_port(port):
- if not 0 <= port <= 65535:
- return False
- return True
-
-
-def is_valid_host(host):
- try:
- host.decode("idna")
- except ValueError:
- return False
- if "\0" in host:
- return None
- return True
-
-
-def parse_url(url):
- """
- Returns a (scheme, host, port, path) tuple, or None on error.
-
- Checks that:
- port is an integer 0-65535
- host is a valid IDNA-encoded hostname with no null-bytes
- path is valid ASCII
- """
- try:
- scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
- except ValueError:
- return None
- if not scheme:
- return None
- if '@' in netloc:
- # FIXME: Consider what to do with the discarded credentials here Most
- # probably we should extend the signature to return these as a separate
- # value.
- _, netloc = string.rsplit(netloc, '@', maxsplit=1)
- if ':' in netloc:
- host, port = string.rsplit(netloc, ':', maxsplit=1)
- try:
- port = int(port)
- except ValueError:
- return None
- else:
- host = netloc
- if scheme == "https":
- port = 443
- else:
- port = 80
- path = urlparse.urlunparse(('', '', path, params, query, fragment))
- if not path.startswith("/"):
- path = "/" + path
- if not is_valid_host(host):
- return None
- if not utils.isascii(path):
- return None
- if not is_valid_port(port):
- return None
- return scheme, host, port, path
-
-
-def get_header_tokens(headers, key):
- """
- Retrieve all tokens for a header key. A number of different headers
- follow a pattern where each header line can containe comma-separated
- tokens, and headers can be set multiple times.
- """
- toks = []
- for i in headers[key]:
- for j in i.split(","):
- toks.append(j.strip())
- return toks