diff options
Diffstat (limited to 'netlib/http/semantics.py')
-rw-r--r-- | netlib/http/semantics.py | 297 |
1 files changed, 228 insertions, 69 deletions
diff --git a/netlib/http/semantics.py b/netlib/http/semantics.py index 2b960483..edf5fc07 100644 --- a/netlib/http/semantics.py +++ b/netlib/http/semantics.py @@ -1,8 +1,10 @@ from __future__ import (absolute_import, print_function, division) +import UserDict +import copy import urllib import urlparse -from .. import utils, odict +from .. import odict from . import cookies, exceptions from netlib import utils, encoding @@ -12,8 +14,165 @@ HDR_FORM_MULTIPART = "multipart/form-data" CONTENT_MISSING = 0 -class ProtocolMixin(object): +class Headers(UserDict.DictMixin): + """ + Header class which allows both convenient access to individual headers as well as + direct access to the underlying raw data. Provides a full dictionary interface. + + Example: + + .. code-block:: python + + # Create header from a list of (header_name, header_value) tuples + >>> h = Headers([ + ["Host","example.com"], + ["Accept","text/html"], + ["accept","application/xml"] + ]) + + # Headers mostly behave like a normal dict. + >>> h["Host"] + "example.com" + + # HTTP Headers are case insensitive + >>> h["host"] + "example.com" + + # Multiple headers are folded into a single header as per RFC7230 + >>> h["Accept"] + "text/html, application/xml" + + # Setting a header removes all existing headers with the same name. + >>> h["Accept"] = "application/text" + >>> h["Accept"] + "application/text" + + # str(h) returns a HTTP1 header block. + >>> print(h) + Host: example.com + Accept: application/text + + # For full control, the raw header fields can be accessed + >>> h.fields + + # Headers can also be crated from keyword arguments + >>> h = Headers(host="example.com", content_type="application/xml") + + Caveats: + For use with the "Set-Cookie" header, see :py:meth:`get_all`. + """ + + def __init__(self, fields=None, **headers): + """ + Args: + fields: (optional) list of ``(name, value)`` header tuples, e.g. ``[("Host","example.com")]`` + **headers: Additional headers to set. Will overwrite existing values from `fields`. + For convenience, underscores in header names will be transformed to dashes - + this behaviour does not extend to other methods. + If ``**headers`` contains multiple keys that have equal ``.lower()`` s, + the behavior is undefined. + """ + self.fields = fields or [] + + # content_type -> content-type + headers = { + name.replace("_", "-"): value + for name, value in headers.iteritems() + } + self.update(headers) + + def __str__(self): + return "\r\n".join(": ".join(field) for field in self.fields) + "\r\n" + + def __getitem__(self, name): + values = self.get_all(name) + if not values: + raise KeyError(name) + else: + return ", ".join(values) + + def __setitem__(self, name, value): + idx = self._index(name) + + # To please the human eye, we insert at the same position the first existing header occured. + if idx is not None: + del self[name] + self.fields.insert(idx, [name, value]) + else: + self.fields.append([name, value]) + + def __delitem__(self, name): + if name not in self: + raise KeyError(name) + name = name.lower() + self.fields = [ + field for field in self.fields + if name != field[0].lower() + ] + + def _index(self, name): + name = name.lower() + for i, field in enumerate(self.fields): + if field[0].lower() == name: + return i + return None + + def keys(self): + seen = set() + names = [] + for name, _ in self.fields: + name_lower = name.lower() + if name_lower not in seen: + seen.add(name_lower) + names.append(name) + return names + + def __eq__(self, other): + if isinstance(other, Headers): + return self.fields == other.fields + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def get_all(self, name, default=[]): + """ + Like :py:meth:`get`, but does not fold multiple headers into a single one. + This is useful for Set-Cookie headers, which do not support folding. + + See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 + """ + name = name.lower() + values = [value for n, value in self.fields if n.lower() == name] + return values or default + + def set_all(self, name, values): + """ + Explicitly set multiple headers for the given key. + See: :py:meth:`get_all` + """ + if name in self: + del self[name] + self.fields.extend( + [name, value] for value in values + ) + + def copy(self): + return Headers(copy.copy(self.fields)) + + # Implement the StateObject protocol from mitmproxy + def get_state(self, short=False): + return tuple(tuple(field) for field in self.fields) + + def load_state(self, state): + self.fields = [list(field) for field in state] + + @classmethod + def from_state(cls, state): + return cls([list(field) for field in state]) + +class ProtocolMixin(object): def read_request(self, *args, **kwargs): # pragma: no cover raise NotImplementedError @@ -47,23 +206,23 @@ class Request(object): ] def __init__( - self, - form_in, - method, - scheme, - host, - port, - path, - httpversion, - headers=None, - body=None, - timestamp_start=None, - timestamp_end=None, - form_out=None + self, + form_in, + method, + scheme, + host, + port, + path, + httpversion, + headers=None, + body=None, + timestamp_start=None, + timestamp_end=None, + form_out=None ): if not headers: - headers = odict.ODictCaseless() - assert isinstance(headers, odict.ODictCaseless) + headers = Headers() + assert isinstance(headers, Headers) self.form_in = form_in self.method = method @@ -80,8 +239,10 @@ class Request(object): def __eq__(self, other): try: - self_d = [self.__dict__[k] for k in self.__dict__ if k not in ('timestamp_start', 'timestamp_end')] - other_d = [other.__dict__[k] for k in other.__dict__ if k not in ('timestamp_start', 'timestamp_end')] + self_d = [self.__dict__[k] for k in self.__dict__ if + k not in ('timestamp_start', 'timestamp_end')] + other_d = [other.__dict__[k] for k in other.__dict__ if + k not in ('timestamp_start', 'timestamp_end')] return self_d == other_d except: return False @@ -134,30 +295,35 @@ class Request(object): "if-none-match", ] for i in delheaders: - del self.headers[i] + self.headers.pop(i, None) def anticomp(self): """ Modifies this request to remove headers that will compress the resource's data. """ - self.headers["accept-encoding"] = ["identity"] + self.headers["accept-encoding"] = "identity" def constrain_encoding(self): """ Limits the permissible Accept-Encoding values, based on what we can decode appropriately. """ - if self.headers["accept-encoding"]: - self.headers["accept-encoding"] = [ + accept_encoding = self.headers.get("accept-encoding") + if accept_encoding: + self.headers["accept-encoding"] = ( ', '.join( - e for e in encoding.ENCODINGS if e in self.headers.get_first("accept-encoding"))] + e + for e in encoding.ENCODINGS + if e in accept_encoding + ) + ) def update_host_header(self): """ Update the host header to reflect the current target. """ - self.headers["Host"] = [self.host] + self.headers["Host"] = self.host def get_form(self): """ @@ -166,9 +332,9 @@ class Request(object): indicates non-form data. """ if self.body: - if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): + if HDR_FORM_URLENCODED in self.headers.get("content-type","").lower(): return self.get_form_urlencoded() - elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + elif HDR_FORM_MULTIPART in self.headers.get("content-type","").lower(): return self.get_form_multipart() return odict.ODict([]) @@ -178,18 +344,12 @@ class Request(object): Returns an empty ODict if there is no data or the content-type indicates non-form data. """ - if self.body and self.headers.in_any( - "content-type", - HDR_FORM_URLENCODED, - True): + if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type","").lower(): return odict.ODict(utils.urldecode(self.body)) return odict.ODict([]) def get_form_multipart(self): - if self.body and self.headers.in_any( - "content-type", - HDR_FORM_MULTIPART, - True): + if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type","").lower(): return odict.ODict( utils.multipartdecode( self.headers, @@ -204,7 +364,7 @@ class Request(object): """ # FIXME: If there's an existing content-type header indicating a # url-encoded form, leave it alone. - self.headers["Content-Type"] = [HDR_FORM_URLENCODED] + self.headers["Content-Type"] = HDR_FORM_URLENCODED self.body = utils.urlencode(odict.lst) def get_path_components(self): @@ -263,7 +423,7 @@ class Request(object): """ host = None if hostheader: - host = self.headers.get_first("host") + host = self.headers.get("Host") if not host: host = self.host if host: @@ -287,7 +447,7 @@ class Request(object): Returns a possibly empty netlib.odict.ODict object. """ ret = odict.ODict() - for i in self.headers["cookie"]: + for i in self.headers.get_all("cookie"): ret.extend(cookies.parse_cookie_header(i)) return ret @@ -297,7 +457,7 @@ class Request(object): headers. """ v = cookies.format_cookie_header(odict) - self.headers["Cookie"] = [v] + self.headers["Cookie"] = v @property def url(self): @@ -336,18 +496,17 @@ class Request(object): class EmptyRequest(Request): - def __init__( - self, - form_in="", - method="", - scheme="", - host="", - port="", - path="", - httpversion=(0, 0), - headers=None, - body="" + self, + form_in="", + method="", + scheme="", + host="", + port="", + path="", + httpversion=(0, 0), + headers=None, + body="" ): super(EmptyRequest, self).__init__( form_in=form_in, @@ -357,7 +516,7 @@ class EmptyRequest(Request): port=port, path=path, httpversion=httpversion, - headers=(headers or odict.ODictCaseless()), + headers=headers, body=body, ) @@ -370,19 +529,19 @@ class Response(object): ] def __init__( - self, - httpversion, - status_code, - msg=None, - headers=None, - body=None, - sslinfo=None, - timestamp_start=None, - timestamp_end=None, + self, + httpversion, + status_code, + msg=None, + headers=None, + body=None, + sslinfo=None, + timestamp_start=None, + timestamp_end=None, ): if not headers: - headers = odict.ODictCaseless() - assert isinstance(headers, odict.ODictCaseless) + headers = Headers() + assert isinstance(headers, Headers) self.httpversion = httpversion self.status_code = status_code @@ -395,8 +554,10 @@ class Response(object): def __eq__(self, other): try: - self_d = [self.__dict__[k] for k in self.__dict__ if k not in ('timestamp_start', 'timestamp_end')] - other_d = [other.__dict__[k] for k in other.__dict__ if k not in ('timestamp_start', 'timestamp_end')] + self_d = [self.__dict__[k] for k in self.__dict__ if + k not in ('timestamp_start', 'timestamp_end')] + other_d = [other.__dict__[k] for k in other.__dict__ if + k not in ('timestamp_start', 'timestamp_end')] return self_d == other_d except: return False @@ -412,9 +573,7 @@ class Response(object): return "<Response: {status_code} {msg} ({contenttype}, {size})>".format( status_code=self.status_code, msg=self.msg, - contenttype=self.headers.get_first( - "content-type", - "unknown content type"), + contenttype=self.headers.get("content-type", "unknown content type"), size=size) def get_cookies(self): @@ -427,7 +586,7 @@ class Response(object): attributes (e.g. HTTPOnly) are indicated by a Null value. """ ret = [] - for header in self.headers["set-cookie"]: + for header in self.headers.get_all("set-cookie"): v = cookies.parse_set_cookie_header(header) if v: name, value, attrs = v @@ -450,7 +609,7 @@ class Response(object): i[1][1] ) ) - self.headers["Set-Cookie"] = values + self.headers.set_all("Set-Cookie", values) @property def content(self): # pragma: no cover |