about summary refs log tree commit diff stats
path: root/netlib/http/semantics.py
diff options
context:
space:
mode:
Diffstat (limited to 'netlib/http/semantics.py')
-rw-r--r-- netlib/http/semantics.py 297
1 files changed, 228 insertions, 69 deletions
diff --git a/netlib/http/semantics.py b/netlib/http/semantics.py
index 2b960483..edf5fc07 100644
--- a/netlib/http/semantics.py
+++ b/netlib/http/semantics.py
@@ -1,8 +1,10 @@
from __future__ import (absolute_import, print_function, division)
+import UserDict
+import copy
import urllib
import urlparse
-from .. import utils, odict
+from .. import odict
from . import cookies, exceptions
from netlib import utils, encoding
@@ -12,8 +14,165 @@ HDR_FORM_MULTIPART = "multipart/form-data"
CONTENT_MISSING = 0
-class ProtocolMixin(object):
+class Headers(UserDict.DictMixin):
+ """
+ Header class which allows both convenient access to individual headers and
+ direct access to the underlying raw data. Provides a full dictionary interface.
+
+ Example:
+
+ .. code-block:: python
+
+ # Create header from a list of (header_name, header_value) tuples
+ >>> h = Headers([
+ ["Host","example.com"],
+ ["Accept","text/html"],
+ ["accept","application/xml"]
+ ])
+
+ # Headers mostly behave like a normal dict.
+ >>> h["Host"]
+ "example.com"
+
+ # HTTP Headers are case insensitive
+ >>> h["host"]
+ "example.com"
+
+ # Multiple headers are folded into a single header as per RFC7230
+ >>> h["Accept"]
+ "text/html, application/xml"
+
+ # Setting a header removes all existing headers with the same name.
+ >>> h["Accept"] = "application/text"
+ >>> h["Accept"]
+ "application/text"
+
+ # str(h) returns an HTTP/1 header block.
+ >>> print(h)
+ Host: example.com
+ Accept: application/text
+
+ # For full control, the raw header fields can be accessed
+ >>> h.fields
+
+ # Headers can also be created from keyword arguments
+ >>> h = Headers(host="example.com", content_type="application/xml")
+
+ Caveats:
+ For use with the "Set-Cookie" header, see :py:meth:`get_all`.
+ """
+
+ def __init__(self, fields=None, **headers):
+ """
+ Args:
+ fields: (optional) list of ``(name, value)`` header tuples, e.g. ``[("Host","example.com")]``
+ **headers: Additional headers to set. Will overwrite existing values from `fields`.
+ For convenience, underscores in header names will be transformed to dashes -
+ this behaviour does not extend to other methods.
+ If ``**headers`` contains multiple keys that are equal when lowercased,
+ the behavior is undefined.
+ """
+ self.fields = fields or []
+
+ # content_type -> content-type
+ headers = {
+ name.replace("_", "-"): value
+ for name, value in headers.iteritems()
+ }
+ self.update(headers)
+
+ def __str__(self):
+ return "\r\n".join(": ".join(field) for field in self.fields) + "\r\n"
+
+ def __getitem__(self, name):
+ values = self.get_all(name)
+ if not values:
+ raise KeyError(name)
+ else:
+ return ", ".join(values)
+
+ def __setitem__(self, name, value):
+ idx = self._index(name)
+
+ # To please the human eye, we insert at the position where the first existing header occurred.
+ if idx is not None:
+ del self[name]
+ self.fields.insert(idx, [name, value])
+ else:
+ self.fields.append([name, value])
+
+ def __delitem__(self, name):
+ if name not in self:
+ raise KeyError(name)
+ name = name.lower()
+ self.fields = [
+ field for field in self.fields
+ if name != field[0].lower()
+ ]
+
+ def _index(self, name):
+ name = name.lower()
+ for i, field in enumerate(self.fields):
+ if field[0].lower() == name:
+ return i
+ return None
+
+ def keys(self):
+ seen = set()
+ names = []
+ for name, _ in self.fields:
+ name_lower = name.lower()
+ if name_lower not in seen:
+ seen.add(name_lower)
+ names.append(name)
+ return names
+
+ def __eq__(self, other):
+ if isinstance(other, Headers):
+ return self.fields == other.fields
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def get_all(self, name, default=[]):
+ """
+ Like :py:meth:`get`, but does not fold multiple headers into a single one.
+ This is useful for Set-Cookie headers, which do not support folding.
+
+ See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
+ """
+ name = name.lower()
+ values = [value for n, value in self.fields if n.lower() == name]
+ return values or default
+
+ def set_all(self, name, values):
+ """
+ Explicitly set multiple headers for the given key.
+ See: :py:meth:`get_all`
+ """
+ if name in self:
+ del self[name]
+ self.fields.extend(
+ [name, value] for value in values
+ )
+
+ def copy(self):
+ return Headers(copy.copy(self.fields))
+
+ # Implement the StateObject protocol from mitmproxy
+ def get_state(self, short=False):
+ return tuple(tuple(field) for field in self.fields)
+
+ def load_state(self, state):
+ self.fields = [list(field) for field in state]
+
+ @classmethod
+ def from_state(cls, state):
+ return cls([list(field) for field in state])
+
+class ProtocolMixin(object):
def read_request(self, *args, **kwargs): # pragma: no cover
raise NotImplementedError
@@ -47,23 +206,23 @@ class Request(object):
]
def __init__(
- self,
- form_in,
- method,
- scheme,
- host,
- port,
- path,
- httpversion,
- headers=None,
- body=None,
- timestamp_start=None,
- timestamp_end=None,
- form_out=None
+ self,
+ form_in,
+ method,
+ scheme,
+ host,
+ port,
+ path,
+ httpversion,
+ headers=None,
+ body=None,
+ timestamp_start=None,
+ timestamp_end=None,
+ form_out=None
):
if not headers:
- headers = odict.ODictCaseless()
- assert isinstance(headers, odict.ODictCaseless)
+ headers = Headers()
+ assert isinstance(headers, Headers)
self.form_in = form_in
self.method = method
@@ -80,8 +239,10 @@ class Request(object):
def __eq__(self, other):
try:
- self_d = [self.__dict__[k] for k in self.__dict__ if k not in ('timestamp_start', 'timestamp_end')]
- other_d = [other.__dict__[k] for k in other.__dict__ if k not in ('timestamp_start', 'timestamp_end')]
+ self_d = [self.__dict__[k] for k in self.__dict__ if
+ k not in ('timestamp_start', 'timestamp_end')]
+ other_d = [other.__dict__[k] for k in other.__dict__ if
+ k not in ('timestamp_start', 'timestamp_end')]
return self_d == other_d
except:
return False
@@ -134,30 +295,35 @@ class Request(object):
"if-none-match",
]
for i in delheaders:
- del self.headers[i]
+ self.headers.pop(i, None)
def anticomp(self):
"""
Modifies this request to remove headers that will compress the
resource's data.
"""
- self.headers["accept-encoding"] = ["identity"]
+ self.headers["accept-encoding"] = "identity"
def constrain_encoding(self):
"""
Limits the permissible Accept-Encoding values, based on what we can
decode appropriately.
"""
- if self.headers["accept-encoding"]:
- self.headers["accept-encoding"] = [
+ accept_encoding = self.headers.get("accept-encoding")
+ if accept_encoding:
+ self.headers["accept-encoding"] = (
', '.join(
- e for e in encoding.ENCODINGS if e in self.headers.get_first("accept-encoding"))]
+ e
+ for e in encoding.ENCODINGS
+ if e in accept_encoding
+ )
+ )
def update_host_header(self):
"""
Update the host header to reflect the current target.
"""
- self.headers["Host"] = [self.host]
+ self.headers["Host"] = self.host
def get_form(self):
"""
@@ -166,9 +332,9 @@ class Request(object):
indicates non-form data.
"""
if self.body:
- if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
+ if HDR_FORM_URLENCODED in self.headers.get("content-type","").lower():
return self.get_form_urlencoded()
- elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True):
+ elif HDR_FORM_MULTIPART in self.headers.get("content-type","").lower():
return self.get_form_multipart()
return odict.ODict([])
@@ -178,18 +344,12 @@ class Request(object):
Returns an empty ODict if there is no data or the content-type
indicates non-form data.
"""
- if self.body and self.headers.in_any(
- "content-type",
- HDR_FORM_URLENCODED,
- True):
+ if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type","").lower():
return odict.ODict(utils.urldecode(self.body))
return odict.ODict([])
def get_form_multipart(self):
- if self.body and self.headers.in_any(
- "content-type",
- HDR_FORM_MULTIPART,
- True):
+ if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type","").lower():
return odict.ODict(
utils.multipartdecode(
self.headers,
@@ -204,7 +364,7 @@ class Request(object):
"""
# FIXME: If there's an existing content-type header indicating a
# url-encoded form, leave it alone.
- self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
+ self.headers["Content-Type"] = HDR_FORM_URLENCODED
self.body = utils.urlencode(odict.lst)
def get_path_components(self):
@@ -263,7 +423,7 @@ class Request(object):
"""
host = None
if hostheader:
- host = self.headers.get_first("host")
+ host = self.headers.get("Host")
if not host:
host = self.host
if host:
@@ -287,7 +447,7 @@ class Request(object):
Returns a possibly empty netlib.odict.ODict object.
"""
ret = odict.ODict()
- for i in self.headers["cookie"]:
+ for i in self.headers.get_all("cookie"):
ret.extend(cookies.parse_cookie_header(i))
return ret
@@ -297,7 +457,7 @@ class Request(object):
headers.
"""
v = cookies.format_cookie_header(odict)
- self.headers["Cookie"] = [v]
+ self.headers["Cookie"] = v
@property
def url(self):
@@ -336,18 +496,17 @@ class Request(object):
class EmptyRequest(Request):
-
def __init__(
- self,
- form_in="",
- method="",
- scheme="",
- host="",
- port="",
- path="",
- httpversion=(0, 0),
- headers=None,
- body=""
+ self,
+ form_in="",
+ method="",
+ scheme="",
+ host="",
+ port="",
+ path="",
+ httpversion=(0, 0),
+ headers=None,
+ body=""
):
super(EmptyRequest, self).__init__(
form_in=form_in,
@@ -357,7 +516,7 @@ class EmptyRequest(Request):
port=port,
path=path,
httpversion=httpversion,
- headers=(headers or odict.ODictCaseless()),
+ headers=headers,
body=body,
)
@@ -370,19 +529,19 @@ class Response(object):
]
def __init__(
- self,
- httpversion,
- status_code,
- msg=None,
- headers=None,
- body=None,
- sslinfo=None,
- timestamp_start=None,
- timestamp_end=None,
+ self,
+ httpversion,
+ status_code,
+ msg=None,
+ headers=None,
+ body=None,
+ sslinfo=None,
+ timestamp_start=None,
+ timestamp_end=None,
):
if not headers:
- headers = odict.ODictCaseless()
- assert isinstance(headers, odict.ODictCaseless)
+ headers = Headers()
+ assert isinstance(headers, Headers)
self.httpversion = httpversion
self.status_code = status_code
@@ -395,8 +554,10 @@ class Response(object):
def __eq__(self, other):
try:
- self_d = [self.__dict__[k] for k in self.__dict__ if k not in ('timestamp_start', 'timestamp_end')]
- other_d = [other.__dict__[k] for k in other.__dict__ if k not in ('timestamp_start', 'timestamp_end')]
+ self_d = [self.__dict__[k] for k in self.__dict__ if
+ k not in ('timestamp_start', 'timestamp_end')]
+ other_d = [other.__dict__[k] for k in other.__dict__ if
+ k not in ('timestamp_start', 'timestamp_end')]
return self_d == other_d
except:
return False
@@ -412,9 +573,7 @@ class Response(object):
return "<Response: {status_code} {msg} ({contenttype}, {size})>".format(
status_code=self.status_code,
msg=self.msg,
- contenttype=self.headers.get_first(
- "content-type",
- "unknown content type"),
+ contenttype=self.headers.get("content-type", "unknown content type"),
size=size)
def get_cookies(self):
@@ -427,7 +586,7 @@ class Response(object):
attributes (e.g. HTTPOnly) are indicated by a Null value.
"""
ret = []
- for header in self.headers["set-cookie"]:
+ for header in self.headers.get_all("set-cookie"):
v = cookies.parse_set_cookie_header(header)
if v:
name, value, attrs = v
@@ -450,7 +609,7 @@ class Response(object):
i[1][1]
)
)
- self.headers["Set-Cookie"] = values
+ self.headers.set_all("Set-Cookie", values)
@property
def content(self): # pragma: no cover