-rw-r--r--   .env                      |   5
-rw-r--r--   netlib/http_cookies.py    | 197
-rw-r--r--   netlib/odict.py           |  35
-rw-r--r--   netlib/wsgi.py            |  29
-rw-r--r--   test/test_http.py         |  11
-rw-r--r--   test/test_http_cookies.py | 220
-rw-r--r--   test/test_odict.py        |  30
-rw-r--r--   test/test_wsgi.py         |   1
8 files changed, 504 insertions, 24 deletions
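
Before the patch itself, a minimal usage sketch of the new netlib.http_cookies module introduced below. This is illustrative only and not part of the commit; it assumes the netlib package from this commit is importable and runs under Python 2, which the codebase targets.

# Usage sketch for netlib.http_cookies (illustrative, not part of the diff).
from netlib import http_cookies, odict

# Cookie headers parse into an ODict; duplicate names are preserved.
od = http_cookies.parse_cookie_header("one=uno; two=due; one=ein")
assert od["one"] == ["uno", "ein"]
assert http_cookies.format_cookie_header(od) == "one=uno; two=due; one=ein"

# Values containing special characters are quoted and backslash-escaped on output.
assert http_cookies.format_cookie_header(
    odict.ODict([["k", 'a "b" c']])
) == 'k="a \\"b\\" c"'

# Set-Cookie parses to (name, value, attrs), where attrs is an ODictCaseless.
name, value, attrs = http_cookies.parse_set_cookie_header(
    "mun=1.390.f60; path=/; domain=b.aol.com"
)
assert (name, value) == ("mun", "1.390.f60")
assert attrs["path"] == ["/"]
assert http_cookies.format_set_cookie_header(name, value, attrs) == \
    "mun=1.390.f60; path=/; domain=b.aol.com"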
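
The ODict changes below (in-place replacement in __setitem__, the new extend(), and the repr()/format() split) are summarized by a second sketch, under the same assumptions. The expected list mirrors the new test_add_order test.

# Sketch of the revised ODict behaviour (illustrative, not part of the diff).
from netlib import odict

od = odict.ODict([["one", "uno"], ["two", "due"], ["three", "tre"]])

# __setitem__ now replaces matching keys in place and appends surplus values
# at the end, instead of stripping all matches and re-adding them.
od["two"] = ["foo", "bar"]
assert od.lst == [
    ["one", "uno"],
    ["two", "foo"],
    ["three", "tre"],
    ["two", "bar"],
]

# extend() concatenates another ODict, keeping duplicates.
od.extend(odict.ODict([["one", "ein"]]))
assert od["one"] == ["uno", "ein"]

# repr() now shows the raw list; the header-style rendering moved to format(),
# which is what netlib.wsgi now uses to serialize response headers.
assert repr(od) == repr(od.lst)
assert od.format().startswith("one: uno\r\n")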
@@ -0,0 +1,5 @@
+DIR=`dirname $0`
+if [ -z "$VIRTUAL_ENV" ] && [ -f $DIR/../venv.mitmproxy/bin/activate ]; then
+    echo "Activating mitmproxy virtualenv..."
+    source $DIR/../venv.mitmproxy/bin/activate
+fi
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
new file mode 100644
index 00000000..dab95ed0
--- /dev/null
+++ b/netlib/http_cookies.py
@@ -0,0 +1,197 @@
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number of
+ways. We try to be as permissive as possible, and to retain even malformed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
+not parse the comma-separated variant of Set-Cookie that allows multiple cookies
+to be set in a single header. Technically this should be feasible, but it turns
+out that violations of RFC6265 that make the parsing problem indeterminate are
+much more common than genuine occurrences of the multi-cookie variants.
+Serialization follows RFC6265.
+
+    http://tools.ietf.org/html/rfc6265
+    http://tools.ietf.org/html/rfc2109
+    http://tools.ietf.org/html/rfc2965
+"""
+
+# TODO
+# - Disallow LHS-only Cookie values
+
+import re
+
+import odict
+
+
+def _read_until(s, start, term):
+    """
+    Read until one of the characters in term is reached.
+    """
+    if start == len(s):
+        return "", start+1
+    for i in range(start, len(s)):
+        if s[i] in term:
+            return s[start:i], i
+    return s[start:i+1], i+1
+
+
+def _read_token(s, start):
+    """
+    Read a token - the LHS of a token/value pair in a cookie.
+    """
+    return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+    """
+    start: offset to the first quote of the string to be read
+
+    A sort of loose super-set of the various quoted string specifications.
+
+    RFC6265 disallows backslashes or double quotes within quoted strings.
+    Prior RFCs use backslashes to escape. This leaves us free to apply
+    backslash escaping by default and be compatible with everything.
+    """
+    escaping = False
+    ret = []
+    # Skip the first quote
+    for i in range(start+1, len(s)):
+        if escaping:
+            ret.append(s[i])
+            escaping = False
+        elif s[i] == '"':
+            break
+        elif s[i] == "\\":
+            escaping = True
+            pass
+        else:
+            ret.append(s[i])
+    return "".join(ret), i+1
+
+
+def _read_value(s, start, delims):
+    """
+    Reads a value - the RHS of a token/value pair in a cookie.
+
+    special: If the value is special, commas are permitted. Else comma
+    terminates. This helps us support old and new style values.
+    """
+    if start >= len(s):
+        return "", start
+    elif s[start] == '"':
+        return _read_quoted_string(s, start)
+    else:
+        return _read_until(s, start, delims)
+
+
+def _read_pairs(s, off=0, specials=()):
+    """
+    Read pairs of lhs=rhs values.
+
+    off: start offset
+    specials: a lower-cased list of keys that may contain commas
+    """
+    vals = []
+    while 1:
+        lhs, off = _read_token(s, off)
+        lhs = lhs.lstrip()
+        if lhs:
+            rhs = None
+            if off < len(s):
+                if s[off] == "=":
+                    rhs, off = _read_value(s, off+1, ";")
+            vals.append([lhs, rhs])
+        off += 1
+        if not off < len(s):
+            break
+    return vals, off
+
+
+def _has_special(s):
+    for i in s:
+        if i in '",;\\':
+            return True
+        o = ord(i)
+        if o < 0x21 or o > 0x7e:
+            return True
+    return False
+
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+def _format_pairs(lst, specials=(), sep="; "):
+    """
+    specials: A lower-cased list of keys that will not be quoted.
+    """
+    vals = []
+    for k, v in lst:
+        if v is None:
+            vals.append(k)
+        else:
+            if k.lower() not in specials and _has_special(v):
+                v = ESCAPE.sub(r"\\\1", v)
+                v = '"%s"'%v
+            vals.append("%s=%s"%(k, v))
+    return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+    return _format_pairs(
+        lst,
+        specials = ("expires", "path")
+    )
+
+
+def _parse_set_cookie_pairs(s):
+    """
+    For Set-Cookie, we support multiple cookies as described in RFC2109.
+    This function therefore returns a list of lists.
+    """
+    pairs, off = _read_pairs(
+        s,
+        specials = ("expires", "path")
+    )
+    return pairs
+
+
+def parse_set_cookie_header(str):
+    """
+    Parse a Set-Cookie header value
+
+    Returns a (name, value, attrs) tuple, or None, where attrs is an
+    ODictCaseless set of attributes. No attempt is made to parse attribute
+    values - they are treated purely as strings.
+    """
+    pairs = _parse_set_cookie_pairs(str)
+    if pairs:
+        return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+    """
+    Formats a Set-Cookie header value.
+    """
+    pairs = [[name, value]]
+    pairs.extend(attrs.lst)
+    return _format_set_cookie_pairs(pairs)
+
+
+def parse_cookie_header(str):
+    """
+    Parse a Cookie header value.
+    Returns a (possibly empty) ODict object.
+    """
+    pairs, off = _read_pairs(str)
+    return odict.ODict(pairs)
+
+
+def format_cookie_header(od):
+    """
+    Formats a Cookie header value.
+    """
+    return _format_pairs(od.lst)
diff --git a/netlib/odict.py b/netlib/odict.py
index 7a2f611b..dd738c55 100644
--- a/netlib/odict.py
+++ b/netlib/odict.py
@@ -13,7 +13,8 @@ def safe_subn(pattern, repl, target, *args, **kwargs):
 
 class ODict(object):
     """
-    A dictionary-like object for managing ordered (key, value) data.
+    A dictionary-like object for managing ordered (key, value) data. Think
+    about it as a convenient interface to a list of (key, value) tuples.
     """
     def __init__(self, lst=None):
         self.lst = lst or []
@@ -64,11 +65,20 @@ class ODict(object):
             key, they are cleared.
         """
         if isinstance(valuelist, basestring):
-            raise ValueError("Expected list of values instead of string. Example: odict['Host'] = ['www.example.com']")
-
-        new = self._filter_lst(k, self.lst)
-        for i in valuelist:
-            new.append([k, i])
+            raise ValueError(
+                "Expected list of values instead of string. "
+                "Example: odict['Host'] = ['www.example.com']"
+            )
+        kc = self._kconv(k)
+        new = []
+        for i in self.lst:
+            if self._kconv(i[0]) == kc:
+                if valuelist:
+                    new.append([k, valuelist.pop(0)])
+            else:
+                new.append(i)
+        while valuelist:
+            new.append([k, valuelist.pop(0)])
         self.lst = new
 
     def __delitem__(self, k):
@@ -84,7 +94,7 @@ class ODict(object):
             return False
 
     def add(self, key, value):
-        self.lst.append([key, str(value)])
+        self.lst.append([key, value])
 
     def get(self, k, d=None):
         if k in self:
@@ -108,10 +118,19 @@ class ODict(object):
         lst = copy.deepcopy(self.lst)
         return self.__class__(lst)
 
+    def extend(self, other):
+        """
+        Add the contents of other, preserving any duplicates.
+        """
+        self.lst.extend(other.lst)
+
     def __repr__(self):
+        return repr(self.lst)
+
+    def format(self):
         elements = []
         for itm in self.lst:
-            elements.append(itm[0] + ": " + itm[1])
+            elements.append(itm[0] + ": " + str(itm[1]))
         elements.append("")
         return "\r\n".join(elements)
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index bac27d5a..1b979608 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -1,5 +1,8 @@
 from __future__ import (absolute_import, print_function, division)
-import cStringIO, urllib, time, traceback
+import cStringIO
+import urllib
+import time
+import traceback
 
 from . import odict, tcp
 
@@ -23,15 +26,18 @@ class Request(object):
 
 def date_time_string():
     """Return the current date and time formatted for a message header."""
     WEEKS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
-    MONTHS = [None,
-              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
-              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    MONTHS = [
+        None,
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    ]
     now = time.time()
     year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
     s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
-        WEEKS[wd],
-        day, MONTHS[month], year,
-        hh, mm, ss)
+        WEEKS[wd],
+        day, MONTHS[month], year,
+        hh, mm, ss
+    )
     return s
 
@@ -100,6 +106,7 @@ class WSGIAdaptor(object):
             status = None,
             headers = None
         )
+
         def write(data):
             if not state["headers_sent"]:
                 soc.write("HTTP/1.1 %s\r\n"%state["status"])
@@ -108,7 +115,7 @@ class WSGIAdaptor(object):
                 h["Server"] = [self.sversion]
                 if 'date' not in h:
                     h["Date"] = [date_time_string()]
-                soc.write(str(h))
+                soc.write(h.format())
                 soc.write("\r\n")
                 state["headers_sent"] = True
             if data:
@@ -130,7 +137,9 @@ class WSGIAdaptor(object):
 
         errs = cStringIO.StringIO()
         try:
-            dataiter = self.app(self.make_environ(request, errs, **env), start_response)
+            dataiter = self.app(
+                self.make_environ(request, errs, **env), start_response
+            )
             for i in dataiter:
                 write(i)
             if not state["headers_sent"]:
@@ -143,5 +152,3 @@ class WSGIAdaptor(object):
         except Exception: # pragma: no cover
             pass
         return errs.getvalue()
-
-
diff --git a/test/test_http.py b/test/test_http.py
index fed60946..b1c62458 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -53,6 +53,7 @@ def test_connection_close():
     h["connection"] = ["close"]
     assert http.connection_close((1, 1), h)
 
+
 def test_get_header_tokens():
     h = odict.ODictCaseless()
     assert http.get_header_tokens(h, "foo") == []
@@ -69,11 +70,13 @@ def test_read_http_body_request():
     r = cStringIO.StringIO("testing")
     assert http.read_http_body(r, h, None, "GET", None, True) == ""
 
+
 def test_read_http_body_response():
     h = odict.ODictCaseless()
     s = cStringIO.StringIO("testing")
     assert http.read_http_body(s, h, None, "GET", 200, False) == "testing"
 
+
 def test_read_http_body():
     # test default case
     h = odict.ODictCaseless()
@@ -115,6 +118,7 @@ def test_read_http_body():
     s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
     assert http.read_http_body(s, h, 100, "GET", 200, False) == "aaaaa"
 
+
 def test_expected_http_body_size():
     # gibber in the content-length field
     h = odict.ODictCaseless()
@@ -135,6 +139,7 @@ def test_expected_http_body_size():
     h = odict.ODictCaseless()
     assert http.expected_http_body_size(h, True, "GET", None) == 0
 
+
 def test_parse_http_protocol():
     assert http.parse_http_protocol("HTTP/1.1") == (1, 1)
     assert http.parse_http_protocol("HTTP/0.0") == (0, 0)
@@ -189,6 +194,7 @@ def test_parse_init_http():
     assert not http.parse_init_http("GET /test foo/1.1")
     assert not http.parse_init_http("GET /test\xc0 HTTP/1.1")
 
+
 class TestReadHeaders:
     def _read(self, data, verbatim=False):
         if not verbatim:
@@ -251,11 +257,12 @@ class TestReadResponseNoContentLength(test.ServerTestBase):
         httpversion, code, msg, headers, content = http.read_response(c.rfile, "GET", None)
         assert content == "bar\r\n\r\n"
 
+
 def test_read_response():
     def tst(data, method, limit, include_body=True):
         data = textwrap.dedent(data)
         r = cStringIO.StringIO(data)
-        return http.read_response(r, method, limit, include_body=include_body)
+        return http.read_response(r, method, limit, include_body = include_body)
 
     tutils.raises("server disconnect", tst, "", "GET", None)
     tutils.raises("invalid server response", tst, "foo", "GET", None)
@@ -351,6 +358,7 @@ def test_parse_url():
     # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
     assert not http.parse_url('http://lo[calhost')
 
+
 def test_parse_http_basic_auth():
     vals = ("basic", "foo", "bar")
     assert http.parse_http_basic_auth(http.assemble_http_basic_auth(*vals)) == vals
@@ -358,4 +366,3 @@ def test_parse_http_basic_auth():
     assert not http.parse_http_basic_auth("foo bar")
     v = "basic " + binascii.b2a_base64("foo")
     assert not http.parse_http_basic_auth(v)
-
diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py
new file mode 100644
index 00000000..7438af7c
--- /dev/null
+++ b/test/test_http_cookies.py
@@ -0,0 +1,220 @@
+import pprint
+import nose.tools
+
+from netlib import http_cookies, odict
+
+
+def test_read_token():
+    tokens = [
+        [("foo", 0), ("foo", 3)],
+        [("foo", 1), ("oo", 3)],
+        [(" foo", 1), ("foo", 4)],
+        [(" foo;", 1), ("foo", 4)],
+        [(" foo=", 1), ("foo", 4)],
+        [(" foo=bar", 1), ("foo", 4)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_token(*q), a)
+
+
+def test_read_quoted_string():
+    tokens = [
+        [('"foo" x', 0), ("foo", 5)],
+        [('"f\oo" x', 0), ("foo", 6)],
+        [(r'"f\\o" x', 0), (r"f\o", 6)],
+        [(r'"f\\" x', 0), (r"f" + '\\', 5)],
+        [('"fo\\\"" x', 0), ("fo\"", 6)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_quoted_string(*q), a)
+
+
+def test_read_pairs():
+    vals = [
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=two",
+            [["one", "two"]]
+        ],
+        [
+            "one=",
+            [["one", ""]]
+        ],
+        [
+            'one="two"',
+            [["one", "two"]]
+        ],
+        [
+            'one="two"; three=four',
+            [["one", "two"], ["three", "four"]]
+        ],
+        [
+            'one="two"; three=four; five',
+            [["one", "two"], ["three", "four"], ["five", None]]
+        ],
+        [
+            'one="\\"two"; three=four',
+            [["one", '"two'], ["three", "four"]]
+        ],
+    ]
+    for s, lst in vals:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+
+
+def test_pairs_roundtrips():
+    pairs = [
+        [
+            "",
+            []
+        ],
+        [
+            "one=uno",
+            [["one", "uno"]]
+        ],
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=uno; two=due",
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            'one="uno"; two="\due"',
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            'one="un\\"o"',
+            [["one", 'un"o']]
+        ],
+        [
+            'one="uno,due"',
+            [["one", 'uno,due']]
+        ],
+        [
+            "one=uno; two; three=tre",
+            [["one", "uno"], ["two", None], ["three", "tre"]]
+        ],
+        [
+            "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; "
+            "_rcc2=53VdltWl+Ov6ordflA==;",
+            [
+                ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="],
+                ["_rcc2", "53VdltWl+Ov6ordflA=="]
+            ]
+        ]
+    ]
+    for s, lst in pairs:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+        s2 = http_cookies._format_pairs(lst)
+        ret, off = http_cookies._read_pairs(s2)
+        nose.tools.eq_(ret, lst)
+
+
+def test_cookie_roundtrips():
+    pairs = [
+        [
+            "one=uno",
+            [["one", "uno"]]
+        ],
+        [
+            "one=uno; two=due",
+            [["one", "uno"], ["two", "due"]]
+        ],
+    ]
+    for s, lst in pairs:
+        ret = http_cookies.parse_cookie_header(s)
+        nose.tools.eq_(ret.lst, lst)
+        s2 = http_cookies.format_cookie_header(ret)
+        ret = http_cookies.parse_cookie_header(s2)
+        nose.tools.eq_(ret.lst, lst)
+
+
+def test_parse_set_cookie_pairs():
+    pairs = [
+        [
+            "one=uno",
+            [
+                ["one", "uno"]
+            ]
+        ],
+        [
+            "one=un\x20",
+            [
+                ["one", "un\x20"]
+            ]
+        ],
+        [
+            "one=uno; foo",
+            [
+                ["one", "uno"],
+                ["foo", None]
+            ]
+        ],
+        [
+            "mun=1.390.f60; "
+            "expires=sun, 11-oct-2015 12:38:31 gmt; path=/; "
+            "domain=b.aol.com",
+            [
+                ["mun", "1.390.f60"],
+                ["expires", "sun, 11-oct-2015 12:38:31 gmt"],
+                ["path", "/"],
+                ["domain", "b.aol.com"]
+            ]
+        ],
+        [
+            r'rpb=190%3d1%2616726%3d1%2634832%3d1%2634874%3d1; '
+            'domain=.rubiconproject.com; '
+            'expires=mon, 11-may-2015 21:54:57 gmt; '
+            'path=/',
+            [
+                ['rpb', r'190%3d1%2616726%3d1%2634832%3d1%2634874%3d1'],
+                ['domain', '.rubiconproject.com'],
+                ['expires', 'mon, 11-may-2015 21:54:57 gmt'],
+                ['path', '/']
+            ]
+        ],
+    ]
+    for s, lst in pairs:
+        ret = http_cookies._parse_set_cookie_pairs(s)
+        nose.tools.eq_(ret, lst)
+        s2 = http_cookies._format_set_cookie_pairs(ret)
+        ret2 = http_cookies._parse_set_cookie_pairs(s2)
+        nose.tools.eq_(ret2, lst)
+
+
+def test_parse_set_cookie_header():
+    vals = [
+        [
+            "", None
+        ],
+        [
+            ";", None
+        ],
+        [
+            "one=uno",
+            ("one", "uno", [])
+        ],
+        [
+            "one=uno; foo=bar",
+            ("one", "uno", [["foo", "bar"]])
+        ]
+    ]
+    for s, expected in vals:
+        ret = http_cookies.parse_set_cookie_header(s)
+        if expected:
+            assert ret[0] == expected[0]
+            assert ret[1] == expected[1]
+            nose.tools.eq_(ret[2].lst, expected[2])
+            s2 = http_cookies.format_set_cookie_header(*ret)
+            ret2 = http_cookies.parse_set_cookie_header(s2)
+            assert ret2[0] == expected[0]
+            assert ret2[1] == expected[1]
+            nose.tools.eq_(ret2[2].lst, expected[2])
+        else:
+            assert ret is None
diff --git a/test/test_odict.py b/test/test_odict.py
index d90bc6e5..c01c4dbe 100644
--- a/test/test_odict.py
+++ b/test/test_odict.py
@@ -6,6 +6,11 @@ class TestODict:
     def setUp(self):
         self.od = odict.ODict()
 
+    def test_repr(self):
+        h = odict.ODict()
+        h["one"] = ["two"]
+        assert repr(h)
+
     def test_str_err(self):
         h = odict.ODict()
         tutils.raises(ValueError, h.__setitem__, "key", "foo")
@@ -20,7 +25,7 @@ class TestODict:
             "two: tre\r\n",
             "\r\n"
         ]
-        out = repr(self.od)
+        out = self.od.format()
         for i in expected:
             assert out.find(i) >= 0
 
@@ -39,7 +44,7 @@ class TestODict:
         self.od["one"] = ["uno"]
         expected1 = "one: uno\r\n"
         expected2 = "\r\n"
-        out = repr(self.od)
+        out = self.od.format()
         assert out.find(expected1) >= 0
         assert out.find(expected2) >= 0
 
@@ -109,6 +114,12 @@ class TestODict:
         assert self.od.get_first("one") == "two"
         assert self.od.get_first("two") == None
 
+    def test_extend(self):
+        a = odict.ODict([["a", "b"], ["c", "d"]])
+        b = odict.ODict([["a", "b"], ["e", "f"]])
+        a.extend(b)
+        assert len(a) == 4
+        assert a["a"] == ["b", "b"]
 
 class TestODictCaseless:
     def setUp(self):
@@ -145,3 +156,18 @@ class TestODictCaseless:
         self.od.add("bar", 2)
         assert len(self.od.keys()) == 2
 
+    def test_add_order(self):
+        od = odict.ODict(
+            [
+                ["one", "uno"],
+                ["two", "due"],
+                ["three", "tre"],
+            ]
+        )
+        od["two"] = ["foo", "bar"]
+        assert od.lst == [
+            ["one", "uno"],
+            ["two", "foo"],
+            ["three", "tre"],
+            ["two", "bar"],
+        ]
diff --git a/test/test_wsgi.py b/test/test_wsgi.py
index 6e1fb146..1c8c5263 100644
--- a/test/test_wsgi.py
+++ b/test/test_wsgi.py
@@ -100,4 +100,3 @@ class TestWSGI:
             start_response(status, response_headers, ei)
             yield "bbb"
         assert "Internal Server Error" in self._serve(app)
-