-rw-r--r--   .env                      |   5
-rw-r--r--   netlib/http_cookies.py    | 197
-rw-r--r--   netlib/odict.py           |  35
-rw-r--r--   netlib/wsgi.py            |  29
-rw-r--r--   test/test_http.py         |  11
-rw-r--r--   test/test_http_cookies.py | 220
-rw-r--r--   test/test_odict.py        |  30
-rw-r--r--   test/test_wsgi.py         |   1
8 files changed, 504 insertions, 24 deletions
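
Before the patch itself, a minimal usage sketch of the new netlib.http_cookies module introduced below. This is illustrative only and not part of the commit; it assumes the netlib package from this commit is importable and runs under Python 2, which the codebase targets.

# Usage sketch for netlib.http_cookies (illustrative, not part of the diff).
from netlib import http_cookies, odict

# Cookie headers parse into an ODict; duplicate names are preserved.
od = http_cookies.parse_cookie_header("one=uno; two=due; one=ein")
assert od["one"] == ["uno", "ein"]
assert http_cookies.format_cookie_header(od) == "one=uno; two=due; one=ein"

# Values containing special characters are quoted and backslash-escaped on output.
assert http_cookies.format_cookie_header(
    odict.ODict([["k", 'a "b" c']])
) == 'k="a \\"b\\" c"'

# Set-Cookie parses to (name, value, attrs), where attrs is an ODictCaseless.
name, value, attrs = http_cookies.parse_set_cookie_header(
    "mun=1.390.f60; path=/; domain=b.aol.com"
)
assert (name, value) == ("mun", "1.390.f60")
assert attrs["path"] == ["/"]
assert http_cookies.format_set_cookie_header(name, value, attrs) == \
    "mun=1.390.f60; path=/; domain=b.aol.com"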
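
The ODict changes below (in-place replacement in __setitem__, the new extend(), and the repr()/format() split) are summarized by a second sketch, under the same assumptions. The expected list mirrors the new test_add_order test.

# Sketch of the revised ODict behaviour (illustrative, not part of the diff).
from netlib import odict

od = odict.ODict([["one", "uno"], ["two", "due"], ["three", "tre"]])

# __setitem__ now replaces matching keys in place and appends surplus values
# at the end, instead of stripping all matches and re-adding them.
od["two"] = ["foo", "bar"]
assert od.lst == [
    ["one", "uno"],
    ["two", "foo"],
    ["three", "tre"],
    ["two", "bar"],
]

# extend() concatenates another ODict, keeping duplicates.
od.extend(odict.ODict([["one", "ein"]]))
assert od["one"] == ["uno", "ein"]

# repr() now shows the raw list; the header-style rendering moved to format(),
# which is what netlib.wsgi now uses to serialize response headers.
assert repr(od) == repr(od.lst)
assert od.format().startswith("one: uno\r\n")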
@@ -0,0 +1,5 @@
+DIR=`dirname $0`
+if [ -z "$VIRTUAL_ENV" ] && [ -f $DIR/../venv.mitmproxy/bin/activate ]; then
+    echo "Activating mitmproxy virtualenv..."
+    source $DIR/../venv.mitmproxy/bin/activate
+fi
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
new file mode 100644
index 00000000..dab95ed0
--- /dev/null
+++ b/netlib/http_cookies.py
@@ -0,0 +1,197 @@
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number of
+ways. We try to be as permissive as possible, and to retain even malformed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
+not parse the comma-separated variant of Set-Cookie that allows multiple cookies
+to be set in a single header. Technically this should be feasible, but it turns
+out that violations of RFC6265 that make the parsing problem indeterminate are
+much more common than genuine occurrences of the multi-cookie variants.
+Serialization follows RFC6265.
+
+    http://tools.ietf.org/html/rfc6265
+    http://tools.ietf.org/html/rfc2109
+    http://tools.ietf.org/html/rfc2965
+"""
+
+# TODO
+# - Disallow LHS-only Cookie values
+
+import re
+
+import odict
+
+
+def _read_until(s, start, term):
+    """
+    Read until one of the characters in term is reached.
+    """
+    if start == len(s):
+        return "", start+1
+    for i in range(start, len(s)):
+        if s[i] in term:
+            return s[start:i], i
+    return s[start:i+1], i+1
+
+
+def _read_token(s, start):
+    """
+    Read a token - the LHS of a token/value pair in a cookie.
+    """
+    return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+    """
+    start: offset to the first quote of the string to be read
+
+    A sort of loose super-set of the various quoted string specifications.
+
+    RFC6265 disallows backslashes or double quotes within quoted strings.
+    Prior RFCs use backslashes to escape. This leaves us free to apply
+    backslash escaping by default and be compatible with everything.
+    """
+    escaping = False
+    ret = []
+    # Skip the first quote
+    for i in range(start+1, len(s)):
+        if escaping:
+            ret.append(s[i])
+            escaping = False
+        elif s[i] == '"':
+            break
+        elif s[i] == "\\":
+            escaping = True
+            pass
+        else:
+            ret.append(s[i])
+    return "".join(ret), i+1
+
+
+def _read_value(s, start, delims):
+    """
+    Reads a value - the RHS of a token/value pair in a cookie.
+
+    special: If the value is special, commas are permitted. Else comma
+    terminates. This helps us support old and new style values.
+    """
+    if start >= len(s):
+        return "", start
+    elif s[start] == '"':
+        return _read_quoted_string(s, start)
+    else:
+        return _read_until(s, start, delims)
+
+
+def _read_pairs(s, off=0, specials=()):
+    """
+    Read pairs of lhs=rhs values.
+
+    off: start offset
+    specials: a lower-cased list of keys that may contain commas
+    """
+    vals = []
+    while 1:
+        lhs, off = _read_token(s, off)
+        lhs = lhs.lstrip()
+        if lhs:
+            rhs = None
+            if off < len(s):
+                if s[off] == "=":
+                    rhs, off = _read_value(s, off+1, ";")
+            vals.append([lhs, rhs])
+        off += 1
+        if not off < len(s):
+            break
+    return vals, off
+
+
+def _has_special(s):
+    for i in s:
+        if i in '",;\\':
+            return True
+        o = ord(i)
+        if o < 0x21 or o > 0x7e:
+            return True
+    return False
+
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+def _format_pairs(lst, specials=(), sep="; "):
+    """
+    specials: A lower-cased list of keys that will not be quoted.
+    """
+    vals = []
+    for k, v in lst:
+        if v is None:
+            vals.append(k)
+        else:
+            if k.lower() not in specials and _has_special(v):
+                v = ESCAPE.sub(r"\\\1", v)
+                v = '"%s"'%v
+            vals.append("%s=%s"%(k, v))
+    return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+    return _format_pairs(
+        lst,
+        specials = ("expires", "path")
+    )
+
+
+def _parse_set_cookie_pairs(s):
+    """
+    For Set-Cookie, we support multiple cookies as described in RFC2109.
+    This function therefore returns a list of lists.
+    """
+    pairs, off = _read_pairs(
+        s,
+        specials = ("expires", "path")
+    )
+    return pairs
+
+
+def parse_set_cookie_header(str):
+    """
+    Parse a Set-Cookie header value
+
+    Returns a (name, value, attrs) tuple, or None, where attrs is an
+    ODictCaseless set of attributes. No attempt is made to parse attribute
+    values - they are treated purely as strings.
+    """
+    pairs = _parse_set_cookie_pairs(str)
+    if pairs:
+        return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+    """
+    Formats a Set-Cookie header value.
+    """
+    pairs = [[name, value]]
+    pairs.extend(attrs.lst)
+    return _format_set_cookie_pairs(pairs)
+
+
+def parse_cookie_header(str):
+    """
+    Parse a Cookie header value.
+    Returns a (possibly empty) ODict object.
+    """
+    pairs, off = _read_pairs(str)
+    return odict.ODict(pairs)
+
+
+def format_cookie_header(od):
+    """
+    Formats a Cookie header value.
+    """
+    return _format_pairs(od.lst)
diff --git a/netlib/odict.py b/netlib/odict.py
index 7a2f611b..dd738c55 100644
--- a/netlib/odict.py
+++ b/netlib/odict.py
@@ -13,7 +13,8 @@ def safe_subn(pattern, repl, target, *args, **kwargs):
 
 class ODict(object):
     """
-    A dictionary-like object for managing ordered (key, value) data.
+    A dictionary-like object for managing ordered (key, value) data. Think
+    about it as a convenient interface to a list of (key, value) tuples.
     """
     def __init__(self, lst=None):
         self.lst = lst or []
@@ -64,11 +65,20 @@ class ODict(object):
             key, they are cleared.
         """
         if isinstance(valuelist, basestring):
-            raise ValueError("Expected list of values instead of string. Example: odict['Host'] = ['www.example.com']")
-
-        new = self._filter_lst(k, self.lst)
-        for i in valuelist:
-            new.append([k, i])
+            raise ValueError(
+                "Expected list of values instead of string. "
+                "Example: odict['Host'] = ['www.example.com']"
+            )
+        kc = self._kconv(k)
+        new = []
+        for i in self.lst:
+            if self._kconv(i[0]) == kc:
+                if valuelist:
+                    new.append([k, valuelist.pop(0)])
+            else:
+                new.append(i)
+        while valuelist:
+            new.append([k, valuelist.pop(0)])
         self.lst = new
 
     def __delitem__(self, k):
@@ -84,7 +94,7 @@ class ODict(object):
             return False
 
     def add(self, key, value):
-        self.lst.append([key, str(value)])
+        self.lst.append([key, value])
 
     def get(self, k, d=None):
         if k in self:
@@ -108,10 +118,19 @@ class ODict(object):
         lst = copy.deepcopy(self.lst)
         return self.__class__(lst)
 
+    def extend(self, other):
+        """
+        Add the contents of other, preserving any duplicates.
+        """
+        self.lst.extend(other.lst)
+
     def __repr__(self):
+        return repr(self.lst)
+
+    def format(self):
         elements = []
         for itm in self.lst:
-            elements.append(itm[0] + ": " + itm[1])
+            elements.append(itm[0] + ": " + str(itm[1]))
         elements.append("")
         return "\r\n".join(elements)
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index bac27d5a..1b979608 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -1,5 +1,8 @@
 from __future__ import (absolute_import, print_function, division)
-import cStringIO, urllib, time, traceback
+import cStringIO
+import urllib
+import time
+import traceback
 
 from . import odict, tcp
 
@@ -23,15 +26,18 @@ class Request(object):
 
 def date_time_string():
     """Return the current date and time formatted for a message header."""
     WEEKS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
-    MONTHS = [None,
-              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
-              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    MONTHS = [
+        None,
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    ]
     now = time.time()
     year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
     s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
-        WEEKS[wd],
-        day, MONTHS[month], year,
-        hh, mm, ss)
+        WEEKS[wd],
+        day, MONTHS[month], year,
+        hh, mm, ss
+    )
     return s
 
@@ -100,6 +106,7 @@ class WSGIAdaptor(object):
             status = None,
             headers = None
         )
+
         def write(data):
             if not state["headers_sent"]:
                 soc.write("HTTP/1.1 %s\r\n"%state["status"])
@@ -108,7 +115,7 @@ class WSGIAdaptor(object):
                 h["Server"] = [self.sversion]
                 if 'date' not in h:
                     h["Date"] = [date_time_string()]
-                soc.write(str(h))
+                soc.write(h.format())
                 soc.write("\r\n")
                 state["headers_sent"] = True
             if data:
@@ -130,7 +137,9 @@ class WSGIAdaptor(object):
 
         errs = cStringIO.StringIO()
         try:
-            dataiter = self.app(self.make_environ(request, errs, **env), start_response)
+            dataiter = self.app(
+                self.make_environ(request, errs, **env), start_response
+            )
             for i in dataiter:
                 write(i)
             if not state["headers_sent"]:
@@ -143,5 +152,3 @@ class WSGIAdaptor(object):
         except Exception: # pragma: no cover
             pass
         return errs.getvalue()
-
-
diff --git a/test/test_http.py b/test/test_http.py
index fed60946..b1c62458 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -53,6 +53,7 @@ def test_connection_close():
     h["connection"] = ["close"]
     assert http.connection_close((1, 1), h)
 
+
 def test_get_header_tokens():
     h = odict.ODictCaseless()
     assert http.get_header_tokens(h, "foo") == []
@@ -69,11 +70,13 @@ def test_read_http_body_request():
     r = cStringIO.StringIO("testing")
     assert http.read_http_body(r, h, None, "GET", None, True) == ""
 
+
 def test_read_http_body_response():
     h = odict.ODictCaseless()
     s = cStringIO.StringIO("testing")
     assert http.read_http_body(s, h, None, "GET", 200, False) == "testing"
 
+
 def test_read_http_body():
     # test default case
     h = odict.ODictCaseless()
@@ -115,6 +118,7 @@ def test_read_http_body():
     s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
     assert http.read_http_body(s, h, 100, "GET", 200, False) == "aaaaa"
 
+
 def test_expected_http_body_size():
     # gibber in the content-length field
     h = odict.ODictCaseless()
@@ -135,6 +139,7 @@ def test_expected_http_body_size():
     h = odict.ODictCaseless()
     assert http.expected_http_body_size(h, True, "GET", None) == 0
 
+
 def test_parse_http_protocol():
     assert http.parse_http_protocol("HTTP/1.1") == (1, 1)
     assert http.parse_http_protocol("HTTP/0.0") == (0, 0)
@@ -189,6 +194,7 @@ def test_parse_init_http():
     assert not http.parse_init_http("GET /test foo/1.1")
     assert not http.parse_init_http("GET /test\xc0 HTTP/1.1")
 
+
 class TestReadHeaders:
     def _read(self, data, verbatim=False):
         if not verbatim:
@@ -251,11 +257,12 @@ class TestReadResponseNoContentLength(test.ServerTestBase):
         httpversion, code, msg, headers, content = http.read_response(c.rfile, "GET", None)
         assert content == "bar\r\n\r\n"
 
+
 def test_read_response():
     def tst(data, method, limit, include_body=True):
         data = textwrap.dedent(data)
         r = cStringIO.StringIO(data)
-        return http.read_response(r, method, limit, include_body=include_body)
+        return http.read_response(r, method, limit, include_body = include_body)
 
     tutils.raises("server disconnect", tst, "", "GET", None)
     tutils.raises("invalid server response", tst, "foo", "GET", None)
@@ -351,6 +358,7 @@ def test_parse_url():
     # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
     assert not http.parse_url('http://lo[calhost')
 
+
 def test_parse_http_basic_auth():
     vals = ("basic", "foo", "bar")
     assert http.parse_http_basic_auth(http.assemble_http_basic_auth(*vals)) == vals
@@ -358,4 +366,3 @@ def test_parse_http_basic_auth():
     assert not http.parse_http_basic_auth("foo bar")
     v = "basic " + binascii.b2a_base64("foo")
     assert not http.parse_http_basic_auth(v)
-
diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py
new file mode 100644
index 00000000..7438af7c
--- /dev/null
+++ b/test/test_http_cookies.py
@@ -0,0 +1,220 @@
+import pprint
+import nose.tools
+
+from netlib import http_cookies, odict
+
+
+def test_read_token():
+    tokens = [
+        [("foo", 0), ("foo", 3)],
+        [("foo", 1), ("oo", 3)],
+        [(" foo", 1), ("foo", 4)],
+        [(" foo;", 1), ("foo", 4)],
+        [(" foo=", 1), ("foo", 4)],
+        [(" foo=bar", 1), ("foo", 4)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_token(*q), a)
+
+
+def test_read_quoted_string():
+    tokens = [
+        [('"foo" x', 0), ("foo", 5)],
+        [('"f\oo" x', 0), ("foo", 6)],
+        [(r'"f\\o" x', 0), (r"f\o", 6)],
+        [(r'"f\\" x', 0), (r"f" + '\\', 5)],
+        [('"fo\\\"" x', 0), ("fo\"", 6)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_quoted_string(*q), a)
+
+
+def test_read_pairs():
+    vals = [
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=two",
+            [["one", "two"]]
+        ],
+        [
+            "one=",
+            [["one", ""]]
+        ],
+        [
+            'one="two"',
+            [["one", "two"]]
+        ],
+        [
+            'one="two"; three=four',
+            [["one", "two"], ["three", "four"]]
+        ],
+        [
+            'one="two"; three=four; five',
+            [["one", "two"], ["three", "four"], ["five", None]]
+        ],
+        [
+            'one="\\"two"; three=four',
+            [["one", '"two'], ["three", "four"]]
+        ],
+    ]
+    for s, lst in vals:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+
+
+def test_pairs_roundtrips():
+    pairs = [
+        [
+            "",
+            []
+        ],
+        [
+            "one=uno",
+            [["one", "uno"]]
+        ],
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=uno; two=due",
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            'one="uno"; two="\due"',
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            'one="un\\"o"',
+            [["one", 'un"o']]
+        ],
+        [
+            'one="uno,due"',
+            [["one", 'uno,due']]
+        ],
+        [
+            "one=uno; two; three=tre",
+            [["one", "uno"], ["two", None], ["three", "tre"]]
+        ],
+        [
+            "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; "
+            "_rcc2=53VdltWl+Ov6ordflA==;",
+            [
+                ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="],
+                ["_rcc2", "53VdltWl+Ov6ordflA=="]
+            ]
+        ]
+    ]
+    for s, lst in pairs:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+        s2 = http_cookies._format_pairs(lst)
+        ret, off = http_cookies._read_pairs(s2)
+        nose.tools.eq_(ret, lst)
+
+
+def test_cookie_roundtrips():
+    pairs = [
+        [
+            "one=uno",
+            [["one", "uno"]]
+        ],
+        [
+            "one=uno; two=due",
+            [["one", "uno"], ["two", "due"]]
+        ],
+    ]
+    for s, lst in pairs:
+        ret = http_cookies.parse_cookie_header(s)
+        nose.tools.eq_(ret.lst, lst)
+        s2 = http_cookies.format_cookie_header(ret)
+        ret = http_cookies.parse_cookie_header(s2)
+        nose.tools.eq_(ret.lst, lst)
+
+
+def test_parse_set_cookie_pairs():
+    pairs = [
+        [
+            "one=uno",
+            [
+                ["one", "uno"]
+            ]
+        ],
+        [
+            "one=un\x20",
+            [
+                ["one", "un\x20"]
+            ]
+        ],
+        [
+            "one=uno; foo",
+            [
+                ["one", "uno"],
+                ["foo", None]
+            ]
+        ],
+        [
+            "mun=1.390.f60; "
+            "expires=sun, 11-oct-2015 12:38:31 gmt; path=/; "
+            "domain=b.aol.com",
+            [
+                ["mun", "1.390.f60"],
+                ["expires", "sun, 11-oct-2015 12:38:31 gmt"],
+                ["path", "/"],
+                ["domain", "b.aol.com"]
+            ]
+        ],
+        [
+            r'rpb=190%3d1%2616726%3d1%2634832%3d1%2634874%3d1; '
+            'domain=.rubiconproject.com; '
+            'expires=mon, 11-may-2015 21:54:57 gmt; '
+            'path=/',
+            [
+                ['rpb', r'190%3d1%2616726%3d1%2634832%3d1%2634874%3d1'],
+                ['domain', '.rubiconproject.com'],
+                ['expires', 'mon, 11-may-2015 21:54:57 gmt'],
+                ['path', '/']
+            ]
+        ],
+    ]
+    for s, lst in pairs:
+        ret = http_cookies._parse_set_cookie_pairs(s)
+        nose.tools.eq_(ret, lst)
+        s2 = http_cookies._format_set_cookie_pairs(ret)
+        ret2 = http_cookies._parse_set_cookie_pairs(s2)
+        nose.tools.eq_(ret2, lst)
+
+
+def test_parse_set_cookie_header():
+    vals = [
+        [
+            "", None
+        ],
+        [
+            ";", None
+        ],
+        [
+            "one=uno",
+            ("one", "uno", [])
+        ],
+        [
+            "one=uno; foo=bar",
+            ("one", "uno", [["foo", "bar"]])
+        ]
+    ]
+    for s, expected in vals:
+        ret = http_cookies.parse_set_cookie_header(s)
+        if expected:
+            assert ret[0] == expected[0]
+            assert ret[1] == expected[1]
+            nose.tools.eq_(ret[2].lst, expected[2])
+            s2 = http_cookies.format_set_cookie_header(*ret)
+            ret2 = http_cookies.parse_set_cookie_header(s2)
+            assert ret2[0] == expected[0]
+            assert ret2[1] == expected[1]
+            nose.tools.eq_(ret2[2].lst, expected[2])
+        else:
+            assert ret is None
diff --git a/test/test_odict.py b/test/test_odict.py
index d90bc6e5..c01c4dbe 100644
--- a/test/test_odict.py
+++ b/test/test_odict.py
@@ -6,6 +6,11 @@ class TestODict:
     def setUp(self):
         self.od = odict.ODict()
 
+    def test_repr(self):
+        h = odict.ODict()
+        h["one"] = ["two"]
+        assert repr(h)
+
     def test_str_err(self):
         h = odict.ODict()
         tutils.raises(ValueError, h.__setitem__, "key", "foo")
@@ -20,7 +25,7 @@ class TestODict:
             "two: tre\r\n",
             "\r\n"
         ]
-        out = repr(self.od)
+        out = self.od.format()
         for i in expected:
             assert out.find(i) >= 0
 
@@ -39,7 +44,7 @@ class TestODict:
         self.od["one"] = ["uno"]
         expected1 = "one: uno\r\n"
         expected2 = "\r\n"
-        out = repr(self.od)
+        out = self.od.format()
         assert out.find(expected1) >= 0
         assert out.find(expected2) >= 0
 
@@ -109,6 +114,12 @@ class TestODict:
         assert self.od.get_first("one") == "two"
         assert self.od.get_first("two") == None
 
+    def test_extend(self):
+        a = odict.ODict([["a", "b"], ["c", "d"]])
+        b = odict.ODict([["a", "b"], ["e", "f"]])
+        a.extend(b)
+        assert len(a) == 4
+        assert a["a"] == ["b", "b"]
 
 class TestODictCaseless:
     def setUp(self):
@@ -145,3 +156,18 @@ class TestODictCaseless:
         self.od.add("bar", 2)
         assert len(self.od.keys()) == 2
 
+    def test_add_order(self):
+        od = odict.ODict(
+            [
+                ["one", "uno"],
+                ["two", "due"],
+                ["three", "tre"],
+            ]
+        )
+        od["two"] = ["foo", "bar"]
+        assert od.lst == [
+            ["one", "uno"],
+            ["two", "foo"],
+            ["three", "tre"],
+            ["two", "bar"],
+        ]
diff --git a/test/test_wsgi.py b/test/test_wsgi.py
index 6e1fb146..1c8c5263 100644
--- a/test/test_wsgi.py
+++ b/test/test_wsgi.py
@@ -100,4 +100,3 @@ class TestWSGI:
             start_response(status, response_headers, ei)
             yield "bbb"
         assert "Internal Server Error" in self._serve(app)
-