-rw-r--r--   .env                         5
-rw-r--r--   netlib/http_cookies.py     197
-rw-r--r--   netlib/odict.py             35
-rw-r--r--   netlib/wsgi.py              29
-rw-r--r--   test/test_http.py           11
-rw-r--r--   test/test_http_cookies.py  220
-rw-r--r--   test/test_odict.py          30
-rw-r--r--   test/test_wsgi.py            1
8 files changed, 504 insertions, 24 deletions
diff --git a/.env b/.env
new file mode 100644
index 00000000..7f847e29
--- /dev/null
+++ b/.env
@@ -0,0 +1,5 @@
+DIR=`dirname $0`
+if [ -z "$VIRTUAL_ENV" ] && [ -f $DIR/../venv.mitmproxy/bin/activate ]; then
+ echo "Activating mitmproxy virtualenv..."
+ source $DIR/../venv.mitmproxy/bin/activate
+fi
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
new file mode 100644
index 00000000..dab95ed0
--- /dev/null
+++ b/netlib/http_cookies.py
@@ -0,0 +1,197 @@
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number
+of ways. We try to be as permissive as possible, and to retain even malformed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will
+not reject data that violates the specs.
+
+Parsing accepts the formats in RFC6265 and, partially, RFC2109 and RFC2965. We
+do not parse the comma-separated variant of Set-Cookie that allows multiple
+cookies to be set in a single header. Technically this should be feasible, but
+it turns out that violations of RFC6265 that make the parsing problem
+indeterminate are much more common than genuine occurrences of the
+multi-cookie variant. Serialization follows RFC6265.
+
+ http://tools.ietf.org/html/rfc6265
+ http://tools.ietf.org/html/rfc2109
+ http://tools.ietf.org/html/rfc2965
+"""
+
+# TODO
+# - Disallow LHS-only Cookie values
+
+import re
+
+import odict
+
+
+def _read_until(s, start, term):
+ """
+ Read until one of the characters in term is reached.
+ """
+ if start == len(s):
+ return "", start+1
+ for i in range(start, len(s)):
+ if s[i] in term:
+ return s[start:i], i
+ return s[start:i+1], i+1
+
+
+def _read_token(s, start):
+ """
+ Read a token - the LHS of a token/value pair in a cookie.
+ """
+ return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+ """
+ start: offset to the first quote of the string to be read
+
+    A sort of loose superset of the various quoted-string specifications.
+
+ RFC6265 disallows backslashes or double quotes within quoted strings.
+ Prior RFCs use backslashes to escape. This leaves us free to apply
+ backslash escaping by default and be compatible with everything.
+ """
+ escaping = False
+ ret = []
+ # Skip the first quote
+ for i in range(start+1, len(s)):
+ if escaping:
+ ret.append(s[i])
+ escaping = False
+ elif s[i] == '"':
+ break
+        elif s[i] == "\\":
+            escaping = True
+ else:
+ ret.append(s[i])
+ return "".join(ret), i+1
+
+
+def _read_value(s, start, delims):
+ """
+ Reads a value - the RHS of a token/value pair in a cookie.
+
+    delims: characters that terminate an unquoted value. Omitting the comma
+    from delims permits commas in the value, which helps us support both old
+    and new style values.
+ """
+ if start >= len(s):
+ return "", start
+ elif s[start] == '"':
+ return _read_quoted_string(s, start)
+ else:
+ return _read_until(s, start, delims)
+
+
+def _read_pairs(s, off=0, specials=()):
+ """
+ Read pairs of lhs=rhs values.
+
+ off: start offset
+    specials: a lower-cased list of keys whose values may contain commas
+ """
+ vals = []
+ while 1:
+ lhs, off = _read_token(s, off)
+ lhs = lhs.lstrip()
+ if lhs:
+ rhs = None
+ if off < len(s):
+ if s[off] == "=":
+ rhs, off = _read_value(s, off+1, ";")
+ vals.append([lhs, rhs])
+ off += 1
+ if not off < len(s):
+ break
+ return vals, off
+
+
+def _has_special(s):
+ for i in s:
+ if i in '",;\\':
+ return True
+ o = ord(i)
+ if o < 0x21 or o > 0x7e:
+ return True
+ return False
+
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+def _format_pairs(lst, specials=(), sep="; "):
+ """
+    specials: A lower-cased list of keys whose values will not be quoted.
+ """
+ vals = []
+ for k, v in lst:
+ if v is None:
+ vals.append(k)
+ else:
+ if k.lower() not in specials and _has_special(v):
+ v = ESCAPE.sub(r"\\\1", v)
+ v = '"%s"'%v
+ vals.append("%s=%s"%(k, v))
+ return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+ return _format_pairs(
+ lst,
+ specials = ("expires", "path")
+ )
+
+
+def _parse_set_cookie_pairs(s):
+ """
+    Parses a Set-Cookie value into a list of [key, value] pairs: the cookie's
+    name and value come first, followed by its attributes.
+ """
+ pairs, off = _read_pairs(
+ s,
+ specials = ("expires", "path")
+ )
+ return pairs
+
+
+def parse_set_cookie_header(str):
+ """
+ Parse a Set-Cookie header value
+
+ Returns a (name, value, attrs) tuple, or None, where attrs is an
+ ODictCaseless set of attributes. No attempt is made to parse attribute
+ values - they are treated purely as strings.
+ """
+ pairs = _parse_set_cookie_pairs(str)
+ if pairs:
+ return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+ """
+ Formats a Set-Cookie header value.
+ """
+ pairs = [[name, value]]
+ pairs.extend(attrs.lst)
+ return _format_set_cookie_pairs(pairs)
+
+
+def parse_cookie_header(str):
+ """
+ Parse a Cookie header value.
+ Returns a (possibly empty) ODict object.
+ """
+ pairs, off = _read_pairs(str)
+ return odict.ODict(pairs)
+
+
+def format_cookie_header(od):
+ """
+ Formats a Cookie header value.
+ """
+ return _format_pairs(od.lst)
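
As a usage sketch of the public API added above (the header values are taken
from the test cases further down; nothing here is authoritative beyond the
diff itself), a round-trip looks roughly like this:

    from netlib import http_cookies

    # Set-Cookie: returns a (name, value, attrs) tuple or None; attrs is an
    # ODictCaseless of attribute pairs.
    name, value, attrs = http_cookies.parse_set_cookie_header(
        "mun=1.390.f60; path=/; domain=b.aol.com"
    )
    assert name == "mun" and value == "1.390.f60"
    assert attrs["path"] == ["/"]
    header = http_cookies.format_set_cookie_header(name, value, attrs)

    # Cookie: returns a (possibly empty) ODict, preserving order and
    # duplicates.
    od = http_cookies.parse_cookie_header("one=uno; two=due")
    assert od["one"] == ["uno"]
    assert http_cookies.format_cookie_header(od) == "one=uno; two=due"
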
diff --git a/netlib/odict.py b/netlib/odict.py
index 7a2f611b..dd738c55 100644
--- a/netlib/odict.py
+++ b/netlib/odict.py
@@ -13,7 +13,8 @@ def safe_subn(pattern, repl, target, *args, **kwargs):
class ODict(object):
"""
- A dictionary-like object for managing ordered (key, value) data.
+ A dictionary-like object for managing ordered (key, value) data. Think
+ about it as a convenient interface to a list of (key, value) tuples.
"""
def __init__(self, lst=None):
self.lst = lst or []
@@ -64,11 +65,20 @@ class ODict(object):
key, they are cleared.
"""
if isinstance(valuelist, basestring):
- raise ValueError("Expected list of values instead of string. Example: odict['Host'] = ['www.example.com']")
-
- new = self._filter_lst(k, self.lst)
- for i in valuelist:
- new.append([k, i])
+ raise ValueError(
+ "Expected list of values instead of string. "
+ "Example: odict['Host'] = ['www.example.com']"
+ )
+ kc = self._kconv(k)
+ new = []
+ for i in self.lst:
+ if self._kconv(i[0]) == kc:
+ if valuelist:
+ new.append([k, valuelist.pop(0)])
+ else:
+ new.append(i)
+ while valuelist:
+ new.append([k, valuelist.pop(0)])
self.lst = new
def __delitem__(self, k):
@@ -84,7 +94,7 @@ class ODict(object):
return False
def add(self, key, value):
- self.lst.append([key, str(value)])
+ self.lst.append([key, value])
def get(self, k, d=None):
if k in self:
@@ -108,10 +118,19 @@ class ODict(object):
lst = copy.deepcopy(self.lst)
return self.__class__(lst)
+ def extend(self, other):
+ """
+ Add the contents of other, preserving any duplicates.
+ """
+ self.lst.extend(other.lst)
+
def __repr__(self):
+ return repr(self.lst)
+
+ def format(self):
elements = []
for itm in self.lst:
- elements.append(itm[0] + ": " + itm[1])
+ elements.append(itm[0] + ": " + str(itm[1]))
elements.append("")
return "\r\n".join(elements)
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index bac27d5a..1b979608 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -1,5 +1,8 @@
from __future__ import (absolute_import, print_function, division)
-import cStringIO, urllib, time, traceback
+import cStringIO
+import urllib
+import time
+import traceback
from . import odict, tcp
@@ -23,15 +26,18 @@ class Request(object):
def date_time_string():
"""Return the current date and time formatted for a message header."""
WEEKS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
- MONTHS = [None,
- 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+ MONTHS = [
+ None,
+ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+ ]
now = time.time()
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
- WEEKS[wd],
- day, MONTHS[month], year,
- hh, mm, ss)
+ WEEKS[wd],
+ day, MONTHS[month], year,
+ hh, mm, ss
+ )
return s
@@ -100,6 +106,7 @@ class WSGIAdaptor(object):
status = None,
headers = None
)
+
def write(data):
if not state["headers_sent"]:
soc.write("HTTP/1.1 %s\r\n"%state["status"])
@@ -108,7 +115,7 @@ class WSGIAdaptor(object):
h["Server"] = [self.sversion]
if 'date' not in h:
h["Date"] = [date_time_string()]
- soc.write(str(h))
+ soc.write(h.format())
soc.write("\r\n")
state["headers_sent"] = True
if data:
@@ -130,7 +137,9 @@ class WSGIAdaptor(object):
errs = cStringIO.StringIO()
try:
- dataiter = self.app(self.make_environ(request, errs, **env), start_response)
+ dataiter = self.app(
+ self.make_environ(request, errs, **env), start_response
+ )
for i in dataiter:
write(i)
if not state["headers_sent"]:
@@ -143,5 +152,3 @@ class WSGIAdaptor(object):
except Exception: # pragma: no cover
pass
return errs.getvalue()
-
-
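
The soc.write(h.format()) change above goes with the odict change: repr() of
an ODict is now its plain list representation, while the header wire format
moved to format(). A small sketch of the difference (the header value here is
just an illustration):

    from netlib import odict

    h = odict.ODictCaseless()
    h["Server"] = ["netlib-test"]
    repr(h)     # "[['Server', 'netlib-test']]"  - debugging view
    h.format()  # "Server: netlib-test\r\n"      - written to the socket
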
diff --git a/test/test_http.py b/test/test_http.py
index fed60946..b1c62458 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -53,6 +53,7 @@ def test_connection_close():
h["connection"] = ["close"]
assert http.connection_close((1, 1), h)
+
def test_get_header_tokens():
h = odict.ODictCaseless()
assert http.get_header_tokens(h, "foo") == []
@@ -69,11 +70,13 @@ def test_read_http_body_request():
r = cStringIO.StringIO("testing")
assert http.read_http_body(r, h, None, "GET", None, True) == ""
+
def test_read_http_body_response():
h = odict.ODictCaseless()
s = cStringIO.StringIO("testing")
assert http.read_http_body(s, h, None, "GET", 200, False) == "testing"
+
def test_read_http_body():
# test default case
h = odict.ODictCaseless()
@@ -115,6 +118,7 @@ def test_read_http_body():
s = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
assert http.read_http_body(s, h, 100, "GET", 200, False) == "aaaaa"
+
def test_expected_http_body_size():
# gibber in the content-length field
h = odict.ODictCaseless()
@@ -135,6 +139,7 @@ def test_expected_http_body_size():
h = odict.ODictCaseless()
assert http.expected_http_body_size(h, True, "GET", None) == 0
+
def test_parse_http_protocol():
assert http.parse_http_protocol("HTTP/1.1") == (1, 1)
assert http.parse_http_protocol("HTTP/0.0") == (0, 0)
@@ -189,6 +194,7 @@ def test_parse_init_http():
assert not http.parse_init_http("GET /test foo/1.1")
assert not http.parse_init_http("GET /test\xc0 HTTP/1.1")
+
class TestReadHeaders:
def _read(self, data, verbatim=False):
if not verbatim:
@@ -251,11 +257,12 @@ class TestReadResponseNoContentLength(test.ServerTestBase):
httpversion, code, msg, headers, content = http.read_response(c.rfile, "GET", None)
assert content == "bar\r\n\r\n"
+
def test_read_response():
def tst(data, method, limit, include_body=True):
data = textwrap.dedent(data)
r = cStringIO.StringIO(data)
- return http.read_response(r, method, limit, include_body=include_body)
+ return http.read_response(r, method, limit, include_body = include_body)
tutils.raises("server disconnect", tst, "", "GET", None)
tutils.raises("invalid server response", tst, "foo", "GET", None)
@@ -351,6 +358,7 @@ def test_parse_url():
# Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
assert not http.parse_url('http://lo[calhost')
+
def test_parse_http_basic_auth():
vals = ("basic", "foo", "bar")
assert http.parse_http_basic_auth(http.assemble_http_basic_auth(*vals)) == vals
@@ -358,4 +366,3 @@ def test_parse_http_basic_auth():
assert not http.parse_http_basic_auth("foo bar")
v = "basic " + binascii.b2a_base64("foo")
assert not http.parse_http_basic_auth(v)
-
diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py
new file mode 100644
index 00000000..7438af7c
--- /dev/null
+++ b/test/test_http_cookies.py
@@ -0,0 +1,220 @@
+import pprint
+import nose.tools
+
+from netlib import http_cookies, odict
+
+
+def test_read_token():
+ tokens = [
+ [("foo", 0), ("foo", 3)],
+ [("foo", 1), ("oo", 3)],
+ [(" foo", 1), ("foo", 4)],
+ [(" foo;", 1), ("foo", 4)],
+ [(" foo=", 1), ("foo", 4)],
+ [(" foo=bar", 1), ("foo", 4)],
+ ]
+ for q, a in tokens:
+ nose.tools.eq_(http_cookies._read_token(*q), a)
+
+
+def test_read_quoted_string():
+ tokens = [
+ [('"foo" x', 0), ("foo", 5)],
+ [('"f\oo" x', 0), ("foo", 6)],
+ [(r'"f\\o" x', 0), (r"f\o", 6)],
+ [(r'"f\\" x', 0), (r"f" + '\\', 5)],
+ [('"fo\\\"" x', 0), ("fo\"", 6)],
+ ]
+ for q, a in tokens:
+ nose.tools.eq_(http_cookies._read_quoted_string(*q), a)
+
+
+def test_read_pairs():
+ vals = [
+ [
+ "one",
+ [["one", None]]
+ ],
+ [
+ "one=two",
+ [["one", "two"]]
+ ],
+ [
+ "one=",
+ [["one", ""]]
+ ],
+ [
+ 'one="two"',
+ [["one", "two"]]
+ ],
+ [
+ 'one="two"; three=four',
+ [["one", "two"], ["three", "four"]]
+ ],
+ [
+ 'one="two"; three=four; five',
+ [["one", "two"], ["three", "four"], ["five", None]]
+ ],
+ [
+ 'one="\\"two"; three=four',
+ [["one", '"two'], ["three", "four"]]
+ ],
+ ]
+ for s, lst in vals:
+ ret, off = http_cookies._read_pairs(s)
+ nose.tools.eq_(ret, lst)
+
+
+def test_pairs_roundtrips():
+ pairs = [
+ [
+ "",
+ []
+ ],
+ [
+ "one=uno",
+ [["one", "uno"]]
+ ],
+ [
+ "one",
+ [["one", None]]
+ ],
+ [
+ "one=uno; two=due",
+ [["one", "uno"], ["two", "due"]]
+ ],
+ [
+ 'one="uno"; two="\due"',
+ [["one", "uno"], ["two", "due"]]
+ ],
+ [
+ 'one="un\\"o"',
+ [["one", 'un"o']]
+ ],
+ [
+ 'one="uno,due"',
+ [["one", 'uno,due']]
+ ],
+ [
+ "one=uno; two; three=tre",
+ [["one", "uno"], ["two", None], ["three", "tre"]]
+ ],
+ [
+ "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; "
+ "_rcc2=53VdltWl+Ov6ordflA==;",
+ [
+ ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="],
+ ["_rcc2", "53VdltWl+Ov6ordflA=="]
+ ]
+ ]
+ ]
+ for s, lst in pairs:
+ ret, off = http_cookies._read_pairs(s)
+ nose.tools.eq_(ret, lst)
+ s2 = http_cookies._format_pairs(lst)
+ ret, off = http_cookies._read_pairs(s2)
+ nose.tools.eq_(ret, lst)
+
+
+def test_cookie_roundtrips():
+ pairs = [
+ [
+ "one=uno",
+ [["one", "uno"]]
+ ],
+ [
+ "one=uno; two=due",
+ [["one", "uno"], ["two", "due"]]
+ ],
+ ]
+ for s, lst in pairs:
+ ret = http_cookies.parse_cookie_header(s)
+ nose.tools.eq_(ret.lst, lst)
+ s2 = http_cookies.format_cookie_header(ret)
+ ret = http_cookies.parse_cookie_header(s2)
+ nose.tools.eq_(ret.lst, lst)
+
+
+def test_parse_set_cookie_pairs():
+ pairs = [
+ [
+ "one=uno",
+ [
+ ["one", "uno"]
+ ]
+ ],
+ [
+ "one=un\x20",
+ [
+ ["one", "un\x20"]
+ ]
+ ],
+ [
+ "one=uno; foo",
+ [
+ ["one", "uno"],
+ ["foo", None]
+ ]
+ ],
+ [
+ "mun=1.390.f60; "
+ "expires=sun, 11-oct-2015 12:38:31 gmt; path=/; "
+ "domain=b.aol.com",
+ [
+ ["mun", "1.390.f60"],
+ ["expires", "sun, 11-oct-2015 12:38:31 gmt"],
+ ["path", "/"],
+ ["domain", "b.aol.com"]
+ ]
+ ],
+ [
+ r'rpb=190%3d1%2616726%3d1%2634832%3d1%2634874%3d1; '
+ 'domain=.rubiconproject.com; '
+ 'expires=mon, 11-may-2015 21:54:57 gmt; '
+ 'path=/',
+ [
+ ['rpb', r'190%3d1%2616726%3d1%2634832%3d1%2634874%3d1'],
+ ['domain', '.rubiconproject.com'],
+ ['expires', 'mon, 11-may-2015 21:54:57 gmt'],
+ ['path', '/']
+ ]
+ ],
+ ]
+ for s, lst in pairs:
+ ret = http_cookies._parse_set_cookie_pairs(s)
+ nose.tools.eq_(ret, lst)
+ s2 = http_cookies._format_set_cookie_pairs(ret)
+ ret2 = http_cookies._parse_set_cookie_pairs(s2)
+ nose.tools.eq_(ret2, lst)
+
+
+def test_parse_set_cookie_header():
+ vals = [
+ [
+ "", None
+ ],
+ [
+ ";", None
+ ],
+ [
+ "one=uno",
+ ("one", "uno", [])
+ ],
+ [
+ "one=uno; foo=bar",
+ ("one", "uno", [["foo", "bar"]])
+ ]
+ ]
+ for s, expected in vals:
+ ret = http_cookies.parse_set_cookie_header(s)
+ if expected:
+ assert ret[0] == expected[0]
+ assert ret[1] == expected[1]
+ nose.tools.eq_(ret[2].lst, expected[2])
+ s2 = http_cookies.format_set_cookie_header(*ret)
+ ret2 = http_cookies.parse_set_cookie_header(s2)
+ assert ret2[0] == expected[0]
+ assert ret2[1] == expected[1]
+ nose.tools.eq_(ret2[2].lst, expected[2])
+ else:
+ assert ret is None
diff --git a/test/test_odict.py b/test/test_odict.py
index d90bc6e5..c01c4dbe 100644
--- a/test/test_odict.py
+++ b/test/test_odict.py
@@ -6,6 +6,11 @@ class TestODict:
def setUp(self):
self.od = odict.ODict()
+ def test_repr(self):
+ h = odict.ODict()
+ h["one"] = ["two"]
+ assert repr(h)
+
def test_str_err(self):
h = odict.ODict()
tutils.raises(ValueError, h.__setitem__, "key", "foo")
@@ -20,7 +25,7 @@ class TestODict:
"two: tre\r\n",
"\r\n"
]
- out = repr(self.od)
+ out = self.od.format()
for i in expected:
assert out.find(i) >= 0
@@ -39,7 +44,7 @@ class TestODict:
self.od["one"] = ["uno"]
expected1 = "one: uno\r\n"
expected2 = "\r\n"
- out = repr(self.od)
+ out = self.od.format()
assert out.find(expected1) >= 0
assert out.find(expected2) >= 0
@@ -109,6 +114,12 @@ class TestODict:
assert self.od.get_first("one") == "two"
assert self.od.get_first("two") == None
+ def test_extend(self):
+ a = odict.ODict([["a", "b"], ["c", "d"]])
+ b = odict.ODict([["a", "b"], ["e", "f"]])
+ a.extend(b)
+ assert len(a) == 4
+ assert a["a"] == ["b", "b"]
class TestODictCaseless:
def setUp(self):
@@ -145,3 +156,18 @@ class TestODictCaseless:
self.od.add("bar", 2)
assert len(self.od.keys()) == 2
+ def test_add_order(self):
+ od = odict.ODict(
+ [
+ ["one", "uno"],
+ ["two", "due"],
+ ["three", "tre"],
+ ]
+ )
+ od["two"] = ["foo", "bar"]
+ assert od.lst == [
+ ["one", "uno"],
+ ["two", "foo"],
+ ["three", "tre"],
+ ["two", "bar"],
+ ]
diff --git a/test/test_wsgi.py b/test/test_wsgi.py
index 6e1fb146..1c8c5263 100644
--- a/test/test_wsgi.py
+++ b/test/test_wsgi.py
@@ -100,4 +100,3 @@ class TestWSGI:
start_response(status, response_headers, ei)
yield "bbb"
assert "Internal Server Error" in self._serve(app)
-