diff options
-rw-r--r-- | .env | 5 | ||||
-rw-r--r-- | netlib/http_cookies.py | 151 | ||||
-rw-r--r-- | test/test_http_cookies.py | 114 |
3 files changed, 270 insertions, 0 deletions
"""
A flexible module for cookie parsing and manipulation.

We try to be as permissive as possible. Parsing accepts formats from RFC6265
and RFC2109. Serialization follows RFC6265 strictly.

    http://tools.ietf.org/html/rfc6265
    http://tools.ietf.org/html/rfc2109
"""

import re


def _read_until(s, start, term):
    """
    Read from offset start until one of the characters in term is reached.

    Returns a (text, offset) tuple. When a terminator is found, offset is the
    index of that terminator (it is not consumed). When the end of the string
    is reached first, offset is len(s). When there is nothing left to read,
    the text is empty and offset is start+1.
    """
    # ">=" rather than "==": with start beyond the end the loop below would
    # never run and there would be no index to report.
    if start >= len(s):
        return "", start + 1
    for i in range(start, len(s)):
        if s[i] in term:
            return s[start:i], i
    return s[start:], len(s)


def _read_token(s, start):
    """
    Read a token - the LHS of a token/value pair in a cookie.

    Returns a (token, offset) tuple; the token ends at the first ";" or "=".
    """
    return _read_until(s, start, ";=")


def _read_quoted_string(s, start):
    """
    start: offset to the first quote of the string to be read

    A sort of loose super-set of the various quoted string specifications.

    RFC6265 disallows backslashes or double quotes within quoted strings.
    Prior RFCs use backslashes to escape. This leaves us free to apply
    backslash escaping by default and be compatible with everything.

    Returns a (text, offset) tuple, where offset is one past the last
    character consumed (the closing quote, if there was one).
    """
    escaping = False
    ret = []
    # Pre-set the index so that an opening quote with nothing after it
    # (e.g. 'one="') yields an empty value instead of an unbound variable.
    end = start
    # Skip the first quote
    for end in range(start + 1, len(s)):
        char = s[end]
        if escaping:
            ret.append(char)
            escaping = False
        elif char == '"':
            break
        elif char == "\\":
            escaping = True
        else:
            ret.append(char)
    return "".join(ret), end + 1


def _read_value(s, start, special):
    """
    Reads a value - the RHS of a token/value pair in a cookie.

    special: If the value is special, commas are permitted. Else comma
    terminates. This helps us support old and new style values.

    Returns a (value, offset) tuple.
    """
    if start >= len(s):
        return "", start
    if s[start] == '"':
        return _read_quoted_string(s, start)
    return _read_until(s, start, ";" if special else ";,")


def _read_pairs(s, specials=()):
    """
    Read pairs of lhs=rhs values.

    specials: A lower-cased list of keys that may contain commas.

    Returns a (pairs, offset) tuple. Each pair is a [lhs, rhs] list; rhs is
    None when the key has no "=" at all, and "" when the value is empty.
    """
    off = 0
    vals = []
    while True:
        lhs, off = _read_token(s, off)
        lhs = lhs.lstrip()
        rhs = None
        if off < len(s) and s[off] == "=":
            rhs, off = _read_value(s, off + 1, lhs.lower() in specials)
        # An empty segment (empty input, a trailing "; ", or ";;") carries
        # neither a key nor a value, so it is not reported as a pair.
        if lhs or rhs is not None:
            vals.append([lhs, rhs])
        # Step over the separator that ended this pair.
        off += 1
        if off >= len(s):
            break
    return vals, off


# Characters that must be backslash-escaped inside a quoted value.
ESCAPE = re.compile(r"([\"\\])")


def _has_special(s):
    """
    Return True if s contains a character that forces the value to be quoted:
    a double quote, comma, semicolon or backslash, or anything outside the
    0x21-0x7e range.
    """
    return any(
        c in '",;\\' or not 0x21 <= ord(c) <= 0x7e
        for c in s
    )


def _format_pairs(lst, specials=()):
    """
    Format a list of [key, value] pairs as a "; "-separated string.

    A value of None emits the bare key. Values containing special characters
    are escaped and wrapped in double quotes.

    specials: A lower-cased list of keys that will not be quoted.
    """
    vals = []
    for k, v in lst:
        if v is None:
            vals.append(k)
            continue
        if k.lower() not in specials and _has_special(v):
            v = '"%s"' % ESCAPE.sub(r"\\\1", v)
        vals.append("%s=%s" % (k, v))
    return "; ".join(vals)


def parse_cookies(s):
    """
    Parses a Cookie header value.
    Returns an ODict object.
    """
    # Imported here rather than at module level so that the string-level
    # helpers above can be imported and exercised without odict.
    import odict
    pairs, _ = _read_pairs(s)
    return odict.ODict(pairs)


def unparse_cookies(od):
    """
    Formats a Cookie header value.

    od: an object whose lst attribute is a list of [key, value] pairs.
    """
    return _format_pairs(od.lst)


def parse_set_cookies(s):
    """
    Placeholder for parsing a Set-Cookie header value.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None


def unparse_set_cookies(s):
    """
    Placeholder for formatting a Set-Cookie header value.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None