aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--netlib/http_cookies.py112
-rw-r--r--test/test_http_cookies.py115
2 files changed, 195 insertions, 32 deletions
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
index 82675418..a1f240f5 100644
--- a/netlib/http_cookies.py
+++ b/netlib/http_cookies.py
@@ -1,13 +1,27 @@
"""
A flexible module for cookie parsing and manipulation.
-We try to be as permissive as possible. Parsing accepts formats from RFC6265 an
-RFC2109. Serialization follows RFC6265 strictly.
+This module differs from usual standards-compliant cookie modules in a number of
+ways. We try to be as permissive as possible, and to retain even malformed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
+not parse the comma-separated variant of Set-Cookie that allows multiple cookies
+to be set in a single header. Technically this should be feasible, but it turns
+out that violations of RFC6265 that make the parsing problem indeterminate are
+much more common than genuine occurrences of the multi-cookie variants.
+Serialization follows RFC6265.
http://tools.ietf.org/html/rfc6265
http://tools.ietf.org/html/rfc2109
+ http://tools.ietf.org/html/rfc2965
"""
+# TODO
+# - Disallow LHS-only Cookie values
+
import re
import odict
@@ -59,7 +73,7 @@ def _read_quoted_string(s, start):
return "".join(ret), i+1
-def _read_value(s, start, special):
+def _read_value(s, start, delims):
"""
Reads a value - the RHS of a token/value pair in a cookie.
@@ -70,37 +84,41 @@ def _read_value(s, start, special):
return "", start
elif s[start] == '"':
return _read_quoted_string(s, start)
- elif special:
- return _read_until(s, start, ";")
else:
- return _read_until(s, start, ";,")
+ return _read_until(s, start, delims)
-def _read_pairs(s, specials=()):
+def _read_pairs(s, off=0, term=None, specials=()):
"""
Read pairs of lhs=rhs values.
- specials: A lower-cased list of keys that may contain commas.
+ off: start offset
+ term: if True, treat a comma as a terminator for the pairs list
+ specials: a lower-cased list of keys that may contain commas if term is
+ True
"""
- off = 0
vals = []
while 1:
lhs, off = _read_token(s, off)
lhs = lhs.lstrip()
- rhs = None
- if off < len(s):
- if s[off] == "=":
- rhs, off = _read_value(s, off+1, lhs.lower() in specials)
- vals.append([lhs, rhs])
+ if lhs:
+ rhs = None
+ if off < len(s):
+ if s[off] == "=":
+ if term and lhs.lower() not in specials:
+ delims = ";,"
+ else:
+ delims = ";"
+ rhs, off = _read_value(s, off+1, delims)
+ vals.append([lhs, rhs])
off += 1
if not off < len(s):
break
+ if term and s[off-1] == ",":
+ break
return vals, off
-ESCAPE = re.compile(r"([\"\\])")
-
-
def _has_special(s):
for i in s:
if i in '",;\\':
@@ -111,6 +129,9 @@ def _has_special(s):
return False
+ESCAPE = re.compile(r"([\"\\])")
+
+
def _format_pairs(lst, specials=()):
"""
specials: A lower-cased list of keys that will not be quoted.
@@ -127,25 +148,58 @@ def _format_pairs(lst, specials=()):
return "; ".join(vals)
-def parse_cookies(s):
+def _format_set_cookie_pairs(lst):
+ return _format_pairs(
+ lst,
+ specials = ("expires", "path")
+ )
+
+
+def _parse_set_cookie_pairs(s):
"""
- Parses a Cookie header value.
- Returns an ODict object.
+ Read the lhs=rhs pairs of a Set-Cookie header value. The comma-separated
+ multi-cookie variant of RFC2109 is not split. Returns a list of lists.
"""
- pairs, off = _read_pairs(s)
- return odict.ODict(pairs)
+ pairs, off = _read_pairs(
+ s,
+ specials = ("expires", "path")
+ )
+ return pairs
-def unparse_cookies(od):
+def parse_set_cookie_header(str):
"""
- Formats a Cookie header value.
+ Parse a Set-Cookie header value
+
+ Returns a (name, value, attrs) tuple, or None, where attrs is an
+ ODictCaseless set of attributes. No attempt is made to parse attribute
+ values - they are treated purely as strings.
"""
- return _format_pairs(od.lst)
+ pairs = _parse_set_cookie_pairs(str)
+ if pairs:
+ return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+ """
+ Formats a Set-Cookie header value.
+ """
+ pairs = [[name, value]]
+ pairs.extend(attrs.lst)
+ return _format_set_cookie_pairs(pairs)
-def parse_set_cookies(s):
- start = 0
+def parse_cookie_header(str):
+ """
+ Parse a Cookie header value.
+ Returns a (possibly empty) ODict object.
+ """
+ pairs, off = _read_pairs(str)
+ return odict.ODict(pairs)
-def unparse_set_cookies(s):
- pass
+def format_cookie_header(od):
+ """
+ Formats a Cookie header value.
+ """
+ return _format_pairs(od.lst)
diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py
index 31e5f0b0..c0e5a5b7 100644
--- a/test/test_http_cookies.py
+++ b/test/test_http_cookies.py
@@ -1,6 +1,8 @@
-from netlib import http_cookies, odict
+import pprint
import nose.tools
+from netlib import http_cookies, odict
+
def test_read_token():
tokens = [
@@ -66,6 +68,10 @@ def test_read_pairs():
def test_pairs_roundtrips():
pairs = [
[
+ "",
+ []
+ ],
+ [
"one=uno",
[["one", "uno"]]
],
@@ -110,5 +116,108 @@ def test_pairs_roundtrips():
nose.tools.eq_(ret, lst)
-def test_parse_set_cookie():
- pass
+def test_cookie_roundtrips():
+ pairs = [
+ [
+ "one=uno",
+ [["one", "uno"]]
+ ],
+ [
+ "one=uno; two=due",
+ [["one", "uno"], ["two", "due"]]
+ ],
+ ]
+ for s, lst in pairs:
+ ret = http_cookies.parse_cookie_header(s)
+ nose.tools.eq_(ret.lst, lst)
+ s2 = http_cookies.format_cookie_header(ret)
+ ret = http_cookies.parse_cookie_header(s2)
+ nose.tools.eq_(ret.lst, lst)
+
+
+# TODO
+# I've seen the following pathological cookie in the wild:
+#
+# cid=09,0,0,0,0; expires=Wed, 10-Jun-2015 21:54:53 GMT; path=/
+#
+# It's not compliant under any RFC - the latest RFC prohibits commas in cookie
+# values completely, earlier RFCs require them to be within a quoted string.
+#
+# If we ditch support for earlier RFCs, we can handle this correctly. This
+# leaves us with the question: what's more common, multiple-value Set-Cookie
+# headers, or Set-Cookie headers that violate the standards?
+
+def test_parse_set_cookie_pairs():
+ pairs = [
+ [
+ "one=uno",
+ [
+ ["one", "uno"]
+ ]
+ ],
+ [
+ "one=uno; foo",
+ [
+ ["one", "uno"],
+ ["foo", None]
+ ]
+ ],
+ [
+ "mun=1.390.f60; "
+ "expires=sun, 11-oct-2015 12:38:31 gmt; path=/; "
+ "domain=b.aol.com",
+ [
+ ["mun", "1.390.f60"],
+ ["expires", "sun, 11-oct-2015 12:38:31 gmt"],
+ ["path", "/"],
+ ["domain", "b.aol.com"]
+ ]
+ ],
+ [
+ r'rpb=190%3d1%2616726%3d1%2634832%3d1%2634874%3d1; '
+ 'domain=.rubiconproject.com; '
+ 'expires=mon, 11-may-2015 21:54:57 gmt; '
+ 'path=/',
+ [
+ ['rpb', r'190%3d1%2616726%3d1%2634832%3d1%2634874%3d1'],
+ ['domain', '.rubiconproject.com'],
+ ['expires', 'mon, 11-may-2015 21:54:57 gmt'],
+ ['path', '/']
+ ]
+ ],
+ ]
+ for s, lst in pairs:
+ ret = http_cookies._parse_set_cookie_pairs(s)
+ nose.tools.eq_(ret, lst)
+ s2 = http_cookies._format_set_cookie_pairs(ret)
+ ret2 = http_cookies._parse_set_cookie_pairs(s2)
+ nose.tools.eq_(ret2, lst)
+
+
+def test_parse_set_cookie_header():
+ vals = [
+ [
+ "", None
+ ],
+ [
+ "one=uno",
+ ("one", "uno", [])
+ ],
+ [
+ "one=uno; foo=bar",
+ ("one", "uno", [["foo", "bar"]])
+ ]
+ ]
+ for s, expected in vals:
+ ret = http_cookies.parse_set_cookie_header(s)
+ if expected:
+ assert ret[0] == expected[0]
+ assert ret[1] == expected[1]
+ nose.tools.eq_(ret[2].lst, expected[2])
+ s2 = http_cookies.format_set_cookie_header(*ret)
+ ret2 = http_cookies.parse_set_cookie_header(s2)
+ assert ret2[0] == expected[0]
+ assert ret2[1] == expected[1]
+ nose.tools.eq_(ret2[2].lst, expected[2])
+ else:
+ assert ret is None