From e80b2af4aed325741d8b51458d59351e70743aef Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Sat, 30 Jul 2016 20:38:06 +0530 Subject: Add support for comma separated cookies --- netlib/http/cookies.py | 81 +++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 30 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 1421d8eb..6fec7daf 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -14,12 +14,9 @@ information. Duplicate cookies are preserved in parsing, and can be set in formatting. We do attempt to escape and quote values where needed, but will not reject data that violate the specs. -Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do -not parse the comma-separated variant of Set-Cookie that allows multiple -cookies to be set in a single header. Technically this should be feasible, but -it turns out that violations of RFC6265 that makes the parsing problem -indeterminate are much more common than genuine occurences of the multi-cookie -variants. Serialization follows RFC6265. +Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We +also parse the comma-separated variant of Set-Cookie that allows multiple +cookies to be set in a single header. Serialization follows RFC6265. http://tools.ietf.org/html/rfc6265 http://tools.ietf.org/html/rfc2109 @@ -51,7 +48,7 @@ def _read_token(s, start): """ Read a token - the LHS of a token/value pair in a cookie. """ - return _read_until(s, start, ";=") + return _read_until(s, start, ",;=") def _read_quoted_string(s, start): @@ -84,9 +81,6 @@ def _read_quoted_string(s, start): def _read_value(s, start, delims): """ Reads a value - the RHS of a token/value pair in a cookie. - - special: If the value is special, commas are premitted. Else comma - terminates. This helps us support old and new style values. 
""" if start >= len(s): return "", start @@ -96,27 +90,49 @@ def _read_value(s, start, delims): return _read_until(s, start, delims) +# TODO: Disallow LHS-only Cookie values def _read_pairs(s, off=0): """ - Read pairs of lhs=rhs values. + Read pairs of lhs=rhs values while handling multiple cookies. off: start offset - specials: a lower-cased list of keys that may contain commas """ - vals = [] + cookies = [] + pairs = [] + while True: lhs, off = _read_token(s, off) lhs = lhs.lstrip() + if lhs: rhs = None - if off < len(s): - if s[off] == "=": - rhs, off = _read_value(s, off + 1, ";") - vals.append([lhs, rhs]) + if off < len(s) and s[off] == "=": + rhs, off = _read_value(s, off + 1, ";,") + + # expires values can contain commas in them so they need to + # be handled separately. + if lhs.lower() == "expires": + # This is a heuristic we use to determine whether we've + # only read a part of the datetime and should read more. + if len(rhs) <= 3: + trail, off = _read_value(s, off + 1, ";,") + rhs = rhs + "," + trail + + pairs.append([lhs, rhs]) + + # comma marks the beginning of a new cookie + if off < len(s) and s[off] == ",": + cookies.append(pairs) + pairs = [] + off += 1 + if not off < len(s): break - return vals, off + + if pairs or not cookies: + cookies.append(pairs) + return cookies, off def _has_special(s): @@ -167,10 +183,11 @@ def _parse_set_cookie_pairs(s): def parse_set_cookie_headers(headers): ret = [] for header in headers: - v = parse_set_cookie_header(header) - if v: - name, value, attrs = v - ret.append((name, SetCookie(value, attrs))) + cookies = parse_set_cookie_header(header) + if cookies: + for cookie in cookies: + name, value, attrs = cookie + ret.append((name, SetCookie(value, attrs))) return ret @@ -193,13 +210,17 @@ def parse_set_cookie_header(line): """ Parse a Set-Cookie header value - Returns a (name, value, attrs) tuple, or None, where attrs is an - CookieAttrs dict of attributes. 
No attempt is made to parse attribute - values - they are treated purely as strings. + Returns a list of (name, value, attrs) tuple for each cokie, or None. + Where attrs is a CookieAttrs dict of attributes. No attempt is made + to parse attribute values - they are treated purely as strings. """ - pairs = _parse_set_cookie_pairs(line) - if pairs: - return pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:]) + cookies = [ + (pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:])) + for pairs in _parse_set_cookie_pairs(line) if pairs + ] + + if cookies: + return cookies def format_set_cookie_header(name, value, attrs): @@ -216,7 +237,7 @@ def format_set_cookie_header(name, value, attrs): def parse_cookie_headers(cookie_headers): cookie_list = [] for header in cookie_headers: - cookie_list.extend(parse_cookie_header(header)) + cookie_list.extend(parse_cookie_header(header)[0]) return cookie_list @@ -245,7 +266,7 @@ def refresh_set_cookie_header(c, delta): A refreshed Set-Cookie string """ - name, value, attrs = parse_set_cookie_header(c) + name, value, attrs = parse_set_cookie_header(c)[0] if not name or not value: raise ValueError("Invalid Cookie") -- cgit v1.2.3 From 734a7d8a6831b17313091a80817b30403118132f Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Wed, 3 Aug 2016 17:45:41 +0530 Subject: Minor refactor --- netlib/http/cookies.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 6fec7daf..2a1f62f0 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -185,8 +185,7 @@ def parse_set_cookie_headers(headers): for header in headers: cookies = parse_set_cookie_header(header) if cookies: - for cookie in cookies: - name, value, attrs = cookie + for name, value, attrs in cookies: ret.append((name, SetCookie(value, attrs))) return ret @@ -221,6 +220,8 @@ def parse_set_cookie_header(line): if cookies: return cookies + 
else: + return None def format_set_cookie_header(name, value, attrs): -- cgit v1.2.3 From 29046e6b4881bde9d8823a01879b8fe87bdf15e0 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 6 Sep 2016 11:39:49 +0530 Subject: Move CookieAttrs and SetCookie to top --- netlib/http/cookies.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 2a1f62f0..9b93e600 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -28,8 +28,21 @@ _cookie_params = set(( 'secure', 'httponly', 'version', )) +ESCAPE = re.compile(r"([\"\\])") -# TODO: Disallow LHS-only Cookie values + +class CookieAttrs(multidict.ImmutableMultiDict): + @staticmethod + def _kconv(key): + return key.lower() + + @staticmethod + def _reduce_values(values): + # See the StickyCookieTest for a weird cookie that only makes sense + # if we take the last part. + return values[-1] + +SetCookie = collections.namedtuple("SetCookie", ["value", "attrs"]) def _read_until(s, start, term): @@ -90,7 +103,6 @@ def _read_value(s, start, delims): return _read_until(s, start, delims) -# TODO: Disallow LHS-only Cookie values def _read_pairs(s, off=0): """ Read pairs of lhs=rhs values while handling multiple cookies. @@ -145,9 +157,6 @@ def _has_special(s): return False -ESCAPE = re.compile(r"([\"\\])") - - def _format_pairs(lst, specials=(), sep="; "): """ specials: A lower-cased list of keys that will not be quoted. @@ -190,19 +199,6 @@ def parse_set_cookie_headers(headers): return ret -class CookieAttrs(multidict.ImmutableMultiDict): - @staticmethod - def _kconv(key): - return key.lower() - - @staticmethod - def _reduce_values(values): - # See the StickyCookieTest for a weird cookie that only makes sense - # if we take the last part. 
- return values[-1] - - -SetCookie = collections.namedtuple("SetCookie", ["value", "attrs"]) def parse_set_cookie_header(line): -- cgit v1.2.3 From 90a48ccc06d01a13c52d8038b1300ff6b80a1292 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 6 Sep 2016 11:40:41 +0530 Subject: Rename _read_token to _read_key Since we also have a _read_value --- netlib/http/cookies.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 9b93e600..cdf742ce 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -57,13 +57,6 @@ def _read_until(s, start, term): return s[start:i + 1], i + 1 -def _read_token(s, start): - """ - Read a token - the LHS of a token/value pair in a cookie. - """ - return _read_until(s, start, ",;=") - - def _read_quoted_string(s, start): """ start: offset to the first quote of the string to be read @@ -91,6 +84,13 @@ def _read_quoted_string(s, start): return "".join(ret), i + 1 +def _read_key(s, start, delims=";="): + """ + Read a key - the LHS of a token/value pair in a cookie. + """ + return _read_until(s, start, delims) + + def _read_value(s, start, delims): """ Reads a value - the RHS of a token/value pair in a cookie. 
@@ -113,7 +113,7 @@ def _read_pairs(s, off=0): pairs = [] while True: - lhs, off = _read_token(s, off) + lhs, off = _read_key(s, off, ";=,") lhs = lhs.lstrip() if lhs: -- cgit v1.2.3 From 7802a0ba228df11f3c3defaf9a06c9f78745b701 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 6 Sep 2016 11:42:26 +0530 Subject: Rename _read_pairs to _read_cookie_pairs We will have a separate _read_set_cookie_pairs --- netlib/http/cookies.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index cdf742ce..3b5568c9 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -103,12 +103,31 @@ def _read_value(s, start, delims): return _read_until(s, start, delims) -def _read_pairs(s, off=0): +def _read_cookie_pairs(s, off=0): """ - Read pairs of lhs=rhs values while handling multiple cookies. + Read pairs of lhs=rhs values from Cookie headers. off: start offset """ + pairs = [] + + while True: + lhs, off = _read_key(s, off) + lhs = lhs.lstrip() + + if lhs: + rhs = None + if off < len(s) and s[off] == "=": + rhs, off = _read_value(s, off + 1, ";") + + pairs.append([lhs, rhs]) + + off += 1 + + if not off < len(s): + break + + return pairs, off cookies = [] pairs = [] @@ -185,7 +204,7 @@ def _parse_set_cookie_pairs(s): For Set-Cookie, we support multiple cookies as described in RFC2109. This function therefore returns a list of lists. 
""" - pairs, off_ = _read_pairs(s) + pairs, off_ = _read_cookie_pairs(line) return pairs -- cgit v1.2.3 From 06804e544883de187a6f64163c18d3d63e7a3047 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 6 Sep 2016 11:43:45 +0530 Subject: Add a new pairs reader for SetCookie headers --- netlib/http/cookies.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 3b5568c9..774b1d14 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -128,6 +128,15 @@ def _read_cookie_pairs(s, off=0): break return pairs, off + + +def _read_set_cookie_pairs(s, off=0): + """ + Read pairs of lhs=rhs values from SetCookie headers while handling multiple cookies. + + off: start offset + specials: attributes that are treated specially + """ cookies = [] pairs = [] @@ -140,10 +149,12 @@ def _read_cookie_pairs(s, off=0): if off < len(s) and s[off] == "=": rhs, off = _read_value(s, off + 1, ";,") - # expires values can contain commas in them so they need to - # be handled separately. + # Special handliing of attributes if lhs.lower() == "expires": - # This is a heuristic we use to determine whether we've + # 'expires' values can contain commas in them so they need to + # be handled separately. + + # '3' is just a heuristic we use to determine whether we've # only read a part of the datetime and should read more. 
if len(rhs) <= 3: trail, off = _read_value(s, off + 1, ";,") @@ -163,6 +174,7 @@ def _read_cookie_pairs(s, off=0): if pairs or not cookies: cookies.append(pairs) + return cookies, off -- cgit v1.2.3 From fd4f662871e388357f399ee3a2b3953892bfcbe5 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 6 Sep 2016 11:44:06 +0530 Subject: Refactor functions --- netlib/http/cookies.py | 61 +++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 35 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 774b1d14..73460a44 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -211,25 +211,27 @@ def _format_set_cookie_pairs(lst): ) -def _parse_set_cookie_pairs(s): +def parse_cookie_header(line): """ - For Set-Cookie, we support multiple cookies as described in RFC2109. - This function therefore returns a list of lists. + Parse a Cookie header value. + Returns a list of (lhs, rhs) tuples. """ pairs, off_ = _read_cookie_pairs(line) return pairs -def parse_set_cookie_headers(headers): - ret = [] - for header in headers: - cookies = parse_set_cookie_header(header) - if cookies: - for name, value, attrs in cookies: - ret.append((name, SetCookie(value, attrs))) - return ret +def parse_cookie_headers(cookie_headers): + cookie_list = [] + for header in cookie_headers: + cookie_list.extend(parse_cookie_header(header)[0]) + return cookie_list +def format_cookie_header(lst): + """ + Formats a Cookie header value. + """ + return _format_pairs(lst) def parse_set_cookie_header(line): @@ -240,9 +242,11 @@ def parse_set_cookie_header(line): Where attrs is a CookieAttrs dict of attributes. No attempt is made to parse attribute values - they are treated purely as strings. 
""" + cookie_pairs, off = _read_set_cookie_pairs(line) + cookies = [ (pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:])) - for pairs in _parse_set_cookie_pairs(line) if pairs + for pairs in cookie_pairs if pairs ] if cookies: @@ -251,6 +255,16 @@ def parse_set_cookie_header(line): return None +def parse_set_cookie_headers(headers): + ret = [] + for header in headers: + cookies = parse_set_cookie_header(header) + if cookies: + for name, value, attrs in cookies: + ret.append((name, SetCookie(value, attrs))) + return ret + + def format_set_cookie_header(name, value, attrs): """ Formats a Set-Cookie header value. @@ -262,29 +276,6 @@ def format_set_cookie_header(name, value, attrs): return _format_set_cookie_pairs(pairs) -def parse_cookie_headers(cookie_headers): - cookie_list = [] - for header in cookie_headers: - cookie_list.extend(parse_cookie_header(header)[0]) - return cookie_list - - -def parse_cookie_header(line): - """ - Parse a Cookie header value. - Returns a list of (lhs, rhs) tuples. - """ - pairs, off_ = _read_pairs(line) - return pairs - - -def format_cookie_header(lst): - """ - Formats a Cookie header value. 
- """ - return _format_pairs(lst) - - def refresh_set_cookie_header(c, delta): """ Args: -- cgit v1.2.3 From a104d65851763256a777cec9ea37236dc9c94a06 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Thu, 22 Sep 2016 00:22:43 +0530 Subject: Fixup an error with indices --- netlib/http/cookies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 73460a44..02f82065 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -223,7 +223,7 @@ def parse_cookie_header(line): def parse_cookie_headers(cookie_headers): cookie_list = [] for header in cookie_headers: - cookie_list.extend(parse_cookie_header(header)[0]) + cookie_list.extend(parse_cookie_header(header)) return cookie_list -- cgit v1.2.3 From 6e6ed35e3ec509ca443033c953d95a11ce1338c2 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 27 Sep 2016 21:04:52 +0530 Subject: Modify format_set_cookie_header to take cookie list and return a comma separated string of cookies --- netlib/http/cookies.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 02f82065..2c0fcafd 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -188,12 +188,12 @@ def _has_special(s): return False -def _format_pairs(lst, specials=(), sep="; "): +def _format_pairs(pairs, specials=(), sep="; "): """ specials: A lower-cased list of keys that will not be quoted. 
""" vals = [] - for k, v in lst: + for k, v in pairs: if v is None: vals.append(k) else: @@ -256,24 +256,33 @@ def parse_set_cookie_header(line): def parse_set_cookie_headers(headers): - ret = [] + rv = [] for header in headers: cookies = parse_set_cookie_header(header) if cookies: for name, value, attrs in cookies: - ret.append((name, SetCookie(value, attrs))) - return ret + rv.append((name, SetCookie(value, attrs))) + return rv -def format_set_cookie_header(name, value, attrs): +def format_set_cookie_header(set_cookies): """ Formats a Set-Cookie header value. """ - pairs = [(name, value)] - pairs.extend( - attrs.fields if hasattr(attrs, "fields") else attrs - ) - return _format_set_cookie_pairs(pairs) + + rv = [] + + for set_cookie in set_cookies: + name, value, attrs = set_cookie + + pairs = [(name, value)] + pairs.extend( + attrs.fields if hasattr(attrs, "fields") else attrs + ) + + rv.append(_format_set_cookie_pairs(pairs)) + + return ", ".join(rv) def refresh_set_cookie_header(c, delta): @@ -303,10 +312,10 @@ def refresh_set_cookie_header(c, delta): # For now, we just ignore this. attrs = attrs.with_delitem("expires") - ret = format_set_cookie_header(name, value, attrs) - if not ret: + rv = format_set_cookie_header([(name, value, attrs)]) + if not rv: raise ValueError("Invalid Cookie") - return ret + return rv def get_expiration_ts(cookie_attrs): -- cgit v1.2.3 From fba7fe101e0bec7bb6b10362477274caa523d54e Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 27 Sep 2016 21:06:26 +0530 Subject: Clarify that 'expires' values MUST have a comma now --- netlib/http/cookies.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'netlib/http/cookies.py') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 2c0fcafd..a3ac4806 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -154,8 +154,11 @@ def _read_set_cookie_pairs(s, off=0): # 'expires' values can contain commas in them so they need to # be handled separately. 
+ # We actually bank on the fact that the expires value WILL + # contain a comma. Things will fail if it doesn't. + # '3' is just a heuristic we use to determine whether we've - # only read a part of the datetime and should read more. + # only read a part of the expires value and we should read more. if len(rhs) <= 3: trail, off = _read_value(s, off + 1, ";,") rhs = rhs + "," + trail -- cgit v1.2.3