aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http_cookies.py197
-rw-r--r--netlib/odict.py35
-rw-r--r--netlib/wsgi.py29
3 files changed, 242 insertions, 19 deletions
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
new file mode 100644
index 00000000..dab95ed0
--- /dev/null
+++ b/netlib/http_cookies.py
@@ -0,0 +1,197 @@
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number of
+ways. We try to be as permissive as possible, and to retain even mal-formed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
+not parse the comma-separated variant of Set-Cookie that allows multiple cookies
+to be set in a single header. Technically this should be feasible, but it turns
+out that violations of RFC6265 that makes the parsing problem indeterminate are
+much more common than genuine occurences of the multi-cookie variants.
+Serialization follows RFC6265.
+
+ http://tools.ietf.org/html/rfc6265
+ http://tools.ietf.org/html/rfc2109
+ http://tools.ietf.org/html/rfc2965
+"""
+
+# TODO
+# - Disallow LHS-only Cookie values
+
+import re
+
+import odict
+
+
+def _read_until(s, start, term):
+ """
+ Read until one of the characters in term is reached.
+ """
+ if start == len(s):
+ return "", start+1
+ for i in range(start, len(s)):
+ if s[i] in term:
+ return s[start:i], i
+ return s[start:i+1], i+1
+
+
+def _read_token(s, start):
+ """
+ Read a token - the LHS of a token/value pair in a cookie.
+ """
+ return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+ """
+ start: offset to the first quote of the string to be read
+
+ A sort of loose super-set of the various quoted string specifications.
+
+ RFC6265 disallows backslashes or double quotes within quoted strings.
+ Prior RFCs use backslashes to escape. This leaves us free to apply
+ backslash escaping by default and be compatible with everything.
+ """
+ escaping = False
+ ret = []
+ # Skip the first quote
+ for i in range(start+1, len(s)):
+ if escaping:
+ ret.append(s[i])
+ escaping = False
+ elif s[i] == '"':
+ break
+ elif s[i] == "\\":
+ escaping = True
+ pass
+ else:
+ ret.append(s[i])
+ return "".join(ret), i+1
+
+
+def _read_value(s, start, delims):
+ """
+ Reads a value - the RHS of a token/value pair in a cookie.
+
+ special: If the value is special, commas are premitted. Else comma
+ terminates. This helps us support old and new style values.
+ """
+ if start >= len(s):
+ return "", start
+ elif s[start] == '"':
+ return _read_quoted_string(s, start)
+ else:
+ return _read_until(s, start, delims)
+
+
+def _read_pairs(s, off=0, specials=()):
+ """
+ Read pairs of lhs=rhs values.
+
+ off: start offset
+ specials: a lower-cased list of keys that may contain commas
+ """
+ vals = []
+ while 1:
+ lhs, off = _read_token(s, off)
+ lhs = lhs.lstrip()
+ if lhs:
+ rhs = None
+ if off < len(s):
+ if s[off] == "=":
+ rhs, off = _read_value(s, off+1, ";")
+ vals.append([lhs, rhs])
+ off += 1
+ if not off < len(s):
+ break
+ return vals, off
+
+
+def _has_special(s):
+ for i in s:
+ if i in '",;\\':
+ return True
+ o = ord(i)
+ if o < 0x21 or o > 0x7e:
+ return True
+ return False
+
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+def _format_pairs(lst, specials=(), sep="; "):
+ """
+ specials: A lower-cased list of keys that will not be quoted.
+ """
+ vals = []
+ for k, v in lst:
+ if v is None:
+ vals.append(k)
+ else:
+ if k.lower() not in specials and _has_special(v):
+ v = ESCAPE.sub(r"\\\1", v)
+ v = '"%s"'%v
+ vals.append("%s=%s"%(k, v))
+ return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+ return _format_pairs(
+ lst,
+ specials = ("expires", "path")
+ )
+
+
+def _parse_set_cookie_pairs(s):
+ """
+ For Set-Cookie, we support multiple cookies as described in RFC2109.
+ This function therefore returns a list of lists.
+ """
+ pairs, off = _read_pairs(
+ s,
+ specials = ("expires", "path")
+ )
+ return pairs
+
+
+def parse_set_cookie_header(str):
+ """
+ Parse a Set-Cookie header value
+
+ Returns a (name, value, attrs) tuple, or None, where attrs is an
+ ODictCaseless set of attributes. No attempt is made to parse attribute
+ values - they are treated purely as strings.
+ """
+ pairs = _parse_set_cookie_pairs(str)
+ if pairs:
+ return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+ """
+ Formats a Set-Cookie header value.
+ """
+ pairs = [[name, value]]
+ pairs.extend(attrs.lst)
+ return _format_set_cookie_pairs(pairs)
+
+
+def parse_cookie_header(str):
+ """
+ Parse a Cookie header value.
+ Returns a (possibly empty) ODict object.
+ """
+ pairs, off = _read_pairs(str)
+ return odict.ODict(pairs)
+
+
+def format_cookie_header(od):
+ """
+ Formats a Cookie header value.
+ """
+ return _format_pairs(od.lst)
diff --git a/netlib/odict.py b/netlib/odict.py
index 7a2f611b..dd738c55 100644
--- a/netlib/odict.py
+++ b/netlib/odict.py
@@ -13,7 +13,8 @@ def safe_subn(pattern, repl, target, *args, **kwargs):
class ODict(object):
"""
- A dictionary-like object for managing ordered (key, value) data.
+ A dictionary-like object for managing ordered (key, value) data. Think
+ about it as a convenient interface to a list of (key, value) tuples.
"""
def __init__(self, lst=None):
self.lst = lst or []
@@ -64,11 +65,20 @@ class ODict(object):
key, they are cleared.
"""
if isinstance(valuelist, basestring):
- raise ValueError("Expected list of values instead of string. Example: odict['Host'] = ['www.example.com']")
-
- new = self._filter_lst(k, self.lst)
- for i in valuelist:
- new.append([k, i])
+ raise ValueError(
+ "Expected list of values instead of string. "
+ "Example: odict['Host'] = ['www.example.com']"
+ )
+ kc = self._kconv(k)
+ new = []
+ for i in self.lst:
+ if self._kconv(i[0]) == kc:
+ if valuelist:
+ new.append([k, valuelist.pop(0)])
+ else:
+ new.append(i)
+ while valuelist:
+ new.append([k, valuelist.pop(0)])
self.lst = new
def __delitem__(self, k):
@@ -84,7 +94,7 @@ class ODict(object):
return False
def add(self, key, value):
- self.lst.append([key, str(value)])
+ self.lst.append([key, value])
def get(self, k, d=None):
if k in self:
@@ -108,10 +118,19 @@ class ODict(object):
lst = copy.deepcopy(self.lst)
return self.__class__(lst)
+ def extend(self, other):
+ """
+ Add the contents of other, preserving any duplicates.
+ """
+ self.lst.extend(other.lst)
+
def __repr__(self):
+ return repr(self.lst)
+
+ def format(self):
elements = []
for itm in self.lst:
- elements.append(itm[0] + ": " + itm[1])
+ elements.append(itm[0] + ": " + str(itm[1]))
elements.append("")
return "\r\n".join(elements)
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index bac27d5a..1b979608 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -1,5 +1,8 @@
from __future__ import (absolute_import, print_function, division)
-import cStringIO, urllib, time, traceback
+import cStringIO
+import urllib
+import time
+import traceback
from . import odict, tcp
@@ -23,15 +26,18 @@ class Request(object):
def date_time_string():
"""Return the current date and time formatted for a message header."""
WEEKS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
- MONTHS = [None,
- 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+ MONTHS = [
+ None,
+ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+ ]
now = time.time()
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
- WEEKS[wd],
- day, MONTHS[month], year,
- hh, mm, ss)
+ WEEKS[wd],
+ day, MONTHS[month], year,
+ hh, mm, ss
+ )
return s
@@ -100,6 +106,7 @@ class WSGIAdaptor(object):
status = None,
headers = None
)
+
def write(data):
if not state["headers_sent"]:
soc.write("HTTP/1.1 %s\r\n"%state["status"])
@@ -108,7 +115,7 @@ class WSGIAdaptor(object):
h["Server"] = [self.sversion]
if 'date' not in h:
h["Date"] = [date_time_string()]
- soc.write(str(h))
+ soc.write(h.format())
soc.write("\r\n")
state["headers_sent"] = True
if data:
@@ -130,7 +137,9 @@ class WSGIAdaptor(object):
errs = cStringIO.StringIO()
try:
- dataiter = self.app(self.make_environ(request, errs, **env), start_response)
+ dataiter = self.app(
+ self.make_environ(request, errs, **env), start_response
+ )
for i in dataiter:
write(i)
if not state["headers_sent"]:
@@ -143,5 +152,3 @@ class WSGIAdaptor(object):
except Exception: # pragma: no cover
pass
return errs.getvalue()
-
-