diff options
author | Aldo Cortesi <aldo@nullcube.com> | 2011-08-19 21:38:05 +1200 |
---|---|---|
committer | Aldo Cortesi <aldo@nullcube.com> | 2011-08-19 21:53:52 +1200 |
commit | 45f4768a5cc4e820d1b5d7d221e2974f6ddf390d (patch) | |
tree | 1af515b9525144b37f5223f73334391d72c479a5 /libmproxy/netstring.py | |
parent | a566684e3280ebbe15dd397710ee1b26bf8bd571 (diff) | |
download | mitmproxy-45f4768a5cc4e820d1b5d7d221e2974f6ddf390d.tar.gz mitmproxy-45f4768a5cc4e820d1b5d7d221e2974f6ddf390d.tar.bz2 mitmproxy-45f4768a5cc4e820d1b5d7d221e2974f6ddf390d.zip |
Add attribution and license for tnetstring.py
Diffstat (limited to 'libmproxy/netstring.py')
-rw-r--r-- | libmproxy/netstring.py | 393 |
1 files changed, 0 insertions, 393 deletions
# Recovered from the deletion diff of libmproxy/netstring.py (blob 03e38c6a).
"""

tnetstring:  data serialization using typed netstrings
======================================================


This is a data serialization library. It's a lot like JSON but it uses a
new syntax called "typed netstrings" that Zed has proposed for use in the
Mongrel2 webserver.  It's designed to be simpler and easier to implement
than JSON, with a happy consequence of also being faster in many cases.

An ordinary netstring is a blob of data prefixed with its length and postfixed
with a sanity-checking comma.  The string "hello world" encodes like this::

    11:hello world,

Typed netstrings add other datatypes by replacing the comma with a type tag.
Here's the integer 12345 encoded as a tnetstring::

    5:12345#

And here's the list [12345,True,0] which mixes integers and bools::

    19:5:12345#4:true!1:0#]

Simple enough?  This module gives you the following functions:

    :dump:    dump an object as a tnetstring to a file
    :dumps:   dump an object as a tnetstring to a string
    :load:    load a tnetstring-encoded object from a file
    :loads:   load a tnetstring-encoded object from a string
    :pop:     pop a tnetstring-encoded object from the front of a string

Note that since parsing a tnetstring requires reading all the data into memory
at once, there's no efficiency gain from using the file-based versions of these
functions.  They're only here so you can use load() to read precisely one
item from a file or socket without consuming any extra data.

By default tnetstrings work only with byte strings, not unicode.  If you want
unicode strings then pass an optional encoding to the various functions,
like so::

    >>> print repr(tnetstring.loads("2:\\xce\\xb1,"))
    '\\xce\\xb1'
    >>>
    >>> print repr(tnetstring.loads("2:\\xce\\xb1,","utf8"))
    u'\u03b1'

"""

__ver_major__ = 0
__ver_minor__ = 2
__ver_patch__ = 0
__ver_sub__ = ""
__version__ = "%d.%d.%d%s" % (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)


from collections import deque


def dumps(value, encoding=None):
    """dumps(object,encoding=None) -> string

    This function dumps a python object as a tnetstring.

    Unicode strings can only be dumped when an encoding is given; any
    unserializable object raises ValueError.
    """
    #  This uses a deque to collect output fragments in reverse order,
    #  then joins them together at the end.  It's measurably faster
    #  than creating all the intermediate strings.
    #  If you're reading this to get a handle on the tnetstring format,
    #  consider the _gdumps() function instead; it's a standard top-down
    #  generator that's simpler to understand but much less efficient.
    q = deque()
    _rdumpq(q, 0, value, encoding)
    return "".join(q)


def dump(value, file, encoding=None):
    """dump(object,file,encoding=None)

    This function dumps a python object as a tnetstring and writes it to
    the given file.  The file only needs to support the write() method.
    """
    file.write(dumps(value, encoding))


def _rdumpq(q, size, value, encoding=None):
    """Dump value as a tnetstring, to a deque instance, last chunks first.

    This function generates the tnetstring representation of the given value,
    pushing chunks of the output onto the given deque instance.  It pushes
    the last chunk first, then recursively generates more chunks.

    When passed in the current size of the string in the queue, it will return
    the new size of the string in the queue.

    Operating last-chunk-first makes it easy to calculate the size written
    for recursive structures without having to build their representation as
    a string.  This is measurably faster than generating the intermediate
    strings, especially on deeply nested structures.

    Raises ValueError for unserializable objects, and for unicode strings
    when no encoding was given.
    """
    write = q.appendleft
    if value is None:
        write("0:~")
        return size + 3
    # The bool checks must come before the int check: bool subclasses int.
    if value is True:
        write("4:true!")
        return size + 7
    if value is False:
        write("5:false!")
        return size + 8
    if isinstance(value, (int, long)):
        data = str(value)
        ldata = len(data)
        span = str(ldata)
        write("#")
        write(data)
        write(":")
        write(span)
        return size + 2 + len(span) + ldata
    if isinstance(value, (float,)):
        #  Use repr() for float rather than str().
        #  It round-trips more accurately.
        #  Probably unnecessary in later python versions that
        #  use David Gay's ftoa routines.
        data = repr(value)
        ldata = len(data)
        span = str(ldata)
        write("^")
        write(data)
        write(":")
        write(span)
        return size + 2 + len(span) + ldata
    if isinstance(value, str):
        lvalue = len(value)
        span = str(lvalue)
        write(",")
        write(value)
        write(":")
        write(span)
        return size + 2 + len(span) + lvalue
    if isinstance(value, (list, tuple,)):
        write("]")
        init_size = size = size + 1
        # Items go in back-to-front because chunks are pushed on the left.
        for item in reversed(value):
            size = _rdumpq(q, size, item, encoding)
        span = str(size - init_size)
        write(":")
        write(span)
        return size + 1 + len(span)
    if isinstance(value, dict):
        write("}")
        init_size = size = size + 1
        # Value is pushed before its key so the key precedes it in the output.
        for (k, v) in value.iteritems():
            size = _rdumpq(q, size, v, encoding)
            size = _rdumpq(q, size, k, encoding)
        span = str(size - init_size)
        write(":")
        write(span)
        return size + 1 + len(span)
    if isinstance(value, unicode):
        if encoding is None:
            raise ValueError("must specify encoding to dump unicode strings")
        value = value.encode(encoding)
        lvalue = len(value)
        span = str(lvalue)
        write(",")
        write(value)
        write(":")
        write(span)
        return size + 2 + len(span) + lvalue
    raise ValueError("unserializable object")


def _gdumps(value, encoding=None):
    """Generate fragments of value dumped as a tnetstring.

    This is the naive dumping algorithm, implemented as a generator so that
    it's easy to pass to "".join() without building a new list.

    This is mainly here for comparison purposes; the _rdumpq version is
    measurably faster as it doesn't have to build intermediate strings.

    Raises ValueError for unserializable objects, and for unicode strings
    when no encoding was given.
    """
    if value is None:
        yield "0:~"
    elif value is True:
        yield "4:true!"
    elif value is False:
        yield "5:false!"
    elif isinstance(value, (int, long)):
        data = str(value)
        yield str(len(data))
        yield ":"
        yield data
        yield "#"
    elif isinstance(value, (float,)):
        data = repr(value)
        yield str(len(data))
        yield ":"
        yield data
        yield "^"
    elif isinstance(value, (str,)):
        yield str(len(value))
        yield ":"
        yield value
        yield ","
    elif isinstance(value, (list, tuple,)):
        sub = []
        for item in value:
            # The encoding must be threaded through to nested values.
            sub.extend(_gdumps(item, encoding))
        sub = "".join(sub)
        yield str(len(sub))
        yield ":"
        yield sub
        yield "]"
    elif isinstance(value, (dict,)):
        sub = []
        for (k, v) in value.iteritems():
            sub.extend(_gdumps(k, encoding))
            sub.extend(_gdumps(v, encoding))
        sub = "".join(sub)
        yield str(len(sub))
        yield ":"
        yield sub
        yield "}"
    elif isinstance(value, (unicode,)):
        if encoding is None:
            raise ValueError("must specify encoding to dump unicode strings")
        value = value.encode(encoding)
        yield str(len(value))
        yield ":"
        yield value
        yield ","
    else:
        raise ValueError("unserializable object")


def _parse_payload(data, tag, encoding):
    """Convert a tnetstring payload and its type tag into a python object.

    `data` is the payload between the length prefix and the type tag, and
    `tag` is the single character that followed it (possibly empty if the
    input was truncated).  Container payloads are parsed recursively with
    pop().  Raises ValueError for malformed payloads or unknown tags.
    """
    if tag == ",":
        if encoding is not None:
            return data.decode(encoding)
        return data
    if tag == "#":
        try:
            return int(data)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal")
    if tag == "^":
        try:
            return float(data)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal")
    if tag == "!":
        if data == "true":
            return True
        elif data == "false":
            return False
        else:
            raise ValueError("not a tnetstring: invalid boolean literal")
    if tag == "~":
        if data:
            raise ValueError("not a tnetstring: invalid null literal")
        return None
    if tag == "]":
        items = []
        while data:
            (item, data) = pop(data, encoding)
            items.append(item)
        return items
    if tag == "}":
        mapping = {}
        while data:
            (key, data) = pop(data, encoding)
            (val, data) = pop(data, encoding)
            mapping[key] = val
        return mapping
    raise ValueError("unknown type tag")


def loads(string, encoding=None):
    """loads(string,encoding=None) -> object

    This function parses a tnetstring into a python object.
    Any data following the first encoded object is ignored.
    """
    #  No point duplicating effort here.  In the C-extension version,
    #  loads() is measurably faster than pop() since it can avoid
    #  the overhead of building a second string.
    return pop(string, encoding)[0]


def load(file, encoding=None):
    """load(file,encoding=None) -> object

    This function reads a tnetstring from a file and parses it into a
    python object.  The file must support the read() method, and this
    function promises not to read more data than necessary.

    Raises ValueError if the data is not a well-formed tnetstring.
    """
    #  Read the length prefix one char at a time.
    #  Note that the netstring spec explicitly forbids padding zeros.
    c = file.read(1)
    if not c.isdigit():
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    datalen = ord(c) - ord("0")
    c = file.read(1)
    if datalen != 0:
        while c.isdigit():
            datalen = (10 * datalen) + (ord(c) - ord("0"))
            if datalen > 999999999:
                errmsg = "not a tnetstring: absurdly large length prefix"
                raise ValueError(errmsg)
            c = file.read(1)
    if c != ":":
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    #  Now we can read and parse the payload.  The payload and tag are
    #  handed straight to the shared dispatcher, so the outermost
    #  tnetstring never has to be re-constructed for pop().
    data = file.read(datalen)
    if len(data) != datalen:
        raise ValueError("not a tnetstring: length prefix too big")
    tag = file.read(1)
    return _parse_payload(data, tag, encoding)


def pop(string, encoding=None):
    """pop(string,encoding=None) -> (object, remain)

    This function parses a tnetstring into a python object.
    It returns a tuple giving the parsed object and a string
    containing any unparsed data from the end of the string.

    Raises ValueError if the string does not start with a well-formed
    tnetstring.
    """
    #  Parse out data length, type and remaining string.
    (prefix, sep, rest) = string.partition(":")
    #  Only plain digits are a valid length.  int() alone would also accept
    #  signs and whitespace, and a negative length makes the slices below
    #  silently return garbage instead of failing.
    if not sep or not prefix.isdigit():
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    try:
        dlen = int(prefix)
    except ValueError:
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    try:
        (data, tag, remain) = (rest[:dlen], rest[dlen], rest[dlen+1:])
    except IndexError:
        #  This fires if len(rest) < dlen, meaning we don't need
        #  to further validate that data is the right length.
        raise ValueError("not a tnetstring: invalid length prefix")
    #  Parse the data based on the type tag.
    return (_parse_payload(data, tag, encoding), remain)


#  Use the c-extension version if available
try:
    import _tnetstring
except ImportError:
    pass
else:
    dumps = _tnetstring.dumps
    load = _tnetstring.load
    loads = _tnetstring.loads
    pop = _tnetstring.pop