aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/netstring.py
diff options
context:
space:
mode:
Diffstat (limited to 'libmproxy/netstring.py')
-rw-r--r--libmproxy/netstring.py393
1 files changed, 0 insertions, 393 deletions
diff --git a/libmproxy/netstring.py b/libmproxy/netstring.py
deleted file mode 100644
index 03e38c6a..00000000
--- a/libmproxy/netstring.py
+++ /dev/null
@@ -1,393 +0,0 @@
-"""
-
-tnetstring: data serialization using typed netstrings
-======================================================
-
-
-This is a data serialization library. It's a lot like JSON but it uses a
-new syntax called "typed netstrings" that Zed has proposed for use in the
-Mongrel2 webserver. It's designed to be simpler and easier to implement
-than JSON, with a happy consequence of also being faster in many cases.
-
-An ordinary netstring is a blob of data prefixed with its length and postfixed
-with a sanity-checking comma. The string "hello world" encodes like this::
-
- 11:hello world,
-
-Typed netstrings add other datatypes by replacing the comma with a type tag.
-Here's the integer 12345 encoded as a tnetstring::
-
- 5:12345#
-
-And here's the list [12345,True,0] which mixes integers and bools::
-
- 19:5:12345#4:true!1:0#]
-
-Simple enough? This module gives you the following functions:
-
- :dump: dump an object as a tnetstring to a file
- :dumps: dump an object as a tnetstring to a string
- :load: load a tnetstring-encoded object from a file
- :loads: load a tnetstring-encoded object from a string
- :pop: pop a tnetstring-encoded object from the front of a string
-
-Note that since parsing a tnetstring requires reading all the data into memory
-at once, there's no efficiency gain from using the file-based versions of these
-functions. They're only here so you can use load() to read precisely one
-item from a file or socket without consuming any extra data.
-
-By default tnetstrings work only with byte strings, not unicode. If you want
-unicode strings then pass an optional encoding to the various functions,
-like so::
-
- >>> print repr(tnetstring.loads("2:\\xce\\xb1,"))
- '\\xce\\xb1'
- >>>
- >>> print repr(tnetstring.loads("2:\\xce\\xb1,","utf8"))
- u'\u03b1'
-
-"""
-
-__ver_major__ = 0
-__ver_minor__ = 2
-__ver_patch__ = 0
-__ver_sub__ = ""
-__version__ = "%d.%d.%d%s" % (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)
-
-
-from collections import deque
-
-
-def dumps(value,encoding=None):
- """dumps(object,encoding=None) -> string
-
- This function dumps a python object as a tnetstring.
- """
- # This uses a deque to collect output fragments in reverse order,
- # then joins them together at the end. It's measurably faster
- # than creating all the intermediate strings.
- # If you're reading this to get a handle on the tnetstring format,
- # consider the _gdumps() function instead; it's a standard top-down
- # generator that's simpler to understand but much less efficient.
- q = deque()
- _rdumpq(q,0,value,encoding)
- return "".join(q)
-
-
-def dump(value,file,encoding=None):
- """dump(object,file,encoding=None)
-
- This function dumps a python object as a tnetstring and writes it to
- the given file.
- """
- file.write(dumps(value,encoding))
-
-
-def _rdumpq(q,size,value,encoding=None):
- """Dump value as a tnetstring, to a deque instance, last chunks first.
-
- This function generates the tnetstring representation of the given value,
- pushing chunks of the output onto the given deque instance. It pushes
- the last chunk first, then recursively generates more chunks.
-
- When passed in the current size of the string in the queue, it will return
- the new size of the string in the queue.
-
- Operating last-chunk-first makes it easy to calculate the size written
- for recursive structures without having to build their representation as
- a string. This is measurably faster than generating the intermediate
- strings, especially on deeply nested structures.
- """
- write = q.appendleft
- if value is None:
- write("0:~")
- return size + 3
- if value is True:
- write("4:true!")
- return size + 7
- if value is False:
- write("5:false!")
- return size + 8
- if isinstance(value,(int,long)):
- data = str(value)
- ldata = len(data)
- span = str(ldata)
- write("#")
- write(data)
- write(":")
- write(span)
- return size + 2 + len(span) + ldata
- if isinstance(value,(float,)):
- # Use repr() for float rather than str().
- # It round-trips more accurately.
- # Probably unnecessary in later python versions that
- # use David Gay's ftoa routines.
- data = repr(value)
- ldata = len(data)
- span = str(ldata)
- write("^")
- write(data)
- write(":")
- write(span)
- return size + 2 + len(span) + ldata
- if isinstance(value,str):
- lvalue = len(value)
- span = str(lvalue)
- write(",")
- write(value)
- write(":")
- write(span)
- return size + 2 + len(span) + lvalue
- if isinstance(value,(list,tuple,)):
- write("]")
- init_size = size = size + 1
- for item in reversed(value):
- size = _rdumpq(q,size,item,encoding)
- span = str(size - init_size)
- write(":")
- write(span)
- return size + 1 + len(span)
- if isinstance(value,dict):
- write("}")
- init_size = size = size + 1
- for (k,v) in value.iteritems():
- size = _rdumpq(q,size,v,encoding)
- size = _rdumpq(q,size,k,encoding)
- span = str(size - init_size)
- write(":")
- write(span)
- return size + 1 + len(span)
- if isinstance(value,unicode):
- if encoding is None:
- raise ValueError("must specify encoding to dump unicode strings")
- value = value.encode(encoding)
- lvalue = len(value)
- span = str(lvalue)
- write(",")
- write(value)
- write(":")
- write(span)
- return size + 2 + len(span) + lvalue
- raise ValueError("unserializable object")
-
-
-def _gdumps(value,encoding):
- """Generate fragments of value dumped as a tnetstring.
-
- This is the naive dumping algorithm, implemented as a generator so that
- it's easy to pass to "".join() without building a new list.
-
- This is mainly here for comparison purposes; the _rdumpq version is
- measurably faster as it doesn't have to build intermediate strins.
- """
- if value is None:
- yield "0:~"
- elif value is True:
- yield "4:true!"
- elif value is False:
- yield "5:false!"
- elif isinstance(value,(int,long)):
- data = str(value)
- yield str(len(data))
- yield ":"
- yield data
- yield "#"
- elif isinstance(value,(float,)):
- data = repr(value)
- yield str(len(data))
- yield ":"
- yield data
- yield "^"
- elif isinstance(value,(str,)):
- yield str(len(value))
- yield ":"
- yield value
- yield ","
- elif isinstance(value,(list,tuple,)):
- sub = []
- for item in value:
- sub.extend(_gdumps(item))
- sub = "".join(sub)
- yield str(len(sub))
- yield ":"
- yield sub
- yield "]"
- elif isinstance(value,(dict,)):
- sub = []
- for (k,v) in value.iteritems():
- sub.extend(_gdumps(k))
- sub.extend(_gdumps(v))
- sub = "".join(sub)
- yield str(len(sub))
- yield ":"
- yield sub
- yield "}"
- elif isinstance(value,(unicode,)):
- if encoding is None:
- raise ValueError("must specify encoding to dump unicode strings")
- value = value.encode(encoding)
- yield str(len(value))
- yield ":"
- yield value
- yield ","
- else:
- raise ValueError("unserializable object")
-
-
-def loads(string,encoding=None):
- """loads(string,encoding=None) -> object
-
- This function parses a tnetstring into a python object.
- """
- # No point duplicating effort here. In the C-extension version,
- # loads() is measurably faster then pop() since it can avoid
- # the overhead of building a second string.
- return pop(string,encoding)[0]
-
-
-def load(file,encoding=None):
- """load(file,encoding=None) -> object
-
- This function reads a tnetstring from a file and parses it into a
- python object. The file must support the read() method, and this
- function promises not to read more data than necessary.
- """
- # Read the length prefix one char at a time.
- # Note that the netstring spec explicitly forbids padding zeros.
- c = file.read(1)
- if not c.isdigit():
- raise ValueError("not a tnetstring: missing or invalid length prefix")
- datalen = ord(c) - ord("0")
- c = file.read(1)
- if datalen != 0:
- while c.isdigit():
- datalen = (10 * datalen) + (ord(c) - ord("0"))
- if datalen > 999999999:
- errmsg = "not a tnetstring: absurdly large length prefix"
- raise ValueError(errmsg)
- c = file.read(1)
- if c != ":":
- raise ValueError("not a tnetstring: missing or invalid length prefix")
- # Now we can read and parse the payload.
- # This repeats the dispatch logic of pop() so we can avoid
- # re-constructing the outermost tnetstring.
- data = file.read(datalen)
- if len(data) != datalen:
- raise ValueError("not a tnetstring: length prefix too big")
- type = file.read(1)
- if type == ",":
- if encoding is not None:
- return data.decode(encoding)
- return data
- if type == "#":
- try:
- return int(data)
- except ValueError:
- raise ValueError("not a tnetstring: invalid integer literal")
- if type == "^":
- try:
- return float(data)
- except ValueError:
- raise ValueError("not a tnetstring: invalid float literal")
- if type == "!":
- if data == "true":
- return True
- elif data == "false":
- return False
- else:
- raise ValueError("not a tnetstring: invalid boolean literal")
- if type == "~":
- if data:
- raise ValueError("not a tnetstring: invalid null literal")
- return None
- if type == "]":
- l = []
- while data:
- (item,data) = pop(data,encoding)
- l.append(item)
- return l
- if type == "}":
- d = {}
- while data:
- (key,data) = pop(data,encoding)
- (val,data) = pop(data,encoding)
- d[key] = val
- return d
- raise ValueError("unknown type tag")
-
-
-
-def pop(string,encoding=None):
- """pop(string,encoding=None) -> (object, remain)
-
- This function parses a tnetstring into a python object.
- It returns a tuple giving the parsed object and a string
- containing any unparsed data from the end of the string.
- """
- # Parse out data length, type and remaining string.
- try:
- (dlen,rest) = string.split(":",1)
- dlen = int(dlen)
- except ValueError:
- raise ValueError("not a tnetstring: missing or invalid length prefix")
- try:
- (data,type,remain) = (rest[:dlen],rest[dlen],rest[dlen+1:])
- except IndexError:
- # This fires if len(rest) < dlen, meaning we don't need
- # to further validate that data is the right length.
- raise ValueError("not a tnetstring: invalid length prefix")
- # Parse the data based on the type tag.
- if type == ",":
- if encoding is not None:
- return (data.decode(encoding),remain)
- return (data,remain)
- if type == "#":
- try:
- return (int(data),remain)
- except ValueError:
- raise ValueError("not a tnetstring: invalid integer literal")
- if type == "^":
- try:
- return (float(data),remain)
- except ValueError:
- raise ValueError("not a tnetstring: invalid float literal")
- if type == "!":
- if data == "true":
- return (True,remain)
- elif data == "false":
- return (False,remain)
- else:
- raise ValueError("not a tnetstring: invalid boolean literal")
- if type == "~":
- if data:
- raise ValueError("not a tnetstring: invalid null literal")
- return (None,remain)
- if type == "]":
- l = []
- while data:
- (item,data) = pop(data,encoding)
- l.append(item)
- return (l,remain)
- if type == "}":
- d = {}
- while data:
- (key,data) = pop(data,encoding)
- (val,data) = pop(data,encoding)
- d[key] = val
- return (d,remain)
- raise ValueError("unknown type tag")
-
-
-
-# Use the c-extension version if available
-try:
- import _tnetstring
-except ImportError:
- pass
-else:
- dumps = _tnetstring.dumps
- load = _tnetstring.load
- loads = _tnetstring.loads
- pop = _tnetstring.pop
-
-