aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
authorAldo Cortesi <aldo@nullcube.com>2016-05-31 19:07:55 +1200
committerAldo Cortesi <aldo@nullcube.com>2016-05-31 19:07:55 +1200
commitec34cae6181d6af0150ac730d70b96104a07e9d5 (patch)
tree875987f36e83f92ce9a38cd6e22326a948f29609 /netlib
parent15b2374ef9d6a8cbafdff7c79694921387836ff3 (diff)
downloadmitmproxy-ec34cae6181d6af0150ac730d70b96104a07e9d5.tar.gz
mitmproxy-ec34cae6181d6af0150ac730d70b96104a07e9d5.tar.bz2
mitmproxy-ec34cae6181d6af0150ac730d70b96104a07e9d5.zip
utils.multipartdecode -> http.multipart.decode
also utils.parse_content_type -> http.headers.parse_content_type
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http/headers.py27
-rw-r--r--netlib/http/multipart.py32
-rw-r--r--netlib/http/request.py3
-rw-r--r--netlib/utils.py56
4 files changed, 61 insertions, 57 deletions
diff --git a/netlib/http/headers.py b/netlib/http/headers.py
index 6165fd61..8f669ec1 100644
--- a/netlib/http/headers.py
+++ b/netlib/http/headers.py
@@ -175,3 +175,30 @@ class Headers(MultiDict):
fields.append([name, value])
self.fields = fields
return replacements
+
+
+def parse_content_type(c):
+ """
+ A simple parser for content-type values. Returns a (type, subtype,
+ parameters) tuple, where type and subtype are strings, and parameters
+ is a dict. If the string could not be parsed, return None.
+
+ E.g. the following string:
+
+ text/html; charset=UTF-8
+
+ Returns:
+
+ ("text", "html", {"charset": "UTF-8"})
+ """
+ parts = c.split(";", 1)
+ ts = parts[0].split("/", 1)
+ if len(ts) != 2:
+ return None
+ d = {}
+ if len(parts) == 2:
+ for i in parts[1].split(";"):
+ clause = i.split("=", 1)
+ if len(clause) == 2:
+ d[clause[0].strip()] = clause[1].strip()
+ return ts[0].lower(), ts[1].lower(), d
diff --git a/netlib/http/multipart.py b/netlib/http/multipart.py
new file mode 100644
index 00000000..a135eb86
--- /dev/null
+++ b/netlib/http/multipart.py
@@ -0,0 +1,32 @@
+import re
+
+from . import headers
+
+
+def decode(hdrs, content):
+ """
+ Takes a multipart boundary encoded string and returns list of (key, value) tuples.
+ """
+ v = hdrs.get("content-type")
+ if v:
+ v = headers.parse_content_type(v)
+ if not v:
+ return []
+ try:
+ boundary = v[2]["boundary"].encode("ascii")
+ except (KeyError, UnicodeError):
+ return []
+
+ rx = re.compile(br'\bname="([^"]+)"')
+ r = []
+
+ for i in content.split(b"--" + boundary):
+ parts = i.splitlines()
+ if len(parts) > 1 and parts[0][0:2] != b"--":
+ match = rx.search(parts[1])
+ if match:
+ key = match.group(1)
+ value = b"".join(parts[3 + parts[2:].index(b""):])
+ r.append((key, value))
+ return r
+ return []
diff --git a/netlib/http/request.py b/netlib/http/request.py
index d552bc70..2fcea67d 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -7,6 +7,7 @@ from six.moves import urllib
from netlib import utils
import netlib.http.url
+from netlib.http import multipart
from . import cookies
from .. import encoding
from ..multidict import MultiDictView
@@ -369,7 +370,7 @@ class Request(Message):
def _get_multipart_form(self):
is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return utils.multipartdecode(self.headers, self.content)
+ return multipart.decode(self.headers, self.content)
return ()
def _set_multipart_form(self, value):
diff --git a/netlib/utils.py b/netlib/utils.py
index a2d8c97d..a0150e77 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -190,62 +190,6 @@ def hostport(scheme, host, port):
return "%s:%d" % (host, port)
-def parse_content_type(c):
- """
- A simple parser for content-type values. Returns a (type, subtype,
- parameters) tuple, where type and subtype are strings, and parameters
- is a dict. If the string could not be parsed, return None.
-
- E.g. the following string:
-
- text/html; charset=UTF-8
-
- Returns:
-
- ("text", "html", {"charset": "UTF-8"})
- """
- parts = c.split(";", 1)
- ts = parts[0].split("/", 1)
- if len(ts) != 2:
- return None
- d = {}
- if len(parts) == 2:
- for i in parts[1].split(";"):
- clause = i.split("=", 1)
- if len(clause) == 2:
- d[clause[0].strip()] = clause[1].strip()
- return ts[0].lower(), ts[1].lower(), d
-
-
-def multipartdecode(headers, content):
- """
- Takes a multipart boundary encoded string and returns list of (key, value) tuples.
- """
- v = headers.get("content-type")
- if v:
- v = parse_content_type(v)
- if not v:
- return []
- try:
- boundary = v[2]["boundary"].encode("ascii")
- except (KeyError, UnicodeError):
- return []
-
- rx = re.compile(br'\bname="([^"]+)"')
- r = []
-
- for i in content.split(b"--" + boundary):
- parts = i.splitlines()
- if len(parts) > 1 and parts[0][0:2] != b"--":
- match = rx.search(parts[1])
- if match:
- key = match.group(1)
- value = b"".join(parts[3 + parts[2:].index(b""):])
- r.append((key, value))
- return r
- return []
-
-
def safe_subn(pattern, repl, target, *args, **kwargs):
"""
There are Unicode conversion problems with re.subn. We try to smooth