aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2016-07-24 21:17:35 -0700
committerGitHub <noreply@github.com>2016-07-24 21:17:35 -0700
commit56796aeda25dda66621ce78af227ff46049ef811 (patch)
treec4e4960a455ad35a60b5f1094fc0402164f55101 /netlib
parentf9edffc58e5198d7995c3652acee9116ae9fe7d8 (diff)
parent013288f7caa1f90a8313bbda8cedb7294d219066 (diff)
downloadmitmproxy-56796aeda25dda66621ce78af227ff46049ef811.tar.gz
mitmproxy-56796aeda25dda66621ce78af227ff46049ef811.tar.bz2
mitmproxy-56796aeda25dda66621ce78af227ff46049ef811.zip
Merge pull request #1426 from mhils/query-encoding
Fix query/path_components encoding
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http/request.py26
-rw-r--r--netlib/http/url.py42
-rw-r--r--netlib/strutils.py3
3 files changed, 52 insertions, 19 deletions
diff --git a/netlib/http/request.py b/netlib/http/request.py
index ecaa9b79..061217a3 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -253,14 +253,13 @@ class Request(message.Message):
)
def _get_query(self):
- _, _, _, _, query, _ = urllib.parse.urlparse(self.url)
+ query = urllib.parse.urlparse(self.url).query
return tuple(netlib.http.url.decode(query))
- def _set_query(self, value):
- query = netlib.http.url.encode(value)
- scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = netlib.http.url.parse(
- urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+ def _set_query(self, query_data):
+ query = netlib.http.url.encode(query_data)
+ _, _, path, params, _, fragment = urllib.parse.urlparse(self.url)
+ self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
@query.setter
def query(self, value):
@@ -296,19 +295,18 @@ class Request(message.Message):
The URL's path components as a tuple of strings.
Components are unquoted.
"""
- _, _, path, _, _, _ = urllib.parse.urlparse(self.url)
+ path = urllib.parse.urlparse(self.url).path
# This needs to be a tuple so that it's immutable.
# Otherwise, this would fail silently:
# request.path_components.append("foo")
- return tuple(urllib.parse.unquote(i) for i in path.split("/") if i)
+ return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i)
@path_components.setter
def path_components(self, components):
- components = map(lambda x: urllib.parse.quote(x, safe=""), components)
+ components = map(lambda x: netlib.http.url.quote(x, safe=""), components)
path = "/" + "/".join(components)
- scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = netlib.http.url.parse(
- urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+ _, _, _, params, query, fragment = urllib.parse.urlparse(self.url)
+ self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
def anticache(self):
"""
@@ -365,13 +363,13 @@ class Request(message.Message):
pass
return ()
- def _set_urlencoded_form(self, value):
+ def _set_urlencoded_form(self, form_data):
"""
Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = netlib.http.url.encode(value).encode()
+ self.content = netlib.http.url.encode(form_data).encode()
@urlencoded_form.setter
def urlencoded_form(self, value):
diff --git a/netlib/http/url.py b/netlib/http/url.py
index 1c8c007a..076854b9 100644
--- a/netlib/http/url.py
+++ b/netlib/http/url.py
@@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""):
def encode(s):
- # type: (six.text_type, bytes) -> str
+ # type: Sequence[Tuple[str,str]] -> str
"""
Takes a list of (key, value) tuples and returns a urlencoded string.
"""
- s = [tuple(i) for i in s]
- return urllib.parse.urlencode(s, False)
+ if six.PY2:
+ return urllib.parse.urlencode(s, False)
+ else:
+ return urllib.parse.urlencode(s, False, errors="surrogateescape")
def decode(s):
"""
- Takes a urlencoded string and returns a list of (key, value) tuples.
+ Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples.
+ """
+ if six.PY2:
+ return urllib.parse.parse_qsl(s, keep_blank_values=True)
+ else:
+ return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape')
+
+
+def quote(b, safe="/"):
+ """
+ Returns:
+ An ascii-encodable str.
+ """
+ # type: (str) -> str
+ if six.PY2:
+ return urllib.parse.quote(b, safe=safe)
+ else:
+ return urllib.parse.quote(b, safe=safe, errors="surrogateescape")
+
+
+def unquote(s):
"""
- return urllib.parse.parse_qsl(s, keep_blank_values=True)
+ Args:
+ s: A surrogate-escaped str
+ Returns:
+ A surrogate-escaped str
+ """
+ # type: (str) -> str
+
+ if six.PY2:
+ return urllib.parse.unquote(s)
+ else:
+ return urllib.parse.unquote(s, errors="surrogateescape")
def hostport(scheme, host, port):
diff --git a/netlib/strutils.py b/netlib/strutils.py
index 96c8b10f..8f27ebb7 100644
--- a/netlib/strutils.py
+++ b/netlib/strutils.py
@@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False):
def escaped_str_to_bytes(data):
"""
Take an escaped string and return the unescaped bytes equivalent.
+
+ Raises:
+ ValueError, if the escape sequence is invalid.
"""
if not isinstance(data, six.string_types):
if six.PY2: