From a682074e9ed5e94683389f67cc192e6547d6310e Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 24 Jul 2016 19:06:49 -0700 Subject: improve query/path_components getter/setter --- netlib/http/request.py | 26 ++++++++++++-------------- netlib/http/url.py | 42 +++++++++++++++++++++++++++++++++++++----- netlib/strutils.py | 3 +++ 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/netlib/http/request.py b/netlib/http/request.py index ecaa9b79..061217a3 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -253,14 +253,13 @@ class Request(message.Message): ) def _get_query(self): - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) + query = urllib.parse.urlparse(self.url).query return tuple(netlib.http.url.decode(query)) - def _set_query(self, value): - query = netlib.http.url.encode(value) - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + def _set_query(self, query_data): + query = netlib.http.url.encode(query_data) + _, _, path, params, _, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) @query.setter def query(self, value): @@ -296,19 +295,18 @@ class Request(message.Message): The URL's path components as a tuple of strings. Components are unquoted. """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) + path = urllib.parse.urlparse(self.url).path # This needs to be a tuple so that it's immutable. # Otherwise, this would fail silently: # request.path_components.append("foo") - return tuple(urllib.parse.unquote(i) for i in path.split("/") if i) + return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i) @path_components.setter def path_components(self, components): - components = map(lambda x: urllib.parse.quote(x, safe=""), components) + components = map(lambda x: netlib.http.url.quote(x, safe=""), components) path = "/" + "/".join(components) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + _, _, _, params, query, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) def anticache(self): """ @@ -365,13 +363,13 @@ class Request(message.Message): pass return () - def _set_urlencoded_form(self, value): + def _set_urlencoded_form(self, form_data): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = netlib.http.url.encode(value).encode() + self.content = netlib.http.url.encode(form_data).encode() @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py index 1c8c007a..076854b9 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""): def encode(s): - # type: (six.text_type, bytes) -> str + # type: Sequence[Tuple[str,str]] -> str """ Takes a list of (key, value) tuples and returns a urlencoded string. """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) + if six.PY2: + return urllib.parse.urlencode(s, False) + else: + return urllib.parse.urlencode(s, False, errors="surrogateescape") def decode(s): """ - Takes a urlencoded string and returns a list of (key, value) tuples. + Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples. + """ + if six.PY2: + return urllib.parse.parse_qsl(s, keep_blank_values=True) + else: + return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape') + + +def quote(b, safe="/"): + """ + Returns: + An ascii-encodable str. + """ + # type: (str) -> str + if six.PY2: + return urllib.parse.quote(b, safe=safe) + else: + return urllib.parse.quote(b, safe=safe, errors="surrogateescape") + + +def unquote(s): """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) + Args: + s: A surrogate-escaped str + Returns: + A surrogate-escaped str + """ + # type: (str) -> str + + if six.PY2: + return urllib.parse.unquote(s) + else: + return urllib.parse.unquote(s, errors="surrogateescape") def hostport(scheme, host, port): diff --git a/netlib/strutils.py b/netlib/strutils.py index 96c8b10f..8f27ebb7 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False): def escaped_str_to_bytes(data): """ Take an escaped string and return the unescaped bytes equivalent. + + Raises: + ValueError, if the escape sequence is invalid. """ if not isinstance(data, six.string_types): if six.PY2: -- cgit v1.2.3 From 013288f7caa1f90a8313bbda8cedb7294d219066 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 24 Jul 2016 19:08:45 -0700 Subject: add quote/unquote tests --- test/netlib/http/test_url.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py index 26b37230..768e5130 100644 --- a/test/netlib/http/test_url.py +++ b/test/netlib/http/test_url.py @@ -1,3 +1,4 @@ +import six from netlib import tutils from netlib.http import url @@ -57,10 +58,49 @@ def test_unparse(): assert url.unparse("https", "foo.com", 443, "") == "https://foo.com" -def test_urlencode(): +if six.PY2: + surrogates = bytes(bytearray(range(256))) +else: + surrogates = bytes(range(256)).decode("utf8", "surrogateescape") + +surrogates_quoted = ( + '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F' + '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F' + '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-./' + '0123456789%3A%3B%3C%3D%3E%3F' + '%40ABCDEFGHIJKLMNO' + 'PQRSTUVWXYZ%5B%5C%5D%5E_' + '%60abcdefghijklmno' + 'pqrstuvwxyz%7B%7C%7D%7E%7F' + '%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F' + '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F' + '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF' + '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF' + '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF' + '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF' + '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF' + '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF' +) + + +def test_encode(): assert url.encode([('foo', 'bar')]) + assert url.encode([('foo', surrogates)]) -def test_urldecode(): +def test_decode(): s = "one=two&three=four" assert len(url.decode(s)) == 2 + assert url.decode(surrogates) + + +def test_quote(): + assert url.quote("foo") == "foo" + assert url.quote("foo bar") == "foo%20bar" + assert url.quote(surrogates) == surrogates_quoted + + +def test_unquote(): + assert url.unquote("foo") == "foo" + assert url.unquote("foo%20bar") == "foo bar" + assert url.unquote(surrogates_quoted) == surrogates -- cgit v1.2.3