From a682074e9ed5e94683389f67cc192e6547d6310e Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 24 Jul 2016 19:06:49 -0700 Subject: improve query/path_components getter/setter --- netlib/http/request.py | 26 ++++++++++++-------------- netlib/http/url.py | 42 +++++++++++++++++++++++++++++++++++++----- netlib/strutils.py | 3 +++ 3 files changed, 52 insertions(+), 19 deletions(-) (limited to 'netlib') diff --git a/netlib/http/request.py b/netlib/http/request.py index ecaa9b79..061217a3 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -253,14 +253,13 @@ class Request(message.Message): ) def _get_query(self): - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) + query = urllib.parse.urlparse(self.url).query return tuple(netlib.http.url.decode(query)) - def _set_query(self, value): - query = netlib.http.url.encode(value) - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + def _set_query(self, query_data): + query = netlib.http.url.encode(query_data) + _, _, path, params, _, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) @query.setter def query(self, value): @@ -296,19 +295,18 @@ class Request(message.Message): The URL's path components as a tuple of strings. Components are unquoted. """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) + path = urllib.parse.urlparse(self.url).path # This needs to be a tuple so that it's immutable. # Otherwise, this would fail silently: # request.path_components.append("foo") - return tuple(urllib.parse.unquote(i) for i in path.split("/") if i) + return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i) @path_components.setter def path_components(self, components): - components = map(lambda x: urllib.parse.quote(x, safe=""), components) + components = map(lambda x: netlib.http.url.quote(x, safe=""), components) path = "/" + "/".join(components) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + _, _, _, params, query, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) def anticache(self): """ @@ -365,13 +363,13 @@ class Request(message.Message): pass return () - def _set_urlencoded_form(self, value): + def _set_urlencoded_form(self, form_data): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = netlib.http.url.encode(value).encode() + self.content = netlib.http.url.encode(form_data).encode() @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py index 1c8c007a..076854b9 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""): def encode(s): - # type: (six.text_type, bytes) -> str + # type: Sequence[Tuple[str,str]] -> str """ Takes a list of (key, value) tuples and returns a urlencoded string. """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) + if six.PY2: + return urllib.parse.urlencode(s, False) + else: + return urllib.parse.urlencode(s, False, errors="surrogateescape") def decode(s): """ - Takes a urlencoded string and returns a list of (key, value) tuples. + Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples. + """ + if six.PY2: + return urllib.parse.parse_qsl(s, keep_blank_values=True) + else: + return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape') + + +def quote(b, safe="/"): + """ + Returns: + An ascii-encodable str. + """ + # type: (str) -> str + if six.PY2: + return urllib.parse.quote(b, safe=safe) + else: + return urllib.parse.quote(b, safe=safe, errors="surrogateescape") + + +def unquote(s): """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) + Args: + s: A surrogate-escaped str + Returns: + A surrogate-escaped str + """ + # type: (str) -> str + + if six.PY2: + return urllib.parse.unquote(s) + else: + return urllib.parse.unquote(s, errors="surrogateescape") def hostport(scheme, host, port): diff --git a/netlib/strutils.py b/netlib/strutils.py index 96c8b10f..8f27ebb7 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False): def escaped_str_to_bytes(data): """ Take an escaped string and return the unescaped bytes equivalent. + + Raises: + ValueError, if the escape sequence is invalid. """ if not isinstance(data, six.string_types): if six.PY2: -- cgit v1.2.3