"""
URL helpers: parsing, construction and query-string (de)serialisation.

These functions were extracted from netlib.utils. The module is meant to be
part of the name at call sites: url.parse, url.unparse, url.encode and
url.decode (not url.urldecode).
"""
import six
from six.moves import urllib

from .. import utils


# PY2 workaround
def decode_parse_result(result, enc):
    """
    Decode the components of a parsed URL using the given encoding.

    Args:
        result: A parse result, e.g. as returned by urllib.parse.urlparse.
        enc: Name of the encoding to decode with.

    Returns:
        result.decode(enc) if the object has a decode method; otherwise a
        new ParseResult built from the individually decoded components.
    """
    if hasattr(result, "decode"):
        return result.decode(enc)
    else:
        return urllib.parse.ParseResult(*[x.decode(enc) for x in result])


# PY2 workaround
def encode_parse_result(result, enc):
    """
    Encode the components of a parsed URL using the given encoding.

    Args:
        result: A parse result, e.g. as returned by urllib.parse.urlparse.
        enc: Name of the encoding to encode with.

    Returns:
        result.encode(enc) if the object has an encode method; otherwise a
        new ParseResult built from the individually encoded components.
    """
    if hasattr(result, "encode"):
        return result.encode(enc)
    else:
        return urllib.parse.ParseResult(*[x.encode(enc) for x in result])


def parse(url):
    """
    URL-parsing function that checks that
        - port is an integer 0-65535
        - host is a valid IDNA-encoded hostname with no null-bytes
        - path is valid ASCII

    If the URL carries no port, 443 is used for the "https" scheme and 80
    for everything else. The returned path also contains the params, query
    and fragment of the URL and always starts with a slash.

    Args:
        A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    parsed = urllib.parse.urlparse(url)

    if not parsed.hostname:
        raise ValueError("No hostname given")

    if isinstance(url, six.binary_type):
        # Bytes input is taken as-is. We deliberately do not check that it
        # decodes as ASCII: we try to be very forgiving here.
        host = parsed.hostname
    else:
        host = parsed.hostname.encode("idna")
        parsed = encode_parse_result(parsed, "ascii")

    port = parsed.port
    # Compare against None rather than relying on truthiness, so that an
    # explicit port of 0 is not silently replaced by the scheme default
    # and is handed to utils.is_valid_port like any other value.
    if port is None:
        port = 443 if parsed.scheme == b"https" else 80

    # Everything after the authority, re-assembled without scheme and netloc.
    full_path = urllib.parse.urlunparse(
        (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
    )
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not utils.is_valid_host(host):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return parsed.scheme, host, port, full_path


def unparse(scheme, host, port, path=""):
    """
    Returns a URL string, constructed from the specified components.

    The host/port part is produced by utils.hostport(scheme, host, port).
    A path of "*" is treated as an empty path.

    Args:
        All args must be str.
    """
    # NOTE(review): presumably "*" is the asterisk-form request target
    # (e.g. "OPTIONS *"), which has no place in a URL - confirm with callers.
    if path == "*":
        path = ""
    return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path)


def encode(s):
    """
    Takes a list of (key, value) tuples and returns a urlencoded string.
    """
    # Normalise every item to a tuple before handing it to urlencode; the
    # second positional argument (doseq) is explicitly False.
    s = [tuple(i) for i in s]
    return urllib.parse.urlencode(s, False)


def decode(s):
    """
    Takes a urlencoded string and returns a list of (key, value) tuples.

    Keys with blank values are kept in the result.
    """
    return urllib.parse.parse_qsl(s, keep_blank_values=True)