diff options
Diffstat (limited to 'netlib/utils.py')
-rw-r--r-- | netlib/utils.py | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/netlib/utils.py b/netlib/utils.py index 86e33f33..39354605 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -1,5 +1,10 @@ from __future__ import (absolute_import, print_function, division) import os.path +import cgi +import urllib +import urlparse +import string + def isascii(s): try: @@ -131,6 +136,81 @@ class Data(object): return fullpath + + +def is_valid_port(port): + if not 0 <= port <= 65535: + return False + return True + + +def is_valid_host(host): + try: + host.decode("idna") + except ValueError: + return False + if "\0" in host: + return None + return True + + +def parse_url(url): + """ + Returns a (scheme, host, port, path) tuple, or None on error. + + Checks that: + port is an integer 0-65535 + host is a valid IDNA-encoded hostname with no null-bytes + path is valid ASCII + """ + try: + scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) + except ValueError: + return None + if not scheme: + return None + if '@' in netloc: + # FIXME: Consider what to do with the discarded credentials here Most + # probably we should extend the signature to return these as a separate + # value. + _, netloc = string.rsplit(netloc, '@', maxsplit=1) + if ':' in netloc: + host, port = string.rsplit(netloc, ':', maxsplit=1) + try: + port = int(port) + except ValueError: + return None + else: + host = netloc + if scheme == "https": + port = 443 + else: + port = 80 + path = urlparse.urlunparse(('', '', path, params, query, fragment)) + if not path.startswith("/"): + path = "/" + path + if not is_valid_host(host): + return None + if not isascii(path): + return None + if not is_valid_port(port): + return None + return scheme, host, port, path + + +def get_header_tokens(headers, key): + """ + Retrieve all tokens for a header key. A number of different headers + follow a pattern where each header line can containe comma-separated + tokens, and headers can be set multiple times. + """ + toks = [] + for i in headers[key]: + for j in i.split(","): + toks.append(j.strip()) + return toks + + def hostport(scheme, host, port): """ Returns the host component, with a port specifcation if needed. @@ -139,3 +219,23 @@ def hostport(scheme, host, port): return host else: return "%s:%s" % (host, port) + +def unparse_url(scheme, host, port, path=""): + """ + Returns a URL string, constructed from the specified compnents. + """ + return "%s://%s%s" % (scheme, hostport(scheme, host, port), path) + + +def urlencode(s): + """ + Takes a list of (key, value) tuples and returns a urlencoded string. + """ + s = [tuple(i) for i in s] + return urllib.urlencode(s, False) + +def urldecode(s): + """ + Takes a urlencoded string and returns a list of (key, value) tuples. + """ + return cgi.parse_qsl(s, keep_blank_values=True) |