aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'netlib/utils.py')
-rw-r--r--netlib/utils.py100
1 files changed, 100 insertions, 0 deletions
diff --git a/netlib/utils.py b/netlib/utils.py
index 86e33f33..39354605 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -1,5 +1,10 @@
from __future__ import (absolute_import, print_function, division)
import os.path
+import cgi
+import urllib
+import urlparse
+import string
+
def isascii(s):
try:
@@ -131,6 +136,81 @@ class Data(object):
return fullpath
+
+
+def is_valid_port(port):
+ if not 0 <= port <= 65535:
+ return False
+ return True
+
+
+def is_valid_host(host):
+ try:
+ host.decode("idna")
+ except ValueError:
+ return False
+ if "\0" in host:
+ return None
+ return True
+
+
+def parse_url(url):
+ """
+ Returns a (scheme, host, port, path) tuple, or None on error.
+
+ Checks that:
+ port is an integer 0-65535
+ host is a valid IDNA-encoded hostname with no null-bytes
+ path is valid ASCII
+ """
+ try:
+ scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
+ except ValueError:
+ return None
+ if not scheme:
+ return None
+ if '@' in netloc:
+ # FIXME: Consider what to do with the discarded credentials here Most
+ # probably we should extend the signature to return these as a separate
+ # value.
+ _, netloc = string.rsplit(netloc, '@', maxsplit=1)
+ if ':' in netloc:
+ host, port = string.rsplit(netloc, ':', maxsplit=1)
+ try:
+ port = int(port)
+ except ValueError:
+ return None
+ else:
+ host = netloc
+ if scheme == "https":
+ port = 443
+ else:
+ port = 80
+ path = urlparse.urlunparse(('', '', path, params, query, fragment))
+ if not path.startswith("/"):
+ path = "/" + path
+ if not is_valid_host(host):
+ return None
+ if not isascii(path):
+ return None
+ if not is_valid_port(port):
+ return None
+ return scheme, host, port, path
+
+
+def get_header_tokens(headers, key):
+ """
+ Retrieve all tokens for a header key. A number of different headers
+ follow a pattern where each header line can containe comma-separated
+ tokens, and headers can be set multiple times.
+ """
+ toks = []
+ for i in headers[key]:
+ for j in i.split(","):
+ toks.append(j.strip())
+ return toks
+
+
def hostport(scheme, host, port):
"""
Returns the host component, with a port specifcation if needed.
@@ -139,3 +219,23 @@ def hostport(scheme, host, port):
return host
else:
return "%s:%s" % (host, port)
+
+def unparse_url(scheme, host, port, path=""):
+ """
+ Returns a URL string, constructed from the specified compnents.
+ """
+ return "%s://%s%s" % (scheme, hostport(scheme, host, port), path)
+
+
+def urlencode(s):
+ """
+ Takes a list of (key, value) tuples and returns a urlencoded string.
+ """
+ s = [tuple(i) for i in s]
+ return urllib.urlencode(s, False)
+
+def urldecode(s):
+ """
+ Takes a urlencoded string and returns a list of (key, value) tuples.
+ """
+ return cgi.parse_qsl(s, keep_blank_values=True)