1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
from __future__ import (absolute_import, print_function, division)
import binascii
import collections
import string
import sys
import urlparse
from .. import utils
class Response(object):
def __init__(
self,
httpversion,
status_code,
msg,
headers,
content,
sslinfo=None,
):
self.httpversion = httpversion
self.status_code = status_code
self.msg = msg
self.headers = headers
self.content = content
self.sslinfo = sslinfo
def __eq__(self, other):
return self.__dict__ == other.__dict__
def __repr__(self):
return "Response(%s - %s)" % (self.status_code, self.msg)
def is_valid_port(port):
if not 0 <= port <= 65535:
return False
return True
def is_valid_host(host):
try:
host.decode("idna")
except ValueError:
return False
if "\0" in host:
return None
return True
def parse_url(url):
"""
Returns a (scheme, host, port, path) tuple, or None on error.
Checks that:
port is an integer 0-65535
host is a valid IDNA-encoded hostname with no null-bytes
path is valid ASCII
"""
try:
scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
except ValueError:
return None
if not scheme:
return None
if '@' in netloc:
# FIXME: Consider what to do with the discarded credentials here Most
# probably we should extend the signature to return these as a separate
# value.
_, netloc = string.rsplit(netloc, '@', maxsplit=1)
if ':' in netloc:
host, port = string.rsplit(netloc, ':', maxsplit=1)
try:
port = int(port)
except ValueError:
return None
else:
host = netloc
if scheme == "https":
port = 443
else:
port = 80
path = urlparse.urlunparse(('', '', path, params, query, fragment))
if not path.startswith("/"):
path = "/" + path
if not is_valid_host(host):
return None
if not utils.isascii(path):
return None
if not is_valid_port(port):
return None
return scheme, host, port, path
|