path: root/netlib/http/http1/protocol.py
author     Thomas Kriechbaumer <thomas@kriechbaumer.name>  2015-07-14 23:02:14 +0200
committer  Thomas Kriechbaumer <thomas@kriechbaumer.name>  2015-07-22 15:30:51 +0200
commit     f50deb7b763d093a22a4d331e16465a2fb0329cf (patch)
tree       a11e92fca2a0deffeb801cc6f931bd79aec0d669 /netlib/http/http1/protocol.py
parent     bd5ee212840e3be731ea93e14ef1375745383d88 (diff)
move bits around
Diffstat (limited to 'netlib/http/http1/protocol.py')
-rw-r--r--  netlib/http/http1/protocol.py  518
1 file changed, 518 insertions(+), 0 deletions(-)
diff --git a/netlib/http/http1/protocol.py b/netlib/http/http1/protocol.py
new file mode 100644
index 00000000..0f7a0bd3
--- /dev/null
+++ b/netlib/http/http1/protocol.py
@@ -0,0 +1,518 @@
+from __future__ import (absolute_import, print_function, division)
+import binascii
+import collections
+import string
+import sys
+import urlparse
+
+from netlib import odict, utils, tcp, http
+from .. import status_codes
+from ..exceptions import *
+
+
+def get_request_line(fp):
+ """
+    Get a line, possibly preceded by a blank line.
+ """
+ line = fp.readline()
+ if line == "\r\n" or line == "\n":
+ # Possible leftover from previous message
+ line = fp.readline()
+ return line
+
+def read_headers(fp):
+ """
+ Read a set of headers from a file pointer. Stop once a blank line is
+    reached. Return an ODictCaseless object, or None if the headers are invalid.
+ """
+ ret = []
+ name = ''
+ while True:
+ line = fp.readline()
+ if not line or line == '\r\n' or line == '\n':
+ break
+ if line[0] in ' \t':
+ if not ret:
+ return None
+ # continued header
+ ret[-1][1] = ret[-1][1] + '\r\n ' + line.strip()
+ else:
+ i = line.find(':')
+ # We're being liberal in what we accept, here.
+ if i > 0:
+ name = line[:i]
+ value = line[i + 1:].strip()
+ ret.append([name, value])
+ else:
+ return None
+ return odict.ODictCaseless(ret)
+
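A minimal usage sketch for read_headers, assuming Python 2 (StringIO yields the byte strings this module works with) and ODictCaseless's case-insensitive, list-valued lookup:

from StringIO import StringIO

raw = "Host: example.com\r\nAccept: */*\r\nAccept: text/html\r\n\r\n"
headers = read_headers(StringIO(raw))
assert headers["host"] == ["example.com"]         # lookup is case-insensitive
assert headers["accept"] == ["*/*", "text/html"]  # repeated headers are preserved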
+
+def read_chunked(fp, limit, is_request):
+ """
+ Read a chunked HTTP body.
+
+ May raise HttpError.
+ """
+ # FIXME: Should check if chunked is the final encoding in the headers
+ # http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-16#section-3.3
+ # 3.3 2.
+ total = 0
+ code = 400 if is_request else 502
+ while True:
+ line = fp.readline(128)
+ if line == "":
+ raise HttpErrorConnClosed(code, "Connection closed prematurely")
+ if line != '\r\n' and line != '\n':
+ try:
+ length = int(line, 16)
+ except ValueError:
+ raise HttpError(
+ code,
+ "Invalid chunked encoding length: %s" % line
+ )
+ total += length
+ if limit is not None and total > limit:
+ msg = "HTTP Body too large. Limit is %s," \
+ " chunked content longer than %s" % (limit, total)
+ raise HttpError(code, msg)
+ chunk = fp.read(length)
+ suffix = fp.readline(5)
+ if suffix != '\r\n':
+ raise HttpError(code, "Malformed chunked body")
+ yield line, chunk, '\r\n'
+ if length == 0:
+ return
+
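A rough sketch of driving read_chunked directly, with a StringIO standing in for the socket's file object; the zero-length chunk terminates the generator:

from StringIO import StringIO

body = "4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n"
chunks = list(read_chunked(StringIO(body), limit=None, is_request=True))
assert "".join(content for _, content, _ in chunks) == "Wikipedia"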
+
+def get_header_tokens(headers, key):
+ """
+ Retrieve all tokens for a header key. A number of different headers
+    follow a pattern where each header line can contain comma-separated
+ tokens, and headers can be set multiple times.
+ """
+ toks = []
+ for i in headers[key]:
+ for j in i.split(","):
+ toks.append(j.strip())
+ return toks
+
+
+def has_chunked_encoding(headers):
+ return "chunked" in [
+ i.lower() for i in get_header_tokens(headers, "transfer-encoding")
+ ]
+
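For illustration, token extraction and chunked detection on a hand-built header object (assuming ODictCaseless accepts a list of [name, value] pairs, as used throughout netlib):

from netlib import odict

h = odict.ODictCaseless([["Transfer-Encoding", "gzip, chunked"]])
assert get_header_tokens(h, "transfer-encoding") == ["gzip", "chunked"]
assert has_chunked_encoding(h)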
+
+def parse_http_protocol(s):
+ """
+ Parse an HTTP protocol declaration. Returns a (major, minor) tuple, or
+ None.
+ """
+ if not s.startswith("HTTP/"):
+ return None
+ _, version = s.split('/', 1)
+ if "." not in version:
+ return None
+ major, minor = version.split('.', 1)
+ try:
+ major = int(major)
+ minor = int(minor)
+ except ValueError:
+ return None
+ return major, minor
+
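A quick sketch of accepted and rejected protocol declarations:

assert parse_http_protocol("HTTP/1.1") == (1, 1)
assert parse_http_protocol("HTTP/1.0") == (1, 0)
assert parse_http_protocol("SPDY/3") is None  # not an HTTP/ declaration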
+
+def parse_http_basic_auth(s):
+ # TODO: check if this is HTTP/1 only - otherwise move it to netlib.http.semantics
+ words = s.split()
+ if len(words) != 2:
+ return None
+ scheme = words[0]
+ try:
+ user = binascii.a2b_base64(words[1])
+ except binascii.Error:
+ return None
+ parts = user.split(':')
+ if len(parts) != 2:
+ return None
+ return scheme, parts[0], parts[1]
+
+
+def assemble_http_basic_auth(scheme, username, password):
+ # TODO: check if this is HTTP/1 only - otherwise move it to netlib.http.semantics
+ v = binascii.b2a_base64(username + ":" + password)
+ return scheme + " " + v
+
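The two helpers round-trip each other; note that b2a_base64 appends a newline, which the whitespace split in parse_http_basic_auth tolerates:

token = assemble_http_basic_auth("Basic", "user", "pass")
assert parse_http_basic_auth(token) == ("Basic", "user", "pass")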
+
+def parse_init(line):
+ try:
+ method, url, protocol = string.split(line)
+ except ValueError:
+ return None
+ httpversion = parse_http_protocol(protocol)
+ if not httpversion:
+ return None
+ if not utils.isascii(method):
+ return None
+ return method, url, httpversion
+
+
+def parse_init_connect(line):
+ """
+ Returns (host, port, httpversion) if line is a valid CONNECT line.
+ http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
+ """
+ v = parse_init(line)
+ if not v:
+ return None
+ method, url, httpversion = v
+
+ if method.upper() != 'CONNECT':
+ return None
+ try:
+ host, port = url.split(":")
+ except ValueError:
+ return None
+ try:
+ port = int(port)
+ except ValueError:
+ return None
+ if not http.is_valid_port(port):
+ return None
+ if not http.is_valid_host(host):
+ return None
+ return host, port, httpversion
+
+
+def parse_init_proxy(line):
+ v = parse_init(line)
+ if not v:
+ return None
+ method, url, httpversion = v
+
+ parts = http.parse_url(url)
+ if not parts:
+ return None
+ scheme, host, port, path = parts
+ return method, scheme, host, port, path, httpversion
+
+
+def parse_init_http(line):
+ """
+ Returns (method, url, httpversion)
+ """
+ v = parse_init(line)
+ if not v:
+ return None
+ method, url, httpversion = v
+ if not utils.isascii(url):
+ return None
+ if not (url.startswith("/") or url == "*"):
+ return None
+ return method, url, httpversion
+
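A sketch of the authority-form and origin-form parsers with this module's names in scope (parse_init_proxy additionally runs the URL through http.parse_url):

assert parse_init_connect("CONNECT example.com:443 HTTP/1.1") == \
    ("example.com", 443, (1, 1))
assert parse_init_http("GET /index.html HTTP/1.1") == \
    ("GET", "/index.html", (1, 1))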
+
+def connection_close(httpversion, headers):
+ """
+ Checks the message to see if the client connection should be closed
+    according to RFC 2616 Section 8.1. Note that the connection should also
+    be closed if the response has been read until end of the stream.
+ """
+ # At first, check if we have an explicit Connection header.
+ if "connection" in headers:
+ toks = get_header_tokens(headers, "connection")
+ if "close" in toks:
+ return True
+ elif "keep-alive" in toks:
+ return False
+ # If we don't have a Connection header, HTTP 1.1 connections are assumed to
+ # be persistent
+ if httpversion == (1, 1):
+ return False
+ return True
+
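Worked examples of the keep-alive rules, again with hand-built ODictCaseless headers:

from netlib import odict

assert connection_close((1, 1), odict.ODictCaseless([["Connection", "close"]]))
assert not connection_close((1, 1), odict.ODictCaseless())  # HTTP/1.1 defaults to keep-alive
assert connection_close((1, 0), odict.ODictCaseless())      # HTTP/1.0 defaults to close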
+
+def parse_response_line(line):
+ parts = line.strip().split(" ", 2)
+ if len(parts) == 2: # handle missing message gracefully
+ parts.append("")
+ if len(parts) != 3:
+ return None
+ proto, code, msg = parts
+ try:
+ code = int(code)
+ except ValueError:
+ return None
+ return (proto, code, msg)
+
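A short sketch of tolerated and rejected status lines:

assert parse_response_line("HTTP/1.1 200 OK") == ("HTTP/1.1", 200, "OK")
assert parse_response_line("HTTP/1.1 200") == ("HTTP/1.1", 200, "")  # missing reason phrase
assert parse_response_line("HTTP/1.1 OK") is None                    # non-numeric status code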
+
+def read_http_body(*args, **kwargs):
+ return "".join(
+ content for _, content, _ in read_http_body_chunked(*args, **kwargs)
+ )
+
+
+def read_http_body_chunked(
+ rfile,
+ headers,
+ limit,
+ request_method,
+ response_code,
+ is_request,
+ max_chunk_size=None
+):
+ """
+ Read an HTTP message body:
+
+    rfile: A file-like object to read from
+ headers: An ODictCaseless object
+ limit: Size limit.
+ is_request: True if the body to read belongs to a request, False
+ otherwise
+ """
+ if max_chunk_size is None:
+ max_chunk_size = limit or sys.maxsize
+
+ expected_size = expected_http_body_size(
+ headers, is_request, request_method, response_code
+ )
+
+ if expected_size is None:
+ if has_chunked_encoding(headers):
+ # Python 3: yield from
+ for x in read_chunked(rfile, limit, is_request):
+ yield x
+ else: # pragma: nocover
+ raise HttpError(
+ 400 if is_request else 502,
+ "Content-Length unknown but no chunked encoding"
+ )
+ elif expected_size >= 0:
+ if limit is not None and expected_size > limit:
+ raise HttpError(
+ 400 if is_request else 509,
+ "HTTP Body too large. Limit is %s, content-length was %s" % (
+ limit, expected_size
+ )
+ )
+ bytes_left = expected_size
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ yield "", rfile.read(chunk_size), ""
+ bytes_left -= chunk_size
+ else:
+ bytes_left = limit or -1
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ content = rfile.read(chunk_size)
+ if not content:
+ return
+ yield "", content, ""
+ bytes_left -= chunk_size
+ not_done = rfile.read(1)
+ if not_done:
+ raise HttpError(
+ 400 if is_request else 509,
+ "HTTP Body too large. Limit is %s," % limit
+ )
+
+
+def expected_http_body_size(headers, is_request, request_method, response_code):
+ """
+ Returns the expected body length:
+ - a positive integer, if the size is known in advance
+    - None, if the size is unknown in advance (chunked encoding or invalid
+ data)
+ - -1, if all data should be read until end of stream.
+
+ May raise HttpError.
+ """
+ # Determine response size according to
+ # http://tools.ietf.org/html/rfc7230#section-3.3
+ if request_method:
+ request_method = request_method.upper()
+
+ if (not is_request and (
+ request_method == "HEAD" or
+ (request_method == "CONNECT" and response_code == 200) or
+ response_code in [204, 304] or
+ 100 <= response_code <= 199)):
+ return 0
+ if has_chunked_encoding(headers):
+ return None
+ if "content-length" in headers:
+ try:
+ size = int(headers["content-length"][0])
+ if size < 0:
+ raise ValueError()
+ return size
+ except ValueError:
+ return None
+ if is_request:
+ return 0
+ return -1
+
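A sketch of the sizing rules, with hand-built ODictCaseless headers:

from netlib import odict

cl = odict.ODictCaseless([["Content-Length", "42"]])
assert expected_http_body_size(cl, True, "POST", None) == 42                      # size known in advance
assert expected_http_body_size(odict.ODictCaseless(), False, "HEAD", 200) == 0    # no body allowed
assert expected_http_body_size(odict.ODictCaseless(), False, "GET", 200) == -1    # read until end of stream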
+
+# TODO: make this a regular class - just like Response
+Request = collections.namedtuple(
+ "Request",
+ [
+ "form_in",
+ "method",
+ "scheme",
+ "host",
+ "port",
+ "path",
+ "httpversion",
+ "headers",
+ "content"
+ ]
+)
+
+
+def read_request(rfile, include_body=True, body_size_limit=None, wfile=None):
+ """
+ Parse an HTTP request from a file stream
+
+ Args:
+ rfile (file): Input file to read from
+        include_body (bool): Read the request body as well
+        body_size_limit (int): Maximum body size in bytes
+        wfile (file): If specified, HTTP Expect headers are handled
+            automatically, by writing an HTTP 100 Continue response to the stream.
+
+ Returns:
+ Request: The HTTP request
+
+ Raises:
+ HttpError: If the input is invalid.
+ """
+ httpversion, host, port, scheme, method, path, headers, content = (
+ None, None, None, None, None, None, None, None)
+
+ request_line = get_request_line(rfile)
+ if not request_line:
+ raise tcp.NetLibDisconnect()
+
+ request_line_parts = parse_init(request_line)
+ if not request_line_parts:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ method, path, httpversion = request_line_parts
+
+ if path == '*' or path.startswith("/"):
+ form_in = "relative"
+ if not utils.isascii(path):
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ elif method.upper() == 'CONNECT':
+ form_in = "authority"
+ r = parse_init_connect(request_line)
+ if not r:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ host, port, _ = r
+ path = None
+ else:
+ form_in = "absolute"
+ r = parse_init_proxy(request_line)
+ if not r:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ _, scheme, host, port, path, _ = r
+
+ headers = read_headers(rfile)
+ if headers is None:
+ raise HttpError(400, "Invalid headers")
+
+ expect_header = headers.get_first("expect", "").lower()
+    if expect_header == "100-continue" and httpversion >= (1, 1) and wfile:
+ wfile.write(
+ 'HTTP/1.1 100 Continue\r\n'
+ '\r\n'
+ )
+ wfile.flush()
+ del headers['expect']
+
+ if include_body:
+ content = read_http_body(
+ rfile, headers, body_size_limit, method, None, True
+ )
+
+ return Request(
+ form_in,
+ method,
+ scheme,
+ host,
+ port,
+ path,
+ httpversion,
+ headers,
+ content
+ )
+
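A minimal end-to-end sketch of read_request, assuming Python 2 with a StringIO standing in for the client's rfile:

from StringIO import StringIO

raw = (
    "POST /submit HTTP/1.1\r\n"
    "Host: example.com\r\n"
    "Content-Length: 3\r\n"
    "\r\n"
    "abc"
)
req = read_request(StringIO(raw))
assert (req.form_in, req.method, req.path) == ("relative", "POST", "/submit")
assert req.headers["content-length"] == ["3"]
assert req.content == "abc"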
+
+def read_response(rfile, request_method, body_size_limit, include_body=True):
+ """
+ Returns an http.Response
+
+ By default, both response header and body are read.
+ If include_body=False is specified, content may be one of the
+ following:
+ - None, if the response is technically allowed to have a response body
+ - "", if the response must not have a response body (e.g. it's a
+ response to a HEAD request)
+ """
+
+ line = rfile.readline()
+ # Possible leftover from previous message
+ if line == "\r\n" or line == "\n":
+ line = rfile.readline()
+ if not line:
+ raise HttpErrorConnClosed(502, "Server disconnect.")
+ parts = parse_response_line(line)
+ if not parts:
+ raise HttpError(502, "Invalid server response: %s" % repr(line))
+ proto, code, msg = parts
+ httpversion = parse_http_protocol(proto)
+ if httpversion is None:
+ raise HttpError(502, "Invalid HTTP version in line: %s" % repr(proto))
+ headers = read_headers(rfile)
+ if headers is None:
+ raise HttpError(502, "Invalid headers.")
+
+ if include_body:
+ content = read_http_body(
+ rfile,
+ headers,
+ body_size_limit,
+ request_method,
+ code,
+ False
+ )
+ else:
+ # if include_body==False then a None content means the body should be
+ # read separately
+ content = None
+ return http.Response(httpversion, code, msg, headers, content)
+
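The server-side counterpart, under the same Python 2 / StringIO assumptions:

from StringIO import StringIO

raw = (
    "HTTP/1.1 200 OK\r\n"
    "Content-Length: 5\r\n"
    "\r\n"
    "hello"
)
# request_method lets the sizing logic special-case HEAD and CONNECT responses
resp = read_response(StringIO(raw), "GET", body_size_limit=None)
# resp is a netlib http.Response wrapping version (1, 1), status 200,
# the parsed headers and the 5-byte body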
+
+def request_preamble(method, resource, http_major="1", http_minor="1"):
+ return '%s %s HTTP/%s.%s' % (
+ method, resource, http_major, http_minor
+ )
+
+
+def response_preamble(code, message=None, http_major="1", http_minor="1"):
+ if message is None:
+ message = status_codes.RESPONSES.get(code)
+ return 'HTTP/%s.%s %s %s' % (http_major, http_minor, code, message)
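Finally, the preamble helpers default to HTTP/1.1, and response_preamble falls back to the reason phrase from status_codes.RESPONSES:

assert request_preamble("GET", "/") == "GET / HTTP/1.1"
assert response_preamble(404) == "HTTP/1.1 404 Not Found"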