move bits around

author: Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-07-14 23:02:14 +0200
committer: Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-07-22 15:30:51 +0200
commit: f50deb7b763d093a22a4d331e16465a2fb0329cf (patch)
tree: a11e92fca2a0deffeb801cc6f931bd79aec0d669 /netlib/http/http1/protocol.py
parent: bd5ee212840e3be731ea93e14ef1375745383d88 (diff)
download: mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.tar.gz
mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.tar.bz2
mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.zip
1 files changed, 518 insertions, 0 deletions
diff --git a/netlib/http/http1/protocol.py b/netlib/http/http1/protocol.py
new file mode 100644
index 00000000..0f7a0bd3
--- /dev/null
+++ b/netlib/http/http1/protocol.py
@@ -0,0 +1,518 @@
+from __future__ import (absolute_import, print_function, division)
+import binascii
+import collections
+import string
+import sys
+import urlparse
+
+from netlib import odict, utils, tcp, http
+from .. import status_codes
+from ..exceptions import *
+
+
+def get_request_line(fp):
+    """
+        Get a line, possibly preceded by a blank.
+    """
+    line = fp.readline()
+    if line == "\r\n" or line == "\n":
+        # Possible leftover from previous message
+        line = fp.readline()
+    return line
+
+def read_headers(fp):
+    """
+        Read a set of headers from a file pointer. Stop once a blank line is
+        reached. Return a ODictCaseless object, or None if headers are invalid.
+    """
+    ret = []
+    name = ''
+    while True:
+        line = fp.readline()
+        if not line or line == '\r\n' or line == '\n':
+            break
+        if line[0] in ' \t':
+            if not ret:
+                return None
+            # continued header
+            ret[-1][1] = ret[-1][1] + '\r\n ' + line.strip()
+        else:
+            i = line.find(':')
+            # We're being liberal in what we accept, here.
+            if i > 0:
+                name = line[:i]
+                value = line[i + 1:].strip()
+                ret.append([name, value])
+            else:
+                return None
+    return odict.ODictCaseless(ret)
+
+
+def read_chunked(fp, limit, is_request):
+    """
+        Read a chunked HTTP body.
+
+        May raise HttpError.
+    """
+    # FIXME: Should check if chunked is the final encoding in the headers
+    # http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-16#section-3.3
+    # 3.3 2.
+    total = 0
+    code = 400 if is_request else 502
+    while True:
+        line = fp.readline(128)
+        if line == "":
+            raise HttpErrorConnClosed(code, "Connection closed prematurely")
+        if line != '\r\n' and line != '\n':
+            try:
+                length = int(line, 16)
+            except ValueError:
+                raise HttpError(
+                    code,
+                    "Invalid chunked encoding length: %s" % line
+                )
+            total += length
+            if limit is not None and total > limit:
+                msg = "HTTP Body too large. Limit is %s," \
+                      " chunked content longer than %s" % (limit, total)
+                raise HttpError(code, msg)
+            chunk = fp.read(length)
+            suffix = fp.readline(5)
+            if suffix != '\r\n':
+                raise HttpError(code, "Malformed chunked body")
+            yield line, chunk, '\r\n'
+            if length == 0:
+                return
+
+
+def get_header_tokens(headers, key):
+    """
+        Retrieve all tokens for a header key. A number of different headers
+        follow a pattern where each header line can containe comma-separated
+        tokens, and headers can be set multiple times.
+    """
+    toks = []
+    for i in headers[key]:
+        for j in i.split(","):
+            toks.append(j.strip())
+    return toks
+
+
+def has_chunked_encoding(headers):
+    return "chunked" in [
+        i.lower() for i in get_header_tokens(headers, "transfer-encoding")
+    ]
+
+
+def parse_http_protocol(s):
+    """
+        Parse an HTTP protocol declaration. Returns a (major, minor) tuple, or
+        None.
+    """
+    if not s.startswith("HTTP/"):
+        return None
+    _, version = s.split('/', 1)
+    if "." not in version:
+        return None
+    major, minor = version.split('.', 1)
+    try:
+        major = int(major)
+        minor = int(minor)
+    except ValueError:
+        return None
+    return major, minor
+
+
+def parse_http_basic_auth(s):
+    # TODO: check if this is HTTP/1 only - otherwise move it to netlib.http.semantics
+    words = s.split()
+    if len(words) != 2:
+        return None
+    scheme = words[0]
+    try:
+        user = binascii.a2b_base64(words[1])
+    except binascii.Error:
+        return None
+    parts = user.split(':')
+    if len(parts) != 2:
+        return None
+    return scheme, parts[0], parts[1]
+
+
+def assemble_http_basic_auth(scheme, username, password):
+    # TODO: check if this is HTTP/1 only - otherwise move it to netlib.http.semantics
+    v = binascii.b2a_base64(username + ":" + password)
+    return scheme + " " + v
+
+
+def parse_init(line):
+    try:
+        method, url, protocol = string.split(line)
+    except ValueError:
+        return None
+    httpversion = parse_http_protocol(protocol)
+    if not httpversion:
+        return None
+    if not utils.isascii(method):
+        return None
+    return method, url, httpversion
+
+
+def parse_init_connect(line):
+    """
+        Returns (host, port, httpversion) if line is a valid CONNECT line.
+        http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
+    """
+    v = parse_init(line)
+    if not v:
+        return None
+    method, url, httpversion = v
+
+    if method.upper() != 'CONNECT':
+        return None
+    try:
+        host, port = url.split(":")
+    except ValueError:
+        return None
+    try:
+        port = int(port)
+    except ValueError:
+        return None
+    if not http.is_valid_port(port):
+        return None
+    if not http.is_valid_host(host):
+        return None
+    return host, port, httpversion
+
+
+def parse_init_proxy(line):
+    v = parse_init(line)
+    if not v:
+        return None
+    method, url, httpversion = v
+
+    parts = http.parse_url(url)
+    if not parts:
+        return None
+    scheme, host, port, path = parts
+    return method, scheme, host, port, path, httpversion
+
+
+def parse_init_http(line):
+    """
+        Returns (method, url, httpversion)
+    """
+    v = parse_init(line)
+    if not v:
+        return None
+    method, url, httpversion = v
+    if not utils.isascii(url):
+        return None
+    if not (url.startswith("/") or url == "*"):
+        return None
+    return method, url, httpversion
+
+
+def connection_close(httpversion, headers):
+    """
+        Checks the message to see if the client connection should be closed
+        according to RFC 2616 Section 8.1 Note that a connection should be
+        closed as well if the response has been read until end of the stream.
+    """
+    # At first, check if we have an explicit Connection header.
+    if "connection" in headers:
+        toks = get_header_tokens(headers, "connection")
+        if "close" in toks:
+            return True
+        elif "keep-alive" in toks:
+            return False
+    # If we don't have a Connection header, HTTP 1.1 connections are assumed to
+    # be persistent
+    if httpversion == (1, 1):
+        return False
+    return True
+
+
+def parse_response_line(line):
+    parts = line.strip().split(" ", 2)
+    if len(parts) == 2:  # handle missing message gracefully
+        parts.append("")
+    if len(parts) != 3:
+        return None
+    proto, code, msg = parts
+    try:
+        code = int(code)
+    except ValueError:
+        return None
+    return (proto, code, msg)
+
+
+def read_http_body(*args, **kwargs):
+    return "".join(
+        content for _, content, _ in read_http_body_chunked(*args, **kwargs)
+    )
+
+
+def read_http_body_chunked(
+    rfile,
+    headers,
+    limit,
+    request_method,
+    response_code,
+    is_request,
+    max_chunk_size=None
+):
+    """
+        Read an HTTP message body:
+
+            rfile: A file descriptor to read from
+            headers: An ODictCaseless object
+            limit: Size limit.
+            is_request: True if the body to read belongs to a request, False
+            otherwise
+    """
+    if max_chunk_size is None:
+        max_chunk_size = limit or sys.maxsize
+
+    expected_size = expected_http_body_size(
+        headers, is_request, request_method, response_code
+    )
+
+    if expected_size is None:
+        if has_chunked_encoding(headers):
+            # Python 3: yield from
+            for x in read_chunked(rfile, limit, is_request):
+                yield x
+        else:  # pragma: nocover
+            raise HttpError(
+                400 if is_request else 502,
+                "Content-Length unknown but no chunked encoding"
+            )
+    elif expected_size >= 0:
+        if limit is not None and expected_size > limit:
+            raise HttpError(
+                400 if is_request else 509,
+                "HTTP Body too large. Limit is %s, content-length was %s" % (
+                    limit, expected_size
+                )
+            )
+        bytes_left = expected_size
+        while bytes_left:
+            chunk_size = min(bytes_left, max_chunk_size)
+            yield "", rfile.read(chunk_size), ""
+            bytes_left -= chunk_size
+    else:
+        bytes_left = limit or -1
+        while bytes_left:
+            chunk_size = min(bytes_left, max_chunk_size)
+            content = rfile.read(chunk_size)
+            if not content:
+                return
+            yield "", content, ""
+            bytes_left -= chunk_size
+        not_done = rfile.read(1)
+        if not_done:
+            raise HttpError(
+                400 if is_request else 509,
+                "HTTP Body too large. Limit is %s," % limit
+            )
+
+
+def expected_http_body_size(headers, is_request, request_method, response_code):
+    """
+        Returns the expected body length:
+         - a positive integer, if the size is known in advance
+         - None, if the size in unknown in advance (chunked encoding or invalid
+         data)
+         - -1, if all data should be read until end of stream.
+
+        May raise HttpError.
+    """
+    # Determine response size according to
+    # http://tools.ietf.org/html/rfc7230#section-3.3
+    if request_method:
+        request_method = request_method.upper()
+
+    if (not is_request and (
+            request_method == "HEAD" or
+            (request_method == "CONNECT" and response_code == 200) or
+            response_code in [204, 304] or
+            100 <= response_code <= 199)):
+        return 0
+    if has_chunked_encoding(headers):
+        return None
+    if "content-length" in headers:
+        try:
+            size = int(headers["content-length"][0])
+            if size < 0:
+                raise ValueError()
+            return size
+        except ValueError:
+            return None
+    if is_request:
+        return 0
+    return -1
+
+
+# TODO: make this a regular class - just like Response
+Request = collections.namedtuple(
+    "Request",
+    [
+        "form_in",
+        "method",
+        "scheme",
+        "host",
+        "port",
+        "path",
+        "httpversion",
+        "headers",
+        "content"
+    ]
+)
+
+
+def read_request(rfile, include_body=True, body_size_limit=None, wfile=None):
+    """
+    Parse an HTTP request from a file stream
+
+    Args:
+        rfile (file): Input file to read from
+        include_body (bool): Read response body as well
+        body_size_limit (bool): Maximum body size
+        wfile (file): If specified, HTTP Expect headers are handled
+        automatically, by writing a HTTP 100 CONTINUE response to the stream.
+
+    Returns:
+        Request: The HTTP request
+
+    Raises:
+        HttpError: If the input is invalid.
+    """
+    httpversion, host, port, scheme, method, path, headers, content = (
+        None, None, None, None, None, None, None, None)
+
+    request_line = get_request_line(rfile)
+    if not request_line:
+        raise tcp.NetLibDisconnect()
+
+    request_line_parts = parse_init(request_line)
+    if not request_line_parts:
+        raise HttpError(
+            400,
+            "Bad HTTP request line: %s" % repr(request_line)
+        )
+    method, path, httpversion = request_line_parts
+
+    if path == '*' or path.startswith("/"):
+        form_in = "relative"
+        if not utils.isascii(path):
+            raise HttpError(
+                400,
+                "Bad HTTP request line: %s" % repr(request_line)
+            )
+    elif method.upper() == 'CONNECT':
+        form_in = "authority"
+        r = parse_init_connect(request_line)
+        if not r:
+            raise HttpError(
+                400,
+                "Bad HTTP request line: %s" % repr(request_line)
+            )
+        host, port, _ = r
+        path = None
+    else:
+        form_in = "absolute"
+        r = parse_init_proxy(request_line)
+        if not r:
+            raise HttpError(
+                400,
+                "Bad HTTP request line: %s" % repr(request_line)
+            )
+        _, scheme, host, port, path, _ = r
+
+    headers = read_headers(rfile)
+    if headers is None:
+        raise HttpError(400, "Invalid headers")
+
+    expect_header = headers.get_first("expect", "").lower()
+    if expect_header == "100-continue" and httpversion >= (1, 1):
+        wfile.write(
+            'HTTP/1.1 100 Continue\r\n'
+            '\r\n'
+        )
+        wfile.flush()
+        del headers['expect']
+
+    if include_body:
+        content = read_http_body(
+            rfile, headers, body_size_limit, method, None, True
+        )
+
+    return Request(
+        form_in,
+        method,
+        scheme,
+        host,
+        port,
+        path,
+        httpversion,
+        headers,
+        content
+    )
+
+
+def read_response(rfile, request_method, body_size_limit, include_body=True):
+    """
+        Returns an http.Response
+
+        By default, both response header and body are read.
+        If include_body=False is specified, content may be one of the
+        following:
+        - None, if the response is technically allowed to have a response body
+        - "", if the response must not have a response body (e.g. it's a
+        response to a HEAD request)
+    """
+
+    line = rfile.readline()
+    # Possible leftover from previous message
+    if line == "\r\n" or line == "\n":
+        line = rfile.readline()
+    if not line:
+        raise HttpErrorConnClosed(502, "Server disconnect.")
+    parts = parse_response_line(line)
+    if not parts:
+        raise HttpError(502, "Invalid server response: %s" % repr(line))
+    proto, code, msg = parts
+    httpversion = parse_http_protocol(proto)
+    if httpversion is None:
+        raise HttpError(502, "Invalid HTTP version in line: %s" % repr(proto))
+    headers = read_headers(rfile)
+    if headers is None:
+        raise HttpError(502, "Invalid headers.")
+
+    if include_body:
+        content = read_http_body(
+            rfile,
+            headers,
+            body_size_limit,
+            request_method,
+            code,
+            False
+        )
+    else:
+        # if include_body==False then a None content means the body should be
+        # read separately
+        content = None
+    return http.Response(httpversion, code, msg, headers, content)
+
+
+def request_preamble(method, resource, http_major="1", http_minor="1"):
+    return '%s %s HTTP/%s.%s' % (
+        method, resource, http_major, http_minor
+    )
+
+
+def response_preamble(code, message=None, http_major="1", http_minor="1"):
+    if message is None:
+        message = status_codes.RESPONSES.get(code)
+    return 'HTTP/%s.%s %s %s' % (http_major, http_minor, code, message)
author	Thomas Kriechbaumer <thomas@kriechbaumer.name>	2015-07-14 23:02:14 +0200
committer	Thomas Kriechbaumer <thomas@kriechbaumer.name>	2015-07-22 15:30:51 +0200
commit	f50deb7b763d093a22a4d331e16465a2fb0329cf (patch)
tree	a11e92fca2a0deffeb801cc6f931bd79aec0d669 /netlib/http/http1/protocol.py
parent	bd5ee212840e3be731ea93e14ef1375745383d88 (diff)
download	mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.tar.gz mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.tar.bz2 mitmproxy-f50deb7b763d093a22a4d331e16465a2fb0329cf.zip