about summary refs log tree commit diff stats
path: root/netlib/http/http1/protocol.py
diff options
context:
space:
mode:
Diffstat (limited to 'netlib/http/http1/protocol.py')
-rw-r--r-- netlib/http/http1/protocol.py 563
1 files changed, 282 insertions, 281 deletions
diff --git a/netlib/http/http1/protocol.py b/netlib/http/http1/protocol.py
index d2a77399..e7727e00 100644
--- a/netlib/http/http1/protocol.py
+++ b/netlib/http/http1/protocol.py
@@ -15,15 +15,144 @@ class HTTP1Protocol(object):
self.tcp_handler = tcp_handler
- def get_request_line(self):
+ def read_request(self, include_body=True, body_size_limit=None, allow_empty=False):
"""
- Get a line, possibly preceded by a blank.
+ Parse an HTTP request from a file stream
+
+ Args:
+ include_body (bool): Read request body as well
+ body_size_limit (int): Maximum body size
+ wfile (file): If specified, HTTP Expect headers are handled
+ automatically, by writing a HTTP 100 CONTINUE response to the stream.
+
+ Returns:
+ Request: The HTTP request
+
+ Raises:
+ HttpError: If the input is invalid.
+ """
+ httpversion, host, port, scheme, method, path, headers, content = (
+ None, None, None, None, None, None, None, None)
+
+ request_line = self._get_request_line()
+ if not request_line:
+ if allow_empty:
+ return http.EmptyRequest()
+ else:
+ raise tcp.NetLibDisconnect()
+
+ request_line_parts = self._parse_init(request_line)
+ if not request_line_parts:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ method, path, httpversion = request_line_parts
+
+ if path == '*' or path.startswith("/"):
+ form_in = "relative"
+ if not utils.isascii(path):
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ elif method.upper() == 'CONNECT':
+ form_in = "authority"
+ r = self._parse_init_connect(request_line)
+ if not r:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ host, port, _ = r
+ return http.ConnectRequest(host, port)
+ else:
+ form_in = "absolute"
+ r = self._parse_init_proxy(request_line)
+ if not r:
+ raise HttpError(
+ 400,
+ "Bad HTTP request line: %s" % repr(request_line)
+ )
+ _, scheme, host, port, path, _ = r
+
+ headers = self.read_headers()
+ if headers is None:
+ raise HttpError(400, "Invalid headers")
+
+ expect_header = headers.get_first("expect", "").lower()
+ if expect_header == "100-continue" and httpversion >= (1, 1):
+ self.tcp_handler.wfile.write(
+ 'HTTP/1.1 100 Continue\r\n'
+ '\r\n'
+ )
+ self.tcp_handler.wfile.flush()
+ del headers['expect']
+
+ if include_body:
+ content = self.read_http_body(
+ headers,
+ body_size_limit,
+ method,
+ None,
+ True
+ )
+
+ return http.Request(
+ form_in,
+ method,
+ scheme,
+ host,
+ port,
+ path,
+ httpversion,
+ headers,
+ content
+ )
+
+
+ def read_response(self, request_method, body_size_limit, include_body=True):
+ """
+ Returns an http.Response
+
+ By default, both response header and body are read.
+ If include_body=False is specified, content may be one of the
+ following:
+ - None, if the response is technically allowed to have a response body
+ - "", if the response must not have a response body (e.g. it's a
+ response to a HEAD request)
"""
+
line = self.tcp_handler.rfile.readline()
+ # Possible leftover from previous message
if line == "\r\n" or line == "\n":
- # Possible leftover from previous message
line = self.tcp_handler.rfile.readline()
- return line
+ if not line:
+ raise HttpErrorConnClosed(502, "Server disconnect.")
+ parts = self.parse_response_line(line)
+ if not parts:
+ raise HttpError(502, "Invalid server response: %s" % repr(line))
+ proto, code, msg = parts
+ httpversion = self._parse_http_protocol(proto)
+ if httpversion is None:
+ raise HttpError(502, "Invalid HTTP version in line: %s" % repr(proto))
+ headers = self.read_headers()
+ if headers is None:
+ raise HttpError(502, "Invalid headers.")
+
+ if include_body:
+ content = self.read_http_body(
+ headers,
+ body_size_limit,
+ request_method,
+ code,
+ False
+ )
+ else:
+ # if include_body==False then a None content means the body should be
+ # read separately
+ content = None
+ return http.Response(httpversion, code, msg, headers, content)
def read_headers(self):
@@ -56,7 +185,146 @@ class HTTP1Protocol(object):
return odict.ODictCaseless(ret)
- def read_chunked(self, limit, is_request):
+ def read_http_body(self, *args, **kwargs):
+ return "".join(
+ content for _, content, _ in self.read_http_body_chunked(*args, **kwargs)
+ )
+
+
+ def read_http_body_chunked(
+ self,
+ headers,
+ limit,
+ request_method,
+ response_code,
+ is_request,
+ max_chunk_size=None
+ ):
+ """
+ Read an HTTP message body:
+ headers: An ODictCaseless object
+ limit: Size limit.
+ is_request: True if the body to read belongs to a request, False
+ otherwise
+ """
+ if max_chunk_size is None:
+ max_chunk_size = limit or sys.maxsize
+
+ expected_size = self.expected_http_body_size(
+ headers, is_request, request_method, response_code
+ )
+
+ if expected_size is None:
+ if self.has_chunked_encoding(headers):
+ # Python 3: yield from
+ for x in self._read_chunked(limit, is_request):
+ yield x
+ else: # pragma: nocover
+ raise HttpError(
+ 400 if is_request else 502,
+ "Content-Length unknown but no chunked encoding"
+ )
+ elif expected_size >= 0:
+ if limit is not None and expected_size > limit:
+ raise HttpError(
+ 400 if is_request else 509,
+ "HTTP Body too large. Limit is %s, content-length was %s" % (
+ limit, expected_size
+ )
+ )
+ bytes_left = expected_size
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ yield "", self.tcp_handler.rfile.read(chunk_size), ""
+ bytes_left -= chunk_size
+ else:
+ bytes_left = limit or -1
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ content = self.tcp_handler.rfile.read(chunk_size)
+ if not content:
+ return
+ yield "", content, ""
+ bytes_left -= chunk_size
+ not_done = self.tcp_handler.rfile.read(1)
+ if not_done:
+ raise HttpError(
+ 400 if is_request else 509,
+ "HTTP Body too large. Limit is %s," % limit
+ )
+
+
+ @classmethod
+ def expected_http_body_size(self, headers, is_request, request_method, response_code):
+ """
+ Returns the expected body length:
+ - a non-negative integer, if the size is known in advance
+ - None, if the size is unknown in advance (chunked encoding or invalid
+ data)
+ - -1, if all data should be read until end of stream.
+
+ May raise HttpError.
+ """
+ # Determine response size according to
+ # http://tools.ietf.org/html/rfc7230#section-3.3
+ if request_method:
+ request_method = request_method.upper()
+
+ if (not is_request and (
+ request_method == "HEAD" or
+ (request_method == "CONNECT" and response_code == 200) or
+ response_code in [204, 304] or
+ 100 <= response_code <= 199)):
+ return 0
+ if self.has_chunked_encoding(headers):
+ return None
+ if "content-length" in headers:
+ try:
+ size = int(headers["content-length"][0])
+ if size < 0:
+ raise ValueError()
+ return size
+ except ValueError:
+ return None
+ if is_request:
+ return 0
+ return -1
+
+
+ @classmethod
+ def request_preamble(self, method, resource, http_major="1", http_minor="1"):
+ return '%s %s HTTP/%s.%s' % (
+ method, resource, http_major, http_minor
+ )
+
+
+ @classmethod
+ def response_preamble(self, code, message=None, http_major="1", http_minor="1"):
+ if message is None:
+ message = status_codes.RESPONSES.get(code)
+ return 'HTTP/%s.%s %s %s' % (http_major, http_minor, code, message)
+
+
+ @classmethod
+ def has_chunked_encoding(self, headers):
+ return "chunked" in [
+ i.lower() for i in http.get_header_tokens(headers, "transfer-encoding")
+ ]
+
+
+ def _get_request_line(self):
+ """
+ Get a line, possibly preceded by a blank.
+ """
+ line = self.tcp_handler.rfile.readline()
+ if line == "\r\n" or line == "\n":
+ # Possible leftover from previous message
+ line = self.tcp_handler.rfile.readline()
+ return line
+
+
+
+ def _read_chunked(self, limit, is_request):
"""
Read a chunked HTTP body.
@@ -94,14 +362,7 @@ class HTTP1Protocol(object):
@classmethod
- def has_chunked_encoding(self, headers):
- return "chunked" in [
- i.lower() for i in http.get_header_tokens(headers, "transfer-encoding")
- ]
-
-
- @classmethod
- def parse_http_protocol(self, line):
+ def _parse_http_protocol(self, line):
"""
Parse an HTTP protocol declaration.
Returns a (major, minor) tuple, or None.
@@ -121,12 +382,12 @@ class HTTP1Protocol(object):
@classmethod
- def parse_init(self, line):
+ def _parse_init(self, line):
try:
method, url, protocol = string.split(line)
except ValueError:
return None
- httpversion = self.parse_http_protocol(protocol)
+ httpversion = self._parse_http_protocol(protocol)
if not httpversion:
return None
if not utils.isascii(method):
@@ -135,12 +396,12 @@ class HTTP1Protocol(object):
@classmethod
- def parse_init_connect(self, line):
+ def _parse_init_connect(self, line):
"""
Returns (host, port, httpversion) if line is a valid CONNECT line.
http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
"""
- v = self.parse_init(line)
+ v = self._parse_init(line)
if not v:
return None
method, url, httpversion = v
@@ -163,8 +424,8 @@ class HTTP1Protocol(object):
@classmethod
- def parse_init_proxy(self, line):
- v = self.parse_init(line)
+ def _parse_init_proxy(self, line):
+ v = self._parse_init(line)
if not v:
return None
method, url, httpversion = v
@@ -177,11 +438,11 @@ class HTTP1Protocol(object):
@classmethod
- def parse_init_http(self, line):
+ def _parse_init_http(self, line):
"""
Returns (method, url, httpversion)
"""
- v = self.parse_init(line)
+ v = self._parse_init(line)
if not v:
return None
method, url, httpversion = v
@@ -225,263 +486,3 @@ class HTTP1Protocol(object):
except ValueError:
return None
return (proto, code, msg)
-
-
- def read_http_body(self, *args, **kwargs):
- return "".join(
- content for _, content, _ in self.read_http_body_chunked(*args, **kwargs)
- )
-
-
- def read_http_body_chunked(
- self,
- headers,
- limit,
- request_method,
- response_code,
- is_request,
- max_chunk_size=None
- ):
- """
- Read an HTTP message body:
- headers: An ODictCaseless object
- limit: Size limit.
- is_request: True if the body to read belongs to a request, False
- otherwise
- """
- if max_chunk_size is None:
- max_chunk_size = limit or sys.maxsize
-
- expected_size = self.expected_http_body_size(
- headers, is_request, request_method, response_code
- )
-
- if expected_size is None:
- if self.has_chunked_encoding(headers):
- # Python 3: yield from
- for x in self.read_chunked(limit, is_request):
- yield x
- else: # pragma: nocover
- raise HttpError(
- 400 if is_request else 502,
- "Content-Length unknown but no chunked encoding"
- )
- elif expected_size >= 0:
- if limit is not None and expected_size > limit:
- raise HttpError(
- 400 if is_request else 509,
- "HTTP Body too large. Limit is %s, content-length was %s" % (
- limit, expected_size
- )
- )
- bytes_left = expected_size
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- yield "", self.tcp_handler.rfile.read(chunk_size), ""
- bytes_left -= chunk_size
- else:
- bytes_left = limit or -1
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- content = self.tcp_handler.rfile.read(chunk_size)
- if not content:
- return
- yield "", content, ""
- bytes_left -= chunk_size
- not_done = self.tcp_handler.rfile.read(1)
- if not_done:
- raise HttpError(
- 400 if is_request else 509,
- "HTTP Body too large. Limit is %s," % limit
- )
-
-
- @classmethod
- def expected_http_body_size(self, headers, is_request, request_method, response_code):
- """
- Returns the expected body length:
- - a positive integer, if the size is known in advance
- - None, if the size in unknown in advance (chunked encoding or invalid
- data)
- - -1, if all data should be read until end of stream.
-
- May raise HttpError.
- """
- # Determine response size according to
- # http://tools.ietf.org/html/rfc7230#section-3.3
- if request_method:
- request_method = request_method.upper()
-
- if (not is_request and (
- request_method == "HEAD" or
- (request_method == "CONNECT" and response_code == 200) or
- response_code in [204, 304] or
- 100 <= response_code <= 199)):
- return 0
- if self.has_chunked_encoding(headers):
- return None
- if "content-length" in headers:
- try:
- size = int(headers["content-length"][0])
- if size < 0:
- raise ValueError()
- return size
- except ValueError:
- return None
- if is_request:
- return 0
- return -1
-
-
- def read_request(self, include_body=True, body_size_limit=None, allow_empty=False):
- """
- Parse an HTTP request from a file stream
-
- Args:
- include_body (bool): Read response body as well
- body_size_limit (bool): Maximum body size
- wfile (file): If specified, HTTP Expect headers are handled
- automatically, by writing a HTTP 100 CONTINUE response to the stream.
-
- Returns:
- Request: The HTTP request
-
- Raises:
- HttpError: If the input is invalid.
- """
- httpversion, host, port, scheme, method, path, headers, content = (
- None, None, None, None, None, None, None, None)
-
- request_line = self.get_request_line()
- if not request_line:
- if allow_empty:
- return http.EmptyRequest()
- else:
- raise tcp.NetLibDisconnect()
-
- request_line_parts = self.parse_init(request_line)
- if not request_line_parts:
- raise HttpError(
- 400,
- "Bad HTTP request line: %s" % repr(request_line)
- )
- method, path, httpversion = request_line_parts
-
- if path == '*' or path.startswith("/"):
- form_in = "relative"
- if not utils.isascii(path):
- raise HttpError(
- 400,
- "Bad HTTP request line: %s" % repr(request_line)
- )
- elif method.upper() == 'CONNECT':
- form_in = "authority"
- r = self.parse_init_connect(request_line)
- if not r:
- raise HttpError(
- 400,
- "Bad HTTP request line: %s" % repr(request_line)
- )
- host, port, _ = r
- return http.ConnectRequest(host, port)
- else:
- form_in = "absolute"
- r = self.parse_init_proxy(request_line)
- if not r:
- raise HttpError(
- 400,
- "Bad HTTP request line: %s" % repr(request_line)
- )
- _, scheme, host, port, path, _ = r
-
- headers = self.read_headers()
- if headers is None:
- raise HttpError(400, "Invalid headers")
-
- expect_header = headers.get_first("expect", "").lower()
- if expect_header == "100-continue" and httpversion >= (1, 1):
- self.tcp_handler.wfile.write(
- 'HTTP/1.1 100 Continue\r\n'
- '\r\n'
- )
- self.tcp_handler.wfile.flush()
- del headers['expect']
-
- if include_body:
- content = self.read_http_body(
- headers,
- body_size_limit,
- method,
- None,
- True
- )
-
- return http.Request(
- form_in,
- method,
- scheme,
- host,
- port,
- path,
- httpversion,
- headers,
- content
- )
-
-
- def read_response(self, request_method, body_size_limit, include_body=True):
- """
- Returns an http.Response
-
- By default, both response header and body are read.
- If include_body=False is specified, content may be one of the
- following:
- - None, if the response is technically allowed to have a response body
- - "", if the response must not have a response body (e.g. it's a
- response to a HEAD request)
- """
-
- line = self.tcp_handler.rfile.readline()
- # Possible leftover from previous message
- if line == "\r\n" or line == "\n":
- line = self.tcp_handler.rfile.readline()
- if not line:
- raise HttpErrorConnClosed(502, "Server disconnect.")
- parts = self.parse_response_line(line)
- if not parts:
- raise HttpError(502, "Invalid server response: %s" % repr(line))
- proto, code, msg = parts
- httpversion = self.parse_http_protocol(proto)
- if httpversion is None:
- raise HttpError(502, "Invalid HTTP version in line: %s" % repr(proto))
- headers = self.read_headers()
- if headers is None:
- raise HttpError(502, "Invalid headers.")
-
- if include_body:
- content = self.read_http_body(
- headers,
- body_size_limit,
- request_method,
- code,
- False
- )
- else:
- # if include_body==False then a None content means the body should be
- # read separately
- content = None
- return http.Response(httpversion, code, msg, headers, content)
-
-
- @classmethod
- def request_preamble(self, method, resource, http_major="1", http_minor="1"):
- return '%s %s HTTP/%s.%s' % (
- method, resource, http_major, http_minor
- )
-
-
- @classmethod
- def response_preamble(self, code, message=None, http_major="1", http_minor="1"):
- if message is None:
- message = status_codes.RESPONSES.get(code)
- return 'HTTP/%s.%s %s %s' % (http_major, http_minor, code, message)