about | summary | refs | log | tree | commit | diff | stats
path: root/netlib
diff options
context:
space:
mode:
Diffstat (limited to 'netlib')
-rw-r--r-- netlib/debug.py 45
-rw-r--r-- netlib/encoding.py 97
-rw-r--r-- netlib/http/cookies.py 29
-rw-r--r-- netlib/http/headers.py 19
-rw-r--r-- netlib/http/http1/assemble.py 4
-rw-r--r-- netlib/http/http1/read.py 5
-rw-r--r-- netlib/http/message.py 254
-rw-r--r-- netlib/http/request.py 14
-rw-r--r-- netlib/http/response.py 5
-rw-r--r-- netlib/strutils.py 6
-rw-r--r-- netlib/utils.py 7
-rw-r--r-- netlib/wsgi.py 4
12 files changed, 346 insertions, 143 deletions
diff --git a/netlib/debug.py b/netlib/debug.py
index a395afcb..29c7f655 100644
--- a/netlib/debug.py
+++ b/netlib/debug.py
@@ -7,8 +7,6 @@ import signal
import platform
import traceback
-import psutil
-
from netlib import version
from OpenSSL import SSL
@@ -19,7 +17,7 @@ def sysinfo():
"Mitmproxy version: %s" % version.VERSION,
"Python version: %s" % platform.python_version(),
"Platform: %s" % platform.platform(),
- "SSL version: %s" % SSL.SSLeay_version(SSL.SSLEAY_VERSION),
+ "SSL version: %s" % SSL.SSLeay_version(SSL.SSLEAY_VERSION).decode(),
]
d = platform.linux_distribution()
t = "Linux distro: %s %s %s" % d
@@ -40,15 +38,32 @@ def sysinfo():
def dump_info(sig, frm, file=sys.stdout): # pragma: no cover
- p = psutil.Process()
-
print("****************************************************", file=file)
print("Summary", file=file)
print("=======", file=file)
- print("num threads: ", p.num_threads(), file=file)
- if hasattr(p, "num_fds"):
- print("num fds: ", p.num_fds(), file=file)
- print("memory: ", p.memory_info(), file=file)
+
+ try:
+ import psutil
+ except:
+ print("(psutil not installed, skipping some debug info)", file=file)
+ else:
+ p = psutil.Process()
+ print("num threads: ", p.num_threads(), file=file)
+ if hasattr(p, "num_fds"):
+ print("num fds: ", p.num_fds(), file=file)
+ print("memory: ", p.memory_info(), file=file)
+
+ print(file=file)
+ print("Files", file=file)
+ print("=====", file=file)
+ for i in p.open_files():
+ print(i, file=file)
+
+ print(file=file)
+ print("Connections", file=file)
+ print("===========", file=file)
+ for i in p.connections():
+ print(i, file=file)
print(file=file)
print("Threads", file=file)
@@ -63,18 +78,6 @@ def dump_info(sig, frm, file=sys.stdout): # pragma: no cover
for i in bthreads:
print(i._threadinfo(), file=file)
- print(file=file)
- print("Files", file=file)
- print("=====", file=file)
- for i in p.open_files():
- print(i, file=file)
-
- print(file=file)
- print("Connections", file=file)
- print("===========", file=file)
- for i in p.connections():
- print(i, file=file)
-
print("****************************************************", file=file)
diff --git a/netlib/encoding.py b/netlib/encoding.py
index 98502451..8b67b543 100644
--- a/netlib/encoding.py
+++ b/netlib/encoding.py
@@ -1,39 +1,62 @@
"""
- Utility functions for decoding response bodies.
+Utility functions for decoding response bodies.
"""
from __future__ import absolute_import
+
+import codecs
from io import BytesIO
import gzip
import zlib
+from typing import Union # noqa
+
-ENCODINGS = {"identity", "gzip", "deflate"}
+def decode(obj, encoding, errors='strict'):
+ # type: (Union[str, bytes], str) -> Union[str, bytes]
+ """
+ Decode the given input object
+ Returns:
+ The decoded value
-def decode(e, content):
- if not isinstance(content, bytes):
- return None
- encoding_map = {
- "identity": identity,
- "gzip": decode_gzip,
- "deflate": decode_deflate,
- }
- if e not in encoding_map:
- return None
- return encoding_map[e](content)
+ Raises:
+ ValueError, if decoding fails.
+ """
+ try:
+ try:
+ return custom_decode[encoding](obj)
+ except KeyError:
+ return codecs.decode(obj, encoding, errors)
+ except Exception as e:
+ raise ValueError("{} when decoding {} with {}".format(
+ type(e).__name__,
+ repr(obj)[:10],
+ repr(encoding),
+ ))
+
+
+def encode(obj, encoding, errors='strict'):
+ # type: (Union[str, bytes], str) -> Union[str, bytes]
+ """
+ Encode the given input object
+ Returns:
+ The encoded value
-def encode(e, content):
- if not isinstance(content, bytes):
- return None
- encoding_map = {
- "identity": identity,
- "gzip": encode_gzip,
- "deflate": encode_deflate,
- }
- if e not in encoding_map:
- return None
- return encoding_map[e](content)
+ Raises:
+ ValueError, if encoding fails.
+ """
+ try:
+ try:
+ return custom_encode[encoding](obj)
+ except KeyError:
+ return codecs.encode(obj, encoding, errors)
+ except Exception as e:
+ raise ValueError("{} when encoding {} with {}".format(
+ type(e).__name__,
+ repr(obj)[:10],
+ repr(encoding),
+ ))
def identity(content):
@@ -46,10 +69,7 @@ def identity(content):
def decode_gzip(content):
gfile = gzip.GzipFile(fileobj=BytesIO(content))
- try:
- return gfile.read()
- except (IOError, EOFError):
- return None
+ return gfile.read()
def encode_gzip(content):
@@ -70,12 +90,9 @@ def decode_deflate(content):
http://bugs.python.org/issue5784
"""
try:
- try:
- return zlib.decompress(content)
- except zlib.error:
- return zlib.decompress(content, -15)
+ return zlib.decompress(content)
except zlib.error:
- return None
+ return zlib.decompress(content, -15)
def encode_deflate(content):
@@ -84,4 +101,16 @@ def encode_deflate(content):
"""
return zlib.compress(content)
-__all__ = ["ENCODINGS", "encode", "decode"]
+
+custom_decode = {
+ "identity": identity,
+ "gzip": decode_gzip,
+ "deflate": decode_deflate,
+}
+custom_encode = {
+ "identity": identity,
+ "gzip": encode_gzip,
+ "deflate": encode_deflate,
+}
+
+__all__ = ["encode", "decode"]
diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py
index 768a85df..dd0af99c 100644
--- a/netlib/http/cookies.py
+++ b/netlib/http/cookies.py
@@ -1,7 +1,8 @@
import collections
+import email.utils
import re
+import time
-import email.utils
from netlib import multidict
"""
@@ -260,3 +261,29 @@ def refresh_set_cookie_header(c, delta):
if not ret:
raise ValueError("Invalid Cookie")
return ret
+
+
+def is_expired(cookie_attrs):
+ """
+ Determines whether a cookie has expired.
+
+ Returns: boolean
+ """
+
+ # See if 'expires' time is in the past
+ expires = False
+ if 'expires' in cookie_attrs:
+ e = email.utils.parsedate_tz(cookie_attrs["expires"])
+ if e:
+ exp_ts = email.utils.mktime_tz(e)
+ now_ts = time.time()
+ expires = exp_ts < now_ts
+
+ # or if Max-Age is 0
+ max_age = False
+ try:
+ max_age = int(cookie_attrs.get('Max-Age', 1)) == 0
+ except ValueError:
+ pass
+
+ return expires or max_age
diff --git a/netlib/http/headers.py b/netlib/http/headers.py
index 413add87..36e5060c 100644
--- a/netlib/http/headers.py
+++ b/netlib/http/headers.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function, division
import re
+import collections
import six
from netlib import multidict
from netlib import strutils
@@ -183,8 +184,8 @@ class Headers(multidict.MultiDict):
pass
else:
replacements += n
- fields.append([name, value])
- self.fields = fields
+ fields.append((name, value))
+ self.fields = tuple(fields)
return replacements
@@ -206,10 +207,22 @@ def parse_content_type(c):
ts = parts[0].split("/", 1)
if len(ts) != 2:
return None
- d = {}
+ d = collections.OrderedDict()
if len(parts) == 2:
for i in parts[1].split(";"):
clause = i.split("=", 1)
if len(clause) == 2:
d[clause[0].strip()] = clause[1].strip()
return ts[0].lower(), ts[1].lower(), d
+
+
+def assemble_content_type(type, subtype, parameters):
+ if not parameters:
+ return "{}/{}".format(type, subtype)
+ params = "; ".join(
+ "{}={}".format(k, v)
+ for k, v in parameters.items()
+ )
+ return "{}/{}; {}".format(
+ type, subtype, params
+ )
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
index 511328f1..e74732d2 100644
--- a/netlib/http/http1/assemble.py
+++ b/netlib/http/http1/assemble.py
@@ -5,7 +5,7 @@ from netlib import exceptions
def assemble_request(request):
- if request.content is None:
+ if request.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
head = assemble_request_head(request)
body = b"".join(assemble_body(request.data.headers, [request.data.content]))
@@ -19,7 +19,7 @@ def assemble_request_head(request):
def assemble_response(response):
- if response.content is None:
+ if response.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
head = assemble_response_head(response)
body = b"".join(assemble_body(response.data.headers, [response.data.content]))
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index a4c341fd..70fffbd4 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -244,7 +244,7 @@ def _read_request_line(rfile):
raise exceptions.HttpReadDisconnect("Client disconnected")
try:
- method, path, http_version = line.split(b" ")
+ method, path, http_version = line.split()
if path == b"*" or path.startswith(b"/"):
form = "relative"
@@ -291,8 +291,7 @@ def _read_response_line(rfile):
raise exceptions.HttpReadDisconnect("Server disconnected")
try:
-
- parts = line.split(b" ", 2)
+ parts = line.split(None, 2)
if len(parts) == 2: # handle missing message gracefully
parts.append(b"")
diff --git a/netlib/http/message.py b/netlib/http/message.py
index b268fec9..34709f0a 100644
--- a/netlib/http/message.py
+++ b/netlib/http/message.py
@@ -52,7 +52,23 @@ class MessageData(basetypes.Serializable):
return cls(**state)
+class CachedDecode(object):
+ __slots__ = ["encoded", "encoding", "strict", "decoded"]
+
+ def __init__(self, object, encoding, strict, decoded):
+ self.encoded = object
+ self.encoding = encoding
+ self.strict = strict
+ self.decoded = decoded
+
+no_cached_decode = CachedDecode(None, None, None, None)
+
+
class Message(basetypes.Serializable):
+ def __init__(self):
+ self._content_cache = no_cached_decode # type: CachedDecode
+ self._text_cache = no_cached_decode # type: CachedDecode
+
def __eq__(self, other):
if isinstance(other, Message):
return self.data == other.data
@@ -90,22 +106,82 @@ class Message(basetypes.Serializable):
self.data.headers = h
@property
- def content(self):
+ def raw_content(self):
+ # type: () -> bytes
"""
The raw (encoded) HTTP message body
- See also: :py:attr:`text`
+ See also: :py:attr:`content`, :py:class:`text`
"""
return self.data.content
- @content.setter
- def content(self, content):
- # type: (Optional[bytes]) -> None
+ @raw_content.setter
+ def raw_content(self, content):
self.data.content = content
- if isinstance(content, six.text_type):
- raise ValueError("Message content must be bytes, not {}".format(type(content).__name__))
- if isinstance(content, bytes):
- self.headers["content-length"] = str(len(content))
+
+ def get_content(self, strict=True):
+ # type: (bool) -> bytes
+ """
+ The HTTP message body decoded with the content-encoding header (e.g. gzip)
+
+ Raises:
+ ValueError, when the content-encoding is invalid and strict is True.
+
+ See also: :py:class:`raw_content`, :py:attr:`text`
+ """
+ if self.raw_content is None:
+ return None
+ ce = self.headers.get("content-encoding")
+ cached = (
+ self._content_cache.encoded == self.raw_content and
+ (self._content_cache.strict or not strict) and
+ self._content_cache.encoding == ce
+ )
+ if not cached:
+ is_strict = True
+ if ce:
+ try:
+ decoded = encoding.decode(self.raw_content, ce)
+ except ValueError:
+ if strict:
+ raise
+ is_strict = False
+ decoded = self.raw_content
+ else:
+ decoded = self.raw_content
+ self._content_cache = CachedDecode(self.raw_content, ce, is_strict, decoded)
+ return self._content_cache.decoded
+
+ def set_content(self, value):
+ if value is None:
+ self.raw_content = None
+ return
+ if not isinstance(value, bytes):
+ raise TypeError(
+ "Message content must be bytes, not {}. "
+ "Please use .text if you want to assign a str."
+ .format(type(value).__name__)
+ )
+ ce = self.headers.get("content-encoding")
+ cached = (
+ self._content_cache.decoded == value and
+ self._content_cache.encoding == ce and
+ self._content_cache.strict
+ )
+ if not cached:
+ try:
+ encoded = encoding.encode(value, ce or "identity")
+ except ValueError:
+ # So we have an invalid content-encoding?
+ # Let's remove it!
+ del self.headers["content-encoding"]
+ ce = None
+ encoded = value
+ self._content_cache = CachedDecode(encoded, ce, True, value)
+ self.raw_content = self._content_cache.encoded
+ self.headers["content-length"] = str(len(self.raw_content))
+
+ content = property(get_content, set_content)
@property
def http_version(self):
@@ -140,56 +216,108 @@ class Message(basetypes.Serializable):
def timestamp_end(self, timestamp_end):
self.data.timestamp_end = timestamp_end
- @property
- def text(self):
- """
- The decoded HTTP message body.
- Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive.
+ def _get_content_type_charset(self):
+ # type: () -> Optional[str]
+ ct = headers.parse_content_type(self.headers.get("content-type", ""))
+ if ct:
+ return ct[2].get("charset")
- .. note::
- This is not implemented yet.
+ def _guess_encoding(self):
+ # type: () -> str
+ enc = self._get_content_type_charset()
+ if enc:
+ return enc
- See also: :py:attr:`content`, :py:class:`decoded`
+ if "json" in self.headers.get("content-type", ""):
+ return "utf8"
+ else:
+ # We may also want to check for HTML meta tags here at some point.
+ return "latin-1"
+
+ def get_text(self, strict=True):
+ # type: (bool) -> six.text_type
"""
- # This attribute should be called text, because that's what requests does.
- raise NotImplementedError()
+ The HTTP message body decoded with both content-encoding header (e.g. gzip)
+ and content-type header charset.
- @text.setter
- def text(self, text):
- raise NotImplementedError()
+ Raises:
+ ValueError, when either content-encoding or charset is invalid and strict is True.
- def decode(self):
+ See also: :py:attr:`content`, :py:class:`raw_content`
+ """
+ if self.raw_content is None:
+ return None
+ enc = self._guess_encoding()
+
+ content = self.get_content(strict)
+ cached = (
+ self._text_cache.encoded == content and
+ (self._text_cache.strict or not strict) and
+ self._text_cache.encoding == enc
+ )
+ if not cached:
+ is_strict = self._content_cache.strict
+ try:
+ decoded = encoding.decode(content, enc)
+ except ValueError:
+ if strict:
+ raise
+ is_strict = False
+ decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
+ self._text_cache = CachedDecode(content, enc, is_strict, decoded)
+ return self._text_cache.decoded
+
+ def set_text(self, text):
+ if text is None:
+ self.content = None
+ return
+ enc = self._guess_encoding()
+
+ cached = (
+ self._text_cache.decoded == text and
+ self._text_cache.encoding == enc and
+ self._text_cache.strict
+ )
+ if not cached:
+ try:
+ encoded = encoding.encode(text, enc)
+ except ValueError:
+ # Fall back to UTF-8 and update the content-type header.
+ ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {})
+ ct[2]["charset"] = "utf-8"
+ self.headers["content-type"] = headers.assemble_content_type(*ct)
+ enc = "utf8"
+ encoded = text.encode(enc, "replace" if six.PY2 else "surrogateescape")
+ self._text_cache = CachedDecode(encoded, enc, True, text)
+ self.content = self._text_cache.encoded
+
+ text = property(get_text, set_text)
+
+ def decode(self, strict=True):
"""
- Decodes body based on the current Content-Encoding header, then
- removes the header. If there is no Content-Encoding header, no
- action is taken.
+ Decodes body based on the current Content-Encoding header, then
+ removes the header. If there is no Content-Encoding header, no
+ action is taken.
- Returns:
- True, if decoding succeeded.
- False, otherwise.
+ Raises:
+ ValueError, when the content-encoding is invalid and strict is True.
"""
- ce = self.headers.get("content-encoding")
- data = encoding.decode(ce, self.content)
- if data is None:
- return False
- self.content = data
+ self.raw_content = self.get_content(strict)
self.headers.pop("content-encoding", None)
- return True
def encode(self, e):
"""
- Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
+ Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
+ Any existing content-encodings are overwritten,
+ the content is not decoded beforehand.
- Returns:
- True, if decoding succeeded.
- False, otherwise.
+ Raises:
+ ValueError, when the specified content-encoding is invalid.
"""
- data = encoding.encode(e, self.content)
- if data is None:
- return False
- self.content = data
self.headers["content-encoding"] = e
- return True
+ self.content = self.raw_content
+ if "content-encoding" not in self.headers:
+ raise ValueError("Invalid content encoding {}".format(repr(e)))
def replace(self, pattern, repl, flags=0):
"""
@@ -206,10 +334,9 @@ class Message(basetypes.Serializable):
repl = strutils.escaped_str_to_bytes(repl)
replacements = 0
if self.content:
- with decoded(self):
- self.content, replacements = re.subn(
- pattern, repl, self.content, flags=flags
- )
+ self.content, replacements = re.subn(
+ pattern, repl, self.content, flags=flags
+ )
replacements += self.headers.replace(pattern, repl, flags)
return replacements
@@ -228,29 +355,16 @@ class Message(basetypes.Serializable):
class decoded(object):
"""
- A context manager that decodes a request or response, and then
- re-encodes it with the same encoding after execution of the block.
-
- Example:
-
- .. code-block:: python
-
- with decoded(request):
- request.content = request.content.replace("foo", "bar")
+ Deprecated: You can now directly use :py:attr:`content`.
+ :py:attr:`raw_content` has the encoded content.
"""
- def __init__(self, message):
- self.message = message
- ce = message.headers.get("content-encoding")
- if ce in encoding.ENCODINGS:
- self.ce = ce
- else:
- self.ce = None
+ def __init__(self, message): # pragma no cover
+ warnings.warn("decoded() is deprecated, you can now directly use .content instead. "
+ ".raw_content has the encoded content.", DeprecationWarning)
- def __enter__(self):
- if self.ce:
- self.message.decode()
+ def __enter__(self): # pragma no cover
+ pass
- def __exit__(self, type, value, tb):
- if self.ce:
- self.message.encode(self.ce)
+ def __exit__(self, type, value, tb): # pragma no cover
+ pass
diff --git a/netlib/http/request.py b/netlib/http/request.py
index c4c39942..ecaa9b79 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -5,7 +5,6 @@ import re
import six
from six.moves import urllib
-from netlib import encoding
from netlib import multidict
from netlib import strutils
from netlib.http import multipart
@@ -56,6 +55,7 @@ class Request(message.Message):
An HTTP request.
"""
def __init__(self, *args, **kwargs):
+ super(Request, self).__init__()
self.data = RequestData(*args, **kwargs)
def __repr__(self):
@@ -339,7 +339,7 @@ class Request(message.Message):
self.headers["accept-encoding"] = (
', '.join(
e
- for e in encoding.ENCODINGS
+ for e in {"gzip", "identity", "deflate"}
if e in accept_encoding
)
)
@@ -359,7 +359,10 @@ class Request(message.Message):
def _get_urlencoded_form(self):
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return tuple(netlib.http.url.decode(self.content))
+ try:
+ return tuple(netlib.http.url.decode(self.content))
+ except ValueError:
+ pass
return ()
def _set_urlencoded_form(self, value):
@@ -388,7 +391,10 @@ class Request(message.Message):
def _get_multipart_form(self):
is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return multipart.decode(self.headers, self.content)
+ try:
+ return multipart.decode(self.headers, self.content)
+ except ValueError:
+ pass
return ()
def _set_multipart_form(self, value):
diff --git a/netlib/http/response.py b/netlib/http/response.py
index 7cfb55c8..85f54940 100644
--- a/netlib/http/response.py
+++ b/netlib/http/response.py
@@ -37,13 +37,14 @@ class Response(message.Message):
An HTTP response.
"""
def __init__(self, *args, **kwargs):
+ super(Response, self).__init__()
self.data = ResponseData(*args, **kwargs)
def __repr__(self):
- if self.content:
+ if self.raw_content:
details = "{}, {}".format(
self.headers.get("content-type", "unknown content type"),
- human.pretty_size(len(self.content))
+ human.pretty_size(len(self.raw_content))
)
else:
details = "no content"
diff --git a/netlib/strutils.py b/netlib/strutils.py
index 9208f954..32e77927 100644
--- a/netlib/strutils.py
+++ b/netlib/strutils.py
@@ -57,8 +57,8 @@ def escape_control_characters(text, keep_spacing=True):
Args:
keep_spacing: If True, tabs and newlines will not be replaced.
"""
- # type: (six.text_type) -> six.text_type
- if not isinstance(text, six.text_type):
+ # type: (six.string_types) -> six.text_type
+ if not isinstance(text, six.string_types):
raise ValueError("text type must be unicode but is {}".format(type(text).__name__))
trans = _control_char_trans_newline if keep_spacing else _control_char_trans
@@ -146,7 +146,7 @@ def hexdump(s):
A generator of (offset, hex, str) tuples
"""
for i in range(0, len(s), 16):
- offset = "{:0=10x}".format(i).encode()
+ offset = "{:0=10x}".format(i)
part = s[i:i + 16]
x = " ".join("{:0=2x}".format(i) for i in six.iterbytes(part))
x = x.ljust(47) # 16*2 + 15
diff --git a/netlib/utils.py b/netlib/utils.py
index 23c16dc3..9eebf22c 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -56,6 +56,13 @@ class Data(object):
dirname = os.path.dirname(inspect.getsourcefile(m))
self.dirname = os.path.abspath(dirname)
+ def push(self, subpath):
+ """
+ Change the data object to a path relative to the module.
+ """
+ self.dirname = os.path.join(self.dirname, subpath)
+ return self
+
def path(self, path):
"""
Returns a path to the package data housed at 'path' under this
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index c66fddc2..0def75b5 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -54,6 +54,10 @@ class WSGIAdaptor(object):
self.app, self.domain, self.port, self.sversion = app, domain, port, sversion
def make_environ(self, flow, errsoc, **extra):
+ """
+ Raises:
+ ValueError, if the content-encoding is invalid.
+ """
path = strutils.native(flow.request.path, "latin-1")
if '?' in path:
path_info, query = strutils.native(path, "latin-1").split('?', 1)