about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Maximilian Hils <git@maximilianhils.com> 2016-07-04 13:58:09 -0700
committer: Maximilian Hils <git@maximilianhils.com> 2016-07-04 13:58:09 -0700
commit: a6b3551934e2b8768177d6831ca08f97f5bdae44 (patch)
tree: 39fd8a2223b726b7b496b93ed55ade32e9ebf05e
parent: 2f8a1fd2cb1374941f436f36bbfa0d0b3d9213c7 (diff)
download: mitmproxy-a6b3551934e2b8768177d6831ca08f97f5bdae44.tar.gz
mitmproxy-a6b3551934e2b8768177d6831ca08f97f5bdae44.tar.bz2
mitmproxy-a6b3551934e2b8768177d6831ca08f97f5bdae44.zip
raise ValueError if content-encoding is invalid
-rw-r--r-- mitmproxy/console/common.py 40
-rw-r--r-- mitmproxy/console/flowview.py 38
-rw-r--r-- mitmproxy/dump.py 13
-rw-r--r-- mitmproxy/filt.py 36
-rw-r--r-- mitmproxy/flow/export.py 18
-rw-r--r-- netlib/http/message.py 42
-rw-r--r-- netlib/http/request.py 12
-rw-r--r-- netlib/wsgi.py 6
-rw-r--r-- test/netlib/http/test_message.py 18
9 files changed, 154 insertions, 69 deletions
diff --git a/mitmproxy/console/common.py b/mitmproxy/console/common.py
index b4369c0c..ef220b4c 100644
--- a/mitmproxy/console/common.py
+++ b/mitmproxy/console/common.py
@@ -256,24 +256,34 @@ def copy_flow_format_data(part, scope, flow):
else:
data = ""
if scope in ("q", "a"):
- if flow.request.content is None:
+ request = flow.request.copy()
+ try:
+ request.decode()
+ except ValueError:
+ pass
+ if request.raw_content is None:
return None, "Request content is missing"
if part == "h":
- data += netlib.http.http1.assemble_request(flow.request)
+ data += netlib.http.http1.assemble_request(request)
elif part == "c":
- data += flow.request.content
+ data += request.raw_content
else:
raise ValueError("Unknown part: {}".format(part))
- if scope == "a" and flow.request.content and flow.response:
+ if scope == "a" and flow.request.raw_content and flow.response:
# Add padding between request and response
data += "\r\n" * 2
if scope in ("s", "a") and flow.response:
- if flow.response.content is None:
+ response = flow.response.copy()
+ try:
+ response.decode()
+ except ValueError:
+ pass
+ if response.raw_content is None:
return None, "Response content is missing"
if part == "h":
- data += netlib.http.http1.assemble_response(flow.response)
+ data += netlib.http.http1.assemble_response(response)
elif part == "c":
- data += flow.response.content
+ data += response.raw_content
else:
raise ValueError("Unknown part: {}".format(part))
return data, False
@@ -361,8 +371,8 @@ def ask_save_body(part, master, state, flow):
"q" (request), "s" (response) or None (ask user if necessary).
"""
- request_has_content = flow.request and flow.request.content
- response_has_content = flow.response and flow.response.content
+ request_has_content = flow.request and flow.request.raw_content
+ response_has_content = flow.response and flow.response.raw_content
if part is None:
# We first need to determine whether we want to save the request or the
@@ -383,14 +393,22 @@ def ask_save_body(part, master, state, flow):
ask_save_body("q", master, state, flow)
elif part == "q" and request_has_content:
+ try:
+ content = flow.request.content
+ except ValueError:
+ content = flow.request.raw_content
ask_save_path(
"Save request content",
- flow.request.content
+ content
)
elif part == "s" and response_has_content:
+ try:
+ content = flow.response.content
+ except ValueError:
+ content = flow.response.raw_content
ask_save_path(
"Save response content",
- flow.response.content
+ content
)
else:
signals.status_message.send(message="No content to save.")
diff --git a/mitmproxy/console/flowview.py b/mitmproxy/console/flowview.py
index 208b0d44..c4bb6c40 100644
--- a/mitmproxy/console/flowview.py
+++ b/mitmproxy/console/flowview.py
@@ -200,25 +200,33 @@ class FlowView(tabs.Tabs):
def _get_content_view(self, viewmode, message, max_lines, _):
try:
+ content = message.content
+ if content != message.raw_content:
+ enc = "[decoded {}]".format(
+ message.headers.get("content-encoding")
+ )
+ else:
+ enc = None
+ except ValueError:
+ content = message.raw_content
+ enc = "[cannot decode]"
+ try:
query = None
if isinstance(message, models.HTTPRequest):
query = message.query
description, lines = contentviews.get_content_view(
- viewmode, message.content, headers=message.headers, query=query
+ viewmode, content, headers=message.headers, query=query
)
except exceptions.ContentViewException:
s = "Content viewer failed: \n" + traceback.format_exc()
signals.add_event(s, "error")
description, lines = contentviews.get_content_view(
- contentviews.get("Raw"), message.content, headers=message.headers
+ contentviews.get("Raw"), content, headers=message.headers
)
description = description.replace("Raw", "Couldn't parse: falling back to Raw")
- if message.content != message.raw_content:
- description = "[decoded {enc}] {desc}".format(
- enc=message.headers.get("content-encoding"),
- desc=description
- )
+ if enc:
+ description = " ".join(enc, description)
# Give hint that you have to tab for the response.
if description == "No content" and isinstance(message, models.HTTPRequest):
@@ -419,10 +427,14 @@ class FlowView(tabs.Tabs):
# editing message bodies, this can cause problems. For now, I
# just strip the newlines off the end of the body when we return
# from an editor.
- c = self.master.spawn_editor(message.content or b"")
+ try:
+ content = message.content
+ except ValueError:
+ content = message.raw_content
+ c = self.master.spawn_editor(content or b"")
message.content = c.rstrip(b"\n")
elif part == "f":
- if not message.urlencoded_form and message.content:
+ if not message.urlencoded_form and message.raw_content:
signals.status_prompt_onekey.send(
prompt = "Existing body is not a URL-encoded form. Clear and edit?",
keys = [
@@ -682,10 +694,14 @@ class FlowView(tabs.Tabs):
)
key = None
elif key == "v":
- if conn.content:
+ if conn.raw_content:
t = conn.headers.get("content-type")
if "EDITOR" in os.environ or "PAGER" in os.environ:
- self.master.spawn_external_viewer(conn.content, t)
+ try:
+ content = conn.content
+ except ValueError:
+ content = conn.raw_content
+ self.master.spawn_external_viewer(content, t)
else:
signals.status_message.send(
message = "Error! Set $EDITOR or $PAGER."
diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py
index ea242bba..0a9b76a7 100644
--- a/mitmproxy/dump.py
+++ b/mitmproxy/dump.py
@@ -187,15 +187,20 @@ class DumpMaster(flow.FlowMaster):
)
self.echo(headers, indent=4)
if self.o.flow_detail >= 3:
- if message.content is None:
+ try:
+ content = message.content
+ except ValueError:
+ content = message.raw_content
+
+ if content is None:
self.echo("(content missing)", indent=4)
- elif message.content:
+ elif content:
self.echo("")
try:
type, lines = contentviews.get_content_view(
contentviews.get("Auto"),
- message.content,
+ content,
headers=getattr(message, "headers", None)
)
except exceptions.ContentViewException:
@@ -203,7 +208,7 @@ class DumpMaster(flow.FlowMaster):
self.add_event(s, "debug")
type, lines = contentviews.get_content_view(
contentviews.get("Raw"),
- message.content,
+ content,
headers=getattr(message, "headers", None)
)
diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py
index 95bae1ae..e8687b9f 100644
--- a/mitmproxy/filt.py
+++ b/mitmproxy/filt.py
@@ -193,12 +193,18 @@ class FBod(_Rex):
help = "Body"
def __call__(self, f):
- if f.request and f.request.content:
- if self.re.search(f.request.content):
- return True
- if f.response and f.response.content:
- if self.re.search(f.response.content):
- return True
+ if f.request and f.request.raw_content:
+ try:
+ if self.re.search(f.request.content):
+ return True
+ except ValueError:
+ pass
+ if f.response and f.response.raw_content:
+ try:
+ if self.re.search(f.response.content):
+ return True
+ except ValueError:
+ pass
return False
@@ -207,9 +213,12 @@ class FBodRequest(_Rex):
help = "Request body"
def __call__(self, f):
- if f.request and f.request.content:
- if self.re.search(f.request.content):
- return True
+ if f.request and f.request.raw_content:
+ try:
+ if self.re.search(f.request.content):
+ return True
+ except ValueError:
+ pass
class FBodResponse(_Rex):
@@ -217,9 +226,12 @@ class FBodResponse(_Rex):
help = "Response body"
def __call__(self, f):
- if f.response and f.response.content:
- if self.re.search(f.response.content):
- return True
+ if f.response and f.response.raw_content:
+ try:
+ if self.re.search(f.response.content):
+ return True
+ except ValueError:
+ pass
class FMethod(_Rex):
diff --git a/mitmproxy/flow/export.py b/mitmproxy/flow/export.py
index f0ac02ab..9da18f22 100644
--- a/mitmproxy/flow/export.py
+++ b/mitmproxy/flow/export.py
@@ -19,17 +19,23 @@ def dictstr(items, indent):
def curl_command(flow):
data = "curl "
- for k, v in flow.request.headers.fields:
+ request = flow.request.copy()
+ try:
+ request.decode()
+ except ValueError:
+ pass
+
+ for k, v in request.headers.fields:
data += "-H '%s:%s' " % (k, v)
- if flow.request.method != "GET":
- data += "-X %s " % flow.request.method
+ if request.method != "GET":
+ data += "-X %s " % request.method
- full_url = flow.request.scheme + "://" + flow.request.host + flow.request.path
+ full_url = request.scheme + "://" + request.host + request.path
data += "'%s'" % full_url
- if flow.request.content:
- data += " --data-binary '%s'" % flow.request.content
+ if request.raw_content:
+ data += " --data-binary '%s'" % request.raw_content
return data
diff --git a/netlib/http/message.py b/netlib/http/message.py
index ca3a4145..86ff64d1 100644
--- a/netlib/http/message.py
+++ b/netlib/http/message.py
@@ -124,6 +124,9 @@ class Message(basetypes.Serializable):
"""
The HTTP message body decoded with the content-encoding header (e.g. gzip)
+ Raises:
+ ValueError, when getting the content and the content-encoding is invalid.
+
See also: :py:class:`raw_content`, :py:attr:`text`
"""
ce = self.headers.get("content-encoding")
@@ -132,17 +135,21 @@ class Message(basetypes.Serializable):
self._content_cache.encoding == ce
)
if not cached:
- try:
- if not ce:
- raise ValueError()
+ if ce:
decoded = encoding.decode(self.raw_content, ce)
- except ValueError:
+ else:
decoded = self.raw_content
self._content_cache = CachedDecode(self.raw_content, ce, decoded)
return self._content_cache.decoded
@content.setter
def content(self, value):
+ if value is not None and not isinstance(value, bytes):
+ raise TypeError(
+ "Message content must be bytes, not {}. "
+ "Please use .text if you want to assign a str."
+ .format(type(value).__name__)
+ )
ce = self.headers.get("content-encoding")
cached = (
self._content_cache.decoded == value and
@@ -150,15 +157,15 @@ class Message(basetypes.Serializable):
)
if not cached:
try:
- if not ce:
- raise ValueError()
- encoded = encoding.encode(value, ce)
+ if ce and value is not None:
+ encoded = encoding.encode(value, ce)
+ else:
+ encoded = value
except ValueError:
- # Do we have an unknown content-encoding?
- # If so, we want to remove it.
- if value and ce:
- self.headers.pop("content-encoding", None)
- ce = None
+ # So we have an invalid content-encoding?
+ # Let's remove it!
+ del self.headers["content-encoding"]
+ ce = None
encoded = value
self._content_cache = CachedDecode(encoded, ce, value)
self.raw_content = self._content_cache.encoded
@@ -262,6 +269,9 @@ class Message(basetypes.Serializable):
Decodes body based on the current Content-Encoding header, then
removes the header. If there is no Content-Encoding header, no
action is taken.
+
+ Raises:
+ ValueError, when the content-encoding is invalid.
"""
self.raw_content = self.content
self.headers.pop("content-encoding", None)
@@ -269,10 +279,16 @@ class Message(basetypes.Serializable):
def encode(self, e):
"""
Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
+ Any existing content-encodings are overwritten,
+ the content is not decoded beforehand.
+
+ Raises:
+ ValueError, when the specified content-encoding is invalid.
"""
- self.decode() # remove the current encoding
self.headers["content-encoding"] = e
self.content = self.raw_content
+ if "content-encoding" not in self.headers:
+ raise ValueError("Invalid content encoding {}".format(repr(e)))
def replace(self, pattern, repl, flags=0):
"""
diff --git a/netlib/http/request.py b/netlib/http/request.py
index 4ce94549..a8ec6238 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -347,7 +347,10 @@ class Request(message.Message):
def _get_urlencoded_form(self):
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return tuple(netlib.http.url.decode(self.content))
+ try:
+ return tuple(netlib.http.url.decode(self.content))
+ except ValueError:
+ pass
return ()
def _set_urlencoded_form(self, value):
@@ -356,7 +359,7 @@ class Request(message.Message):
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = netlib.http.url.encode(value)
+ self.content = netlib.http.url.encode(value).encode()
@urlencoded_form.setter
def urlencoded_form(self, value):
@@ -376,7 +379,10 @@ class Request(message.Message):
def _get_multipart_form(self):
is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return multipart.decode(self.headers, self.content)
+ try:
+ return multipart.decode(self.headers, self.content)
+ except ValueError:
+ pass
return ()
def _set_multipart_form(self, value):
diff --git a/netlib/wsgi.py b/netlib/wsgi.py
index c66fddc2..2444f449 100644
--- a/netlib/wsgi.py
+++ b/netlib/wsgi.py
@@ -60,10 +60,14 @@ class WSGIAdaptor(object):
else:
path_info = path
query = ''
+ try:
+ content = flow.request.content
+ except ValueError:
+ content = flow.request.raw_content
environ = {
'wsgi.version': (1, 0),
'wsgi.url_scheme': strutils.native(flow.request.scheme, "latin-1"),
- 'wsgi.input': BytesIO(flow.request.content or b""),
+ 'wsgi.input': BytesIO(content or b""),
'wsgi.errors': errsoc,
'wsgi.multithread': True,
'wsgi.multiprocess': False,
diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index e1707a91..ed7d3da5 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -5,7 +5,7 @@ import mock
import six
from netlib.tutils import tresp
-from netlib import http
+from netlib import http, tutils
def _test_passthrough_attr(message, attr):
@@ -92,9 +92,6 @@ class TestMessage(object):
assert resp.data.content == b"bar"
assert resp.headers["content-length"] == "0"
- def test_content_basic(self):
- _test_passthrough_attr(tresp(), "content")
-
def test_headers(self):
_test_passthrough_attr(tresp(), "headers")
@@ -149,18 +146,22 @@ class TestMessageContentEncoding(object):
r = tresp()
r.headers["content-encoding"] = "zopfli"
r.raw_content = b"foo"
- assert r.content == b"foo"
+ with tutils.raises(ValueError):
+ assert r.content
assert r.headers["content-encoding"]
def test_cannot_decode(self):
r = tresp()
r.encode("gzip")
r.raw_content = b"foo"
- assert r.content == b"foo"
+ with tutils.raises(ValueError):
+ assert r.content
assert r.headers["content-encoding"]
- r.decode()
+
+ with tutils.raises(ValueError):
+ r.decode()
assert r.raw_content == b"foo"
- assert "content-encoding" not in r.headers
+ assert "content-encoding" in r.headers
def test_cannot_encode(self):
r = tresp()
@@ -213,6 +214,7 @@ class TestMessageText(object):
r.encode("identity")
with mock.patch("netlib.encoding.encode") as e:
+ e.return_value = b""
r.text = u"ü"
assert e.call_count == 0
r.text = u"ä"