aboutsummaryrefslogtreecommitdiffstats
path: root/test/netlib
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2016-07-02 01:51:47 -0700
committerMaximilian Hils <git@maximilianhils.com>2016-07-02 01:51:47 -0700
commit6032c4f2352260d32032800a2ff694339e2af6b2 (patch)
treee242ede8ebb828f424f270aeb5143516ed048939 /test/netlib
parent2c09e0416bcf94d9ebef7c11bb1883388e8e2c5d (diff)
downloadmitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.gz
mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.bz2
mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.zip
message.content -> .raw_content, implement .text
This PR improves our handling of HTTP message body encodings: - The unaltered message body is now accessible as `.raw_content`. - The "content-encoding"-decoded content (i.e. with gzip removed) is now `.content`, as this is what we want in 99% of the cases. - `.text` now provides the "content-encoding"-decoded and then "content-type charset"-decoded message body. - The decoded values for `.content` and `.text` are cached, so that repeated access and `x.text = x.text` are cheap. - The `decoded()` decorator is now deprecated, as we can now just use `.content`. Similarly, `HTTPMessage.get_decoded_content()` is deprecated.
Diffstat (limited to 'test/netlib')
-rw-r--r--test/netlib/http/test_message.py117
-rw-r--r--test/netlib/test_encoding.py40
2 files changed, 105 insertions, 52 deletions
diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index f5bf7f0c..aecde1ec 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
-from netlib.http import decoded
+import six
+
from netlib.tutils import tresp
@@ -76,6 +77,9 @@ class TestMessage(object):
resp.content = b""
assert resp.data.content == b""
assert resp.headers["content-length"] == "0"
+ resp.raw_content = b"bar"
+ assert resp.data.content == b"bar"
+ assert resp.headers["content-length"] == "0"
def test_content_basic(self):
_test_passthrough_attr(tresp(), "content")
@@ -93,61 +97,108 @@ class TestMessage(object):
_test_decoded_attr(tresp(), "http_version")
-class TestDecodedDecorator(object):
-
+class TestMessageContentEncoding(object):
def test_simple(self):
r = tresp()
- assert r.content == b"message"
+ assert r.raw_content == b"message"
assert "content-encoding" not in r.headers
- assert r.encode("gzip")
+ r.encode("gzip")
assert r.headers["content-encoding"]
- assert r.content != b"message"
- with decoded(r):
- assert "content-encoding" not in r.headers
- assert r.content == b"message"
- assert r.headers["content-encoding"]
- assert r.content != b"message"
+ assert r.raw_content != b"message"
+ assert r.content == b"message"
+ assert r.raw_content != b"message"
def test_modify(self):
r = tresp()
assert "content-encoding" not in r.headers
- assert r.encode("gzip")
-
- with decoded(r):
- r.content = b"foo"
+ r.encode("gzip")
- assert r.content != b"foo"
+ r.content = b"foo"
+ assert r.raw_content != b"foo"
r.decode()
- assert r.content == b"foo"
+ assert r.raw_content == b"foo"
def test_unknown_ce(self):
r = tresp()
r.headers["content-encoding"] = "zopfli"
- r.content = b"foo"
- with decoded(r):
- assert r.headers["content-encoding"]
- assert r.content == b"foo"
- assert r.headers["content-encoding"]
+ r.raw_content = b"foo"
assert r.content == b"foo"
+ assert r.headers["content-encoding"]
def test_cannot_decode(self):
r = tresp()
- assert r.encode("gzip")
- r.content = b"foo"
- with decoded(r):
- assert r.headers["content-encoding"]
- assert r.content == b"foo"
+ r.encode("gzip")
+ r.raw_content = b"foo"
+ assert r.content == b"foo"
assert r.headers["content-encoding"]
- assert r.content != b"foo"
r.decode()
- assert r.content == b"foo"
+ assert r.raw_content == b"foo"
+ assert "content-encoding" not in r.headers
def test_cannot_encode(self):
r = tresp()
- assert r.encode("gzip")
- with decoded(r):
- r.content = None
+ r.encode("gzip")
+ r.content = None
+ assert r.headers["content-encoding"]
+ assert r.raw_content is None
+ r.headers["content-encoding"] = "zopfli"
+ r.content = b"foo"
assert "content-encoding" not in r.headers
- assert r.content is None
+ assert r.raw_content == b"foo"
+
+
+class TestMessageText(object):
+ def test_simple(self):
+ r = tresp(content=b'\xc3\xbc')
+ assert r.raw_content == b"\xc3\xbc"
+ assert r.content == b"\xc3\xbc"
+ assert r.text == u"ü"
+
+ r.encode("gzip")
+ assert r.text == u"ü"
+ r.decode()
+ assert r.text == u"ü"
+
+ r.headers["content-type"] = "text/html; charset=latin1"
+ assert r.content == b"\xc3\xbc"
+ assert r.text == u"ü"
+
+ def test_modify(self):
+ r = tresp()
+
+ r.text = u"ü"
+ assert r.raw_content == b"\xc3\xbc"
+
+ r.headers["content-type"] = "text/html; charset=latin1"
+ r.text = u"ü"
+ assert r.raw_content == b"\xfc"
+ assert r.headers["content-length"] == "1"
+
+ def test_unknown_ce(self):
+ r = tresp()
+ r.headers["content-type"] = "text/html; charset=wtf"
+ r.raw_content = b"foo"
+ assert r.text == u"foo"
+
+ def test_cannot_decode(self):
+ r = tresp()
+ r.raw_content = b"\xFF"
+ assert r.text == u'\ufffd' if six.PY2 else '\udcff'
+
+ def test_cannot_encode(self):
+ r = tresp()
+ r.content = None
+ assert "content-type" not in r.headers
+ assert r.raw_content is None
+
+ r.headers["content-type"] = "text/html; charset=latin1"
+ r.text = u"☃"
+ assert r.headers["content-type"] == "text/html; charset=utf-8"
+ assert r.raw_content == b'\xe2\x98\x83'
+
+ r.headers["content-type"] = "text/html; charset=latin1"
+ r.text = u'\udcff'
+ assert r.headers["content-type"] == "text/html; charset=utf-8"
+ assert r.raw_content == b'\xed\xb3\xbf' if six.PY2 else b"\xFF"
diff --git a/test/netlib/test_encoding.py b/test/netlib/test_encoding.py
index 0ff1aad1..de10fc48 100644
--- a/test/netlib/test_encoding.py
+++ b/test/netlib/test_encoding.py
@@ -1,37 +1,39 @@
-from netlib import encoding
+from netlib import encoding, tutils
def test_identity():
- assert b"string" == encoding.decode("identity", b"string")
- assert b"string" == encoding.encode("identity", b"string")
- assert not encoding.encode("nonexistent", b"string")
- assert not encoding.decode("nonexistent encoding", b"string")
+ assert b"string" == encoding.decode(b"string", "identity")
+ assert b"string" == encoding.encode(b"string", "identity")
+ with tutils.raises(ValueError):
+ encoding.encode(b"string", "nonexistent encoding")
def test_gzip():
assert b"string" == encoding.decode(
- "gzip",
encoding.encode(
- "gzip",
- b"string"
- )
+ b"string",
+ "gzip"
+ ),
+ "gzip"
)
- assert encoding.decode("gzip", b"bogus") is None
+ with tutils.raises(ValueError):
+ encoding.decode(b"bogus", "gzip")
def test_deflate():
assert b"string" == encoding.decode(
- "deflate",
encoding.encode(
- "deflate",
- b"string"
- )
+ b"string",
+ "deflate"
+ ),
+ "deflate"
)
assert b"string" == encoding.decode(
- "deflate",
encoding.encode(
- "deflate",
- b"string"
- )[2:-4]
+ b"string",
+ "deflate"
+ )[2:-4],
+ "deflate"
)
- assert encoding.decode("deflate", b"bogus") is None
+ with tutils.raises(ValueError):
+ encoding.decode(b"bogus", "deflate")