message.content -> .raw_content, implement .text

This PR improves our handling of HTTP message body encodings: - The unaltered message body is now accessible as `.raw_content` - The "content-encoding"-decoded content (i.e. gzip removed) is now `.content`, as this is what we want in 99% of the cases. - `.text` now provides the "content-encoding"-decoded and then "content-type charset"-decoded message body. - The decoded values for `.content` and `.text` are cached, so that repeated access and `x.text = x.text` is cheap. - The `decoded()` decorator is now deprecated, as we can now just use `.content`. Similarly `HTTPMessage.get_decoded_content()` is deprecated.
author: Maximilian Hils <git@maximilianhils.com> 2016-07-02 01:51:47 -0700
committer: Maximilian Hils <git@maximilianhils.com> 2016-07-02 01:51:47 -0700
commit: 6032c4f2352260d32032800a2ff694339e2af6b2 (patch)
tree: e242ede8ebb828f424f270aeb5143516ed048939 /test/netlib
parent: 2c09e0416bcf94d9ebef7c11bb1883388e8e2c5d (diff)
download: mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.gz
mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.bz2
mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.zip
2 files changed, 105 insertions, 52 deletions
diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index f5bf7f0c..aecde1ec 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, print_function, division
 
-from netlib.http import decoded
+import six
+
 from netlib.tutils import tresp
 
 
@@ -76,6 +77,9 @@ class TestMessage(object):
         resp.content = b""
         assert resp.data.content == b""
         assert resp.headers["content-length"] == "0"
+        resp.raw_content = b"bar"
+        assert resp.data.content == b"bar"
+        assert resp.headers["content-length"] == "0"
 
     def test_content_basic(self):
         _test_passthrough_attr(tresp(), "content")
@@ -93,61 +97,108 @@ class TestMessage(object):
         _test_decoded_attr(tresp(), "http_version")
 
 
-class TestDecodedDecorator(object):
-
+class TestMessageContentEncoding(object):
     def test_simple(self):
         r = tresp()
-        assert r.content == b"message"
+        assert r.raw_content == b"message"
         assert "content-encoding" not in r.headers
-        assert r.encode("gzip")
+        r.encode("gzip")
 
         assert r.headers["content-encoding"]
-        assert r.content != b"message"
-        with decoded(r):
-            assert "content-encoding" not in r.headers
-            assert r.content == b"message"
-        assert r.headers["content-encoding"]
-        assert r.content != b"message"
+        assert r.raw_content != b"message"
+        assert r.content == b"message"
+        assert r.raw_content != b"message"
 
     def test_modify(self):
         r = tresp()
         assert "content-encoding" not in r.headers
-        assert r.encode("gzip")
-
-        with decoded(r):
-            r.content = b"foo"
+        r.encode("gzip")
 
-        assert r.content != b"foo"
+        r.content = b"foo"
+        assert r.raw_content != b"foo"
         r.decode()
-        assert r.content == b"foo"
+        assert r.raw_content == b"foo"
 
     def test_unknown_ce(self):
         r = tresp()
         r.headers["content-encoding"] = "zopfli"
-        r.content = b"foo"
-        with decoded(r):
-            assert r.headers["content-encoding"]
-            assert r.content == b"foo"
-        assert r.headers["content-encoding"]
+        r.raw_content = b"foo"
         assert r.content == b"foo"
+        assert r.headers["content-encoding"]
 
     def test_cannot_decode(self):
         r = tresp()
-        assert r.encode("gzip")
-        r.content = b"foo"
-        with decoded(r):
-            assert r.headers["content-encoding"]
-            assert r.content == b"foo"
+        r.encode("gzip")
+        r.raw_content = b"foo"
+        assert r.content == b"foo"
         assert r.headers["content-encoding"]
-        assert r.content != b"foo"
         r.decode()
-        assert r.content == b"foo"
+        assert r.raw_content == b"foo"
+        assert "content-encoding" not in r.headers
 
     def test_cannot_encode(self):
         r = tresp()
-        assert r.encode("gzip")
-        with decoded(r):
-            r.content = None
+        r.encode("gzip")
+        r.content = None
+        assert r.headers["content-encoding"]
+        assert r.raw_content is None
 
+        r.headers["content-encoding"] = "zopfli"
+        r.content = b"foo"
         assert "content-encoding" not in r.headers
-        assert r.content is None
+        assert r.raw_content == b"foo"
+
+
+class TestMessageText(object):
+    def test_simple(self):
+        r = tresp(content=b'\xc3\xbc')
+        assert r.raw_content == b"\xc3\xbc"
+        assert r.content == b"\xc3\xbc"
+        assert r.text == u"ü"
+
+        r.encode("gzip")
+        assert r.text == u"ü"
+        r.decode()
+        assert r.text == u"ü"
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        assert r.content == b"\xc3\xbc"
+        assert r.text == u"Ã¼"
+
+    def test_modify(self):
+        r = tresp()
+
+        r.text = u"ü"
+        assert r.raw_content == b"\xc3\xbc"
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u"ü"
+        assert r.raw_content == b"\xfc"
+        assert r.headers["content-length"] == "1"
+
+    def test_unknown_ce(self):
+        r = tresp()
+        r.headers["content-type"] = "text/html; charset=wtf"
+        r.raw_content = b"foo"
+        assert r.text == u"foo"
+
+    def test_cannot_decode(self):
+        r = tresp()
+        r.raw_content = b"\xFF"
+        assert r.text == (u'\ufffd' if six.PY2 else '\udcff')
+
+    def test_cannot_encode(self):
+        r = tresp()
+        r.content = None
+        assert "content-type" not in r.headers
+        assert r.raw_content is None
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u"☃"
+        assert r.headers["content-type"] == "text/html; charset=utf-8"
+        assert r.raw_content == b'\xe2\x98\x83'
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u'\udcff'
+        assert r.headers["content-type"] == "text/html; charset=utf-8"
+        assert r.raw_content == (b'\xed\xb3\xbf' if six.PY2 else b"\xFF")
diff --git a/test/netlib/test_encoding.py b/test/netlib/test_encoding.py
index 0ff1aad1..de10fc48 100644
--- a/test/netlib/test_encoding.py
+++ b/test/netlib/test_encoding.py
@@ -1,37 +1,39 @@
-from netlib import encoding
+from netlib import encoding, tutils
 
 
 def test_identity():
-    assert b"string" == encoding.decode("identity", b"string")
-    assert b"string" == encoding.encode("identity", b"string")
-    assert not encoding.encode("nonexistent", b"string")
-    assert not encoding.decode("nonexistent encoding", b"string")
+    assert b"string" == encoding.decode(b"string", "identity")
+    assert b"string" == encoding.encode(b"string", "identity")
+    with tutils.raises(ValueError):
+        encoding.encode(b"string", "nonexistent encoding")
 
 
 def test_gzip():
     assert b"string" == encoding.decode(
-        "gzip",
         encoding.encode(
-            "gzip",
-            b"string"
-        )
+            b"string",
+            "gzip"
+        ),
+        "gzip"
     )
-    assert encoding.decode("gzip", b"bogus") is None
+    with tutils.raises(ValueError):
+        encoding.decode(b"bogus", "gzip")
 
 
 def test_deflate():
     assert b"string" == encoding.decode(
-        "deflate",
         encoding.encode(
-            "deflate",
-            b"string"
-        )
+            b"string",
+            "deflate"
+        ),
+        "deflate"
     )
     assert b"string" == encoding.decode(
-        "deflate",
         encoding.encode(
-            "deflate",
-            b"string"
-        )[2:-4]
+            b"string",
+            "deflate"
+        )[2:-4],
+        "deflate"
     )
-    assert encoding.decode("deflate", b"bogus") is None
+    with tutils.raises(ValueError):
+        encoding.decode(b"bogus", "deflate")
author	Maximilian Hils <git@maximilianhils.com>	2016-07-02 01:51:47 -0700
committer	Maximilian Hils <git@maximilianhils.com>	2016-07-02 01:51:47 -0700
commit	6032c4f2352260d32032800a2ff694339e2af6b2 (patch)
tree	e242ede8ebb828f424f270aeb5143516ed048939 /test/netlib
parent	2c09e0416bcf94d9ebef7c11bb1883388e8e2c5d (diff)
download	mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.gz mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.bz2 mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.zip