From 6032c4f2352260d32032800a2ff694339e2af6b2 Mon Sep 17 00:00:00 2001
From: Maximilian Hils <git@maximilianhils.com>
Date: Sat, 2 Jul 2016 01:51:47 -0700
Subject: message.content -> .raw_content, implement .text

This PR improves our handling of HTTP message body encodings:

- The unaltered message body is now accessible as `.raw_content`
- The "content-encoding"-decoded content (i.e. gzip removed)
  is now `.content`, as this is what we want in 99% of the cases.
- `.text` now provides the "content-encoding"-decoded and then
  "content-type charset"-decoded message body.
- The decoded values for `.content` and `.text` are cached,
  so that repeated access and `x.text = x.text` is cheap.
- The `decoded()` decorator is now deprecated, as we can now just use
  `.content`. Similarly `HTTPMessage.get_decoded_content()` is
  deprecated.
---
 test/netlib/http/test_message.py | 117 ++++++++++++++++++++++++++++-----------
 1 file changed, 84 insertions(+), 33 deletions(-)

(limited to 'test/netlib/http/test_message.py')

diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index f5bf7f0c..aecde1ec 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, print_function, division
 
-from netlib.http import decoded
+import six
+
 from netlib.tutils import tresp
 
 
@@ -76,6 +77,9 @@ class TestMessage(object):
         resp.content = b""
         assert resp.data.content == b""
         assert resp.headers["content-length"] == "0"
+        resp.raw_content = b"bar"
+        assert resp.data.content == b"bar"
+        assert resp.headers["content-length"] == "0"
 
     def test_content_basic(self):
         _test_passthrough_attr(tresp(), "content")
@@ -93,61 +97,108 @@ class TestMessage(object):
         _test_decoded_attr(tresp(), "http_version")
 
 
-class TestDecodedDecorator(object):
-
+class TestMessageContentEncoding(object):
     def test_simple(self):
         r = tresp()
-        assert r.content == b"message"
+        assert r.raw_content == b"message"
         assert "content-encoding" not in r.headers
-        assert r.encode("gzip")
+        r.encode("gzip")
 
         assert r.headers["content-encoding"]
-        assert r.content != b"message"
-        with decoded(r):
-            assert "content-encoding" not in r.headers
-            assert r.content == b"message"
-        assert r.headers["content-encoding"]
-        assert r.content != b"message"
+        assert r.raw_content != b"message"
+        assert r.content == b"message"
+        assert r.raw_content != b"message"
 
     def test_modify(self):
         r = tresp()
         assert "content-encoding" not in r.headers
-        assert r.encode("gzip")
-
-        with decoded(r):
-            r.content = b"foo"
+        r.encode("gzip")
 
-        assert r.content != b"foo"
+        r.content = b"foo"
+        assert r.raw_content != b"foo"
         r.decode()
-        assert r.content == b"foo"
+        assert r.raw_content == b"foo"
 
     def test_unknown_ce(self):
         r = tresp()
         r.headers["content-encoding"] = "zopfli"
-        r.content = b"foo"
-        with decoded(r):
-            assert r.headers["content-encoding"]
-            assert r.content == b"foo"
-        assert r.headers["content-encoding"]
+        r.raw_content = b"foo"
         assert r.content == b"foo"
+        assert r.headers["content-encoding"]
 
     def test_cannot_decode(self):
         r = tresp()
-        assert r.encode("gzip")
-        r.content = b"foo"
-        with decoded(r):
-            assert r.headers["content-encoding"]
-            assert r.content == b"foo"
+        r.encode("gzip")
+        r.raw_content = b"foo"
+        assert r.content == b"foo"
         assert r.headers["content-encoding"]
-        assert r.content != b"foo"
         r.decode()
-        assert r.content == b"foo"
+        assert r.raw_content == b"foo"
+        assert "content-encoding" not in r.headers
 
     def test_cannot_encode(self):
         r = tresp()
-        assert r.encode("gzip")
-        with decoded(r):
-            r.content = None
+        r.encode("gzip")
+        r.content = None
+        assert r.headers["content-encoding"]
+        assert r.raw_content is None
 
+        r.headers["content-encoding"] = "zopfli"
+        r.content = b"foo"
         assert "content-encoding" not in r.headers
-        assert r.content is None
+        assert r.raw_content == b"foo"
+
+
+class TestMessageText(object):
+    def test_simple(self):
+        r = tresp(content=b'\xc3\xbc')
+        assert r.raw_content == b"\xc3\xbc"
+        assert r.content == b"\xc3\xbc"
+        assert r.text == u"ü"
+
+        r.encode("gzip")
+        assert r.text == u"ü"
+        r.decode()
+        assert r.text == u"ü"
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        assert r.content == b"\xc3\xbc"
+        assert r.text == u"Ã¼"
+
+    def test_modify(self):
+        r = tresp()
+
+        r.text = u"ü"
+        assert r.raw_content == b"\xc3\xbc"
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u"ü"
+        assert r.raw_content == b"\xfc"
+        assert r.headers["content-length"] == "1"
+
+    def test_unknown_ce(self):
+        r = tresp()
+        r.headers["content-type"] = "text/html; charset=wtf"
+        r.raw_content = b"foo"
+        assert r.text == u"foo"
+
+    def test_cannot_decode(self):
+        r = tresp()
+        r.raw_content = b"\xFF"
+        assert r.text == u'\ufffd' if six.PY2 else '\udcff'
+
+    def test_cannot_encode(self):
+        r = tresp()
+        r.content = None
+        assert "content-type" not in r.headers
+        assert r.raw_content is None
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u"☃"
+        assert r.headers["content-type"] == "text/html; charset=utf-8"
+        assert r.raw_content == b'\xe2\x98\x83'
+
+        r.headers["content-type"] = "text/html; charset=latin1"
+        r.text = u'\udcff'
+        assert r.headers["content-type"] == "text/html; charset=utf-8"
+        assert r.raw_content == (b'\xed\xb3\xbf' if six.PY2 else b"\xFF")
-- 
cgit v1.2.3


From 2f8a1fd2cb1374941f436f36bbfa0d0b3d9213c7 Mon Sep 17 00:00:00 2001
From: Maximilian Hils <git@maximilianhils.com>
Date: Sat, 2 Jul 2016 03:03:42 -0700
Subject: tests++

---
 test/netlib/http/test_message.py | 44 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'test/netlib/http/test_message.py')

diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index aecde1ec..e1707a91 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, print_function, division
 
+import mock
 import six
 
 from netlib.tutils import tresp
+from netlib import http
 
 
 def _test_passthrough_attr(message, attr):
@@ -69,6 +71,15 @@ class TestMessage(object):
 
         assert resp != 0
 
+    def test_hash(self):
+        resp = tresp()
+        assert hash(resp)
+
+    def test_serializable(self):
+        resp = tresp()
+        resp2 = http.Response.from_state(resp.get_state())
+        assert resp == resp2
+
     def test_content_length_update(self):
         resp = tresp()
         resp.content = b"foo"
@@ -93,7 +104,7 @@ class TestMessage(object):
     def test_timestamp_end(self):
         _test_passthrough_attr(tresp(), "timestamp_end")
 
-    def teste_http_version(self):
+    def test_http_version(self):
         _test_decoded_attr(tresp(), "http_version")
 
 
@@ -109,6 +120,14 @@ class TestMessageContentEncoding(object):
         assert r.content == b"message"
         assert r.raw_content != b"message"
 
+        r.raw_content = b"foo"
+        with mock.patch("netlib.encoding.decode") as e:
+            assert r.content
+            assert e.call_count == 1
+            e.reset_mock()
+            assert r.content
+            assert e.call_count == 0
+
     def test_modify(self):
         r = tresp()
         assert "content-encoding" not in r.headers
@@ -119,6 +138,13 @@ class TestMessageContentEncoding(object):
         r.decode()
         assert r.raw_content == b"foo"
 
+        r.encode("identity")
+        with mock.patch("netlib.encoding.encode") as e:
+            r.content = b"foo"
+            assert e.call_count == 0
+            r.content = b"bar"
+            assert e.call_count == 1
+
     def test_unknown_ce(self):
         r = tresp()
         r.headers["content-encoding"] = "zopfli"
@@ -165,6 +191,15 @@ class TestMessageText(object):
         assert r.content == b"\xc3\xbc"
         assert r.text == u"Ã¼"
 
+        r.encode("identity")
+        r.raw_content = b"foo"
+        with mock.patch("netlib.encoding.decode") as e:
+            assert r.text
+            assert e.call_count == 2
+            e.reset_mock()
+            assert r.text
+            assert e.call_count == 0
+
     def test_modify(self):
         r = tresp()
 
@@ -176,6 +211,13 @@ class TestMessageText(object):
         assert r.raw_content == b"\xfc"
         assert r.headers["content-length"] == "1"
 
+        r.encode("identity")
+        with mock.patch("netlib.encoding.encode") as e:
+            r.text = u"ü"
+            assert e.call_count == 0
+            r.text = u"ä"
+            assert e.call_count == 2
+
     def test_unknown_ce(self):
         r = tresp()
         r.headers["content-type"] = "text/html; charset=wtf"
-- 
cgit v1.2.3


From a6b3551934e2b8768177d6831ca08f97f5bdae44 Mon Sep 17 00:00:00 2001
From: Maximilian Hils <git@maximilianhils.com>
Date: Mon, 4 Jul 2016 13:58:09 -0700
Subject: raise ValueError if content-encoding is invalid

---
 test/netlib/http/test_message.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'test/netlib/http/test_message.py')

diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index e1707a91..ed7d3da5 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -5,7 +5,7 @@ import mock
 import six
 
 from netlib.tutils import tresp
-from netlib import http
+from netlib import http, tutils
 
 
 def _test_passthrough_attr(message, attr):
@@ -92,9 +92,6 @@ class TestMessage(object):
         assert resp.data.content == b"bar"
         assert resp.headers["content-length"] == "0"
 
-    def test_content_basic(self):
-        _test_passthrough_attr(tresp(), "content")
-
     def test_headers(self):
         _test_passthrough_attr(tresp(), "headers")
 
@@ -149,18 +146,22 @@ class TestMessageContentEncoding(object):
         r = tresp()
         r.headers["content-encoding"] = "zopfli"
         r.raw_content = b"foo"
-        assert r.content == b"foo"
+        with tutils.raises(ValueError):
+            assert r.content
         assert r.headers["content-encoding"]
 
     def test_cannot_decode(self):
         r = tresp()
         r.encode("gzip")
         r.raw_content = b"foo"
-        assert r.content == b"foo"
+        with tutils.raises(ValueError):
+            assert r.content
         assert r.headers["content-encoding"]
-        r.decode()
+
+        with tutils.raises(ValueError):
+            r.decode()
         assert r.raw_content == b"foo"
-        assert "content-encoding" not in r.headers
+        assert "content-encoding" in r.headers
 
     def test_cannot_encode(self):
         r = tresp()
@@ -213,6 +214,7 @@ class TestMessageText(object):
 
         r.encode("identity")
         with mock.patch("netlib.encoding.encode") as e:
+            e.return_value = b""
             r.text = u"ü"
             assert e.call_count == 0
             r.text = u"ä"
-- 
cgit v1.2.3


From a3c7c84d49c3e6563e7f37ef60c989f99ed96788 Mon Sep 17 00:00:00 2001
From: Maximilian Hils <git@maximilianhils.com>
Date: Fri, 15 Jul 2016 22:50:33 -0700
Subject: improve message content semantics

---
 test/netlib/http/test_message.py | 77 +++++++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 12 deletions(-)

(limited to 'test/netlib/http/test_message.py')

diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py
index ed7d3da5..8b178e04 100644
--- a/test/netlib/http/test_message.py
+++ b/test/netlib/http/test_message.py
@@ -142,6 +142,9 @@ class TestMessageContentEncoding(object):
             r.content = b"bar"
             assert e.call_count == 1
 
+        with tutils.raises(TypeError):
+            r.content = u"foo"
+
     def test_unknown_ce(self):
         r = tresp()
         r.headers["content-encoding"] = "zopfli"
@@ -149,6 +152,7 @@ class TestMessageContentEncoding(object):
         with tutils.raises(ValueError):
             assert r.content
         assert r.headers["content-encoding"]
+        assert r.get_content(strict=False) == b"foo"
 
     def test_cannot_decode(self):
         r = tresp()
@@ -157,12 +161,25 @@ class TestMessageContentEncoding(object):
         with tutils.raises(ValueError):
             assert r.content
         assert r.headers["content-encoding"]
+        assert r.get_content(strict=False) == b"foo"
 
         with tutils.raises(ValueError):
             r.decode()
         assert r.raw_content == b"foo"
         assert "content-encoding" in r.headers
 
+        r.decode(strict=False)
+        assert r.content == b"foo"
+        assert "content-encoding" not in r.headers
+
+    def test_none(self):
+        r = tresp(content=None)
+        assert r.content is None
+        r.content = b"foo"
+        assert r.content is not None
+        r.content = None
+        assert r.content is None
+
     def test_cannot_encode(self):
         r = tresp()
         r.encode("gzip")
@@ -175,12 +192,17 @@ class TestMessageContentEncoding(object):
         assert "content-encoding" not in r.headers
         assert r.raw_content == b"foo"
 
+        with tutils.raises(ValueError):
+            r.encode("zopfli")
+        assert r.raw_content == b"foo"
+        assert "content-encoding" not in r.headers
+
 
 class TestMessageText(object):
     def test_simple(self):
-        r = tresp(content=b'\xc3\xbc')
-        assert r.raw_content == b"\xc3\xbc"
-        assert r.content == b"\xc3\xbc"
+        r = tresp(content=b'\xfc')
+        assert r.raw_content == b"\xfc"
+        assert r.content == b"\xfc"
         assert r.text == u"ü"
 
         r.encode("gzip")
@@ -189,8 +211,10 @@ class TestMessageText(object):
         assert r.text == u"ü"
 
         r.headers["content-type"] = "text/html; charset=latin1"
-        assert r.content == b"\xc3\xbc"
+        r.content = b"\xc3\xbc"
         assert r.text == u"Ã¼"
+        r.headers["content-type"] = "text/html; charset=utf8"
+        assert r.text == u"ü"
 
         r.encode("identity")
         r.raw_content = b"foo"
@@ -201,16 +225,29 @@ class TestMessageText(object):
             assert r.text
             assert e.call_count == 0
 
+    def test_guess_json(self):
+        r = tresp(content=b'"\xc3\xbc"')
+        r.headers["content-type"] = "application/json"
+        assert r.text == u'"ü"'
+
+    def test_none(self):
+        r = tresp(content=None)
+        assert r.text is None
+        r.text = b"foo"
+        assert r.text is not None
+        r.text = None
+        assert r.text is None
+
     def test_modify(self):
         r = tresp()
 
         r.text = u"ü"
-        assert r.raw_content == b"\xc3\xbc"
+        assert r.raw_content == b"\xfc"
 
-        r.headers["content-type"] = "text/html; charset=latin1"
+        r.headers["content-type"] = "text/html; charset=utf8"
         r.text = u"ü"
-        assert r.raw_content == b"\xfc"
-        assert r.headers["content-length"] == "1"
+        assert r.raw_content == b"\xc3\xbc"
+        assert r.headers["content-length"] == "2"
 
         r.encode("identity")
         with mock.patch("netlib.encoding.encode") as e:
@@ -224,12 +261,18 @@ class TestMessageText(object):
         r = tresp()
         r.headers["content-type"] = "text/html; charset=wtf"
         r.raw_content = b"foo"
-        assert r.text == u"foo"
+        with tutils.raises(ValueError):
+            assert r.text == u"foo"
+        assert r.get_text(strict=False) == u"foo"
 
     def test_cannot_decode(self):
         r = tresp()
+        r.headers["content-type"] = "text/html; charset=utf8"
         r.raw_content = b"\xFF"
-        assert r.text == u'\ufffd' if six.PY2 else '\udcff'
+        with tutils.raises(ValueError):
+            assert r.text
+
+        assert r.get_text(strict=False) == (u'\ufffd' if six.PY2 else '\udcff')
 
     def test_cannot_encode(self):
         r = tresp()
@@ -237,9 +280,19 @@ class TestMessageText(object):
         assert "content-type" not in r.headers
         assert r.raw_content is None
 
-        r.headers["content-type"] = "text/html; charset=latin1"
+        r.headers["content-type"] = "text/html; charset=latin1; foo=bar"
         r.text = u"☃"
-        assert r.headers["content-type"] == "text/html; charset=utf-8"
+        assert r.headers["content-type"] == "text/html; charset=utf-8; foo=bar"
+        assert r.raw_content == b'\xe2\x98\x83'
+
+        r.headers["content-type"] = "gibberish"
+        r.text = u"☃"
+        assert r.headers["content-type"] == "text/plain; charset=utf-8"
+        assert r.raw_content == b'\xe2\x98\x83'
+
+        del r.headers["content-type"]
+        r.text = u"☃"
+        assert r.headers["content-type"] == "text/plain; charset=utf-8"
         assert r.raw_content == b'\xe2\x98\x83'
 
         r.headers["content-type"] = "text/html; charset=latin1"
-- 
cgit v1.2.3