From 6032c4f2352260d32032800a2ff694339e2af6b2 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 2 Jul 2016 01:51:47 -0700 Subject: message.content -> .raw_content, implement .text This PR improves our handling of HTTP message body encodings: - The unaltered message body is now accessible as `.raw_content` - The "content-encoding"-decoded content (i.e. gzip removed) is now `.content`, as this is what we want in 99% of the cases. - `.text` now provides the "content-encoding"-decoded and then "content-type charset"-decoded message body. - The decoded values for `.content` and `.text` are cached, so that repeated access and `x.text = x.text` are cheap. - The `decoded()` decorator is now deprecated, as we can now just use `.content`. Similarly `HTTPMessage.get_decoded_content()` is deprecated. --- test/netlib/http/test_message.py | 117 ++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 33 deletions(-) (limited to 'test/netlib/http/test_message.py') diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index f5bf7f0c..aecde1ec 100644 --- a/test/netlib/http/test_message.py +++ b/test/netlib/http/test_message.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -from netlib.http import decoded +import six + from netlib.tutils import tresp @@ -76,6 +77,9 @@ class TestMessage(object): resp.content = b"" assert resp.data.content == b"" assert resp.headers["content-length"] == "0" + resp.raw_content = b"bar" + assert resp.data.content == b"bar" + assert resp.headers["content-length"] == "0" def test_content_basic(self): _test_passthrough_attr(tresp(), "content") @@ -93,61 +97,108 @@ class TestMessage(object): _test_decoded_attr(tresp(), "http_version") -class TestDecodedDecorator(object): - +class TestMessageContentEncoding(object): def test_simple(self): r = tresp() - assert r.content == b"message" + assert r.raw_content == b"message" assert 
"content-encoding" not in r.headers - assert r.encode("gzip") + r.encode("gzip") assert r.headers["content-encoding"] - assert r.content != b"message" - with decoded(r): - assert "content-encoding" not in r.headers - assert r.content == b"message" - assert r.headers["content-encoding"] - assert r.content != b"message" + assert r.raw_content != b"message" + assert r.content == b"message" + assert r.raw_content != b"message" def test_modify(self): r = tresp() assert "content-encoding" not in r.headers - assert r.encode("gzip") - - with decoded(r): - r.content = b"foo" + r.encode("gzip") - assert r.content != b"foo" + r.content = b"foo" + assert r.raw_content != b"foo" r.decode() - assert r.content == b"foo" + assert r.raw_content == b"foo" def test_unknown_ce(self): r = tresp() r.headers["content-encoding"] = "zopfli" - r.content = b"foo" - with decoded(r): - assert r.headers["content-encoding"] - assert r.content == b"foo" - assert r.headers["content-encoding"] + r.raw_content = b"foo" assert r.content == b"foo" + assert r.headers["content-encoding"] def test_cannot_decode(self): r = tresp() - assert r.encode("gzip") - r.content = b"foo" - with decoded(r): - assert r.headers["content-encoding"] - assert r.content == b"foo" + r.encode("gzip") + r.raw_content = b"foo" + assert r.content == b"foo" assert r.headers["content-encoding"] - assert r.content != b"foo" r.decode() - assert r.content == b"foo" + assert r.raw_content == b"foo" + assert "content-encoding" not in r.headers def test_cannot_encode(self): r = tresp() - assert r.encode("gzip") - with decoded(r): - r.content = None + r.encode("gzip") + r.content = None + assert r.headers["content-encoding"] + assert r.raw_content is None + r.headers["content-encoding"] = "zopfli" + r.content = b"foo" assert "content-encoding" not in r.headers - assert r.content is None + assert r.raw_content == b"foo" + + +class TestMessageText(object): + def test_simple(self): + r = tresp(content=b'\xc3\xbc') + assert r.raw_content == 
b"\xc3\xbc" + assert r.content == b"\xc3\xbc" + assert r.text == u"ü" + + r.encode("gzip") + assert r.text == u"ü" + r.decode() + assert r.text == u"ü" + + r.headers["content-type"] = "text/html; charset=latin1" + assert r.content == b"\xc3\xbc" + assert r.text == u"ü" + + def test_modify(self): + r = tresp() + + r.text = u"ü" + assert r.raw_content == b"\xc3\xbc" + + r.headers["content-type"] = "text/html; charset=latin1" + r.text = u"ü" + assert r.raw_content == b"\xfc" + assert r.headers["content-length"] == "1" + + def test_unknown_ce(self): + r = tresp() + r.headers["content-type"] = "text/html; charset=wtf" + r.raw_content = b"foo" + assert r.text == u"foo" + + def test_cannot_decode(self): + r = tresp() + r.raw_content = b"\xFF" + assert r.text == u'\ufffd' if six.PY2 else '\udcff' + + def test_cannot_encode(self): + r = tresp() + r.content = None + assert "content-type" not in r.headers + assert r.raw_content is None + + r.headers["content-type"] = "text/html; charset=latin1" + r.text = u"☃" + assert r.headers["content-type"] == "text/html; charset=utf-8" + assert r.raw_content == b'\xe2\x98\x83' + + r.headers["content-type"] = "text/html; charset=latin1" + r.text = u'\udcff' + assert r.headers["content-type"] == "text/html; charset=utf-8" + assert r.raw_content == b'\xed\xb3\xbf' if six.PY2 else b"\xFF" -- cgit v1.2.3 From 2f8a1fd2cb1374941f436f36bbfa0d0b3d9213c7 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 2 Jul 2016 03:03:42 -0700 Subject: tests++ --- test/netlib/http/test_message.py | 44 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'test/netlib/http/test_message.py') diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index aecde1ec..e1707a91 100644 --- a/test/netlib/http/test_message.py +++ b/test/netlib/http/test_message.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division +import mock import six 
from netlib.tutils import tresp +from netlib import http def _test_passthrough_attr(message, attr): @@ -69,6 +71,15 @@ class TestMessage(object): assert resp != 0 + def test_hash(self): + resp = tresp() + assert hash(resp) + + def test_serializable(self): + resp = tresp() + resp2 = http.Response.from_state(resp.get_state()) + assert resp == resp2 + def test_content_length_update(self): resp = tresp() resp.content = b"foo" @@ -93,7 +104,7 @@ class TestMessage(object): def test_timestamp_end(self): _test_passthrough_attr(tresp(), "timestamp_end") - def teste_http_version(self): + def test_http_version(self): _test_decoded_attr(tresp(), "http_version") @@ -109,6 +120,14 @@ class TestMessageContentEncoding(object): assert r.content == b"message" assert r.raw_content != b"message" + r.raw_content = b"foo" + with mock.patch("netlib.encoding.decode") as e: + assert r.content + assert e.call_count == 1 + e.reset_mock() + assert r.content + assert e.call_count == 0 + def test_modify(self): r = tresp() assert "content-encoding" not in r.headers @@ -119,6 +138,13 @@ class TestMessageContentEncoding(object): r.decode() assert r.raw_content == b"foo" + r.encode("identity") + with mock.patch("netlib.encoding.encode") as e: + r.content = b"foo" + assert e.call_count == 0 + r.content = b"bar" + assert e.call_count == 1 + def test_unknown_ce(self): r = tresp() r.headers["content-encoding"] = "zopfli" @@ -165,6 +191,15 @@ class TestMessageText(object): assert r.content == b"\xc3\xbc" assert r.text == u"ü" + r.encode("identity") + r.raw_content = b"foo" + with mock.patch("netlib.encoding.decode") as e: + assert r.text + assert e.call_count == 2 + e.reset_mock() + assert r.text + assert e.call_count == 0 + def test_modify(self): r = tresp() @@ -176,6 +211,13 @@ class TestMessageText(object): assert r.raw_content == b"\xfc" assert r.headers["content-length"] == "1" + r.encode("identity") + with mock.patch("netlib.encoding.encode") as e: + r.text = u"ü" + assert e.call_count == 0 + 
r.text = u"ä" + assert e.call_count == 2 + def test_unknown_ce(self): r = tresp() r.headers["content-type"] = "text/html; charset=wtf" -- cgit v1.2.3 From a6b3551934e2b8768177d6831ca08f97f5bdae44 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 4 Jul 2016 13:58:09 -0700 Subject: raise ValueError if content-encoding is invalid --- test/netlib/http/test_message.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'test/netlib/http/test_message.py') diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index e1707a91..ed7d3da5 100644 --- a/test/netlib/http/test_message.py +++ b/test/netlib/http/test_message.py @@ -5,7 +5,7 @@ import mock import six from netlib.tutils import tresp -from netlib import http +from netlib import http, tutils def _test_passthrough_attr(message, attr): @@ -92,9 +92,6 @@ class TestMessage(object): assert resp.data.content == b"bar" assert resp.headers["content-length"] == "0" - def test_content_basic(self): - _test_passthrough_attr(tresp(), "content") - def test_headers(self): _test_passthrough_attr(tresp(), "headers") @@ -149,18 +146,22 @@ class TestMessageContentEncoding(object): r = tresp() r.headers["content-encoding"] = "zopfli" r.raw_content = b"foo" - assert r.content == b"foo" + with tutils.raises(ValueError): + assert r.content assert r.headers["content-encoding"] def test_cannot_decode(self): r = tresp() r.encode("gzip") r.raw_content = b"foo" - assert r.content == b"foo" + with tutils.raises(ValueError): + assert r.content assert r.headers["content-encoding"] - r.decode() + + with tutils.raises(ValueError): + r.decode() assert r.raw_content == b"foo" - assert "content-encoding" not in r.headers + assert "content-encoding" in r.headers def test_cannot_encode(self): r = tresp() @@ -213,6 +214,7 @@ class TestMessageText(object): r.encode("identity") with mock.patch("netlib.encoding.encode") as e: + e.return_value = b"" r.text = u"ü" assert e.call_count == 0 r.text 
= u"ä" -- cgit v1.2.3 From a3c7c84d49c3e6563e7f37ef60c989f99ed96788 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 15 Jul 2016 22:50:33 -0700 Subject: improve message content semantics --- test/netlib/http/test_message.py | 77 +++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 12 deletions(-) (limited to 'test/netlib/http/test_message.py') diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index ed7d3da5..8b178e04 100644 --- a/test/netlib/http/test_message.py +++ b/test/netlib/http/test_message.py @@ -142,6 +142,9 @@ class TestMessageContentEncoding(object): r.content = b"bar" assert e.call_count == 1 + with tutils.raises(TypeError): + r.content = u"foo" + def test_unknown_ce(self): r = tresp() r.headers["content-encoding"] = "zopfli" @@ -149,6 +152,7 @@ class TestMessageContentEncoding(object): with tutils.raises(ValueError): assert r.content assert r.headers["content-encoding"] + assert r.get_content(strict=False) == b"foo" def test_cannot_decode(self): r = tresp() @@ -157,12 +161,25 @@ class TestMessageContentEncoding(object): with tutils.raises(ValueError): assert r.content assert r.headers["content-encoding"] + assert r.get_content(strict=False) == b"foo" with tutils.raises(ValueError): r.decode() assert r.raw_content == b"foo" assert "content-encoding" in r.headers + r.decode(strict=False) + assert r.content == b"foo" + assert "content-encoding" not in r.headers + + def test_none(self): + r = tresp(content=None) + assert r.content is None + r.content = b"foo" + assert r.content is not None + r.content = None + assert r.content is None + def test_cannot_encode(self): r = tresp() r.encode("gzip") @@ -175,12 +192,17 @@ class TestMessageContentEncoding(object): assert "content-encoding" not in r.headers assert r.raw_content == b"foo" + with tutils.raises(ValueError): + r.encode("zopfli") + assert r.raw_content == b"foo" + assert "content-encoding" not in r.headers + class TestMessageText(object): 
def test_simple(self): - r = tresp(content=b'\xc3\xbc') - assert r.raw_content == b"\xc3\xbc" - assert r.content == b"\xc3\xbc" + r = tresp(content=b'\xfc') + assert r.raw_content == b"\xfc" + assert r.content == b"\xfc" assert r.text == u"ü" r.encode("gzip") @@ -189,8 +211,10 @@ class TestMessageText(object): assert r.text == u"ü" r.headers["content-type"] = "text/html; charset=latin1" - assert r.content == b"\xc3\xbc" + r.content = b"\xc3\xbc" assert r.text == u"ü" + r.headers["content-type"] = "text/html; charset=utf8" + assert r.text == u"ü" r.encode("identity") r.raw_content = b"foo" @@ -201,16 +225,29 @@ class TestMessageText(object): assert r.text assert e.call_count == 0 + def test_guess_json(self): + r = tresp(content=b'"\xc3\xbc"') + r.headers["content-type"] = "application/json" + assert r.text == u'"ü"' + + def test_none(self): + r = tresp(content=None) + assert r.text is None + r.text = b"foo" + assert r.text is not None + r.text = None + assert r.text is None + def test_modify(self): r = tresp() r.text = u"ü" - assert r.raw_content == b"\xc3\xbc" + assert r.raw_content == b"\xfc" - r.headers["content-type"] = "text/html; charset=latin1" + r.headers["content-type"] = "text/html; charset=utf8" r.text = u"ü" - assert r.raw_content == b"\xfc" - assert r.headers["content-length"] == "1" + assert r.raw_content == b"\xc3\xbc" + assert r.headers["content-length"] == "2" r.encode("identity") with mock.patch("netlib.encoding.encode") as e: @@ -224,12 +261,18 @@ class TestMessageText(object): r = tresp() r.headers["content-type"] = "text/html; charset=wtf" r.raw_content = b"foo" - assert r.text == u"foo" + with tutils.raises(ValueError): + assert r.text == u"foo" + assert r.get_text(strict=False) == u"foo" def test_cannot_decode(self): r = tresp() + r.headers["content-type"] = "text/html; charset=utf8" r.raw_content = b"\xFF" - assert r.text == u'\ufffd' if six.PY2 else '\udcff' + with tutils.raises(ValueError): + assert r.text + + assert 
r.get_text(strict=False) == u'\ufffd' if six.PY2 else '\udcff' def test_cannot_encode(self): r = tresp() @@ -237,9 +280,19 @@ class TestMessageText(object): assert "content-type" not in r.headers assert r.raw_content is None - r.headers["content-type"] = "text/html; charset=latin1" + r.headers["content-type"] = "text/html; charset=latin1; foo=bar" r.text = u"☃" - assert r.headers["content-type"] == "text/html; charset=utf-8" + assert r.headers["content-type"] == "text/html; charset=utf-8; foo=bar" + assert r.raw_content == b'\xe2\x98\x83' + + r.headers["content-type"] = "gibberish" + r.text = u"☃" + assert r.headers["content-type"] == "text/plain; charset=utf-8" + assert r.raw_content == b'\xe2\x98\x83' + + del r.headers["content-type"] + r.text = u"☃" + assert r.headers["content-type"] == "text/plain; charset=utf-8" assert r.raw_content == b'\xe2\x98\x83' r.headers["content-type"] = "text/html; charset=latin1" -- cgit v1.2.3