diff options
author | Maximilian Hils <git@maximilianhils.com> | 2016-07-02 01:51:47 -0700 |
---|---|---|
committer | Maximilian Hils <git@maximilianhils.com> | 2016-07-02 01:51:47 -0700 |
commit | 6032c4f2352260d32032800a2ff694339e2af6b2 (patch) | |
tree | e242ede8ebb828f424f270aeb5143516ed048939 | |
parent | 2c09e0416bcf94d9ebef7c11bb1883388e8e2c5d (diff) | |
download | mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.gz mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.bz2 mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.zip |
message.content -> .raw_content, implement .text
This PR improves our handling of HTTP message body encodings:
- The unaltered message body is now accessible as `.raw_content`
- The "content-encoding"-decoded content (i.e. gzip removed)
is now `.content`, as this is what we want in 99% of the cases.
- `.text` now provides the "content-encoding"-decoded and then
"content-type charset"-decoded message body.
- The decoded values for `.content` and `.text` are cached,
so that repeated access and `x.text = x.text` is cheap.
- The `decoded()` context manager is now deprecated, as we can now just use
`.content`. Similarly, `HTTPMessage.get_decoded_content()` is
deprecated.
-rw-r--r-- | docs/dev/models.rst | 2 | ||||
-rw-r--r-- | mitmproxy/console/common.py | 37 | ||||
-rw-r--r-- | mitmproxy/console/flowview.py | 33 | ||||
-rw-r--r-- | mitmproxy/contentviews.py | 17 | ||||
-rw-r--r-- | mitmproxy/dump.py | 4 | ||||
-rw-r--r-- | mitmproxy/filt.py | 8 | ||||
-rw-r--r-- | mitmproxy/flow/master.py | 8 | ||||
-rw-r--r-- | mitmproxy/flow/modules.py | 4 | ||||
-rw-r--r-- | mitmproxy/models/http.py | 8 | ||||
-rw-r--r-- | mitmproxy/protocol/http.py | 4 | ||||
-rw-r--r-- | mitmproxy/web/app.py | 4 | ||||
-rw-r--r-- | netlib/encoding.py | 97 | ||||
-rw-r--r-- | netlib/http/http1/assemble.py | 4 | ||||
-rw-r--r-- | netlib/http/message.py | 192 | ||||
-rw-r--r-- | netlib/http/request.py | 4 | ||||
-rw-r--r-- | netlib/http/response.py | 5 | ||||
-rw-r--r-- | test/mitmproxy/test_contentview.py | 22 | ||||
-rw-r--r-- | test/mitmproxy/test_examples.py | 10 | ||||
-rw-r--r-- | test/mitmproxy/test_flow.py | 18 | ||||
-rw-r--r-- | test/mitmproxy/test_protocol_http2.py | 6 | ||||
-rw-r--r-- | test/mitmproxy/tservers.py | 1 | ||||
-rw-r--r-- | test/netlib/http/test_message.py | 117 | ||||
-rw-r--r-- | test/netlib/test_encoding.py | 40 |
23 files changed, 377 insertions, 268 deletions
diff --git a/docs/dev/models.rst b/docs/dev/models.rst index 02f36f58..7260f1f7 100644 --- a/docs/dev/models.rst +++ b/docs/dev/models.rst @@ -56,8 +56,6 @@ Datastructures :special-members: :no-undoc-members: - .. autoclass:: decoded - .. automodule:: netlib.multidict .. autoclass:: MultiDictView diff --git a/mitmproxy/console/common.py b/mitmproxy/console/common.py index b450c19d..b4369c0c 100644 --- a/mitmproxy/console/common.py +++ b/mitmproxy/console/common.py @@ -7,7 +7,6 @@ import urwid.util import netlib from mitmproxy import flow -from mitmproxy import models from mitmproxy import utils from mitmproxy.console import signals from netlib import human @@ -259,26 +258,24 @@ def copy_flow_format_data(part, scope, flow): if scope in ("q", "a"): if flow.request.content is None: return None, "Request content is missing" - with models.decoded(flow.request): - if part == "h": - data += netlib.http.http1.assemble_request(flow.request) - elif part == "c": - data += flow.request.content - else: - raise ValueError("Unknown part: {}".format(part)) + if part == "h": + data += netlib.http.http1.assemble_request(flow.request) + elif part == "c": + data += flow.request.content + else: + raise ValueError("Unknown part: {}".format(part)) if scope == "a" and flow.request.content and flow.response: # Add padding between request and response data += "\r\n" * 2 if scope in ("s", "a") and flow.response: if flow.response.content is None: return None, "Response content is missing" - with models.decoded(flow.response): - if part == "h": - data += netlib.http.http1.assemble_response(flow.response) - elif part == "c": - data += flow.response.content - else: - raise ValueError("Unknown part: {}".format(part)) + if part == "h": + data += netlib.http.http1.assemble_response(flow.response) + elif part == "c": + data += flow.response.content + else: + raise ValueError("Unknown part: {}".format(part)) return data, False @@ -388,12 +385,12 @@ def ask_save_body(part, master, state, flow): elif 
part == "q" and request_has_content: ask_save_path( "Save request content", - flow.request.get_decoded_content() + flow.request.content ) elif part == "s" and response_has_content: ask_save_path( "Save response content", - flow.response.get_decoded_content() + flow.response.content ) else: signals.status_message.send(message="No content to save.") @@ -418,9 +415,9 @@ def format_flow(f, focus, extended=False, hostheader=False, marked=False): marked = marked, ) if f.response: - if f.response.content: - contentdesc = human.pretty_size(len(f.response.content)) - elif f.response.content is None: + if f.response.raw_content: + contentdesc = human.pretty_size(len(f.response.raw_content)) + elif f.response.raw_content is None: contentdesc = "[content missing]" else: contentdesc = "[no content]" diff --git a/mitmproxy/console/flowview.py b/mitmproxy/console/flowview.py index e9b23176..208b0d44 100644 --- a/mitmproxy/console/flowview.py +++ b/mitmproxy/console/flowview.py @@ -176,7 +176,7 @@ class FlowView(tabs.Tabs): self.show() def content_view(self, viewmode, message): - if message.content is None: + if message.raw_content is None: msg, body = "", [urwid.Text([("error", "[content missing]")])] return msg, body else: @@ -214,6 +214,12 @@ class FlowView(tabs.Tabs): ) description = description.replace("Raw", "Couldn't parse: falling back to Raw") + if message.content != message.raw_content: + description = "[decoded {enc}] {desc}".format( + enc=message.headers.get("content-encoding"), + desc=description + ) + # Give hint that you have to tab for the response. if description == "No content" and isinstance(message, models.HTTPRequest): description = "No request content (press tab to view response)" @@ -407,15 +413,14 @@ class FlowView(tabs.Tabs): ) ) if part == "r": - with models.decoded(message): - # Fix an issue caused by some editors when editing a - # request/response body. Many editors make it hard to save a - # file without a terminating newline on the last line. 
When - # editing message bodies, this can cause problems. For now, I - # just strip the newlines off the end of the body when we return - # from an editor. - c = self.master.spawn_editor(message.content or "") - message.content = c.rstrip("\n") + # Fix an issue caused by some editors when editing a + # request/response body. Many editors make it hard to save a + # file without a terminating newline on the last line. When + # editing message bodies, this can cause problems. For now, I + # just strip the newlines off the end of the body when we return + # from an editor. + c = self.master.spawn_editor(message.content or b"") + message.content = c.rstrip(b"\n") elif part == "f": if not message.urlencoded_form and message.content: signals.status_prompt_onekey.send( @@ -512,14 +517,10 @@ class FlowView(tabs.Tabs): signals.flow_change.send(self, flow = self.flow) def delete_body(self, t): - if t == "m": - val = None - else: - val = None if self.tab_offset == TAB_REQ: - self.flow.request.content = val + self.flow.request.content = None else: - self.flow.response.content = val + self.flow.response.content = None signals.flow_change.send(self, flow = self.flow) def keypress(self, size, key): diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index de88c9ea..c9ea14ba 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -618,15 +618,6 @@ def get_content_view(viewmode, data, **metadata): Raises: ContentViewException, if the content view threw an error. """ - msg = [] - - headers = metadata.get("headers", {}) - enc = headers.get("content-encoding") - if enc and enc != "identity": - decoded = encoding.decode(enc, data) - if decoded: - data = decoded - msg.append("[decoded %s]" % enc) try: ret = viewmode(data, **metadata) # Third-party viewers can fail in unexpected ways... 
@@ -637,8 +628,8 @@ def get_content_view(viewmode, data, **metadata): sys.exc_info()[2] ) if not ret: - ret = get("Raw")(data, **metadata) - msg.append("Couldn't parse: falling back to Raw") + desc = "Couldn't parse: falling back to Raw" + _, content = get("Raw")(data, **metadata) else: - msg.append(ret[0]) - return " ".join(msg), safe_to_print(ret[1]) + desc, content = ret + return desc, safe_to_print(content) diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index 6670be9b..ea242bba 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -290,10 +290,10 @@ class DumpMaster(flow.FlowMaster): code = click.style(str(code), fg=code_color, bold=True, blink=(code == 418)) reason = click.style(strutils.bytes_to_escaped_str(flow.response.reason), fg=code_color, bold=True) - if flow.response.content is None: + if flow.response.raw_content is None: size = "(content missing)" else: - size = human.pretty_size(len(flow.response.content)) + size = human.pretty_size(len(flow.response.raw_content)) size = click.style(size, bold=True) arrows = click.style("<<", bold=True) diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py index b1b72aa7..95bae1ae 100644 --- a/mitmproxy/filt.py +++ b/mitmproxy/filt.py @@ -194,10 +194,10 @@ class FBod(_Rex): def __call__(self, f): if f.request and f.request.content: - if self.re.search(f.request.get_decoded_content()): + if self.re.search(f.request.content): return True if f.response and f.response.content: - if self.re.search(f.response.get_decoded_content()): + if self.re.search(f.response.content): return True return False @@ -208,7 +208,7 @@ class FBodRequest(_Rex): def __call__(self, f): if f.request and f.request.content: - if self.re.search(f.request.get_decoded_content()): + if self.re.search(f.request.content): return True @@ -218,7 +218,7 @@ class FBodResponse(_Rex): def __call__(self, f): if f.response and f.response.content: - if self.re.search(f.response.get_decoded_content()): + if self.re.search(f.response.content): return True 
diff --git a/mitmproxy/flow/master.py b/mitmproxy/flow/master.py index efb5d013..a4aa9a7e 100644 --- a/mitmproxy/flow/master.py +++ b/mitmproxy/flow/master.py @@ -16,7 +16,6 @@ from mitmproxy.flow import modules from mitmproxy.onboarding import app from mitmproxy.protocol import http_replay from mitmproxy.proxy.config import HostMatcher -from netlib import strutils class FlowMaster(controller.Master): @@ -348,13 +347,16 @@ class FlowMaster(controller.Master): return "Can't replay live request." if f.intercepted: return "Can't replay while intercepting..." - if f.request.content is None: + if f.request.raw_content is None: return "Can't replay request with missing content..." if f.request: f.backup() f.request.is_replay = True + + # TODO: We should be able to remove this. if "Content-Length" in f.request.headers: - f.request.headers["Content-Length"] = str(len(f.request.content)) + f.request.headers["Content-Length"] = str(len(f.request.raw_content)) + f.response = None f.error = None self.process_new_request(f) diff --git a/mitmproxy/flow/modules.py b/mitmproxy/flow/modules.py index 2998d259..85dff0f1 100644 --- a/mitmproxy/flow/modules.py +++ b/mitmproxy/flow/modules.py @@ -157,7 +157,7 @@ class StreamLargeBodies(object): expected_size = http1.expected_http_body_size( flow.request, flow.response if not is_request else None ) - if not r.content and not (0 <= expected_size <= self.max_size): + if not r.raw_content and not (0 <= expected_size <= self.max_size): # r.stream may already be a callable, which we want to preserve. 
r.stream = r.stream or True @@ -251,7 +251,7 @@ class ServerPlaybackState: if p[0] not in self.ignore_payload_params ) else: - key.append(str(r.content)) + key.append(str(r.raw_content)) if not self.ignore_host: key.append(r.host) diff --git a/mitmproxy/models/http.py b/mitmproxy/models/http.py index 01f5f1ee..a50808ef 100644 --- a/mitmproxy/models/http.py +++ b/mitmproxy/models/http.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, print_function, division import cgi +import warnings from mitmproxy.models.flow import Flow -from netlib import encoding from netlib import version from netlib.http import Headers from netlib.http import Request @@ -20,10 +20,8 @@ class MessageMixin(object): header. Doesn't change the message iteself or its headers. """ - ce = self.headers.get("content-encoding") - if not self.content or ce not in encoding.ENCODINGS: - return self.content - return encoding.decode(ce, self.content) + warnings.warn(".get_decoded_content() is deprecated, please use .content directly instead.", DeprecationWarning) + return self.content class HTTPRequest(MessageMixin, Request): diff --git a/mitmproxy/protocol/http.py b/mitmproxy/protocol/http.py index 187c17f6..2c70f288 100644 --- a/mitmproxy/protocol/http.py +++ b/mitmproxy/protocol/http.py @@ -41,10 +41,10 @@ class _HttpTransmissionLayer(base.Layer): yield "this is a generator" # pragma: no cover def send_response(self, response): - if response.content is None: + if response.data.content is None: raise netlib.exceptions.HttpException("Cannot assemble flow with missing content") self.send_response_headers(response) - self.send_response_body(response, [response.content]) + self.send_response_body(response, [response.data.content]) def send_response_headers(self, response): raise NotImplementedError() diff --git a/mitmproxy/web/app.py b/mitmproxy/web/app.py index a2798472..50fbaed8 100644 --- a/mitmproxy/web/app.py +++ b/mitmproxy/web/app.py @@ -272,7 +272,7 @@ class FlowContent(RequestHandler): def 
get(self, flow_id, message): message = getattr(self.flow, message) - if not message.content: + if not message.raw_content: raise APIError(400, "No content.") content_encoding = message.headers.get("Content-Encoding", None) @@ -295,7 +295,7 @@ class FlowContent(RequestHandler): self.set_header("Content-Type", "application/text") self.set_header("X-Content-Type-Options", "nosniff") self.set_header("X-Frame-Options", "DENY") - self.write(message.content) + self.write(message.raw_content) class Events(RequestHandler): diff --git a/netlib/encoding.py b/netlib/encoding.py index 98502451..8b67b543 100644 --- a/netlib/encoding.py +++ b/netlib/encoding.py @@ -1,39 +1,62 @@ """ - Utility functions for decoding response bodies. +Utility functions for decoding response bodies. """ from __future__ import absolute_import + +import codecs from io import BytesIO import gzip import zlib +from typing import Union # noqa + -ENCODINGS = {"identity", "gzip", "deflate"} +def decode(obj, encoding, errors='strict'): + # type: (Union[str, bytes], str) -> Union[str, bytes] + """ + Decode the given input object + Returns: + The decoded value -def decode(e, content): - if not isinstance(content, bytes): - return None - encoding_map = { - "identity": identity, - "gzip": decode_gzip, - "deflate": decode_deflate, - } - if e not in encoding_map: - return None - return encoding_map[e](content) + Raises: + ValueError, if decoding fails. 
+ """ + try: + try: + return custom_decode[encoding](obj) + except KeyError: + return codecs.decode(obj, encoding, errors) + except Exception as e: + raise ValueError("{} when decoding {} with {}".format( + type(e).__name__, + repr(obj)[:10], + repr(encoding), + )) + + +def encode(obj, encoding, errors='strict'): + # type: (Union[str, bytes], str) -> Union[str, bytes] + """ + Encode the given input object + Returns: + The encoded value -def encode(e, content): - if not isinstance(content, bytes): - return None - encoding_map = { - "identity": identity, - "gzip": encode_gzip, - "deflate": encode_deflate, - } - if e not in encoding_map: - return None - return encoding_map[e](content) + Raises: + ValueError, if encoding fails. + """ + try: + try: + return custom_encode[encoding](obj) + except KeyError: + return codecs.encode(obj, encoding, errors) + except Exception as e: + raise ValueError("{} when encoding {} with {}".format( + type(e).__name__, + repr(obj)[:10], + repr(encoding), + )) def identity(content): @@ -46,10 +69,7 @@ def identity(content): def decode_gzip(content): gfile = gzip.GzipFile(fileobj=BytesIO(content)) - try: - return gfile.read() - except (IOError, EOFError): - return None + return gfile.read() def encode_gzip(content): @@ -70,12 +90,9 @@ def decode_deflate(content): http://bugs.python.org/issue5784 """ try: - try: - return zlib.decompress(content) - except zlib.error: - return zlib.decompress(content, -15) + return zlib.decompress(content) except zlib.error: - return None + return zlib.decompress(content, -15) def encode_deflate(content): @@ -84,4 +101,16 @@ def encode_deflate(content): """ return zlib.compress(content) -__all__ = ["ENCODINGS", "encode", "decode"] + +custom_decode = { + "identity": identity, + "gzip": decode_gzip, + "deflate": decode_deflate, +} +custom_encode = { + "identity": identity, + "gzip": encode_gzip, + "deflate": encode_deflate, +} + +__all__ = ["encode", "decode"] diff --git a/netlib/http/http1/assemble.py 
b/netlib/http/http1/assemble.py index 511328f1..e74732d2 100644 --- a/netlib/http/http1/assemble.py +++ b/netlib/http/http1/assemble.py @@ -5,7 +5,7 @@ from netlib import exceptions def assemble_request(request): - if request.content is None: + if request.data.content is None: raise exceptions.HttpException("Cannot assemble flow with missing content") head = assemble_request_head(request) body = b"".join(assemble_body(request.data.headers, [request.data.content])) @@ -19,7 +19,7 @@ def assemble_request_head(request): def assemble_response(response): - if response.content is None: + if response.data.content is None: raise exceptions.HttpException("Cannot assemble flow with missing content") head = assemble_response_head(response) body = b"".join(assemble_body(response.data.headers, [response.data.content])) diff --git a/netlib/http/message.py b/netlib/http/message.py index 0583c246..668198f8 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -52,7 +52,22 @@ class MessageData(basetypes.Serializable): return cls(**state) +class CachedDecode(object): + __slots__ = ["encoded", "encoding", "decoded"] + + def __init__(self, object, encoding, decoded): + self.encoded = object + self.encoding = encoding + self.decoded = decoded + +no_cached_decode = CachedDecode(None, None, None) + + class Message(basetypes.Serializable): + def __init__(self): + self._content_cache = no_cached_decode # type: CachedDecode + self._text_cache = no_cached_decode # type: CachedDecode + def __eq__(self, other): if isinstance(other, Message): return self.data == other.data @@ -90,19 +105,65 @@ class Message(basetypes.Serializable): self.data.headers = h @property - def content(self): + def raw_content(self): + # type: () -> bytes """ The raw (encoded) HTTP message body - See also: :py:attr:`text` + See also: :py:attr:`content`, :py:class:`text` """ return self.data.content - @content.setter - def content(self, content): + @raw_content.setter + def raw_content(self, content): 
self.data.content = content - if isinstance(content, bytes): - self.headers["content-length"] = str(len(content)) + + @property + def content(self): + # type: () -> bytes + """ + The HTTP message body decoded with the content-encoding header (e.g. gzip) + + See also: :py:class:`raw_content`, :py:attr:`text` + """ + ce = self.headers.get("content-encoding") + cached = ( + self._content_cache.encoded == self.raw_content and + self._content_cache.encoding == ce + ) + if not cached: + try: + if not ce: + raise ValueError() + decoded = encoding.decode(self.raw_content, ce) + except ValueError: + decoded = self.raw_content + self._content_cache = CachedDecode(self.raw_content, ce, decoded) + return self._content_cache.decoded + + @content.setter + def content(self, value): + ce = self.headers.get("content-encoding") + cached = ( + self._content_cache.decoded == value and + self._content_cache.encoding == ce + ) + if not cached: + try: + if not ce: + raise ValueError() + encoded = encoding.encode(value, ce) + except ValueError: + # Do we have an unknown content-encoding? + # If so, we want to remove it. + if value and ce: + self.headers.pop("content-encoding", None) + ce = None + encoded = value + self._content_cache = CachedDecode(encoded, ce, value) + self.raw_content = self._content_cache.encoded + if isinstance(self.raw_content, bytes): + self.headers["content-length"] = str(len(self.raw_content)) @property def http_version(self): @@ -137,56 +198,81 @@ class Message(basetypes.Serializable): def timestamp_end(self, timestamp_end): self.data.timestamp_end = timestamp_end + def _get_content_type_charset(self): + # type: () -> Optional[str] + ct = headers.parse_content_type(self.headers.get("content-type", "")) + if ct: + return ct[2].get("charset") + @property def text(self): + # type: () -> six.text_type """ - The decoded HTTP message body. - Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive. - - .. 
note:: - This is not implemented yet. + The HTTP message body decoded with both content-encoding header (e.g. gzip) + and content-type header charset. - See also: :py:attr:`content`, :py:class:`decoded` + See also: :py:attr:`content`, :py:class:`raw_content` """ # This attribute should be called text, because that's what requests does. - raise NotImplementedError() + enc = self._get_content_type_charset() + + # We may also want to check for HTML meta tags here at some point. + + cached = ( + self._text_cache.encoded == self.content and + self._text_cache.encoding == enc + ) + if not cached: + try: + if not enc: + raise ValueError() + decoded = encoding.decode(self.content, enc) + except ValueError: + decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape") + self._text_cache = CachedDecode(self.content, enc, decoded) + return self._text_cache.decoded @text.setter def text(self, text): - raise NotImplementedError() + enc = self._get_content_type_charset() + cached = ( + self._text_cache.decoded == text and + self._text_cache.encoding == enc + ) + if not cached: + try: + if not enc: + raise ValueError() + encoded = encoding.encode(text, enc) + except ValueError: + # Do we have an unknown content-type charset? + # If so, we want to replace it with utf8. + if text and enc: + self.headers["content-type"] = re.sub( + "charset=[^;]+", + "charset=utf-8", + self.headers["content-type"] + ) + encoded = text.encode("utf8", "replace" if six.PY2 else "surrogateescape") + self._text_cache = CachedDecode(encoded, enc, text) + self.content = self._text_cache.encoded def decode(self): """ - Decodes body based on the current Content-Encoding header, then - removes the header. If there is no Content-Encoding header, no - action is taken. - - Returns: - True, if decoding succeeded. - False, otherwise. + Decodes body based on the current Content-Encoding header, then + removes the header. If there is no Content-Encoding header, no + action is taken. 
""" - ce = self.headers.get("content-encoding") - data = encoding.decode(ce, self.content) - if data is None: - return False - self.content = data + self.raw_content = self.content self.headers.pop("content-encoding", None) - return True def encode(self, e): """ - Encodes body with the encoding e, where e is "gzip", "deflate" or "identity". - - Returns: - True, if decoding succeeded. - False, otherwise. + Encodes body with the encoding e, where e is "gzip", "deflate" or "identity". """ - data = encoding.encode(e, self.content) - if data is None: - return False - self.content = data + self.decode() # remove the current encoding self.headers["content-encoding"] = e - return True + self.content = self.raw_content def replace(self, pattern, repl, flags=0): """ @@ -203,10 +289,9 @@ class Message(basetypes.Serializable): repl = strutils.escaped_str_to_bytes(repl) replacements = 0 if self.content: - with decoded(self): - self.content, replacements = re.subn( - pattern, repl, self.content, flags=flags - ) + self.content, replacements = re.subn( + pattern, repl, self.content, flags=flags + ) replacements += self.headers.replace(pattern, repl, flags) return replacements @@ -225,29 +310,16 @@ class Message(basetypes.Serializable): class decoded(object): """ - A context manager that decodes a request or response, and then - re-encodes it with the same encoding after execution of the block. - - Example: - - .. code-block:: python - - with decoded(request): - request.content = request.content.replace("foo", "bar") + Deprecated: You can now directly use :py:attr:`content`. + :py:attr:`raw_content` has the encoded content. """ def __init__(self, message): - self.message = message - ce = message.headers.get("content-encoding") - if ce in encoding.ENCODINGS: - self.ce = ce - else: - self.ce = None + warnings.warn("decoded() is deprecated, you can now directly use .content instead. 
" + ".raw_content has the encoded content.", DeprecationWarning) def __enter__(self): - if self.ce: - self.message.decode() + pass def __exit__(self, type, value, tb): - if self.ce: - self.message.encode(self.ce) + pass
\ No newline at end of file diff --git a/netlib/http/request.py b/netlib/http/request.py index d9f4ed00..4ce94549 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -5,7 +5,6 @@ import re import six from six.moves import urllib -from netlib import encoding from netlib import multidict from netlib import strutils from netlib.http import multipart @@ -44,6 +43,7 @@ class Request(message.Message): An HTTP request. """ def __init__(self, *args, **kwargs): + super(Request, self).__init__() self.data = RequestData(*args, **kwargs) def __repr__(self): @@ -327,7 +327,7 @@ class Request(message.Message): self.headers["accept-encoding"] = ( ', '.join( e - for e in encoding.ENCODINGS + for e in {"gzip", "identity", "deflate"} if e in accept_encoding ) ) diff --git a/netlib/http/response.py b/netlib/http/response.py index 17d69418..d2273edd 100644 --- a/netlib/http/response.py +++ b/netlib/http/response.py @@ -30,13 +30,14 @@ class Response(message.Message): An HTTP response. """ def __init__(self, *args, **kwargs): + super(Response, self).__init__() self.data = ResponseData(*args, **kwargs) def __repr__(self): - if self.content: + if self.raw_content: details = "{}, {}".format( self.headers.get("content-type", "unknown content type"), - human.pretty_size(len(self.content)) + human.pretty_size(len(self.raw_content)) ) else: details = "no content" diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py index 52fceeac..4b099d8d 100644 --- a/test/mitmproxy/test_contentview.py +++ b/test/mitmproxy/test_contentview.py @@ -209,28 +209,6 @@ Larry headers=Headers() ) - r = cv.get_content_view( - cv.get("Auto"), - encoding.encode('gzip', b"[1, 2, 3]"), - headers=Headers( - content_type="application/json", - content_encoding="gzip" - ) - ) - assert "decoded gzip" in r[0] - assert "JSON" in r[0] - - r = cv.get_content_view( - cv.get("XML"), - encoding.encode('gzip', b"[1, 2, 3]"), - headers=Headers( - content_type="application/json", - 
content_encoding="gzip" - ) - ) - assert "decoded gzip" in r[0] - assert "Raw" in r[0] - def test_add_cv(self): class TestContentView(cv.View): name = "test" diff --git a/test/mitmproxy/test_examples.py b/test/mitmproxy/test_examples.py index 607d6faf..22d3c425 100644 --- a/test/mitmproxy/test_examples.py +++ b/test/mitmproxy/test_examples.py @@ -73,9 +73,9 @@ def test_add_header(): def test_custom_contentviews(): with example("custom_contentviews.py") as ex: pig = ex.ctx.contentview - _, fmt = pig("<html>test!</html>") - assert any('esttay!' in val[0][1] for val in fmt) - assert not pig("gobbledygook") + _, fmt = pig(b"<html>test!</html>") + assert any(b'esttay!' in val[0][1] for val in fmt) + assert not pig(b"gobbledygook") def test_iframe_injector(): @@ -103,7 +103,7 @@ def test_modify_form(): def test_modify_querystring(): - flow = tutils.tflow(req=netutils.treq(path="/search?q=term")) + flow = tutils.tflow(req=netutils.treq(path=b"/search?q=term")) with example("modify_querystring.py") as ex: ex.run("request", flow) assert flow.request.query["mitmproxy"] == "rocks" @@ -126,7 +126,7 @@ def test_modify_response_body(): def test_redirect_requests(): - flow = tutils.tflow(req=netutils.treq(host="example.org")) + flow = tutils.tflow(req=netutils.treq(host=b"example.org")) with example("redirect_requests.py") as ex: ex.run("request", flow) assert flow.request.host == "mitmproxy.org" diff --git a/test/mitmproxy/test_flow.py b/test/mitmproxy/test_flow.py index 9eaab9aa..5753e728 100644 --- a/test/mitmproxy/test_flow.py +++ b/test/mitmproxy/test_flow.py @@ -518,13 +518,13 @@ class TestFlow(object): f.replace("foo", "bar") - assert f.request.content != "abarb" + assert f.request.raw_content != "abarb" f.request.decode() - assert f.request.content == "abarb" + assert f.request.raw_content == "abarb" - assert f.response.content != "abarb" + assert f.response.raw_content != "abarb" f.response.decode() - assert f.response.content == "abarb" + assert f.response.raw_content 
== "abarb" class TestState: @@ -1102,16 +1102,6 @@ class TestRequest: r.constrain_encoding() assert "oink" not in r.headers["accept-encoding"] - def test_get_decoded_content(self): - r = HTTPRequest.wrap(netlib.tutils.treq()) - r.content = None - r.headers["content-encoding"] = "identity" - assert r.get_decoded_content() is None - - r.content = "falafel" - r.encode("gzip") - assert r.get_decoded_content() == "falafel" - def test_get_content_type(self): resp = HTTPResponse.wrap(netlib.tutils.tresp()) resp.headers = Headers(content_type="text/plain") diff --git a/test/mitmproxy/test_protocol_http2.py b/test/mitmproxy/test_protocol_http2.py index 932c8df2..6e021b2c 100644 --- a/test/mitmproxy/test_protocol_http2.py +++ b/test/mitmproxy/test_protocol_http2.py @@ -120,7 +120,7 @@ class _Http2TestBase(object): client.wfile.flush() # read CONNECT response - while client.rfile.readline() != "\r\n": + while client.rfile.readline() != b"\r\n": pass client.convert_to_ssl(alpn_protos=[b'h2']) @@ -197,7 +197,7 @@ class TestSimple(_Http2TestBase, _Http2ServerBase): (':path', '/'), ('ClIeNt-FoO', 'client-bar-1'), ('ClIeNt-FoO', 'client-bar-2'), - ], body='my request body echoed back to me') + ], body=b'my request body echoed back to me') done = False while not done: @@ -269,7 +269,7 @@ class TestWithBodies(_Http2TestBase, _Http2ServerBase): (':scheme', 'https'), (':path', '/'), ], - body='foobar with request body', + body=b'foobar with request body', ) done = False diff --git a/test/mitmproxy/tservers.py b/test/mitmproxy/tservers.py index 51f4b4e2..6d8730f5 100644 --- a/test/mitmproxy/tservers.py +++ b/test/mitmproxy/tservers.py @@ -11,7 +11,6 @@ import pathod.pathoc from mitmproxy import flow, controller from mitmproxy.cmdline import APP_HOST, APP_PORT -from netlib import strutils testapp = flask.Flask(__name__) diff --git a/test/netlib/http/test_message.py b/test/netlib/http/test_message.py index f5bf7f0c..aecde1ec 100644 --- a/test/netlib/http/test_message.py +++ 
b/test/netlib/http/test_message.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -from netlib.http import decoded +import six + from netlib.tutils import tresp @@ -76,6 +77,9 @@ class TestMessage(object): resp.content = b"" assert resp.data.content == b"" assert resp.headers["content-length"] == "0" + resp.raw_content = b"bar" + assert resp.data.content == b"bar" + assert resp.headers["content-length"] == "0" def test_content_basic(self): _test_passthrough_attr(tresp(), "content") @@ -93,61 +97,108 @@ class TestMessage(object): _test_decoded_attr(tresp(), "http_version") -class TestDecodedDecorator(object): - +class TestMessageContentEncoding(object): def test_simple(self): r = tresp() - assert r.content == b"message" + assert r.raw_content == b"message" assert "content-encoding" not in r.headers - assert r.encode("gzip") + r.encode("gzip") assert r.headers["content-encoding"] - assert r.content != b"message" - with decoded(r): - assert "content-encoding" not in r.headers - assert r.content == b"message" - assert r.headers["content-encoding"] - assert r.content != b"message" + assert r.raw_content != b"message" + assert r.content == b"message" + assert r.raw_content != b"message" def test_modify(self): r = tresp() assert "content-encoding" not in r.headers - assert r.encode("gzip") - - with decoded(r): - r.content = b"foo" + r.encode("gzip") - assert r.content != b"foo" + r.content = b"foo" + assert r.raw_content != b"foo" r.decode() - assert r.content == b"foo" + assert r.raw_content == b"foo" def test_unknown_ce(self): r = tresp() r.headers["content-encoding"] = "zopfli" - r.content = b"foo" - with decoded(r): - assert r.headers["content-encoding"] - assert r.content == b"foo" - assert r.headers["content-encoding"] + r.raw_content = b"foo" assert r.content == b"foo" + assert r.headers["content-encoding"] def test_cannot_decode(self): r = tresp() - assert r.encode("gzip") - r.content = b"foo" - with 
decoded(r): - assert r.headers["content-encoding"] - assert r.content == b"foo" + r.encode("gzip") + r.raw_content = b"foo" + assert r.content == b"foo" assert r.headers["content-encoding"] - assert r.content != b"foo" r.decode() - assert r.content == b"foo" + assert r.raw_content == b"foo" + assert "content-encoding" not in r.headers def test_cannot_encode(self): r = tresp() - assert r.encode("gzip") - with decoded(r): - r.content = None + r.encode("gzip") + r.content = None + assert r.headers["content-encoding"] + assert r.raw_content is None + r.headers["content-encoding"] = "zopfli" + r.content = b"foo" assert "content-encoding" not in r.headers - assert r.content is None + assert r.raw_content == b"foo" + + +class TestMessageText(object): + def test_simple(self): + r = tresp(content=b'\xc3\xbc') + assert r.raw_content == b"\xc3\xbc" + assert r.content == b"\xc3\xbc" + assert r.text == u"ü" + + r.encode("gzip") + assert r.text == u"ü" + r.decode() + assert r.text == u"ü" + + r.headers["content-type"] = "text/html; charset=latin1" + assert r.content == b"\xc3\xbc" + assert r.text == u"ü" + + def test_modify(self): + r = tresp() + + r.text = u"ü" + assert r.raw_content == b"\xc3\xbc" + + r.headers["content-type"] = "text/html; charset=latin1" + r.text = u"ü" + assert r.raw_content == b"\xfc" + assert r.headers["content-length"] == "1" + + def test_unknown_ce(self): + r = tresp() + r.headers["content-type"] = "text/html; charset=wtf" + r.raw_content = b"foo" + assert r.text == u"foo" + + def test_cannot_decode(self): + r = tresp() + r.raw_content = b"\xFF" + assert r.text == u'\ufffd' if six.PY2 else '\udcff' + + def test_cannot_encode(self): + r = tresp() + r.content = None + assert "content-type" not in r.headers + assert r.raw_content is None + + r.headers["content-type"] = "text/html; charset=latin1" + r.text = u"☃" + assert r.headers["content-type"] == "text/html; charset=utf-8" + assert r.raw_content == b'\xe2\x98\x83' + + r.headers["content-type"] = 
"text/html; charset=latin1" + r.text = u'\udcff' + assert r.headers["content-type"] == "text/html; charset=utf-8" + assert r.raw_content == b'\xed\xb3\xbf' if six.PY2 else b"\xFF" diff --git a/test/netlib/test_encoding.py b/test/netlib/test_encoding.py index 0ff1aad1..de10fc48 100644 --- a/test/netlib/test_encoding.py +++ b/test/netlib/test_encoding.py @@ -1,37 +1,39 @@ -from netlib import encoding +from netlib import encoding, tutils def test_identity(): - assert b"string" == encoding.decode("identity", b"string") - assert b"string" == encoding.encode("identity", b"string") - assert not encoding.encode("nonexistent", b"string") - assert not encoding.decode("nonexistent encoding", b"string") + assert b"string" == encoding.decode(b"string", "identity") + assert b"string" == encoding.encode(b"string", "identity") + with tutils.raises(ValueError): + encoding.encode(b"string", "nonexistent encoding") def test_gzip(): assert b"string" == encoding.decode( - "gzip", encoding.encode( - "gzip", - b"string" - ) + b"string", + "gzip" + ), + "gzip" ) - assert encoding.decode("gzip", b"bogus") is None + with tutils.raises(ValueError): + encoding.decode(b"bogus", "gzip") def test_deflate(): assert b"string" == encoding.decode( - "deflate", encoding.encode( - "deflate", - b"string" - ) + b"string", + "deflate" + ), + "deflate" ) assert b"string" == encoding.decode( - "deflate", encoding.encode( - "deflate", - b"string" - )[2:-4] + b"string", + "deflate" + )[2:-4], + "deflate" ) - assert encoding.decode("deflate", b"bogus") is None + with tutils.raises(ValueError): + encoding.decode(b"bogus", "deflate") |