about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .appveyor.yml 2
-rw-r--r-- examples/simple/modify_body_inject_iframe.py 2
-rw-r--r-- mitmproxy/contentviews.py 672
-rw-r--r-- mitmproxy/contentviews/__init__.py 183
-rw-r--r-- mitmproxy/contentviews/auto.py 27
-rw-r--r-- mitmproxy/contentviews/base.py 65
-rw-r--r-- mitmproxy/contentviews/css.py 25
-rw-r--r-- mitmproxy/contentviews/hex.py 19
-rw-r--r-- mitmproxy/contentviews/html_outline.py 17
-rw-r--r-- mitmproxy/contentviews/image.py 45
-rw-r--r-- mitmproxy/contentviews/javascript.py 20
-rw-r--r-- mitmproxy/contentviews/json.py 27
-rw-r--r-- mitmproxy/contentviews/multipart.py 21
-rw-r--r-- mitmproxy/contentviews/protobuf.py 45
-rw-r--r-- mitmproxy/contentviews/query.py 15
-rw-r--r-- mitmproxy/contentviews/raw.py 12
-rw-r--r-- mitmproxy/contentviews/urlencoded.py 17
-rw-r--r-- mitmproxy/contentviews/wbxml.py 20
-rw-r--r-- mitmproxy/contentviews/xml_html.py 234
-rw-r--r-- mitmproxy/utils/sliding_window.py 30
-rw-r--r-- requirements.txt 1
-rw-r--r-- setup.py 1
-rw-r--r-- test/mitmproxy/addons/test_dumper.py 2
-rw-r--r-- test/mitmproxy/contentviews/__init__.py 9
-rw-r--r-- test/mitmproxy/contentviews/test_api.py 85
-rw-r--r-- test/mitmproxy/contentviews/test_auto.py 47
-rw-r--r-- test/mitmproxy/contentviews/test_css.py 29
-rw-r--r-- test/mitmproxy/contentviews/test_hex.py 7
-rw-r--r-- test/mitmproxy/contentviews/test_html_outline.py 9
-rw-r--r-- test/mitmproxy/contentviews/test_image.py 17
-rw-r--r-- test/mitmproxy/contentviews/test_javascript.py 10
-rw-r--r-- test/mitmproxy/contentviews/test_json.py 16
-rw-r--r-- test/mitmproxy/contentviews/test_multipart.py 25
-rw-r--r-- test/mitmproxy/contentviews/test_protobuf.py 12
-rw-r--r-- test/mitmproxy/contentviews/test_query.py 13
-rw-r--r-- test/mitmproxy/contentviews/test_raw.py 7
-rw-r--r-- test/mitmproxy/contentviews/test_urlencoded.py 15
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html.py 29
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml 10
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/cdata.xml 10
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml 10
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/comment.xml 10
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html 14
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/inline.html 7
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html 10
-rw-r--r-- test/mitmproxy/contentviews/test_xml_html_data/simple.html 1
-rw-r--r-- test/mitmproxy/data/amf01 bin 432 -> 0 bytes
-rw-r--r-- test/mitmproxy/data/amf02 bin 286 -> 0 bytes
-rw-r--r-- test/mitmproxy/data/amf03 bin 33691 -> 0 bytes
-rw-r--r-- test/mitmproxy/test_contentview.py 284
-rw-r--r-- test/mitmproxy/test_custom_contentview.py 48
-rw-r--r-- test/mitmproxy/test_examples.py 2
-rw-r--r-- test/mitmproxy/utils/test_sliding_window.py 27
-rw-r--r-- tox.ini 3
54 files changed, 1257 insertions, 1011 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 5421eb5a..5cf194a9 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -27,7 +27,7 @@ test_script:
- ps: |
$Env:VERSION = $(python mitmproxy/version.py)
$Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`""
- tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl
+ tox -e wheel
tox -e rtool -- bdist
deploy_script:
diff --git a/examples/simple/modify_body_inject_iframe.py b/examples/simple/modify_body_inject_iframe.py
index 33d18bbd..e3d5fee9 100644
--- a/examples/simple/modify_body_inject_iframe.py
+++ b/examples/simple/modify_body_inject_iframe.py
@@ -11,7 +11,7 @@ class Injector:
def response(self, flow):
if flow.request.host in self.iframe_url:
return
- html = BeautifulSoup(flow.response.content, "lxml")
+ html = BeautifulSoup(flow.response.content, "html.parser")
if html.body:
iframe = html.new_tag(
"iframe",
diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py
deleted file mode 100644
index ef0c80e0..00000000
--- a/mitmproxy/contentviews.py
+++ /dev/null
@@ -1,672 +0,0 @@
-"""
-Mitmproxy Content Views
-=======================
-
-mitmproxy includes a set of content views which can be used to
-format/decode/highlight data. While they are currently used for HTTP message
-bodies only, the may be used in other contexts in the future, e.g. to decode
-protobuf messages sent as WebSocket frames.
-
-Thus, the View API is very minimalistic. The only arguments are `data` and
-`**metadata`, where `data` is the actual content (as bytes). The contents on
-metadata depend on the protocol in use. For HTTP, the message headers are
-passed as the ``headers`` keyword argument. For HTTP requests, the query
-parameters are passed as the ``query`` keyword argument.
-"""
-
-import datetime
-import io
-import json
-import logging
-import subprocess
-import traceback
-from typing import Generator
-from typing import Mapping
-from typing import Tuple
-from typing import Union
-
-import cssutils
-import html2text
-import jsbeautifier
-import lxml.etree
-import lxml.html
-from PIL import ExifTags
-from PIL import Image
-from mitmproxy import exceptions
-from mitmproxy.contrib.wbxml import ASCommandResponse
-from mitmproxy.net import http
-from mitmproxy.types import multidict
-from mitmproxy.utils import strutils
-from mitmproxy.net.http import url
-
-try:
- import pyamf
- from pyamf import remoting, flex
-except ImportError: # pragma no cover
- pyamf = None
-
-# Default view cutoff *in lines*
-VIEW_CUTOFF = 512
-
-KEY_MAX = 30
-
-
-def pretty_json(s: bytes) -> bytes:
- try:
- p = json.loads(s.decode('utf-8'))
- except ValueError:
- return None
- pretty = json.dumps(p, sort_keys=True, indent=4, ensure_ascii=False)
- if isinstance(pretty, str):
- # json.dumps _may_ decide to return unicode, if the JSON object is not ascii.
- # From limited testing this is always valid utf8 (otherwise json.loads will fail earlier),
- # so we can just re-encode it here.
- return pretty.encode("utf8", "strict")
- return pretty
-
-
-def format_dict(
- d: Mapping[Union[str, bytes], Union[str, bytes]]
-) -> Generator[Tuple[Union[str, bytes], Union[str, bytes]], None, None]:
- """
- Helper function that transforms the given dictionary into a list of
- ("key", key )
- ("value", value)
- tuples, where key is padded to a uniform width.
- """
- max_key_len = max(len(k) for k in d.keys())
- max_key_len = min(max_key_len, KEY_MAX)
- for key, value in d.items():
- key += b":" if isinstance(key, bytes) else u":"
- key = key.ljust(max_key_len + 2)
- yield [
- ("header", key),
- ("text", value)
- ]
-
-
-def format_text(text):
- """
- Helper function that transforms bytes into the view output format.
- """
- for line in text.splitlines():
- yield [("text", line)]
-
-
-class View:
- name = None
- prompt = ()
- content_types = []
-
- def __call__(self, data: bytes, **metadata):
- """
- Transform raw data into human-readable output.
-
- Args:
- data: the data to decode/format.
- metadata: optional keyword-only arguments for metadata. Implementations must not
- rely on a given argument being present.
-
- Returns:
- A (description, content generator) tuple.
-
- The content generator yields lists of (style, text) tuples, where each list represents
- a single line. ``text`` is a unfiltered byte string which may need to be escaped,
- depending on the used output.
-
- Caveats:
- The content generator must not yield tuples of tuples,
- because urwid cannot process that. You have to yield a *list* of tuples per line.
- """
- raise NotImplementedError()
-
-
-class ViewAuto(View):
- name = "Auto"
- prompt = ("auto", "a")
- content_types = []
-
- def __call__(self, data, **metadata):
- headers = metadata.get("headers", {})
- ctype = headers.get("content-type")
- if data and ctype:
- ct = http.parse_content_type(ctype) if ctype else None
- ct = "%s/%s" % (ct[0], ct[1])
- if ct in content_types_map:
- return content_types_map[ct][0](data, **metadata)
- elif strutils.is_xml(data):
- return get("XML")(data, **metadata)
- if metadata.get("query"):
- return get("Query")(data, **metadata)
- if data and strutils.is_mostly_bin(data):
- return get("Hex")(data)
- if not data:
- return "No content", []
- return get("Raw")(data)
-
-
-class ViewRaw(View):
- name = "Raw"
- prompt = ("raw", "r")
- content_types = []
-
- def __call__(self, data, **metadata):
- return "Raw", format_text(strutils.bytes_to_escaped_str(data, True))
-
-
-class ViewHex(View):
- name = "Hex"
- prompt = ("hex", "e")
- content_types = []
-
- @staticmethod
- def _format(data):
- for offset, hexa, s in strutils.hexdump(data):
- yield [
- ("offset", offset + " "),
- ("text", hexa + " "),
- ("text", s)
- ]
-
- def __call__(self, data, **metadata):
- return "Hex", self._format(data)
-
-
-class ViewXML(View):
- name = "XML"
- prompt = ("xml", "x")
- content_types = ["text/xml"]
-
- def __call__(self, data, **metadata):
- parser = lxml.etree.XMLParser(
- remove_blank_text=True,
- resolve_entities=False,
- strip_cdata=False,
- recover=False
- )
- try:
- document = lxml.etree.fromstring(data, parser)
- except lxml.etree.XMLSyntaxError:
- return None
- docinfo = document.getroottree().docinfo
-
- prev = []
- p = document.getroottree().getroot().getprevious()
- while p is not None:
- prev.insert(
- 0,
- lxml.etree.tostring(p)
- )
- p = p.getprevious()
- doctype = docinfo.doctype
- if prev:
- doctype += "\n".join(p.decode() for p in prev).strip()
- doctype = doctype.strip()
-
- s = lxml.etree.tostring(
- document,
- pretty_print=True,
- xml_declaration=True,
- doctype=doctype or None,
- encoding=docinfo.encoding
- )
-
- return "XML-like data", format_text(s)
-
-
-class ViewJSON(View):
- name = "JSON"
- prompt = ("json", "s")
- content_types = [
- "application/json",
- "application/vnd.api+json"
- ]
-
- def __call__(self, data, **metadata):
- pj = pretty_json(data)
- if pj:
- return "JSON", format_text(pj)
-
-
-class ViewHTML(View):
- name = "HTML"
- prompt = ("html", "h")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- if strutils.is_xml(data):
- parser = lxml.etree.HTMLParser(
- strip_cdata=True,
- remove_blank_text=True
- )
- d = lxml.html.fromstring(data, parser=parser)
- docinfo = d.getroottree().docinfo
- s = lxml.etree.tostring(
- d,
- pretty_print=True,
- doctype=docinfo.doctype,
- encoding='utf8'
- )
- return "HTML", format_text(s)
-
-
-class ViewHTMLOutline(View):
- name = "HTML Outline"
- prompt = ("html outline", "o")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- data = data.decode("utf-8", "replace")
- h = html2text.HTML2Text(baseurl="")
- h.ignore_images = True
- h.body_width = 0
- outline = h.handle(data)
- return "HTML Outline", format_text(outline)
-
-
-class ViewURLEncoded(View):
- name = "URL-encoded"
- prompt = ("urlencoded", "u")
- content_types = ["application/x-www-form-urlencoded"]
-
- def __call__(self, data, **metadata):
- try:
- data = data.decode("ascii", "strict")
- except ValueError:
- return None
- d = url.decode(data)
- return "URLEncoded form", format_dict(multidict.MultiDict(d))
-
-
-class ViewMultipart(View):
- name = "Multipart Form"
- prompt = ("multipart", "m")
- content_types = ["multipart/form-data"]
-
- @staticmethod
- def _format(v):
- yield [("highlight", "Form data:\n")]
- for message in format_dict(multidict.MultiDict(v)):
- yield message
-
- def __call__(self, data, **metadata):
- headers = metadata.get("headers", {})
- v = http.multipart.decode(headers, data)
- if v:
- return "Multipart form", self._format(v)
-
-
-if pyamf:
- class DummyObject(dict):
- def __init__(self, alias):
- dict.__init__(self)
-
- def __readamf__(self, input):
- data = input.readObject()
- self["data"] = data
-
- def pyamf_class_loader(s):
- for i in pyamf.CLASS_LOADERS:
- if i != pyamf_class_loader:
- v = i(s)
- if v:
- return v
- return DummyObject
-
- pyamf.register_class_loader(pyamf_class_loader)
-
- class ViewAMF(View):
- name = "AMF"
- prompt = ("amf", "f")
- content_types = ["application/x-amf"]
-
- def unpack(self, b, seen=None):
- if seen is None:
- seen = set([])
-
- if hasattr(b, "body"):
- return self.unpack(b.body, seen)
- if isinstance(b, DummyObject):
- if id(b) in seen:
- return "<recursion>"
- else:
- seen.add(id(b))
- for k, v in b.items():
- b[k] = self.unpack(v, seen)
- return b
- elif isinstance(b, dict):
- for k, v in b.items():
- b[k] = self.unpack(v, seen)
- return b
- elif isinstance(b, list):
- return [self.unpack(i) for i in b]
- elif isinstance(b, datetime.datetime):
- return str(b)
- elif isinstance(b, flex.ArrayCollection):
- return [self.unpack(i, seen) for i in b]
- else:
- return b
-
- def _format(self, envelope):
- for target, message in iter(envelope):
- if isinstance(message, pyamf.remoting.Request):
- yield [
- ("header", "Request: "),
- ("text", str(target)),
- ]
- else:
- yield [
- ("header", "Response: "),
- ("text", "%s, code %s" % (target, message.status)),
- ]
-
- s = json.dumps(self.unpack(message), indent=4)
- for msg in format_text(s):
- yield msg
-
- def __call__(self, data, **metadata):
- envelope = remoting.decode(data, strict=False)
- if envelope:
- return "AMF v%s" % envelope.amfVersion, self._format(envelope)
-
-
-class ViewJavaScript(View):
- name = "JavaScript"
- prompt = ("javascript", "j")
- content_types = [
- "application/x-javascript",
- "application/javascript",
- "text/javascript"
- ]
-
- def __call__(self, data, **metadata):
- opts = jsbeautifier.default_options()
- opts.indent_size = 2
- data = data.decode("utf-8", "replace")
- res = jsbeautifier.beautify(data, opts)
- return "JavaScript", format_text(res)
-
-
-class ViewCSS(View):
- name = "CSS"
- prompt = ("css", "c")
- content_types = [
- "text/css"
- ]
-
- def __call__(self, data, **metadata):
- cssutils.log.setLevel(logging.CRITICAL)
- cssutils.ser.prefs.keepComments = True
- cssutils.ser.prefs.omitLastSemicolon = False
- cssutils.ser.prefs.indentClosingBrace = False
- cssutils.ser.prefs.validOnly = False
-
- sheet = cssutils.parseString(data)
- beautified = sheet.cssText
-
- return "CSS", format_text(beautified)
-
-
-class ViewImage(View):
- name = "Image"
- prompt = ("image", "i")
- content_types = [
- "image/png",
- "image/jpeg",
- "image/gif",
- "image/vnd.microsoft.icon",
- "image/x-icon",
- ]
-
- def __call__(self, data, **metadata):
- try:
- img = Image.open(io.BytesIO(data))
- except IOError:
- return None
- parts = [
- ("Format", str(img.format_description)),
- ("Size", "%s x %s px" % img.size),
- ("Mode", str(img.mode)),
- ]
- for i in sorted(img.info.keys()):
- if i != "exif":
- parts.append(
- (str(i), str(img.info[i]))
- )
- if hasattr(img, "_getexif"):
- ex = img._getexif()
- if ex:
- for i in sorted(ex.keys()):
- tag = ExifTags.TAGS.get(i, i)
- parts.append(
- (str(tag), str(ex[i]))
- )
- fmt = format_dict(multidict.MultiDict(parts))
- return "%s image" % img.format, fmt
-
-
-class ViewProtobuf(View):
- """Human friendly view of protocol buffers
- The view uses the protoc compiler to decode the binary
- """
-
- name = "Protocol Buffer"
- prompt = ("protobuf", "p")
- content_types = [
- "application/x-protobuf",
- "application/x-protobuffer",
- ]
-
- @staticmethod
- def is_available():
- try:
- p = subprocess.Popen(
- ["protoc", "--version"],
- stdout=subprocess.PIPE
- )
- out, _ = p.communicate()
- return out.startswith("libprotoc")
- except:
- return False
-
- def decode_protobuf(self, content):
- # if Popen raises OSError, it will be caught in
- # get_content_view and fall back to Raw
- p = subprocess.Popen(['protoc', '--decode_raw'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- out, err = p.communicate(input=content)
- if out:
- return out
- else:
- return err
-
- def __call__(self, data, **metadata):
- decoded = self.decode_protobuf(data)
- return "Protobuf", format_text(decoded)
-
-
-class ViewQuery(View):
- name = "Query"
- prompt = ("query", "q")
- content_types = []
-
- def __call__(self, data, **metadata):
- query = metadata.get("query")
- if query:
- return "Query", format_dict(query)
- else:
- return "Query", format_text("")
-
-
-class ViewWBXML(View):
- name = "WBXML"
- prompt = ("wbxml", "w")
- content_types = [
- "application/vnd.wap.wbxml",
- "application/vnd.ms-sync.wbxml"
- ]
-
- def __call__(self, data, **metadata):
- try:
- parser = ASCommandResponse.ASCommandResponse(data)
- parsedContent = parser.xmlString
- if parsedContent:
- return "WBXML", format_text(parsedContent)
- except:
- return None
-
-
-views = []
-content_types_map = {}
-view_prompts = []
-
-
-def get(name):
- for i in views:
- if i.name.lower() == name.lower():
- return i
-
-
-def get_by_shortcut(c):
- for i in views:
- if i.prompt[1] == c:
- return i
-
-
-def add(view):
- # TODO: auto-select a different name (append an integer?)
- for i in views:
- if i.name == view.name:
- raise exceptions.ContentViewException("Duplicate view: " + view.name)
-
- # TODO: the UI should auto-prompt for a replacement shortcut
- for prompt in view_prompts:
- if prompt[1] == view.prompt[1]:
- raise exceptions.ContentViewException("Duplicate view shortcut: " + view.prompt[1])
-
- views.append(view)
-
- for ct in view.content_types:
- l = content_types_map.setdefault(ct, [])
- l.append(view)
-
- view_prompts.append(view.prompt)
-
-
-def remove(view):
- for ct in view.content_types:
- l = content_types_map.setdefault(ct, [])
- l.remove(view)
-
- if not len(l):
- del content_types_map[ct]
-
- view_prompts.remove(view.prompt)
- views.remove(view)
-
-
-add(ViewAuto())
-add(ViewRaw())
-add(ViewHex())
-add(ViewJSON())
-add(ViewXML())
-add(ViewWBXML())
-add(ViewHTML())
-add(ViewHTMLOutline())
-add(ViewJavaScript())
-add(ViewCSS())
-add(ViewURLEncoded())
-add(ViewMultipart())
-add(ViewImage())
-add(ViewQuery())
-
-if pyamf:
- add(ViewAMF())
-
-if ViewProtobuf.is_available():
- add(ViewProtobuf())
-
-
-def safe_to_print(lines, encoding="utf8"):
- """
- Wraps a content generator so that each text portion is a *safe to print* unicode string.
- """
- for line in lines:
- clean_line = []
- for (style, text) in line:
- if isinstance(text, bytes):
- text = text.decode(encoding, "replace")
- text = strutils.escape_control_characters(text)
- clean_line.append((style, text))
- yield clean_line
-
-
-def get_message_content_view(viewname, message):
- """
- Like get_content_view, but also handles message encoding.
- """
- viewmode = get(viewname)
- if not viewmode:
- viewmode = get("auto")
- try:
- content = message.content
- except ValueError:
- content = message.raw_content
- enc = "[cannot decode]"
- else:
- if isinstance(message, http.Message) and content != message.raw_content:
- enc = "[decoded {}]".format(
- message.headers.get("content-encoding")
- )
- else:
- enc = None
-
- if content is None:
- return "", iter([[("error", "content missing")]]), None
-
- metadata = {}
- if isinstance(message, http.Request):
- metadata["query"] = message.query
- if isinstance(message, http.Message):
- metadata["headers"] = message.headers
-
- description, lines, error = get_content_view(
- viewmode, content, **metadata
- )
-
- if enc:
- description = "{} {}".format(enc, description)
-
- return description, lines, error
-
-
-def get_content_view(viewmode, data, **metadata):
- """
- Args:
- viewmode: the view to use.
- data, **metadata: arguments passed to View instance.
-
- Returns:
- A (description, content generator, error) tuple.
- If the content view raised an exception generating the view,
- the exception is returned in error and the flow is formatted in raw mode.
- In contrast to calling the views directly, text is always safe-to-print unicode.
- """
- try:
- ret = viewmode(data, **metadata)
- if ret is None:
- ret = "Couldn't parse: falling back to Raw", get("Raw")(data, **metadata)[1]
- desc, content = ret
- error = None
- # Third-party viewers can fail in unexpected ways...
- except Exception:
- desc = "Couldn't parse: falling back to Raw"
- _, content = get("Raw")(data, **metadata)
- error = "{} Content viewer failed: \n{}".format(
- getattr(viewmode, "name"),
- traceback.format_exc()
- )
-
- return desc, safe_to_print(content), error
diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py
new file mode 100644
index 00000000..357172e3
--- /dev/null
+++ b/mitmproxy/contentviews/__init__.py
@@ -0,0 +1,183 @@
+"""
+Mitmproxy Content Views
+=======================
+
+mitmproxy includes a set of content views which can be used to
+format/decode/highlight data. While they are currently used for HTTP message
+bodies only, they may be used in other contexts in the future, e.g. to decode
+protobuf messages sent as WebSocket frames.
+
+Thus, the View API is very minimalistic. The only arguments are `data` and
+`**metadata`, where `data` is the actual content (as bytes). The contents of
+metadata depend on the protocol in use. For HTTP, the message headers are
+passed as the ``headers`` keyword argument. For HTTP requests, the query
+parameters are passed as the ``query`` keyword argument.
+"""
+import traceback
+from typing import Dict, Optional # noqa
+from typing import List # noqa
+from typing import Tuple # noqa
+
+from mitmproxy import exceptions
+from mitmproxy.net import http
+from mitmproxy.utils import strutils
+from . import (
+ auto, raw, hex, json, xml_html, html_outline, wbxml, javascript, css,
+ urlencoded, multipart, image, query, protobuf
+)
+from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
+
+views = [] # type: List[View]
+content_types_map = {} # type: Dict[str, List[View]]
+view_prompts = [] # type: List[Tuple[str, str]]
+
+
+def get(name: str) -> Optional[View]:
+ for i in views:
+ if i.name.lower() == name.lower():
+ return i
+
+
+def get_by_shortcut(c: str) -> Optional[View]:
+ for i in views:
+ if i.prompt[1] == c:
+ return i
+
+
+def add(view: View) -> None:
+ # TODO: auto-select a different name (append an integer?)
+ for i in views:
+ if i.name == view.name:
+ raise exceptions.ContentViewException("Duplicate view: " + view.name)
+
+ # TODO: the UI should auto-prompt for a replacement shortcut
+ for prompt in view_prompts:
+ if prompt[1] == view.prompt[1]:
+ raise exceptions.ContentViewException("Duplicate view shortcut: " + view.prompt[1])
+
+ views.append(view)
+
+ for ct in view.content_types:
+ l = content_types_map.setdefault(ct, [])
+ l.append(view)
+
+ view_prompts.append(view.prompt)
+
+
+def remove(view: View) -> None:
+ for ct in view.content_types:
+ l = content_types_map.setdefault(ct, [])
+ l.remove(view)
+
+ if not len(l):
+ del content_types_map[ct]
+
+ view_prompts.remove(view.prompt)
+ views.remove(view)
+
+
+def safe_to_print(lines, encoding="utf8"):
+ """
+ Wraps a content generator so that each text portion is a *safe to print* unicode string.
+ """
+ for line in lines:
+ clean_line = []
+ for (style, text) in line:
+ if isinstance(text, bytes):
+ text = text.decode(encoding, "replace")
+ text = strutils.escape_control_characters(text)
+ clean_line.append((style, text))
+ yield clean_line
+
+
+def get_message_content_view(viewname, message):
+ """
+ Like get_content_view, but also handles message encoding.
+ """
+ viewmode = get(viewname)
+ if not viewmode:
+ viewmode = get("auto")
+ try:
+ content = message.content
+ except ValueError:
+ content = message.raw_content
+ enc = "[cannot decode]"
+ else:
+ if isinstance(message, http.Message) and content != message.raw_content:
+ enc = "[decoded {}]".format(
+ message.headers.get("content-encoding")
+ )
+ else:
+ enc = None
+
+ if content is None:
+ return "", iter([[("error", "content missing")]]), None
+
+ metadata = {}
+ if isinstance(message, http.Request):
+ metadata["query"] = message.query
+ if isinstance(message, http.Message):
+ metadata["headers"] = message.headers
+
+ description, lines, error = get_content_view(
+ viewmode, content, **metadata
+ )
+
+ if enc:
+ description = "{} {}".format(enc, description)
+
+ return description, lines, error
+
+
+def get_content_view(viewmode: View, data: bytes, **metadata):
+ """
+ Args:
+ viewmode: the view to use.
+ data, **metadata: arguments passed to View instance.
+
+ Returns:
+ A (description, content generator, error) tuple.
+ If the content view raised an exception generating the view,
+ the exception is returned in error and the flow is formatted in raw mode.
+ In contrast to calling the views directly, text is always safe-to-print unicode.
+ """
+ try:
+ ret = viewmode(data, **metadata)
+ if ret is None:
+ ret = "Couldn't parse: falling back to Raw", get("Raw")(data, **metadata)[1]
+ desc, content = ret
+ error = None
+ # Third-party viewers can fail in unexpected ways...
+ except Exception:
+ desc = "Couldn't parse: falling back to Raw"
+ _, content = get("Raw")(data, **metadata)
+ error = "{} Content viewer failed: \n{}".format(
+ getattr(viewmode, "name"),
+ traceback.format_exc()
+ )
+
+ return desc, safe_to_print(content), error
+
+
+add(auto.ViewAuto())
+add(raw.ViewRaw())
+add(hex.ViewHex())
+add(json.ViewJSON())
+add(xml_html.ViewXmlHtml())
+add(wbxml.ViewWBXML())
+add(html_outline.ViewHTMLOutline())
+add(javascript.ViewJavaScript())
+add(css.ViewCSS())
+add(urlencoded.ViewURLEncoded())
+add(multipart.ViewMultipart())
+add(image.ViewImage())
+add(query.ViewQuery())
+
+if protobuf.ViewProtobuf.is_available():
+ add(protobuf.ViewProtobuf())
+
+__all__ = [
+ "View", "VIEW_CUTOFF", "KEY_MAX", "format_text", "format_dict",
+ "get", "get_by_shortcut", "add", "remove",
+ "get_content_view", "get_message_content_view",
+]
diff --git a/mitmproxy/contentviews/auto.py b/mitmproxy/contentviews/auto.py
new file mode 100644
index 00000000..7b3cbd78
--- /dev/null
+++ b/mitmproxy/contentviews/auto.py
@@ -0,0 +1,27 @@
+from mitmproxy import contentviews
+from mitmproxy.net import http
+from mitmproxy.utils import strutils
+from . import base
+
+
+class ViewAuto(base.View):
+ name = "Auto"
+ prompt = ("auto", "a")
+
+ def __call__(self, data, **metadata):
+ headers = metadata.get("headers", {})
+ ctype = headers.get("content-type")
+ if data and ctype:
+ ct = http.parse_content_type(ctype) if ctype else None
+ ct = "%s/%s" % (ct[0], ct[1])
+ if ct in contentviews.content_types_map:
+ return contentviews.content_types_map[ct][0](data, **metadata)
+ elif strutils.is_xml(data):
+ return contentviews.get("XML/HTML")(data, **metadata)
+ if metadata.get("query"):
+ return contentviews.get("Query")(data, **metadata)
+ if data and strutils.is_mostly_bin(data):
+ return contentviews.get("Hex")(data)
+ if not data:
+ return "No content", []
+ return contentviews.get("Raw")(data)
diff --git a/mitmproxy/contentviews/base.py b/mitmproxy/contentviews/base.py
new file mode 100644
index 00000000..0de4f786
--- /dev/null
+++ b/mitmproxy/contentviews/base.py
@@ -0,0 +1,65 @@
+# Default view cutoff *in lines*
+
+from typing import Iterable, AnyStr, List
+from typing import Mapping
+from typing import Tuple
+
+VIEW_CUTOFF = 512
+
+KEY_MAX = 30
+
+
+class View:
+ name = None # type: str
+ prompt = None # type: Tuple[str,str]
+ content_types = [] # type: List[str]
+
+ def __call__(self, data: bytes, **metadata):
+ """
+ Transform raw data into human-readable output.
+
+ Args:
+ data: the data to decode/format.
+ metadata: optional keyword-only arguments for metadata. Implementations must not
+ rely on a given argument being present.
+
+ Returns:
+ A (description, content generator) tuple.
+
+ The content generator yields lists of (style, text) tuples, where each list represents
+ a single line. ``text`` is an unfiltered byte string which may need to be escaped,
+ depending on the used output.
+
+ Caveats:
+ The content generator must not yield tuples of tuples,
+ because urwid cannot process that. You have to yield a *list* of tuples per line.
+ """
+ raise NotImplementedError() # pragma: no cover
+
+
+def format_dict(
+ d: Mapping[AnyStr, AnyStr]
+) -> Iterable[List[Tuple[str, AnyStr]]]:
+ """
+ Helper function that transforms the given dictionary into a list of
+ ("header", key )
+ ("text", value)
+ tuples, where key is padded to a uniform width.
+ """
+ max_key_len = max(len(k) for k in d.keys())
+ max_key_len = min(max_key_len, KEY_MAX)
+ for key, value in d.items():
+ key += b":" if isinstance(key, bytes) else u":"
+ key = key.ljust(max_key_len + 2)
+ yield [
+ ("header", key),
+ ("text", value)
+ ]
+
+
+def format_text(text: AnyStr) -> Iterable[List[Tuple[str, AnyStr]]]:
+ """
+ Helper function that transforms bytes into the view output format.
+ """
+ for line in text.splitlines():
+ yield [("text", line)]
diff --git a/mitmproxy/contentviews/css.py b/mitmproxy/contentviews/css.py
new file mode 100644
index 00000000..353a3257
--- /dev/null
+++ b/mitmproxy/contentviews/css.py
@@ -0,0 +1,25 @@
+import logging
+
+import cssutils
+
+from . import base
+
+
+class ViewCSS(base.View):
+ name = "CSS"
+ prompt = ("css", "c")
+ content_types = [
+ "text/css"
+ ]
+
+ def __call__(self, data, **metadata):
+ cssutils.log.setLevel(logging.CRITICAL)
+ cssutils.ser.prefs.keepComments = True
+ cssutils.ser.prefs.omitLastSemicolon = False
+ cssutils.ser.prefs.indentClosingBrace = False
+ cssutils.ser.prefs.validOnly = False
+
+ sheet = cssutils.parseString(data)
+ beautified = sheet.cssText
+
+ return "CSS", base.format_text(beautified)
diff --git a/mitmproxy/contentviews/hex.py b/mitmproxy/contentviews/hex.py
new file mode 100644
index 00000000..6251a8f3
--- /dev/null
+++ b/mitmproxy/contentviews/hex.py
@@ -0,0 +1,19 @@
+from mitmproxy.utils import strutils
+from . import base
+
+
+class ViewHex(base.View):
+ name = "Hex"
+ prompt = ("hex", "e")
+
+ @staticmethod
+ def _format(data):
+ for offset, hexa, s in strutils.hexdump(data):
+ yield [
+ ("offset", offset + " "),
+ ("text", hexa + " "),
+ ("text", s)
+ ]
+
+ def __call__(self, data, **metadata):
+ return "Hex", self._format(data)
diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py
new file mode 100644
index 00000000..d6c51b29
--- /dev/null
+++ b/mitmproxy/contentviews/html_outline.py
@@ -0,0 +1,17 @@
+import html2text
+
+from mitmproxy.contentviews import base
+
+
+class ViewHTMLOutline(base.View):
+ name = "HTML Outline"
+ prompt = ("html outline", "o")
+ content_types = ["text/html"]
+
+ def __call__(self, data, **metadata):
+ data = data.decode("utf-8", "replace")
+ h = html2text.HTML2Text(baseurl="")
+ h.ignore_images = True
+ h.body_width = 0
+ outline = h.handle(data)
+ return "HTML Outline", base.format_text(outline)
diff --git a/mitmproxy/contentviews/image.py b/mitmproxy/contentviews/image.py
new file mode 100644
index 00000000..57b1fffb
--- /dev/null
+++ b/mitmproxy/contentviews/image.py
@@ -0,0 +1,45 @@
+import io
+
+from PIL import ExifTags
+from PIL import Image
+
+from mitmproxy.types import multidict
+from . import base
+
+
+class ViewImage(base.View):
+ name = "Image"
+ prompt = ("image", "i")
+ content_types = [
+ "image/png",
+ "image/jpeg",
+ "image/gif",
+ "image/vnd.microsoft.icon",
+ "image/x-icon",
+ ]
+
+ def __call__(self, data, **metadata):
+ try:
+ img = Image.open(io.BytesIO(data))
+ except IOError:
+ return None
+ parts = [
+ ("Format", str(img.format_description)),
+ ("Size", "%s x %s px" % img.size),
+ ("Mode", str(img.mode)),
+ ]
+ for i in sorted(img.info.keys()):
+ if i != "exif":
+ parts.append(
+ (str(i), str(img.info[i]))
+ )
+ if hasattr(img, "_getexif"):
+ ex = img._getexif()
+ if ex:
+ for i in sorted(ex.keys()):
+ tag = ExifTags.TAGS.get(i, i)
+ parts.append(
+ (str(tag), str(ex[i]))
+ )
+ fmt = base.format_dict(multidict.MultiDict(parts))
+ return "%s image" % img.format, fmt
diff --git a/mitmproxy/contentviews/javascript.py b/mitmproxy/contentviews/javascript.py
new file mode 100644
index 00000000..c2fab875
--- /dev/null
+++ b/mitmproxy/contentviews/javascript.py
@@ -0,0 +1,20 @@
+import jsbeautifier
+
+from . import base
+
+
+class ViewJavaScript(base.View):
+ name = "JavaScript"
+ prompt = ("javascript", "j")
+ content_types = [
+ "application/x-javascript",
+ "application/javascript",
+ "text/javascript"
+ ]
+
+ def __call__(self, data, **metadata):
+ opts = jsbeautifier.default_options()
+ opts.indent_size = 2
+ data = data.decode("utf-8", "replace")
+ res = jsbeautifier.beautify(data, opts)
+ return "JavaScript", base.format_text(res)
diff --git a/mitmproxy/contentviews/json.py b/mitmproxy/contentviews/json.py
new file mode 100644
index 00000000..de7f1093
--- /dev/null
+++ b/mitmproxy/contentviews/json.py
@@ -0,0 +1,27 @@
+import json
+from typing import Optional
+
+from mitmproxy.contentviews import base
+
+
def pretty_json(s: bytes) -> Optional[bytes]:
    """
    Re-serialise UTF-8 encoded JSON with sorted keys and four-space indentation.

    Returns the pretty-printed document as UTF-8 bytes, or None when ``s`` is
    not valid UTF-8 or not valid JSON.
    """
    try:
        parsed = json.loads(s.decode('utf-8'))
    except ValueError:
        # Covers both decoding errors and JSON syntax errors.
        return None
    return json.dumps(
        parsed,
        sort_keys=True,
        indent=4,
        ensure_ascii=False,
    ).encode("utf8", "strict")
+
+
class ViewJSON(base.View):
    """Content view that pretty-prints JSON bodies."""
    name = "JSON"
    prompt = ("json", "s")
    content_types = [
        "application/json",
        "application/vnd.api+json"
    ]

    def __call__(self, data, **metadata):
        """Return ``("JSON", lines)``, or None when ``data`` cannot be pretty-printed."""
        formatted = pretty_json(data)
        if not formatted:
            return None
        return "JSON", base.format_text(formatted)
diff --git a/mitmproxy/contentviews/multipart.py b/mitmproxy/contentviews/multipart.py
new file mode 100644
index 00000000..0b0e51e2
--- /dev/null
+++ b/mitmproxy/contentviews/multipart.py
@@ -0,0 +1,21 @@
+from mitmproxy.net import http
+from mitmproxy.types import multidict
+from . import base
+
+
class ViewMultipart(base.View):
    """Content view for multipart/form-data bodies."""
    name = "Multipart Form"
    prompt = ("multipart", "m")
    content_types = ["multipart/form-data"]

    @staticmethod
    def _format(v):
        """Yield a highlighted heading followed by the formatted form fields."""
        yield [("highlight", "Form data:\n")]
        yield from base.format_dict(multidict.MultiDict(v))

    def __call__(self, data, **metadata):
        """Return ``("Multipart form", lines)``, or None if decoding yields nothing."""
        headers = metadata.get("headers", {})
        fields = http.multipart.decode(headers, data)
        if not fields:
            return None
        return "Multipart form", self._format(fields)
diff --git a/mitmproxy/contentviews/protobuf.py b/mitmproxy/contentviews/protobuf.py
new file mode 100644
index 00000000..620d9444
--- /dev/null
+++ b/mitmproxy/contentviews/protobuf.py
@@ -0,0 +1,45 @@
+import subprocess
+
+from . import base
+
+
class ViewProtobuf(base.View):
    """Human friendly view of protocol buffers.

    The view uses the protoc compiler to decode the binary.
    """

    name = "Protocol Buffer"
    prompt = ("protobuf", "p")
    content_types = [
        "application/x-protobuf",
        "application/x-protobuffer",
    ]

    @staticmethod
    def is_available():
        """Return True if ``protoc --version`` runs and reports ``libprotoc``."""
        try:
            p = subprocess.Popen(
                ["protoc", "--version"],
                stdout=subprocess.PIPE
            )
            out, _ = p.communicate()
        except OSError:
            # protoc is missing or cannot be executed.
            return False
        # The pipe is opened in binary mode, so the output is bytes and must
        # be compared against a bytes prefix.
        return out.startswith(b"libprotoc")

    def decode_protobuf(self, content):
        """Pipe ``content`` through ``protoc --decode_raw``.

        Returns protoc's stdout if it produced any, otherwise its stderr.
        """
        # if Popen raises OSError, it will be caught in
        # get_content_view and fall back to Raw
        p = subprocess.Popen(['protoc', '--decode_raw'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate(input=content)
        if out:
            return out
        else:
            return err

    def __call__(self, data, **metadata):
        """Return ``("Protobuf", lines)`` for the decoded message."""
        decoded = self.decode_protobuf(data)
        return "Protobuf", base.format_text(decoded)
diff --git a/mitmproxy/contentviews/query.py b/mitmproxy/contentviews/query.py
new file mode 100644
index 00000000..93f47829
--- /dev/null
+++ b/mitmproxy/contentviews/query.py
@@ -0,0 +1,15 @@
+from typing import List # noqa
+
+from . import base
+
+
class ViewQuery(base.View):
    """Content view that shows the ``query`` metadata instead of the body."""
    name = "Query"
    prompt = ("query", "q")

    def __call__(self, data, **metadata):
        """Format the ``query`` metadata entry, or emit empty text when it is absent or empty."""
        query = metadata.get("query")
        lines = base.format_dict(query) if query else base.format_text("")
        return "Query", lines
diff --git a/mitmproxy/contentviews/raw.py b/mitmproxy/contentviews/raw.py
new file mode 100644
index 00000000..dcc53aa7
--- /dev/null
+++ b/mitmproxy/contentviews/raw.py
@@ -0,0 +1,12 @@
+from typing import List # noqa
+
+from mitmproxy.utils import strutils
+from . import base
+
+
class ViewRaw(base.View):
    """Content view that shows the body as an escaped string."""
    name = "Raw"
    prompt = ("raw", "r")

    def __call__(self, data, **metadata):
        """Return ``("Raw", lines)`` for the escaped representation of ``data``."""
        escaped = strutils.bytes_to_escaped_str(data, True)
        return "Raw", base.format_text(escaped)
diff --git a/mitmproxy/contentviews/urlencoded.py b/mitmproxy/contentviews/urlencoded.py
new file mode 100644
index 00000000..79fe9c1c
--- /dev/null
+++ b/mitmproxy/contentviews/urlencoded.py
@@ -0,0 +1,17 @@
+from mitmproxy.net.http import url
+from mitmproxy.types import multidict
+from . import base
+
+
class ViewURLEncoded(base.View):
    """Content view for application/x-www-form-urlencoded bodies."""
    name = "URL-encoded"
    prompt = ("urlencoded", "u")
    content_types = ["application/x-www-form-urlencoded"]

    def __call__(self, data, **metadata):
        """Return ``("URLEncoded form", lines)``, or None if ``data`` is not pure ASCII."""
        try:
            text = data.decode("ascii", "strict")
        except ValueError:
            return None
        pairs = url.decode(text)
        return "URLEncoded form", base.format_dict(multidict.MultiDict(pairs))
diff --git a/mitmproxy/contentviews/wbxml.py b/mitmproxy/contentviews/wbxml.py
new file mode 100644
index 00000000..d626e188
--- /dev/null
+++ b/mitmproxy/contentviews/wbxml.py
@@ -0,0 +1,20 @@
+from mitmproxy.contrib.wbxml import ASCommandResponse
+from . import base
+
+
class ViewWBXML(base.View):
    """Content view that decodes WBXML bodies via ASCommandResponse."""
    name = "WBXML"
    prompt = ("wbxml", "w")
    content_types = [
        "application/vnd.wap.wbxml",
        "application/vnd.ms-sync.wbxml"
    ]

    def __call__(self, data, **metadata):
        """Return ``("WBXML", lines)``, or None if parsing fails or yields nothing."""
        try:
            parser = ASCommandResponse.ASCommandResponse(data)
            parsedContent = parser.xmlString
            if parsedContent:
                return "WBXML", base.format_text(parsedContent)
        except Exception:
            # Best effort: any parser failure means "cannot display".
            # KeyboardInterrupt and SystemExit are deliberately not swallowed.
            return None
        return None
diff --git a/mitmproxy/contentviews/xml_html.py b/mitmproxy/contentviews/xml_html.py
new file mode 100644
index 00000000..0f2ce57d
--- /dev/null
+++ b/mitmproxy/contentviews/xml_html.py
@@ -0,0 +1,234 @@
+import io
+import re
+import textwrap
+from typing import Iterable
+
+from mitmproxy.contentviews import base
+from mitmproxy.utils import sliding_window
+
+"""
+A custom XML/HTML prettifier. Compared to other prettifiers, its main features are:
+
+- Implemented in pure Python.
+- Modifies whitespace only.
+- Works with any input.
+- Lazy evaluation.
+
+The implementation is split into two main parts: tokenization and formatting of tokens.
+"""
+
# http://www.xml.com/pub/a/2001/07/25/namingparts.html - this is close enough for what we do.
# Raw string: "\-" is not a valid escape sequence in a normal string literal.
REGEX_TAG = re.compile(r"[a-zA-Z0-9._:\-]+(?!=)")
# https://www.w3.org/TR/html5/syntax.html#void-elements
HTML_VOID_ELEMENTS = {
    "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
    "source", "track", "wbr"
}
# Tags whose children are not indented any further.
NO_INDENT_TAGS = {"xml", "doctype", "html"}
# Number of spaces added per nesting level.
INDENT = 2
+
+
class Token:
    """A piece of the input document; ``data`` holds its raw text."""

    def __init__(self, data):
        self.data = data

    def __repr__(self):
        kind = type(self).__name__
        return "{0}({1})".format(kind, self.data)
+
+
class Text(Token):
    """Token for the character data found between tags."""

    @property
    def text(self):
        """The token's data with surrounding whitespace removed."""
        stripped = self.data.strip()
        return stripped
+
+
class Tag(Token):
    """Token for markup: elements, comments and CDATA sections."""

    @property
    def tag(self):
        """Lower-cased first REGEX_TAG match in the data, or "<empty>" if there is none."""
        match = REGEX_TAG.search(self.data)
        if match is None:
            return "<empty>"
        return match.group(0).lower()

    @property
    def is_comment(self) -> bool:
        return self.data.startswith("<!--")

    @property
    def is_cdata(self) -> bool:
        return self.data.startswith("<![CDATA[")

    @property
    def is_closing(self):
        return self.data.startswith("</")

    @property
    def is_self_closing(self):
        """True for comments, CDATA, ``.../>`` tags and HTML void elements."""
        if self.is_comment or self.is_cdata:
            return True
        return self.data.endswith("/>") or self.tag in HTML_VOID_ELEMENTS

    @property
    def is_opening(self):
        return not (self.is_closing or self.is_self_closing)

    @property
    def done(self):
        """Whether the data collected so far ends with this tag's terminator."""
        if self.is_comment:
            terminator = "-->"
        elif self.is_cdata:
            terminator = "]]>"
        else:
            # This fails for attributes that contain an unescaped ">"
            terminator = ">"
        return self.data.endswith(terminator)
+
+
def tokenize(data: str) -> Iterable[Token]:
    """
    Lazily split ``data`` into alternating Text and Tag tokens.

    Text tokens that are empty after stripping are not yielded. A Tag keeps
    absorbing input up to the next ">" until its ``done`` property is true,
    so comments and CDATA sections containing ">" stay in one token.
    """
    pos = 0
    length = len(data)
    current = Text("")  # type: Token

    def consume(needle, keep):
        # Return data[pos:hit + keep] and advance pos past it; if the needle
        # is not found, everything up to the end of the input is consumed.
        nonlocal pos
        hit = data.find(needle, pos)
        stop = (length if hit == -1 else hit) + keep
        chunk = data[pos:stop]
        pos = stop
        return chunk

    while pos < length:
        if isinstance(current, Text):
            current.data = consume("<", 0)
            if current.text:
                yield current
            current = Tag("")
        elif isinstance(current, Tag):
            current.data += consume(">", 1)
            if current.done:
                yield current
                current = Text("")
    # Emit whatever is left, e.g. a tag that was never terminated.
    if current.data.strip():
        yield current
+
+
def indent_text(data: str, prefix: str) -> str:
    """
    Dedent ``data``, strip it, and indent every line with ``prefix``.

    At most the first 32 characters of ``prefix`` are used.
    """
    # Pad the first line so that it does not pin the common margin to zero
    # and continuation lines get dedented in cases like this:
    #   <li>This is
    #     example text
    #     over multiple lines
    #   </li>
    padded = "".join((" " * 32, data))
    body = textwrap.dedent(padded).strip()
    capped_prefix = prefix[:32]
    return textwrap.indent(body, capped_prefix)
+
+
def is_inline_text(a: Token, b: Token, c: Token) -> bool:
    """
    Return True if ``a b c`` is ``<tag>single-line text</tag>``.

    That is: ``a`` is an opening tag, ``b`` is text without a newline and
    ``c`` is the closing tag with the same name as ``a``. Always returns a
    real bool, as the annotation promises.
    """
    if not (isinstance(a, Tag) and isinstance(b, Text) and isinstance(c, Tag)):
        return False
    return a.is_opening and "\n" not in b.data and c.is_closing and a.tag == c.tag
+
+
def is_inline(prev2: Token, prev1: Token, t: Token, next1: Token, next2: Token) -> bool:
    """
    Decide whether token ``t`` stays on the same line as its neighbours.

    ``prev2``/``prev1`` and ``next1``/``next2`` are the surrounding tokens
    (or None at the document edges). Always returns a real bool.
    """
    if isinstance(t, Text):
        return bool(is_inline_text(prev1, t, next1))
    if not isinstance(t, Tag):
        return False
    # The tag opens or closes a <tag>text</tag> triple.
    if is_inline_text(prev2, prev1, t) or is_inline_text(t, next1, next2):
        return True
    if isinstance(next1, Tag) and t.is_opening and next1.is_closing and t.tag == next1.tag:
        return True  # <div></div> (start tag)
    if isinstance(prev1, Tag) and prev1.is_opening and t.is_closing and prev1.tag == t.tag:
        return True  # <div></div> (end tag)
    return False
+
+
class ElementStack:
    """
    Keep track of how deeply nested our document is.

    ``open_tags`` is the stack of currently open tag names and ``indent`` is
    the whitespace prefix for the current nesting level.
    """

    def __init__(self):
        self.open_tags = []
        self.indent = ""

    def push_tag(self, tag: str):
        """Record ``tag`` as open; ignored once more than 16 tags are open."""
        if len(self.open_tags) > 16:
            return
        self.open_tags.append(tag)
        if tag not in NO_INDENT_TAGS:
            self.indent += " " * INDENT

    def pop_tag(self, tag: str):
        """
        Close ``tag`` together with every tag opened after it.

        A closing tag without a matching open tag leaves the stack untouched.
        """
        if tag in self.open_tags:
            remove_indent = 0
            while True:
                t = self.open_tags.pop()
                if t not in NO_INDENT_TAGS:
                    remove_indent += INDENT
                if t == tag:
                    break
            # Guard against remove_indent == 0: indent[:-0] is indent[:0],
            # which would wipe the whole indentation instead of keeping it.
            if remove_indent:
                self.indent = self.indent[:-remove_indent]
        else:
            pass  # this closing tag has no start tag. let's keep indentation as-is.
+
+
def format_xml(tokens: Iterable[Token]) -> str:
    """
    Render ``tokens`` as an indented document.

    Each token is inspected together with its two predecessors and two
    successors to decide whether it stays inline with its neighbours.
    """
    context = ElementStack()
    pieces = []

    for neighbourhood in sliding_window.window(tokens, 2, 2):
        token = neighbourhood[2]
        if isinstance(token, Tag):
            if token.is_opening:
                pieces.append(indent_text(token.data, context.indent))
                if not is_inline(*neighbourhood):
                    pieces.append("\n")
                # Children of this tag are indented one level deeper.
                context.push_tag(token.tag)
            elif token.is_closing:
                # Dedent first so the closing tag lines up with its opener.
                context.pop_tag(token.tag)
                if is_inline(*neighbourhood):
                    pieces.append(token.data)
                else:
                    pieces.append(indent_text(token.data, context.indent))
                pieces.append("\n")
            else:  # self-closing
                pieces.append(indent_text(token.data, context.indent))
                pieces.append("\n")
        elif isinstance(token, Text):
            if is_inline(*neighbourhood):
                pieces.append(token.text)
            else:
                pieces.append(indent_text(token.data, context.indent))
                pieces.append("\n")
        else:  # pragma: no cover
            raise RuntimeError()

    return "".join(pieces)
+
+
class ViewXmlHtml(base.View):
    """Content view that re-indents XML and HTML documents."""
    name = "XML/HTML"
    prompt = ("xml/html", "x")
    content_types = ["text/xml", "text/html"]

    def __call__(self, data, **metadata):
        """Return ``("HTML" | "XML", lines)`` for the prettified document."""
        # TODO:
        # We should really have the message text as str here,
        # not the message content as bytes.
        # https://github.com/mitmproxy/mitmproxy/issues/1662#issuecomment-266192578
        #
        # "xmlcharrefreplace" only works for encoding and raises TypeError when
        # decoding hits an invalid byte; "backslashreplace" works for decoding
        # and keeps the offending bytes visible as \xNN escapes.
        data = data.decode("utf8", "backslashreplace")
        tokens = tokenize(data)
        # TODO:
        # Performance: Don't render the whole document right away.
        # Let's wait with this until we have a sequence-like interface,
        # this thing is reasonably fast right now anyway.
        pretty = base.format_text(format_xml(tokens))
        if "html" in data.lower():
            t = "HTML"
        else:
            t = "XML"
        return t, pretty
diff --git a/mitmproxy/utils/sliding_window.py b/mitmproxy/utils/sliding_window.py
new file mode 100644
index 00000000..4714b8e3
--- /dev/null
+++ b/mitmproxy/utils/sliding_window.py
@@ -0,0 +1,30 @@
+import itertools
+from typing import TypeVar, Iterator, Tuple, Optional
+
T = TypeVar('T')


def window(
    iterator: Iterator[T],
    behind: int = 0,
    ahead: int = 0
) -> Iterator[Tuple[Optional[T], ...]]:
    """
    Sliding window for an iterator.

    Yields one tuple of ``behind + 1 + ahead`` items per element: the
    ``behind`` preceding elements, the element itself and the ``ahead``
    following elements. Positions beyond either end are filled with None.

    Example:
        >>> for prev, i, nxt in window(range(4), 1, 1):
        ...     print(prev, i, nxt)
        None 0 1
        0 1 2
        1 2 3
        2 3 None
    """
    # One independent copy of the input per window position.
    iters = list(itertools.tee(iterator, behind + 1 + ahead))
    # Delay the "behind" copies by prepending None padding.
    for i in range(behind):
        iters[i] = itertools.chain((behind - i) * [None], iters[i])
    # Advance the "ahead" copies and pad their tails with None, so that
    # every copy has the same length as the input.
    for i in range(ahead):
        iters[-1 - i] = itertools.islice(
            itertools.chain(iters[-1 - i], (ahead - i) * [None]),
            (ahead - i),
            None
        )
    return zip(*iters)
diff --git a/requirements.txt b/requirements.txt
index 67a02a97..ab8e8a0b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5'
-e .[dev,examples,contentviews]
diff --git a/setup.py b/setup.py
index 4f4d5c7d..476d5559 100644
--- a/setup.py
+++ b/setup.py
@@ -69,7 +69,6 @@ setup(
"html2text>=2016.1.8, <=2016.9.19",
"hyperframe>=4.0.1, <5",
"jsbeautifier>=1.6.3, <1.7",
- "lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet.
"Pillow>=3.2, <3.5",
"passlib>=1.6.5, <1.8",
"pyasn1>=0.1.9, <0.2",
diff --git a/test/mitmproxy/addons/test_dumper.py b/test/mitmproxy/addons/test_dumper.py
index 760efa08..8b15c85b 100644
--- a/test/mitmproxy/addons/test_dumper.py
+++ b/test/mitmproxy/addons/test_dumper.py
@@ -131,7 +131,7 @@ def test_echo_request_line():
class TestContentView:
- @mock.patch("mitmproxy.contentviews.ViewAuto.__call__")
+ @mock.patch("mitmproxy.contentviews.auto.ViewAuto.__call__")
def test_contentview(self, view_auto):
view_auto.side_effect = exceptions.ContentViewException("")
sio = io.StringIO()
diff --git a/test/mitmproxy/contentviews/__init__.py b/test/mitmproxy/contentviews/__init__.py
new file mode 100644
index 00000000..9adc57ec
--- /dev/null
+++ b/test/mitmproxy/contentviews/__init__.py
@@ -0,0 +1,9 @@
def full_eval(instance):
    """
    Wrap a content view so that its generated lines are collected into a list.

    The wrapper passes None through unchanged and otherwise returns
    ``(name, list(lines))``.
    """
    def call(data, **metadata):
        result = instance(data, **metadata)
        if result is not None:
            description, lines = result
            return description, list(lines)
        return None

    return call
diff --git a/test/mitmproxy/contentviews/test_api.py b/test/mitmproxy/contentviews/test_api.py
new file mode 100644
index 00000000..8e6c3427
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_api.py
@@ -0,0 +1,85 @@
+import mock
+
+from mitmproxy import contentviews
+from mitmproxy.exceptions import ContentViewException
+from mitmproxy.net.http import Headers
+from mitmproxy.test import tutils
+
+
class TestContentView(contentviews.View):
    """Minimal view used to exercise registration in contentviews."""
    name = "test"
    prompt = ("t", "test")
    content_types = ["test/123"]
+
+
def test_add_remove():
    """A view can be registered once; duplicates are rejected until it is removed."""
    view = TestContentView()
    contentviews.add(view)

    # Registering the very same instance again must fail.
    with tutils.raises(ContentViewException):
        contentviews.add(view)

    # A second instance is rejected as well (same shortcut).
    with tutils.raises(ContentViewException):
        contentviews.add(TestContentView())

    contentviews.remove(view)
+
+
def test_get_content_view():
    """get_content_view reports the description, the lines and any error."""
    raw_view = contentviews.get("Raw")
    description, rendered, error = contentviews.get_content_view(raw_view, b"[1, 2, 3]")
    assert "Raw" in description
    assert list(rendered)
    assert not error

    # The Auto view selects JSON based on the content type.
    json_headers = Headers(content_type="application/json")
    description, rendered, error = contentviews.get_content_view(
        contentviews.get("Auto"),
        b"[1, 2, 3]",
        headers=json_headers
    )
    assert description == "JSON"

    # Truncated JSON cannot be parsed.
    description, rendered, error = contentviews.get_content_view(
        contentviews.get("JSON"),
        b"[1, 2",
    )
    assert "Couldn't parse" in description

    # An exception raised inside a view is reported as an error.
    with mock.patch("mitmproxy.contentviews.auto.ViewAuto.__call__") as patched_auto:
        patched_auto.side_effect = ValueError

        description, rendered, error = contentviews.get_content_view(
            contentviews.get("Auto"),
            b"[1, 2",
        )
        assert error
        assert "Couldn't parse" in description
+
+
def test_get_message_content_view():
    """get_message_content_view handles unknown views, encodings and missing content."""
    request = tutils.treq()

    description, rendered, error = contentviews.get_message_content_view("raw", request)
    assert description == "Raw"

    # An unknown view name also ends up as Raw.
    description, rendered, error = contentviews.get_message_content_view("unknown", request)
    assert description == "Raw"

    request.encode("gzip")
    description, rendered, error = contentviews.get_message_content_view("raw", request)
    assert description == "[decoded gzip] Raw"

    # The header no longer matches the gzip-encoded body.
    request.headers["content-encoding"] = "deflate"
    description, rendered, error = contentviews.get_message_content_view("raw", request)
    assert description == "[cannot decode] Raw"

    request.content = None
    description, rendered, error = contentviews.get_message_content_view("raw", request)
    assert list(rendered) == [[("error", "content missing")]]
+
+
def test_get_by_shortcut():
    """The shortcut "s" resolves to a registered view."""
    found = contentviews.get_by_shortcut("s")
    assert found
diff --git a/test/mitmproxy/contentviews/test_auto.py b/test/mitmproxy/contentviews/test_auto.py
new file mode 100644
index 00000000..a077affa
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_auto.py
@@ -0,0 +1,47 @@
+from mitmproxy.contentviews import auto
+from mitmproxy.net import http
+from mitmproxy.types import multidict
+from . import full_eval
+
+
def test_view_auto():
    """ViewAuto picks a view from the content type, the content and the query."""
    view = full_eval(auto.ViewAuto())

    result = view(b"foo", headers=http.Headers())
    assert result[0] == "Raw"

    result = view(b"<html></html>", headers=http.Headers(content_type="text/html"))
    assert result[0] == "HTML"

    result = view(b"foo", headers=http.Headers(content_type="text/flibble"))
    assert result[0] == "Raw"

    result = view(b"<xml></xml>", headers=http.Headers(content_type="text/flibble"))
    assert result[0].startswith("XML")

    result = view(b"\xFF" * 30)
    assert result[0] == "Hex"

    result = view(b"", headers=http.Headers())
    assert result[0] == "No content"

    result = view(
        b"",
        headers=http.Headers(),
        query=multidict.MultiDict([("foo", "bar")]),
    )
    assert result[0] == "Query"
diff --git a/test/mitmproxy/contentviews/test_css.py b/test/mitmproxy/contentviews/test_css.py
new file mode 100644
index 00000000..ecb9259b
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_css.py
@@ -0,0 +1,29 @@
+from mitmproxy.contentviews import css
+from mitmproxy.test import tutils
+from . import full_eval
+
+try:
+ import cssutils
+except:
+ cssutils = None
+
+
def test_view_css():
    """The CSS view's line counts depend on whether cssutils is importable."""
    view = full_eval(css.ViewCSS())

    with open(tutils.test_data.path('mitmproxy/data/1.css'), 'r') as handle:
        stylesheet = handle.read()

    line_count = len(list(view('a')[1]))
    if cssutils:
        assert line_count == 0
    else:
        assert line_count == 1

    line_count = len(list(view(stylesheet)[1]))
    if cssutils:
        assert line_count > 1
    else:
        assert line_count == 1
diff --git a/test/mitmproxy/contentviews/test_hex.py b/test/mitmproxy/contentviews/test_hex.py
new file mode 100644
index 00000000..4292007e
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_hex.py
@@ -0,0 +1,7 @@
+from mitmproxy.contentviews import hex
+from . import full_eval
+
+
def test_view_hex():
    """The hex view returns a truthy result for plain bytes."""
    view = full_eval(hex.ViewHex())
    result = view(b"foo")
    assert result
diff --git a/test/mitmproxy/contentviews/test_html_outline.py b/test/mitmproxy/contentviews/test_html_outline.py
new file mode 100644
index 00000000..9e664e52
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_html_outline.py
@@ -0,0 +1,9 @@
+from mitmproxy.contentviews import html_outline
+from test.mitmproxy.contentviews import full_eval
+
+
def test_view_html_outline():
    """The outline view handles both HTML and invalid UTF-8 input."""
    view = full_eval(html_outline.ViewHTMLOutline())
    for body in (b"<html><br><br></br><p>one</p></html>", b'\xfe'):
        assert view(body)
diff --git a/test/mitmproxy/contentviews/test_image.py b/test/mitmproxy/contentviews/test_image.py
new file mode 100644
index 00000000..9e7e28f5
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_image.py
@@ -0,0 +1,17 @@
+from mitmproxy.contentviews import image
+from mitmproxy.test import tutils
+from . import full_eval
+
+
def test_view_image():
    """The image view accepts the sample images and rejects non-image data."""
    view = full_eval(image.ViewImage())
    samples = (
        "mitmproxy/data/image.png",
        "mitmproxy/data/image.gif",
        "mitmproxy/data/image-err1.jpg",
        "mitmproxy/data/image.ico",
    )
    for sample in samples:
        with open(tutils.test_data.path(sample), "rb") as handle:
            assert view(handle.read())

    assert not view(b"flibble")
diff --git a/test/mitmproxy/contentviews/test_javascript.py b/test/mitmproxy/contentviews/test_javascript.py
new file mode 100644
index 00000000..43039c93
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_javascript.py
@@ -0,0 +1,10 @@
+from mitmproxy.contentviews import javascript
+from . import full_eval
+
+
def test_view_javascript():
    """The JavaScript view returns a result for valid, broken and non-UTF-8 input."""
    view = full_eval(javascript.ViewJavaScript())
    bodies = (
        b"[1, 2, 3]",
        b"[1, 2, 3",
        b"function(a){[1, 2, 3]}",
        b"\xfe",  # invalid utf-8
    )
    for body in bodies:
        assert view(body)
diff --git a/test/mitmproxy/contentviews/test_json.py b/test/mitmproxy/contentviews/test_json.py
new file mode 100644
index 00000000..5e87b570
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_json.py
@@ -0,0 +1,16 @@
+from mitmproxy.contentviews import json
+from . import full_eval
+
+
def test_pretty_json():
    """pretty_json accepts valid UTF-8 JSON and rejects everything else."""
    accepted = (
        b'{"foo": 1}',
        b'{"foo" : "\xe4\xb8\x96\xe7\x95\x8c"}',  # utf8 with chinese characters
    )
    rejected = (
        b"moo",
        b'{"foo" : "\xFF"}',
    )
    for document in accepted:
        assert json.pretty_json(document)
    for document in rejected:
        assert not json.pretty_json(document)
+
+
def test_view_json():
    """The JSON view renders complete documents and rejects truncated ones."""
    view = full_eval(json.ViewJSON())
    empty_object = view(b"{}")
    truncated = view(b"{")
    number_list = view(b"[1, 2, 3, 4, 5]")
    assert empty_object
    assert not truncated
    assert number_list
diff --git a/test/mitmproxy/contentviews/test_multipart.py b/test/mitmproxy/contentviews/test_multipart.py
new file mode 100644
index 00000000..48a5ccc9
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_multipart.py
@@ -0,0 +1,25 @@
+from mitmproxy.contentviews import multipart
+from mitmproxy.net import http
+from . import full_eval
+
+
def test_view_multipart():
    """The multipart view needs a parseable content type that carries a boundary."""
    view = full_eval(multipart.ViewMultipart())
    body = b"""
--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
        """.strip()

    with_boundary = http.Headers(content_type="multipart/form-data; boundary=AaB03x")
    assert view(body, headers=with_boundary)

    # No content type, no boundary, or an unparseable content type: no result.
    unusable_headers = (
        http.Headers(),
        http.Headers(content_type="multipart/form-data"),
        http.Headers(content_type="unparseable"),
    )
    for headers in unusable_headers:
        assert not view(body, headers=headers)
diff --git a/test/mitmproxy/contentviews/test_protobuf.py b/test/mitmproxy/contentviews/test_protobuf.py
new file mode 100644
index 00000000..1224b8db
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_protobuf.py
@@ -0,0 +1,12 @@
+from mitmproxy.contentviews import protobuf
+from mitmproxy.test import tutils
+from . import full_eval
+
if protobuf.ViewProtobuf.is_available():
    def test_view_protobuf_request():
        """Decode a sample protobuf message; only defined when protoc is available."""
        v = full_eval(protobuf.ViewProtobuf())

        p = tutils.test_data.path("mitmproxy/data/protobuf01")
        with open(p, "rb") as f:
            raw = f.read()
        content_type, output = v(raw)
        assert content_type == "Protobuf"
        # full_eval() returns the lines as a list, so index into it rather
        # than calling the Python 2 style .next(): first line, first chunk.
        assert output[0][0][1] == '1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"'
diff --git a/test/mitmproxy/contentviews/test_query.py b/test/mitmproxy/contentviews/test_query.py
new file mode 100644
index 00000000..d2bddd05
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_query.py
@@ -0,0 +1,13 @@
+from mitmproxy.contentviews import query
+from mitmproxy.types import multidict
+from . import full_eval
+
+
def test_view_query():
    """The query view formats the query metadata and is empty without it."""
    body = ""
    view = full_eval(query.ViewQuery())

    result = view(body, query=multidict.MultiDict([("foo", "bar")]))
    description, lines = result[0], result[1]
    assert description == "Query"
    assert lines == [[("header", "foo: "), ("text", "bar")]]

    assert view(body) == ("Query", [])
diff --git a/test/mitmproxy/contentviews/test_raw.py b/test/mitmproxy/contentviews/test_raw.py
new file mode 100644
index 00000000..0e6e1b34
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_raw.py
@@ -0,0 +1,7 @@
+from mitmproxy.contentviews import raw
+from . import full_eval
+
+
def test_view_raw():
    """The raw view returns a truthy result for plain bytes."""
    view = full_eval(raw.ViewRaw())
    result = view(b"foo")
    assert result
diff --git a/test/mitmproxy/contentviews/test_urlencoded.py b/test/mitmproxy/contentviews/test_urlencoded.py
new file mode 100644
index 00000000..d01f9aaa
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_urlencoded.py
@@ -0,0 +1,15 @@
+from mitmproxy.contentviews import urlencoded
+from mitmproxy.net.http import url
+from . import full_eval
+
+
def test_view_urlencoded():
    """The URL-encoded view accepts encoded forms and rejects non-ASCII bytes."""
    view = full_eval(urlencoded.ViewURLEncoded())

    two_fields = url.encode([("one", "two"), ("three", "four")]).encode()
    assert view(two_fields)

    empty_value = url.encode([("adsfa", "")]).encode()
    assert view(empty_value)

    assert not view(b"\xFF\x00")
diff --git a/test/mitmproxy/contentviews/test_xml_html.py b/test/mitmproxy/contentviews/test_xml_html.py
new file mode 100644
index 00000000..899ecfde
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html.py
@@ -0,0 +1,29 @@
+import pytest
+
+from mitmproxy.contentviews import xml_html
+from mitmproxy.test import tutils
+from . import full_eval
+
+data = tutils.test_data.push("mitmproxy/contentviews/test_xml_html_data/")
+
+
def test_simple():
    """Plain text is labelled XML; a document containing "html" is labelled HTML."""
    view = full_eval(xml_html.ViewXmlHtml())
    plain = view(b"foo")
    markup = view(b"<html></html>")
    assert plain == ('XML', [[('text', 'foo')]])
    assert markup == ('HTML', [[('text', '<html></html>')]])
+
+
@pytest.mark.parametrize("filename", [
    "simple.html",
    "cdata.xml",
    "comment.xml",
    "inline.html",
])
def test_format_xml(filename):
    """Formatting ``<name>.<ext>`` must reproduce ``<name>-formatted.<ext>``."""
    # Derive the expected file from the bare filename. Replacing "." in the
    # full path would also rewrite dots in directory names such as ".tox".
    stem, _, extension = filename.rpartition(".")
    expected_path = data.path("{}-formatted.{}".format(stem, extension))

    with open(data.path(filename)) as f:
        source = f.read()
    with open(expected_path) as f:
        expected = f.read()
    tokens = xml_html.tokenize(source)
    assert xml_html.format_xml(tokens) == expected
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml b/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml
new file mode 100644
index 00000000..44a81a83
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml
@@ -0,0 +1,10 @@
+<exampleOfACDATA>
+ <![CDATA[
+ Since this is a CDATA section
+ I can use all sorts of reserved characters
+ like > < " and &
+ or write things like
+ <foo></bar>
+ but my document is still well formed!
+ ]]>
+</exampleOfACDATA>
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml b/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml
new file mode 100644
index 00000000..b4c5dfca
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml
@@ -0,0 +1,10 @@
+<exampleOfACDATA>
+<![CDATA[
+ Since this is a CDATA section
+ I can use all sorts of reserved characters
+ like > < " and &
+or write things like
+ <foo></bar>
+ but my document is still well formed!
+]]>
+</exampleOfACDATA> \ No newline at end of file
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml b/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml
new file mode 100644
index 00000000..d0da6665
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml
@@ -0,0 +1,10 @@
+<exampleOfAComment>
+ <!--
+ Since this is a comment
+ I can use all sorts of reserved characters
+ like > < " and &
+ or write things like
+ <foo></bar>
+ but my document is still well formed!
+ -->
+</exampleOfAComment>
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/comment.xml b/test/mitmproxy/contentviews/test_xml_html_data/comment.xml
new file mode 100644
index 00000000..3f54ddba
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/comment.xml
@@ -0,0 +1,10 @@
+<exampleOfAComment>
+<!--
+ Since this is a comment
+ I can use all sorts of reserved characters
+ like > < " and &
+ or write things like
+ <foo></bar>
+ but my document is still well formed!
+-->
+</exampleOfAComment> \ No newline at end of file
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html b/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html
new file mode 100644
index 00000000..5253bf4f
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html
@@ -0,0 +1,14 @@
+<html>
+<head>
+ <title>Test Page</title>
+</head>
+<body>
+ <p>
+ <i class="fa fa-alert"></i>
+ Some things should be
+ <b>inline</b>
+ , some things shouldn't!
+ </p>
+ <i class="fa fa-warning"/>
+</body>
+</html>
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/inline.html b/test/mitmproxy/contentviews/test_xml_html_data/inline.html
new file mode 100644
index 00000000..3e4b16b9
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/inline.html
@@ -0,0 +1,7 @@
+<html>
+<head><title>Test Page</title></head>
+<body>
+ <p><i class="fa fa-alert"></i>Some things should be <b>inline</b>, some things shouldn't!</p>
+ <i class="fa fa-warning"/>
+</body>
+</html> \ No newline at end of file
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html b/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html
new file mode 100644
index 00000000..23438428
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <title>title</title>
+</head>
+<body>
+ <h1>Hello World</h1>
+ <!-- page content -->
+</body>
+</html>
diff --git a/test/mitmproxy/contentviews/test_xml_html_data/simple.html b/test/mitmproxy/contentviews/test_xml_html_data/simple.html
new file mode 100644
index 00000000..73e81a5e
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_xml_html_data/simple.html
@@ -0,0 +1 @@
+<!DOCTYPE html><html lang="en"><head><title>title</title></head><body><h1>Hello World</h1><!-- page content --></body></html>
diff --git a/test/mitmproxy/data/amf01 b/test/mitmproxy/data/amf01
deleted file mode 100644
index c8fc261d..00000000
--- a/test/mitmproxy/data/amf01
+++ /dev/null
Binary files differ
diff --git a/test/mitmproxy/data/amf02 b/test/mitmproxy/data/amf02
deleted file mode 100644
index ba69f130..00000000
--- a/test/mitmproxy/data/amf02
+++ /dev/null
Binary files differ
diff --git a/test/mitmproxy/data/amf03 b/test/mitmproxy/data/amf03
deleted file mode 100644
index d9fa736a..00000000
--- a/test/mitmproxy/data/amf03
+++ /dev/null
Binary files differ
diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py
deleted file mode 100644
index 1f16765b..00000000
--- a/test/mitmproxy/test_contentview.py
+++ /dev/null
@@ -1,284 +0,0 @@
-import mock
-from mitmproxy.exceptions import ContentViewException
-from mitmproxy.net.http import Headers
-from mitmproxy.net.http import url
-from mitmproxy.types import multidict
-
-import mitmproxy.contentviews as cv
-from mitmproxy.test import tutils
-
-try:
- import pyamf
-except ImportError:
- pyamf = None
-
-try:
- import cssutils
-except:
- cssutils = None
-
-
-class TestContentView:
-
- def test_view_auto(self):
- v = cv.ViewAuto()
- f = v(
- b"foo",
- headers=Headers()
- )
- assert f[0] == "Raw"
-
- f = v(
- b"<html></html>",
- headers=Headers(content_type="text/html")
- )
- assert f[0] == "HTML"
-
- f = v(
- b"foo",
- headers=Headers(content_type="text/flibble")
- )
- assert f[0] == "Raw"
-
- f = v(
- b"<xml></xml>",
- headers=Headers(content_type="text/flibble")
- )
- assert f[0].startswith("XML")
-
- f = v(
- b"",
- headers=Headers()
- )
- assert f[0] == "No content"
-
- f = v(
- b"",
- headers=Headers(),
- query=multidict.MultiDict([("foo", "bar")]),
- )
- assert f[0] == "Query"
-
- def test_view_urlencoded(self):
- d = url.encode([("one", "two"), ("three", "four")]).encode()
- v = cv.ViewURLEncoded()
- assert v(d)
- d = url.encode([("adsfa", "")]).encode()
- v = cv.ViewURLEncoded()
- assert v(d)
-
- def test_view_html(self):
- v = cv.ViewHTML()
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
-
- s = b"gobbledygook"
- assert not v(s)
-
- def test_view_html_outline(self):
- v = cv.ViewHTMLOutline()
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
- assert v(b'\xfe')
-
- def test_view_json(self):
- cv.VIEW_CUTOFF = 100
- v = cv.ViewJSON()
- assert v(b"{}")
- assert not v(b"{")
- assert v(b"[1, 2, 3, 4, 5]")
-
- def test_view_xml(self):
- v = cv.ViewXML()
- assert v(b"<foo></foo>")
- assert not v(b"<foo>")
- s = b"""<?xml version="1.0" encoding="UTF-8"?>
- <?xml-stylesheet title="XSL_formatting"?>
- <rss
- xmlns:media="http://search.yahoo.com/mrss/"
- xmlns:atom="http://www.w3.org/2005/Atom"
- version="2.0">
- </rss>
- """
- assert v(s)
-
- def test_view_raw(self):
- v = cv.ViewRaw()
- assert v(b"foo")
-
- def test_view_javascript(self):
- v = cv.ViewJavaScript()
- assert v(b"[1, 2, 3]")
- assert v(b"[1, 2, 3")
- assert v(b"function(a){[1, 2, 3]}")
- assert v(b"\xfe") # invalid utf-8
-
- def test_view_css(self):
- v = cv.ViewCSS()
-
- with open(tutils.test_data.path('mitmproxy/data/1.css'), 'r') as fp:
- fixture_1 = fp.read()
-
- result = v('a')
-
- if cssutils:
- assert len(list(result[1])) == 0
- else:
- assert len(list(result[1])) == 1
-
- result = v(fixture_1)
-
- if cssutils:
- assert len(list(result[1])) > 1
- else:
- assert len(list(result[1])) == 1
-
- def test_view_hex(self):
- v = cv.ViewHex()
- assert v(b"foo")
-
- def test_view_image(self):
- v = cv.ViewImage()
- p = tutils.test_data.path("mitmproxy/data/image.png")
- assert v(open(p, "rb").read())
-
- p = tutils.test_data.path("mitmproxy/data/image.gif")
- assert v(open(p, "rb").read())
-
- p = tutils.test_data.path("mitmproxy/data/image-err1.jpg")
- assert v(open(p, "rb").read())
-
- p = tutils.test_data.path("mitmproxy/data/image.ico")
- assert v(open(p, "rb").read())
-
- assert not v(b"flibble")
-
- def test_view_multipart(self):
- view = cv.ViewMultipart()
- v = b"""
---AaB03x
-Content-Disposition: form-data; name="submit-name"
-
-Larry
---AaB03x
- """.strip()
- h = Headers(content_type="multipart/form-data; boundary=AaB03x")
- assert view(v, headers=h)
-
- h = Headers()
- assert not view(v, headers=h)
-
- h = Headers(content_type="multipart/form-data")
- assert not view(v, headers=h)
-
- h = Headers(content_type="unparseable")
- assert not view(v, headers=h)
-
- def test_view_query(self):
- d = ""
- v = cv.ViewQuery()
- f = v(d, query=multidict.MultiDict([("foo", "bar")]))
- assert f[0] == "Query"
- assert [x for x in f[1]] == [[("header", "foo: "), ("text", "bar")]]
-
- def test_add_cv(self):
- class TestContentView(cv.View):
- name = "test"
- prompt = ("t", "test")
-
- tcv = TestContentView()
- cv.add(tcv)
-
- # repeated addition causes exception
- tutils.raises(
- ContentViewException,
- cv.add,
- tcv
- )
-
-
-def test_get_content_view():
- desc, lines, err = cv.get_content_view(
- cv.get("Raw"),
- b"[1, 2, 3]",
- )
- assert "Raw" in desc
- assert list(lines)
- assert not err
-
- desc, lines, err = cv.get_content_view(
- cv.get("Auto"),
- b"[1, 2, 3]",
- headers=Headers(content_type="application/json")
- )
- assert desc == "JSON"
-
- desc, lines, err = cv.get_content_view(
- cv.get("JSON"),
- b"[1, 2",
- )
- assert "Couldn't parse" in desc
-
- with mock.patch("mitmproxy.contentviews.ViewAuto.__call__") as view_auto:
- view_auto.side_effect = ValueError
-
- desc, lines, err = cv.get_content_view(
- cv.get("Auto"),
- b"[1, 2",
- )
- assert err
- assert "Couldn't parse" in desc
-
-
-def test_get_message_content_view():
- r = tutils.treq()
- desc, lines, err = cv.get_message_content_view("raw", r)
- assert desc == "Raw"
-
- r.encode("gzip")
- desc, lines, err = cv.get_message_content_view("raw", r)
- assert desc == "[decoded gzip] Raw"
-
- r.headers["content-encoding"] = "deflate"
- desc, lines, err = cv.get_message_content_view("raw", r)
- assert desc == "[cannot decode] Raw"
-
- r.content = None
- desc, lines, err = cv.get_message_content_view("raw", r)
- assert list(lines) == [[("error", "content missing")]]
-
-
-if pyamf:
- def test_view_amf_request():
- v = cv.ViewAMF()
-
- p = tutils.test_data.path("mitmproxy/data/amf01")
- assert v(open(p, "rb").read())
-
- p = tutils.test_data.path("mitmproxy/data/amf02")
- assert v(open(p, "rb").read())
-
- def test_view_amf_response():
- v = cv.ViewAMF()
- p = tutils.test_data.path("mitmproxy/data/amf03")
- assert v(open(p, "rb").read())
-
-if cv.ViewProtobuf.is_available():
- def test_view_protobuf_request():
- v = cv.ViewProtobuf()
-
- p = tutils.test_data.path("mitmproxy/data/protobuf01")
- content_type, output = v(open(p, "rb").read())
- assert content_type == "Protobuf"
- assert output.next()[0][1] == '1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"'
-
-
-def test_get_by_shortcut():
- assert cv.get_by_shortcut("h")
-
-
-def test_pretty_json():
- assert cv.pretty_json(b'{"foo": 1}')
- assert not cv.pretty_json(b"moo")
- assert cv.pretty_json(b'{"foo" : "\xe4\xb8\x96\xe7\x95\x8c"}') # utf8 with chinese characters
- assert not cv.pretty_json(b'{"foo" : "\xFF"}')
diff --git a/test/mitmproxy/test_custom_contentview.py b/test/mitmproxy/test_custom_contentview.py
deleted file mode 100644
index 28f7fb33..00000000
--- a/test/mitmproxy/test_custom_contentview.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import mitmproxy.contentviews as cv
-from mitmproxy.net.http import Headers
-
-
-def test_custom_views():
- class ViewNoop(cv.View):
- name = "noop"
- prompt = ("noop", "n")
- content_types = ["text/none"]
-
- def __call__(self, data, **metadata):
- return "noop", cv.format_text(data)
-
- view_obj = ViewNoop()
-
- cv.add(view_obj)
-
- assert cv.get("noop")
-
- r = cv.get_content_view(
- cv.get("noop"),
- "[1, 2, 3]",
- headers=Headers(
- content_type="text/plain"
- )
- )
- assert "noop" in r[0]
-
- # now try content-type matching
- r = cv.get_content_view(
- cv.get("Auto"),
- "[1, 2, 3]",
- headers=Headers(
- content_type="text/none"
- )
- )
- assert "noop" in r[0]
-
- # now try removing the custom view
- cv.remove(view_obj)
- r = cv.get_content_view(
- cv.get("Auto"),
- b"[1, 2, 3]",
- headers=Headers(
- content_type="text/none"
- )
- )
- assert "noop" not in r[0]
diff --git a/test/mitmproxy/test_examples.py b/test/mitmproxy/test_examples.py
index 94637350..8db2507f 100644
--- a/test/mitmproxy/test_examples.py
+++ b/test/mitmproxy/test_examples.py
@@ -56,7 +56,7 @@ class TestScripts(mastertest.MasterTest):
tscript("simple/modify_body_inject_iframe.py")
m, sc = tscript("simple/modify_body_inject_iframe.py", "http://example.org/evil_iframe")
- f = tflow.tflow(resp=tutils.tresp(content=b"<html>mitmproxy</html>"))
+ f = tflow.tflow(resp=tutils.tresp(content=b"<html><body>mitmproxy</body></html>"))
m.response(f)
content = f.response.content
assert b'iframe' in content and b'evil_iframe' in content
diff --git a/test/mitmproxy/utils/test_sliding_window.py b/test/mitmproxy/utils/test_sliding_window.py
new file mode 100644
index 00000000..23c76032
--- /dev/null
+++ b/test/mitmproxy/utils/test_sliding_window.py
@@ -0,0 +1,27 @@
+from mitmproxy.utils import sliding_window
+
+
+def test_simple():
+ y = list(sliding_window.window(range(1000, 1005), 1, 2))
+ assert y == [
+ # prev this next next2
+ (None, 1000, 1001, 1002),
+ (1000, 1001, 1002, 1003),
+ (1001, 1002, 1003, 1004),
+ (1002, 1003, 1004, None),
+ (1003, 1004, None, None)
+ ]
+
+
+def test_is_lazy():
+ done = False
+
+ def gen():
+ nonlocal done
+ done = True
+ yield 42
+
+ x = sliding_window.window(gen(), 1, 1)
+ assert not done
+ assert list(x)
+ assert done
diff --git a/tox.ini b/tox.ini
index dc76cb70..cc06f9c8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -29,7 +29,8 @@ commands =
mitmproxy/addonmanager.py \
mitmproxy/proxy/protocol/ \
mitmproxy/log.py \
- mitmproxy/tools/dump.py mitmproxy/tools/web
+ mitmproxy/tools/dump.py mitmproxy/tools/web \
+ mitmproxy/contentviews
[testenv:wheel]
recreate = True