From ba842489103caba2691084a1f32f3c79f8f564d6 Mon Sep 17 00:00:00 2001 From: Thomas Kriechbaumer Date: Thu, 29 Sep 2016 12:46:56 +0200 Subject: rename mitmproxy.filt -> mitmproxy.flowfilter --- docs/features/filters.rst | 3 +- examples/filt.py | 20 -- examples/flowfilter.py | 20 ++ mitmproxy/builtins/dumper.py | 6 +- mitmproxy/builtins/filestreamer.py | 9 +- mitmproxy/builtins/replace.py | 12 +- mitmproxy/builtins/setheaders.py | 16 +- mitmproxy/builtins/stickyauth.py | 5 +- mitmproxy/builtins/stickycookie.py | 4 +- mitmproxy/cmdline.py | 4 +- mitmproxy/console/grideditor/editors.py | 8 +- mitmproxy/console/help.py | 26 +- mitmproxy/console/master.py | 2 +- mitmproxy/filt.py | 525 ------------------------------ mitmproxy/flow/state.py | 28 +- mitmproxy/flowfilter.py | 543 ++++++++++++++++++++++++++++++++ mitmproxy/models/flow.py | 22 +- mitmproxy/web/app.py | 8 +- test/mitmproxy/test_filt.py | 442 -------------------------- test/mitmproxy/test_flow.py | 6 +- test/mitmproxy/test_flowfilter.py | 442 ++++++++++++++++++++++++++ 21 files changed, 1073 insertions(+), 1078 deletions(-) delete mode 100644 examples/filt.py create mode 100644 examples/flowfilter.py delete mode 100644 mitmproxy/filt.py create mode 100644 mitmproxy/flowfilter.py delete mode 100644 test/mitmproxy/test_filt.py create mode 100644 test/mitmproxy/test_flowfilter.py diff --git a/docs/features/filters.rst b/docs/features/filters.rst index 509b5d6b..e531f734 100644 --- a/docs/features/filters.rst +++ b/docs/features/filters.rst @@ -8,7 +8,7 @@ Filter expressions consist of the following operators: .. documentedlist:: :header: "Expression" "Description" - :listobject: mitmproxy.filt.help + :listobject: mitmproxy.flowfilter.help - Regexes are Python-style - Regexes can be specified as quoted strings @@ -36,4 +36,3 @@ Anything but requests with a text/html content type: .. 
code-block:: none !(~q & ~t "text/html") - diff --git a/examples/filt.py b/examples/filt.py deleted file mode 100644 index 9ccf9fa1..00000000 --- a/examples/filt.py +++ /dev/null @@ -1,20 +0,0 @@ -# This scripts demonstrates how to use mitmproxy's filter pattern in scripts. -# Usage: mitmdump -s "filt.py FILTER" -import sys -from mitmproxy import filt - - -class Filter: - def __init__(self, spec): - self.filter = filt.parse(spec) - - def response(self, flow): - if flow.match(self.filter): - print("Flow matches filter:") - print(flow) - - -def start(): - if len(sys.argv) != 2: - raise ValueError("Usage: -s 'filt.py FILTER'") - return Filter(sys.argv[1]) diff --git a/examples/flowfilter.py b/examples/flowfilter.py new file mode 100644 index 00000000..af645010 --- /dev/null +++ b/examples/flowfilter.py @@ -0,0 +1,20 @@ +# This script demonstrates how to use mitmproxy's filter pattern in scripts. +# Usage: mitmdump -s "flowfilter.py FILTER" +import sys +from mitmproxy import flowfilter + + +class Filter: + def __init__(self, spec): + self.filter = flowfilter.parse(spec) + + def response(self, flow): + if flow.match(self.filter): + print("Flow matches filter:") + print(flow) + + +def start(): + if len(sys.argv) != 2: + raise ValueError("Usage: -s 'flowfilter.py FILTER'") + return Filter(sys.argv[1]) diff --git a/mitmproxy/builtins/dumper.py b/mitmproxy/builtins/dumper.py index 60d00518..0a7738a6 100644 --- a/mitmproxy/builtins/dumper.py +++ b/mitmproxy/builtins/dumper.py @@ -9,7 +9,7 @@ import typing # noqa from mitmproxy import contentviews from mitmproxy import ctx from mitmproxy import exceptions -from mitmproxy import filt +from mitmproxy import flowfilter from netlib import human from netlib import strutils @@ -22,14 +22,14 @@ def indent(n, text): class Dumper(object): def __init__(self): - self.filter = None # type: filt.TFilter + self.filter = None # type: flowfilter.TFilter self.flow_detail = None # type: int self.outfp = None # type: typing.io.TextIO self.showhost = None # 
type: bool def configure(self, options, updated): if options.filtstr: - self.filter = filt.parse(options.filtstr) + self.filter = flowfilter.parse(options.filtstr) if not self.filter: raise exceptions.OptionsError( "Invalid filter expression: %s" % options.filtstr diff --git a/mitmproxy/builtins/filestreamer.py b/mitmproxy/builtins/filestreamer.py index ffa565ac..55427753 100644 --- a/mitmproxy/builtins/filestreamer.py +++ b/mitmproxy/builtins/filestreamer.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function, division import os.path from mitmproxy import exceptions +from mitmproxy import flowfilter from mitmproxy.flow import io @@ -25,17 +26,17 @@ class FileStreamer: self.done() if options.outfile: - filt = None + flt = None if options.get("filtstr"): - filt = filt.parse(options.filtstr) - if not filt: + flt = flowfilter.parse(options.filtstr) + if not flt: raise exceptions.OptionsError( "Invalid filter specification: %s" % options.filtstr ) path, mode = options.outfile if mode not in ("wb", "ab"): raise exceptions.OptionsError("Invalid mode.") - err = self.start_stream_to_path(path, mode, filt) + err = self.start_stream_to_path(path, mode, flt) if err: raise exceptions.OptionsError(err) diff --git a/mitmproxy/builtins/replace.py b/mitmproxy/builtins/replace.py index df3cab04..b675b779 100644 --- a/mitmproxy/builtins/replace.py +++ b/mitmproxy/builtins/replace.py @@ -1,7 +1,7 @@ import re from mitmproxy import exceptions -from mitmproxy import filt +from mitmproxy import flowfilter class Replace: @@ -18,8 +18,8 @@ class Replace: """ lst = [] for fpatt, rex, s in options.replacements: - cpatt = filt.parse(fpatt) - if not cpatt: + flt = flowfilter.parse(fpatt) + if not flt: raise exceptions.OptionsError( "Invalid filter pattern: %s" % fpatt ) @@ -29,12 +29,12 @@ class Replace: raise exceptions.OptionsError( "Invalid regular expression: %s - %s" % (rex, str(e)) ) - lst.append((rex, s, cpatt)) + lst.append((rex, s, flt)) self.lst = lst def 
execute(self, f): - for rex, s, cpatt in self.lst: - if cpatt(f): + for rex, s, flt in self.lst: + if flt(f): if f.response: f.response.replace(rex, s, flags=re.DOTALL) else: diff --git a/mitmproxy/builtins/setheaders.py b/mitmproxy/builtins/setheaders.py index 4cb9905e..5695e1e8 100644 --- a/mitmproxy/builtins/setheaders.py +++ b/mitmproxy/builtins/setheaders.py @@ -1,5 +1,5 @@ from mitmproxy import exceptions -from mitmproxy import filt +from mitmproxy import flowfilter class SetHeaders: @@ -15,19 +15,19 @@ class SetHeaders: value: Header value string """ for fpatt, header, value in options.setheaders: - cpatt = filt.parse(fpatt) - if not cpatt: + flt = flowfilter.parse(fpatt) + if not flt: raise exceptions.OptionsError( "Invalid setheader filter pattern %s" % fpatt ) - self.lst.append((fpatt, header, value, cpatt)) + self.lst.append((fpatt, header, value, flt)) def run(self, f, hdrs): - for _, header, value, cpatt in self.lst: - if cpatt(f): + for _, header, value, flt in self.lst: + if flt(f): hdrs.pop(header, None) - for _, header, value, cpatt in self.lst: - if cpatt(f): + for _, header, value, flt in self.lst: + if flt(f): hdrs.add(header, value) def request(self, flow): diff --git a/mitmproxy/builtins/stickyauth.py b/mitmproxy/builtins/stickyauth.py index 98fb65ed..51728c23 100644 --- a/mitmproxy/builtins/stickyauth.py +++ b/mitmproxy/builtins/stickyauth.py @@ -1,18 +1,17 @@ from __future__ import absolute_import, print_function, division -from mitmproxy import filt from mitmproxy import exceptions +from mitmproxy import flowfilter class StickyAuth: def __init__(self): - # Compiled filter self.flt = None self.hosts = {} def configure(self, options, updated): if options.stickyauth: - flt = filt.parse(options.stickyauth) + flt = flowfilter.parse(options.stickyauth) if not flt: raise exceptions.OptionsError( "stickyauth: invalid filter expression: %s" % options.stickyauth diff --git a/mitmproxy/builtins/stickycookie.py b/mitmproxy/builtins/stickycookie.py 
index 88333d5c..028eea9c 100644 --- a/mitmproxy/builtins/stickycookie.py +++ b/mitmproxy/builtins/stickycookie.py @@ -3,7 +3,7 @@ from six.moves import http_cookiejar from netlib.http import cookies from mitmproxy import exceptions -from mitmproxy import filt +from mitmproxy import flowfilter def ckey(attrs, f): @@ -34,7 +34,7 @@ class StickyCookie: def configure(self, options, updated): if options.stickycookie: - flt = filt.parse(options.stickycookie) + flt = flowfilter.parse(options.stickycookie) if not flt: raise exceptions.OptionsError( "stickycookie: invalid filter expression: %s" % options.stickycookie diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py index ff431909..e97be5d4 100644 --- a/mitmproxy/cmdline.py +++ b/mitmproxy/cmdline.py @@ -4,7 +4,7 @@ import configargparse import os import re from mitmproxy import exceptions -from mitmproxy import filt +from mitmproxy import flowfilter from mitmproxy import options from mitmproxy import platform from netlib import human @@ -32,7 +32,7 @@ def _parse_hook(s): if not a: raise ParseException("Empty clause: %s" % str(patt)) - if not filt.parse(patt): + if not flowfilter.parse(patt): raise ParseException("Malformed filter pattern: %s" % patt) return patt, a, b diff --git a/mitmproxy/console/grideditor/editors.py b/mitmproxy/console/grideditor/editors.py index a17fd766..0c9a2a02 100644 --- a/mitmproxy/console/grideditor/editors.py +++ b/mitmproxy/console/grideditor/editors.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, print_function, division import re import urwid -from mitmproxy import filt -from mitmproxy.builtins import script from mitmproxy import exceptions +from mitmproxy import flowfilter +from mitmproxy.builtins import script from mitmproxy.console import common from mitmproxy.console.grideditor import base from mitmproxy.console.grideditor import col_bytes @@ -81,7 +81,7 @@ class ReplaceEditor(base.GridEditor): def is_error(self, col, val): if col == 0: - if not filt.parse(val): + if 
not flowfilter.parse(val): return "Invalid filter specification." elif col == 1: try: @@ -101,7 +101,7 @@ class SetHeadersEditor(base.GridEditor): def is_error(self, col, val): if col == 0: - if not filt.parse(val): + if not flowfilter.parse(val): return "Invalid filter specification" return False diff --git a/mitmproxy/console/help.py b/mitmproxy/console/help.py index e3e2f54c..7393d7c4 100644 --- a/mitmproxy/console/help.py +++ b/mitmproxy/console/help.py @@ -4,7 +4,7 @@ import platform import urwid -from mitmproxy import filt +from mitmproxy import flowfilter from mitmproxy.console import common from mitmproxy.console import signals @@ -60,29 +60,7 @@ class HelpView(urwid.ListBox): ) text.append(urwid.Text([("head", "\n\nFilter expressions:\n")])) - f = [] - for i in filt.filt_unary: - f.append( - ("~%s" % i.code, i.help) - ) - for i in filt.filt_rex: - f.append( - ("~%s regex" % i.code, i.help) - ) - for i in filt.filt_int: - f.append( - ("~%s int" % i.code, i.help) - ) - f.sort() - f.extend( - [ - ("!", "unary not"), - ("&", "and"), - ("|", "or"), - ("(...)", "grouping"), - ] - ) - text.extend(common.format_keyvals(f, key="key", val="text", indent=4)) + text.extend(common.format_keyvals(flowfilter.help, key="key", val="text", indent=4)) text.append( urwid.Text( diff --git a/mitmproxy/console/master.py b/mitmproxy/console/master.py index 6652bf0c..75a27c39 100644 --- a/mitmproxy/console/master.py +++ b/mitmproxy/console/master.py @@ -34,7 +34,7 @@ from mitmproxy.console import palettes from mitmproxy.console import signals from mitmproxy.console import statusbar from mitmproxy.console import window -from mitmproxy.filt import FMarked +from mitmproxy.flowfilter import FMarked from netlib import tcp, strutils EVENTLOG_SIZE = 500 diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py deleted file mode 100644 index eb3e392b..00000000 --- a/mitmproxy/filt.py +++ /dev/null @@ -1,525 +0,0 @@ -""" - The following operators are understood: - - ~q Request - ~s Response - - 
Headers: - - Patterns are matched against "name: value" strings. Field names are - all-lowercase. - - ~a Asset content-type in response. Asset content types are: - text/javascript - application/x-javascript - application/javascript - text/css - image/* - application/x-shockwave-flash - ~h rex Header line in either request or response - ~hq rex Header in request - ~hs rex Header in response - - ~b rex Expression in the body of either request or response - ~bq rex Expression in the body of request - ~bq rex Expression in the body of response - ~t rex Shortcut for content-type header. - - ~d rex Request domain - ~m rex Method - ~u rex URL - ~c CODE Response code. - rex Equivalent to ~u rex -""" -from __future__ import absolute_import, print_function, division - -import re -import sys -import functools - -from mitmproxy.models.http import HTTPFlow -from mitmproxy.models.tcp import TCPFlow -from mitmproxy.models.flow import Flow - -from netlib import strutils - -import pyparsing as pp -from typing import Callable - - -def only(*types): - def decorator(fn): - @functools.wraps(fn) - def filter_types(self, flow): - if isinstance(flow, types): - return fn(self, flow) - return False - return filter_types - return decorator - - -class _Token(object): - - def dump(self, indent=0, fp=sys.stdout): - print("{spacing}{name}{expr}".format( - spacing="\t" * indent, - name=self.__class__.__name__, - expr=getattr(self, "expr", "") - ), file=fp) - - -class _Action(_Token): - - @classmethod - def make(klass, s, loc, toks): - return klass(*toks[1:]) - - -class FErr(_Action): - code = "e" - help = "Match error" - - def __call__(self, f): - return True if f.error else False - - -class FMarked(_Action): - code = "marked" - help = "Match marked flows" - - def __call__(self, f): - return f.marked - - -class FHTTP(_Action): - code = "http" - help = "Match HTTP flows" - - @only(HTTPFlow) - def __call__(self, f): - return True - - -class FTCP(_Action): - code = "tcp" - help = "Match TCP flows" - 
- @only(TCPFlow) - def __call__(self, f): - return True - - -class FReq(_Action): - code = "q" - help = "Match request with no response" - - @only(HTTPFlow) - def __call__(self, f): - if not f.response: - return True - - -class FResp(_Action): - code = "s" - help = "Match response" - - @only(HTTPFlow) - def __call__(self, f): - return bool(f.response) - - -class _Rex(_Action): - flags = 0 - is_binary = True - - def __init__(self, expr): - self.expr = expr - if self.is_binary: - expr = strutils.escaped_str_to_bytes(expr) - try: - self.re = re.compile(expr, self.flags) - except: - raise ValueError("Cannot compile expression.") - - -def _check_content_type(rex, message): - return any( - name.lower() == b"content-type" and - rex.search(value) - for name, value in message.headers.fields - ) - - -class FAsset(_Action): - code = "a" - help = "Match asset in response: CSS, Javascript, Flash, images." - ASSET_TYPES = [ - b"text/javascript", - b"application/x-javascript", - b"application/javascript", - b"text/css", - b"image/.*", - b"application/x-shockwave-flash" - ] - ASSET_TYPES = [re.compile(x) for x in ASSET_TYPES] - - @only(HTTPFlow) - def __call__(self, f): - if f.response: - for i in self.ASSET_TYPES: - if _check_content_type(i, f.response): - return True - return False - - -class FContentType(_Rex): - code = "t" - help = "Content-type header" - - @only(HTTPFlow) - def __call__(self, f): - if _check_content_type(self.re, f.request): - return True - elif f.response and _check_content_type(self.re, f.response): - return True - return False - - -class FContentTypeRequest(_Rex): - code = "tq" - help = "Request Content-Type header" - - @only(HTTPFlow) - def __call__(self, f): - return _check_content_type(self.re, f.request) - - -class FContentTypeResponse(_Rex): - code = "ts" - help = "Response Content-Type header" - - @only(HTTPFlow) - def __call__(self, f): - if f.response: - return _check_content_type(self.re, f.response) - return False - - -class FHead(_Rex): - code = 
"h" - help = "Header" - flags = re.MULTILINE - - @only(HTTPFlow) - def __call__(self, f): - if f.request and self.re.search(bytes(f.request.headers)): - return True - if f.response and self.re.search(bytes(f.response.headers)): - return True - return False - - -class FHeadRequest(_Rex): - code = "hq" - help = "Request header" - flags = re.MULTILINE - - @only(HTTPFlow) - def __call__(self, f): - if f.request and self.re.search(bytes(f.request.headers)): - return True - - -class FHeadResponse(_Rex): - code = "hs" - help = "Response header" - flags = re.MULTILINE - - @only(HTTPFlow) - def __call__(self, f): - if f.response and self.re.search(bytes(f.response.headers)): - return True - - -class FBod(_Rex): - code = "b" - help = "Body" - flags = re.DOTALL - - @only(HTTPFlow, TCPFlow) - def __call__(self, f): - if isinstance(f, HTTPFlow): - if f.request and f.request.raw_content: - if self.re.search(f.request.get_content(strict=False)): - return True - if f.response and f.response.raw_content: - if self.re.search(f.response.get_content(strict=False)): - return True - elif isinstance(f, TCPFlow): - for msg in f.messages: - if self.re.search(msg.content): - return True - return False - - -class FBodRequest(_Rex): - code = "bq" - help = "Request body" - flags = re.DOTALL - - @only(HTTPFlow, TCPFlow) - def __call__(self, f): - if isinstance(f, HTTPFlow): - if f.request and f.request.raw_content: - if self.re.search(f.request.get_content(strict=False)): - return True - elif isinstance(f, TCPFlow): - for msg in f.messages: - if msg.from_client and self.re.search(msg.content): - return True - - -class FBodResponse(_Rex): - code = "bs" - help = "Response body" - flags = re.DOTALL - - @only(HTTPFlow, TCPFlow) - def __call__(self, f): - if isinstance(f, HTTPFlow): - if f.response and f.response.raw_content: - if self.re.search(f.response.get_content(strict=False)): - return True - elif isinstance(f, TCPFlow): - for msg in f.messages: - if not msg.from_client and 
self.re.search(msg.content): - return True - - -class FMethod(_Rex): - code = "m" - help = "Method" - flags = re.IGNORECASE - - @only(HTTPFlow) - def __call__(self, f): - return bool(self.re.search(f.request.data.method)) - - -class FDomain(_Rex): - code = "d" - help = "Domain" - flags = re.IGNORECASE - - @only(HTTPFlow) - def __call__(self, f): - return bool(self.re.search(f.request.data.host)) - - -class FUrl(_Rex): - code = "u" - help = "URL" - is_binary = False - # FUrl is special, because it can be "naked". - - @classmethod - def make(klass, s, loc, toks): - if len(toks) > 1: - toks = toks[1:] - return klass(*toks) - - @only(HTTPFlow) - def __call__(self, f): - return self.re.search(f.request.url) - - -class FSrc(_Rex): - code = "src" - help = "Match source address" - is_binary = False - - def __call__(self, f): - return f.client_conn.address and self.re.search(repr(f.client_conn.address)) - - -class FDst(_Rex): - code = "dst" - help = "Match destination address" - is_binary = False - - def __call__(self, f): - return f.server_conn.address and self.re.search(repr(f.server_conn.address)) - - -class _Int(_Action): - - def __init__(self, num): - self.num = int(num) - - -class FCode(_Int): - code = "c" - help = "HTTP response code" - - @only(HTTPFlow) - def __call__(self, f): - if f.response and f.response.status_code == self.num: - return True - - -class FAnd(_Token): - - def __init__(self, lst): - self.lst = lst - - def dump(self, indent=0, fp=sys.stdout): - super(FAnd, self).dump(indent, fp) - for i in self.lst: - i.dump(indent + 1, fp) - - def __call__(self, f): - return all(i(f) for i in self.lst) - - -class FOr(_Token): - - def __init__(self, lst): - self.lst = lst - - def dump(self, indent=0, fp=sys.stdout): - super(FOr, self).dump(indent, fp) - for i in self.lst: - i.dump(indent + 1, fp) - - def __call__(self, f): - return any(i(f) for i in self.lst) - - -class FNot(_Token): - - def __init__(self, itm): - self.itm = itm[0] - - def dump(self, indent=0, 
fp=sys.stdout): - super(FNot, self).dump(indent, fp) - self.itm.dump(indent + 1, fp) - - def __call__(self, f): - return not self.itm(f) - - -filt_unary = [ - FAsset, - FErr, - FHTTP, - FMarked, - FReq, - FResp, - FTCP, -] -filt_rex = [ - FBod, - FBodRequest, - FBodResponse, - FContentType, - FContentTypeRequest, - FContentTypeResponse, - FDomain, - FDst, - FHead, - FHeadRequest, - FHeadResponse, - FMethod, - FSrc, - FUrl, -] -filt_int = [ - FCode -] - - -def _make(): - # Order is important - multi-char expressions need to come before narrow - # ones. - parts = [] - for klass in filt_unary: - f = pp.Literal("~%s" % klass.code) + pp.WordEnd() - f.setParseAction(klass.make) - parts.append(f) - - simplerex = "".join(c for c in pp.printables if c not in "()~'\"") - rex = pp.Word(simplerex) |\ - pp.QuotedString("\"", escChar='\\') |\ - pp.QuotedString("'", escChar='\\') - for klass in filt_rex: - f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy() - f.setParseAction(klass.make) - parts.append(f) - - for klass in filt_int: - f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums) - f.setParseAction(klass.make) - parts.append(f) - - # A naked rex is a URL rex: - f = rex.copy() - f.setParseAction(FUrl.make) - parts.append(f) - - atom = pp.MatchFirst(parts) - expr = pp.operatorPrecedence(atom, - [(pp.Literal("!").suppress(), - 1, - pp.opAssoc.RIGHT, - lambda x: FNot(*x)), - (pp.Literal("&").suppress(), - 2, - pp.opAssoc.LEFT, - lambda x: FAnd(*x)), - (pp.Literal("|").suppress(), - 2, - pp.opAssoc.LEFT, - lambda x: FOr(*x)), - ]) - expr = pp.OneOrMore(expr) - return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x) -bnf = _make() - - -TFilter = Callable[[Flow], bool] - - -def parse(s): - # type: (str) -> TFilter - try: - filt = bnf.parseString(s, parseAll=True)[0] - filt.pattern = s - return filt - except pp.ParseException: - return None - except ValueError: - return None - - -help = [] -for i in filt_unary: - help.append( - ("~%s" % 
i.code, i.help) - ) -for i in filt_rex: - help.append( - ("~%s regex" % i.code, i.help) - ) -for i in filt_int: - help.append( - ("~%s int" % i.code, i.help) - ) -help.sort() -help.extend( - [ - ("!", "unary not"), - ("&", "and"), - ("|", "or"), - ("(...)", "grouping"), - ] -) diff --git a/mitmproxy/flow/state.py b/mitmproxy/flow/state.py index 8576fadc..759e53e4 100644 --- a/mitmproxy/flow/state.py +++ b/mitmproxy/flow/state.py @@ -5,7 +5,7 @@ from abc import abstractmethod, ABCMeta import six from typing import List # noqa -from mitmproxy import filt +from mitmproxy import flowfilter from mitmproxy import models # noqa @@ -53,11 +53,11 @@ def _pos(*args): class FlowView(FlowList): - def __init__(self, store, filt=None): + def __init__(self, store, flt=None): super(FlowView, self).__init__() - if not filt: - filt = _pos - self._build(store, filt) + if not flt: + flt = _pos + self._build(store, flt) self.store = store self.store.views.append(self) @@ -65,9 +65,9 @@ class FlowView(FlowList): def _close(self): self.store.views.remove(self) - def _build(self, flows, filt=None): - if filt: - self.filt = filt + def _build(self, flows, flt=None): + if flt: + self.filt = flt self._list = list(filter(self.filt, flows)) def _add(self, f): @@ -229,21 +229,21 @@ class State(object): if txt == self.filter_txt: return if txt: - f = filt.parse(txt) - if not f: + flt = flowfilter.parse(txt) + if not flt: return "Invalid filter expression." self.view._close() - self.view = FlowView(self.flows, f) + self.view = FlowView(self.flows, flt) else: self.view._close() self.view = FlowView(self.flows, None) def set_intercept(self, txt): if txt: - f = filt.parse(txt) - if not f: + flt = flowfilter.parse(txt) + if not flt: return "Invalid filter expression." 
- self.intercept = f + self.intercept = flt else: self.intercept = None diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py new file mode 100644 index 00000000..a6398957 --- /dev/null +++ b/mitmproxy/flowfilter.py @@ -0,0 +1,543 @@ +""" + The following operators are understood: + + ~q Request + ~s Response + + Headers: + + Patterns are matched against "name: value" strings. Field names are + all-lowercase. + + ~a Asset content-type in response. Asset content types are: + text/javascript + application/x-javascript + application/javascript + text/css + image/* + application/x-shockwave-flash + ~h rex Header line in either request or response + ~hq rex Header in request + ~hs rex Header in response + + ~b rex Expression in the body of either request or response + ~bq rex Expression in the body of request + ~bs rex Expression in the body of response + ~t rex Shortcut for content-type header. + + ~d rex Request domain + ~m rex Method + ~u rex URL + ~c CODE Response code. + rex Equivalent to ~u rex +""" +from __future__ import absolute_import, print_function, division + +import re +import sys +import functools + +from mitmproxy.models.http import HTTPFlow +from mitmproxy.models.tcp import TCPFlow +from mitmproxy.models.flow import Flow + +from netlib import strutils + +import pyparsing as pp +from typing import Callable + + +def only(*types): + def decorator(fn): + @functools.wraps(fn) + def filter_types(self, flow): + if isinstance(flow, types): + return fn(self, flow) + return False + return filter_types + return decorator + + +class _Token(object): + + def dump(self, indent=0, fp=sys.stdout): + print("{spacing}{name}{expr}".format( + spacing="\t" * indent, + name=self.__class__.__name__, + expr=getattr(self, "expr", "") + ), file=fp) + + +class _Action(_Token): + + @classmethod + def make(klass, s, loc, toks): + return klass(*toks[1:]) + + +class FErr(_Action): + code = "e" + help = "Match error" + + def __call__(self, f): + return True if f.error else 
False + + +class FMarked(_Action): + code = "marked" + help = "Match marked flows" + + def __call__(self, f): + return f.marked + + +class FHTTP(_Action): + code = "http" + help = "Match HTTP flows" + + @only(HTTPFlow) + def __call__(self, f): + return True + + +class FTCP(_Action): + code = "tcp" + help = "Match TCP flows" + + @only(TCPFlow) + def __call__(self, f): + return True + + +class FReq(_Action): + code = "q" + help = "Match request with no response" + + @only(HTTPFlow) + def __call__(self, f): + if not f.response: + return True + + +class FResp(_Action): + code = "s" + help = "Match response" + + @only(HTTPFlow) + def __call__(self, f): + return bool(f.response) + + +class _Rex(_Action): + flags = 0 + is_binary = True + + def __init__(self, expr): + self.expr = expr + if self.is_binary: + expr = strutils.escaped_str_to_bytes(expr) + try: + self.re = re.compile(expr, self.flags) + except: + raise ValueError("Cannot compile expression.") + + +def _check_content_type(rex, message): + return any( + name.lower() == b"content-type" and + rex.search(value) + for name, value in message.headers.fields + ) + + +class FAsset(_Action): + code = "a" + help = "Match asset in response: CSS, Javascript, Flash, images." 
+ ASSET_TYPES = [ + b"text/javascript", + b"application/x-javascript", + b"application/javascript", + b"text/css", + b"image/.*", + b"application/x-shockwave-flash" + ] + ASSET_TYPES = [re.compile(x) for x in ASSET_TYPES] + + @only(HTTPFlow) + def __call__(self, f): + if f.response: + for i in self.ASSET_TYPES: + if _check_content_type(i, f.response): + return True + return False + + +class FContentType(_Rex): + code = "t" + help = "Content-type header" + + @only(HTTPFlow) + def __call__(self, f): + if _check_content_type(self.re, f.request): + return True + elif f.response and _check_content_type(self.re, f.response): + return True + return False + + +class FContentTypeRequest(_Rex): + code = "tq" + help = "Request Content-Type header" + + @only(HTTPFlow) + def __call__(self, f): + return _check_content_type(self.re, f.request) + + +class FContentTypeResponse(_Rex): + code = "ts" + help = "Response Content-Type header" + + @only(HTTPFlow) + def __call__(self, f): + if f.response: + return _check_content_type(self.re, f.response) + return False + + +class FHead(_Rex): + code = "h" + help = "Header" + flags = re.MULTILINE + + @only(HTTPFlow) + def __call__(self, f): + if f.request and self.re.search(bytes(f.request.headers)): + return True + if f.response and self.re.search(bytes(f.response.headers)): + return True + return False + + +class FHeadRequest(_Rex): + code = "hq" + help = "Request header" + flags = re.MULTILINE + + @only(HTTPFlow) + def __call__(self, f): + if f.request and self.re.search(bytes(f.request.headers)): + return True + + +class FHeadResponse(_Rex): + code = "hs" + help = "Response header" + flags = re.MULTILINE + + @only(HTTPFlow) + def __call__(self, f): + if f.response and self.re.search(bytes(f.response.headers)): + return True + + +class FBod(_Rex): + code = "b" + help = "Body" + flags = re.DOTALL + + @only(HTTPFlow, TCPFlow) + def __call__(self, f): + if isinstance(f, HTTPFlow): + if f.request and f.request.raw_content: + if 
self.re.search(f.request.get_content(strict=False)): + return True + if f.response and f.response.raw_content: + if self.re.search(f.response.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if self.re.search(msg.content): + return True + return False + + +class FBodRequest(_Rex): + code = "bq" + help = "Request body" + flags = re.DOTALL + + @only(HTTPFlow, TCPFlow) + def __call__(self, f): + if isinstance(f, HTTPFlow): + if f.request and f.request.raw_content: + if self.re.search(f.request.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if msg.from_client and self.re.search(msg.content): + return True + + +class FBodResponse(_Rex): + code = "bs" + help = "Response body" + flags = re.DOTALL + + @only(HTTPFlow, TCPFlow) + def __call__(self, f): + if isinstance(f, HTTPFlow): + if f.response and f.response.raw_content: + if self.re.search(f.response.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if not msg.from_client and self.re.search(msg.content): + return True + + +class FMethod(_Rex): + code = "m" + help = "Method" + flags = re.IGNORECASE + + @only(HTTPFlow) + def __call__(self, f): + return bool(self.re.search(f.request.data.method)) + + +class FDomain(_Rex): + code = "d" + help = "Domain" + flags = re.IGNORECASE + + @only(HTTPFlow) + def __call__(self, f): + return bool(self.re.search(f.request.data.host)) + + +class FUrl(_Rex): + code = "u" + help = "URL" + is_binary = False + # FUrl is special, because it can be "naked". 
+ + @classmethod + def make(klass, s, loc, toks): + if len(toks) > 1: + toks = toks[1:] + return klass(*toks) + + @only(HTTPFlow) + def __call__(self, f): + return self.re.search(f.request.url) + + +class FSrc(_Rex): + code = "src" + help = "Match source address" + is_binary = False + + def __call__(self, f): + return f.client_conn.address and self.re.search(repr(f.client_conn.address)) + + +class FDst(_Rex): + code = "dst" + help = "Match destination address" + is_binary = False + + def __call__(self, f): + return f.server_conn.address and self.re.search(repr(f.server_conn.address)) + + +class _Int(_Action): + + def __init__(self, num): + self.num = int(num) + + +class FCode(_Int): + code = "c" + help = "HTTP response code" + + @only(HTTPFlow) + def __call__(self, f): + if f.response and f.response.status_code == self.num: + return True + + +class FAnd(_Token): + + def __init__(self, lst): + self.lst = lst + + def dump(self, indent=0, fp=sys.stdout): + super(FAnd, self).dump(indent, fp) + for i in self.lst: + i.dump(indent + 1, fp) + + def __call__(self, f): + return all(i(f) for i in self.lst) + + +class FOr(_Token): + + def __init__(self, lst): + self.lst = lst + + def dump(self, indent=0, fp=sys.stdout): + super(FOr, self).dump(indent, fp) + for i in self.lst: + i.dump(indent + 1, fp) + + def __call__(self, f): + return any(i(f) for i in self.lst) + + +class FNot(_Token): + + def __init__(self, itm): + self.itm = itm[0] + + def dump(self, indent=0, fp=sys.stdout): + super(FNot, self).dump(indent, fp) + self.itm.dump(indent + 1, fp) + + def __call__(self, f): + return not self.itm(f) + + +filter_unary = [ + FAsset, + FErr, + FHTTP, + FMarked, + FReq, + FResp, + FTCP, +] +filter_rex = [ + FBod, + FBodRequest, + FBodResponse, + FContentType, + FContentTypeRequest, + FContentTypeResponse, + FDomain, + FDst, + FHead, + FHeadRequest, + FHeadResponse, + FMethod, + FSrc, + FUrl, +] +filter_int = [ + FCode +] + + +def _make(): + # Order is important - multi-char 
expressions need to come before narrow + # ones. + parts = [] + for klass in filter_unary: + f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + f.setParseAction(klass.make) + parts.append(f) + + simplerex = "".join(c for c in pp.printables if c not in "()~'\"") + rex = pp.Word(simplerex) |\ + pp.QuotedString("\"", escChar='\\') |\ + pp.QuotedString("'", escChar='\\') + for klass in filter_rex: + f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy() + f.setParseAction(klass.make) + parts.append(f) + + for klass in filter_int: + f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums) + f.setParseAction(klass.make) + parts.append(f) + + # A naked rex is a URL rex: + f = rex.copy() + f.setParseAction(FUrl.make) + parts.append(f) + + atom = pp.MatchFirst(parts) + expr = pp.operatorPrecedence(atom, + [(pp.Literal("!").suppress(), + 1, + pp.opAssoc.RIGHT, + lambda x: FNot(*x)), + (pp.Literal("&").suppress(), + 2, + pp.opAssoc.LEFT, + lambda x: FAnd(*x)), + (pp.Literal("|").suppress(), + 2, + pp.opAssoc.LEFT, + lambda x: FOr(*x)), + ]) + expr = pp.OneOrMore(expr) + return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x) +bnf = _make() + + +TFilter = Callable[[Flow], bool] + + +def parse(s): + # type: (str) -> TFilter + try: + flt = bnf.parseString(s, parseAll=True)[0] + flt.pattern = s + return flt + except pp.ParseException: + return None + except ValueError: + return None + + +def match(self, flow, flt): + """ + Match a flow against a compiled filter expression. + Returns True if matched, False if not. + + If flt is a string, it will be compiled as a filter expression. + If the expression is invalid, ValueError is raised. 
+ """ + if isinstance(flt, six.string_types): + + flt = parse(flt) + if not flt: + raise ValueError("Invalid filter expression.") + if flt: + return flt(flow) + return True + + +help = [] +for i in filter_unary: + help.append( + ("~%s" % i.code, i.help) + ) +for i in filter_rex: + help.append( + ("~%s regex" % i.code, i.help) + ) +for i in filter_int: + help.append( + ("~%s int" % i.code, i.help) + ) +help.sort() +help.extend( + [ + ("!", "unary not"), + ("&", "and"), + ("|", "or"), + ("(...)", "grouping"), + ] +) diff --git a/mitmproxy/models/flow.py b/mitmproxy/models/flow.py index fc673274..f8c0b210 100644 --- a/mitmproxy/models/flow.py +++ b/mitmproxy/models/flow.py @@ -189,20 +189,20 @@ class Flow(stateobject.StateObject): self.reply.commit() master.handle_accept_intercept(self) - def match(self, f): + def match(self, flt): """ - Match this flow against a compiled filter expression. Returns True - if matched, False if not. + Matches a flow against a compiled filter expression. + Returns True if matched, False if not. - If f is a string, it will be compiled as a filter expression. If - the expression is invalid, ValueError is raised. + If flt is a string, it will be compiled as a filter expression. + If the expression is invalid, ValueError is raised. """ - if isinstance(f, six.string_types): - from .. 
import filt + if isinstance(flt, six.string_types): + from ..flowfilter import parse - f = filt.parse(f) - if not f: + flt = parse(flt) + if not flt: raise ValueError("Invalid filter expression.") - if f: - return f(self) + if flt: + return flt(self) return True diff --git a/mitmproxy/web/app.py b/mitmproxy/web/app.py index 5498c2d9..34969870 100644 --- a/mitmproxy/web/app.py +++ b/mitmproxy/web/app.py @@ -14,7 +14,7 @@ import tornado.web from io import BytesIO from mitmproxy.flow import FlowWriter, FlowReader -from mitmproxy import filt +from mitmproxy import flowfilter from mitmproxy import models from mitmproxy import contentviews from netlib import version @@ -151,11 +151,11 @@ class IndexHandler(RequestHandler): self.render("index.html") -class FiltHelp(RequestHandler): +class FilterHelp(RequestHandler): def get(self): self.write(dict( - commands=filt.help + commands=flowfilter.help )) @@ -434,7 +434,7 @@ class Application(tornado.web.Application): self.master = master handlers = [ (r"/", IndexHandler), - (r"/filter-help", FiltHelp), + (r"/filter-help", FilterHelp), (r"/updates", ClientConnection), (r"/events", Events), (r"/flows", Flows), diff --git a/test/mitmproxy/test_filt.py b/test/mitmproxy/test_filt.py deleted file mode 100644 index 69f042bb..00000000 --- a/test/mitmproxy/test_filt.py +++ /dev/null @@ -1,442 +0,0 @@ -from six.moves import cStringIO as StringIO -from mock import patch - -from mitmproxy import filt - -from . 
import tutils - - -class TestParsing: - - def _dump(self, x): - c = StringIO() - x.dump(fp=c) - assert c.getvalue() - - def test_parse_err(self): - assert filt.parse("~h [") is None - - def test_simple(self): - assert not filt.parse("~b") - assert filt.parse("~q") - assert filt.parse("~c 10") - assert filt.parse("~m foobar") - assert filt.parse("~u foobar") - assert filt.parse("~q ~c 10") - p = filt.parse("~q ~c 10") - self._dump(p) - assert len(p.lst) == 2 - - def test_naked_url(self): - a = filt.parse("foobar ~h rex") - assert a.lst[0].expr == "foobar" - assert a.lst[1].expr == "rex" - self._dump(a) - - def test_quoting(self): - a = filt.parse("~u 'foo ~u bar' ~u voing") - assert a.lst[0].expr == "foo ~u bar" - assert a.lst[1].expr == "voing" - self._dump(a) - - a = filt.parse("~u foobar") - assert a.expr == "foobar" - - a = filt.parse(r"~u 'foobar\"\''") - assert a.expr == "foobar\"'" - - a = filt.parse(r'~u "foo \'bar"') - assert a.expr == "foo 'bar" - - def test_nesting(self): - a = filt.parse("(~u foobar & ~h voing)") - assert a.lst[0].expr == "foobar" - self._dump(a) - - def test_not(self): - a = filt.parse("!~h test") - assert a.itm.expr == "test" - a = filt.parse("!(~u test & ~h bar)") - assert a.itm.lst[0].expr == "test" - self._dump(a) - - def test_binaryops(self): - a = filt.parse("~u foobar | ~h voing") - isinstance(a, filt.FOr) - self._dump(a) - - a = filt.parse("~u foobar & ~h voing") - isinstance(a, filt.FAnd) - self._dump(a) - - def test_wideops(self): - a = filt.parse("~hq 'header: qvalue'") - assert isinstance(a, filt.FHeadRequest) - self._dump(a) - - -class TestMatchingHTTPFlow: - - def req(self): - return tutils.tflow() - - def resp(self): - return tutils.tflow(resp=True) - - def err(self): - return tutils.tflow(err=True) - - def q(self, q, o): - return filt.parse(q)(o) - - def test_http(self): - s = self.req() - assert self.q("~http", s) - assert not self.q("~tcp", s) - - def test_asset(self): - s = self.resp() - assert not self.q("~a", s) - 
s.response.headers["content-type"] = "text/javascript" - assert self.q("~a", s) - - def test_fcontenttype(self): - q = self.req() - s = self.resp() - assert not self.q("~t content", q) - assert not self.q("~t content", s) - - q.request.headers["content-type"] = "text/json" - assert self.q("~t json", q) - assert self.q("~tq json", q) - assert not self.q("~ts json", q) - - s.response.headers["content-type"] = "text/json" - assert self.q("~t json", s) - - del s.response.headers["content-type"] - s.request.headers["content-type"] = "text/json" - assert self.q("~t json", s) - assert self.q("~tq json", s) - assert not self.q("~ts json", s) - - def test_freq_fresp(self): - q = self.req() - s = self.resp() - - assert self.q("~q", q) - assert not self.q("~q", s) - - assert not self.q("~s", q) - assert self.q("~s", s) - - def test_ferr(self): - e = self.err() - assert self.q("~e", e) - - def test_head(self): - q = self.req() - s = self.resp() - assert not self.q("~h nonexistent", q) - assert self.q("~h qvalue", q) - assert self.q("~h header", q) - assert self.q("~h 'header: qvalue'", q) - - assert self.q("~h 'header: qvalue'", s) - assert self.q("~h 'header-response: svalue'", s) - - assert self.q("~hq 'header: qvalue'", s) - assert not self.q("~hq 'header-response: svalue'", s) - - assert self.q("~hq 'header: qvalue'", q) - assert not self.q("~hq 'header-request: svalue'", q) - - assert not self.q("~hs 'header: qvalue'", s) - assert self.q("~hs 'header-response: svalue'", s) - assert not self.q("~hs 'header: qvalue'", q) - - def match_body(self, q, s): - assert not self.q("~b nonexistent", q) - assert self.q("~b content", q) - assert self.q("~b message", s) - - assert not self.q("~bq nomatch", s) - assert self.q("~bq content", q) - assert self.q("~bq content", s) - assert not self.q("~bq message", q) - assert not self.q("~bq message", s) - - assert not self.q("~bs nomatch", s) - assert not self.q("~bs content", q) - assert not self.q("~bs content", s) - assert not 
self.q("~bs message", q) - assert self.q("~bs message", s) - - def test_body(self): - q = self.req() - s = self.resp() - self.match_body(q, s) - - q.request.encode("gzip") - s.request.encode("gzip") - s.response.encode("gzip") - self.match_body(q, s) - - def test_method(self): - q = self.req() - assert self.q("~m get", q) - assert not self.q("~m post", q) - - q.request.method = "oink" - assert not self.q("~m get", q) - - def test_domain(self): - q = self.req() - assert self.q("~d address", q) - assert not self.q("~d none", q) - - def test_url(self): - q = self.req() - s = self.resp() - assert self.q("~u address", q) - assert self.q("~u address:22/path", q) - assert not self.q("~u moo/path", q) - - assert self.q("~u address", s) - assert self.q("~u address:22/path", s) - assert not self.q("~u moo/path", s) - - def test_code(self): - q = self.req() - s = self.resp() - assert not self.q("~c 200", q) - assert self.q("~c 200", s) - assert not self.q("~c 201", s) - - def test_src(self): - q = self.req() - assert self.q("~src address", q) - assert not self.q("~src foobar", q) - assert self.q("~src :22", q) - assert not self.q("~src :99", q) - assert self.q("~src address:22", q) - - def test_dst(self): - q = self.req() - q.server_conn = tutils.tserver_conn() - assert self.q("~dst address", q) - assert not self.q("~dst foobar", q) - assert self.q("~dst :22", q) - assert not self.q("~dst :99", q) - assert self.q("~dst address:22", q) - - def test_and(self): - s = self.resp() - assert self.q("~c 200 & ~h head", s) - assert self.q("~c 200 & ~h head", s) - assert not self.q("~c 200 & ~h nohead", s) - assert self.q("(~c 200 & ~h head) & ~b content", s) - assert not self.q("(~c 200 & ~h head) & ~b nonexistent", s) - assert not self.q("(~c 200 & ~h nohead) & ~b content", s) - - def test_or(self): - s = self.resp() - assert self.q("~c 200 | ~h nohead", s) - assert self.q("~c 201 | ~h head", s) - assert not self.q("~c 201 | ~h nohead", s) - assert self.q("(~c 201 | ~h nohead) | ~s", 
s) - - def test_not(self): - s = self.resp() - assert not self.q("! ~c 200", s) - assert self.q("! ~c 201", s) - assert self.q("!~c 201 !~c 202", s) - assert not self.q("!~c 201 !~c 200", s) - - -class TestMatchingTCPFlow: - - def flow(self): - return tutils.ttcpflow() - - def err(self): - return tutils.ttcpflow(err=True) - - def q(self, q, o): - return filt.parse(q)(o) - - def test_tcp(self): - f = self.flow() - assert self.q("~tcp", f) - assert not self.q("~http", f) - - def test_ferr(self): - e = self.err() - assert self.q("~e", e) - - def test_body(self): - f = self.flow() - - # Messages sent by client or server - assert self.q("~b hello", f) - assert self.q("~b me", f) - assert not self.q("~b nonexistent", f) - - # Messages sent by client - assert self.q("~bq hello", f) - assert not self.q("~bq me", f) - assert not self.q("~bq nonexistent", f) - - # Messages sent by server - assert self.q("~bs me", f) - assert not self.q("~bs hello", f) - assert not self.q("~bs nonexistent", f) - - def test_src(self): - f = self.flow() - assert self.q("~src address", f) - assert not self.q("~src foobar", f) - assert self.q("~src :22", f) - assert not self.q("~src :99", f) - assert self.q("~src address:22", f) - - def test_dst(self): - f = self.flow() - f.server_conn = tutils.tserver_conn() - assert self.q("~dst address", f) - assert not self.q("~dst foobar", f) - assert self.q("~dst :22", f) - assert not self.q("~dst :99", f) - assert self.q("~dst address:22", f) - - def test_and(self): - f = self.flow() - f.server_conn = tutils.tserver_conn() - assert self.q("~b hello & ~b me", f) - assert not self.q("~src wrongaddress & ~b hello", f) - assert self.q("(~src :22 & ~dst :22) & ~b hello", f) - assert not self.q("(~src address:22 & ~dst :22) & ~b nonexistent", f) - assert not self.q("(~src address:22 & ~dst :99) & ~b hello", f) - - def test_or(self): - f = self.flow() - f.server_conn = tutils.tserver_conn() - assert self.q("~b hello | ~b me", f) - assert self.q("~src :22 | ~b 
me", f) - assert not self.q("~src :99 | ~dst :99", f) - assert self.q("(~src :22 | ~dst :22) | ~b me", f) - - def test_not(self): - f = self.flow() - assert not self.q("! ~src :22", f) - assert self.q("! ~src :99", f) - assert self.q("!~src :99 !~src :99", f) - assert not self.q("!~src :99 !~src :22", f) - - def test_request(self): - f = self.flow() - assert not self.q("~q", f) - - def test_response(self): - f = self.flow() - assert not self.q("~s", f) - - def test_headers(self): - f = self.flow() - assert not self.q("~h whatever", f) - - # Request headers - assert not self.q("~hq whatever", f) - - # Response headers - assert not self.q("~hs whatever", f) - - def test_content_type(self): - f = self.flow() - assert not self.q("~t whatever", f) - - # Request content-type - assert not self.q("~tq whatever", f) - - # Response content-type - assert not self.q("~ts whatever", f) - - def test_code(self): - f = self.flow() - assert not self.q("~c 200", f) - - def test_domain(self): - f = self.flow() - assert not self.q("~d whatever", f) - - def test_method(self): - f = self.flow() - assert not self.q("~m whatever", f) - - def test_url(self): - f = self.flow() - assert not self.q("~u whatever", f) - - -class TestMatchingDummyFlow: - - def flow(self): - return tutils.tdummyflow() - - def err(self): - return tutils.tdummyflow(err=True) - - def q(self, q, o): - return filt.parse(q)(o) - - def test_filters(self): - e = self.err() - f = self.flow() - f.server_conn = tutils.tserver_conn() - - assert not self.q("~a", f) - - assert not self.q("~b whatever", f) - assert not self.q("~bq whatever", f) - assert not self.q("~bs whatever", f) - - assert not self.q("~c 0", f) - - assert not self.q("~d whatever", f) - - assert self.q("~dst address", f) - assert not self.q("~dst nonexistent", f) - - assert self.q("~e", e) - assert not self.q("~e", f) - - assert not self.q("~http", f) - - assert not self.q("~h whatever", f) - assert not self.q("~hq whatever", f) - assert not self.q("~hs 
whatever", f) - - assert not self.q("~m whatever", f) - - assert not self.q("~s", f) - - assert self.q("~src address", f) - assert not self.q("~src nonexistent", f) - - assert not self.q("~tcp", f) - - assert not self.q("~t whatever", f) - assert not self.q("~tq whatever", f) - assert not self.q("~ts whatever", f) - - assert not self.q("~u whatever", f) - - assert not self.q("~q", f) - - -@patch('traceback.extract_tb') -def test_pyparsing_bug(extract_tb): - """https://github.com/mitmproxy/mitmproxy/issues/1087""" - # The text is a string with leading and trailing whitespace stripped; if the source is not available it is None. - extract_tb.return_value = [("", 1, "test", None)] - assert filt.parse("test") diff --git a/test/mitmproxy/test_flow.py b/test/mitmproxy/test_flow.py index 0fe45afb..6b24e55a 100644 --- a/test/mitmproxy/test_flow.py +++ b/test/mitmproxy/test_flow.py @@ -3,7 +3,7 @@ import io import netlib.utils from netlib.http import Headers -from mitmproxy import filt, flow, options +from mitmproxy import flowfilter, flow, options from mitmproxy.contrib import tnetstring from mitmproxy.exceptions import FlowReadException, Kill from mitmproxy.models import Error @@ -400,8 +400,8 @@ class TestSerialize: def test_filter(self): sio = io.BytesIO() - fl = filt.parse("~c 200") - w = flow.FilteredFlowWriter(sio, fl) + flt = flowfilter.parse("~c 200") + w = flow.FilteredFlowWriter(sio, flt) f = tutils.tflow(resp=True) f.response.status_code = 200 diff --git a/test/mitmproxy/test_flowfilter.py b/test/mitmproxy/test_flowfilter.py new file mode 100644 index 00000000..e8d19ffa --- /dev/null +++ b/test/mitmproxy/test_flowfilter.py @@ -0,0 +1,442 @@ +from six.moves import cStringIO as StringIO +from mock import patch + +from mitmproxy import flowfilter + +from . 
import tutils + + +class TestParsing: + + def _dump(self, x): + c = StringIO() + x.dump(fp=c) + assert c.getvalue() + + def test_parse_err(self): + assert flowfilter.parse("~h [") is None + + def test_simple(self): + assert not flowfilter.parse("~b") + assert flowfilter.parse("~q") + assert flowfilter.parse("~c 10") + assert flowfilter.parse("~m foobar") + assert flowfilter.parse("~u foobar") + assert flowfilter.parse("~q ~c 10") + p = flowfilter.parse("~q ~c 10") + self._dump(p) + assert len(p.lst) == 2 + + def test_naked_url(self): + a = flowfilter.parse("foobar ~h rex") + assert a.lst[0].expr == "foobar" + assert a.lst[1].expr == "rex" + self._dump(a) + + def test_quoting(self): + a = flowfilter.parse("~u 'foo ~u bar' ~u voing") + assert a.lst[0].expr == "foo ~u bar" + assert a.lst[1].expr == "voing" + self._dump(a) + + a = flowfilter.parse("~u foobar") + assert a.expr == "foobar" + + a = flowfilter.parse(r"~u 'foobar\"\''") + assert a.expr == "foobar\"'" + + a = flowfilter.parse(r'~u "foo \'bar"') + assert a.expr == "foo 'bar" + + def test_nesting(self): + a = flowfilter.parse("(~u foobar & ~h voing)") + assert a.lst[0].expr == "foobar" + self._dump(a) + + def test_not(self): + a = flowfilter.parse("!~h test") + assert a.itm.expr == "test" + a = flowfilter.parse("!(~u test & ~h bar)") + assert a.itm.lst[0].expr == "test" + self._dump(a) + + def test_binaryops(self): + a = flowfilter.parse("~u foobar | ~h voing") + isinstance(a, flowfilter.FOr) + self._dump(a) + + a = flowfilter.parse("~u foobar & ~h voing") + isinstance(a, flowfilter.FAnd) + self._dump(a) + + def test_wideops(self): + a = flowfilter.parse("~hq 'header: qvalue'") + assert isinstance(a, flowfilter.FHeadRequest) + self._dump(a) + + +class TestMatchingHTTPFlow: + + def req(self): + return tutils.tflow() + + def resp(self): + return tutils.tflow(resp=True) + + def err(self): + return tutils.tflow(err=True) + + def q(self, q, o): + return flowfilter.parse(q)(o) + + def test_http(self): + s = 
self.req() + assert self.q("~http", s) + assert not self.q("~tcp", s) + + def test_asset(self): + s = self.resp() + assert not self.q("~a", s) + s.response.headers["content-type"] = "text/javascript" + assert self.q("~a", s) + + def test_fcontenttype(self): + q = self.req() + s = self.resp() + assert not self.q("~t content", q) + assert not self.q("~t content", s) + + q.request.headers["content-type"] = "text/json" + assert self.q("~t json", q) + assert self.q("~tq json", q) + assert not self.q("~ts json", q) + + s.response.headers["content-type"] = "text/json" + assert self.q("~t json", s) + + del s.response.headers["content-type"] + s.request.headers["content-type"] = "text/json" + assert self.q("~t json", s) + assert self.q("~tq json", s) + assert not self.q("~ts json", s) + + def test_freq_fresp(self): + q = self.req() + s = self.resp() + + assert self.q("~q", q) + assert not self.q("~q", s) + + assert not self.q("~s", q) + assert self.q("~s", s) + + def test_ferr(self): + e = self.err() + assert self.q("~e", e) + + def test_head(self): + q = self.req() + s = self.resp() + assert not self.q("~h nonexistent", q) + assert self.q("~h qvalue", q) + assert self.q("~h header", q) + assert self.q("~h 'header: qvalue'", q) + + assert self.q("~h 'header: qvalue'", s) + assert self.q("~h 'header-response: svalue'", s) + + assert self.q("~hq 'header: qvalue'", s) + assert not self.q("~hq 'header-response: svalue'", s) + + assert self.q("~hq 'header: qvalue'", q) + assert not self.q("~hq 'header-request: svalue'", q) + + assert not self.q("~hs 'header: qvalue'", s) + assert self.q("~hs 'header-response: svalue'", s) + assert not self.q("~hs 'header: qvalue'", q) + + def match_body(self, q, s): + assert not self.q("~b nonexistent", q) + assert self.q("~b content", q) + assert self.q("~b message", s) + + assert not self.q("~bq nomatch", s) + assert self.q("~bq content", q) + assert self.q("~bq content", s) + assert not self.q("~bq message", q) + assert not self.q("~bq 
message", s) + + assert not self.q("~bs nomatch", s) + assert not self.q("~bs content", q) + assert not self.q("~bs content", s) + assert not self.q("~bs message", q) + assert self.q("~bs message", s) + + def test_body(self): + q = self.req() + s = self.resp() + self.match_body(q, s) + + q.request.encode("gzip") + s.request.encode("gzip") + s.response.encode("gzip") + self.match_body(q, s) + + def test_method(self): + q = self.req() + assert self.q("~m get", q) + assert not self.q("~m post", q) + + q.request.method = "oink" + assert not self.q("~m get", q) + + def test_domain(self): + q = self.req() + assert self.q("~d address", q) + assert not self.q("~d none", q) + + def test_url(self): + q = self.req() + s = self.resp() + assert self.q("~u address", q) + assert self.q("~u address:22/path", q) + assert not self.q("~u moo/path", q) + + assert self.q("~u address", s) + assert self.q("~u address:22/path", s) + assert not self.q("~u moo/path", s) + + def test_code(self): + q = self.req() + s = self.resp() + assert not self.q("~c 200", q) + assert self.q("~c 200", s) + assert not self.q("~c 201", s) + + def test_src(self): + q = self.req() + assert self.q("~src address", q) + assert not self.q("~src foobar", q) + assert self.q("~src :22", q) + assert not self.q("~src :99", q) + assert self.q("~src address:22", q) + + def test_dst(self): + q = self.req() + q.server_conn = tutils.tserver_conn() + assert self.q("~dst address", q) + assert not self.q("~dst foobar", q) + assert self.q("~dst :22", q) + assert not self.q("~dst :99", q) + assert self.q("~dst address:22", q) + + def test_and(self): + s = self.resp() + assert self.q("~c 200 & ~h head", s) + assert self.q("~c 200 & ~h head", s) + assert not self.q("~c 200 & ~h nohead", s) + assert self.q("(~c 200 & ~h head) & ~b content", s) + assert not self.q("(~c 200 & ~h head) & ~b nonexistent", s) + assert not self.q("(~c 200 & ~h nohead) & ~b content", s) + + def test_or(self): + s = self.resp() + assert self.q("~c 200 | 
~h nohead", s) + assert self.q("~c 201 | ~h head", s) + assert not self.q("~c 201 | ~h nohead", s) + assert self.q("(~c 201 | ~h nohead) | ~s", s) + + def test_not(self): + s = self.resp() + assert not self.q("! ~c 200", s) + assert self.q("! ~c 201", s) + assert self.q("!~c 201 !~c 202", s) + assert not self.q("!~c 201 !~c 200", s) + + +class TestMatchingTCPFlow: + + def flow(self): + return tutils.ttcpflow() + + def err(self): + return tutils.ttcpflow(err=True) + + def q(self, q, o): + return flowfilter.parse(q)(o) + + def test_tcp(self): + f = self.flow() + assert self.q("~tcp", f) + assert not self.q("~http", f) + + def test_ferr(self): + e = self.err() + assert self.q("~e", e) + + def test_body(self): + f = self.flow() + + # Messages sent by client or server + assert self.q("~b hello", f) + assert self.q("~b me", f) + assert not self.q("~b nonexistent", f) + + # Messages sent by client + assert self.q("~bq hello", f) + assert not self.q("~bq me", f) + assert not self.q("~bq nonexistent", f) + + # Messages sent by server + assert self.q("~bs me", f) + assert not self.q("~bs hello", f) + assert not self.q("~bs nonexistent", f) + + def test_src(self): + f = self.flow() + assert self.q("~src address", f) + assert not self.q("~src foobar", f) + assert self.q("~src :22", f) + assert not self.q("~src :99", f) + assert self.q("~src address:22", f) + + def test_dst(self): + f = self.flow() + f.server_conn = tutils.tserver_conn() + assert self.q("~dst address", f) + assert not self.q("~dst foobar", f) + assert self.q("~dst :22", f) + assert not self.q("~dst :99", f) + assert self.q("~dst address:22", f) + + def test_and(self): + f = self.flow() + f.server_conn = tutils.tserver_conn() + assert self.q("~b hello & ~b me", f) + assert not self.q("~src wrongaddress & ~b hello", f) + assert self.q("(~src :22 & ~dst :22) & ~b hello", f) + assert not self.q("(~src address:22 & ~dst :22) & ~b nonexistent", f) + assert not self.q("(~src address:22 & ~dst :99) & ~b hello", f) + + 
def test_or(self): + f = self.flow() + f.server_conn = tutils.tserver_conn() + assert self.q("~b hello | ~b me", f) + assert self.q("~src :22 | ~b me", f) + assert not self.q("~src :99 | ~dst :99", f) + assert self.q("(~src :22 | ~dst :22) | ~b me", f) + + def test_not(self): + f = self.flow() + assert not self.q("! ~src :22", f) + assert self.q("! ~src :99", f) + assert self.q("!~src :99 !~src :99", f) + assert not self.q("!~src :99 !~src :22", f) + + def test_request(self): + f = self.flow() + assert not self.q("~q", f) + + def test_response(self): + f = self.flow() + assert not self.q("~s", f) + + def test_headers(self): + f = self.flow() + assert not self.q("~h whatever", f) + + # Request headers + assert not self.q("~hq whatever", f) + + # Response headers + assert not self.q("~hs whatever", f) + + def test_content_type(self): + f = self.flow() + assert not self.q("~t whatever", f) + + # Request content-type + assert not self.q("~tq whatever", f) + + # Response content-type + assert not self.q("~ts whatever", f) + + def test_code(self): + f = self.flow() + assert not self.q("~c 200", f) + + def test_domain(self): + f = self.flow() + assert not self.q("~d whatever", f) + + def test_method(self): + f = self.flow() + assert not self.q("~m whatever", f) + + def test_url(self): + f = self.flow() + assert not self.q("~u whatever", f) + + +class TestMatchingDummyFlow: + + def flow(self): + return tutils.tdummyflow() + + def err(self): + return tutils.tdummyflow(err=True) + + def q(self, q, o): + return flowfilter.parse(q)(o) + + def test_filters(self): + e = self.err() + f = self.flow() + f.server_conn = tutils.tserver_conn() + + assert not self.q("~a", f) + + assert not self.q("~b whatever", f) + assert not self.q("~bq whatever", f) + assert not self.q("~bs whatever", f) + + assert not self.q("~c 0", f) + + assert not self.q("~d whatever", f) + + assert self.q("~dst address", f) + assert not self.q("~dst nonexistent", f) + + assert self.q("~e", e) + assert not 
self.q("~e", f) + + assert not self.q("~http", f) + + assert not self.q("~h whatever", f) + assert not self.q("~hq whatever", f) + assert not self.q("~hs whatever", f) + + assert not self.q("~m whatever", f) + + assert not self.q("~s", f) + + assert self.q("~src address", f) + assert not self.q("~src nonexistent", f) + + assert not self.q("~tcp", f) + + assert not self.q("~t whatever", f) + assert not self.q("~tq whatever", f) + assert not self.q("~ts whatever", f) + + assert not self.q("~u whatever", f) + + assert not self.q("~q", f) + + +@patch('traceback.extract_tb') +def test_pyparsing_bug(extract_tb): + """https://github.com/mitmproxy/mitmproxy/issues/1087""" + # The text is a string with leading and trailing whitespace stripped; if the source is not available it is None. + extract_tb.return_value = [("", 1, "test", None)] + assert flowfilter.parse("test") -- cgit v1.2.3