 examples/har_extractor.py     | 130
 libmproxy/console/__init__.py |  30
 libmproxy/dump.py             |  11
 libmproxy/flow.py             |  17
 libmproxy/web/__init__.py     |   9
 libmproxy/web/app.py          |   9
 test/test_dump.py             |  14
 7 files changed, 136 insertions(+), 84 deletions(-)
diff --git a/examples/har_extractor.py b/examples/har_extractor.py
index 531f32aa..5c228ece 100644
--- a/examples/har_extractor.py
+++ b/examples/har_extractor.py
@@ -1,22 +1,18 @@
 """
-    This inline script utilizes harparser.HAR from https://github.com/JustusW/harparser
-    to generate a HAR log object.
+
+    This inline script utilizes harparser.HAR from
+    https://github.com/JustusW/harparser to generate a HAR log object.
 """
-try:
-    from harparser import HAR
-    from pytz import UTC
-except ImportError as e:
-    import sys
-    print >> sys.stderr, "\r\nMissing dependencies: please run `pip install mitmproxy[examples]`.\r\n"
-    raise
+from harparser import HAR
 
-from datetime import datetime, timedelta, tzinfo
+from datetime import datetime
 
 
 class _HARLog(HAR.log):
-    # The attributes need to be registered here for them to actually be available later via self. This is
-    # due to HAREncodable linking __getattr__ to __getitem__. Anything that is set only in __init__ will
-    # just be added as key/value pair to self.__classes__.
+    # The attributes need to be registered here for them to actually be
+    # available later via self. This is due to HAREncodable linking __getattr__
+    # to __getitem__. Anything that is set only in __init__ will just be added
+    # as key/value pair to self.__classes__.
     __page_list__ = []
     __page_count__ = 0
     __page_ref__ = {}
@@ -58,55 +54,66 @@ class _HARLog(HAR.log):
 def start(context, argv):
     """
-        On start we create a HARLog instance. You will have to adapt this to suit your actual needs
-        of HAR generation. As it will probably be necessary to cluster logs by IPs or reset them
-        from time to time.
+        On start we create a HARLog instance. You will have to adapt this to
+        suit your actual needs of HAR generation. As it will probably be
+        necessary to cluster logs by IPs or reset them from time to time.
     """
     context.dump_file = None
     if len(argv) > 1:
         context.dump_file = argv[1]
     else:
-        raise ValueError('Usage: -s "har_extractor.py filename" '
-                         '(- will output to stdout, filenames ending with .zhar will result in compressed har)')
+        raise ValueError(
+            'Usage: -s "har_extractor.py filename" '
+            '(- will output to stdout, filenames ending with .zhar '
+            'will result in compressed har)'
+        )
     context.HARLog = _HARLog(['https://github.com'])
     context.seen_server = set()
 
 
 def response(context, flow):
     """
-        Called when a server response has been received. At the time of this message both
-        a request and a response are present and completely done.
+        Called when a server response has been received. At the time of this
+        message both a request and a response are present and completely done.
     """
     # Values are converted from float seconds to int milliseconds later.
     ssl_time = -.001
     connect_time = -.001
     if flow.server_conn not in context.seen_server:
-        # Calculate the connect_time for this server_conn. Afterwards add it to seen list, in
-        # order to avoid the connect_time being present in entries that use an existing connection.
+        # Calculate the connect_time for this server_conn. Afterwards add it to
+        # seen list, in order to avoid the connect_time being present in entries
+        # that use an existing connection.
         connect_time = flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
         context.seen_server.add(flow.server_conn)
 
         if flow.server_conn.timestamp_ssl_setup is not None:
-            # Get the ssl_time for this server_conn as the difference between the start of the successful
-            # tcp setup and the successful ssl setup. If no ssl setup has been made it is left as -1 since
-            # it doesn't apply to this connection.
+            # Get the ssl_time for this server_conn as the difference between
+            # the start of the successful tcp setup and the successful ssl
+            # setup. If no ssl setup has been made it is left as -1 since it
+            # doesn't apply to this connection.
             ssl_time = flow.server_conn.timestamp_ssl_setup - flow.server_conn.timestamp_tcp_setup
 
-    # Calculate the raw timings from the different timestamps present in the request and response object.
-    # For lack of a way to measure it dns timings can not be calculated. The same goes for HAR blocked:
-    # MITMProxy will open a server connection as soon as it receives the host and port from the client
-    # connection. So the time spent waiting is actually spent waiting between request.timestamp_end and
-    # response.timestamp_start thus it correlates to HAR wait instead.
-    timings_raw = {'send': flow.request.timestamp_end - flow.request.timestamp_start,
-                   'wait': flow.response.timestamp_start - flow.request.timestamp_end,
-                   'receive': flow.response.timestamp_end - flow.response.timestamp_start,
-                   'connect': connect_time,
-                   'ssl': ssl_time}
-
-    # HAR timings are integers in ms, so we have to re-encode the raw timings to that format.
+    # Calculate the raw timings from the different timestamps present in the
+    # request and response object. For lack of a way to measure it dns timings
+    # can not be calculated. The same goes for HAR blocked: MITMProxy will open
+    # a server connection as soon as it receives the host and port from the
+    # client connection. So the time spent waiting is actually spent waiting
+    # between request.timestamp_end and response.timestamp_start thus it
+    # correlates to HAR wait instead.
+    timings_raw = {
+        'send': flow.request.timestamp_end - flow.request.timestamp_start,
+        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
+        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
+        'connect': connect_time,
+        'ssl': ssl_time
+    }
+
+    # HAR timings are integers in ms, so we have to re-encode the raw timings to
+    # that format.
     timings = dict([(key, int(1000 * value)) for key, value in timings_raw.iteritems()])
 
-    # The full_time is the sum of all timings. Timings set to -1 will be ignored as per spec.
+    # The full_time is the sum of all timings. Timings set to -1 will be ignored
+    # as per spec.
    full_time = 0
    for item in timings.values():
        if item > -1:
@@ -157,21 +164,30 @@ def response(context, flow):
             "cache": {},
             "timings": timings,
         })
-    # If the current url is in the page list of context.HARLog or does not have a referrer we add it as a new
-    # pages object.
+    # If the current url is in the page list of context.HARLog or does not have
+    # a referrer we add it as a new pages object.
     if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get('Referer', None) is None:
         page_id = context.HARLog.create_page_id()
-        context.HARLog.add(HAR.pages({"startedDateTime": entry['startedDateTime'],
-                                      "id": page_id,
-                                      "title": flow.request.url, }))
+        context.HARLog.add(
+            HAR.pages({
+                "startedDateTime": entry['startedDateTime'],
+                "id": page_id,
+                "title": flow.request.url,
+            })
+        )
         context.HARLog.set_page_ref(flow.request.url, page_id)
         entry['pageref'] = page_id
-    # Lookup the referer in the page_ref of context.HARLog to point this entries pageref attribute to the right
-    # pages object, then set it as a new reference to build a reference tree.
+    # Lookup the referer in the page_ref of context.HARLog to point this entries
+    # pageref attribute to the right pages object, then set it as a new
+    # reference to build a reference tree.
     elif context.HARLog.get_page_ref(flow.request.headers.get('Referer', (None, ))[0]) is not None:
-        entry['pageref'] = context.HARLog.get_page_ref(flow.request.headers['Referer'][0])
-        context.HARLog.set_page_ref(flow.request.headers['Referer'][0], entry['pageref'])
+        entry['pageref'] = context.HARLog.get_page_ref(
+            flow.request.headers['Referer'][0]
+        )
+        context.HARLog.set_page_ref(
+            flow.request.headers['Referer'][0], entry['pageref']
+        )
     context.HARLog.add(entry)
@@ -186,22 +202,28 @@ def done(context):
     json_dump = context.HARLog.json()
     compressed_json_dump = context.HARLog.compress()
 
-    print "=" * 100
     if context.dump_file == '-':
-        pprint(json.loads(json_dump))
+        context.log(pprint.pformat(json.loads(json_dump)))
     elif context.dump_file.endswith('.zhar'):
         file(context.dump_file, "w").write(compressed_json_dump)
     else:
         file(context.dump_file, "w").write(json_dump)
-    print "=" * 100
-    print "HAR log finished with %s bytes (%s bytes compressed)" % (len(json_dump), len(compressed_json_dump))
-    print "Compression rate is %s%%" % str(100. * len(compressed_json_dump) / len(json_dump))
-    print "=" * 100
+    context.log(
+        "HAR log finished with %s bytes (%s bytes compressed)" % (
+            len(json_dump), len(compressed_json_dump)
+        )
+    )
+    context.log(
+        "Compression rate is %s%%" % str(
+            100. * len(compressed_json_dump) / len(json_dump)
+        )
+    )
 
 
 def print_attributes(obj, filter_string=None, hide_privates=False):
     """
-        Useful helper method to quickly get all attributes of an object and its values.
+        Useful helper method to quickly get all attributes of an object and its
+        values.
     """
     for attr in dir(obj):
         if hide_privates and "__" in attr:
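A note on the timings hunk above: HAR wants each phase as an integer number of milliseconds, and a value of -1 means the phase does not apply and must be excluded from the entry's total time. That is why the script seeds ssl_time and connect_time with -.001, which converts to exactly -1. A standalone sketch of that conversion, with invented timestamps in place of real flow data (written for modern Python, while the script itself targets Python 2, hence its iteritems()):

# Sketch of the millisecond conversion from har_extractor.py, using
# made-up float-second deltas instead of real flow timestamps.
timings_raw = {
    'send': 0.015,     # request.timestamp_end - request.timestamp_start
    'wait': 0.230,     # response.timestamp_start - request.timestamp_end
    'receive': 0.041,  # response.timestamp_end - response.timestamp_start
    'connect': 0.012,  # tcp setup time (first use of this connection)
    'ssl': -.001,      # placeholder: no TLS setup on this connection
}

# HAR timings are integers in milliseconds; int(1000 * -.001) truncates
# to exactly -1, the HAR marker for "does not apply".
timings = {key: int(1000 * value) for key, value in timings_raw.items()}

# Per the HAR spec, -1 entries are excluded from the entry's total time.
full_time = sum(v for v in timings.values() if v > -1)

print(timings)    # {'send': 15, 'wait': 230, 'receive': 41, 'connect': 12, 'ssl': -1}
print(full_time)  # 298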
" + "Loaded as many flows as possible.", + "error" + ) + elif ret and not self.state.flow_count(): self.shutdown() print >> sys.stderr, "Could not load file:", ret sys.exit(1) @@ -700,23 +707,16 @@ class ConsoleMaster(flow.FlowMaster): def load_flows_callback(self, path): if not path: return - ret = self.load_flows(path) + ret = self.load_flows_path(path) return ret or "Flows loaded from %s"%path - def load_flows(self, path): + def load_flows_path(self, path): self.state.last_saveload = path - path = os.path.expanduser(path) - try: - f = file(path, "rb") - fr = flow.FlowReader(f) - except IOError, v: - return v.strerror reterr = None try: - flow.FlowMaster.load_flows(self, fr) + flow.FlowMaster.load_flows_file(self, path) except flow.FlowReadError, v: - reterr = v.strerror - f.close() + reterr = str(v) if self.flow_list_walker: self.sync_list_view() return reterr diff --git a/libmproxy/dump.py b/libmproxy/dump.py index 731592dc..91ccb42b 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -134,16 +134,11 @@ class DumpMaster(flow.FlowMaster): raise DumpError(err) if options.rfile: - path = os.path.expanduser(options.rfile) try: - f = file(path, "rb") - freader = flow.FlowReader(f) - except IOError, v: - raise DumpError(v.strerror) - try: - self.load_flows(freader) + self.load_flows_file(options.rfile) except flow.FlowReadError, v: - self.add_event("Flow file corrupted. Stopped loading.", "error") + self.add_event("Flow file corrupted.", "error") + raise DumpError(v) if self.o.app: self.start_app(self.o.app_host, self.o.app_port) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 58b4604c..f3b138e2 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -6,6 +6,7 @@ from abc import abstractmethod, ABCMeta import hashlib import Cookie import cookielib +import os import re from netlib import odict, wsgi import netlib.http @@ -785,8 +786,20 @@ class FlowMaster(controller.Master): """ Load flows from a FlowReader object. """ + cnt = 0 for i in fr.stream(): + cnt += 1 self.load_flow(i) + return cnt + + def load_flows_file(self, path): + path = os.path.expanduser(path) + try: + f = file(path, "rb") + freader = FlowReader(f) + except IOError, v: + raise FlowReadError(v.strerror) + return self.load_flows(freader) def process_new_request(self, f): if self.stickycookie_state: @@ -961,7 +974,9 @@ class FlowReader: data = tnetstring.load(self.fo) if tuple(data["version"][:2]) != version.IVERSION[:2]: v = ".".join(str(i) for i in data["version"]) - raise FlowReadError("Incompatible serialized data version: %s" % v) + raise FlowReadError( + "Incompatible serialized data version: %s" % v + ) off = self.fo.tell() yield handle.protocols[data["type"]]["flow"].from_state(data) except ValueError, v: diff --git a/libmproxy/web/__init__.py b/libmproxy/web/__init__.py index d981ab30..4d4d6c7c 100644 --- a/libmproxy/web/__init__.py +++ b/libmproxy/web/__init__.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function import collections import tornado.ioloop import tornado.httpserver +import os from .. import controller, flow from . 
diff --git a/libmproxy/web/__init__.py b/libmproxy/web/__init__.py
index d981ab30..4d4d6c7c 100644
--- a/libmproxy/web/__init__.py
+++ b/libmproxy/web/__init__.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function
 import collections
 import tornado.ioloop
 import tornado.httpserver
+import os
 from .. import controller, flow
 from . import app
 
@@ -124,6 +125,14 @@ class WebMaster(flow.FlowMaster):
         self.options = options
         super(WebMaster, self).__init__(server, WebState())
         self.app = app.Application(self, self.options.wdebug)
+        if options.rfile:
+            try:
+                print(self.load_flows_file(options.rfile))
+            except flow.FlowReadError, v:
+                self.add_event(
+                    "Could not read flow file: %s"%v,
+                    "error"
+                )
 
     def tick(self):
         flow.FlowMaster.tick(self, self.masterq, timeout=0)
diff --git a/libmproxy/web/app.py b/libmproxy/web/app.py
index 31cbf2e2..8598acf5 100644
--- a/libmproxy/web/app.py
+++ b/libmproxy/web/app.py
@@ -18,9 +18,12 @@ class RequestHandler(tornado.web.RequestHandler):
         self.set_header("X-Frame-Options", "DENY")
         self.add_header("X-XSS-Protection", "1; mode=block")
         self.add_header("X-Content-Type-Options", "nosniff")
-        self.add_header("Content-Security-Policy", "default-src 'self'; "
-                        "connect-src 'self' ws://* ; "
-                        "style-src 'self' 'unsafe-inline'")
+        self.add_header(
+            "Content-Security-Policy",
+            "default-src 'self'; "
+            "connect-src 'self' ws://* ; "
+            "style-src 'self' 'unsafe-inline'"
+        )
 
     @property
     def state(self):
diff --git a/test/test_dump.py b/test/test_dump.py
index aa91d262..927b5b50 100644
--- a/test/test_dump.py
+++ b/test/test_dump.py
@@ -99,15 +99,23 @@ class TestDumpMaster:
         with tutils.tmpdir() as t:
             p = os.path.join(t, "read")
             self._flowfile(p)
-            assert "GET" in self._dummy_cycle(0, None, "", flow_detail=1, rfile=p)
+            assert "GET" in self._dummy_cycle(
+                0,
+                None,
+                "",
+                flow_detail=1,
+                rfile=p
+            )
 
         tutils.raises(
             dump.DumpError, self._dummy_cycle,
             0, None, "", verbosity=1, rfile="/nonexistent"
         )
+        tutils.raises(
+            dump.DumpError, self._dummy_cycle,
+            0, None, "", verbosity=1, rfile="test_dump.py"
+        )
 
-        # We now just ignore errors
-        self._dummy_cycle(0, None, "", verbosity=1, rfile=tutils.test_data.path("test_dump.py"))
 
     def test_options(self):
         o = dump.Options(verbosity = 2)
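For context on the app.py hunk: it only re-wraps the header lines, but the policy they carry is what keeps mitmweb's pages from being framed, sniffed, or script-injected while still allowing its websocket channel and inline styles. A minimal standalone Tornado handler sending the same headers; hooking them in set_default_headers is an assumption made for this sketch, not something the diff shows:

import tornado.ioloop
import tornado.web


class HardenedHandler(tornado.web.RequestHandler):
    def set_default_headers(self):
        # The same hardening headers the diff re-wraps in mitmweb's app.py.
        self.set_header("X-Frame-Options", "DENY")
        self.add_header("X-XSS-Protection", "1; mode=block")
        self.add_header("X-Content-Type-Options", "nosniff")
        self.add_header(
            "Content-Security-Policy",
            "default-src 'self'; "
            "connect-src 'self' ws://* ; "   # permit the websocket channel
            "style-src 'self' 'unsafe-inline'"
        )

    def get(self):
        self.write("ok")


if __name__ == "__main__":
    tornado.web.Application([(r"/", HardenedHandler)]).listen(8081)
    tornado.ioloop.IOLoop.current().start()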