-rw-r--r--   examples/har_extractor.py       130
-rw-r--r--   libmproxy/console/__init__.py    30
-rw-r--r--   libmproxy/dump.py                11
-rw-r--r--   libmproxy/flow.py                17
-rw-r--r--   libmproxy/web/__init__.py         9
-rw-r--r--   libmproxy/web/app.py              9
-rw-r--r--   test/test_dump.py                14
7 files changed, 136 insertions, 84 deletions
diff --git a/examples/har_extractor.py b/examples/har_extractor.py
index 531f32aa..5c228ece 100644
--- a/examples/har_extractor.py
+++ b/examples/har_extractor.py
@@ -1,22 +1,18 @@
"""
- This inline script utilizes harparser.HAR from https://github.com/JustusW/harparser
- to generate a HAR log object.
+
+ This inline script utilizes harparser.HAR from
+ https://github.com/JustusW/harparser to generate a HAR log object.
"""
-try:
- from harparser import HAR
- from pytz import UTC
-except ImportError as e:
- import sys
- print >> sys.stderr, "\r\nMissing dependencies: please run `pip install mitmproxy[examples]`.\r\n"
- raise
+from harparser import HAR
-from datetime import datetime, timedelta, tzinfo
+from datetime import datetime
class _HARLog(HAR.log):
- # The attributes need to be registered here for them to actually be available later via self. This is
- # due to HAREncodable linking __getattr__ to __getitem__. Anything that is set only in __init__ will
- # just be added as key/value pair to self.__classes__.
+ # The attributes need to be registered here for them to actually be
+ # available later via self. This is due to HAREncodable linking __getattr__
+ # to __getitem__. Anything that is set only in __init__ will just be added
+ # as a key/value pair to self.__classes__.
__page_list__ = []
__page_count__ = 0
__page_ref__ = {}
@@ -58,55 +54,66 @@ class _HARLog(HAR.log):
def start(context, argv):
"""
- On start we create a HARLog instance. You will have to adapt this to suit your actual needs
- of HAR generation. As it will probably be necessary to cluster logs by IPs or reset them
- from time to time.
+ On start we create a HARLog instance. You will have to adapt this to
+ suit your actual needs of HAR generation, as it will probably be
+ necessary to cluster logs by IPs or reset them from time to time.
"""
context.dump_file = None
if len(argv) > 1:
context.dump_file = argv[1]
else:
- raise ValueError('Usage: -s "har_extractor.py filename" '
- '(- will output to stdout, filenames ending with .zhar will result in compressed har)')
+ raise ValueError(
+ 'Usage: -s "har_extractor.py filename" '
+ '(- will output to stdout, filenames ending with .zhar '
+ 'will result in compressed har)'
+ )
context.HARLog = _HARLog(['https://github.com'])
context.seen_server = set()
def response(context, flow):
"""
- Called when a server response has been received. At the time of this message both
- a request and a response are present and completely done.
+ Called when a server response has been received. At the time of this
+ message both a request and a response are present and completely done.
"""
# Values are converted from float seconds to int milliseconds later.
ssl_time = -.001
connect_time = -.001
if flow.server_conn not in context.seen_server:
- # Calculate the connect_time for this server_conn. Afterwards add it to seen list, in
- # order to avoid the connect_time being present in entries that use an existing connection.
+ # Calculate the connect_time for this server_conn. Afterwards add it to
+ # the seen list, in order to avoid the connect_time being present in entries
+ # that use an existing connection.
connect_time = flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
context.seen_server.add(flow.server_conn)
if flow.server_conn.timestamp_ssl_setup is not None:
- # Get the ssl_time for this server_conn as the difference between the start of the successful
- # tcp setup and the successful ssl setup. If no ssl setup has been made it is left as -1 since
- # it doesn't apply to this connection.
+ # Get the ssl_time for this server_conn as the difference between
+ # the start of the successful tcp setup and the successful ssl
+ # setup. If no ssl setup has been made, it is left as -1 since it
+ # doesn't apply to this connection.
ssl_time = flow.server_conn.timestamp_ssl_setup - flow.server_conn.timestamp_tcp_setup
- # Calculate the raw timings from the different timestamps present in the request and response object.
- # For lack of a way to measure it dns timings can not be calculated. The same goes for HAR blocked:
- # MITMProxy will open a server connection as soon as it receives the host and port from the client
- # connection. So the time spent waiting is actually spent waiting between request.timestamp_end and
- # response.timestamp_start thus it correlates to HAR wait instead.
- timings_raw = {'send': flow.request.timestamp_end - flow.request.timestamp_start,
- 'wait': flow.response.timestamp_start - flow.request.timestamp_end,
- 'receive': flow.response.timestamp_end - flow.response.timestamp_start,
- 'connect': connect_time,
- 'ssl': ssl_time}
-
- # HAR timings are integers in ms, so we have to re-encode the raw timings to that format.
+ # Calculate the raw timings from the different timestamps present in the
+ # request and response object. For lack of a way to measure it, DNS timings
+ # cannot be calculated. The same goes for HAR blocked: MITMProxy will open
+ # a server connection as soon as it receives the host and port from the
+ # client connection, so the time that would count as blocked is actually
+ # spent between request.timestamp_end and response.timestamp_start and thus
+ # correlates to HAR wait instead.
+ timings_raw = {
+ 'send': flow.request.timestamp_end - flow.request.timestamp_start,
+ 'wait': flow.response.timestamp_start - flow.request.timestamp_end,
+ 'receive': flow.response.timestamp_end - flow.response.timestamp_start,
+ 'connect': connect_time,
+ 'ssl': ssl_time
+ }
+
+ # HAR timings are integers in ms, so we have to re-encode the raw timings to
+ # that format.
timings = dict([(key, int(1000 * value)) for key, value in timings_raw.iteritems()])
- # The full_time is the sum of all timings. Timings set to -1 will be ignored as per spec.
+ # The full_time is the sum of all timings. Timings set to -1 will be ignored
+ # as per spec.
full_time = 0
for item in timings.values():
if item > -1:
@@ -157,21 +164,30 @@ def response(context, flow):
"cache": {},
"timings": timings, })
- # If the current url is in the page list of context.HARLog or does not have a referrer we add it as a new
- # pages object.
+ # If the current url is in the page list of context.HARLog or does not have
+ # a referrer, we add it as a new pages object.
if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get('Referer', None) is None:
page_id = context.HARLog.create_page_id()
- context.HARLog.add(HAR.pages({"startedDateTime": entry['startedDateTime'],
- "id": page_id,
- "title": flow.request.url, }))
+ context.HARLog.add(
+ HAR.pages({
+ "startedDateTime": entry['startedDateTime'],
+ "id": page_id,
+ "title": flow.request.url,
+ })
+ )
context.HARLog.set_page_ref(flow.request.url, page_id)
entry['pageref'] = page_id
- # Lookup the referer in the page_ref of context.HARLog to point this entries pageref attribute to the right
- # pages object, then set it as a new reference to build a reference tree.
+ # Look up the referer in the page_ref of context.HARLog to point this
+ # entry's pageref attribute to the right pages object, then set it as a new
+ # reference to build a reference tree.
elif context.HARLog.get_page_ref(flow.request.headers.get('Referer', (None, ))[0]) is not None:
- entry['pageref'] = context.HARLog.get_page_ref(flow.request.headers['Referer'][0])
- context.HARLog.set_page_ref(flow.request.headers['Referer'][0], entry['pageref'])
+ entry['pageref'] = context.HARLog.get_page_ref(
+ flow.request.headers['Referer'][0]
+ )
+ context.HARLog.set_page_ref(
+ flow.request.headers['Referer'][0], entry['pageref']
+ )
context.HARLog.add(entry)
@@ -186,22 +202,28 @@ def done(context):
json_dump = context.HARLog.json()
compressed_json_dump = context.HARLog.compress()
- print "=" * 100
if context.dump_file == '-':
- pprint(json.loads(json_dump))
+ context.log(pprint.pformat(json.loads(json_dump)))
elif context.dump_file.endswith('.zhar'):
file(context.dump_file, "w").write(compressed_json_dump)
else:
file(context.dump_file, "w").write(json_dump)
- print "=" * 100
- print "HAR log finished with %s bytes (%s bytes compressed)" % (len(json_dump), len(compressed_json_dump))
- print "Compression rate is %s%%" % str(100. * len(compressed_json_dump) / len(json_dump))
- print "=" * 100
+ context.log(
+ "HAR log finished with %s bytes (%s bytes compressed)" % (
+ len(json_dump), len(compressed_json_dump)
+ )
+ )
+ context.log(
+ "Compression rate is %s%%" % str(
+ 100. * len(compressed_json_dump) / len(json_dump)
+ )
+ )
def print_attributes(obj, filter_string=None, hide_privates=False):
"""
- Useful helper method to quickly get all attributes of an object and its values.
+ Useful helper method to quickly get all attributes of an object and its
+ values.
"""
for attr in dir(obj):
if hide_privates and "__" in attr:
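
Note on the timing conversion in the response() hook above: raw float-second
deltas are re-encoded as the integer-millisecond values HAR expects, with -1
meaning "does not apply". A minimal standalone sketch of that step, using
made-up numbers in place of the flow timestamps:

    # Hypothetical raw timings in float seconds, as derived from the flow's
    # timestamps; ssl is -.001 because this connection did no TLS setup.
    timings_raw = {
        'send': 0.012,
        'wait': 0.340,
        'receive': 0.051,
        'connect': 0.087,
        'ssl': -.001,
    }

    # HAR timings are integers in milliseconds.
    timings = dict((key, int(1000 * value)) for key, value in timings_raw.items())
    # -> {'send': 12, 'wait': 340, 'receive': 51, 'connect': 87, 'ssl': -1}

    # Timings of -1 are ignored when computing the entry's total time, per spec.
    full_time = sum(value for value in timings.values() if value > -1)
    # -> 490
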
diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py
index aa277a48..f5b6a2a6 100644
--- a/libmproxy/console/__init__.py
+++ b/libmproxy/console/__init__.py
@@ -599,13 +599,20 @@ class ConsoleMaster(flow.FlowMaster):
self.view_flowlist()
- self.server.start_slave(controller.Slave, controller.Channel(self.masterq, self.should_exit))
+ self.server.start_slave(
+ controller.Slave,
+ controller.Channel(self.masterq, self.should_exit)
+ )
if self.options.rfile:
- ret = self.load_flows(self.options.rfile)
+ ret = self.load_flows_path(self.options.rfile)
if ret and self.state.flow_count():
- self.add_event("File truncated or corrupted. Loaded as many flows as possible.","error")
- elif not self.state.flow_count():
+ self.add_event(
+ "File truncated or corrupted. "
+ "Loaded as many flows as possible.",
+ "error"
+ )
+ elif ret and not self.state.flow_count():
self.shutdown()
print >> sys.stderr, "Could not load file:", ret
sys.exit(1)
@@ -700,23 +707,16 @@ class ConsoleMaster(flow.FlowMaster):
def load_flows_callback(self, path):
if not path:
return
- ret = self.load_flows(path)
+ ret = self.load_flows_path(path)
return ret or "Flows loaded from %s"%path
- def load_flows(self, path):
+ def load_flows_path(self, path):
self.state.last_saveload = path
- path = os.path.expanduser(path)
- try:
- f = file(path, "rb")
- fr = flow.FlowReader(f)
- except IOError, v:
- return v.strerror
reterr = None
try:
- flow.FlowMaster.load_flows(self, fr)
+ flow.FlowMaster.load_flows_file(self, path)
except flow.FlowReadError, v:
- reterr = v.strerror
- f.close()
+ reterr = str(v)
if self.flow_list_walker:
self.sync_list_view()
return reterr
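
For reference, the rfile startup logic above distinguishes three outcomes: a
clean read (startup continues), a partial read (warn and keep what was
loaded), and a failed read with nothing loaded (shut down). A simplified
sketch of that decision, using the load_flows_path method defined in this
diff:

    ret = self.load_flows_path(self.options.rfile)
    if ret and self.state.flow_count():
        # Partial read: some flows made it in before the file went bad.
        self.add_event(
            "File truncated or corrupted. Loaded as many flows as possible.",
            "error"
        )
    elif ret and not self.state.flow_count():
        # Nothing usable was read: abort startup entirely.
        self.shutdown()
        print >> sys.stderr, "Could not load file:", ret
        sys.exit(1)
    # Otherwise the file read cleanly (possibly with zero flows) and startup
    # continues.
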
diff --git a/libmproxy/dump.py b/libmproxy/dump.py
index 731592dc..91ccb42b 100644
--- a/libmproxy/dump.py
+++ b/libmproxy/dump.py
@@ -134,16 +134,11 @@ class DumpMaster(flow.FlowMaster):
raise DumpError(err)
if options.rfile:
- path = os.path.expanduser(options.rfile)
try:
- f = file(path, "rb")
- freader = flow.FlowReader(f)
- except IOError, v:
- raise DumpError(v.strerror)
- try:
- self.load_flows(freader)
+ self.load_flows_file(options.rfile)
except flow.FlowReadError, v:
- self.add_event("Flow file corrupted. Stopped loading.", "error")
+ self.add_event("Flow file corrupted.", "error")
+ raise DumpError(v)
if self.o.app:
self.start_app(self.o.app_host, self.o.app_port)
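
With this change, a corrupted --rfile aborts the dump master instead of being
logged and skipped: FlowReadError from load_flows_file is re-raised as
DumpError. A rough sketch of the same translation pattern in isolation, using
only names that appear in this diff (the import location of DumpError is
assumed to be libmproxy.dump):

    from libmproxy import flow
    from libmproxy.dump import DumpError

    def load_or_fail(master, path):
        # master is a flow.FlowMaster; any read problem becomes a DumpError
        # so that startup fails loudly rather than continuing with no flows.
        try:
            return master.load_flows_file(path)
        except flow.FlowReadError, v:
            raise DumpError(v)
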
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index 58b4604c..f3b138e2 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -6,6 +6,7 @@ from abc import abstractmethod, ABCMeta
import hashlib
import Cookie
import cookielib
+import os
import re
from netlib import odict, wsgi
import netlib.http
@@ -785,8 +786,20 @@ class FlowMaster(controller.Master):
"""
Load flows from a FlowReader object.
"""
+ cnt = 0
for i in fr.stream():
+ cnt += 1
self.load_flow(i)
+ return cnt
+
+ def load_flows_file(self, path):
+ path = os.path.expanduser(path)
+ try:
+ f = file(path, "rb")
+ freader = FlowReader(f)
+ except IOError, v:
+ raise FlowReadError(v.strerror)
+ return self.load_flows(freader)
def process_new_request(self, f):
if self.stickycookie_state:
@@ -961,7 +974,9 @@ class FlowReader:
data = tnetstring.load(self.fo)
if tuple(data["version"][:2]) != version.IVERSION[:2]:
v = ".".join(str(i) for i in data["version"])
- raise FlowReadError("Incompatible serialized data version: %s" % v)
+ raise FlowReadError(
+ "Incompatible serialized data version: %s" % v
+ )
off = self.fo.tell()
yield handle.protocols[data["type"]]["flow"].from_state(data)
except ValueError, v:
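
The new FlowMaster.load_flows_file helper centralizes path expansion, file
opening and error handling: an IOError is re-raised as FlowReadError, and the
flow count returned by load_flows is passed through. A rough usage sketch for
offline loading; it assumes flow.State exists and that a FlowMaster can be
constructed with server=None for offline use, and the capture path is
hypothetical:

    from libmproxy import flow

    state = flow.State()
    master = flow.FlowMaster(None, state)  # no proxy server needed just to load flows

    try:
        cnt = master.load_flows_file("~/captures/session.flows")
        print "loaded %d flows, state now holds %d" % (cnt, state.flow_count())
    except flow.FlowReadError, v:
        # Raised for unreadable paths (IOError) as well as corrupted or
        # version-incompatible dumps.
        print "could not load flows:", v
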
diff --git a/libmproxy/web/__init__.py b/libmproxy/web/__init__.py
index d981ab30..4d4d6c7c 100644
--- a/libmproxy/web/__init__.py
+++ b/libmproxy/web/__init__.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function
import collections
import tornado.ioloop
import tornado.httpserver
+import os
from .. import controller, flow
from . import app
@@ -124,6 +125,14 @@ class WebMaster(flow.FlowMaster):
self.options = options
super(WebMaster, self).__init__(server, WebState())
self.app = app.Application(self, self.options.wdebug)
+ if options.rfile:
+ try:
+ print(self.load_flows_file(options.rfile))
+ except flow.FlowReadError, v:
+ self.add_event(
+ "Could not read flow file: %s"%v,
+ "error"
+ )
def tick(self):
flow.FlowMaster.tick(self, self.masterq, timeout=0)
diff --git a/libmproxy/web/app.py b/libmproxy/web/app.py
index 31cbf2e2..8598acf5 100644
--- a/libmproxy/web/app.py
+++ b/libmproxy/web/app.py
@@ -18,9 +18,12 @@ class RequestHandler(tornado.web.RequestHandler):
self.set_header("X-Frame-Options", "DENY")
self.add_header("X-XSS-Protection", "1; mode=block")
self.add_header("X-Content-Type-Options", "nosniff")
- self.add_header("Content-Security-Policy", "default-src 'self'; "
- "connect-src 'self' ws://* ; "
- "style-src 'self' 'unsafe-inline'")
+ self.add_header(
+ "Content-Security-Policy",
+ "default-src 'self'; "
+ "connect-src 'self' ws://* ; "
+ "style-src 'self' 'unsafe-inline'"
+ )
@property
def state(self):
diff --git a/test/test_dump.py b/test/test_dump.py
index aa91d262..927b5b50 100644
--- a/test/test_dump.py
+++ b/test/test_dump.py
@@ -99,15 +99,23 @@ class TestDumpMaster:
with tutils.tmpdir() as t:
p = os.path.join(t, "read")
self._flowfile(p)
- assert "GET" in self._dummy_cycle(0, None, "", flow_detail=1, rfile=p)
+ assert "GET" in self._dummy_cycle(
+ 0,
+ None,
+ "",
+ flow_detail=1,
+ rfile=p
+ )
tutils.raises(
dump.DumpError, self._dummy_cycle,
0, None, "", verbosity=1, rfile="/nonexistent"
)
+ tutils.raises(
+ dump.DumpError, self._dummy_cycle,
+ 0, None, "", verbosity=1, rfile="test_dump.py"
+ )
- # We now just ignore errors
- self._dummy_cycle(0, None, "", verbosity=1, rfile=tutils.test_data.path("test_dump.py"))
def test_options(self):
o = dump.Options(verbosity = 2)