diff options
Diffstat (limited to 'libmproxy')
-rw-r--r-- | libmproxy/playback.py | 129 | ||||
-rw-r--r-- | libmproxy/proxy.py | 19 | ||||
-rw-r--r-- | libmproxy/record.py | 68 | ||||
-rw-r--r-- | libmproxy/recorder.py | 273 |
4 files changed, 486 insertions, 3 deletions
diff --git a/libmproxy/playback.py b/libmproxy/playback.py new file mode 100644 index 00000000..920b2e0c --- /dev/null +++ b/libmproxy/playback.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import controller +import utils +import proxy +import recorder + +class PlaybackMaster(controller.Master): + """ + A simple master that plays back recorded responses. + """ + def __init__(self, server, options): + self.verbosity = options.verbose + self.store = recorder.Recorder(options) + controller.Master.__init__(self, server) + + def run(self): + try: + return controller.Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def process_missing_response(self, request): + response = None + print >> sys.stderr, self.store.normalize_request(request).assemble_proxy() + print >> sys.stderr, "Actions:" + print >> sys.stderr, " q Quit" + print >> sys.stderr, " a(dd) Add pattern rule" + print >> sys.stderr, " A(dd) Add pattern rule (forced)" + print >> sys.stderr, " e(rror) respond with a 404 error" + print >> sys.stderr, " k(ill) kill the request, empty response" + print >> sys.stderr, " f(orward) forward the request to the requested server and cache response" + command = raw_input("Action: ") + command = command[:1] + if command == 'q': + self.shutdown() + return None + elif command == 'a' or command == 'A': + filt = raw_input("Filter: ") + search = raw_input("Search pattern: ") + replace = raw_input("Replacement string: ") + self.store.add_rule(filt, search, replace) + if command == 'A': + self.store.save_rule(filt, search, replace) + elif command == 'e': + return proxy.Response(request, "404", "Not found", utils.Headers(), "Not found") + elif command == 'k': + return None + elif command == 'f': + return request + else: + print >> sys.stderr, "ERROR: Unknown command" + return self.process_missing_response(request) + try: + response = self.store.get_response(request) + if command == 'a': + self.store.save_rule(filt, search, replace) + except proxy.ProxyError: + print >> sys.stderr, "ERROR: Malformed substitution rule" + self.store.forget_last_rule() + response = self.process_missing_response(request) + except IOError: + print >> sys.stderr, "NOTICE: Response still not found" + if command == 'a': + self.store.forget_last_rule() + response = self.process_missing_response(request) + return response + + def handle_request(self, msg): + request = msg + try: + response = self.store.get_response(request) + except IOError: + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, "ERROR: No matching response.", + print >> sys.stderr, ",".join(self.store.cookies) + response = self.process_missing_response(msg) + msg.ack(response) + + def handle_response(self, msg): + request = msg.request + response = msg + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, response.short() + if not response.is_cached(): + self.store.save_response(response) + msg.ack(self.store.filter_response(msg)) diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py index e3eace3b..631e2470 100644 --- a/libmproxy/proxy.py +++ b/libmproxy/proxy.py @@ -124,6 +124,7 @@ def parse_request_line(request): class Request(controller.Msg): FMT = '%s %s HTTP/1.1\r\n%s\r\n%s' + FMT_PROXY = '%s %s://%s:%s%s HTTP/1.1\r\n%s\r\n%s' def __init__(self, client_conn, host, port, scheme, method, path, headers, content, timestamp=None): self.client_conn = client_conn self.host, self.port, self.scheme = host, port, scheme @@ -132,6 +133,9 @@ class Request(controller.Msg): self.close = False controller.Msg.__init__(self) + def is_cached(self): + return False + def get_state(self): return dict( host = self.host, @@ -189,7 +193,10 @@ class Request(controller.Msg): def short(self): return "%s %s"%(self.method, self.url()) - def assemble(self): + def assemble_proxy(self): + return self.assemble(True) + + def assemble(self, _proxy = False): """ Assembles the request for transmission to the server. We make some modifications to make sure interception works properly. @@ -210,8 +217,10 @@ class Request(controller.Msg): content = "" if self.close: headers["connection"] = ["close"] - data = (self.method, self.path, str(headers), content) - return self.FMT%data + if not _proxy: + return self.FMT % (self.method, self.path, str(headers), content) + else: + return self.FMT_PROXY % (self.method, self.scheme, self.host, self.port, self.path, str(headers), content) class Response(controller.Msg): @@ -221,6 +230,7 @@ class Response(controller.Msg): self.code, self.msg = code, msg self.headers, self.content = headers, content self.timestamp = timestamp or time.time() + self.cached = False controller.Msg.__init__(self) def get_state(self): @@ -256,6 +266,9 @@ class Response(controller.Msg): def is_response(self): return True + def is_cached(self): + return self.cached + def short(self): return "%s %s"%(self.code, self.msg) diff --git a/libmproxy/record.py b/libmproxy/record.py new file mode 100644 index 00000000..d32c8711 --- /dev/null +++ b/libmproxy/record.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import controller +import utils +import recorder + +class RecordMaster(controller.Master): + """ + A simple master that just records to files. + """ + def __init__(self, server, options): + self.verbosity = options.verbose + self.store = recorder.Recorder(options) + controller.Master.__init__(self, server) + + def run(self): + try: + return controller.Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def handle_request(self, msg): + msg.ack(self.store.filter_request(msg)) + + def handle_response(self, msg): + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, msg.request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, msg.short() + self.store.save_response(msg) + msg.ack(self.store.filter_response(msg)) diff --git a/libmproxy/recorder.py b/libmproxy/recorder.py new file mode 100644 index 00000000..51c8a6e0 --- /dev/null +++ b/libmproxy/recorder.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import time +import hashlib +import utils +import proxy +import collections +import itertools +import string +import Cookie +import filt +import re +import cStringIO + +def constant_factory(value): + return itertools.repeat(value).next + +class PatternRule: + """ + Request pattern rule + :_ivar _match filt pattern rule + :_ivar _search Regex pattern to search for + :_ivar _replace Replacement string + """ + def __init__(self, pattern, search, replace): + self.match = filt.parse(pattern) + self.search = re.compile(search) + self.replace = replace + def execute(self, request, text): + if self.match and not self.match(request): + return text + return re.sub(self.search, self.replace, text) + +class RecorderConnection(proxy.ServerConnection): + """ + Simulated ServerConnection connecting to the cache + """ + # Note: This may chane in future. Division between Recorder + # and RecorderConnection is not yet finalized + def __init__(self, request, fp): + self.host = request.host + self.port = request.port + self.scheme = request.scheme + self.close = False + self.server = fp + self.rfile = fp + self.wfile = fp + + def send_request(self, request): + self.request = request + + def read_response(self): + response = proxy.ServerConnection.read_response(self) + response.cached = True + return response + +class Recorder: + """ + A simple record/playback cache + """ + def __init__(self, options): + self.sequence = collections.defaultdict(int) + self.cookies = {} + try: + for cookie in options.cookies: + self.cookies[cookie] = True + except AttributeError: pass + self.verbosity = options.verbose + self.storedir = options.cache + self.patterns = [] + self.indexfp = None + self.reset_config() + + def reset_config(self): + self.patterns = [] + self.load_config("default") + + def add_rule(self, match, search, replace): + self.patterns.append(PatternRule(match, search, replace)) + + def forget_last_rule(self): + self.patterns.pop() + + def save_rule(self, match, search, replace, configfile = "default"): + fp = self.open(configfile + ".cfg", "a") + print >> fp, "Condition: " + match + print >> fp, "Search: " + search + print >> fp, "Replace: " + replace + fp.close() + + def load_config(self, name): + """ + Load configuration settings from name + """ + try: + file = name + ".cfg" + if self.verbosity > 2: + print >> sys.stderr, "config: " + file + fp = self.open(file, "r") + except IOError: + return False + for line in fp: + directive, value = line.split(" ", 1) + value = value.strip("\r\n") + if directive == "Cookie:": + self.cookies[value] = True + if directive == "Condition:": + match = value + if directive == "Search:": + search = value + if directive == "Replace:": + self.add_rule(match, search, value) + fp.close() + return True + + def filter_request(self, request): + """ + Filter forwarded requests to enable better recording + """ + request = request.copy() + headers = request.headers + utils.try_del(headers, 'if-modified-since') + utils.try_del(headers, 'if-none-match') + return request + + def normalize_request(self, request): + """ + Filter request to simplify storage matching + """ + request.close = False + req_text = request.assemble_proxy() + orig_req_text = req_text + for pattern in self.patterns: + req_text = pattern.execute(request, req_text) + if req_text == orig_req_text: + return request + fp = cStringIO.StringIO(req_text) + request_line = fp.readline() + method, scheme, host, port, path, httpminor = proxy.parse_request_line(request_line) + headers = utils.Headers() + headers.read(fp) + if request.content is None: + content = None + else: + content = fp.read() + return proxy.Request(request.client_conn, host, port, scheme, method, path, headers, content) + + def open(self, path, mode): + return open(self.storedir + "/" + path, mode) + + def pathn(self, request): + """ + Create cache file name and sequence number + """ + request = self.normalize_request(request) + request = self.filter_request(request) + headers = request.headers + urlkey = (request.host + request.path)[:80].translate(string.maketrans(":/?","__.")) + id = "" + if headers.has_key("cookie"): + cookies = Cookie.SimpleCookie("; ".join(headers["cookie"])) + del headers["cookie"] + for key, morsel in cookies.iteritems(): + if self.cookies.has_key(key): + id = id + key + "=" + morsel.value + "\n" + if self.verbosity > 1: + print >> sys.stderr, "ID: " + id + m = hashlib.sha224(id) + req_text = request.assemble_proxy() + if self.verbosity > 2: + print >> sys.stderr, req_text + m.update(req_text) + path = urlkey+"."+m.hexdigest() + n = str(self.sequence[path]) + if self.verbosity > 1: + print >> sys.stderr, "PATH: " + path + "." + n + return path, n + + def filter_response(self, response): + if response.headers.has_key('set-cookie'): + for header in response.headers['set-cookie']: + key = header.split('=',1)[0] + self.cookies[key] = True + return response + + def save_response(self, response): + """ + Save response for later playback + """ + + if self.indexfp is None: + self.indexfp = self.open("index.txt", "a") + try: + cfg = self.open("default.cfg", "r") + except: + cfg = self.open("default.cfg", "w") + for cookie in iter(self.cookies): + print >> cfg, "Cookie: " + cookie + cfg.close() + request = response.request + req_text = request.assemble_proxy() + resp_text = response.assemble() + path, n = self.pathn(request) + self.sequence[path] += 1 + + f = self.open(path+"."+n+".req", 'w') + f.write(req_text) + f.close() + f = self.open(path+"."+n+".resp", 'w') + f.write(resp_text) + f.close() + + print >> self.indexfp , time.time(), request.method, request.path + if request.headers.has_key('referer'): + print >> self.indexfp, 'referer:', ','.join(request.headers['referer']) + if len(self.cookies) > 0: + print >> self.indexfp, 'cookies:', ','.join(self.cookies) + print >> self.indexfp , path + print >> self.indexfp , "" + + + def get_response(self, request): + """ + Retrieve previously saved response saved by save_response + """ + path, n = self.pathn(request) + try: + fp = self.open(path+"."+n+".resp", 'r') + self.sequence[path]+=1 + except IOError: + fp = self.open(path+".resp", 'r') + server = RecorderConnection(request, fp) + fp = None # Handed over to RecorderConnection + server.send_request(request) + response = server.read_response() + server.terminate() + return response |