aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy
diff options
context:
space:
mode:
authorHenrik Nordstrom <henrik@henriknordstrom.net>2010-11-17 14:11:56 +0100
committerHenrik Nordstrom <henrik@henriknordstrom.net>2011-02-10 02:59:51 +0100
commitd11dd742d8593087959b6f1e0d9cc1f956dee03e (patch)
tree9ac076b44004556cfc83682999a4774639759868 /libmproxy
parent4bae297fbbe294a962116f574ca1b8ae434e0886 (diff)
downloadmitmproxy-d11dd742d8593087959b6f1e0d9cc1f956dee03e.tar.gz
mitmproxy-d11dd742d8593087959b6f1e0d9cc1f956dee03e.tar.bz2
mitmproxy-d11dd742d8593087959b6f1e0d9cc1f956dee03e.zip
Simple record & playback functionality
Diffstat (limited to 'libmproxy')
-rw-r--r--libmproxy/playback.py129
-rw-r--r--libmproxy/proxy.py19
-rw-r--r--libmproxy/record.py68
-rw-r--r--libmproxy/recorder.py273
4 files changed, 486 insertions, 3 deletions
diff --git a/libmproxy/playback.py b/libmproxy/playback.py
new file mode 100644
index 00000000..920b2e0c
--- /dev/null
+++ b/libmproxy/playback.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Alternatively you may use this file under a GPLv3 license as follows:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import controller
+import utils
+import proxy
+import recorder
+
+class PlaybackMaster(controller.Master):
+ """
+ A simple master that plays back recorded responses.
+ """
+ def __init__(self, server, options):
+ self.verbosity = options.verbose
+ self.store = recorder.Recorder(options)
+ controller.Master.__init__(self, server)
+
+ def run(self):
+ try:
+ return controller.Master.run(self)
+ except KeyboardInterrupt:
+ self.shutdown()
+
+ def process_missing_response(self, request):
+ response = None
+ print >> sys.stderr, self.store.normalize_request(request).assemble_proxy()
+ print >> sys.stderr, "Actions:"
+ print >> sys.stderr, " q Quit"
+ print >> sys.stderr, " a(dd) Add pattern rule"
+ print >> sys.stderr, " A(dd) Add pattern rule (forced)"
+ print >> sys.stderr, " e(rror) respond with a 404 error"
+ print >> sys.stderr, " k(ill) kill the request, empty response"
+ print >> sys.stderr, " f(orward) forward the request to the requested server and cache response"
+ command = raw_input("Action: ")
+ command = command[:1]
+ if command == 'q':
+ self.shutdown()
+ return None
+ elif command == 'a' or command == 'A':
+ filt = raw_input("Filter: ")
+ search = raw_input("Search pattern: ")
+ replace = raw_input("Replacement string: ")
+ self.store.add_rule(filt, search, replace)
+ if command == 'A':
+ self.store.save_rule(filt, search, replace)
+ elif command == 'e':
+ return proxy.Response(request, "404", "Not found", utils.Headers(), "Not found")
+ elif command == 'k':
+ return None
+ elif command == 'f':
+ return request
+ else:
+ print >> sys.stderr, "ERROR: Unknown command"
+ return self.process_missing_response(request)
+ try:
+ response = self.store.get_response(request)
+ if command == 'a':
+ self.store.save_rule(filt, search, replace)
+ except proxy.ProxyError:
+ print >> sys.stderr, "ERROR: Malformed substitution rule"
+ self.store.forget_last_rule()
+ response = self.process_missing_response(request)
+ except IOError:
+ print >> sys.stderr, "NOTICE: Response still not found"
+ if command == 'a':
+ self.store.forget_last_rule()
+ response = self.process_missing_response(request)
+ return response
+
+ def handle_request(self, msg):
+ request = msg
+ try:
+ response = self.store.get_response(request)
+ except IOError:
+ if self.verbosity > 0:
+ print >> sys.stderr, ">>",
+ print >> sys.stderr, request.short()
+ print >> sys.stderr, "<<",
+ print >> sys.stderr, "ERROR: No matching response.",
+ print >> sys.stderr, ",".join(self.store.cookies)
+ response = self.process_missing_response(msg)
+ msg.ack(response)
+
+ def handle_response(self, msg):
+ request = msg.request
+ response = msg
+ if self.verbosity > 0:
+ print >> sys.stderr, ">>",
+ print >> sys.stderr, request.short()
+ print >> sys.stderr, "<<",
+ print >> sys.stderr, response.short()
+ if not response.is_cached():
+ self.store.save_response(response)
+ msg.ack(self.store.filter_response(msg))
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
index e3eace3b..631e2470 100644
--- a/libmproxy/proxy.py
+++ b/libmproxy/proxy.py
@@ -124,6 +124,7 @@ def parse_request_line(request):
class Request(controller.Msg):
FMT = '%s %s HTTP/1.1\r\n%s\r\n%s'
+ FMT_PROXY = '%s %s://%s:%s%s HTTP/1.1\r\n%s\r\n%s'
def __init__(self, client_conn, host, port, scheme, method, path, headers, content, timestamp=None):
self.client_conn = client_conn
self.host, self.port, self.scheme = host, port, scheme
@@ -132,6 +133,9 @@ class Request(controller.Msg):
self.close = False
controller.Msg.__init__(self)
+ def is_cached(self):
+ return False
+
def get_state(self):
return dict(
host = self.host,
@@ -189,7 +193,10 @@ class Request(controller.Msg):
def short(self):
return "%s %s"%(self.method, self.url())
- def assemble(self):
+ def assemble_proxy(self):
+ return self.assemble(True)
+
+ def assemble(self, _proxy = False):
"""
Assembles the request for transmission to the server. We make some
modifications to make sure interception works properly.
@@ -210,8 +217,10 @@ class Request(controller.Msg):
content = ""
if self.close:
headers["connection"] = ["close"]
- data = (self.method, self.path, str(headers), content)
- return self.FMT%data
+ if not _proxy:
+ return self.FMT % (self.method, self.path, str(headers), content)
+ else:
+ return self.FMT_PROXY % (self.method, self.scheme, self.host, self.port, self.path, str(headers), content)
class Response(controller.Msg):
@@ -221,6 +230,7 @@ class Response(controller.Msg):
self.code, self.msg = code, msg
self.headers, self.content = headers, content
self.timestamp = timestamp or time.time()
+ self.cached = False
controller.Msg.__init__(self)
def get_state(self):
@@ -256,6 +266,9 @@ class Response(controller.Msg):
def is_response(self):
return True
+ def is_cached(self):
+ return self.cached
+
def short(self):
return "%s %s"%(self.code, self.msg)
diff --git a/libmproxy/record.py b/libmproxy/record.py
new file mode 100644
index 00000000..d32c8711
--- /dev/null
+++ b/libmproxy/record.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Alternatively you may use this file under a GPLv3 license as follows:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import controller
+import utils
+import recorder
+
+class RecordMaster(controller.Master):
+ """
+ A simple master that just records to files.
+ """
+ def __init__(self, server, options):
+ self.verbosity = options.verbose
+ self.store = recorder.Recorder(options)
+ controller.Master.__init__(self, server)
+
+ def run(self):
+ try:
+ return controller.Master.run(self)
+ except KeyboardInterrupt:
+ self.shutdown()
+
+ def handle_request(self, msg):
+ msg.ack(self.store.filter_request(msg))
+
+ def handle_response(self, msg):
+ if self.verbosity > 0:
+ print >> sys.stderr, ">>",
+ print >> sys.stderr, msg.request.short()
+ print >> sys.stderr, "<<",
+ print >> sys.stderr, msg.short()
+ self.store.save_response(msg)
+ msg.ack(self.store.filter_response(msg))
diff --git a/libmproxy/recorder.py b/libmproxy/recorder.py
new file mode 100644
index 00000000..51c8a6e0
--- /dev/null
+++ b/libmproxy/recorder.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Henrik Nordstrom <henrik@henriknordstrom.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Alternatively you may use this file under a GPLv3 license as follows:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import time
+import hashlib
+import utils
+import proxy
+import collections
+import itertools
+import string
+import Cookie
+import filt
+import re
+import cStringIO
+
+def constant_factory(value):
+ return itertools.repeat(value).next
+
+class PatternRule:
+ """
+ Request pattern rule
+ :_ivar _match filt pattern rule
+ :_ivar _search Regex pattern to search for
+ :_ivar _replace Replacement string
+ """
+ def __init__(self, pattern, search, replace):
+ self.match = filt.parse(pattern)
+ self.search = re.compile(search)
+ self.replace = replace
+ def execute(self, request, text):
+ if self.match and not self.match(request):
+ return text
+ return re.sub(self.search, self.replace, text)
+
+class RecorderConnection(proxy.ServerConnection):
+ """
+ Simulated ServerConnection connecting to the cache
+ """
+ # Note: This may chane in future. Division between Recorder
+ # and RecorderConnection is not yet finalized
+ def __init__(self, request, fp):
+ self.host = request.host
+ self.port = request.port
+ self.scheme = request.scheme
+ self.close = False
+ self.server = fp
+ self.rfile = fp
+ self.wfile = fp
+
+ def send_request(self, request):
+ self.request = request
+
+ def read_response(self):
+ response = proxy.ServerConnection.read_response(self)
+ response.cached = True
+ return response
+
+class Recorder:
+ """
+ A simple record/playback cache
+ """
+ def __init__(self, options):
+ self.sequence = collections.defaultdict(int)
+ self.cookies = {}
+ try:
+ for cookie in options.cookies:
+ self.cookies[cookie] = True
+ except AttributeError: pass
+ self.verbosity = options.verbose
+ self.storedir = options.cache
+ self.patterns = []
+ self.indexfp = None
+ self.reset_config()
+
+ def reset_config(self):
+ self.patterns = []
+ self.load_config("default")
+
+ def add_rule(self, match, search, replace):
+ self.patterns.append(PatternRule(match, search, replace))
+
+ def forget_last_rule(self):
+ self.patterns.pop()
+
+ def save_rule(self, match, search, replace, configfile = "default"):
+ fp = self.open(configfile + ".cfg", "a")
+ print >> fp, "Condition: " + match
+ print >> fp, "Search: " + search
+ print >> fp, "Replace: " + replace
+ fp.close()
+
+ def load_config(self, name):
+ """
+ Load configuration settings from name
+ """
+ try:
+ file = name + ".cfg"
+ if self.verbosity > 2:
+ print >> sys.stderr, "config: " + file
+ fp = self.open(file, "r")
+ except IOError:
+ return False
+ for line in fp:
+ directive, value = line.split(" ", 1)
+ value = value.strip("\r\n")
+ if directive == "Cookie:":
+ self.cookies[value] = True
+ if directive == "Condition:":
+ match = value
+ if directive == "Search:":
+ search = value
+ if directive == "Replace:":
+ self.add_rule(match, search, value)
+ fp.close()
+ return True
+
+ def filter_request(self, request):
+ """
+ Filter forwarded requests to enable better recording
+ """
+ request = request.copy()
+ headers = request.headers
+ utils.try_del(headers, 'if-modified-since')
+ utils.try_del(headers, 'if-none-match')
+ return request
+
+ def normalize_request(self, request):
+ """
+ Filter request to simplify storage matching
+ """
+ request.close = False
+ req_text = request.assemble_proxy()
+ orig_req_text = req_text
+ for pattern in self.patterns:
+ req_text = pattern.execute(request, req_text)
+ if req_text == orig_req_text:
+ return request
+ fp = cStringIO.StringIO(req_text)
+ request_line = fp.readline()
+ method, scheme, host, port, path, httpminor = proxy.parse_request_line(request_line)
+ headers = utils.Headers()
+ headers.read(fp)
+ if request.content is None:
+ content = None
+ else:
+ content = fp.read()
+ return proxy.Request(request.client_conn, host, port, scheme, method, path, headers, content)
+
+ def open(self, path, mode):
+ return open(self.storedir + "/" + path, mode)
+
+ def pathn(self, request):
+ """
+ Create cache file name and sequence number
+ """
+ request = self.normalize_request(request)
+ request = self.filter_request(request)
+ headers = request.headers
+ urlkey = (request.host + request.path)[:80].translate(string.maketrans(":/?","__."))
+ id = ""
+ if headers.has_key("cookie"):
+ cookies = Cookie.SimpleCookie("; ".join(headers["cookie"]))
+ del headers["cookie"]
+ for key, morsel in cookies.iteritems():
+ if self.cookies.has_key(key):
+ id = id + key + "=" + morsel.value + "\n"
+ if self.verbosity > 1:
+ print >> sys.stderr, "ID: " + id
+ m = hashlib.sha224(id)
+ req_text = request.assemble_proxy()
+ if self.verbosity > 2:
+ print >> sys.stderr, req_text
+ m.update(req_text)
+ path = urlkey+"."+m.hexdigest()
+ n = str(self.sequence[path])
+ if self.verbosity > 1:
+ print >> sys.stderr, "PATH: " + path + "." + n
+ return path, n
+
+ def filter_response(self, response):
+ if response.headers.has_key('set-cookie'):
+ for header in response.headers['set-cookie']:
+ key = header.split('=',1)[0]
+ self.cookies[key] = True
+ return response
+
+ def save_response(self, response):
+ """
+ Save response for later playback
+ """
+
+ if self.indexfp is None:
+ self.indexfp = self.open("index.txt", "a")
+ try:
+ cfg = self.open("default.cfg", "r")
+ except:
+ cfg = self.open("default.cfg", "w")
+ for cookie in iter(self.cookies):
+ print >> cfg, "Cookie: " + cookie
+ cfg.close()
+ request = response.request
+ req_text = request.assemble_proxy()
+ resp_text = response.assemble()
+ path, n = self.pathn(request)
+ self.sequence[path] += 1
+
+ f = self.open(path+"."+n+".req", 'w')
+ f.write(req_text)
+ f.close()
+ f = self.open(path+"."+n+".resp", 'w')
+ f.write(resp_text)
+ f.close()
+
+ print >> self.indexfp , time.time(), request.method, request.path
+ if request.headers.has_key('referer'):
+ print >> self.indexfp, 'referer:', ','.join(request.headers['referer'])
+ if len(self.cookies) > 0:
+ print >> self.indexfp, 'cookies:', ','.join(self.cookies)
+ print >> self.indexfp , path
+ print >> self.indexfp , ""
+
+
+ def get_response(self, request):
+ """
+ Retrieve previously saved response saved by save_response
+ """
+ path, n = self.pathn(request)
+ try:
+ fp = self.open(path+"."+n+".resp", 'r')
+ self.sequence[path]+=1
+ except IOError:
+ fp = self.open(path+".resp", 'r')
+ server = RecorderConnection(request, fp)
+ fp = None # Handed over to RecorderConnection
+ server.send_request(request)
+ response = server.read_response()
+ server.terminate()
+ return response