about | summary | refs | log | tree | commit | diff | stats
path: root/libmproxy/proxy.py
diff options
context:
space:
mode:
author Aldo Cortesi <aldo@nullcube.com> 2011-08-03 22:38:23 +1200
committer Aldo Cortesi <aldo@nullcube.com> 2011-08-03 22:41:38 +1200
commit 57c653be5f8a6fe0d1785421faa6513ebd3d48c0 (patch)
tree c2334815d6b20ec7719eba351126d307f11bf29f /libmproxy/proxy.py
parent cbd8d09849fbbd8ccd8f5cbe29f09949fc344767 (diff)
download mitmproxy-57c653be5f8a6fe0d1785421faa6513ebd3d48c0.tar.gz
mitmproxy-57c653be5f8a6fe0d1785421faa6513ebd3d48c0.tar.bz2
mitmproxy-57c653be5f8a6fe0d1785421faa6513ebd3d48c0.zip
Move all HTTP objects to flow.py
That's Request, Response, ClientConnect, ClientDisconnect, Error, and Headers.
Diffstat (limited to 'libmproxy/proxy.py')
-rw-r--r-- libmproxy/proxy.py 470
1 files changed, 26 insertions, 444 deletions
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
index 75c5bf8a..fbc0420c 100644
--- a/libmproxy/proxy.py
+++ b/libmproxy/proxy.py
@@ -5,11 +5,10 @@
Development started from Neil Schemenauer's munchy.py
"""
-import sys, os, string, socket, urlparse, re, select, copy, base64, time, Cookie
-from email.utils import parsedate_tz, formatdate, mktime_tz
-import shutil, tempfile
+import sys, os, string, socket, select, time, Cookie
+import shutil, tempfile, threading
import optparse, SocketServer, ssl
-import utils, controller, encoding
+import utils, controller, flow
NAME = "mitmproxy"
@@ -70,28 +69,6 @@ def read_http_body(rfile, connection, headers, all):
return content
-def parse_url(url):
- """
- Returns a (scheme, host, port, path) tuple, or None on error.
- """
- scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
- if not scheme:
- return None
- if ':' in netloc:
- host, port = string.split(netloc, ':')
- port = int(port)
- else:
- host = netloc
- if scheme == "https":
- port = 443
- else:
- port = 80
- path = urlparse.urlunparse(('', '', path, params, query, fragment))
- if not path.startswith("/"):
- path = "/" + path
- return scheme, host, port, path
-
-
def parse_request_line(request):
"""
Parse a proxy request line. Return (method, scheme, host, port, path, minor).
@@ -113,7 +90,7 @@ def parse_request_line(request):
if url.startswith("/") or url == "*":
scheme, port, host, path = None, None, None, url
else:
- parts = parse_url(url)
+ parts = utils.parse_url(url)
if not parts:
raise ProxyError(400, "Invalid url: %s"%url)
scheme, host, port, path = parts
@@ -127,416 +104,6 @@ def parse_request_line(request):
return method, scheme, host, port, path, minor
-class HTTPMsg(controller.Msg):
- def decode(self):
- """
- Alters Response object, decoding its content based on the current
- Content-Encoding header and changing Content-Encoding header to
- 'identity'.
- """
- ce = self.headers["content-encoding"]
- if not ce or ce[0] not in encoding.ENCODINGS:
- return
- self.content = encoding.decode(
- ce[0],
- self.content
- )
- del self.headers["content-encoding"]
-
- def encode(self, e):
- """
- Alters Response object, encoding its content with the specified
- coding. This method should only be called on Responses with
- Content-Encoding headers of 'identity'.
- """
- self.content = encoding.encode(e, self.content)
- self.headers["content-encoding"] = [e]
-
-
-class Request(HTTPMsg):
- FMT = '%s %s HTTP/1.1\r\n%s\r\n%s'
- FMT_PROXY = '%s %s://%s:%s%s HTTP/1.1\r\n%s\r\n%s'
- def __init__(self, client_conn, host, port, scheme, method, path, headers, content, timestamp=None):
- self.client_conn = client_conn
- self.host, self.port, self.scheme = host, port, scheme
- self.method, self.path, self.headers, self.content = method, path, headers, content
- self.timestamp = timestamp or utils.timestamp()
- self.close = False
- controller.Msg.__init__(self)
-
- # Have this request's cookies been modified by sticky cookies or auth?
- self.stickycookie = False
- self.stickyauth = False
-
- def anticache(self):
- """
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
- """
- delheaders = [
- "if-modified-since",
- "if-none-match",
- ]
- for i in delheaders:
- del self.headers[i]
-
- def anticomp(self):
- """
- Modifies this request to remove headers that will compress the
- resource's data.
- """
- self.headers["accept-encoding"] = ["identity"]
-
- def constrain_encoding(self):
- """
- Limits the permissible Accept-Encoding values, based on what we can
- decode appropriately.
- """
- if self.headers["accept-encoding"]:
- self.headers["accept-encoding"] = [', '.join([
- e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
- ])]
-
- def set_replay(self):
- self.client_conn = None
-
- def is_replay(self):
- if self.client_conn:
- return False
- else:
- return True
-
- def load_state(self, state):
- if state["client_conn"]:
- if self.client_conn:
- self.client_conn.load_state(state["client_conn"])
- else:
- self.client_conn = ClientConnect.from_state(state["client_conn"])
- else:
- self.client_conn = None
- self.host = state["host"]
- self.port = state["port"]
- self.scheme = state["scheme"]
- self.method = state["method"]
- self.path = state["path"]
- self.headers = utils.Headers.from_state(state["headers"])
- self.content = base64.decodestring(state["content"])
- self.timestamp = state["timestamp"]
-
- def get_state(self):
- return dict(
- client_conn = self.client_conn.get_state() if self.client_conn else None,
- host = self.host,
- port = self.port,
- scheme = self.scheme,
- method = self.method,
- path = self.path,
- headers = self.headers.get_state(),
- content = base64.encodestring(self.content),
- timestamp = self.timestamp,
- )
-
- @classmethod
- def from_state(klass, state):
- return klass(
- ClientConnect.from_state(state["client_conn"]),
- str(state["host"]),
- state["port"],
- str(state["scheme"]),
- str(state["method"]),
- str(state["path"]),
- utils.Headers.from_state(state["headers"]),
- base64.decodestring(state["content"]),
- state["timestamp"]
- )
-
- def __hash__(self):
- return id(self)
-
- def __eq__(self, other):
- return self.get_state() == other.get_state()
-
- def copy(self):
- c = copy.copy(self)
- c.headers = self.headers.copy()
- return c
-
- def hostport(self):
- if (self.port, self.scheme) in [(80, "http"), (443, "https")]:
- host = self.host
- else:
- host = "%s:%s"%(self.host, self.port)
- return host
-
- def url(self):
- return "%s://%s%s"%(self.scheme, self.hostport(), self.path)
-
- def set_url(self, url):
- parts = parse_url(url)
- if not parts:
- return False
- self.scheme, self.host, self.port, self.path = parts
- return True
-
- def is_response(self):
- return False
-
- def assemble(self, _proxy = False):
- """
- Assembles the request for transmission to the server. We make some
- modifications to make sure interception works properly.
- """
- headers = self.headers.copy()
- utils.del_all(
- headers,
- [
- 'proxy-connection',
- 'keep-alive',
- 'connection',
- 'content-length',
- 'transfer-encoding'
- ]
- )
- if not 'host' in headers:
- headers["host"] = [self.hostport()]
- content = self.content
- if content is not None:
- headers["content-length"] = [str(len(content))]
- else:
- content = ""
- if self.close:
- headers["connection"] = ["close"]
- if not _proxy:
- return self.FMT % (self.method, self.path, str(headers), content)
- else:
- return self.FMT_PROXY % (self.method, self.scheme, self.host, self.port, self.path, str(headers), content)
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the request. Returns the number of replacements
- made.
- """
- self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs)
- self.path, pc = re.subn(pattern, repl, self.path, *args, **kwargs)
- c += pc
- c += self.headers.replace(pattern, repl, *args, **kwargs)
- return c
-
-
-class Response(HTTPMsg):
- FMT = '%s\r\n%s\r\n%s'
- def __init__(self, request, code, msg, headers, content, timestamp=None):
- self.request = request
- self.code, self.msg = code, msg
- self.headers, self.content = headers, content
- self.timestamp = timestamp or utils.timestamp()
- controller.Msg.__init__(self)
- self.replay = False
-
- def _refresh_cookie(self, c, delta):
- """
- Takes a cookie string c and a time delta in seconds, and returns
- a refreshed cookie string.
- """
- c = Cookie.SimpleCookie(str(c))
- for i in c.values():
- if "expires" in i:
- d = parsedate_tz(i["expires"])
- if d:
- d = mktime_tz(d) + delta
- i["expires"] = formatdate(d)
- else:
- # This can happen when the expires tag is invalid.
- # reddit.com sends an expires tag like this: "Thu, 31 Dec
- # 2037 23:59:59 GMT", which is valid RFC 1123, but not
- # strictly correct according to the cookie spec. Browsers
- # appear to parse this tolerantly - maybe we should too.
- # For now, we just ignore this.
- del i["expires"]
- return c.output(header="").strip()
-
- def refresh(self, now=None):
- """
- This fairly complex and heuristic function refreshes a server
- response for replay.
-
- - It adjusts date, expires and last-modified headers.
- - It adjusts cookie expiration.
- """
- if not now:
- now = time.time()
- delta = now - self.timestamp
- refresh_headers = [
- "date",
- "expires",
- "last-modified",
- ]
- for i in refresh_headers:
- if i in self.headers:
- d = parsedate_tz(self.headers[i][0])
- if d:
- new = mktime_tz(d) + delta
- self.headers[i] = [formatdate(new)]
- c = []
- for i in self.headers["set-cookie"]:
- c.append(self._refresh_cookie(i, delta))
- if c:
- self.headers["set-cookie"] = c
-
- def set_replay(self):
- self.replay = True
-
- def is_replay(self):
- return self.replay
-
- def load_state(self, state):
- self.code = state["code"]
- self.msg = state["msg"]
- self.headers = utils.Headers.from_state(state["headers"])
- self.content = base64.decodestring(state["content"])
- self.timestamp = state["timestamp"]
-
- def get_state(self):
- return dict(
- code = self.code,
- msg = self.msg,
- headers = self.headers.get_state(),
- timestamp = self.timestamp,
- content = base64.encodestring(self.content)
- )
-
- @classmethod
- def from_state(klass, request, state):
- return klass(
- request,
- state["code"],
- str(state["msg"]),
- utils.Headers.from_state(state["headers"]),
- base64.decodestring(state["content"]),
- state["timestamp"],
- )
-
- def __eq__(self, other):
- return self.get_state() == other.get_state()
-
- def copy(self):
- c = copy.copy(self)
- c.headers = self.headers.copy()
- return c
-
- def is_response(self):
- return True
-
- def assemble(self):
- """
- Assembles the response for transmission to the client. We make some
- modifications to make sure interception works properly.
- """
- headers = self.headers.copy()
- utils.del_all(
- headers,
- ['proxy-connection', 'connection', 'keep-alive', 'transfer-encoding']
- )
- content = self.content
- if content is not None:
- headers["content-length"] = [str(len(content))]
- else:
- content = ""
- if self.request.client_conn.close:
- headers["connection"] = ["close"]
- proto = "HTTP/1.1 %s %s"%(self.code, str(self.msg))
- data = (proto, str(headers), content)
- return self.FMT%data
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the response. Returns the number of replacements
- made.
- """
- self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs)
- c += self.headers.replace(pattern, repl, *args, **kwargs)
- return c
-
-
-class ClientDisconnect(controller.Msg):
- def __init__(self, client_conn):
- controller.Msg.__init__(self)
- self.client_conn = client_conn
-
-
-class ClientConnect(controller.Msg):
- def __init__(self, address):
- """
- address is an (address, port) tuple, or None if this connection has
- been replayed from within mitmproxy.
- """
- self.address = address
- self.close = False
- self.requestcount = 0
- self.connection_error = None
- controller.Msg.__init__(self)
-
- def __eq__(self, other):
- return self.get_state() == other.get_state()
-
- def load_state(self, state):
- self.address = state
-
- def get_state(self):
- return list(self.address) if self.address else None
-
- @classmethod
- def from_state(klass, state):
- if state:
- return klass(state)
- else:
- return None
-
- def copy(self):
- return copy.copy(self)
-
-
-class Error(controller.Msg):
- def __init__(self, request, msg, timestamp=None):
- self.request, self.msg = request, msg
- self.timestamp = timestamp or utils.timestamp()
- controller.Msg.__init__(self)
-
- def load_state(self, state):
- self.msg = state["msg"]
- self.timestamp = state["timestamp"]
-
- def copy(self):
- return copy.copy(self)
-
- def get_state(self):
- return dict(
- msg = self.msg,
- timestamp = self.timestamp,
- )
-
- @classmethod
- def from_state(klass, state):
- return klass(
- None,
- state["msg"],
- state["timestamp"],
- )
-
- def __eq__(self, other):
- return self.get_state() == other.get_state()
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the request. Returns the number of replacements
- made.
- """
- self.msg, c = re.subn(pattern, repl, self.msg, *args, **kwargs)
- return c
-
class FileLike:
def __init__(self, o):
@@ -574,6 +141,21 @@ class FileLike:
#begin nocover
+class RequestReplayThread(threading.Thread):
+ def __init__(self, flow, masterq):
+ self.flow, self.masterq = flow, masterq
+ threading.Thread.__init__(self)
+
+ def run(self):
+ try:
+ server = ServerConnection(self.flow.request)
+ server.send_request(self.flow.request)
+ response = server.read_response()
+ response.send(self.masterq)
+ except ProxyError, v:
+ err = flow.Error(self.flow.request, v.msg)
+ err.send(self.masterq)
+
class ServerConnection:
def __init__(self, request):
@@ -616,7 +198,7 @@ class ServerConnection:
raise ProxyError(502, "Invalid server response: %s."%line)
proto, code, msg = parts
code = int(code)
- headers = utils.Headers()
+ headers = flow.Headers()
headers.read(self.rfile)
if code >= 100 and code <= 199:
return self.read_response()
@@ -624,7 +206,7 @@ class ServerConnection:
content = ""
else:
content = read_http_body(self.rfile, self, headers, True)
- return Response(self.request, code, msg, headers, content)
+ return flow.Response(self.request, code, msg, headers, content)
def terminate(self):
try:
@@ -642,11 +224,11 @@ class ProxyHandler(SocketServer.StreamRequestHandler):
SocketServer.StreamRequestHandler.__init__(self, request, client_address, server)
def handle(self):
- cc = ClientConnect(self.client_address)
+ cc = flow.ClientConnect(self.client_address)
cc.send(self.mqueue)
while not cc.close:
self.handle_request(cc)
- cd = ClientDisconnect(cc)
+ cd = flow.ClientDisconnect(cc)
cd.send(self.mqueue)
self.finish()
@@ -691,7 +273,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler):
cc.close = True
cc.connection_error = "%s: %s"%(e.code, e.msg)
if request:
- err = Error(request, e.msg)
+ err = flow.Error(request, e.msg)
err.send(self.mqueue)
self.send_error(e.code, e.msg)
if server:
@@ -742,7 +324,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler):
method, scheme, host, port, path, httpminor = parse_request_line(self.rfile.readline())
if scheme is None:
scheme = "https"
- headers = utils.Headers()
+ headers = flow.Headers()
headers.read(self.rfile)
if host is None and "host" in headers:
netloc = headers["host"][0]
@@ -779,7 +361,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler):
if value == "keep-alive":
client_conn.close = False
content = read_http_body(self.rfile, client_conn, headers, False)
- return Request(client_conn, host, port, scheme, method, path, headers, content)
+ return flow.Request(client_conn, host, port, scheme, method, path, headers, content)
def send_response(self, response):
self.wfile.write(response.assemble())