From 57c653be5f8a6fe0d1785421faa6513ebd3d48c0 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 3 Aug 2011 22:38:23 +1200 Subject: Move all HTTP objects to flow.py That's Request, Response, ClientConnect, ClientDisconnect, Error, and Headers. --- libmproxy/proxy.py | 470 +++-------------------------------------------------- 1 file changed, 26 insertions(+), 444 deletions(-) (limited to 'libmproxy/proxy.py') diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py index 75c5bf8a..fbc0420c 100644 --- a/libmproxy/proxy.py +++ b/libmproxy/proxy.py @@ -5,11 +5,10 @@ Development started from Neil Schemenauer's munchy.py """ -import sys, os, string, socket, urlparse, re, select, copy, base64, time, Cookie -from email.utils import parsedate_tz, formatdate, mktime_tz -import shutil, tempfile +import sys, os, string, socket, select, time, Cookie +import shutil, tempfile, threading import optparse, SocketServer, ssl -import utils, controller, encoding +import utils, controller, flow NAME = "mitmproxy" @@ -70,28 +69,6 @@ def read_http_body(rfile, connection, headers, all): return content -def parse_url(url): - """ - Returns a (scheme, host, port, path) tuple, or None on error. - """ - scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) - if not scheme: - return None - if ':' in netloc: - host, port = string.split(netloc, ':') - port = int(port) - else: - host = netloc - if scheme == "https": - port = 443 - else: - port = 80 - path = urlparse.urlunparse(('', '', path, params, query, fragment)) - if not path.startswith("/"): - path = "/" + path - return scheme, host, port, path - - def parse_request_line(request): """ Parse a proxy request line. Return (method, scheme, host, port, path, minor). @@ -113,7 +90,7 @@ def parse_request_line(request): if url.startswith("/") or url == "*": scheme, port, host, path = None, None, None, url else: - parts = parse_url(url) + parts = utils.parse_url(url) if not parts: raise ProxyError(400, "Invalid url: %s"%url) scheme, host, port, path = parts @@ -127,416 +104,6 @@ def parse_request_line(request): return method, scheme, host, port, path, minor -class HTTPMsg(controller.Msg): - def decode(self): - """ - Alters Response object, decoding its content based on the current - Content-Encoding header and changing Content-Encoding header to - 'identity'. - """ - ce = self.headers["content-encoding"] - if not ce or ce[0] not in encoding.ENCODINGS: - return - self.content = encoding.decode( - ce[0], - self.content - ) - del self.headers["content-encoding"] - - def encode(self, e): - """ - Alters Response object, encoding its content with the specified - coding. This method should only be called on Responses with - Content-Encoding headers of 'identity'. - """ - self.content = encoding.encode(e, self.content) - self.headers["content-encoding"] = [e] - - -class Request(HTTPMsg): - FMT = '%s %s HTTP/1.1\r\n%s\r\n%s' - FMT_PROXY = '%s %s://%s:%s%s HTTP/1.1\r\n%s\r\n%s' - def __init__(self, client_conn, host, port, scheme, method, path, headers, content, timestamp=None): - self.client_conn = client_conn - self.host, self.port, self.scheme = host, port, scheme - self.method, self.path, self.headers, self.content = method, path, headers, content - self.timestamp = timestamp or utils.timestamp() - self.close = False - controller.Msg.__init__(self) - - # Have this request's cookies been modified by sticky cookies or auth? - self.stickycookie = False - self.stickyauth = False - - def anticache(self): - """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. - """ - delheaders = [ - "if-modified-since", - "if-none-match", - ] - for i in delheaders: - del self.headers[i] - - def anticomp(self): - """ - Modifies this request to remove headers that will compress the - resource's data. - """ - self.headers["accept-encoding"] = ["identity"] - - def constrain_encoding(self): - """ - Limits the permissible Accept-Encoding values, based on what we can - decode appropriately. - """ - if self.headers["accept-encoding"]: - self.headers["accept-encoding"] = [', '.join([ - e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0] - ])] - - def set_replay(self): - self.client_conn = None - - def is_replay(self): - if self.client_conn: - return False - else: - return True - - def load_state(self, state): - if state["client_conn"]: - if self.client_conn: - self.client_conn.load_state(state["client_conn"]) - else: - self.client_conn = ClientConnect.from_state(state["client_conn"]) - else: - self.client_conn = None - self.host = state["host"] - self.port = state["port"] - self.scheme = state["scheme"] - self.method = state["method"] - self.path = state["path"] - self.headers = utils.Headers.from_state(state["headers"]) - self.content = base64.decodestring(state["content"]) - self.timestamp = state["timestamp"] - - def get_state(self): - return dict( - client_conn = self.client_conn.get_state() if self.client_conn else None, - host = self.host, - port = self.port, - scheme = self.scheme, - method = self.method, - path = self.path, - headers = self.headers.get_state(), - content = base64.encodestring(self.content), - timestamp = self.timestamp, - ) - - @classmethod - def from_state(klass, state): - return klass( - ClientConnect.from_state(state["client_conn"]), - str(state["host"]), - state["port"], - str(state["scheme"]), - str(state["method"]), - str(state["path"]), - utils.Headers.from_state(state["headers"]), - base64.decodestring(state["content"]), - state["timestamp"] - ) - - def __hash__(self): - return id(self) - - def __eq__(self, other): - return self.get_state() == other.get_state() - - def copy(self): - c = copy.copy(self) - c.headers = self.headers.copy() - return c - - def hostport(self): - if (self.port, self.scheme) in [(80, "http"), (443, "https")]: - host = self.host - else: - host = "%s:%s"%(self.host, self.port) - return host - - def url(self): - return "%s://%s%s"%(self.scheme, self.hostport(), self.path) - - def set_url(self, url): - parts = parse_url(url) - if not parts: - return False - self.scheme, self.host, self.port, self.path = parts - return True - - def is_response(self): - return False - - def assemble(self, _proxy = False): - """ - Assembles the request for transmission to the server. We make some - modifications to make sure interception works properly. - """ - headers = self.headers.copy() - utils.del_all( - headers, - [ - 'proxy-connection', - 'keep-alive', - 'connection', - 'content-length', - 'transfer-encoding' - ] - ) - if not 'host' in headers: - headers["host"] = [self.hostport()] - content = self.content - if content is not None: - headers["content-length"] = [str(len(content))] - else: - content = "" - if self.close: - headers["connection"] = ["close"] - if not _proxy: - return self.FMT % (self.method, self.path, str(headers), content) - else: - return self.FMT_PROXY % (self.method, self.scheme, self.host, self.port, self.path, str(headers), content) - - def replace(self, pattern, repl, *args, **kwargs): - """ - Replaces a regular expression pattern with repl in both the headers - and the body of the request. Returns the number of replacements - made. - """ - self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs) - self.path, pc = re.subn(pattern, repl, self.path, *args, **kwargs) - c += pc - c += self.headers.replace(pattern, repl, *args, **kwargs) - return c - - -class Response(HTTPMsg): - FMT = '%s\r\n%s\r\n%s' - def __init__(self, request, code, msg, headers, content, timestamp=None): - self.request = request - self.code, self.msg = code, msg - self.headers, self.content = headers, content - self.timestamp = timestamp or utils.timestamp() - controller.Msg.__init__(self) - self.replay = False - - def _refresh_cookie(self, c, delta): - """ - Takes a cookie string c and a time delta in seconds, and returns - a refreshed cookie string. - """ - c = Cookie.SimpleCookie(str(c)) - for i in c.values(): - if "expires" in i: - d = parsedate_tz(i["expires"]) - if d: - d = mktime_tz(d) + delta - i["expires"] = formatdate(d) - else: - # This can happen when the expires tag is invalid. - # reddit.com sends a an expires tag like this: "Thu, 31 Dec - # 2037 23:59:59 GMT", which is valid RFC 1123, but not - # strictly correct according tot he cookie spec. Browsers - # appear to parse this tolerantly - maybe we should too. - # For now, we just ignore this. - del i["expires"] - return c.output(header="").strip() - - def refresh(self, now=None): - """ - This fairly complex and heuristic function refreshes a server - response for replay. - - - It adjusts date, expires and last-modified headers. - - It adjusts cookie expiration. - """ - if not now: - now = time.time() - delta = now - self.timestamp - refresh_headers = [ - "date", - "expires", - "last-modified", - ] - for i in refresh_headers: - if i in self.headers: - d = parsedate_tz(self.headers[i][0]) - if d: - new = mktime_tz(d) + delta - self.headers[i] = [formatdate(new)] - c = [] - for i in self.headers["set-cookie"]: - c.append(self._refresh_cookie(i, delta)) - if c: - self.headers["set-cookie"] = c - - def set_replay(self): - self.replay = True - - def is_replay(self): - return self.replay - - def load_state(self, state): - self.code = state["code"] - self.msg = state["msg"] - self.headers = utils.Headers.from_state(state["headers"]) - self.content = base64.decodestring(state["content"]) - self.timestamp = state["timestamp"] - - def get_state(self): - return dict( - code = self.code, - msg = self.msg, - headers = self.headers.get_state(), - timestamp = self.timestamp, - content = base64.encodestring(self.content) - ) - - @classmethod - def from_state(klass, request, state): - return klass( - request, - state["code"], - str(state["msg"]), - utils.Headers.from_state(state["headers"]), - base64.decodestring(state["content"]), - state["timestamp"], - ) - - def __eq__(self, other): - return self.get_state() == other.get_state() - - def copy(self): - c = copy.copy(self) - c.headers = self.headers.copy() - return c - - def is_response(self): - return True - - def assemble(self): - """ - Assembles the response for transmission to the client. We make some - modifications to make sure interception works properly. - """ - headers = self.headers.copy() - utils.del_all( - headers, - ['proxy-connection', 'connection', 'keep-alive', 'transfer-encoding'] - ) - content = self.content - if content is not None: - headers["content-length"] = [str(len(content))] - else: - content = "" - if self.request.client_conn.close: - headers["connection"] = ["close"] - proto = "HTTP/1.1 %s %s"%(self.code, str(self.msg)) - data = (proto, str(headers), content) - return self.FMT%data - - def replace(self, pattern, repl, *args, **kwargs): - """ - Replaces a regular expression pattern with repl in both the headers - and the body of the response. Returns the number of replacements - made. - """ - self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs) - c += self.headers.replace(pattern, repl, *args, **kwargs) - return c - - -class ClientDisconnect(controller.Msg): - def __init__(self, client_conn): - controller.Msg.__init__(self) - self.client_conn = client_conn - - -class ClientConnect(controller.Msg): - def __init__(self, address): - """ - address is an (address, port) tuple, or None if this connection has - been replayed from within mitmproxy. - """ - self.address = address - self.close = False - self.requestcount = 0 - self.connection_error = None - controller.Msg.__init__(self) - - def __eq__(self, other): - return self.get_state() == other.get_state() - - def load_state(self, state): - self.address = state - - def get_state(self): - return list(self.address) if self.address else None - - @classmethod - def from_state(klass, state): - if state: - return klass(state) - else: - return None - - def copy(self): - return copy.copy(self) - - -class Error(controller.Msg): - def __init__(self, request, msg, timestamp=None): - self.request, self.msg = request, msg - self.timestamp = timestamp or utils.timestamp() - controller.Msg.__init__(self) - - def load_state(self, state): - self.msg = state["msg"] - self.timestamp = state["timestamp"] - - def copy(self): - return copy.copy(self) - - def get_state(self): - return dict( - msg = self.msg, - timestamp = self.timestamp, - ) - - @classmethod - def from_state(klass, state): - return klass( - None, - state["msg"], - state["timestamp"], - ) - - def __eq__(self, other): - return self.get_state() == other.get_state() - - def replace(self, pattern, repl, *args, **kwargs): - """ - Replaces a regular expression pattern with repl in both the headers - and the body of the request. Returns the number of replacements - made. - """ - self.msg, c = re.subn(pattern, repl, self.msg, *args, **kwargs) - return c - class FileLike: def __init__(self, o): @@ -574,6 +141,21 @@ class FileLike: #begin nocover +class RequestReplayThread(threading.Thread): + def __init__(self, flow, masterq): + self.flow, self.masterq = flow, masterq + threading.Thread.__init__(self) + + def run(self): + try: + server = ServerConnection(self.flow.request) + server.send_request(self.flow.request) + response = server.read_response() + response.send(self.masterq) + except ProxyError, v: + err = flow.Error(self.flow.request, v.msg) + err.send(self.masterq) + class ServerConnection: def __init__(self, request): @@ -616,7 +198,7 @@ class ServerConnection: raise ProxyError(502, "Invalid server response: %s."%line) proto, code, msg = parts code = int(code) - headers = utils.Headers() + headers = flow.Headers() headers.read(self.rfile) if code >= 100 and code <= 199: return self.read_response() @@ -624,7 +206,7 @@ class ServerConnection: content = "" else: content = read_http_body(self.rfile, self, headers, True) - return Response(self.request, code, msg, headers, content) + return flow.Response(self.request, code, msg, headers, content) def terminate(self): try: @@ -642,11 +224,11 @@ class ProxyHandler(SocketServer.StreamRequestHandler): SocketServer.StreamRequestHandler.__init__(self, request, client_address, server) def handle(self): - cc = ClientConnect(self.client_address) + cc = flow.ClientConnect(self.client_address) cc.send(self.mqueue) while not cc.close: self.handle_request(cc) - cd = ClientDisconnect(cc) + cd = flow.ClientDisconnect(cc) cd.send(self.mqueue) self.finish() @@ -691,7 +273,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler): cc.close = True cc.connection_error = "%s: %s"%(e.code, e.msg) if request: - err = Error(request, e.msg) + err = flow.Error(request, e.msg) err.send(self.mqueue) self.send_error(e.code, e.msg) if server: @@ -742,7 +324,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler): method, scheme, host, port, path, httpminor = parse_request_line(self.rfile.readline()) if scheme is None: scheme = "https" - headers = utils.Headers() + headers = flow.Headers() headers.read(self.rfile) if host is None and "host" in headers: netloc = headers["host"][0] @@ -779,7 +361,7 @@ class ProxyHandler(SocketServer.StreamRequestHandler): if value == "keep-alive": client_conn.close = False content = read_http_body(self.rfile, client_conn, headers, False) - return Request(client_conn, host, port, scheme, method, path, headers, content) + return flow.Request(client_conn, host, port, scheme, method, path, headers, content) def send_response(self, response): self.wfile.write(response.assemble()) -- cgit v1.2.3