From b0cfeff06d9dd99a16dfae19c5df3c73c5864fb9 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 3 Sep 2014 16:57:56 +0200 Subject: fix #341 - work on flows instead of request/response internally. --- libmproxy/protocol/http.py | 102 ++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 57 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 658c08ed..3f9eecb3 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -77,9 +77,6 @@ class HTTPMessage(stateobject.SimpleStateObject): self.timestamp_start = timestamp_start if timestamp_start is not None else utils.timestamp() self.timestamp_end = timestamp_end if timestamp_end is not None else utils.timestamp() - self.flow = None # will usually be set by the flow backref mixin - """@type: HTTPFlow""" - _stateobject_attributes = dict( httpversion=tuple, headers=ODictCaseless, @@ -346,10 +343,10 @@ class HTTPRequest(HTTPMessage): del headers[k] if headers["Upgrade"] == ["h2c"]: # Suppress HTTP2 https://http2.github.io/http2-spec/index.html#discover-http del headers["Upgrade"] - if not 'host' in headers: + if not 'host' in headers and self.scheme and self.host and self.port: headers["Host"] = [utils.hostport(self.scheme, - self.host or self.flow.server_conn.address.host, - self.port or self.flow.server_conn.address.port)] + self.host, + self.port)] if self.content: headers["Content-Length"] = [str(len(self.content))] @@ -429,16 +426,16 @@ class HTTPRequest(HTTPMessage): self.headers["Content-Type"] = [HDR_FORM_URLENCODED] self.content = utils.urlencode(odict.lst) - def get_path_components(self): + def get_path_components(self, f): """ Returns the path components of the URL as a list of strings. Components are unquoted. """ - _, _, path, _, _, _ = urlparse.urlparse(self.get_url()) + _, _, path, _, _, _ = urlparse.urlparse(self.get_url(False, f)) return [urllib.unquote(i) for i in path.split("/") if i] - def set_path_components(self, lst): + def set_path_components(self, lst, f): """ Takes a list of strings, and sets the path component of the URL. @@ -446,27 +443,27 @@ class HTTPRequest(HTTPMessage): """ lst = [urllib.quote(i, safe="") for i in lst] path = "/" + "/".join(lst) - scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url()) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment])) + scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url(False, f)) + self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) - def get_query(self): + def get_query(self, f): """ Gets the request query string. Returns an ODict object. """ - _, _, _, _, query, _ = urlparse.urlparse(self.get_url()) + _, _, _, _, query, _ = urlparse.urlparse(self.get_url(False, f)) if query: return ODict(utils.urldecode(query)) return ODict([]) - def set_query(self, odict): + def set_query(self, odict, f): """ Takes an ODict object, and sets the request query string. """ - scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url()) + scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url(False, f)) query = utils.urlencode(odict.lst) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment])) + self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) - def get_host(self, hostheader=False): + def get_host(self, hostheader, flow): """ Heuristic to get the host of the request. @@ -484,16 +481,16 @@ class HTTPRequest(HTTPMessage): if self.host: host = self.host else: - for s in self.flow.server_conn.state: + for s in flow.server_conn.state: if s[0] == "http" and s[1]["state"] == "connect": host = s[1]["host"] break if not host: - host = self.flow.server_conn.address.host + host = flow.server_conn.address.host host = host.encode("idna") return host - def get_scheme(self): + def get_scheme(self, flow): """ Returns the request port, either from the request itself or from the flow's server connection """ @@ -501,20 +498,20 @@ class HTTPRequest(HTTPMessage): return self.scheme if self.form_out == "authority": # On SSLed connections, the original CONNECT request is still unencrypted. return "http" - return "https" if self.flow.server_conn.ssl_established else "http" + return "https" if flow.server_conn.ssl_established else "http" - def get_port(self): + def get_port(self, flow): """ Returns the request port, either from the request itself or from the flow's server connection """ if self.port: return self.port - for s in self.flow.server_conn.state: + for s in flow.server_conn.state: if s[0] == "http" and s[1].get("state") == "connect": return s[1]["port"] - return self.flow.server_conn.address.port + return flow.server_conn.address.port - def get_url(self, hostheader=False): + def get_url(self, hostheader, flow): """ Returns a URL string, constructed from the Request's URL components. @@ -522,13 +519,13 @@ class HTTPRequest(HTTPMessage): Host header to construct the URL. """ if self.form_out == "authority": # upstream proxy mode - return "%s:%s" % (self.get_host(hostheader), self.get_port()) - return utils.unparse_url(self.get_scheme(), - self.get_host(hostheader), - self.get_port(), + return "%s:%s" % (self.get_host(hostheader, flow), self.get_port(flow)) + return utils.unparse_url(self.get_scheme(flow), + self.get_host(hostheader, flow), + self.get_port(flow), self.path).encode('ascii') - def set_url(self, url): + def set_url(self, url, flow): """ Parses a URL specification, and updates the Request's information accordingly. @@ -543,14 +540,14 @@ class HTTPRequest(HTTPMessage): self.path = path - if host != self.get_host() or port != self.get_port(): - if self.flow.live: - self.flow.live.change_server((host, port), ssl=is_ssl) + if host != self.get_host(False, flow) or port != self.get_port(flow): + if flow.live: + flow.live.change_server((host, port), ssl=is_ssl) else: # There's not live server connection, we're just changing the attributes here. - self.flow.server_conn = ServerConnection((host, port), + flow.server_conn = ServerConnection((host, port), proxy.AddressPriority.MANUALLY_CHANGED) - self.flow.server_conn.ssl_established = is_ssl + flow.server_conn.ssl_established = is_ssl # If this is an absolute request, replace the attributes on the request object as well. if self.host: @@ -802,8 +799,6 @@ class HTTPFlow(Flow): self.intercepting = False # FIXME: Should that rather be an attribute of Flow? - _backrefattr = Flow._backrefattr + ("request", "response") - _stateobject_attributes = Flow._stateobject_attributes.copy() _stateobject_attributes.update( request=HTTPRequest, @@ -855,13 +850,10 @@ class HTTPFlow(Flow): Kill this request. """ self.error = Error("Connection killed") - self.error.reply = controller.DummyReply() - if self.request and not self.request.reply.acked: - self.request.reply(KILL) - elif self.response and not self.response.reply.acked: - self.response.reply(KILL) - master.handle_error(self.error) self.intercepting = False + self.reply(KILL) + self.reply = controller.DummyReply() + master.handle_error(self) def intercept(self): """ @@ -874,12 +866,8 @@ class HTTPFlow(Flow): """ Continue with the flow - called after an intercept(). """ - if self.request: - if not self.request.reply.acked: - self.request.reply() - elif self.response and not self.response.reply.acked: - self.response.reply() - self.intercepting = False + self.intercepting = False + self.reply() def replace(self, pattern, repl, *args, **kwargs): """ @@ -961,7 +949,7 @@ class HTTPHandler(ProtocolHandler): # in an Error object that has an attached request that has not been # sent through to the Master. flow.request = req - request_reply = self.c.channel.ask("request", flow.request) + request_reply = self.c.channel.ask("request", flow) self.determine_server_address(flow, flow.request) # The inline script may have changed request.host flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow @@ -976,7 +964,7 @@ class HTTPHandler(ProtocolHandler): flow.response = self.get_response_from_server(flow.request, include_body=False) # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True - self.c.channel.ask("responseheaders", flow.response) + self.c.channel.ask("responseheaders", flow) # now get the rest of the request body, if body still needs to be read but not streaming this response if flow.response.stream: @@ -991,7 +979,7 @@ class HTTPHandler(ProtocolHandler): flow.server_conn = self.c.server_conn self.c.log("response", "debug", [flow.response._assemble_first_line()]) - response_reply = self.c.channel.ask("response", flow.response) + response_reply = self.c.channel.ask("response", flow) if response_reply is None or response_reply == KILL: return False @@ -1079,7 +1067,7 @@ class HTTPHandler(ProtocolHandler): # TODO: no flows without request or with both request and response at the moment. if flow.request and not flow.response: flow.error = Error(message) - self.c.channel.ask("error", flow.error) + self.c.channel.ask("error", flow) try: code = getattr(error, "code", 502) @@ -1204,12 +1192,12 @@ class RequestReplayThread(threading.Thread): except proxy.ProxyError: pass if not server_address: - server_address = (r.get_host(), r.get_port()) + server_address = (r.get_host(False, self.flow), r.get_port(self.flow)) server = ServerConnection(server_address, None) server.connect() - if server_ssl or r.get_scheme() == "https": + if server_ssl or r.get_scheme(self.flow) == "https": if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode -> send CONNECT send_connect_request(server, r.get_host(), r.get_port()) r.form_out = "relative" @@ -1218,9 +1206,9 @@ class RequestReplayThread(threading.Thread): server.send(r._assemble()) self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, body_size_limit=self.config.body_size_limit) - self.channel.ask("response", self.flow.response) + self.channel.ask("response", self.flow) except (proxy.ProxyError, http.HttpError, tcp.NetLibError), v: self.flow.error = Error(repr(v)) - self.channel.ask("error", self.flow.error) + self.channel.ask("error", self.flow) finally: r.form_out = form_out_backup \ No newline at end of file -- cgit v1.2.3 From cd43c5ba9c2981aeffee354cbcb574b6f5e435ba Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 3 Sep 2014 20:12:30 +0200 Subject: simplify server changes for inline scripts --- libmproxy/protocol/http.py | 95 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 18 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 3f9eecb3..7577e0d3 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -26,7 +26,7 @@ def get_line(fp): return line -def send_connect_request(conn, host, port): +def send_connect_request(conn, host, port, update_state=True): upstream_request = HTTPRequest("authority", "CONNECT", None, host, port, None, (1, 1), ODictCaseless(), "") conn.send(upstream_request._assemble()) @@ -36,6 +36,12 @@ def send_connect_request(conn, host, port): "Cannot establish SSL " + "connection with upstream proxy: \r\n" + str(resp._assemble())) + if update_state: + conn.state.append(("http", { + "state": "connect", + "host": host, + "port": port} + )) return resp @@ -545,8 +551,7 @@ class HTTPRequest(HTTPMessage): flow.live.change_server((host, port), ssl=is_ssl) else: # There's not live server connection, we're just changing the attributes here. - flow.server_conn = ServerConnection((host, port), - proxy.AddressPriority.MANUALLY_CHANGED) + flow.server_conn = ServerConnection((host, port)) flow.server_conn.ssl_established = is_ssl # If this is an absolute request, replace the attributes on the request object as well. @@ -815,7 +820,7 @@ class HTTPFlow(Flow): s = " Date: Wed, 3 Sep 2014 23:44:54 +0200 Subject: improve HTTPRequest syntax --- libmproxy/protocol/http.py | 153 ++++++++++++++++++--------------------------- 1 file changed, 60 insertions(+), 93 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 7577e0d3..90d8ff16 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -411,7 +411,14 @@ class HTTPRequest(HTTPMessage): e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0] )] - def get_form_urlencoded(self): + def update_host_header(self): + """ + Update the host header to reflect the current target. + """ + self.headers["Host"] = [self.host] + + @property + def form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. Returns an empty ODict if there is no data or the content-type @@ -421,7 +428,8 @@ class HTTPRequest(HTTPMessage): return ODict(utils.urldecode(self.content)) return ODict([]) - def set_form_urlencoded(self, odict): + @form_urlencoded.setter + def form_urlencoded(self, odict): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. Note that this will destory the @@ -432,16 +440,18 @@ class HTTPRequest(HTTPMessage): self.headers["Content-Type"] = [HDR_FORM_URLENCODED] self.content = utils.urlencode(odict.lst) - def get_path_components(self, f): + @property + def path_components(self): """ Returns the path components of the URL as a list of strings. Components are unquoted. """ - _, _, path, _, _, _ = urlparse.urlparse(self.get_url(False, f)) + _, _, path, _, _, _ = urlparse.urlparse(self.url) return [urllib.unquote(i) for i in path.split("/") if i] - def set_path_components(self, lst, f): + @path_components.setter + def path_components(self, lst): """ Takes a list of strings, and sets the path component of the URL. @@ -449,32 +459,34 @@ class HTTPRequest(HTTPMessage): """ lst = [urllib.quote(i, safe="") for i in lst] path = "/" + "/".join(lst) - scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url(False, f)) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) + scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) + self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment]) - def get_query(self, f): + @property + def query(self): """ Gets the request query string. Returns an ODict object. """ - _, _, _, _, query, _ = urlparse.urlparse(self.get_url(False, f)) + _, _, _, _, query, _ = urlparse.urlparse(self.url) if query: return ODict(utils.urldecode(query)) return ODict([]) - def set_query(self, odict, f): + @query.setter + def query(self, odict): """ Takes an ODict object, and sets the request query string. """ - scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url(False, f)) + scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url) query = utils.urlencode(odict.lst) - self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f) + self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment]) - def get_host(self, hostheader, flow): + def pretty_host(self, hostheader): """ Heuristic to get the host of the request. - Note that get_host() does not always return the TCP destination of the request, - e.g. on a transparently intercepted request to an unrelated HTTP proxy. + Note that pretty_host() does not always return the TCP destination of the request, + e.g. if an upstream proxy is in place If hostheader is set to True, the Host: header will be used as additional (and preferred) data source. This is handy in transparent mode, where only the ip of the destination is known, but not the @@ -484,54 +496,27 @@ class HTTPRequest(HTTPMessage): if hostheader: host = self.headers.get_first("host") if not host: - if self.host: - host = self.host - else: - for s in flow.server_conn.state: - if s[0] == "http" and s[1]["state"] == "connect": - host = s[1]["host"] - break - if not host: - host = flow.server_conn.address.host + host = self.host host = host.encode("idna") return host - def get_scheme(self, flow): - """ - Returns the request port, either from the request itself or from the flow's server connection - """ - if self.scheme: - return self.scheme - if self.form_out == "authority": # On SSLed connections, the original CONNECT request is still unencrypted. - return "http" - return "https" if flow.server_conn.ssl_established else "http" - - def get_port(self, flow): - """ - Returns the request port, either from the request itself or from the flow's server connection - """ - if self.port: - return self.port - for s in flow.server_conn.state: - if s[0] == "http" and s[1].get("state") == "connect": - return s[1]["port"] - return flow.server_conn.address.port + def pretty_url(self, hostheader): + if self.form_out == "authority": # upstream proxy mode + return "%s:%s" % (self.pretty_host(hostheader), self.port) + return utils.unparse_url(self.scheme, + self.pretty_host(hostheader), + self.port, + self.path).encode('ascii') - def get_url(self, hostheader, flow): + @property + def url(self): """ Returns a URL string, constructed from the Request's URL components. - - If hostheader is True, we use the value specified in the request - Host header to construct the URL. """ - if self.form_out == "authority": # upstream proxy mode - return "%s:%s" % (self.get_host(hostheader, flow), self.get_port(flow)) - return utils.unparse_url(self.get_scheme(flow), - self.get_host(hostheader, flow), - self.get_port(flow), - self.path).encode('ascii') + return self.pretty_url(False) - def set_url(self, url, flow): + @url.setter + def url(self, url): """ Parses a URL specification, and updates the Request's information accordingly. @@ -540,31 +525,11 @@ class HTTPRequest(HTTPMessage): """ parts = http.parse_url(url) if not parts: - return False - scheme, host, port, path = parts - is_ssl = (True if scheme == "https" else False) - - self.path = path - - if host != self.get_host(False, flow) or port != self.get_port(flow): - if flow.live: - flow.live.change_server((host, port), ssl=is_ssl) - else: - # There's not live server connection, we're just changing the attributes here. - flow.server_conn = ServerConnection((host, port)) - flow.server_conn.ssl_established = is_ssl - - # If this is an absolute request, replace the attributes on the request object as well. - if self.host: - self.host = host - if self.port: - self.port = port - if self.scheme: - self.scheme = scheme + raise ValueError("Invalid URL: %s" % url) + self.scheme, self.host, self.port, self.path = parts - return True - - def get_cookies(self): + @property + def cookies(self): cookie_headers = self.headers.get("cookie") if not cookie_headers: return None @@ -760,7 +725,8 @@ class HTTPResponse(HTTPMessage): if c: self.headers["set-cookie"] = c - def get_cookies(self): + @property + def cookies(self): cookie_headers = self.headers.get("set-cookie") if not cookie_headers: return None @@ -1127,12 +1093,12 @@ class HTTPHandler(ProtocolHandler): if not request.host: # Host/Port Complication: In upstream mode, use the server we CONNECTed to, # not the upstream proxy. - for s in flow.server_conn.state: - if s[0] == "http" and s[1]["state"] == "connect": - request.host, request.port = s[1]["host"], s[1]["port"] - if not request.host: - request.host = flow.server_conn.address.host - request.port = flow.server_conn.address.port + if flow.server_conn: + for s in flow.server_conn.state: + if s[0] == "http" and s[1]["state"] == "connect": + request.host, request.port = s[1]["host"], s[1]["port"] + if not request.host and flow.server_conn: + request.host, request.port = flow.server_conn.address.host, flow.server_conn.address.port # Now we can process the request. if request.form_in == "authority": @@ -1242,7 +1208,9 @@ class RequestReplayThread(threading.Thread): r.form_out = self.config.http_form_out server_address, server_ssl = False, False - if self.config.get_upstream_server: + # If the flow is live, r.host is already the correct upstream server unless modified by a script. + # If modified by a script, we probably want to keep the modified destination. + if self.config.get_upstream_server and not self.flow.live: try: # this will fail in transparent mode upstream_info = self.config.get_upstream_server(self.flow.client_conn) @@ -1251,17 +1219,16 @@ class RequestReplayThread(threading.Thread): except proxy.ProxyError: pass if not server_address: - server_address = (r.get_host(False, self.flow), r.get_port(self.flow)) + server_address = (r.host, r.port) - server = ServerConnection(server_address, None) + server = ServerConnection(server_address) server.connect() - if server_ssl or r.get_scheme(self.flow) == "https": + if server_ssl or r.scheme == "https": if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode -> send CONNECT - send_connect_request(server, r.get_host(), r.get_port()) + send_connect_request(server, r.host, r.port) r.form_out = "relative" - server.establish_ssl(self.config.clientcerts, - self.flow.server_conn.sni) + server.establish_ssl(self.config.clientcerts, sni=r.host) server.send(r._assemble()) self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, body_size_limit=self.config.body_size_limit) -- cgit v1.2.3 From a7a3b5703adff7de12fb479a90ea2628465a4486 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 5 Sep 2014 00:18:17 +0200 Subject: change replay_request behaviour, refs #346; test upstream proxy mode --- libmproxy/protocol/http.py | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 253192dd..90ee127c 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1203,33 +1203,28 @@ class RequestReplayThread(threading.Thread): threading.Thread.__init__(self) def run(self): + r = self.flow.request + form_out_backup = r.form_out try: - r = self.flow.request - form_out_backup = r.form_out - - r.form_out = self.config.http_form_out - server_address, server_ssl = False, False - # If the flow is live, r.host is already the correct upstream server unless modified by a script. - # If modified by a script, we probably want to keep the modified destination. - if self.config.get_upstream_server and not self.flow.live: - try: - # this will fail in transparent mode - upstream_info = self.config.get_upstream_server(self.flow.client_conn) - server_ssl = upstream_info[1] - server_address = upstream_info[2:] - except proxy.ProxyError: - pass - if not server_address: - server_address = (r.host, r.port) - - server = ServerConnection(server_address) - server.connect() - - if server_ssl or r.scheme == "https": - if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode -> send CONNECT + # In all modes, we directly connect to the server displayed + if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode + server_address = self.config.get_upstream_server(self.flow.client_conn)[2:] + server = ServerConnection(server_address) + server.connect() + if r.scheme == "https": send_connect_request(server, r.host, r.port) + server.establish_ssl(self.config.clientcerts, sni=r.host) r.form_out = "relative" - server.establish_ssl(self.config.clientcerts, sni=r.host) + else: + r.form_out = "absolute" + else: + server_address = (r.host, r.port) + server = ServerConnection(server_address) + server.connect() + if r.scheme == "https": + server.establish_ssl(self.config.clientcerts, sni=r.host) + r.form_out = "relative" + server.send(r._assemble()) self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, body_size_limit=self.config.body_size_limit) -- cgit v1.2.3 From 2a6337343a14f7f72c28d8bf5f24220f6d9ca6d0 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 5 Sep 2014 15:16:20 +0200 Subject: update docs, mostly revert 2f44b26b4cd014e03dd62a125d79af9b81663a93 --- libmproxy/protocol/http.py | 89 ++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 46 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 90ee127c..9593c3cb 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -29,13 +29,13 @@ def get_line(fp): def send_connect_request(conn, host, port, update_state=True): upstream_request = HTTPRequest("authority", "CONNECT", None, host, port, None, (1, 1), ODictCaseless(), "") - conn.send(upstream_request._assemble()) + conn.send(upstream_request.assemble()) resp = HTTPResponse.from_stream(conn.rfile, upstream_request.method) if resp.code != 200: raise proxy.ProxyError(resp.code, "Cannot establish SSL " + "connection with upstream proxy: \r\n" + - str(resp._assemble())) + str(resp.assemble())) if update_state: conn.state.append(("http", { "state": "connect", @@ -73,6 +73,9 @@ class decoded(object): class HTTPMessage(stateobject.SimpleStateObject): + """ + Base class for HTTPRequest and HTTPResponse + """ def __init__(self, httpversion, headers, content, timestamp_start=None, timestamp_end=None): self.httpversion = httpversion @@ -162,31 +165,31 @@ class HTTPMessage(stateobject.SimpleStateObject): """ Parse an HTTP message from a file stream """ - raise NotImplementedError # pragma: nocover + raise NotImplementedError() # pragma: nocover def _assemble_first_line(self): """ Returns the assembled request/response line """ - raise NotImplementedError # pragma: nocover + raise NotImplementedError() # pragma: nocover def _assemble_headers(self): """ Returns the assembled headers """ - raise NotImplementedError # pragma: nocover + raise NotImplementedError() # pragma: nocover def _assemble_head(self): """ Returns the assembled request/response line plus headers """ - raise NotImplementedError # pragma: nocover + raise NotImplementedError() # pragma: nocover - def _assemble(self): + def assemble(self): """ Returns the assembled request/response """ - raise NotImplementedError # pragma: nocover + raise NotImplementedError() # pragma: nocover class HTTPRequest(HTTPMessage): @@ -195,7 +198,17 @@ class HTTPRequest(HTTPMessage): Exposes the following attributes: - flow: Flow object the request belongs to + method: HTTP method + + scheme: URL scheme (http/https) (absolute-form only) + + host: Host portion of the URL (absolute-form and authority-form only) + + port: Destination port (absolute-form and authority-form only) + + path: Path portion of the URL (not present in authority-form) + + httpversion: HTTP version tuple, e.g. (1,1) headers: ODictCaseless object @@ -211,18 +224,6 @@ class HTTPRequest(HTTPMessage): form_out: The request form which mitmproxy has send out to the destination - method: HTTP method - - scheme: URL scheme (http/https) (absolute-form only) - - host: Host portion of the URL (absolute-form and authority-form only) - - port: Destination port (absolute-form and authority-form only) - - path: Path portion of the URL (not present in authority-form) - - httpversion: HTTP version tuple - timestamp_start: Timestamp indicating when request transmission started timestamp_end: Timestamp indicating when request transmission ended @@ -364,7 +365,7 @@ class HTTPRequest(HTTPMessage): def _assemble_head(self, form=None): return "%s\r\n%s\r\n" % (self._assemble_first_line(form), self._assemble_headers()) - def _assemble(self, form=None): + def assemble(self, form=None): """ Assembles the request for transmission to the server. We make some modifications to make sure interception works properly. @@ -417,8 +418,7 @@ class HTTPRequest(HTTPMessage): """ self.headers["Host"] = [self.host] - @property - def form_urlencoded(self): + def get_form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. Returns an empty ODict if there is no data or the content-type @@ -428,8 +428,7 @@ class HTTPRequest(HTTPMessage): return ODict(utils.urldecode(self.content)) return ODict([]) - @form_urlencoded.setter - def form_urlencoded(self, odict): + def set_form_urlencoded(self, odict): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. Note that this will destory the @@ -440,8 +439,7 @@ class HTTPRequest(HTTPMessage): self.headers["Content-Type"] = [HDR_FORM_URLENCODED] self.content = utils.urlencode(odict.lst) - @property - def path_components(self): + def get_path_components(self): """ Returns the path components of the URL as a list of strings. @@ -450,8 +448,7 @@ class HTTPRequest(HTTPMessage): _, _, path, _, _, _ = urlparse.urlparse(self.url) return [urllib.unquote(i) for i in path.split("/") if i] - @path_components.setter - def path_components(self, lst): + def set_path_components(self, lst): """ Takes a list of strings, and sets the path component of the URL. @@ -462,8 +459,7 @@ class HTTPRequest(HTTPMessage): scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment]) - @property - def query(self): + def get_query(self): """ Gets the request query string. Returns an ODict object. """ @@ -472,8 +468,7 @@ class HTTPRequest(HTTPMessage): return ODict(utils.urldecode(query)) return ODict([]) - @query.setter - def query(self, odict): + def set_query(self, odict): """ Takes an ODict object, and sets the request query string. """ @@ -528,8 +523,7 @@ class HTTPRequest(HTTPMessage): raise ValueError("Invalid URL: %s" % url) self.scheme, self.host, self.port, self.path = parts - @property - def cookies(self): + def get_cookies(self): cookie_headers = self.headers.get("cookie") if not cookie_headers: return None @@ -560,7 +554,7 @@ class HTTPResponse(HTTPMessage): Exposes the following attributes: - flow: Flow object the request belongs to + httpversion: HTTP version tuple, e.g. (1,1) code: HTTP response code @@ -572,8 +566,6 @@ class HTTPResponse(HTTPMessage): is content associated, but not present. CONTENT_MISSING evaluates to False to make checking for the presence of content natural. - httpversion: HTTP version tuple - timestamp_start: Timestamp indicating when request transmission started timestamp_end: Timestamp indicating when request transmission ended @@ -661,7 +653,7 @@ class HTTPResponse(HTTPMessage): return '%s\r\n%s\r\n' % ( self._assemble_first_line(), self._assemble_headers(preserve_transfer_encoding=preserve_transfer_encoding)) - def _assemble(self): + def assemble(self): """ Assembles the response for transmission to the client. We make some modifications to make sure interception works properly. @@ -726,8 +718,7 @@ class HTTPResponse(HTTPMessage): if c: self.headers["set-cookie"] = c - @property - def cookies(self): + def get_cookies(self): cookie_headers = self.headers.get("set-cookie") if not cookie_headers: return None @@ -745,12 +736,14 @@ class HTTPResponse(HTTPMessage): class HTTPFlow(Flow): """ - A Flow is a collection of objects representing a single HTTP + A HTTPFlow is a collection of objects representing a single HTTP transaction. The main attributes are: request: HTTPRequest object response: HTTPResponse object error: Error object + server_conn: ServerConnection object + client_conn: ClientConnection object Note that it's possible for a Flow to have both a response and an error object. This might happen, for instance, when a response was received @@ -866,6 +859,10 @@ class HttpAuthenticationError(Exception): class HTTPHandler(ProtocolHandler): + """ + HTTPHandler implements mitmproxys understanding of the HTTP protocol. + + """ def __init__(self, c): super(HTTPHandler, self).__init__(c) self.expected_form_in = c.config.http_form_in @@ -878,7 +875,7 @@ class HTTPHandler(ProtocolHandler): def get_response_from_server(self, request, include_body=True): self.c.establish_server_connection() - request_raw = request._assemble() + request_raw = request.assemble() for i in range(2): try: @@ -957,7 +954,7 @@ class HTTPHandler(ProtocolHandler): if not flow.response.stream: # no streaming: # we already received the full response from the server and can send it to the client straight away. - self.c.client_conn.send(flow.response._assemble()) + self.c.client_conn.send(flow.response.assemble()) else: # streaming: # First send the body and then transfer the response incrementally: @@ -1225,7 +1222,7 @@ class RequestReplayThread(threading.Thread): server.establish_ssl(self.config.clientcerts, sni=r.host) r.form_out = "relative" - server.send(r._assemble()) + server.send(r.assemble()) self.flow.response = HTTPResponse.from_stream(server.rfile, r.method, body_size_limit=self.config.body_size_limit) self.channel.ask("response", self.flow) -- cgit v1.2.3 From 32e1ed212da8095530abad4ac41f10ee8a599c74 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 5 Sep 2014 19:39:05 +0200 Subject: streamline HTTPHandler.handle_flow() --- libmproxy/protocol/http.py | 191 +++++++++++++++++++++++---------------------- 1 file changed, 96 insertions(+), 95 deletions(-) (limited to 'libmproxy/protocol/http.py') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 9593c3cb..c67cb471 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -160,13 +160,6 @@ class HTTPMessage(stateobject.SimpleStateObject): c += self.headers.replace(pattern, repl, *args, **kwargs) return c - @classmethod - def from_stream(cls, rfile, include_body=True, body_size_limit=None): - """ - Parse an HTTP message from a file stream - """ - raise NotImplementedError() # pragma: nocover - def _assemble_first_line(self): """ Returns the assembled request/response line @@ -644,7 +637,8 @@ class HTTPResponse(HTTPMessage): if self.content: headers["Content-Length"] = [str(len(self.content))] - elif not preserve_transfer_encoding and 'Transfer-Encoding' in self.headers: # add content-length for chuncked transfer-encoding with no content + # add content-length for chuncked transfer-encoding with no content + elif not preserve_transfer_encoding and 'Transfer-Encoding' in self.headers: headers["Content-Length"] = ["0"] return str(headers) @@ -873,19 +867,21 @@ class HTTPHandler(ProtocolHandler): while self.handle_flow(): pass - def get_response_from_server(self, request, include_body=True): + def get_response_from_server(self, flow): self.c.establish_server_connection() - request_raw = request.assemble() + request_raw = flow.request.assemble() - for i in range(2): + for attempt in (0, 1): try: self.c.server_conn.send(request_raw) - res = HTTPResponse.from_stream(self.c.server_conn.rfile, request.method, - body_size_limit=self.c.config.body_size_limit, include_body=include_body) - return res + # Only get the headers at first... + flow.response = HTTPResponse.from_stream(self.c.server_conn.rfile, flow.request.method, + body_size_limit=self.c.config.body_size_limit, + include_body=False) + break except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v: self.c.log("error in server communication: %s" % repr(v), level="debug") - if i < 1: + if attempt == 0: # In any case, we try to reconnect at least once. # This is necessary because it might be possible that we already initiated an upstream connection # after clientconnect that has already been expired, e.g consider the following event log: @@ -899,13 +895,24 @@ class HTTPHandler(ProtocolHandler): else: raise + # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True + self.c.channel.ask("responseheaders", flow) + + # now get the rest of the request body, if body still needs to be read but not streaming this response + if flow.response.stream: + flow.response.content = CONTENT_MISSING + else: + flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, + self.c.config.body_size_limit, + flow.request.method, flow.response.code, False) + def handle_flow(self): flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.live) try: try: req = HTTPRequest.from_stream(self.c.client_conn.rfile, body_size_limit=self.c.config.body_size_limit) - except tcp.NetLibDisconnect: # specifically ignore disconnects that happen before/between requests. + except tcp.NetLibDisconnect: # don't throw an error for disconnects that happen before/between requests. return False self.c.log("request", "debug", [req._assemble_first_line(req.form_in)]) ret = self.process_request(flow, req) @@ -927,20 +934,7 @@ class HTTPHandler(ProtocolHandler): if isinstance(request_reply, HTTPResponse): flow.response = request_reply else: - - # read initially in "stream" mode, so we can get the headers separately - flow.response = self.get_response_from_server(flow.request, include_body=False) - - # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True - self.c.channel.ask("responseheaders", flow) - - # now get the rest of the request body, if body still needs to be read but not streaming this response - if flow.response.stream: - flow.response.content = CONTENT_MISSING - else: - flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, - self.c.config.body_size_limit, - flow.request.method, flow.response.code, False) + self.get_response_from_server(flow) # no further manipulation of self.c.server_conn beyond this point # we can safely set it as the final attribute value here. @@ -951,67 +945,34 @@ class HTTPHandler(ProtocolHandler): if response_reply is None or response_reply == KILL: return False - if not flow.response.stream: - # no streaming: - # we already received the full response from the server and can send it to the client straight away. - self.c.client_conn.send(flow.response.assemble()) - else: - # streaming: - # First send the body and then transfer the response incrementally: - h = flow.response._assemble_head(preserve_transfer_encoding=True) - self.c.client_conn.send(h) - for chunk in http.read_http_body_chunked(self.c.server_conn.rfile, - flow.response.headers, - self.c.config.body_size_limit, flow.request.method, - flow.response.code, False, 4096): - for part in chunk: - self.c.client_conn.wfile.write(part) - self.c.client_conn.wfile.flush() - flow.response.timestamp_end = utils.timestamp() - - flow.timestamp_end = utils.timestamp() + self.send_response_to_client(flow) - close_connection = ( - http.connection_close(flow.request.httpversion, flow.request.headers) or - http.connection_close(flow.response.httpversion, flow.response.headers) or - http.expected_http_body_size(flow.response.headers, False, flow.request.method, - flow.response.code) == -1) - if close_connection: - if flow.request.form_in == "authority" and flow.response.code == 200: - # Workaround for https://github.com/mitmproxy/mitmproxy/issues/313: - # Some proxies (e.g. Charles) send a CONNECT response with HTTP/1.0 and no Content-Length header - pass - else: - return False + if self.check_close_connection(flow): + return False # We sent a CONNECT request to an upstream proxy. if flow.request.form_in == "authority" and flow.response.code == 200: - # TODO: Eventually add headers (space/usefulness tradeoff) - # Make sure to add state info before the actual upgrade happens. - # During the upgrade, we may receive an SNI indication from the client, + # TODO: Possibly add headers (memory consumption/usefulness tradeoff) + # Make sure to add state info before the actual processing of the CONNECT request happens. + # During an SSL upgrade, we may receive an SNI indication from the client, # which resets the upstream connection. If this is the case, we must # already re-issue the CONNECT request at this point. self.c.server_conn.state.append(("http", {"state": "connect", "host": flow.request.host, "port": flow.request.port})) - - if self.c.check_ignore_address((flow.request.host, flow.request.port)): - self.c.log("Ignore host: %s:%s" % self.c.server_conn.address(), "info") - TCPHandler(self.c).handle_messages() + if not self.process_connect_request((flow.request.host, flow.request.port)): return False - else: - if flow.request.port in self.c.config.ssl_ports: - self.ssl_upgrade() - self.skip_authentication = True # If the user has changed the target server on this connection, # restore the original target server flow.live.restore_server() - flow.live = None - return True + return True # Next flow please. except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError), e: self.handle_error(e, flow) + finally: + flow.timestamp_end = utils.timestamp() + flow.live = None # Connection is not live anymore. return False def handle_server_reconnect(self, state): @@ -1060,16 +1021,6 @@ class HTTPHandler(ProtocolHandler): self.c.client_conn.wfile.write(html_content) self.c.client_conn.wfile.flush() - def ssl_upgrade(self): - """ - Upgrade the connection to SSL after an authority (CONNECT) request has been made. - """ - self.c.log("Received CONNECT request. Upgrading to SSL...", "debug") - self.expected_form_in = "relative" - self.expected_form_out = "relative" - self.c.establish_ssl(server=True, client=True) - self.c.log("Upgrade to SSL completed.", "debug") - def process_request(self, flow, request): """ @returns: @@ -1114,16 +1065,7 @@ class HTTPHandler(ProtocolHandler): ('Proxy-agent: %s\r\n' % self.c.server_version) + '\r\n' ) - - if self.c.check_ignore_address(self.c.server_conn.address): - self.c.log("Ignore host: %s:%s" % self.c.server_conn.address(), "info") - TCPHandler(self.c).handle_messages() - return False - else: - if self.c.server_conn.address.port in self.c.config.ssl_ports: - self.ssl_upgrade() - self.skip_authentication = True - return True + return self.process_connect_request(self.c.server_conn.address) else: # upstream proxy mode return None else: @@ -1140,7 +1082,6 @@ class HTTPHandler(ProtocolHandler): self.c.set_server_address((request.host, request.port)) flow.server_conn = self.c.server_conn - return None raise http.HttpError(400, "Invalid HTTP request form (expected: %s, got: %s)" % @@ -1182,6 +1123,66 @@ class HTTPHandler(ProtocolHandler): flow.server_conn = self.c.server_conn + def send_response_to_client(self, flow): + if not flow.response.stream: + # no streaming: + # we already received the full response from the server and can send it to the client straight away. + self.c.client_conn.send(flow.response.assemble()) + else: + # streaming: + # First send the body and then transfer the response incrementally: + h = flow.response._assemble_head(preserve_transfer_encoding=True) + self.c.client_conn.send(h) + for chunk in http.read_http_body_chunked(self.c.server_conn.rfile, + flow.response.headers, + self.c.config.body_size_limit, flow.request.method, + flow.response.code, False, 4096): + for part in chunk: + self.c.client_conn.wfile.write(part) + self.c.client_conn.wfile.flush() + flow.response.timestamp_end = utils.timestamp() + + def check_close_connection(self, flow): + """ + Checks if the connection should be closed depending on the HTTP semantics. Returns True, if so. + """ + close_connection = ( + http.connection_close(flow.request.httpversion, flow.request.headers) or + http.connection_close(flow.response.httpversion, flow.response.headers) or + http.expected_http_body_size(flow.response.headers, False, flow.request.method, + flow.response.code) == -1) + if close_connection: + if flow.request.form_in == "authority" and flow.response.code == 200: + # Workaround for https://github.com/mitmproxy/mitmproxy/issues/313: + # Some proxies (e.g. Charles) send a CONNECT response with HTTP/1.0 and no Content-Length header + pass + else: + return True + return False + + def process_connect_request(self, address): + """ + Process a CONNECT request. + Returns True if the CONNECT request has been processed successfully. + Returns False, if the connection should be closed immediately. + """ + address = tcp.Address.wrap(address) + if self.c.check_ignore_address(address): + self.c.log("Ignore host: %s:%s" % address(), "info") + TCPHandler(self.c).handle_messages() + return False + else: + self.expected_form_in = "relative" + self.expected_form_out = "relative" + self.skip_authentication = True + + if address.port in self.c.config.ssl_ports: + self.c.log("Received CONNECT request to SSL port. Upgrading to SSL...", "debug") + self.c.establish_ssl(server=True, client=True) + self.c.log("Upgrade to SSL completed.", "debug") + + return True + def authenticate(self, request): if self.c.config.authenticator: if self.c.config.authenticator.authenticate(request.headers): -- cgit v1.2.3