From c47ddaa3a025597f8706c437f792c1ad12c388ab Mon Sep 17 00:00:00 2001 From: Brad Peabody Date: Thu, 17 Jul 2014 22:43:26 -0700 Subject: basic attempt to implement streaming response, needs testing --- libmproxy/flow.py | 7 ++++++ libmproxy/protocol/http.py | 61 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 5 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/flow.py b/libmproxy/flow.py index b6b49022..5b99427a 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -706,6 +706,13 @@ class FlowMaster(controller.Master): self.process_new_request(f) return f + def handle_responseheaders(self, r): + f = self.state.add_response(r) + if f: + self.run_script_hook("responseheaders", f) + r.reply() + return f + def handle_response(self, r): f = self.state.add_response(r) if f: diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index b7ff5b4b..f3d5f666 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -865,15 +865,16 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): pass self.c.close = True - def get_response_from_server(self, request): + def get_response_from_server(self, request, stream=False): self.c.establish_server_connection() request_raw = request._assemble() for i in range(2): try: self.c.server_conn.send(request_raw) - return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method, - body_size_limit=self.c.config.body_size_limit) + res = HTTPResponse.from_stream(self.c.server_conn.rfile, request.method, + body_size_limit=self.c.config.body_size_limit, include_content=(not stream)) + return res except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v: self.c.log("error in server communication: %s" % str(v), level="debug") if i < 1: @@ -892,6 +893,8 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): def handle_flow(self): flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.change_server) + flow.stream_expecting_body = False + flow.stream = False try: req = HTTPRequest.from_stream(self.c.client_conn.rfile, body_size_limit=self.c.config.body_size_limit) @@ -915,7 +918,23 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if isinstance(request_reply, HTTPResponse): flow.response = request_reply else: - flow.response = self.get_response_from_server(flow.request) + + # read initially in "stream" mode, so we can get the headers separately + flow.response = self.get_response_from_server(flow.request,stream=True) + + if flow.response.content == None: + flow.stream_expecting_body = True + flow.response.content = "" # set this to empty string or other things get really confused, + # flow.stream_expecting_body now contains the state info of whether or not + # body still remains to be read + + # call the appropriate script hook - this is an opportunity for + responseheaders_reply = self.c.channel.ask("responseheaders", flow.response) + # hm - do we need to do something with responseheaders_reply?? + + # now get the rest of the request body, if body still needs to be read but not streaming this response + if flow.stream_expecting_body and not flow.stream: + flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, self.c.config.body_size_limit, False) flow.server_conn = self.c.server_conn # no further manipulation of self.c.server_conn beyond this point # we can safely set it as the final attribute value here. @@ -925,7 +944,39 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if response_reply is None or response_reply == KILL: return False - self.c.client_conn.send(flow.response._assemble()) + if not flow.stream or not flow.stream_expecting_body: + # if not streaming or there is no body to be read, we'll already have the body, just send it + self.c.client_conn.send(flow.response._assemble()) + else: + + # if streaming, we still need to read the body and stream its bits back to the client + + # start with head + h = flow.response._assemble_head() + self.c.client_conn.send(h) + + # if chunked then we send back each chunk + if http.has_chunked_encoding(flow.response.headers): + while 1: + content = http.read_next_chunk(self.c.server_conn.rfile, flow.response.headers, False) + if not http.write_chunk(self.c.client_conn.rfile, content): + break + + else: # not chunked, we send back 4k at a time + clen = http.expected_http_body_size(flow.response.headers, False) + clen = clen if clen >= 0 else (64 * 1024 * 1024 * 1024) # arbitrary max of 64G if no length set + rcount = 0 + blocksize = 4096 + while 1: + bytes_to_read = min(blocksize, clen - rcount) + content = self.c.server_conn.rfile.read(bytes_to_read) + if content == "": # check for EOF + break + rcount += len(content) + self.c.client_conn.rfile.write(content) + if rcount >= clen: # check for having read up to clen + break + flow.timestamp_end = utils.timestamp() if (http.connection_close(flow.request.httpversion, flow.request.headers) or -- cgit v1.2.3 From 560e23af092ab566e75060346ebde739ac07f179 Mon Sep 17 00:00:00 2001 From: Brad Peabody Date: Sat, 19 Jul 2014 19:10:14 -0700 Subject: fixed handling of Transfer-Encoding header during streaming; wrote tests for streaming support --- libmproxy/protocol/http.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index f3d5f666..2c3c8f97 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -632,24 +632,21 @@ class HTTPResponse(HTTPMessage): return 'HTTP/%s.%s %s %s' % \ (self.httpversion[0], self.httpversion[1], self.code, self.msg) - def _assemble_headers(self): + def _assemble_headers(self, preserve_transfer_encoding=False): headers = self.headers.copy() - utils.del_all( - headers, - [ - 'Proxy-Connection', - 'Transfer-Encoding' - ] - ) + utils.del_all(headers,['Proxy-Connection']) + if not preserve_transfer_encoding: + utils.del_all(headers,['Transfer-Encoding']) + if self.content: headers["Content-Length"] = [str(len(self.content))] - elif 'Transfer-Encoding' in self.headers: # add content-length for chuncked transfer-encoding with no content + elif not preserve_transfer_encoding and 'Transfer-Encoding' in self.headers: # add content-length for chuncked transfer-encoding with no content headers["Content-Length"] = ["0"] return str(headers) - def _assemble_head(self): - return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers()) + def _assemble_head(self, preserve_transfer_encoding=False): + return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers(preserve_transfer_encoding=preserve_transfer_encoding)) def _assemble(self): """ @@ -928,7 +925,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): # flow.stream_expecting_body now contains the state info of whether or not # body still remains to be read - # call the appropriate script hook - this is an opportunity for + # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True responseheaders_reply = self.c.channel.ask("responseheaders", flow.response) # hm - do we need to do something with responseheaders_reply?? @@ -944,6 +941,8 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if response_reply is None or response_reply == KILL: return False + disconnected_while_streaming = False + if not flow.stream or not flow.stream_expecting_body: # if not streaming or there is no body to be read, we'll already have the body, just send it self.c.client_conn.send(flow.response._assemble()) @@ -952,15 +951,17 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): # if streaming, we still need to read the body and stream its bits back to the client # start with head - h = flow.response._assemble_head() + h = flow.response._assemble_head(preserve_transfer_encoding=True) self.c.client_conn.send(h) # if chunked then we send back each chunk if http.has_chunked_encoding(flow.response.headers): while 1: content = http.read_next_chunk(self.c.server_conn.rfile, flow.response.headers, False) - if not http.write_chunk(self.c.client_conn.rfile, content): + if not http.write_chunk(self.c.client_conn.wfile, content): break + self.c.client_conn.wfile.flush() + self.c.client_conn.wfile.flush() else: # not chunked, we send back 4k at a time clen = http.expected_http_body_size(flow.response.headers, False) @@ -969,17 +970,21 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): blocksize = 4096 while 1: bytes_to_read = min(blocksize, clen - rcount) + if bytes_to_read == 0: + break content = self.c.server_conn.rfile.read(bytes_to_read) if content == "": # check for EOF + disconnected_while_streaming = True break rcount += len(content) - self.c.client_conn.rfile.write(content) + self.c.client_conn.wfile.write(content) + self.c.client_conn.wfile.flush() if rcount >= clen: # check for having read up to clen break flow.timestamp_end = utils.timestamp() - if (http.connection_close(flow.request.httpversion, flow.request.headers) or + if (disconnected_while_streaming or http.connection_close(flow.request.httpversion, flow.request.headers) or http.connection_close(flow.response.httpversion, flow.response.headers)): return False @@ -989,6 +994,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): # If the user has changed the target server on this connection, # restore the original target server self.restore_server() + return True except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError), e: self.handle_error(e, flow) -- cgit v1.2.3 From 7398db80db004546070139c0c7e79bba4f92b318 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 20 Jul 2014 11:17:53 +0200 Subject: simplify responseheader scripthook --- libmproxy/flow.py | 8 +++----- libmproxy/protocol/http.py | 20 ++++++-------------- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 5b99427a..55097756 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -706,11 +706,9 @@ class FlowMaster(controller.Master): self.process_new_request(f) return f - def handle_responseheaders(self, r): - f = self.state.add_response(r) - if f: - self.run_script_hook("responseheaders", f) - r.reply() + def handle_responseheaders(self, f): + self.run_script_hook("responseheaders", f) + f.reply() return f def handle_response(self, r): diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 242443ec..711cb06c 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -870,7 +870,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): try: self.c.server_conn.send(request_raw) res = HTTPResponse.from_stream(self.c.server_conn.rfile, request.method, - body_size_limit=self.c.config.body_size_limit, include_content=(not stream)) + body_size_limit=self.c.config.body_size_limit, include_body=(not stream)) return res except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v: self.c.log("error in server communication: %s" % str(v), level="debug") @@ -890,8 +890,6 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): def handle_flow(self): flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.change_server) - flow.stream_expecting_body = False - flow.stream = False try: req = HTTPRequest.from_stream(self.c.client_conn.rfile, body_size_limit=self.c.config.body_size_limit) @@ -917,20 +915,14 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): else: # read initially in "stream" mode, so we can get the headers separately - flow.response = self.get_response_from_server(flow.request,stream=True) - - if flow.response.content == None: - flow.stream_expecting_body = True - flow.response.content = "" # set this to empty string or other things get really confused, - # flow.stream_expecting_body now contains the state info of whether or not - # body still remains to be read + flow.response = self.get_response_from_server(flow.request, stream=True) + flow.response.stream = False # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True - responseheaders_reply = self.c.channel.ask("responseheaders", flow.response) - # hm - do we need to do something with responseheaders_reply?? + self.c.channel.ask("responseheaders", flow) # now get the rest of the request body, if body still needs to be read but not streaming this response - if flow.stream_expecting_body and not flow.stream: + if not flow.response.stream and flow.response.content is None: flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, self.c.config.body_size_limit, False) flow.server_conn = self.c.server_conn # no further manipulation of self.c.server_conn beyond this point @@ -943,7 +935,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): disconnected_while_streaming = False - if not flow.stream or not flow.stream_expecting_body: + if flow.response.content is not None: # if not streaming or there is no body to be read, we'll already have the body, just send it self.c.client_conn.send(flow.response._assemble()) else: -- cgit v1.2.3 From 562ac9e721c33b05e8889d4932dede794a9746a8 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 21 Jul 2014 14:09:24 +0200 Subject: unify stream handling --- libmproxy/protocol/http.py | 65 +++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 711cb06c..31dd39f5 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -293,7 +293,8 @@ class HTTPRequest(HTTPMessage): raise http.HttpError(400, "Invalid headers") if include_body: - content = http.read_http_body(rfile, headers, body_size_limit, True) + content = http.read_http_body(rfile, headers, body_size_limit, + method, None, True) timestamp_end = utils.timestamp() return HTTPRequest(form_in, method, scheme, host, port, path, httpversion, headers, @@ -305,7 +306,7 @@ class HTTPRequest(HTTPMessage): if form == "relative": path = self.path if self.method != "OPTIONS" else "*" request_line = '%s %s HTTP/%s.%s' % \ - (self.method, path, self.httpversion[0], self.httpversion[1]) + (self.method, path, self.httpversion[0], self.httpversion[1]) elif form == "authority": request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port, self.httpversion[0], self.httpversion[1]) @@ -634,9 +635,9 @@ class HTTPResponse(HTTPMessage): def _assemble_headers(self, preserve_transfer_encoding=False): headers = self.headers.copy() - utils.del_all(headers,['Proxy-Connection']) + utils.del_all(headers, ['Proxy-Connection']) if not preserve_transfer_encoding: - utils.del_all(headers,['Transfer-Encoding']) + utils.del_all(headers, ['Transfer-Encoding']) if self.content: headers["Content-Length"] = [str(len(self.content))] @@ -646,7 +647,8 @@ class HTTPResponse(HTTPMessage): return str(headers) def _assemble_head(self, preserve_transfer_encoding=False): - return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers(preserve_transfer_encoding=preserve_transfer_encoding)) + return '%s\r\n%s\r\n' % ( + self._assemble_first_line(), self._assemble_headers(preserve_transfer_encoding=preserve_transfer_encoding)) def _assemble(self): """ @@ -862,7 +864,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): pass self.c.close = True - def get_response_from_server(self, request, stream=False): + def get_response_from_server(self, request, include_body=True): self.c.establish_server_connection() request_raw = request._assemble() @@ -870,7 +872,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): try: self.c.server_conn.send(request_raw) res = HTTPResponse.from_stream(self.c.server_conn.rfile, request.method, - body_size_limit=self.c.config.body_size_limit, include_body=(not stream)) + body_size_limit=self.c.config.body_size_limit, include_body=include_body) return res except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v: self.c.log("error in server communication: %s" % str(v), level="debug") @@ -915,7 +917,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): else: # read initially in "stream" mode, so we can get the headers separately - flow.response = self.get_response_from_server(flow.request, stream=True) + flow.response = self.get_response_from_server(flow.request, include_body=False) flow.response.stream = False # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True @@ -923,7 +925,9 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): # now get the rest of the request body, if body still needs to be read but not streaming this response if not flow.response.stream and flow.response.content is None: - flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, self.c.config.body_size_limit, False) + flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, + self.c.config.body_size_limit, + flow.request.method, flow.response.code, False) flow.server_conn = self.c.server_conn # no further manipulation of self.c.server_conn beyond this point # we can safely set it as the final attribute value here. @@ -933,8 +937,6 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if response_reply is None or response_reply == KILL: return False - disconnected_while_streaming = False - if flow.response.content is not None: # if not streaming or there is no body to be read, we'll already have the body, just send it self.c.client_conn.send(flow.response._assemble()) @@ -946,38 +948,19 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): h = flow.response._assemble_head(preserve_transfer_encoding=True) self.c.client_conn.send(h) - # if chunked then we send back each chunk - if http.has_chunked_encoding(flow.response.headers): - while 1: - content = http.read_next_chunk(self.c.server_conn.rfile, flow.response.headers, False) - if not http.write_chunk(self.c.client_conn.wfile, content): - break - self.c.client_conn.wfile.flush() + for chunk in http.read_http_body_chunked(self.c.server_conn.rfile, + flow.response.headers, + self.c.config.body_size_limit, "GET", 200, False, 4096): + for part in chunk: + self.c.client_conn.wfile.write(part) self.c.client_conn.wfile.flush() - else: # not chunked, we send back 4k at a time - clen = http.expected_http_body_size(flow.response.headers, False) - clen = clen if clen >= 0 else (64 * 1024 * 1024 * 1024) # arbitrary max of 64G if no length set - rcount = 0 - blocksize = 4096 - while 1: - bytes_to_read = min(blocksize, clen - rcount) - if bytes_to_read == 0: - break - content = self.c.server_conn.rfile.read(bytes_to_read) - if content == "": # check for EOF - disconnected_while_streaming = True - break - rcount += len(content) - self.c.client_conn.wfile.write(content) - self.c.client_conn.wfile.flush() - if rcount >= clen: # check for having read up to clen - break - flow.timestamp_end = utils.timestamp() - if (disconnected_while_streaming or http.connection_close(flow.request.httpversion, flow.request.headers) or - http.connection_close(flow.response.httpversion, flow.response.headers)): + if (http.connection_close(flow.request.httpversion, flow.request.headers) or + http.connection_close(flow.response.httpversion, flow.response.headers) or + http.expected_http_body_size(flow.response.headers, False, flow.request.method, + flow.response.code) == -1): return False if flow.request.form_in == "authority": @@ -1009,7 +992,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if flow.request and not flow.response: self.c.channel.ask("error", flow.error) else: - pass # FIXME: Do we want to persist errors without flows? + pass # FIXME: Do we want to persist errors without flows? try: self.send_error(code, message, headers) @@ -1109,7 +1092,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): return True raise http.HttpError(400, "Invalid HTTP request form (expected: %s, got: %s)" % - (self.expected_form_in, request.form_in)) + (self.expected_form_in, request.form_in)) def authenticate(self, request): if self.c.config.authenticator: -- cgit v1.2.3 From 4b4a18a2e4d7cf3e8862192b68f5a2295da9acbe Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 21 Jul 2014 21:06:55 +0200 Subject: add --stream options, various fixes --- libmproxy/cmdline.py | 102 +++++++++++++++++++++--------------------- libmproxy/console/__init__.py | 3 ++ libmproxy/dump.py | 14 +++++- libmproxy/flow.py | 22 +++++++++ libmproxy/protocol/http.py | 19 ++++---- 5 files changed, 98 insertions(+), 62 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py index e407cd10..c9c0c75e 100644 --- a/libmproxy/cmdline.py +++ b/libmproxy/cmdline.py @@ -3,7 +3,7 @@ import re import argparse from argparse import ArgumentTypeError from netlib import http -from . import proxy, filt +from . import filt, utils from .proxy import config APP_HOST = "mitm.it" @@ -23,13 +23,13 @@ def _parse_hook(s): elif len(parts) == 3: patt, a, b = parts else: - raise ParseException("Malformed hook specifier - too few clauses: %s"%s) + raise ParseException("Malformed hook specifier - too few clauses: %s" % s) if not a: - raise ParseException("Empty clause: %s"%str(patt)) + raise ParseException("Empty clause: %s" % str(patt)) if not filt.parse(patt): - raise ParseException("Malformed filter pattern: %s"%patt) + raise ParseException("Malformed filter pattern: %s" % patt) return patt, a, b @@ -64,7 +64,7 @@ def parse_replace_hook(s): try: re.compile(regex) except re.error, e: - raise ParseException("Malformed replacement regex: %s"%str(e.message)) + raise ParseException("Malformed replacement regex: %s" % str(e.message)) return patt, regex, replacement @@ -98,7 +98,6 @@ def parse_setheader(s): def parse_server_spec(url): - normalized_url = re.sub("^https?2", "", url) p = http.parse_url(normalized_url) @@ -125,6 +124,8 @@ def get_common_options(options): if options.stickyauth_filt: stickyauth = options.stickyauth_filt + stream_large_bodies = utils.parse_size(options.stream_large_bodies) + reps = [] for i in options.replace: try: @@ -140,10 +141,9 @@ def get_common_options(options): try: v = open(path, "rb").read() except IOError, e: - raise ArgumentTypeError("Could not read replace file: %s"%path) + raise ArgumentTypeError("Could not read replace file: %s" % path) reps.append((patt, rex, v)) - setheaders = [] for i in options.setheader: try: @@ -153,29 +153,30 @@ def get_common_options(options): setheaders.append(p) return dict( - app = options.app, - app_host = options.app_host, - app_port = options.app_port, - app_external = options.app_external, - - anticache = options.anticache, - anticomp = options.anticomp, - client_replay = options.client_replay, - kill = options.kill, - no_server = options.no_server, - refresh_server_playback = not options.norefresh, - rheaders = options.rheaders, - rfile = options.rfile, - replacements = reps, - setheaders = setheaders, - server_replay = options.server_replay, - scripts = options.scripts, - stickycookie = stickycookie, - stickyauth = stickyauth, - showhost = options.showhost, - wfile = options.wfile, - verbosity = options.verbose, - nopop = options.nopop, + app=options.app, + app_host=options.app_host, + app_port=options.app_port, + app_external=options.app_external, + + anticache=options.anticache, + anticomp=options.anticomp, + client_replay=options.client_replay, + kill=options.kill, + no_server=options.no_server, + refresh_server_playback=not options.norefresh, + rheaders=options.rheaders, + rfile=options.rfile, + replacements=reps, + setheaders=setheaders, + server_replay=options.server_replay, + scripts=options.scripts, + stickycookie=stickycookie, + stickyauth=stickyauth, + stream_large_bodies=stream_large_bodies, + showhost=options.showhost, + wfile=options.wfile, + verbosity=options.verbose, + nopop=options.nopop, ) @@ -187,8 +188,8 @@ def common_options(parser): ) parser.add_argument( "--confdir", - action="store", type = str, dest="confdir", default='~/.mitmproxy', - help = "Configuration directory. (~/.mitmproxy)" + action="store", type=str, dest="confdir", default='~/.mitmproxy', + help="Configuration directory. (~/.mitmproxy)" ) parser.add_argument( "--host", @@ -240,10 +241,16 @@ def common_options(parser): "-Z", action="store", dest="body_size_limit", default=None, metavar="SIZE", - help="Byte size limit of HTTP request and response bodies."\ + help="Byte size limit of HTTP request and response bodies." \ " Understands k/m/g suffixes, i.e. 3m for 3 megabytes." ) - + parser.add_argument( + "--stream", + action="store", dest="stream_large_bodies", default=None, + metavar="SIZE", + help="Stream data to the client if response body exceeds the given threshold. " + "If streamed, the body will not be stored in any way. Understands k/m/g suffixes, i.e. 3m for 3 megabytes." + ) group = parser.add_argument_group("Proxy Options") # We could make a mutually exclusive group out of -R, -U, -T, but we don't do that because @@ -251,8 +258,8 @@ def common_options(parser): # - our own error messages are more helpful group.add_argument( "-b", - action="store", type = str, dest="addr", default='', - help = "Address to bind proxy to (defaults to all interfaces)" + action="store", type=str, dest="addr", default='', + help="Address to bind proxy to (defaults to all interfaces)" ) group.add_argument( "-U", @@ -266,8 +273,8 @@ def common_options(parser): ) group.add_argument( "-p", - action="store", type = int, dest="port", default=8080, - help = "Proxy service port." + action="store", type=int, dest="port", default=8080, + help="Proxy service port." ) group.add_argument( "-R", @@ -280,7 +287,6 @@ def common_options(parser): help="Set transparent proxy mode." ) - group = parser.add_argument_group( "Advanced Proxy Options", """ @@ -304,7 +310,6 @@ def common_options(parser): help="Override the destination server all requests are sent to: http[s][2http[s]]://host[:port]" ) - group = parser.add_argument_group("Web App") group.add_argument( "-a", @@ -315,7 +320,7 @@ def common_options(parser): "--app-host", action="store", dest="app_host", default=APP_HOST, metavar="host", help="Domain to serve the app from. For transparent mode, use an IP when\ - a DNS entry for the app domain is not present. Default: %s"%APP_HOST + a DNS entry for the app domain is not present. Default: %s" % APP_HOST ) group.add_argument( @@ -329,7 +334,6 @@ def common_options(parser): help="Serve the app outside of the proxy." ) - group = parser.add_argument_group("Client Replay") group.add_argument( "-c", @@ -352,22 +356,21 @@ def common_options(parser): "--rheader", action="append", dest="rheaders", type=str, help="Request headers to be considered during replay. " - "Can be passed multiple times." + "Can be passed multiple times." ) group.add_argument( "--norefresh", action="store_true", dest="norefresh", default=False, - help= "Disable response refresh, " - "which updates times in cookies and headers for replayed responses." + help="Disable response refresh, " + "which updates times in cookies and headers for replayed responses." ) group.add_argument( "--no-pop", action="store_true", dest="nopop", default=False, help="Disable response pop from response flow. " - "This makes it possible to replay same response multiple times." + "This makes it possible to replay same response multiple times." ) - group = parser.add_argument_group( "Replacements", """ @@ -389,7 +392,6 @@ def common_options(parser): help="Replacement pattern, where the replacement clause is a path to a file." ) - group = parser.add_argument_group( "Set Headers", """ @@ -405,7 +407,6 @@ def common_options(parser): help="Header set pattern." ) - group = parser.add_argument_group( "Proxy Authentication", """ @@ -434,5 +435,4 @@ def common_options(parser): help="Allow access to users specified in an Apache htpasswd file." ) - config.ssl_option_group(parser) diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index e660f312..073e0882 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -343,6 +343,7 @@ class Options(object): "server_replay", "stickycookie", "stickyauth", + "stream_large_bodies", "verbosity", "wfile", "nopop", @@ -391,6 +392,8 @@ class ConsoleMaster(flow.FlowMaster): print >> sys.stderr, "Sticky auth error:", r sys.exit(1) + self.set_stream_large_bodies(options.stream_large_bodies) + self.refresh_server_playback = options.refresh_server_playback self.anticache = options.anticache self.anticomp = options.anticomp diff --git a/libmproxy/dump.py b/libmproxy/dump.py index 1f107241..aeb34cc3 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -2,6 +2,7 @@ from __future__ import absolute_import import sys, os import netlib.utils from . import flow, filt, utils +from .protocol import http class DumpError(Exception): pass @@ -30,6 +31,7 @@ class Options(object): "showhost", "stickycookie", "stickyauth", + "stream_large_bodies", "verbosity", "wfile", ] @@ -69,6 +71,8 @@ class DumpMaster(flow.FlowMaster): self.showhost = options.showhost self.refresh_server_playback = options.refresh_server_playback + self.set_stream_large_bodies(options.stream_large_bodies) + if filtstr: self.filt = filt.parse(filtstr) else: @@ -80,6 +84,7 @@ class DumpMaster(flow.FlowMaster): if options.stickyauth: self.set_stickyauth(options.stickyauth) + if options.wfile: path = os.path.expanduser(options.wfile) try: @@ -157,12 +162,17 @@ class DumpMaster(flow.FlowMaster): if f.response: if self.o.flow_detail > 0: - sz = utils.pretty_size(len(f.response.content)) + if f.response.content == http.CONTENT_MISSING: + sz = "(content missing)" + else: + sz = utils.pretty_size(len(f.response.content)) result = " << %s %s"%(str_response(f.response), sz) if self.o.flow_detail > 1: result = result + "\n\n" + self.indent(4, f.response.headers) if self.o.flow_detail > 2: - if utils.isBin(f.response.content): + if f.response.content == http.CONTENT_MISSING: + cont = self.indent(4, "(content missing)") + elif utils.isBin(f.response.content): d = netlib.utils.hexdump(f.response.content) d = "\n".join("%s\t%s %s"%i for i in d) cont = self.indent(4, d) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 55097756..6b751bc9 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -145,6 +145,17 @@ class SetHeaders: f.request.headers.add(header, value) +class StreamLargeBodies(object): + def __init__(self, max_size): + self.max_size = max_size + + def run(self, flow, is_request): + r = flow.request if is_request else flow.response + code = flow.response.code if flow.response else None + expected_size = netlib.http.expected_http_body_size(r.headers, is_request, flow.request.method, code) + if not (0 <= expected_size <= self.max_size): + r.stream = True + class ClientPlaybackState: def __init__(self, flows, exit): self.flows, self.exit = flows, exit @@ -437,6 +448,7 @@ class FlowMaster(controller.Master): self.anticache = False self.anticomp = False + self.stream_large_bodies = False self.refresh_server_playback = False self.replacehooks = ReplaceHooks() self.setheaders = SetHeaders() @@ -522,6 +534,12 @@ class FlowMaster(controller.Master): self.stickycookie_state = None self.stickycookie_txt = None + def set_stream_large_bodies(self, max_size): + if max_size is not None: + self.stream_large_bodies = StreamLargeBodies(max_size) + else: + self.stream_large_bodies = False + def set_stickyauth(self, txt): if txt: flt = filt.parse(txt) @@ -708,6 +726,10 @@ class FlowMaster(controller.Master): def handle_responseheaders(self, f): self.run_script_hook("responseheaders", f) + + if self.stream_large_bodies: + self.stream_large_bodies.run(f, False) + f.reply() return f diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 31dd39f5..4648c7cf 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -924,7 +924,9 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): self.c.channel.ask("responseheaders", flow) # now get the rest of the request body, if body still needs to be read but not streaming this response - if not flow.response.stream and flow.response.content is None: + if flow.response.stream: + flow.response.content = CONTENT_MISSING + else: flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, self.c.config.body_size_limit, flow.request.method, flow.response.code, False) @@ -937,20 +939,19 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if response_reply is None or response_reply == KILL: return False - if flow.response.content is not None: - # if not streaming or there is no body to be read, we'll already have the body, just send it + if not flow.response.stream: + # no streaming: + # we already received the full response from the server and can send it to the client straight away. self.c.client_conn.send(flow.response._assemble()) else: - - # if streaming, we still need to read the body and stream its bits back to the client - - # start with head + # streaming: + # First send the body and then transfer the response incrementally: h = flow.response._assemble_head(preserve_transfer_encoding=True) self.c.client_conn.send(h) - for chunk in http.read_http_body_chunked(self.c.server_conn.rfile, flow.response.headers, - self.c.config.body_size_limit, "GET", 200, False, 4096): + self.c.config.body_size_limit, flow.request.method, + flow.response.code, False, 4096): for part in chunk: self.c.client_conn.wfile.write(part) self.c.client_conn.wfile.flush() -- cgit v1.2.3 From d9ac029ec7d18e5c1a483c7141ba86ad185874b0 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 23 Jul 2014 00:21:33 +0200 Subject: always initialize HTTPResponse.stream attribute --- libmproxy/protocol/http.py | 2 +- libmproxy/proxy/server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 4648c7cf..8b9cb448 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -592,6 +592,7 @@ class HTTPResponse(HTTPMessage): # Is this request replayed? self.is_replay = False + self.stream = False _stateobject_attributes = HTTPMessage._stateobject_attributes.copy() _stateobject_attributes.update( @@ -918,7 +919,6 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): # read initially in "stream" mode, so we can get the headers separately flow.response = self.get_response_from_server(flow.request, include_body=False) - flow.response.stream = False # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True self.c.channel.ask("responseheaders", flow) diff --git a/libmproxy/proxy/server.py b/libmproxy/proxy/server.py index 741e5f93..734e2798 100644 --- a/libmproxy/proxy/server.py +++ b/libmproxy/proxy/server.py @@ -95,14 +95,14 @@ class ConnectionHandler: except (ProxyError, tcp.NetLibError), e: handle_error(self.conntype, self, e) - except Exception, e: + except Exception: import traceback, sys self.log(traceback.format_exc(), "error") print >> sys.stderr, traceback.format_exc() print >> sys.stderr, "mitmproxy has crashed!" print >> sys.stderr, "Please lodge a bug report at: https://github.com/mitmproxy/mitmproxy" - raise e + raise self.del_server_connection() self.log("clientdisconnect", "info") -- cgit v1.2.3 From 5bb4e37dfda567adb8ae2bc1e98552dd6f87e6fe Mon Sep 17 00:00:00 2001 From: Brad Peabody Date: Thu, 24 Jul 2014 18:46:39 -0700 Subject: fix to make it so streaming with mitmproxy doesn't explode due to no reply on response --- libmproxy/console/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libmproxy') diff --git a/libmproxy/console/common.py b/libmproxy/console/common.py index a8440f79..c59d52a0 100644 --- a/libmproxy/console/common.py +++ b/libmproxy/console/common.py @@ -197,7 +197,7 @@ def format_flow(f, focus, extended=False, hostheader=False, padding=2): d.update(dict( resp_code = f.response.code, resp_is_replay = f.response.is_replay, - resp_acked = f.response.reply.acked, + resp_acked = hasattr(f.response, "reply") and f.response.reply.acked, resp_clen = contentdesc, resp_rate = "{0}/s".format(rate), )) -- cgit v1.2.3 From 4382829b7d9fde4358e12f7c9f195c7d7c854ff1 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 25 Jul 2014 18:47:48 +0200 Subject: workaround: always make sure that flow.response.reply exists --- libmproxy/console/common.py | 2 +- libmproxy/console/flowview.py | 2 +- libmproxy/controller.py | 2 +- libmproxy/flow.py | 5 +++-- libmproxy/protocol/http.py | 5 +++-- 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'libmproxy') diff --git a/libmproxy/console/common.py b/libmproxy/console/common.py index c59d52a0..a8440f79 100644 --- a/libmproxy/console/common.py +++ b/libmproxy/console/common.py @@ -197,7 +197,7 @@ def format_flow(f, focus, extended=False, hostheader=False, padding=2): d.update(dict( resp_code = f.response.code, resp_is_replay = f.response.is_replay, - resp_acked = hasattr(f.response, "reply") and f.response.reply.acked, + resp_acked = f.response.reply.acked, resp_clen = contentdesc, resp_rate = "{0}/s".format(rate), )) diff --git a/libmproxy/console/flowview.py b/libmproxy/console/flowview.py index ac10e809..4aaf8944 100644 --- a/libmproxy/console/flowview.py +++ b/libmproxy/console/flowview.py @@ -153,7 +153,7 @@ class FlowView(common.WWrap): def cont_view_handle_missing(self, conn, viewmode): if conn.content == CONTENT_MISSING: - msg, body = "", [urwid.Text([("error", "[content missing]")])], 0 + msg, body = "", [urwid.Text([("error", "[content missing]")])] else: msg, body = self.content_view(viewmode, conn) diff --git a/libmproxy/controller.py b/libmproxy/controller.py index 63e44241..4a72cf80 100644 --- a/libmproxy/controller.py +++ b/libmproxy/controller.py @@ -49,7 +49,7 @@ class Channel: try: # The timeout is here so we can handle a should_exit event. g = m.reply.q.get(timeout=0.5) - except Queue.Empty: # pragma: nocover + except Queue.Empty: # pragma: nocover continue return g diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 6b751bc9..fce4cd91 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -724,13 +724,14 @@ class FlowMaster(controller.Master): self.process_new_request(f) return f - def handle_responseheaders(self, f): + def handle_responseheaders(self, resp): + f = resp.flow self.run_script_hook("responseheaders", f) if self.stream_large_bodies: self.stream_large_bodies.run(f, False) - f.reply() + resp.reply() return f def handle_response(self, r): diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 8b9cb448..cc6533b2 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -921,7 +921,7 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): flow.response = self.get_response_from_server(flow.request, include_body=False) # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True - self.c.channel.ask("responseheaders", flow) + self.c.channel.ask("responseheaders", flow.response) # now get the rest of the request body, if body still needs to be read but not streaming this response if flow.response.stream: @@ -931,8 +931,9 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): self.c.config.body_size_limit, flow.request.method, flow.response.code, False) - flow.server_conn = self.c.server_conn # no further manipulation of self.c.server_conn beyond this point + # no further manipulation of self.c.server_conn beyond this point # we can safely set it as the final attribute value here. + flow.server_conn = self.c.server_conn self.c.log("response", "debug", [flow.response._assemble_first_line()]) response_reply = self.c.channel.ask("response", flow.response) -- cgit v1.2.3 From 5a808ca2a7db9649eac69f8e4eb50e5ef24197fa Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 25 Jul 2014 18:55:01 +0200 Subject: add status bar indicator for streaming --- libmproxy/console/__init__.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'libmproxy') diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index 073e0882..3d025739 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -165,6 +165,8 @@ class StatusBar(common.WWrap): opts.append("no-upstream-cert") if self.master.state.follow_focus: opts.append("following") + if self.master.stream_large_bodies: + opts.append("stream:%s" % utils.pretty_size(self.master.stream_large_bodies.max_size)) if opts: r.append("[%s]"%(":".join(opts))) -- cgit v1.2.3