diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/flowbasic | 24 | ||||
-rw-r--r-- | examples/har_dump.py | 216 | ||||
-rw-r--r-- | examples/har_extractor.py | 264 | ||||
-rw-r--r-- | examples/redirect_requests.py | 7 |
4 files changed, 230 insertions, 281 deletions
diff --git a/examples/flowbasic b/examples/flowbasic index 74af4e08..67c6f596 100755 --- a/examples/flowbasic +++ b/examples/flowbasic @@ -8,7 +8,7 @@ Note that request and response messages are not automatically replied to, so we need to implement handlers to do this. """ -from mitmproxy import flow, controller +from mitmproxy import flow, controller, options from mitmproxy.proxy import ProxyServer, ProxyConfig @@ -21,21 +21,23 @@ class MyMaster(flow.FlowMaster): @controller.handler def request(self, f): - f = flow.FlowMaster.request(self, f) - print(f) + print("request", f) @controller.handler def response(self, f): - f = flow.FlowMaster.response(self, f) - print(f) + print("response", f) + @controller.handler + def error(self, f): + print("error", f) + + @controller.handler + def log(self, l): + print("log", l.msg) -config = ProxyConfig( - port=8080, - # use ~/.mitmproxy/mitmproxy-ca.pem as default CA file. - cadir="~/.mitmproxy/" -) +opts = options.Options(cadir="~/.mitmproxy/") +config = ProxyConfig(opts) state = flow.State() server = ProxyServer(config) -m = MyMaster(server, state) +m = MyMaster(opts, server, state) m.run() diff --git a/examples/har_dump.py b/examples/har_dump.py new file mode 100644 index 00000000..95090edb --- /dev/null +++ b/examples/har_dump.py @@ -0,0 +1,216 @@ +""" +This inline script can be used to dump flows as HAR files. +""" + + +import pprint +import json +import sys +import base64 +import zlib + +from datetime import datetime +import pytz + +import mitmproxy + +from netlib import version +from netlib import strutils +from netlib.http import cookies + +HAR = {} + +# A list of server seen till now is maintained so we can avoid +# using 'connect' time for entries that use an existing connection. +SERVERS_SEEN = set() + + +def start(): + """ + Called once on script startup before any other events. + """ + if len(sys.argv) != 2: + raise ValueError( + 'Usage: -s "har_dump.py filename" ' + '(- will output to stdout, filenames ending with .zhar ' + 'will result in compressed har)' + ) + + HAR.update({ + "log": { + "version": "1.2", + "creator": { + "name": "mitmproxy har_dump", + "version": "0.1", + "comment": "mitmproxy version %s" % version.MITMPROXY + }, + "entries": [] + } + }) + + +def response(flow): + """ + Called when a server response has been received. + """ + + # -1 indicates that these values do not apply to current request + ssl_time = -1 + connect_time = -1 + + if flow.server_conn and flow.server_conn not in SERVERS_SEEN: + connect_time = (flow.server_conn.timestamp_tcp_setup - + flow.server_conn.timestamp_start) + + if flow.server_conn.timestamp_ssl_setup is not None: + ssl_time = (flow.server_conn.timestamp_ssl_setup - + flow.server_conn.timestamp_tcp_setup) + + SERVERS_SEEN.add(flow.server_conn) + + # Calculate raw timings from timestamps. DNS timings can not be calculated + # for lack of a way to measure it. The same goes for HAR blocked. + # mitmproxy will open a server connection as soon as it receives the host + # and port from the client connection. So, the time spent waiting is actually + # spent waiting between request.timestamp_end and response.timestamp_start + # thus it correlates to HAR wait instead. + timings_raw = { + 'send': flow.request.timestamp_end - flow.request.timestamp_start, + 'receive': flow.response.timestamp_end - flow.response.timestamp_start, + 'wait': flow.response.timestamp_start - flow.request.timestamp_end, + 'connect': connect_time, + 'ssl': ssl_time, + } + + # HAR timings are integers in ms, so we re-encode the raw timings to that format. + timings = dict([(k, int(1000 * v)) for k, v in timings_raw.items()]) + + # full_time is the sum of all timings. + # Timings set to -1 will be ignored as per spec. + full_time = sum(v for v in timings.values() if v > -1) + + started_date_time = format_datetime(datetime.utcfromtimestamp(flow.request.timestamp_start)) + + # Response body size and encoding + response_body_size = len(flow.response.raw_content) + response_body_decoded_size = len(flow.response.content) + response_body_compression = response_body_decoded_size - response_body_size + + entry = { + "startedDateTime": started_date_time, + "time": full_time, + "request": { + "method": flow.request.method, + "url": flow.request.url, + "httpVersion": flow.request.http_version, + "cookies": format_request_cookies(flow.request.cookies.fields), + "headers": name_value(flow.request.headers), + "queryString": name_value(flow.request.query or {}), + "headersSize": len(str(flow.request.headers)), + "bodySize": len(flow.request.content), + }, + "response": { + "status": flow.response.status_code, + "statusText": flow.response.reason, + "httpVersion": flow.response.http_version, + "cookies": format_response_cookies(flow.response.cookies.fields), + "headers": name_value(flow.response.headers), + "content": { + "size": response_body_size, + "compression": response_body_compression, + "mimeType": flow.response.headers.get('Content-Type', '') + }, + "redirectURL": flow.response.headers.get('Location', ''), + "headersSize": len(str(flow.response.headers)), + "bodySize": response_body_size, + }, + "cache": {}, + "timings": timings, + } + + # Store binay data as base64 + if strutils.is_mostly_bin(flow.response.content): + b64 = base64.b64encode(flow.response.content) + entry["response"]["content"]["text"] = b64.decode('ascii') + entry["response"]["content"]["encoding"] = "base64" + else: + entry["response"]["content"]["text"] = flow.response.text + + if flow.request.method in ["POST", "PUT", "PATCH"]: + entry["request"]["postData"] = { + "mimeType": flow.request.headers.get("Content-Type", "").split(";")[0], + "text": flow.request.content, + "params": name_value(flow.request.urlencoded_form) + } + + if flow.server_conn: + entry["serverIPAddress"] = str(flow.server_conn.ip_address.address[0]) + + HAR["log"]["entries"].append(entry) + + +def done(): + """ + Called once on script shutdown, after any other events. + """ + dump_file = sys.argv[1] + + if dump_file == '-': + mitmproxy.ctx.log(pprint.pformat(HAR)) + else: + json_dump = json.dumps(HAR, indent=2) + + if dump_file.endswith('.zhar'): + json_dump = zlib.compress(json_dump, 9) + + with open(dump_file, "w") as f: + f.write(json_dump) + + mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump)) + + +def format_datetime(dt): + return dt.replace(tzinfo=pytz.timezone("UTC")).isoformat() + + +def format_cookies(cookie_list): + rv = [] + + for name, value, attrs in cookie_list: + cookie_har = { + "name": name, + "value": value, + } + + # HAR only needs some attributes + for key in ["path", "domain", "comment"]: + if key in attrs: + cookie_har[key] = attrs[key] + + # These keys need to be boolean! + for key in ["httpOnly", "secure"]: + cookie_har[key] = bool(key in attrs) + + # Expiration time needs to be formatted + expire_ts = cookies.get_expiration_ts(attrs) + if expire_ts is not None: + cookie_har["expires"] = format_datetime(datetime.fromtimestamp(expire_ts)) + + rv.append(cookie_har) + + return rv + + +def format_request_cookies(fields): + return format_cookies(cookies.group_cookies(fields)) + + +def format_response_cookies(fields): + return format_cookies((c[0], c[1].value, c[1].attrs) for c in fields) + + +def name_value(obj): + """ + Convert (key, value) pairs to HAR format. + """ + return [{"name": k, "value": v} for k, v in obj.items()] diff --git a/examples/har_extractor.py b/examples/har_extractor.py deleted file mode 100644 index 76059d8e..00000000 --- a/examples/har_extractor.py +++ /dev/null @@ -1,264 +0,0 @@ -""" - This inline script utilizes harparser.HAR from - https://github.com/JustusW/harparser to generate a HAR log object. -""" -import mitmproxy.ctx -import six -import sys -import pytz -from harparser import HAR - -from datetime import datetime - - -class _HARLog(HAR.log): - # The attributes need to be registered here for them to actually be - # available later via self. This is due to HAREncodable linking __getattr__ - # to __getitem__. Anything that is set only in __init__ will just be added - # as key/value pair to self.__classes__. - __page_list__ = [] - __page_count__ = 0 - __page_ref__ = {} - - def __init__(self, page_list=[]): - self.__page_list__ = page_list - self.__page_count__ = 0 - self.__page_ref__ = {} - - HAR.log.__init__(self, {"version": "1.2", - "creator": {"name": "MITMPROXY HARExtractor", - "version": "0.1", - "comment": ""}, - "pages": [], - "entries": []}) - - def reset(self): - self.__init__(self.__page_list__) - - def add(self, obj): - if isinstance(obj, HAR.pages): - self['pages'].append(obj) - if isinstance(obj, HAR.entries): - self['entries'].append(obj) - - def create_page_id(self): - self.__page_count__ += 1 - return "autopage_%s" % str(self.__page_count__) - - def set_page_ref(self, page, ref): - self.__page_ref__[page] = ref - - def get_page_ref(self, page): - return self.__page_ref__.get(page, None) - - def get_page_list(self): - return self.__page_list__ - - -class Context(object): - pass - -context = Context() - - -def start(): - """ - On start we create a HARLog instance. You will have to adapt this to - suit your actual needs of HAR generation. As it will probably be - necessary to cluster logs by IPs or reset them from time to time. - """ - if sys.version_info >= (3, 0): - raise RuntimeError( - "har_extractor.py does not work on Python 3. " - "Please check out https://github.com/mitmproxy/mitmproxy/issues/1320 " - "if you want to help making this work again." - ) - context.dump_file = None - if len(sys.argv) > 1: - context.dump_file = sys.argv[1] - else: - raise ValueError( - 'Usage: -s "har_extractor.py filename" ' - '(- will output to stdout, filenames ending with .zhar ' - 'will result in compressed har)' - ) - context.HARLog = _HARLog() - context.seen_server = set() - - -def response(flow): - """ - Called when a server response has been received. At the time of this - message both a request and a response are present and completely done. - """ - # Values are converted from float seconds to int milliseconds later. - ssl_time = -.001 - connect_time = -.001 - if flow.server_conn not in context.seen_server: - # Calculate the connect_time for this server_conn. Afterwards add it to - # seen list, in order to avoid the connect_time being present in entries - # that use an existing connection. - connect_time = (flow.server_conn.timestamp_tcp_setup - - flow.server_conn.timestamp_start) - context.seen_server.add(flow.server_conn) - - if flow.server_conn.timestamp_ssl_setup is not None: - # Get the ssl_time for this server_conn as the difference between - # the start of the successful tcp setup and the successful ssl - # setup. If no ssl setup has been made it is left as -1 since it - # doesn't apply to this connection. - ssl_time = (flow.server_conn.timestamp_ssl_setup - - flow.server_conn.timestamp_tcp_setup) - - # Calculate the raw timings from the different timestamps present in the - # request and response object. For lack of a way to measure it dns timings - # can not be calculated. The same goes for HAR blocked: MITMProxy will open - # a server connection as soon as it receives the host and port from the - # client connection. So the time spent waiting is actually spent waiting - # between request.timestamp_end and response.timestamp_start thus it - # correlates to HAR wait instead. - timings_raw = { - 'send': flow.request.timestamp_end - flow.request.timestamp_start, - 'wait': flow.response.timestamp_start - flow.request.timestamp_end, - 'receive': flow.response.timestamp_end - flow.response.timestamp_start, - 'connect': connect_time, - 'ssl': ssl_time - } - - # HAR timings are integers in ms, so we have to re-encode the raw timings to - # that format. - timings = dict([(k, int(1000 * v)) for k, v in six.iteritems(timings_raw)]) - - # The full_time is the sum of all timings. - # Timings set to -1 will be ignored as per spec. - full_time = sum(v for v in timings.values() if v > -1) - - started_date_time = datetime.utcfromtimestamp( - flow.request.timestamp_start).replace(tzinfo=pytz.timezone("UTC")).isoformat() - - request_query_string = [{"name": k, "value": v} - for k, v in flow.request.query or {}] - - response_body_size = len(flow.response.content) - response_body_decoded_size = len(flow.response.content) - response_body_compression = response_body_decoded_size - response_body_size - - entry = HAR.entries({ - "startedDateTime": started_date_time, - "time": full_time, - "request": { - "method": flow.request.method, - "url": flow.request.url, - "httpVersion": flow.request.http_version, - "cookies": format_cookies(flow.request.cookies), - "headers": format_headers(flow.request.headers), - "queryString": request_query_string, - "headersSize": len(str(flow.request.headers)), - "bodySize": len(flow.request.content), - }, - "response": { - "status": flow.response.status_code, - "statusText": flow.response.reason, - "httpVersion": flow.response.http_version, - "cookies": format_cookies(flow.response.cookies), - "headers": format_headers(flow.response.headers), - "content": { - "size": response_body_size, - "compression": response_body_compression, - "mimeType": flow.response.headers.get('Content-Type', '') - }, - "redirectURL": flow.response.headers.get('Location', ''), - "headersSize": len(str(flow.response.headers)), - "bodySize": response_body_size, - }, - "cache": {}, - "timings": timings, - }) - - # If the current url is in the page list of context.HARLog or - # does not have a referrer, we add it as a new pages object. - is_new_page = ( - flow.request.url in context.HARLog.get_page_list() or - flow.request.headers.get('Referer') is None - ) - if is_new_page: - page_id = context.HARLog.create_page_id() - context.HARLog.add( - HAR.pages({ - "startedDateTime": entry['startedDateTime'], - "id": page_id, - "title": flow.request.url, - "pageTimings": {} - }) - ) - context.HARLog.set_page_ref(flow.request.url, page_id) - entry['pageref'] = page_id - - # Lookup the referer in the page_ref of context.HARLog to point this entries - # pageref attribute to the right pages object, then set it as a new - # reference to build a reference tree. - elif context.HARLog.get_page_ref(flow.request.headers.get('Referer')) is not None: - entry['pageref'] = context.HARLog.get_page_ref( - flow.request.headers['Referer'] - ) - context.HARLog.set_page_ref( - flow.request.headers['Referer'], entry['pageref'] - ) - - context.HARLog.add(entry) - - -def done(): - """ - Called once on script shutdown, after any other events. - """ - import pprint - import json - - json_dump = context.HARLog.json() - compressed_json_dump = context.HARLog.compress() - - if context.dump_file == '-': - mitmproxy.ctx.log(pprint.pformat(json.loads(json_dump))) - elif context.dump_file.endswith('.zhar'): - with open(context.dump_file, "wb") as f: - f.write(compressed_json_dump) - else: - with open(context.dump_file, "wb") as f: - f.write(json_dump) - mitmproxy.ctx.log( - "HAR log finished with %s bytes (%s bytes compressed)" % ( - len(json_dump), len(compressed_json_dump) - ) - ) - mitmproxy.ctx.log( - "Compression rate is %s%%" % str( - 100. * len(compressed_json_dump) / len(json_dump) - ) - ) - - -def format_cookies(obj): - if obj: - return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()] - return "" - - -def format_headers(obj): - if obj: - return [{"name": k, "value": v} for k, v in obj.fields] - return "" - - -def print_attributes(obj, filter_string=None, hide_privates=False): - """ - Useful helper method to quickly get all attributes of an object and its - values. - """ - for attr in dir(obj): - if hide_privates and "__" in attr: - continue - if filter_string is not None and filter_string not in attr: - continue - value = getattr(obj, attr) - print("%s.%s" % ('obj', attr), value, type(value)) diff --git a/examples/redirect_requests.py b/examples/redirect_requests.py index 36594bcd..8cde1bfd 100644 --- a/examples/redirect_requests.py +++ b/examples/redirect_requests.py @@ -2,7 +2,6 @@ This example shows two ways to redirect flows to other destinations. """ from mitmproxy.models import HTTPResponse -from netlib.http import Headers def request(flow): @@ -12,11 +11,7 @@ def request(flow): # Method 1: Answer with a locally generated response if flow.request.pretty_host.endswith("example.com"): - resp = HTTPResponse( - b"HTTP/1.1", 200, b"OK", - Headers(Content_Type="text/html"), - b"helloworld" - ) + resp = HTTPResponse.make(200, b"Hello World", {"Content-Type": "text/html"}) flow.reply.send(resp) # Method 2: Redirect the request to a different server |