From 5a68d21e8c87e05f2ad0c18e6c7c505f5e9fc93d Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 19 Oct 2016 15:08:35 +1300 Subject: Remove flow module entirely, move contents to top level mitmproxy.flow.io -> mitmproxy.io mitmproxy.flow.export -> mitmproxy.export --- examples/flowbasic | 5 +- examples/flowwriter.py | 5 +- examples/read_dumpfile | 2 +- mitmproxy/addons/clientplayback.py | 7 +- mitmproxy/addons/filestreamer.py | 2 +- mitmproxy/addons/serverplayback.py | 6 +- mitmproxy/console/common.py | 2 +- mitmproxy/console/flowlist.py | 2 +- mitmproxy/console/flowview.py | 2 +- mitmproxy/console/master.py | 6 +- mitmproxy/dump.py | 4 +- mitmproxy/export.py | 200 +++++++++++++++++++++++++++++ mitmproxy/flow/__init__.py | 7 - mitmproxy/flow/export.py | 200 ----------------------------- mitmproxy/flow/io.py | 72 ----------- mitmproxy/flow/io_compat.py | 136 -------------------- mitmproxy/io.py | 72 +++++++++++ mitmproxy/io_compat.py | 136 ++++++++++++++++++++ mitmproxy/master.py | 2 +- mitmproxy/web/app.py | 6 +- test/mitmproxy/addons/test_filestreamer.py | 2 +- test/mitmproxy/mastertest.py | 4 +- test/mitmproxy/test_dump.py | 7 +- test/mitmproxy/test_flow.py | 19 +-- test/mitmproxy/test_flow_export.py | 2 +- test/mitmproxy/test_flow_format_compat.py | 10 +- 26 files changed, 458 insertions(+), 460 deletions(-) create mode 100644 mitmproxy/export.py delete mode 100644 mitmproxy/flow/__init__.py delete mode 100644 mitmproxy/flow/export.py delete mode 100644 mitmproxy/flow/io.py delete mode 100644 mitmproxy/flow/io_compat.py create mode 100644 mitmproxy/io.py create mode 100644 mitmproxy/io_compat.py diff --git a/examples/flowbasic b/examples/flowbasic index bac98916..cb1e4ea4 100755 --- a/examples/flowbasic +++ b/examples/flowbasic @@ -8,7 +8,7 @@ Note that request and response messages are not automatically replied to, so we need to implement handlers to do this. """ -from mitmproxy import flow, controller, options +from mitmproxy import controller, options, master from mitmproxy.proxy import ProxyServer, ProxyConfig @@ -37,7 +37,6 @@ class MyMaster(master.Master): opts = options.Options(cadir="~/.mitmproxy/") config = ProxyConfig(opts) -state = state.State() server = ProxyServer(config) -m = MyMaster(opts, server, state) +m = MyMaster(opts, server) m.run() diff --git a/examples/flowwriter.py b/examples/flowwriter.py index df2e5a40..afce85aa 100644 --- a/examples/flowwriter.py +++ b/examples/flowwriter.py @@ -1,7 +1,6 @@ import random import sys - -from mitmproxy.flow import FlowWriter +from mimtproxy import io class Writer: @@ -10,7 +9,7 @@ class Writer: f = sys.stdout else: f = open(path, "wb") - self.w = FlowWriter(f) + self.w = io.FlowWriter(f) def response(self, flow): if random.choice([True, False]): diff --git a/examples/read_dumpfile b/examples/read_dumpfile index b1001c3d..e0e9064a 100644 --- a/examples/read_dumpfile +++ b/examples/read_dumpfile @@ -9,7 +9,7 @@ import pprint import sys with open(sys.argv[1], "rb") as logfile: - freader = flow.FlowReader(logfile) + freader = io.FlowReader(logfile) pp = pprint.PrettyPrinter(indent=4) try: for f in freader.stream(): diff --git a/mitmproxy/addons/clientplayback.py b/mitmproxy/addons/clientplayback.py index 75ef2ffd..848d07c3 100644 --- a/mitmproxy/addons/clientplayback.py +++ b/mitmproxy/addons/clientplayback.py @@ -1,4 +1,7 @@ -from mitmproxy import exceptions, flow, ctx +from mitmproxy import exceptions +from mitmproxy import ctx +from mitmproxy import io + class ClientPlayback: @@ -21,7 +24,7 @@ class ClientPlayback: if options.client_replay: ctx.log.info(options.client_replay) try: - flows = flow.read_flows_from_paths(options.client_replay) + flows = io.read_flows_from_paths(options.client_replay) except exceptions.FlowReadException as e: raise exceptions.OptionsError(str(e)) self.load(flows) diff --git a/mitmproxy/addons/filestreamer.py b/mitmproxy/addons/filestreamer.py index b1643b21..552d936f 100644 --- a/mitmproxy/addons/filestreamer.py +++ b/mitmproxy/addons/filestreamer.py @@ -2,7 +2,7 @@ import os.path from mitmproxy import exceptions from mitmproxy import flowfilter -from mitmproxy.flow import io +from mitmproxy import io class FileStreamer: diff --git a/mitmproxy/addons/serverplayback.py b/mitmproxy/addons/serverplayback.py index 383e2754..1161ce23 100644 --- a/mitmproxy/addons/serverplayback.py +++ b/mitmproxy/addons/serverplayback.py @@ -2,7 +2,9 @@ import urllib import hashlib from netlib import strutils -from mitmproxy import exceptions, flow, ctx +from mitmproxy import exceptions +from mitmproxy import ctx +from mitmproxy import io class ServerPlayback: @@ -91,7 +93,7 @@ class ServerPlayback: self.clear() if options.server_replay: try: - flows = flow.read_flows_from_paths(options.server_replay) + flows = io.read_flows_from_paths(options.server_replay) except exceptions.FlowReadException as e: raise exceptions.OptionsError(str(e)) self.load(flows) diff --git a/mitmproxy/console/common.py b/mitmproxy/console/common.py index 5e335c71..91253668 100644 --- a/mitmproxy/console/common.py +++ b/mitmproxy/console/common.py @@ -9,7 +9,7 @@ import urwid.util import netlib from mitmproxy import utils from mitmproxy.console import signals -from mitmproxy.flow import export +from mitmproxy import export from netlib import human try: diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py index ba555647..09a5d027 100644 --- a/mitmproxy/console/flowlist.py +++ b/mitmproxy/console/flowlist.py @@ -4,7 +4,7 @@ import netlib.http.url from mitmproxy import exceptions from mitmproxy.console import common from mitmproxy.console import signals -from mitmproxy.flow import export +from mitmproxy import export def _mkhelp(): diff --git a/mitmproxy/console/flowview.py b/mitmproxy/console/flowview.py index b8f91bdb..19afcdbc 100644 --- a/mitmproxy/console/flowview.py +++ b/mitmproxy/console/flowview.py @@ -15,7 +15,7 @@ from mitmproxy.console import grideditor from mitmproxy.console import searchable from mitmproxy.console import signals from mitmproxy.console import tabs -from mitmproxy.flow import export +from mitmproxy import export from netlib.http import Headers from netlib.http import status_codes diff --git a/mitmproxy/console/master.py b/mitmproxy/console/master.py index 46dd8254..4921373f 100644 --- a/mitmproxy/console/master.py +++ b/mitmproxy/console/master.py @@ -19,7 +19,7 @@ from mitmproxy import contentviews from mitmproxy import controller from mitmproxy import exceptions from mitmproxy import master -from mitmproxy import flow +from mitmproxy import io from mitmproxy import flowfilter from mitmproxy import utils from mitmproxy.addons import state @@ -346,7 +346,7 @@ class ConsoleMaster(master.Master): - a list of flows, otherwise. """ try: - return flow.read_flows_from_paths(path) + return io.read_flows_from_paths(path) except exceptions.FlowReadException as e: signals.status_message.send(message=str(e)) @@ -580,7 +580,7 @@ class ConsoleMaster(master.Master): path = os.path.expanduser(path) try: f = open(path, "wb") - fw = flow.FlowWriter(f) + fw = io.FlowWriter(f) for i in flows: fw.add(i) f.close() diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index e8b3126f..47f69303 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -3,8 +3,8 @@ from typing import Optional from mitmproxy import controller from mitmproxy import exceptions -from mitmproxy import flow from mitmproxy import addons +from mitmproxy import io from mitmproxy import options from mitmproxy import master from mitmproxy.addons import dumper, termlog @@ -68,7 +68,7 @@ class DumpMaster(master.Master): or raises a DumpError if that fails. """ try: - return flow.read_flows_from_paths(paths) + return io.read_flows_from_paths(paths) except exceptions.FlowReadException as e: raise DumpError(str(e)) diff --git a/mitmproxy/export.py b/mitmproxy/export.py new file mode 100644 index 00000000..e5f4d34a --- /dev/null +++ b/mitmproxy/export.py @@ -0,0 +1,200 @@ +import json +import re +import textwrap +import urllib + +import netlib.http + + +def _native(s): + if isinstance(s, bytes): + return s.decode() + return s + + +def dictstr(items, indent): + lines = [] + for k, v in items: + lines.append(indent + "%s: %s,\n" % (repr(_native(k)), repr(_native(v)))) + return "{\n%s}\n" % "".join(lines) + + +def curl_command(flow): + data = "curl " + + request = flow.request.copy() + request.decode(strict=False) + + for k, v in request.headers.items(multi=True): + data += "-H '%s:%s' " % (k, v) + + if request.method != "GET": + data += "-X %s " % request.method + + full_url = request.scheme + "://" + request.host + request.path + data += "'%s'" % full_url + + if request.content: + data += " --data-binary '%s'" % _native(request.content) + + return data + + +def python_code(flow): + code = textwrap.dedent(""" + import requests + + url = '{url}' + {headers}{params}{data} + response = requests.request( + method='{method}', + url=url,{args} + ) + + print(response.text) + """).strip() + + components = [urllib.parse.quote(c, safe="") for c in flow.request.path_components] + url = flow.request.scheme + "://" + flow.request.host + "/" + "/".join(components) + + args = "" + headers = "" + if flow.request.headers: + headers += "\nheaders = %s\n" % dictstr(flow.request.headers.fields, " ") + args += "\n headers=headers," + + params = "" + if flow.request.query: + params = "\nparams = %s\n" % dictstr(flow.request.query.collect(), " ") + args += "\n params=params," + + data = "" + if flow.request.body: + json_obj = is_json(flow.request.headers, flow.request.content) + if json_obj: + data = "\njson = %s\n" % dictstr(sorted(json_obj.items()), " ") + args += "\n json=json," + else: + data = "\ndata = '''%s'''\n" % _native(flow.request.content) + args += "\n data=data," + + code = code.format( + url=url, + headers=headers, + params=params, + data=data, + method=flow.request.method, + args=args, + ) + return code + + +def is_json(headers: netlib.http.Headers, content: bytes) -> bool: + if headers: + ct = netlib.http.parse_content_type(headers.get("content-type", "")) + if ct and "%s/%s" % (ct[0], ct[1]) == "application/json": + try: + return json.loads(content.decode("utf8", "surrogateescape")) + except ValueError: + return False + return False + + +def locust_code(flow): + code = textwrap.dedent(""" + from locust import HttpLocust, TaskSet, task + + class UserBehavior(TaskSet): + def on_start(self): + ''' on_start is called when a Locust start before any task is scheduled ''' + self.{name}() + + @task() + def {name}(self): + url = '{url}' + {headers}{params}{data} + self.response = self.client.request( + method='{method}', + url=url,{args} + ) + + ### Additional tasks can go here ### + + + class WebsiteUser(HttpLocust): + task_set = UserBehavior + min_wait = 1000 + max_wait = 3000 +""").strip() + + components = [urllib.parse.quote(c, safe="") for c in flow.request.path_components] + name = re.sub('\W|^(?=\d)', '_', "_".join(components)) + if name == "" or name is None: + new_name = "_".join([str(flow.request.host), str(flow.request.timestamp_start)]) + name = re.sub('\W|^(?=\d)', '_', new_name) + + url = flow.request.scheme + "://" + flow.request.host + "/" + "/".join(components) + + args = "" + headers = "" + if flow.request.headers: + lines = [ + (_native(k), _native(v)) for k, v in flow.request.headers.fields + if _native(k).lower() not in ["host", "cookie"] + ] + lines = [" '%s': '%s',\n" % (k, v) for k, v in lines] + headers += "\n headers = {\n%s }\n" % "".join(lines) + args += "\n headers=headers," + + params = "" + if flow.request.query: + lines = [" %s: %s,\n" % (repr(k), repr(v)) for k, v in flow.request.query.collect()] + params = "\n params = {\n%s }\n" % "".join(lines) + args += "\n params=params," + + data = "" + if flow.request.content: + data = "\n data = '''%s'''\n" % _native(flow.request.content) + args += "\n data=data," + + code = code.format( + name=name, + url=url, + headers=headers, + params=params, + data=data, + method=flow.request.method, + args=args, + ) + + host = flow.request.scheme + "://" + flow.request.host + code = code.replace(host, "' + self.locust.host + '") + code = code.replace(urllib.parse.quote_plus(host), "' + quote_plus(self.locust.host) + '") + code = code.replace(urllib.parse.quote(host), "' + quote(self.locust.host) + '") + code = code.replace("'' + ", "") + + return code + + +def locust_task(flow): + code = locust_code(flow) + start_task = len(code.split('@task')[0]) - 4 + end_task = -19 - len(code.split('### Additional')[1]) + task_code = code[start_task:end_task] + + return task_code + + +def url(flow): + return flow.request.url + + +EXPORTERS = [ + ("content", "c", None), + ("headers+content", "h", None), + ("url", "u", url), + ("as curl command", "r", curl_command), + ("as python code", "p", python_code), + ("as locust code", "l", locust_code), + ("as locust task", "t", locust_task), +] diff --git a/mitmproxy/flow/__init__.py b/mitmproxy/flow/__init__.py deleted file mode 100644 index 69e3ed08..00000000 --- a/mitmproxy/flow/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from mitmproxy.flow import export -from mitmproxy.flow.io import FlowWriter, FilteredFlowWriter, FlowReader, read_flows_from_paths - -__all__ = [ - "export", - "FlowWriter", "FilteredFlowWriter", "FlowReader", "read_flows_from_paths", -] diff --git a/mitmproxy/flow/export.py b/mitmproxy/flow/export.py deleted file mode 100644 index e5f4d34a..00000000 --- a/mitmproxy/flow/export.py +++ /dev/null @@ -1,200 +0,0 @@ -import json -import re -import textwrap -import urllib - -import netlib.http - - -def _native(s): - if isinstance(s, bytes): - return s.decode() - return s - - -def dictstr(items, indent): - lines = [] - for k, v in items: - lines.append(indent + "%s: %s,\n" % (repr(_native(k)), repr(_native(v)))) - return "{\n%s}\n" % "".join(lines) - - -def curl_command(flow): - data = "curl " - - request = flow.request.copy() - request.decode(strict=False) - - for k, v in request.headers.items(multi=True): - data += "-H '%s:%s' " % (k, v) - - if request.method != "GET": - data += "-X %s " % request.method - - full_url = request.scheme + "://" + request.host + request.path - data += "'%s'" % full_url - - if request.content: - data += " --data-binary '%s'" % _native(request.content) - - return data - - -def python_code(flow): - code = textwrap.dedent(""" - import requests - - url = '{url}' - {headers}{params}{data} - response = requests.request( - method='{method}', - url=url,{args} - ) - - print(response.text) - """).strip() - - components = [urllib.parse.quote(c, safe="") for c in flow.request.path_components] - url = flow.request.scheme + "://" + flow.request.host + "/" + "/".join(components) - - args = "" - headers = "" - if flow.request.headers: - headers += "\nheaders = %s\n" % dictstr(flow.request.headers.fields, " ") - args += "\n headers=headers," - - params = "" - if flow.request.query: - params = "\nparams = %s\n" % dictstr(flow.request.query.collect(), " ") - args += "\n params=params," - - data = "" - if flow.request.body: - json_obj = is_json(flow.request.headers, flow.request.content) - if json_obj: - data = "\njson = %s\n" % dictstr(sorted(json_obj.items()), " ") - args += "\n json=json," - else: - data = "\ndata = '''%s'''\n" % _native(flow.request.content) - args += "\n data=data," - - code = code.format( - url=url, - headers=headers, - params=params, - data=data, - method=flow.request.method, - args=args, - ) - return code - - -def is_json(headers: netlib.http.Headers, content: bytes) -> bool: - if headers: - ct = netlib.http.parse_content_type(headers.get("content-type", "")) - if ct and "%s/%s" % (ct[0], ct[1]) == "application/json": - try: - return json.loads(content.decode("utf8", "surrogateescape")) - except ValueError: - return False - return False - - -def locust_code(flow): - code = textwrap.dedent(""" - from locust import HttpLocust, TaskSet, task - - class UserBehavior(TaskSet): - def on_start(self): - ''' on_start is called when a Locust start before any task is scheduled ''' - self.{name}() - - @task() - def {name}(self): - url = '{url}' - {headers}{params}{data} - self.response = self.client.request( - method='{method}', - url=url,{args} - ) - - ### Additional tasks can go here ### - - - class WebsiteUser(HttpLocust): - task_set = UserBehavior - min_wait = 1000 - max_wait = 3000 -""").strip() - - components = [urllib.parse.quote(c, safe="") for c in flow.request.path_components] - name = re.sub('\W|^(?=\d)', '_', "_".join(components)) - if name == "" or name is None: - new_name = "_".join([str(flow.request.host), str(flow.request.timestamp_start)]) - name = re.sub('\W|^(?=\d)', '_', new_name) - - url = flow.request.scheme + "://" + flow.request.host + "/" + "/".join(components) - - args = "" - headers = "" - if flow.request.headers: - lines = [ - (_native(k), _native(v)) for k, v in flow.request.headers.fields - if _native(k).lower() not in ["host", "cookie"] - ] - lines = [" '%s': '%s',\n" % (k, v) for k, v in lines] - headers += "\n headers = {\n%s }\n" % "".join(lines) - args += "\n headers=headers," - - params = "" - if flow.request.query: - lines = [" %s: %s,\n" % (repr(k), repr(v)) for k, v in flow.request.query.collect()] - params = "\n params = {\n%s }\n" % "".join(lines) - args += "\n params=params," - - data = "" - if flow.request.content: - data = "\n data = '''%s'''\n" % _native(flow.request.content) - args += "\n data=data," - - code = code.format( - name=name, - url=url, - headers=headers, - params=params, - data=data, - method=flow.request.method, - args=args, - ) - - host = flow.request.scheme + "://" + flow.request.host - code = code.replace(host, "' + self.locust.host + '") - code = code.replace(urllib.parse.quote_plus(host), "' + quote_plus(self.locust.host) + '") - code = code.replace(urllib.parse.quote(host), "' + quote(self.locust.host) + '") - code = code.replace("'' + ", "") - - return code - - -def locust_task(flow): - code = locust_code(flow) - start_task = len(code.split('@task')[0]) - 4 - end_task = -19 - len(code.split('### Additional')[1]) - task_code = code[start_task:end_task] - - return task_code - - -def url(flow): - return flow.request.url - - -EXPORTERS = [ - ("content", "c", None), - ("headers+content", "h", None), - ("url", "u", url), - ("as curl command", "r", curl_command), - ("as python code", "p", python_code), - ("as locust code", "l", locust_code), - ("as locust task", "t", locust_task), -] diff --git a/mitmproxy/flow/io.py b/mitmproxy/flow/io.py deleted file mode 100644 index e2a6472c..00000000 --- a/mitmproxy/flow/io.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -from mitmproxy import exceptions -from mitmproxy import flowfilter -from mitmproxy import models -from mitmproxy.contrib import tnetstring -from mitmproxy.flow import io_compat - - -class FlowWriter: - def __init__(self, fo): - self.fo = fo - - def add(self, flow): - d = flow.get_state() - tnetstring.dump(d, self.fo) - - -class FlowReader: - def __init__(self, fo): - self.fo = fo - - def stream(self): - """ - Yields Flow objects from the dump. - """ - try: - while True: - data = tnetstring.load(self.fo) - try: - data = io_compat.migrate_flow(data) - except ValueError as e: - raise exceptions.FlowReadException(str(e)) - if data["type"] not in models.FLOW_TYPES: - raise exceptions.FlowReadException("Unknown flow type: {}".format(data["type"])) - yield models.FLOW_TYPES[data["type"]].from_state(data) - except ValueError as e: - if str(e) == "not a tnetstring: empty file": - return # Error is due to EOF - raise exceptions.FlowReadException("Invalid data format.") - - -class FilteredFlowWriter: - def __init__(self, fo, flt): - self.fo = fo - self.flt = flt - - def add(self, flow): - if self.flt and not flowfilter.match(self.flt, flow): - return - d = flow.get_state() - tnetstring.dump(d, self.fo) - - -def read_flows_from_paths(paths): - """ - Given a list of filepaths, read all flows and return a list of them. - From a performance perspective, streaming would be advisable - - however, if there's an error with one of the files, we want it to be raised immediately. - - Raises: - FlowReadException, if any error occurs. - """ - try: - flows = [] - for path in paths: - path = os.path.expanduser(path) - with open(path, "rb") as f: - flows.extend(FlowReader(f).stream()) - except IOError as e: - raise exceptions.FlowReadException(e.strerror) - return flows diff --git a/mitmproxy/flow/io_compat.py b/mitmproxy/flow/io_compat.py deleted file mode 100644 index fc190e2f..00000000 --- a/mitmproxy/flow/io_compat.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -This module handles the import of mitmproxy flows generated by old versions. -""" - -from typing import Any - -from netlib import version, strutils - - -def convert_011_012(data): - data[b"version"] = (0, 12) - return data - - -def convert_012_013(data): - data[b"version"] = (0, 13) - return data - - -def convert_013_014(data): - data[b"request"][b"first_line_format"] = data[b"request"].pop(b"form_in") - data[b"request"][b"http_version"] = b"HTTP/" + ".".join( - str(x) for x in data[b"request"].pop(b"httpversion")).encode() - data[b"response"][b"http_version"] = b"HTTP/" + ".".join( - str(x) for x in data[b"response"].pop(b"httpversion")).encode() - data[b"response"][b"status_code"] = data[b"response"].pop(b"code") - data[b"response"][b"body"] = data[b"response"].pop(b"content") - data[b"server_conn"].pop(b"state") - data[b"server_conn"][b"via"] = None - data[b"version"] = (0, 14) - return data - - -def convert_014_015(data): - data[b"version"] = (0, 15) - return data - - -def convert_015_016(data): - for m in (b"request", b"response"): - if b"body" in data[m]: - data[m][b"content"] = data[m].pop(b"body") - if b"msg" in data[b"response"]: - data[b"response"][b"reason"] = data[b"response"].pop(b"msg") - data[b"request"].pop(b"form_out", None) - data[b"version"] = (0, 16) - return data - - -def convert_016_017(data): - data[b"server_conn"][b"peer_address"] = None - data[b"version"] = (0, 17) - return data - - -def convert_017_018(data): - # convert_unicode needs to be called for every dual release and the first py3-only release - data = convert_unicode(data) - - data["server_conn"]["ip_address"] = data["server_conn"].pop("peer_address") - data["marked"] = False - data["version"] = (0, 18) - return data - - -def convert_018_019(data): - data["version"] = (0, 19) - return data - - -def _convert_dict_keys(o: Any) -> Any: - if isinstance(o, dict): - return {strutils.native(k): _convert_dict_keys(v) for k, v in o.items()} - else: - return o - - -def _convert_dict_vals(o: dict, values_to_convert: dict) -> dict: - for k, v in values_to_convert.items(): - if not o or k not in o: - continue - if v is True: - o[k] = strutils.native(o[k]) - else: - _convert_dict_vals(o[k], v) - return o - - -def convert_unicode(data: dict) -> dict: - """ - This method converts between Python 3 and Python 2 dumpfiles. - """ - data = _convert_dict_keys(data) - data = _convert_dict_vals( - data, { - "type": True, - "id": True, - "request": { - "first_line_format": True - }, - "error": { - "msg": True - } - } - ) - return data - - -converters = { - (0, 11): convert_011_012, - (0, 12): convert_012_013, - (0, 13): convert_013_014, - (0, 14): convert_014_015, - (0, 15): convert_015_016, - (0, 16): convert_016_017, - (0, 17): convert_017_018, - (0, 18): convert_018_019, -} - - -def migrate_flow(flow_data): - while True: - flow_version = tuple(flow_data.get(b"version", flow_data.get("version"))) - if flow_version[:2] == version.IVERSION[:2]: - break - elif flow_version[:2] in converters: - flow_data = converters[flow_version[:2]](flow_data) - else: - v = ".".join(str(i) for i in flow_version) - raise ValueError( - "{} cannot read files serialized with version {}.".format(version.MITMPROXY, v) - ) - # TODO: This should finally be moved in the converter for the first py3-only release. - # It's here so that a py2 0.18 dump can be read by py3 0.18 and vice versa. - flow_data = convert_unicode(flow_data) - return flow_data diff --git a/mitmproxy/io.py b/mitmproxy/io.py new file mode 100644 index 00000000..c1b7168b --- /dev/null +++ b/mitmproxy/io.py @@ -0,0 +1,72 @@ +import os + +from mitmproxy import exceptions +from mitmproxy import flowfilter +from mitmproxy import models +from mitmproxy.contrib import tnetstring +from mitmproxy import io_compat + + +class FlowWriter: + def __init__(self, fo): + self.fo = fo + + def add(self, flow): + d = flow.get_state() + tnetstring.dump(d, self.fo) + + +class FlowReader: + def __init__(self, fo): + self.fo = fo + + def stream(self): + """ + Yields Flow objects from the dump. + """ + try: + while True: + data = tnetstring.load(self.fo) + try: + data = io_compat.migrate_flow(data) + except ValueError as e: + raise exceptions.FlowReadException(str(e)) + if data["type"] not in models.FLOW_TYPES: + raise exceptions.FlowReadException("Unknown flow type: {}".format(data["type"])) + yield models.FLOW_TYPES[data["type"]].from_state(data) + except ValueError as e: + if str(e) == "not a tnetstring: empty file": + return # Error is due to EOF + raise exceptions.FlowReadException("Invalid data format.") + + +class FilteredFlowWriter: + def __init__(self, fo, flt): + self.fo = fo + self.flt = flt + + def add(self, flow): + if self.flt and not flowfilter.match(self.flt, flow): + return + d = flow.get_state() + tnetstring.dump(d, self.fo) + + +def read_flows_from_paths(paths): + """ + Given a list of filepaths, read all flows and return a list of them. + From a performance perspective, streaming would be advisable - + however, if there's an error with one of the files, we want it to be raised immediately. + + Raises: + FlowReadException, if any error occurs. + """ + try: + flows = [] + for path in paths: + path = os.path.expanduser(path) + with open(path, "rb") as f: + flows.extend(FlowReader(f).stream()) + except IOError as e: + raise exceptions.FlowReadException(e.strerror) + return flows diff --git a/mitmproxy/io_compat.py b/mitmproxy/io_compat.py new file mode 100644 index 00000000..fc190e2f --- /dev/null +++ b/mitmproxy/io_compat.py @@ -0,0 +1,136 @@ +""" +This module handles the import of mitmproxy flows generated by old versions. +""" + +from typing import Any + +from netlib import version, strutils + + +def convert_011_012(data): + data[b"version"] = (0, 12) + return data + + +def convert_012_013(data): + data[b"version"] = (0, 13) + return data + + +def convert_013_014(data): + data[b"request"][b"first_line_format"] = data[b"request"].pop(b"form_in") + data[b"request"][b"http_version"] = b"HTTP/" + ".".join( + str(x) for x in data[b"request"].pop(b"httpversion")).encode() + data[b"response"][b"http_version"] = b"HTTP/" + ".".join( + str(x) for x in data[b"response"].pop(b"httpversion")).encode() + data[b"response"][b"status_code"] = data[b"response"].pop(b"code") + data[b"response"][b"body"] = data[b"response"].pop(b"content") + data[b"server_conn"].pop(b"state") + data[b"server_conn"][b"via"] = None + data[b"version"] = (0, 14) + return data + + +def convert_014_015(data): + data[b"version"] = (0, 15) + return data + + +def convert_015_016(data): + for m in (b"request", b"response"): + if b"body" in data[m]: + data[m][b"content"] = data[m].pop(b"body") + if b"msg" in data[b"response"]: + data[b"response"][b"reason"] = data[b"response"].pop(b"msg") + data[b"request"].pop(b"form_out", None) + data[b"version"] = (0, 16) + return data + + +def convert_016_017(data): + data[b"server_conn"][b"peer_address"] = None + data[b"version"] = (0, 17) + return data + + +def convert_017_018(data): + # convert_unicode needs to be called for every dual release and the first py3-only release + data = convert_unicode(data) + + data["server_conn"]["ip_address"] = data["server_conn"].pop("peer_address") + data["marked"] = False + data["version"] = (0, 18) + return data + + +def convert_018_019(data): + data["version"] = (0, 19) + return data + + +def _convert_dict_keys(o: Any) -> Any: + if isinstance(o, dict): + return {strutils.native(k): _convert_dict_keys(v) for k, v in o.items()} + else: + return o + + +def _convert_dict_vals(o: dict, values_to_convert: dict) -> dict: + for k, v in values_to_convert.items(): + if not o or k not in o: + continue + if v is True: + o[k] = strutils.native(o[k]) + else: + _convert_dict_vals(o[k], v) + return o + + +def convert_unicode(data: dict) -> dict: + """ + This method converts between Python 3 and Python 2 dumpfiles. + """ + data = _convert_dict_keys(data) + data = _convert_dict_vals( + data, { + "type": True, + "id": True, + "request": { + "first_line_format": True + }, + "error": { + "msg": True + } + } + ) + return data + + +converters = { + (0, 11): convert_011_012, + (0, 12): convert_012_013, + (0, 13): convert_013_014, + (0, 14): convert_014_015, + (0, 15): convert_015_016, + (0, 16): convert_016_017, + (0, 17): convert_017_018, + (0, 18): convert_018_019, +} + + +def migrate_flow(flow_data): + while True: + flow_version = tuple(flow_data.get(b"version", flow_data.get("version"))) + if flow_version[:2] == version.IVERSION[:2]: + break + elif flow_version[:2] in converters: + flow_data = converters[flow_version[:2]](flow_data) + else: + v = ".".join(str(i) for i in flow_version) + raise ValueError( + "{} cannot read files serialized with version {}.".format(version.MITMPROXY, v) + ) + # TODO: This should finally be moved in the converter for the first py3-only release. + # It's here so that a py2 0.18 dump can be read by py3 0.18 and vice versa. + flow_data = convert_unicode(flow_data) + return flow_data diff --git a/mitmproxy/master.py b/mitmproxy/master.py index aa9892e5..dd98c04c 100644 --- a/mitmproxy/master.py +++ b/mitmproxy/master.py @@ -11,7 +11,7 @@ from mitmproxy import events from mitmproxy import exceptions from mitmproxy import models from mitmproxy import log -from mitmproxy.flow import io +from mitmproxy import io from mitmproxy.protocol import http_replay from netlib import basethread from netlib import http diff --git a/mitmproxy/web/app.py b/mitmproxy/web/app.py index a81d04be..c45b32a9 100644 --- a/mitmproxy/web/app.py +++ b/mitmproxy/web/app.py @@ -9,11 +9,11 @@ import hashlib import tornado.websocket import tornado.web from io import BytesIO -from mitmproxy.flow import FlowWriter, FlowReader from mitmproxy import flowfilter from mitmproxy import models from mitmproxy import contentviews +from mitmproxy import io from netlib import version @@ -193,7 +193,7 @@ class DumpFlows(RequestHandler): self.set_header("Content-Type", "application/octet-stream") bio = BytesIO() - fw = FlowWriter(bio) + fw = io.FlowWriter(bio) for f in self.state.flows: fw.add(f) @@ -205,7 +205,7 @@ class DumpFlows(RequestHandler): content = self.request.files.values()[0][0].body bio = BytesIO(content) - self.state.load_flows(FlowReader(bio).stream()) + self.state.load_flows(io.FlowReader(bio).stream()) bio.close() diff --git a/test/mitmproxy/addons/test_filestreamer.py b/test/mitmproxy/addons/test_filestreamer.py index a85fcd0c..64208f84 100644 --- a/test/mitmproxy/addons/test_filestreamer.py +++ b/test/mitmproxy/addons/test_filestreamer.py @@ -4,7 +4,7 @@ import os.path from mitmproxy.addons import filestreamer from mitmproxy import master -from mitmproxy.flow import io +from mitmproxy import io from mitmproxy import options from mitmproxy import proxy diff --git a/test/mitmproxy/mastertest.py b/test/mitmproxy/mastertest.py index 915f9501..f82eec40 100644 --- a/test/mitmproxy/mastertest.py +++ b/test/mitmproxy/mastertest.py @@ -4,7 +4,7 @@ from . import tutils import netlib.tutils from mitmproxy import master -from mitmproxy import flow, proxy, models, options +from mitmproxy import io, proxy, models, options class TestMaster: @@ -33,7 +33,7 @@ class MasterTest: def flowfile(self, path): f = open(path, "wb") - fw = flow.FlowWriter(f) + fw = io.FlowWriter(f) t = tutils.tflow(resp=True) fw.add(t) f.close() diff --git a/test/mitmproxy/test_dump.py b/test/mitmproxy/test_dump.py index a81fbd27..c062a068 100644 --- a/test/mitmproxy/test_dump.py +++ b/test/mitmproxy/test_dump.py @@ -1,7 +1,8 @@ import os import io -from mitmproxy import dump, flow, exceptions, proxy +import mitmproxy.io +from mitmproxy import dump, exceptions, proxy from . import tutils, mastertest @@ -127,7 +128,7 @@ class TestDumpMaster(mastertest.MasterTest): self.dummy_cycle( self.mkmaster(None, outfile=(p, "wb"), verbosity=0), 1, b"" ) - assert len(list(flow.FlowReader(open(p, "rb")).stream())) == 1 + assert len(list(mitmproxy.io.FlowReader(open(p, "rb")).stream())) == 1 def test_write_append(self): with tutils.tmpdir() as d: @@ -140,7 +141,7 @@ class TestDumpMaster(mastertest.MasterTest): self.mkmaster(None, outfile=(p, "ab"), verbosity=0), 1, b"" ) - assert len(list(flow.FlowReader(open(p, "rb")).stream())) == 2 + assert len(list(mitmproxy.io.FlowReader(open(p, "rb")).stream())) == 2 def test_write_err(self): tutils.raises( diff --git a/test/mitmproxy/test_flow.py b/test/mitmproxy/test_flow.py index 86cd7d16..d33d395b 100644 --- a/test/mitmproxy/test_flow.py +++ b/test/mitmproxy/test_flow.py @@ -3,7 +3,8 @@ import io import netlib.utils from netlib.http import Headers -from mitmproxy import flowfilter, flow, options +import mitmproxy.io +from mitmproxy import flowfilter, options from mitmproxy.addons import state from mitmproxy.contrib import tnetstring from mitmproxy.exceptions import FlowReadException, Kill @@ -329,7 +330,7 @@ class TestSerialize: def _treader(self): sio = io.BytesIO() - w = flow.FlowWriter(sio) + w = mitmproxy.io.FlowWriter(sio) for i in range(3): f = tutils.tflow(resp=True) w.add(f) @@ -342,18 +343,18 @@ class TestSerialize: w.add(f) sio.seek(0) - return flow.FlowReader(sio) + return mitmproxy.io.FlowReader(sio) def test_roundtrip(self): sio = io.BytesIO() f = tutils.tflow() f.marked = True f.request.content = bytes(bytearray(range(256))) - w = flow.FlowWriter(sio) + w = mitmproxy.io.FlowWriter(sio) w.add(f) sio.seek(0) - r = flow.FlowReader(sio) + r = mitmproxy.io.FlowReader(sio) l = list(r.stream()) assert len(l) == 1 @@ -386,7 +387,7 @@ class TestSerialize: def test_filter(self): sio = io.BytesIO() flt = flowfilter.parse("~c 200") - w = flow.FilteredFlowWriter(sio, flt) + w = mitmproxy.io.FilteredFlowWriter(sio, flt) f = tutils.tflow(resp=True) f.response.status_code = 200 @@ -397,14 +398,14 @@ class TestSerialize: w.add(f) sio.seek(0) - r = flow.FlowReader(sio) + r = mitmproxy.io.FlowReader(sio) assert len(list(r.stream())) def test_error(self): sio = io.BytesIO() sio.write(b"bogus") sio.seek(0) - r = flow.FlowReader(sio) + r = mitmproxy.io.FlowReader(sio) tutils.raises(FlowReadException, list, r.stream()) f = FlowReadException("foo") @@ -418,7 +419,7 @@ class TestSerialize: tnetstring.dump(d, sio) sio.seek(0) - r = flow.FlowReader(sio) + r = mitmproxy.io.FlowReader(sio) tutils.raises("version", list, r.stream()) diff --git a/test/mitmproxy/test_flow_export.py b/test/mitmproxy/test_flow_export.py index 920f538b..aafd5a1c 100644 --- a/test/mitmproxy/test_flow_export.py +++ b/test/mitmproxy/test_flow_export.py @@ -2,7 +2,7 @@ import re import netlib.tutils from netlib.http import Headers -from mitmproxy.flow import export # heh +from mitmproxy import export # heh from . import tutils diff --git a/test/mitmproxy/test_flow_format_compat.py b/test/mitmproxy/test_flow_format_compat.py index cc80db81..4997aff9 100644 --- a/test/mitmproxy/test_flow_format_compat.py +++ b/test/mitmproxy/test_flow_format_compat.py @@ -1,11 +1,11 @@ -from mitmproxy.flow import FlowReader -from mitmproxy.exceptions import FlowReadException +from mitmproxy import io +from mitmproxy import exceptions from . import tutils def test_load(): with open(tutils.test_data.path("data/dumpfile-011"), "rb") as f: - flow_reader = FlowReader(f) + flow_reader = io.FlowReader(f) flows = list(flow_reader.stream()) assert len(flows) == 1 assert flows[0].request.url == "https://example.com/" @@ -13,6 +13,6 @@ def test_load(): def test_cannot_convert(): with open(tutils.test_data.path("data/dumpfile-010"), "rb") as f: - flow_reader = FlowReader(f) - with tutils.raises(FlowReadException): + flow_reader = io.FlowReader(f) + with tutils.raises(exceptions.FlowReadException): list(flow_reader.stream()) -- cgit v1.2.3