aboutsummaryrefslogtreecommitdiffstats
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rwxr-xr-xexamples/flowbasic24
-rw-r--r--examples/har_dump.py216
-rw-r--r--examples/har_extractor.py264
-rw-r--r--examples/redirect_requests.py7
4 files changed, 230 insertions, 281 deletions
diff --git a/examples/flowbasic b/examples/flowbasic
index 74af4e08..67c6f596 100755
--- a/examples/flowbasic
+++ b/examples/flowbasic
@@ -8,7 +8,7 @@
Note that request and response messages are not automatically replied to,
so we need to implement handlers to do this.
"""
-from mitmproxy import flow, controller
+from mitmproxy import flow, controller, options
from mitmproxy.proxy import ProxyServer, ProxyConfig
@@ -21,21 +21,23 @@ class MyMaster(flow.FlowMaster):
@controller.handler
def request(self, f):
- f = flow.FlowMaster.request(self, f)
- print(f)
+ print("request", f)
@controller.handler
def response(self, f):
- f = flow.FlowMaster.response(self, f)
- print(f)
+ print("response", f)
+ @controller.handler
+ def error(self, f):
+ print("error", f)
+
+ @controller.handler
+ def log(self, l):
+ print("log", l.msg)
-config = ProxyConfig(
- port=8080,
- # use ~/.mitmproxy/mitmproxy-ca.pem as default CA file.
- cadir="~/.mitmproxy/"
-)
+opts = options.Options(cadir="~/.mitmproxy/")
+config = ProxyConfig(opts)
state = flow.State()
server = ProxyServer(config)
-m = MyMaster(server, state)
+m = MyMaster(opts, server, state)
m.run()
diff --git a/examples/har_dump.py b/examples/har_dump.py
new file mode 100644
index 00000000..95090edb
--- /dev/null
+++ b/examples/har_dump.py
@@ -0,0 +1,216 @@
+"""
+This inline script can be used to dump flows as HAR files.
+"""
+
+
+import pprint
+import json
+import sys
+import base64
+import zlib
+
+from datetime import datetime
+import pytz
+
+import mitmproxy
+
+from netlib import version
+from netlib import strutils
+from netlib.http import cookies
+
+HAR = {}
+
+# A set of servers seen till now is maintained so we can avoid
+# using 'connect' time for entries that reuse an existing connection.
+SERVERS_SEEN = set()
+
+
+def start():
+ """
+ Called once on script startup before any other events.
+ """
+ if len(sys.argv) != 2:
+ raise ValueError(
+ 'Usage: -s "har_dump.py filename" '
+ '(- will output to stdout, filenames ending with .zhar '
+ 'will result in compressed har)'
+ )
+
+ HAR.update({
+ "log": {
+ "version": "1.2",
+ "creator": {
+ "name": "mitmproxy har_dump",
+ "version": "0.1",
+ "comment": "mitmproxy version %s" % version.MITMPROXY
+ },
+ "entries": []
+ }
+ })
+
+
+def response(flow):
+ """
+ Called when a server response has been received.
+ """
+
+ # -1 indicates that these values do not apply to current request
+ ssl_time = -1
+ connect_time = -1
+
+ if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
+ connect_time = (flow.server_conn.timestamp_tcp_setup -
+ flow.server_conn.timestamp_start)
+
+ if flow.server_conn.timestamp_ssl_setup is not None:
+ ssl_time = (flow.server_conn.timestamp_ssl_setup -
+ flow.server_conn.timestamp_tcp_setup)
+
+ SERVERS_SEEN.add(flow.server_conn)
+
+ # Calculate raw timings from timestamps. DNS timings can not be calculated
+ # for lack of a way to measure it. The same goes for HAR blocked.
+ # mitmproxy will open a server connection as soon as it receives the host
+ # and port from the client connection. So, the time spent waiting is actually
+ # spent waiting between request.timestamp_end and response.timestamp_start
+ # thus it correlates to HAR wait instead.
+ timings_raw = {
+ 'send': flow.request.timestamp_end - flow.request.timestamp_start,
+ 'receive': flow.response.timestamp_end - flow.response.timestamp_start,
+ 'wait': flow.response.timestamp_start - flow.request.timestamp_end,
+ 'connect': connect_time,
+ 'ssl': ssl_time,
+ }
+
+ # HAR timings are integers in ms, so we re-encode the raw timings to that format.
+ timings = dict([(k, int(1000 * v)) for k, v in timings_raw.items()])
+
+ # full_time is the sum of all timings.
+ # Timings set to -1 will be ignored as per spec.
+ full_time = sum(v for v in timings.values() if v > -1)
+
+ started_date_time = format_datetime(datetime.utcfromtimestamp(flow.request.timestamp_start))
+
+ # Response body size and encoding
+ response_body_size = len(flow.response.raw_content)
+ response_body_decoded_size = len(flow.response.content)
+ response_body_compression = response_body_decoded_size - response_body_size
+
+ entry = {
+ "startedDateTime": started_date_time,
+ "time": full_time,
+ "request": {
+ "method": flow.request.method,
+ "url": flow.request.url,
+ "httpVersion": flow.request.http_version,
+ "cookies": format_request_cookies(flow.request.cookies.fields),
+ "headers": name_value(flow.request.headers),
+ "queryString": name_value(flow.request.query or {}),
+ "headersSize": len(str(flow.request.headers)),
+ "bodySize": len(flow.request.content),
+ },
+ "response": {
+ "status": flow.response.status_code,
+ "statusText": flow.response.reason,
+ "httpVersion": flow.response.http_version,
+ "cookies": format_response_cookies(flow.response.cookies.fields),
+ "headers": name_value(flow.response.headers),
+ "content": {
+ "size": response_body_size,
+ "compression": response_body_compression,
+ "mimeType": flow.response.headers.get('Content-Type', '')
+ },
+ "redirectURL": flow.response.headers.get('Location', ''),
+ "headersSize": len(str(flow.response.headers)),
+ "bodySize": response_body_size,
+ },
+ "cache": {},
+ "timings": timings,
+ }
+
+    # Store binary data as base64
+ if strutils.is_mostly_bin(flow.response.content):
+ b64 = base64.b64encode(flow.response.content)
+ entry["response"]["content"]["text"] = b64.decode('ascii')
+ entry["response"]["content"]["encoding"] = "base64"
+ else:
+ entry["response"]["content"]["text"] = flow.response.text
+
+ if flow.request.method in ["POST", "PUT", "PATCH"]:
+ entry["request"]["postData"] = {
+ "mimeType": flow.request.headers.get("Content-Type", "").split(";")[0],
+ "text": flow.request.content,
+ "params": name_value(flow.request.urlencoded_form)
+ }
+
+ if flow.server_conn:
+ entry["serverIPAddress"] = str(flow.server_conn.ip_address.address[0])
+
+ HAR["log"]["entries"].append(entry)
+
+
+def done():
+ """
+ Called once on script shutdown, after any other events.
+ """
+ dump_file = sys.argv[1]
+
+ if dump_file == '-':
+ mitmproxy.ctx.log(pprint.pformat(HAR))
+ else:
+ json_dump = json.dumps(HAR, indent=2)
+
+ if dump_file.endswith('.zhar'):
+ json_dump = zlib.compress(json_dump, 9)
+
+ with open(dump_file, "w") as f:
+ f.write(json_dump)
+
+ mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump))
+
+
+def format_datetime(dt):
+ return dt.replace(tzinfo=pytz.timezone("UTC")).isoformat()
+
+
+def format_cookies(cookie_list):
+ rv = []
+
+ for name, value, attrs in cookie_list:
+ cookie_har = {
+ "name": name,
+ "value": value,
+ }
+
+ # HAR only needs some attributes
+ for key in ["path", "domain", "comment"]:
+ if key in attrs:
+ cookie_har[key] = attrs[key]
+
+ # These keys need to be boolean!
+ for key in ["httpOnly", "secure"]:
+ cookie_har[key] = bool(key in attrs)
+
+ # Expiration time needs to be formatted
+ expire_ts = cookies.get_expiration_ts(attrs)
+ if expire_ts is not None:
+ cookie_har["expires"] = format_datetime(datetime.fromtimestamp(expire_ts))
+
+ rv.append(cookie_har)
+
+ return rv
+
+
+def format_request_cookies(fields):
+ return format_cookies(cookies.group_cookies(fields))
+
+
+def format_response_cookies(fields):
+ return format_cookies((c[0], c[1].value, c[1].attrs) for c in fields)
+
+
+def name_value(obj):
+ """
+ Convert (key, value) pairs to HAR format.
+ """
+ return [{"name": k, "value": v} for k, v in obj.items()]
diff --git a/examples/har_extractor.py b/examples/har_extractor.py
deleted file mode 100644
index 76059d8e..00000000
--- a/examples/har_extractor.py
+++ /dev/null
@@ -1,264 +0,0 @@
-"""
- This inline script utilizes harparser.HAR from
- https://github.com/JustusW/harparser to generate a HAR log object.
-"""
-import mitmproxy.ctx
-import six
-import sys
-import pytz
-from harparser import HAR
-
-from datetime import datetime
-
-
-class _HARLog(HAR.log):
- # The attributes need to be registered here for them to actually be
- # available later via self. This is due to HAREncodable linking __getattr__
- # to __getitem__. Anything that is set only in __init__ will just be added
- # as key/value pair to self.__classes__.
- __page_list__ = []
- __page_count__ = 0
- __page_ref__ = {}
-
- def __init__(self, page_list=[]):
- self.__page_list__ = page_list
- self.__page_count__ = 0
- self.__page_ref__ = {}
-
- HAR.log.__init__(self, {"version": "1.2",
- "creator": {"name": "MITMPROXY HARExtractor",
- "version": "0.1",
- "comment": ""},
- "pages": [],
- "entries": []})
-
- def reset(self):
- self.__init__(self.__page_list__)
-
- def add(self, obj):
- if isinstance(obj, HAR.pages):
- self['pages'].append(obj)
- if isinstance(obj, HAR.entries):
- self['entries'].append(obj)
-
- def create_page_id(self):
- self.__page_count__ += 1
- return "autopage_%s" % str(self.__page_count__)
-
- def set_page_ref(self, page, ref):
- self.__page_ref__[page] = ref
-
- def get_page_ref(self, page):
- return self.__page_ref__.get(page, None)
-
- def get_page_list(self):
- return self.__page_list__
-
-
-class Context(object):
- pass
-
-context = Context()
-
-
-def start():
- """
- On start we create a HARLog instance. You will have to adapt this to
- suit your actual needs of HAR generation. As it will probably be
- necessary to cluster logs by IPs or reset them from time to time.
- """
- if sys.version_info >= (3, 0):
- raise RuntimeError(
- "har_extractor.py does not work on Python 3. "
- "Please check out https://github.com/mitmproxy/mitmproxy/issues/1320 "
- "if you want to help making this work again."
- )
- context.dump_file = None
- if len(sys.argv) > 1:
- context.dump_file = sys.argv[1]
- else:
- raise ValueError(
- 'Usage: -s "har_extractor.py filename" '
- '(- will output to stdout, filenames ending with .zhar '
- 'will result in compressed har)'
- )
- context.HARLog = _HARLog()
- context.seen_server = set()
-
-
-def response(flow):
- """
- Called when a server response has been received. At the time of this
- message both a request and a response are present and completely done.
- """
- # Values are converted from float seconds to int milliseconds later.
- ssl_time = -.001
- connect_time = -.001
- if flow.server_conn not in context.seen_server:
- # Calculate the connect_time for this server_conn. Afterwards add it to
- # seen list, in order to avoid the connect_time being present in entries
- # that use an existing connection.
- connect_time = (flow.server_conn.timestamp_tcp_setup -
- flow.server_conn.timestamp_start)
- context.seen_server.add(flow.server_conn)
-
- if flow.server_conn.timestamp_ssl_setup is not None:
- # Get the ssl_time for this server_conn as the difference between
- # the start of the successful tcp setup and the successful ssl
- # setup. If no ssl setup has been made it is left as -1 since it
- # doesn't apply to this connection.
- ssl_time = (flow.server_conn.timestamp_ssl_setup -
- flow.server_conn.timestamp_tcp_setup)
-
- # Calculate the raw timings from the different timestamps present in the
- # request and response object. For lack of a way to measure it dns timings
- # can not be calculated. The same goes for HAR blocked: MITMProxy will open
- # a server connection as soon as it receives the host and port from the
- # client connection. So the time spent waiting is actually spent waiting
- # between request.timestamp_end and response.timestamp_start thus it
- # correlates to HAR wait instead.
- timings_raw = {
- 'send': flow.request.timestamp_end - flow.request.timestamp_start,
- 'wait': flow.response.timestamp_start - flow.request.timestamp_end,
- 'receive': flow.response.timestamp_end - flow.response.timestamp_start,
- 'connect': connect_time,
- 'ssl': ssl_time
- }
-
- # HAR timings are integers in ms, so we have to re-encode the raw timings to
- # that format.
- timings = dict([(k, int(1000 * v)) for k, v in six.iteritems(timings_raw)])
-
- # The full_time is the sum of all timings.
- # Timings set to -1 will be ignored as per spec.
- full_time = sum(v for v in timings.values() if v > -1)
-
- started_date_time = datetime.utcfromtimestamp(
- flow.request.timestamp_start).replace(tzinfo=pytz.timezone("UTC")).isoformat()
-
- request_query_string = [{"name": k, "value": v}
- for k, v in flow.request.query or {}]
-
- response_body_size = len(flow.response.content)
- response_body_decoded_size = len(flow.response.content)
- response_body_compression = response_body_decoded_size - response_body_size
-
- entry = HAR.entries({
- "startedDateTime": started_date_time,
- "time": full_time,
- "request": {
- "method": flow.request.method,
- "url": flow.request.url,
- "httpVersion": flow.request.http_version,
- "cookies": format_cookies(flow.request.cookies),
- "headers": format_headers(flow.request.headers),
- "queryString": request_query_string,
- "headersSize": len(str(flow.request.headers)),
- "bodySize": len(flow.request.content),
- },
- "response": {
- "status": flow.response.status_code,
- "statusText": flow.response.reason,
- "httpVersion": flow.response.http_version,
- "cookies": format_cookies(flow.response.cookies),
- "headers": format_headers(flow.response.headers),
- "content": {
- "size": response_body_size,
- "compression": response_body_compression,
- "mimeType": flow.response.headers.get('Content-Type', '')
- },
- "redirectURL": flow.response.headers.get('Location', ''),
- "headersSize": len(str(flow.response.headers)),
- "bodySize": response_body_size,
- },
- "cache": {},
- "timings": timings,
- })
-
- # If the current url is in the page list of context.HARLog or
- # does not have a referrer, we add it as a new pages object.
- is_new_page = (
- flow.request.url in context.HARLog.get_page_list() or
- flow.request.headers.get('Referer') is None
- )
- if is_new_page:
- page_id = context.HARLog.create_page_id()
- context.HARLog.add(
- HAR.pages({
- "startedDateTime": entry['startedDateTime'],
- "id": page_id,
- "title": flow.request.url,
- "pageTimings": {}
- })
- )
- context.HARLog.set_page_ref(flow.request.url, page_id)
- entry['pageref'] = page_id
-
- # Lookup the referer in the page_ref of context.HARLog to point this entries
- # pageref attribute to the right pages object, then set it as a new
- # reference to build a reference tree.
- elif context.HARLog.get_page_ref(flow.request.headers.get('Referer')) is not None:
- entry['pageref'] = context.HARLog.get_page_ref(
- flow.request.headers['Referer']
- )
- context.HARLog.set_page_ref(
- flow.request.headers['Referer'], entry['pageref']
- )
-
- context.HARLog.add(entry)
-
-
-def done():
- """
- Called once on script shutdown, after any other events.
- """
- import pprint
- import json
-
- json_dump = context.HARLog.json()
- compressed_json_dump = context.HARLog.compress()
-
- if context.dump_file == '-':
- mitmproxy.ctx.log(pprint.pformat(json.loads(json_dump)))
- elif context.dump_file.endswith('.zhar'):
- with open(context.dump_file, "wb") as f:
- f.write(compressed_json_dump)
- else:
- with open(context.dump_file, "wb") as f:
- f.write(json_dump)
- mitmproxy.ctx.log(
- "HAR log finished with %s bytes (%s bytes compressed)" % (
- len(json_dump), len(compressed_json_dump)
- )
- )
- mitmproxy.ctx.log(
- "Compression rate is %s%%" % str(
- 100. * len(compressed_json_dump) / len(json_dump)
- )
- )
-
-
-def format_cookies(obj):
- if obj:
- return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()]
- return ""
-
-
-def format_headers(obj):
- if obj:
- return [{"name": k, "value": v} for k, v in obj.fields]
- return ""
-
-
-def print_attributes(obj, filter_string=None, hide_privates=False):
- """
- Useful helper method to quickly get all attributes of an object and its
- values.
- """
- for attr in dir(obj):
- if hide_privates and "__" in attr:
- continue
- if filter_string is not None and filter_string not in attr:
- continue
- value = getattr(obj, attr)
- print("%s.%s" % ('obj', attr), value, type(value))
diff --git a/examples/redirect_requests.py b/examples/redirect_requests.py
index 36594bcd..8cde1bfd 100644
--- a/examples/redirect_requests.py
+++ b/examples/redirect_requests.py
@@ -2,7 +2,6 @@
This example shows two ways to redirect flows to other destinations.
"""
from mitmproxy.models import HTTPResponse
-from netlib.http import Headers
def request(flow):
@@ -12,11 +11,7 @@ def request(flow):
# Method 1: Answer with a locally generated response
if flow.request.pretty_host.endswith("example.com"):
- resp = HTTPResponse(
- b"HTTP/1.1", 200, b"OK",
- Headers(Content_Type="text/html"),
- b"helloworld"
- )
+ resp = HTTPResponse.make(200, b"Hello World", {"Content-Type": "text/html"})
flow.reply.send(resp)
# Method 2: Redirect the request to a different server