aboutsummaryrefslogtreecommitdiffstats
path: root/examples/har_extractor.py
diff options
context:
space:
mode:
authorJustus Wingert <justus@abi007.info>2014-11-15 20:11:25 +0100
committerJustus Wingert <justus@abi007.info>2014-11-15 20:11:25 +0100
commit4342d79dc073277b51effda92179ad7050bebf68 (patch)
tree130e6cc3e500dc50ed0d18f6b3653611bd63da88 /examples/har_extractor.py
parent31249b9e2471e05cf3e9eed7fce1ae72cf17451b (diff)
downloadmitmproxy-4342d79dc073277b51effda92179ad7050bebf68.tar.gz
mitmproxy-4342d79dc073277b51effda92179ad7050bebf68.tar.bz2
mitmproxy-4342d79dc073277b51effda92179ad7050bebf68.zip
Removed the globals and replaced them with internal attributes of _HARLog. Minor bugfixes to make ssl timings work.
Diffstat (limited to 'examples/har_extractor.py')
-rw-r--r--examples/har_extractor.py80
1 files changed, 46 insertions, 34 deletions
diff --git a/examples/har_extractor.py b/examples/har_extractor.py
index cc2cf5d7..68fb1d0d 100644
--- a/examples/har_extractor.py
+++ b/examples/har_extractor.py
@@ -18,7 +18,15 @@ class UTC(tzinfo):
class _HARLog(HAR.log):
- def __init__(self):
+ __page_list__ = []
+ __page_count__ = 0
+ __page_ref__ = {}
+
+ def __init__(self, page_list):
+ self.__page_list__ = page_list
+ self.__page_count__ = 0
+ self.__page_ref__ = {}
+
HAR.log.__init__(self, {"version": "1.2",
"creator": {"name": "MITMPROXY HARExtractor",
"version": "0.1",
@@ -35,14 +43,27 @@ class _HARLog(HAR.log):
if isinstance(obj, HAR.entries):
self['entries'].append(obj)
+ def create_page_id(self):
+ self.__page_count__ += 1
+ return "autopage_%s" % str(self.__page_count__)
+
+ def set_page_ref(self, page, ref):
+ self.__page_ref__[page] = ref
+
+ def get_page_ref(self, page):
+ return self.__page_ref__.get(page, None)
+
+ def get_page_list(self):
+ return self.__page_list__
+
def start(context, argv):
"""
- On start we reset the HAR, it's not really necessary since it will have been
- instantiated earlier during initial parsing of this file. You will have to
- adapt this to suit your actual needs of HAR generation.
+ On start we create a HARLog instance. You will have to adapt this to suit your actual needs
+ of HAR generation. As it will probably be necessary to cluster logs by IPs or reset them
+ from time to time.
"""
- HARLog.reset()
+ context.HARLog = _HARLog(['https://github.com'])
context.seen_server_connect = set()
context.seen_server_ssl = set()
@@ -52,15 +73,15 @@ def response(context, flow):
Called when a server response has been received. At the time of this message both
a request and a response are present and completely done.
"""
- connect_time = -1
+ connect_time = -.001
if flow.server_conn not in context.seen_server_connect:
# Calculate the connect_time for this server_conn. Afterwards add it to seen list, in
# order to avoid the connect_time being present in entries that use an existing connection.
connect_time = flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
context.seen_server_connect.add(flow.server_conn)
- ssl_time = -1
- if flow.server_conn not in context.seen_server_connect \
+ ssl_time = -.001
+ if flow.server_conn not in context.seen_server_ssl \
and flow.server_conn.timestamp_ssl_setup is not None:
# Get the ssl_time for this server_conn as the difference between the start of the successful
# tcp setup and the successful ssl setup. Afterwards add it to seen list, in order to avoid
@@ -134,22 +155,23 @@ def response(context, flow):
"cache": {},
"timings": timings, })
- # If the current url is in HARPAGE_LIST or does not have a referer we add it as a new pages object.
- if flow.request.url in HARPAGE_LIST or flow.request.headers.get('Referer', None) is None:
- PAGE_COUNT[1] += 1
- page_id = "_".join([str(v) for v in PAGE_COUNT])
- HARLog.add(HAR.pages({"startedDateTime": entry['startedDateTime'],
- "id": page_id,
- "title": flow.request.url, }))
- PAGE_REF[flow.request.url] = page_id
+ # If the current url is in the page list of context.HARLog or does not have a referrer we add it as a new
+ # pages object.
+ if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get('Referer', None) is None:
+ page_id = context.HARLog.create_page_id()
+ context.HARLog.add(HAR.pages({"startedDateTime": entry['startedDateTime'],
+ "id": page_id,
+ "title": flow.request.url, }))
+ context.HARLog.set_page_ref(flow.request.url, page_id)
entry['pageref'] = page_id
- # Lookup the referer in our PAGE_REF dict to point this entries pageref attribute to the right pages object.
- elif flow.request.headers.get('Referer', (None, ))[0] in PAGE_REF.keys():
- entry['pageref'] = PAGE_REF[flow.request.headers['Referer'][0]]
- PAGE_REF[flow.request.url] = entry['pageref']
+ # Lookup the referer in the page_ref of context.HARLog to point this entries pageref attribute to the right
+ # pages object, then set it as a new reference to build a reference tree.
+ elif context.HARLog.get_page_ref(flow.request.headers.get('Referer', (None, ))[0]) is not None:
+ entry['pageref'] = context.HARLog.get_page_ref(flow.request.headers['Referer'][0])
+ context.HARLog.set_page_ref(flow.request.headers['Referer'][0], entry['pageref'])
- HARLog.add(entry)
+ context.HARLog.add(entry)
def done(context):
@@ -159,8 +181,8 @@ def done(context):
from pprint import pprint
import json
- json_dump = HARLog.json()
- compressed_json_dump = HARLog.compress()
+ json_dump = context.HARLog.json()
+ compressed_json_dump = context.HARLog.compress()
print "=" * 100
pprint(json.loads(json_dump))
@@ -180,14 +202,4 @@ def print_attributes(obj, filter_string=None, hide_privates=False):
if filter_string is not None and filter_string not in attr:
continue
value = getattr(obj, attr)
- print "%s.%s" % ('obj', attr), value, type(value)
-
-
-# Some initializations. Add any page you want to have its own pages object to HARPAGE_LIST
-HARPAGE_LIST = ['https://github.com/']
-HARLog = _HARLog()
-
-CONNECT_TIMES = {}
-SSL_TIMES = {}
-PAGE_REF = {}
-PAGE_COUNT = ['autopage', 0]
+ print "%s.%s" % ('obj', attr), value, type(value) \ No newline at end of file