From 27faea2355218c90e5baa962b15653c17faf2f85 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Tue, 1 Mar 2016 20:20:18 +0530 Subject: Fixup HAR Extractor --- examples/har_extractor.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'examples') diff --git a/examples/har_extractor.py b/examples/har_extractor.py index e7718fe8..90c7bbf0 100644 --- a/examples/har_extractor.py +++ b/examples/har_extractor.py @@ -1,5 +1,4 @@ """ - This inline script utilizes harparser.HAR from https://github.com/JustusW/harparser to generate a HAR log object. """ @@ -122,26 +121,38 @@ def response(context, flow): if item > -1: full_time += item - started_date_time = datetime.fromtimestamp( - flow.request.timestamp_start, - tz=utc).isoformat() + started_date_time = datetime.utcfromtimestamp( + flow.request.timestamp_start).isoformat() + + request_query_string = "" + if flow.request.query: + request_query_string = [{"name": k, "value": v} + for k, v in flow.request.query] - request_query_string = [{"name": k, "value": v} - for k, v in flow.request.query] request_http_version = flow.request.http_version # Cookies are shaped as tuples by MITMProxy. request_cookies = [{"name": k.strip(), "value": v[0]} for k, v in flow.request.cookies.items()] - request_headers = [{"name": k, "value": v} for k, v in flow.request.headers] + + request_headers = "" + if flow.request.headers: + request_headers = [{"name": k, "value": v} + for k, v in flow.request.headers.fields] + request_headers_size = len(str(flow.request.headers)) request_body_size = len(flow.request.content) response_http_version = flow.response.http_version + # Cookies are shaped as tuples by MITMProxy. response_cookies = [{"name": k.strip(), "value": v[0]} for k, v in flow.response.cookies.items()] - response_headers = [{"name": k, "value": v} - for k, v in flow.response.headers] + + response_headers = "" + if flow.response.headers: + response_headers = [{"name": k, "value": v} + for k, v in flow.response.headers.fields] + response_headers_size = len(str(flow.response.headers)) response_body_size = len(flow.response.content) response_body_decoded_size = len(flow.response.get_decoded_content()) -- cgit v1.2.3 From 1f41719bbcf0366f98fd4838b19467eaf361faa6 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Wed, 2 Mar 2016 11:25:15 +0530 Subject: Remove some duplication and make more pythonic har_extractor --- examples/har_extractor.py | 140 +++++++++++++++++++--------------------------- 1 file changed, 59 insertions(+), 81 deletions(-) (limited to 'examples') diff --git a/examples/har_extractor.py b/examples/har_extractor.py index 90c7bbf0..43232ea9 100644 --- a/examples/har_extractor.py +++ b/examples/har_extractor.py @@ -82,17 +82,17 @@ def response(context, flow): # Calculate the connect_time for this server_conn. Afterwards add it to # seen list, in order to avoid the connect_time being present in entries # that use an existing connection. - connect_time = flow.server_conn.timestamp_tcp_setup - \ - flow.server_conn.timestamp_start + connect_time = (flow.server_conn.timestamp_tcp_setup - + flow.server_conn.timestamp_start) context.seen_server.add(flow.server_conn) if flow.server_conn.timestamp_ssl_setup is not None: # Get the ssl_time for this server_conn as the difference between # the start of the successful tcp setup and the successful ssl - # setup. If no ssl setup has been made it is left as -1 since it + # setup. If no ssl setup has been made it is left as -1 since it # doesn't apply to this connection. - ssl_time = flow.server_conn.timestamp_ssl_setup - \ - flow.server_conn.timestamp_tcp_setup + ssl_time = (flow.server_conn.timestamp_ssl_setup - + flow.server_conn.timestamp_tcp_setup) # Calculate the raw timings from the different timestamps present in the # request and response object. For lack of a way to measure it dns timings @@ -111,92 +111,58 @@ def response(context, flow): # HAR timings are integers in ms, so we have to re-encode the raw timings to # that format. - timings = dict([(key, int(1000 * value)) - for key, value in timings_raw.iteritems()]) + timings = dict([(k, int(1000 * v)) for k, v in timings_raw.iteritems()]) - # The full_time is the sum of all timings. Timings set to -1 will be ignored - # as per spec. - full_time = 0 - for item in timings.values(): - if item > -1: - full_time += item + # The full_time is the sum of all timings. + # Timings set to -1 will be ignored as per spec. + full_time = sum(v for v in timings.values() if v > -1) started_date_time = datetime.utcfromtimestamp( flow.request.timestamp_start).isoformat() - request_query_string = "" - if flow.request.query: - request_query_string = [{"name": k, "value": v} - for k, v in flow.request.query] + request_query_string = [{"name": k, "value": v} + for k, v in flow.request.query or {}] - request_http_version = flow.request.http_version - # Cookies are shaped as tuples by MITMProxy. - request_cookies = [{"name": k.strip(), "value": v[0]} - for k, v in flow.request.cookies.items()] - - request_headers = "" - if flow.request.headers: - request_headers = [{"name": k, "value": v} - for k, v in flow.request.headers.fields] - - request_headers_size = len(str(flow.request.headers)) - request_body_size = len(flow.request.content) - - response_http_version = flow.response.http_version - - # Cookies are shaped as tuples by MITMProxy. - response_cookies = [{"name": k.strip(), "value": v[0]} - for k, v in flow.response.cookies.items()] - - response_headers = "" - if flow.response.headers: - response_headers = [{"name": k, "value": v} - for k, v in flow.response.headers.fields] - - response_headers_size = len(str(flow.response.headers)) response_body_size = len(flow.response.content) response_body_decoded_size = len(flow.response.get_decoded_content()) response_body_compression = response_body_decoded_size - response_body_size - response_mime_type = flow.response.headers.get('Content-Type', '') - response_redirect_url = flow.response.headers.get('Location', '') - - entry = HAR.entries( - { - "startedDateTime": started_date_time, - "time": full_time, - "request": { - "method": flow.request.method, - "url": flow.request.url, - "httpVersion": request_http_version, - "cookies": request_cookies, - "headers": request_headers, - "queryString": request_query_string, - "headersSize": request_headers_size, - "bodySize": request_body_size, - }, - "response": { - "status": flow.response.status_code, - "statusText": flow.response.msg, - "httpVersion": response_http_version, - "cookies": response_cookies, - "headers": response_headers, - "content": { - "size": response_body_size, - "compression": response_body_compression, - "mimeType": response_mime_type}, - "redirectURL": response_redirect_url, - "headersSize": response_headers_size, - "bodySize": response_body_size, + + entry = HAR.entries({ + "startedDateTime": started_date_time, + "time": full_time, + "request": { + "method": flow.request.method, + "url": flow.request.url, + "httpVersion": flow.request.http_version, + "cookies": format_cookies(flow.request.cookies), + "headers": format_headers(flow.request.headers), + "queryString": request_query_string, + "headersSize": len(str(flow.request.headers)), + "bodySize": len(flow.request.content), + }, + "response": { + "status": flow.response.status_code, + "statusText": flow.response.msg, + "httpVersion": flow.response.http_version, + "cookies": format_cookies(flow.response.cookies), + "headers": format_headers(flow.response.headers), + "content": { + "size": response_body_size, + "compression": response_body_compression, + "mimeType": flow.response.headers.get('Content-Type', '') }, - "cache": {}, - "timings": timings, - }) - - # If the current url is in the page list of context.HARLog or does not have - # a referrer we add it as a new pages object. - if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get( - 'Referer', - None) is None: + "redirectURL": flow.response.headers.get('Location', ''), + "headersSize": len(str(flow.response.headers)), + "bodySize": response_body_size, + }, + "cache": {}, + "timings": timings, + }) + + # If the current url is in the page list of context.HARLog or + # does not have a referrer, we add it as a new pages object. + if (flow.request.url in context.HARLog.get_page_list() or + flow.request.headers.get('Referer') is None): page_id = context.HARLog.create_page_id() context.HARLog.add( HAR.pages({ @@ -250,6 +216,18 @@ def done(context): ) +def format_cookies(obj): + if obj: + return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()] + return "" + + +def format_headers(obj): + if obj: + return [{"name": k, "value": v} for k, v in obj.fields] + return "" + + def print_attributes(obj, filter_string=None, hide_privates=False): """ Useful helper method to quickly get all attributes of an object and its -- cgit v1.2.3 From c982f579dd755adef2456bc2ad1d3fce06cbfd58 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Fri, 4 Mar 2016 02:32:01 +0530 Subject: Add basic test for har_extractor --- examples/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/__init__.py (limited to 'examples') diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 00000000..e69de29b -- cgit v1.2.3 From 1c8059937927d28d620785c8e9d05d034a442ce1 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Fri, 4 Mar 2016 03:02:33 +0530 Subject: Use default empty page_list --- examples/har_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'examples') diff --git a/examples/har_extractor.py b/examples/har_extractor.py index 43232ea9..be9b986b 100644 --- a/examples/har_extractor.py +++ b/examples/har_extractor.py @@ -16,7 +16,7 @@ class _HARLog(HAR.log): __page_count__ = 0 __page_ref__ = {} - def __init__(self, page_list): + def __init__(self, page_list=[]): self.__page_list__ = page_list self.__page_count__ = 0 self.__page_ref__ = {} @@ -66,7 +66,7 @@ def start(context, argv): '(- will output to stdout, filenames ending with .zhar ' 'will result in compressed har)' ) - context.HARLog = _HARLog(['https://github.com']) + context.HARLog = _HARLog() context.seen_server = set() -- cgit v1.2.3 From 7108d727055324d08ba82cbce5c91173ee599201 Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Fri, 4 Mar 2016 03:02:49 +0530 Subject: Fix pprint import --- examples/har_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'examples') diff --git a/examples/har_extractor.py b/examples/har_extractor.py index be9b986b..25661f7c 100644 --- a/examples/har_extractor.py +++ b/examples/har_extractor.py @@ -192,7 +192,7 @@ def done(context): """ Called once on script shutdown, after any other events. """ - from pprint import pprint + import pprint import json json_dump = context.HARLog.json() -- cgit v1.2.3