path: root/examples/har_dump.py
"""
This inline script can be used to dump flows as HAR files.
"""


import json
import pprint
import sys
from datetime import datetime

import pytz

import mitmproxy
from mitmproxy import version

HAR = {}


def start():
    """
        Called once on script startup before any other events.
    """
    if len(sys.argv) != 2:
        raise ValueError(
            'Usage: -s "har_dump.py filename" '
            '(- will output to stdout, filenames ending with .zhar '
            'will result in compressed har)'
        )

    HAR.update({
        "log": {
            "version": "1.2",
            "creator": {
                "name": "mitmproxy har_dump",
                "version": "0.1",
                "comment": "mitmproxy version %s" % version.MITMPROXY
            },
            "entries": []
        }
    })


def response(flow):
    """
        Called when a server response has been received.
    """
    entries = HAR["log"]["entries"]

    # TODO: SSL and Connect Timings

    # Calculate raw timings from timestamps.

    # DNS timings cannot be calculated, as there is no way to measure them here.
    # The same goes for HAR "blocked".

    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. The time spent waiting therefore
    # falls between request.timestamp_end and response.timestamp_start, which
    # corresponds to HAR "wait".
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that format.
    timings = {k: int(1000 * v) for k, v in timings_raw.items()}

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)
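    # Illustrative example (made-up numbers): send=2 ms, wait=70 ms and
    # receive=12 ms give time=84 ms; a timing of -1 would simply be left out
    # of the sum.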

    started_date_time = datetime.utcfromtimestamp(
        flow.request.timestamp_start).replace(tzinfo=pytz.timezone("UTC")).isoformat()
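    # The resulting string looks like e.g. "2016-07-12T20:48:43.581000+00:00"
    # (illustrative value).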

    # Size calculations
    # flow.response.content is the decoded body; raw_content (assuming a
    # mitmproxy version that exposes it) is the body as received on the wire,
    # so the difference is the number of bytes saved by compression.
    response_body_size = len(flow.response.raw_content)
    response_body_decoded_size = len(flow.response.content)
    response_body_compression = response_body_decoded_size - response_body_size

    entries.append({
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": name_value(flow.request.cookies),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": len(flow.request.content),
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": name_value(flow.response.cookies),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    })


def done():
    """
        Called once on script shutdown, after any other events.
    """
    dump_file = sys.argv[1]

    if dump_file == '-':
        mitmproxy.ctx.log(pprint.pformat(HAR))
    # TODO: .zhar compression
    else:
        # json.dumps returns a str, so open the file in text mode.
        with open(dump_file, "w") as f:
            f.write(json.dumps(HAR, indent=2))
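    # One possible approach for the .zhar TODO above (a sketch, not wired in
    # here): compress the JSON bytes with zlib before writing, e.g.
    #   import zlib
    #   raw = zlib.compress(json.dumps(HAR, indent=2).encode(), 9)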

    # TODO: Log results via mitmproxy.ctx.log


def name_value(obj):
    """
        Convert (key, value) pairs to HAR format.
    """

    if hasattr(obj, 'fields'):
        items = obj.fields
    elif hasattr(obj, 'items'):
        items = obj.items()
    else:
        items = []

    # HAR expects an array here, so return an empty list (rather than "")
    # when there is nothing to convert.
    return [{"name": k, "value": v} for k, v in items]