diff options
27 files changed, 879 insertions, 478 deletions
diff --git a/examples/README b/examples/README index adfcd0f2..b4dec8e5 100644 --- a/examples/README +++ b/examples/README @@ -1,3 +1,7 @@ +Some inline scripts may require additional dependencies, which can be installed using +`pip install mitmproxy[examples]`. + + # inline script examples add_header.py Simple script that just adds a header to every request. change_upstream_proxy.py Dynamically change the upstream proxy diff --git a/examples/tls_passthrough.py b/examples/tls_passthrough.py new file mode 100644 index 00000000..7b4dec62 --- /dev/null +++ b/examples/tls_passthrough.py @@ -0,0 +1,136 @@ +""" +This inline script allows conditional TLS Interception based +on a user-defined strategy. + +Example: + + > mitmdump -s tls_passthrough.py + + 1. curl --proxy http://localhost:8080 https://example.com --insecure + // works - we'll also see the contents in mitmproxy + + 2. curl --proxy http://localhost:8080 https://example.com --insecure + // still works - we'll also see the contents in mitmproxy + + 3. curl --proxy http://localhost:8080 https://example.com + // fails with a certificate error, which we will also see in mitmproxy + + 4. curl --proxy http://localhost:8080 https://example.com + // works again, but mitmproxy does not intercept and we do *not* see the contents + +Authors: Maximilian Hils, Matthew Tuusberg +""" +from __future__ import (absolute_import, print_function, division) +import collections +import random + +from enum import Enum + +from libmproxy.exceptions import TlsException +from libmproxy.protocol import TlsLayer, RawTCPLayer + + +class InterceptionResult(Enum): + success = True + failure = False + skipped = None + + +class _TlsStrategy(object): + """ + Abstract base class for interception strategies. + """ + def __init__(self): + # A server_address -> interception results mapping + self.history = collections.defaultdict(lambda: collections.deque(maxlen=200)) + + def should_intercept(self, server_address): + """ + Returns: + True, if we should attempt to intercept the connection. + False, if we want to employ pass-through instead. + """ + raise NotImplementedError() + + def record_success(self, server_address): + self.history[server_address].append(InterceptionResult.success) + + def record_failure(self, server_address): + self.history[server_address].append(InterceptionResult.failure) + + def record_skipped(self, server_address): + self.history[server_address].append(InterceptionResult.skipped) + + +class ConservativeStrategy(_TlsStrategy): + """ + Conservative Interception Strategy - only intercept if there haven't been any failed attempts + in the history. + """ + + def should_intercept(self, server_address): + if InterceptionResult.failure in self.history[server_address]: + return False + return True + + +class ProbabilisticStrategy(_TlsStrategy): + """ + Fixed probability that we intercept a given connection. + """ + def __init__(self, p): + self.p = p + super(ProbabilisticStrategy, self).__init__() + + def should_intercept(self, server_address): + return random.uniform(0, 1) < self.p + + +class TlsFeedback(TlsLayer): + """ + Monkey-patch _establish_tls_with_client to get feedback if TLS could be established + successfully on the client connection (which may fail due to cert pinning). + """ + + def _establish_tls_with_client(self): + server_address = self.server_conn.address + tls_strategy = self.script_context.tls_strategy + + try: + super(TlsFeedback, self)._establish_tls_with_client() + except TlsException as e: + tls_strategy.record_failure(server_address) + raise e + else: + tls_strategy.record_success(server_address) + + +# inline script hooks below. + + +def start(context, argv): + if len(argv) == 2: + context.tls_strategy = ProbabilisticStrategy(float(argv[1])) + else: + context.tls_strategy = ConservativeStrategy() + + +def next_layer(context, next_layer): + """ + This hook does the actual magic - if the next layer is planned to be a TLS layer, + we check if we want to enter pass-through mode instead. + """ + if isinstance(next_layer, TlsLayer) and next_layer._client_tls: + server_address = next_layer.server_conn.address + + if context.tls_strategy.should_intercept(server_address): + # We try to intercept. + # Monkey-Patch the layer to get feedback from the TLSLayer if interception worked. + next_layer.__class__ = TlsFeedback + next_layer.script_context = context + else: + # We don't intercept - reply with a pass-through layer and add a "skipped" entry. + context.log("TLS passthrough for %s" % repr(next_layer.server_conn.address), "info") + next_layer_replacement = RawTCPLayer(next_layer.ctx, logging=False) + next_layer.reply(next_layer_replacement) + context.tls_strategy.record_skipped(server_address) diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py index 7f6f69ef..3779953f 100644 --- a/libmproxy/cmdline.py +++ b/libmproxy/cmdline.py @@ -1,11 +1,11 @@ from __future__ import absolute_import import os import re + import configargparse -from netlib.tcp import Address, sslversion_choices +from netlib.tcp import Address, sslversion_choices import netlib.utils - from . import filt, utils, version from .proxy import config @@ -358,6 +358,20 @@ def proxy_options(parser): action="store", type=int, dest="port", default=8080, help="Proxy service port." ) + http2 = group.add_mutually_exclusive_group() + http2.add_argument("--http2", action="store_true", dest="http2") + http2.add_argument("--no-http2", action="store_false", dest="http2", + help="Explicitly enable/disable experimental HTTP2 support. " + "Disabled by default. " + "Default value will change in a future version." + ) + rawtcp = group.add_mutually_exclusive_group() + rawtcp.add_argument("--raw-tcp", action="store_true", dest="rawtcp") + rawtcp.add_argument("--no-raw-tcp", action="store_false", dest="rawtcp", + help="Explicitly enable/disable experimental raw tcp support. " + "Disabled by default. " + "Default value will change in a future version." + ) def proxy_ssl_options(parser): diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index 2133f97f..3bc0c091 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -14,9 +14,9 @@ import traceback import urwid import weakref -from .. import controller, flow, script +from .. import controller, flow, script, contentviews from . import flowlist, flowview, help, window, signals, options -from . import grideditor, palettes, contentview, statusbar, palettepicker +from . import grideditor, palettes, statusbar, palettepicker EVENTLOG_SIZE = 500 @@ -26,7 +26,7 @@ class ConsoleState(flow.State): flow.State.__init__(self) self.focus = None self.follow_focus = None - self.default_body_view = contentview.get("Auto") + self.default_body_view = contentviews.get("Auto") self.flowsettings = weakref.WeakKeyDictionary() self.last_search = None @@ -648,7 +648,7 @@ class ConsoleMaster(flow.FlowMaster): return self.state.set_intercept(txt) def change_default_display_mode(self, t): - v = contentview.get_by_shortcut(t) + v = contentviews.get_by_shortcut(t) self.state.default_body_view = v self.refresh_focus() diff --git a/libmproxy/console/flowview.py b/libmproxy/console/flowview.py index 19917555..3e13fab4 100644 --- a/libmproxy/console/flowview.py +++ b/libmproxy/console/flowview.py @@ -1,15 +1,17 @@ from __future__ import absolute_import import os +import traceback import sys + import urwid from netlib import odict from netlib.http.semantics import CONTENT_MISSING, Headers - -from . import common, grideditor, contentview, signals, searchable, tabs +from . import common, grideditor, signals, searchable, tabs from . import flowdetailview -from .. import utils, controller +from .. import utils, controller, contentviews from ..models import HTTPRequest, HTTPResponse, decoded +from ..exceptions import ContentViewException class SearchError(Exception): @@ -165,10 +167,10 @@ class FlowView(tabs.Tabs): if flow == self.flow: self.show() - def content_view(self, viewmode, conn): - if conn.content == CONTENT_MISSING: + def content_view(self, viewmode, message): + if message.body == CONTENT_MISSING: msg, body = "", [urwid.Text([("error", "[content missing]")])] - return (msg, body) + return msg, body else: full = self.state.get_flow_setting( self.flow, @@ -178,16 +180,44 @@ class FlowView(tabs.Tabs): if full: limit = sys.maxsize else: - limit = contentview.VIEW_CUTOFF - description, text_objects = cache.get( - contentview.get_content_view, + limit = contentviews.VIEW_CUTOFF + return cache.get( + self._get_content_view, viewmode, - conn.headers, - conn.content, - limit, - isinstance(conn, HTTPRequest) + message, + limit + ) + + def _get_content_view(self, viewmode, message, max_lines): + + try: + description, lines = contentviews.get_content_view( + viewmode, message.body, headers=message.headers + ) + except ContentViewException: + s = "Content viewer failed: \n" + traceback.format_exc() + signals.add_event(s, "error") + description, lines = contentviews.get_content_view( + contentviews.get("Raw"), message.body, headers=message.headers ) - return (description, text_objects) + description = description.replace("Raw", "Couldn't parse: falling back to Raw") + + # Give hint that you have to tab for the response. + if description == "No content" and isinstance(message, HTTPRequest): + description = "No request content (press tab to view response)" + + text_objects = [] + for line in lines: + text_objects.append(urwid.Text(line)) + if len(text_objects) == max_lines: + text_objects.append(urwid.Text([ + ("highlight", "Stopped displaying data after %d lines. Press " % max_lines), + ("key", "f"), + ("highlight", " to load all data.") + ])) + break + + return description, text_objects def viewmode_get(self): override = self.state.get_flow_setting( @@ -211,9 +241,7 @@ class FlowView(tabs.Tabs): [ ("heading", msg), ] - ) - ] - cols.append( + ), urwid.Text( [ " ", @@ -223,7 +251,7 @@ class FlowView(tabs.Tabs): ], align="right" ) - ) + ] title = urwid.AttrWrap(urwid.Columns(cols), "heading") txt.append(title) @@ -455,7 +483,7 @@ class FlowView(tabs.Tabs): self.state.add_flow_setting( self.flow, (self.tab_offset, "prettyview"), - contentview.get_by_shortcut(t) + contentviews.get_by_shortcut(t) ) signals.flow_change.send(self, flow = self.flow) @@ -595,7 +623,7 @@ class FlowView(tabs.Tabs): scope = "s" common.ask_copy_part(scope, self.flow, self.master, self.state) elif key == "m": - p = list(contentview.view_prompts) + p = list(contentviews.view_prompts) p.insert(0, ("Clear", "C")) signals.status_prompt_onekey.send( self, diff --git a/libmproxy/console/options.py b/libmproxy/console/options.py index 58a4d469..a365a78c 100644 --- a/libmproxy/console/options.py +++ b/libmproxy/console/options.py @@ -1,6 +1,7 @@ import urwid -from . import common, signals, grideditor, contentview +from .. import contentviews +from . import common, signals, grideditor from . import select, palettes footer = [ @@ -157,7 +158,7 @@ class Options(urwid.WidgetWrap): self.master.scripts = [] self.master.set_stickyauth(None) self.master.set_stickycookie(None) - self.master.state.default_body_view = contentview.get("Auto") + self.master.state.default_body_view = contentviews.get("Auto") signals.update_settings.send(self) signals.status_message.send( @@ -232,7 +233,7 @@ class Options(urwid.WidgetWrap): def default_displaymode(self): signals.status_prompt_onekey.send( prompt = "Global default display mode", - keys = contentview.view_prompts, + keys = contentviews.view_prompts, callback = self.master.change_default_display_mode ) diff --git a/libmproxy/console/contentview.py b/libmproxy/contentviews.py index 17ed90e1..9af08033 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/contentviews.py @@ -1,23 +1,38 @@ -from __future__ import absolute_import +""" +Mitmproxy Content Views +======================= + +mitmproxy includes a set of content views which can be used to format/decode/highlight data. +While they are currently used for HTTP message bodies only, the may be used in other contexts +in the future, e.g. to decode protobuf messages sent as WebSocket frames. + +Thus, the View API is very minimalistic. The only arguments are `data` and `**metadata`, +where `data` is the actual content (as bytes). The contents on metadata depend on the protocol in +use. For HTTP, the message headers are passed as the ``headers`` keyword argument. + +""" +from __future__ import (absolute_import, print_function, division) import cStringIO import json import logging +import subprocess +import sys + import lxml.html import lxml.etree from PIL import Image from PIL.ExifTags import TAGS -import subprocess -import traceback -import urwid import html2text +import six -import netlib.utils +from netlib.odict import ODict from netlib import encoding +from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type -from . import common, signals -from .. import utils -from ..contrib import jsbeautifier -from ..contrib.wbxml.ASCommandResponse import ASCommandResponse +from . import utils +from .exceptions import ContentViewException +from .contrib import jsbeautifier +from .contrib.wbxml.ASCommandResponse import ASCommandResponse try: import pyamf @@ -37,89 +52,119 @@ else: cssutils.ser.prefs.indentClosingBrace = False cssutils.ser.prefs.validOnly = False -VIEW_CUTOFF = 1024 * 50 +# Default view cutoff *in lines* +VIEW_CUTOFF = 512 +KEY_MAX = 30 -def _view_text(content, total, limit): + +def format_dict(d): """ - Generates a body for a chunk of text. + Helper function that transforms the given dictionary into a list of + ("key", key ) + ("value", value) + tuples, where key is padded to a uniform width. """ - txt = [] - for i in netlib.utils.cleanBin(content).splitlines(): - txt.append( - urwid.Text(("text", i), wrap="any") - ) - trailer(total, txt, limit) - return txt - - -def trailer(clen, txt, limit): - rem = clen - limit - if rem > 0: - txt.append(urwid.Text("")) - txt.append( - urwid.Text( - [ - ("highlight", "... %s of data not shown. Press " % netlib.utils.pretty_size(rem)), - ("key", "f"), - ("highlight", " to load all data.") - ] - ) - ) + max_key_len = max(len(k) for k in d.keys()) + max_key_len = min(max_key_len, KEY_MAX) + for key, value in d.items(): + key += ":" + key = key.ljust(max_key_len + 2) + yield [ + ("header", key), + ("text", value) + ] + +def format_text(text): + """ + Helper function that transforms bytes into the view output format. + """ + for line in text.splitlines(): + yield [("text", line)] + + +class View(object): + name = None + prompt = () + content_types = [] + + def __call__(self, data, **metadata): + """ + Transform raw data into human-readable output. -class ViewAuto: + Args: + data: the data to decode/format as bytes. + metadata: optional keyword-only arguments for metadata. Implementations must not + rely on a given argument being present. + + Returns: + A (description, content generator) tuple. + + The content generator yields lists of (style, text) tuples, where each list represents + a single line. ``text`` is a unfiltered byte string which may need to be escaped, + depending on the used output. + + Caveats: + The content generator must not yield tuples of tuples, + because urwid cannot process that. You have to yield a *list* of tuples per line. + """ + raise NotImplementedError() + + +class ViewAuto(View): name = "Auto" prompt = ("auto", "a") content_types = [] - def __call__(self, hdrs, content, limit): - ctype = hdrs.get("content-type") + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + ctype = headers.get("content-type") if ctype: - ct = netlib.utils.parse_content_type(ctype) if ctype else None + ct = parse_content_type(ctype) if ctype else None ct = "%s/%s" % (ct[0], ct[1]) if ct in content_types_map: - return content_types_map[ct][0](hdrs, content, limit) - elif utils.isXML(content): - return get("XML")(hdrs, content, limit) - return get("Raw")(hdrs, content, limit) + return content_types_map[ct][0](data, **metadata) + elif utils.isXML(data): + return get("XML")(data, **metadata) + if utils.isMostlyBin(data): + return get("Hex")(data) + return get("Raw")(data) -class ViewRaw: +class ViewRaw(View): name = "Raw" prompt = ("raw", "r") content_types = [] - def __call__(self, hdrs, content, limit): - txt = _view_text(content[:limit], len(content), limit) - return "Raw", txt + def __call__(self, data, **metadata): + return "Raw", format_text(data) -class ViewHex: +class ViewHex(View): name = "Hex" prompt = ("hex", "e") content_types = [] - def __call__(self, hdrs, content, limit): - txt = [] - for offset, hexa, s in netlib.utils.hexdump(content[:limit]): - txt.append(urwid.Text([ - ("offset", offset), - " ", - ("text", hexa), - " ", - ("text", s), - ])) - trailer(len(content), txt, limit) - return "Hex", txt - - -class ViewXML: + @staticmethod + def _format(data): + for offset, hexa, s in hexdump(data): + yield [ + ("offset", offset + " "), + ("text", hexa + " "), + ("text", s) + ] + + def __call__(self, data, **metadata): + return "Hex", self._format(data) + + +class ViewXML(View): name = "XML" prompt = ("xml", "x") content_types = ["text/xml"] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): parser = lxml.etree.XMLParser( remove_blank_text=True, resolve_entities=False, @@ -127,7 +172,7 @@ class ViewXML: recover=False ) try: - document = lxml.etree.fromstring(content, parser) + document = lxml.etree.fromstring(data, parser) except lxml.etree.XMLSyntaxError: return None docinfo = document.getroottree().docinfo @@ -150,108 +195,84 @@ class ViewXML: pretty_print=True, xml_declaration=True, doctype=doctype or None, - encoding = docinfo.encoding + encoding=docinfo.encoding ) - txt = [] - for i in s[:limit].strip().split("\n"): - txt.append( - urwid.Text(("text", i)), - ) - trailer(len(content), txt, limit) - return "XML-like data", txt + return "XML-like data", format_text(s) -class ViewJSON: +class ViewJSON(View): name = "JSON" prompt = ("json", "s") content_types = ["application/json"] - def __call__(self, hdrs, content, limit): - lines = utils.pretty_json(content) - if lines: - txt = [] - sofar = 0 - for i in lines: - sofar += len(i) - txt.append( - urwid.Text(("text", i)), - ) - if sofar > limit: - break - trailer(sum(len(i) for i in lines), txt, limit) - return "JSON", txt + def __call__(self, data, **metadata): + pretty_json = utils.pretty_json(data) + if pretty_json: + return "JSON", format_text(pretty_json) -class ViewHTML: +class ViewHTML(View): name = "HTML" prompt = ("html", "h") content_types = ["text/html"] - def __call__(self, hdrs, content, limit): - if utils.isXML(content): + def __call__(self, data, **metadata): + if utils.isXML(data): parser = lxml.etree.HTMLParser( strip_cdata=True, remove_blank_text=True ) - d = lxml.html.fromstring(content, parser=parser) + d = lxml.html.fromstring(data, parser=parser) docinfo = d.getroottree().docinfo s = lxml.etree.tostring( d, pretty_print=True, doctype=docinfo.doctype ) - return "HTML", _view_text(s[:limit], len(s), limit) + return "HTML", format_text(s) -class ViewHTMLOutline: +class ViewHTMLOutline(View): name = "HTML Outline" prompt = ("html outline", "o") content_types = ["text/html"] - def __call__(self, hdrs, content, limit): - content = content.decode("utf-8") + def __call__(self, data, **metadata): + data = data.decode("utf-8") h = html2text.HTML2Text(baseurl="") h.ignore_images = True h.body_width = 0 - content = h.handle(content) - txt = _view_text(content[:limit], len(content), limit) - return "HTML Outline", txt + outline = h.handle(data) + return "HTML Outline", format_text(outline) -class ViewURLEncoded: +class ViewURLEncoded(View): name = "URL-encoded" prompt = ("urlencoded", "u") content_types = ["application/x-www-form-urlencoded"] - def __call__(self, hdrs, content, limit): - lines = netlib.utils.urldecode(content) - if lines: - body = common.format_keyvals( - [(k + ":", v) for (k, v) in lines], - key = "header", - val = "text" - ) - return "URLEncoded form", body + def __call__(self, data, **metadata): + d = urldecode(data) + return "URLEncoded form", format_dict(ODict(d)) -class ViewMultipart: +class ViewMultipart(View): name = "Multipart Form" prompt = ("multipart", "m") content_types = ["multipart/form-data"] - def __call__(self, hdrs, content, limit): - v = netlib.utils.multipartdecode(hdrs, content) + @staticmethod + def _format(v): + yield [("highlight", "Form data:\n")] + for message in format_dict(ODict(v)): + yield message + + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + v = multipartdecode(headers, data) if v: - r = [ - urwid.Text(("highlight", "Form data:\n")), - ] - r.extend(common.format_keyvals( - v, - key = "header", - val = "text" - )) - return "Multipart form", r + return "Multipart form", self._format(v) if pyamf: @@ -263,6 +284,7 @@ if pyamf: data = input.readObject() self["data"] = data + def pyamf_class_loader(s): for i in pyamf.CLASS_LOADERS: if i != pyamf_class_loader: @@ -271,9 +293,11 @@ if pyamf: return v return DummyObject + pyamf.register_class_loader(pyamf_class_loader) - class ViewAMF: + + class ViewAMF(View): name = "AMF" prompt = ("amf", "f") content_types = ["application/x-amf"] @@ -300,31 +324,30 @@ if pyamf: else: return b - def __call__(self, hdrs, content, limit): - envelope = remoting.decode(content, strict=False) - if not envelope: - return None - - txt = [] + def _format(self, envelope): for target, message in iter(envelope): if isinstance(message, pyamf.remoting.Request): - txt.append(urwid.Text([ + yield [ ("header", "Request: "), ("text", str(target)), - ])) + ] else: - txt.append(urwid.Text([ + yield [ ("header", "Response: "), ("text", "%s, code %s" % (target, message.status)), - ])) + ] s = json.dumps(self.unpack(message), indent=4) - txt.extend(_view_text(s[:limit], len(s), limit)) + for msg in format_text(s): + yield msg - return "AMF v%s" % envelope.amfVersion, txt + def __call__(self, data, **metadata): + envelope = remoting.decode(data, strict=False) + if envelope: + return "AMF v%s" % envelope.amfVersion, self._format(envelope) -class ViewJavaScript: +class ViewJavaScript(View): name = "JavaScript" prompt = ("javascript", "j") content_types = [ @@ -333,31 +356,31 @@ class ViewJavaScript: "text/javascript" ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): opts = jsbeautifier.default_options() opts.indent_size = 2 - res = jsbeautifier.beautify(content[:limit], opts) - return "JavaScript", _view_text(res, len(res), limit) + res = jsbeautifier.beautify(data, opts) + return "JavaScript", format_text(res) -class ViewCSS: +class ViewCSS(View): name = "CSS" prompt = ("css", "c") content_types = [ "text/css" ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): if cssutils: - sheet = cssutils.parseString(content) + sheet = cssutils.parseString(data) beautified = sheet.cssText else: - beautified = content + beautified = data - return "CSS", _view_text(beautified, len(beautified), limit) + return "CSS", format_text(beautified) -class ViewImage: +class ViewImage(View): name = "Image" prompt = ("image", "i") content_types = [ @@ -368,9 +391,9 @@ class ViewImage: "image/x-icon", ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): try: - img = Image.open(cStringIO.StringIO(content)) + img = Image.open(cStringIO.StringIO(data)) except IOError: return None parts = [ @@ -391,20 +414,11 @@ class ViewImage: parts.append( (str(tag), str(ex[i])) ) - clean = [] - for i in parts: - clean.append( - [netlib.utils.cleanBin(i[0]), netlib.utils.cleanBin(i[1])] - ) - fmt = common.format_keyvals( - clean, - key = "header", - val = "text" - ) + fmt = format_dict(ODict(parts)) return "%s image" % img.format, fmt -class ViewProtobuf: +class ViewProtobuf(View): """Human friendly view of protocol buffers The view uses the protoc compiler to decode the binary """ @@ -441,13 +455,12 @@ class ViewProtobuf: else: return err - def __call__(self, hdrs, content, limit): - decoded = self.decode_protobuf(content) - txt = _view_text(decoded[:limit], len(decoded), limit) - return "Protobuf", txt + def __call__(self, data, **metadata): + decoded = self.decode_protobuf(data) + return "Protobuf", format_text(decoded) -class ViewWBXML: +class ViewWBXML(View): name = "WBXML" prompt = ("wbxml", "w") content_types = [ @@ -455,16 +468,17 @@ class ViewWBXML: "application/vnd.ms-sync.wbxml" ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): try: - parser = ASCommandResponse(content) + parser = ASCommandResponse(data) parsedContent = parser.xmlString - txt = _view_text(parsedContent, len(parsedContent), limit) - return "WBXML", txt + if parsedContent: + return "WBXML", format_text(parsedContent) except: return None + views = [ ViewAuto(), ViewRaw(), @@ -492,7 +506,6 @@ for i in views: l = content_types_map.setdefault(ct, []) l.append(i) - view_prompts = [i.prompt for i in views] @@ -508,34 +521,57 @@ def get(name): return i -def get_content_view(viewmode, headers, content, limit, is_request): +def safe_to_print(lines, encoding="utf8"): """ - Returns a (msg, body) tuple. + Wraps a content generator so that each text portion is a *safe to print* unicode string. """ - if not content: - if is_request: - return "No request content (press tab to view response)", "" - else: - return "No content", "" + for line in lines: + clean_line = [] + for (style, text) in line: + try: + text = clean_bin(text.decode(encoding, "strict")) + except UnicodeDecodeError: + text = clean_bin(text).decode(encoding, "strict") + clean_line.append((style, text)) + yield clean_line + + +def get_content_view(viewmode, data, **metadata): + """ + Args: + viewmode: the view to use. + data, **metadata: arguments passed to View instance. + + Returns: + A (description, content generator) tuple. + In contrast to calling the views directly, text is always safe-to-print unicode. + + Raises: + ContentViewException, if the content view threw an error. + """ + if not data: + return "No content", [] msg = [] + headers = metadata.get("headers", {}) enc = headers.get("content-encoding") if enc and enc != "identity": - decoded = encoding.decode(enc, content) + decoded = encoding.decode(enc, data) if decoded: - content = decoded + data = decoded msg.append("[decoded %s]" % enc) try: - ret = viewmode(headers, content, limit) + ret = viewmode(data, **metadata) # Third-party viewers can fail in unexpected ways... - except Exception: - s = traceback.format_exc() - s = "Content viewer failed: \n" + s - signals.add_event(s, "error") - ret = None + except Exception as e: + six.reraise( + ContentViewException, + ContentViewException(str(e)), + sys.exc_info()[2] + ) if not ret: - ret = get("Raw")(headers, content, limit) + ret = get("Raw")(data, **metadata) msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) - return " ".join(msg), ret[1] + return " ".join(msg), safe_to_print(ret[1]) diff --git a/libmproxy/contrib/wbxml/ASCommandResponse.py b/libmproxy/contrib/wbxml/ASCommandResponse.py index 7bd31409..08d03445 100644 --- a/libmproxy/contrib/wbxml/ASCommandResponse.py +++ b/libmproxy/contrib/wbxml/ASCommandResponse.py @@ -38,10 +38,10 @@ class ASCommandResponse: if ( len(response) > 0): self.xmlString = self.decodeWBXML(self.wbxmlBody) else: - logging.error("Empty WBXML body passed") + raise ValueError("Empty WBXML body passed") except Exception as e: - logging.error("Error: {0}".format(e.message)) self.xmlString = None + raise ValueError("Error: {0}".format(e.message)) def getWBXMLBytes(self): return self.wbxmlBytes @@ -70,4 +70,3 @@ if __name__ == "__main__": logging.info("-"*100) instance = ASCommandResponse(byteWBXML) logging.info(instance.xmlString) -
\ No newline at end of file diff --git a/libmproxy/dump.py b/libmproxy/dump.py index 17b47dd2..9fc9e1b8 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -1,14 +1,16 @@ from __future__ import absolute_import, print_function -import json import sys import os +import traceback + +import click +import itertools from netlib.http.semantics import CONTENT_MISSING import netlib.utils - -from . import flow, filt, utils -from .protocol import http - +from . import flow, filt, contentviews +from .exceptions import ContentViewException +from .models import HTTPRequest class DumpError(Exception): pass @@ -55,26 +57,8 @@ class Options(object): setattr(self, i, None) -def str_response(resp): - r = "%s %s" % (resp.code, resp.msg) - if resp.is_replay: - r = "[replay] " + r - return r - - -def str_request(f, showhost): - if f.client_conn: - c = f.client_conn.address.host - else: - c = "[replay]" - r = "%s %s %s" % (c, f.request.method, f.request.pretty_url(showhost)) - if f.request.stickycookie: - r = "[stickycookie] " + r - return r - - class DumpMaster(flow.FlowMaster): - def __init__(self, server, options, outfile=sys.stdout): + def __init__(self, server, options, outfile=None): flow.FlowMaster.__init__(self, server, flow.State()) self.outfile = outfile self.o = options @@ -103,7 +87,7 @@ class DumpMaster(flow.FlowMaster): if options.outfile: path = os.path.expanduser(options.outfile[0]) try: - f = file(path, options.outfile[1]) + f = open(path, options.outfile[1]) self.start_stream(f, self.filt) except IOError as v: raise DumpError(v.strerror) @@ -163,72 +147,168 @@ class DumpMaster(flow.FlowMaster): def add_event(self, e, level="info"): needed = dict(error=0, info=1, debug=2).get(level, 1) if self.o.verbosity >= needed: - print(e, file=self.outfile) - self.outfile.flush() + self.echo( + e, + fg="red" if level == "error" else None, + dim=(level == "debug") + ) @staticmethod - def indent(n, t): - l = str(t).strip().splitlines() + def indent(n, text): + l = str(text).strip().splitlines() pad = " " * n return "\n".join(pad + i for i in l) - def _print_message(self, message): + def echo(self, text, indent=None, **style): + if indent: + text = self.indent(indent, text) + click.secho(text, file=self.outfile, **style) + + def _echo_message(self, message): if self.o.flow_detail >= 2: - print(self.indent(4, str(message.headers)), file=self.outfile) + headers = "\r\n".join( + "{}: {}".format( + click.style(k, fg="blue", bold=True), + click.style(v, fg="blue")) + for k, v in message.headers.fields + ) + self.echo(headers, indent=4) if self.o.flow_detail >= 3: - if message.content == CONTENT_MISSING: - print(self.indent(4, "(content missing)"), file=self.outfile) - elif message.content: - print("", file=self.outfile) - content = message.get_decoded_content() - if not utils.isBin(content): - try: - jsn = json.loads(content) - print( - self.indent( - 4, - json.dumps( - jsn, - indent=2)), - file=self.outfile) - except ValueError: - print(self.indent(4, content), file=self.outfile) + if message.body == CONTENT_MISSING: + self.echo("(content missing)", indent=4) + elif message.body: + self.echo("") + + try: + type, lines = contentviews.get_content_view( + contentviews.get("Auto"), + message.body, + headers=message.headers + ) + except ContentViewException: + s = "Content viewer failed: \n" + traceback.format_exc() + self.add_event(s, "debug") + type, lines = contentviews.get_content_view( + contentviews.get("Raw"), + message.body, + headers=message.headers + ) + + styles = dict( + highlight=dict(bold=True), + offset=dict(fg="blue"), + header=dict(fg="green", bold=True), + text=dict(fg="green") + ) + + def colorful(line): + yield u" " # we can already indent here + for (style, text) in line: + yield click.style(text, **styles.get(style, {})) + + if self.o.flow_detail == 3: + lines_to_echo = itertools.islice(lines, 70) else: - d = netlib.utils.hexdump(content) - d = "\n".join("%s\t%s %s" % i for i in d) - print(self.indent(4, d), file=self.outfile) + lines_to_echo = lines + + lines_to_echo = list(lines_to_echo) + + content = u"\r\n".join( + u"".join(colorful(line)) for line in lines_to_echo + ) + + self.echo(content) + if next(lines, None): + self.echo("(cut off)", indent=4, dim=True) + if self.o.flow_detail >= 2: - print("", file=self.outfile) + self.echo("") - def _process_flow(self, f): - self.state.delete_flow(f) - if self.filt and not f.match(self.filt): - return + def _echo_request_line(self, flow): + if flow.request.stickycookie: + stickycookie = click.style("[stickycookie] ", fg="yellow", bold=True) + else: + stickycookie = "" + + if flow.client_conn: + client = click.style(flow.client_conn.address.host, bold=True) + else: + client = click.style("[replay]", fg="yellow", bold=True) + + method = flow.request.method + method_color=dict( + GET="green", + DELETE="red" + ).get(method.upper(), "magenta") + method = click.style(method, fg=method_color, bold=True) + url = click.style(flow.request.pretty_url(self.showhost), bold=True) + + line = "{stickycookie}{client} {method} {url}".format( + stickycookie=stickycookie, + client=client, + method=method, + url=url + ) + self.echo(line) + + def _echo_response_line(self, flow): + if flow.response.is_replay: + replay = click.style("[replay] ", fg="yellow", bold=True) + else: + replay = "" + + code = flow.response.status_code + code_color = None + if 200 <= code < 300: + code_color = "green" + elif 300 <= code < 400: + code_color = "magenta" + elif 400 <= code < 600: + code_color = "red" + code = click.style(str(code), fg=code_color, bold=True, blink=(code == 418)) + msg = click.style(flow.response.msg, fg=code_color, bold=True) + + if flow.response.content == CONTENT_MISSING: + size = "(content missing)" + else: + size = netlib.utils.pretty_size(len(flow.response.content)) + size = click.style(size, bold=True) + + arrows = click.style("<<", bold=True) + line = "{replay} {arrows} {code} {msg} {size}".format( + replay=replay, + arrows=arrows, + code=code, + msg=msg, + size=size + ) + self.echo(line) + + def echo_flow(self, f): if self.o.flow_detail == 0: return if f.request: - print(str_request(f, self.showhost), file=self.outfile) - self._print_message(f.request) + self._echo_request_line(f) + self._echo_message(f.request) if f.response: - if f.response.content == CONTENT_MISSING: - sz = "(content missing)" - else: - sz = netlib.utils.pretty_size(len(f.response.content)) - print( - " << %s %s" % - (str_response( - f.response), - sz), - file=self.outfile) - self._print_message(f.response) + self._echo_response_line(f) + self._echo_message(f.response) if f.error: - print(" << {}".format(f.error.msg), file=self.outfile) + self.echo(" << {}".format(f.error.msg), bold=True, fg="red") + + if self.outfile: + self.outfile.flush() + + def _process_flow(self, f): + self.state.delete_flow(f) + if self.filt and not f.match(self.filt): + return - self.outfile.flush() + self.echo_flow(f) def handle_request(self, f): flow.FlowMaster.handle_request(self, f) diff --git a/libmproxy/exceptions.py b/libmproxy/exceptions.py index 6b997041..d916f457 100644 --- a/libmproxy/exceptions.py +++ b/libmproxy/exceptions.py @@ -11,18 +11,9 @@ from __future__ import (absolute_import, print_function, division) class ProxyException(Exception): """ Base class for all exceptions thrown by libmproxy. - - Args: - message: the error message - cause: (optional) an error object that caused this exception, e.g. an IOError. """ - def __init__(self, message, cause=None): - """ - :param message: Error Message - :param cause: Exception object that caused this exception to be thrown. - """ + def __init__(self, message=None): super(ProxyException, self).__init__(message) - self.cause = cause class ProtocolException(ProxyException): @@ -33,6 +24,12 @@ class TlsException(ProtocolException): pass +class ClientHandshakeException(TlsException): + def __init__(self, message, server): + super(ClientHandshakeException, self).__init__(message) + self.server = server + + class Socks5Exception(ProtocolException): pass @@ -47,3 +44,7 @@ class InvalidCredentials(HttpException): class ServerException(ProxyException): pass + + +class ContentViewException(ProxyException): + pass diff --git a/libmproxy/models/__init__.py b/libmproxy/models/__init__.py index a54f305f..ff1bcbc1 100644 --- a/libmproxy/models/__init__.py +++ b/libmproxy/models/__init__.py @@ -1,14 +1,14 @@ from __future__ import (absolute_import, print_function, division) from .http import ( - HTTPFlow, HTTPRequest, HTTPResponse, decoded, + HTTPFlow, HTTPRequest, HTTPResponse, Headers, decoded, make_error_response, make_connect_request, make_connect_response ) from .connections import ClientConnection, ServerConnection from .flow import Flow, Error __all__ = [ - "HTTPFlow", "HTTPRequest", "HTTPResponse", "decoded", + "HTTPFlow", "HTTPRequest", "HTTPResponse", "Headers", "decoded", "make_error_response", "make_connect_request", "make_connect_response", "ClientConnection", "ServerConnection", diff --git a/libmproxy/protocol/base.py b/libmproxy/protocol/base.py index 9d8c8bfe..b92aeea1 100644 --- a/libmproxy/protocol/base.py +++ b/libmproxy/protocol/base.py @@ -1,4 +1,8 @@ from __future__ import (absolute_import, print_function, division) +import sys + +import six + from netlib import tcp from ..models import ServerConnection from ..exceptions import ProtocolException @@ -172,8 +176,15 @@ class ServerConnectionMixin(object): try: self.server_conn.connect() except tcp.NetLibError as e: - raise ProtocolException( - "Server connection to %s failed: %s" % (repr(self.server_conn.address), e), e) + six.reraise( + ProtocolException, + ProtocolException( + "Server connection to {} failed: {}".format( + repr(self.server_conn.address), str(e) + ) + ), + sys.exc_info()[2] + ) class Kill(Exception): diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 93972111..230f2be9 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1,4 +1,9 @@ from __future__ import (absolute_import, print_function, division) +import itertools +import sys +import traceback + +import six from netlib import tcp from netlib.http import http1, HttpErrorConnClosed, HttpError, Headers @@ -7,7 +12,6 @@ from netlib.tcp import NetLibError, Address from netlib.http.http1 import HTTP1Protocol from netlib.http.http2 import HTTP2Protocol from netlib.http.http2.frame import GoAwayFrame, PriorityFrame, WindowUpdateFrame - from .. import utils from ..exceptions import InvalidCredentials, HttpException, ProtocolException from ..models import ( @@ -45,12 +49,25 @@ class _StreamingHttpLayer(_HttpLayer): raise NotImplementedError() yield "this is a generator" # pragma: no cover + def read_response(self, request_method): + response = self.read_response_headers() + response.body = "".join( + self.read_response_body(response.headers, request_method, response.code) + ) + return response + def send_response_headers(self, response): raise NotImplementedError def send_response_body(self, response, chunks): raise NotImplementedError() + def send_response(self, response): + if response.body == CONTENT_MISSING: + raise HttpError(502, "Cannot assemble flow with CONTENT_MISSING") + self.send_response_headers(response) + self.send_response_body(response, [response.body]) + class Http1Layer(_StreamingHttpLayer): def __init__(self, ctx, mode): @@ -68,17 +85,6 @@ class Http1Layer(_StreamingHttpLayer): def send_request(self, request): self.server_conn.send(self.server_protocol.assemble(request)) - def read_response(self, request_method): - return HTTPResponse.from_protocol( - self.server_protocol, - request_method=request_method, - body_size_limit=self.config.body_size_limit, - include_body=True - ) - - def send_response(self, response): - self.client_conn.send(self.client_protocol.assemble(response)) - def read_response_headers(self): return HTTPResponse.from_protocol( self.server_protocol, @@ -104,16 +110,21 @@ class Http1Layer(_StreamingHttpLayer): response, preserve_transfer_encoding=True ) - self.client_conn.send(h + "\r\n") + self.client_conn.wfile.write(h + "\r\n") + self.client_conn.wfile.flush() def send_response_body(self, response, chunks): if self.client_protocol.has_chunked_encoding(response.headers): - chunks = ( - "%d\r\n%s\r\n" % (len(chunk), chunk) - for chunk in chunks + chunks = itertools.chain( + ( + "{:x}\r\n{}\r\n".format(len(chunk), chunk) + for chunk in chunks if chunk + ), + ("0\r\n\r\n",) ) for chunk in chunks: - self.client_conn.send(chunk) + self.client_conn.wfile.write(chunk) + self.client_conn.wfile.flush() def check_close_connection(self, flow): close_connection = ( @@ -360,7 +371,13 @@ class HttpLayer(Layer): if self.check_close_connection(flow): return - # TODO: Implement HTTP Upgrade + # Handle 101 Switching Protocols + # It may be useful to pass additional args (such as the upgrade header) + # to next_layer in the future + if flow.response.status_code == 101: + layer = self.ctx.next_layer(self) + layer() + return # Upstream Proxy Mode: Handle CONNECT if flow.request.form_in == "authority" and flow.response.code == 200: @@ -368,9 +385,13 @@ class HttpLayer(Layer): return except (HttpErrorConnClosed, NetLibError, HttpError, ProtocolException) as e: + error_propagated = False if flow.request and not flow.response: - flow.error = Error(repr(e)) + flow.error = Error(str(e)) self.channel.ask("error", flow) + self.log(traceback.format_exc(), "debug") + error_propagated = True + try: self.send_response(make_error_response( getattr(e, "code", 502), @@ -378,10 +399,12 @@ class HttpLayer(Layer): )) except NetLibError: pass - if isinstance(e, ProtocolException): - raise e - else: - raise ProtocolException("Error in HTTP connection: %s" % repr(e), e) + + if not error_propagated: + if isinstance(e, ProtocolException): + six.reraise(ProtocolException, e, sys.exc_info()[2]) + else: + six.reraise(ProtocolException, ProtocolException("Error in HTTP connection: %s" % repr(e)), sys.exc_info()[2]) finally: flow.live = False @@ -511,7 +534,7 @@ class HttpLayer(Layer): if self.mode == "regular" or self.mode == "transparent": # If there's an existing connection that doesn't match our expectations, kill it. - if address != self.server_conn.address or tls != self.server_conn.ssl_established: + if address != self.server_conn.address or tls != self.server_conn.tls_established: self.set_server(address, tls, address.host) # Establish connection is neccessary. if not self.server_conn: diff --git a/libmproxy/protocol/rawtcp.py b/libmproxy/protocol/rawtcp.py index 86468773..24c19523 100644 --- a/libmproxy/protocol/rawtcp.py +++ b/libmproxy/protocol/rawtcp.py @@ -1,11 +1,13 @@ from __future__ import (absolute_import, print_function, division) import socket import select +import six +import sys from OpenSSL import SSL -from netlib.tcp import NetLibError -from netlib.utils import cleanBin +from netlib.tcp import NetLibError, ssl_read_select +from netlib.utils import clean_bin from ..exceptions import ProtocolException from .base import Layer @@ -28,7 +30,7 @@ class RawTCPLayer(Layer): try: while True: - r, _, _ = select.select(conns, [], [], 10) + r = ssl_read_select(conns, 10) for conn in r: dst = server if conn == client else client @@ -56,11 +58,15 @@ class RawTCPLayer(Layer): direction = "-> tcp -> {}".format(repr(self.server_conn.address)) else: direction = "<- tcp <- {}".format(repr(self.server_conn.address)) - data = cleanBin(buf[:size].tobytes()) + data = clean_bin(buf[:size].tobytes()) self.log( "{}\r\n{}".format(direction, data), "info" ) except (socket.error, NetLibError, SSL.Error) as e: - raise ProtocolException("TCP connection closed unexpectedly: {}".format(repr(e)), e) + six.reraise( + ProtocolException, + ProtocolException("TCP connection closed unexpectedly: {}".format(repr(e))), + sys.exc_info()[2] + ) diff --git a/libmproxy/protocol/tls.py b/libmproxy/protocol/tls.py index 6e8535ae..2935ca9f 100644 --- a/libmproxy/protocol/tls.py +++ b/libmproxy/protocol/tls.py @@ -1,16 +1,19 @@ from __future__ import (absolute_import, print_function, division) import struct +import sys from construct import ConstructError +import six from netlib.tcp import NetLibError, NetLibInvalidCertificateError from netlib.http.http1 import HTTP1Protocol from ..contrib.tls._constructs import ClientHello -from ..exceptions import ProtocolException, TlsException +from ..exceptions import ProtocolException, TlsException, ClientHandshakeException from .base import Layer + # taken from https://testssl.sh/openssl-rfc.mappping.html CIPHER_ID_NAME_MAP = { 0x00: 'NULL-MD5', @@ -224,6 +227,7 @@ class TlsLayer(Layer): def __init__(self, ctx, client_tls, server_tls): self.client_sni = None self.client_alpn_protocols = None + self.client_ciphers = [] super(TlsLayer, self).__init__(ctx) self._client_tls = client_tls @@ -387,7 +391,7 @@ class TlsLayer(Layer): self._establish_tls_with_client() except: pass - raise e + six.reraise(*sys.exc_info()) self._establish_tls_with_client() @@ -405,8 +409,22 @@ class TlsLayer(Layer): chain_file=chain_file, alpn_select_callback=self.__alpn_select_callback, ) + # Some TLS clients will not fail the handshake, + # but will immediately throw an "unexpected eof" error on the first read. + # The reason for this might be difficult to find, so we try to peek here to see if it + # raises ann error. + self.client_conn.rfile.peek(1) except NetLibError as e: - raise TlsException("Cannot establish TLS with client: %s" % repr(e), e) + six.reraise( + ClientHandshakeException, + ClientHandshakeException( + "Cannot establish TLS with client (sni: {sni}): {e}".format( + sni=self.client_sni, e=repr(e) + ), + self.client_sni or repr(self.server_conn.address) + ), + sys.exc_info()[2] + ) def _establish_tls_with_server(self): self.log("Establish TLS with server", "debug") @@ -416,9 +434,11 @@ class TlsLayer(Layer): # and mitmproxy would enter TCP passthrough mode, which we want to avoid. deprecated_http2_variant = lambda x: x.startswith("h2-") or x.startswith("spdy") if self.client_alpn_protocols: - alpn = filter(lambda x: not deprecated_http2_variant(x), self.client_alpn_protocols) + alpn = [x for x in self.client_alpn_protocols if not deprecated_http2_variant(x)] else: alpn = None + if alpn and "h2" in alpn and not self.config.http2 : + alpn.remove("h2") ciphers_server = self.config.ciphers_server if not ciphers_server: @@ -453,17 +473,25 @@ class TlsLayer(Layer): (tls_cert_err['depth'], tls_cert_err['errno']), "error") self.log("Aborting connection attempt", "error") - raise TlsException("Cannot establish TLS with {address} (sni: {sni}): {e}".format( - address=repr(self.server_conn.address), - sni=self.sni_for_server_connection, - e=repr(e), - ), e) + six.reraise( + TlsException, + TlsException("Cannot establish TLS with {address} (sni: {sni}): {e}".format( + address=repr(self.server_conn.address), + sni=self.sni_for_server_connection, + e=repr(e), + )), + sys.exc_info()[2] + ) except NetLibError as e: - raise TlsException("Cannot establish TLS with {address} (sni: {sni}): {e}".format( - address=repr(self.server_conn.address), - sni=self.sni_for_server_connection, - e=repr(e), - ), e) + six.reraise( + TlsException, + TlsException("Cannot establish TLS with {address} (sni: {sni}): {e}".format( + address=repr(self.server_conn.address), + sni=self.sni_for_server_connection, + e=repr(e), + )), + sys.exc_info()[2] + ) self.log("ALPN selected by server: %s" % self.alpn_for_client_connection, "debug") diff --git a/libmproxy/proxy/config.py b/libmproxy/proxy/config.py index 2a1b84cb..cd9eda5a 100644 --- a/libmproxy/proxy/config.py +++ b/libmproxy/proxy/config.py @@ -54,6 +54,8 @@ class ProxyConfig: authenticator=None, ignore_hosts=tuple(), tcp_hosts=tuple(), + http2=False, + rawtcp=False, ciphers_client=None, ciphers_server=None, certs=tuple(), @@ -78,6 +80,8 @@ class ProxyConfig: self.check_ignore = HostMatcher(ignore_hosts) self.check_tcp = HostMatcher(tcp_hosts) + self.http2 = http2 + self.rawtcp = rawtcp self.authenticator = authenticator self.cadir = os.path.expanduser(cadir) self.certstore = certutils.CertStore.from_store( @@ -183,6 +187,8 @@ def process_proxy_options(parser, options): upstream_server=upstream_server, ignore_hosts=options.ignore_hosts, tcp_hosts=options.tcp_hosts, + http2=options.http2, + rawtcp=options.rawtcp, authenticator=authenticator, ciphers_client=options.ciphers_client, ciphers_server=options.ciphers_server, @@ -192,4 +198,4 @@ def process_proxy_options(parser, options): ssl_verify_upstream_cert=options.ssl_verify_upstream_cert, ssl_verify_upstream_trusted_cadir=options.ssl_verify_upstream_trusted_cadir, ssl_verify_upstream_trusted_ca=options.ssl_verify_upstream_trusted_ca - )
\ No newline at end of file + ) diff --git a/libmproxy/proxy/modes/socks_proxy.py b/libmproxy/proxy/modes/socks_proxy.py index 0efeab67..545c38d6 100644 --- a/libmproxy/proxy/modes/socks_proxy.py +++ b/libmproxy/proxy/modes/socks_proxy.py @@ -48,7 +48,7 @@ class Socks5Proxy(Layer, ServerConnectionMixin): self.client_conn.wfile.flush() except (socks.SocksError, NetLibError) as e: - raise Socks5Exception("SOCKS5 mode failure: %s" % repr(e), e) + raise Socks5Exception("SOCKS5 mode failure: %s" % repr(e)) self.server_conn.address = connect_request.addr diff --git a/libmproxy/proxy/modes/transparent_proxy.py b/libmproxy/proxy/modes/transparent_proxy.py index d99485c9..da1d4632 100644 --- a/libmproxy/proxy/modes/transparent_proxy.py +++ b/libmproxy/proxy/modes/transparent_proxy.py @@ -14,7 +14,7 @@ class TransparentProxy(Layer, ServerConnectionMixin): try: self.server_conn.address = self.resolver.original_addr(self.client_conn.connection) except Exception as e: - raise ProtocolException("Transparent mode failure: %s" % repr(e), e) + raise ProtocolException("Transparent mode failure: %s" % repr(e)) layer = self.ctx.next_layer(self) try: diff --git a/libmproxy/proxy/root_context.py b/libmproxy/proxy/root_context.py index dccdf023..54bea1db 100644 --- a/libmproxy/proxy/root_context.py +++ b/libmproxy/proxy/root_context.py @@ -1,8 +1,13 @@ from __future__ import (absolute_import, print_function, division) +import string +import sys +import six + +from libmproxy.exceptions import ProtocolException from netlib.http.http1 import HTTP1Protocol from netlib.http.http2 import HTTP2Protocol - +from netlib.tcp import NetLibError from ..protocol import ( RawTCPLayer, TlsLayer, Http1Layer, Http2Layer, is_tls_record_magic, ServerConnectionMixin ) @@ -48,7 +53,10 @@ class RootContext(object): if self.config.check_ignore(top_layer.server_conn.address): return RawTCPLayer(top_layer, logging=False) - d = top_layer.client_conn.rfile.peek(3) + try: + d = top_layer.client_conn.rfile.peek(3) + except NetLibError as e: + six.reraise(ProtocolException, ProtocolException(str(e)), sys.exc_info()[2]) client_tls = is_tls_record_magic(d) # 2. Always insert a TLS layer, even if there's neither client nor server tls. @@ -82,21 +90,17 @@ class RootContext(object): if alpn == HTTP1Protocol.ALPN_PROTO_HTTP1: return Http1Layer(top_layer, 'transparent') - # 6. Assume HTTP1 by default - return Http1Layer(top_layer, 'transparent') + # 6. Check for raw tcp mode + is_ascii = ( + len(d) == 3 and + # better be safe here and don't expect uppercase... + all(x in string.ascii_letters for x in d) + ) + if self.config.rawtcp and not is_ascii: + return RawTCPLayer(top_layer) - # In a future version, we want to implement TCP passthrough as the last fallback, - # but we don't have the UI part ready for that. - # - # d = top_layer.client_conn.rfile.peek(3) - # is_ascii = ( - # len(d) == 3 and - # # better be safe here and don't expect uppercase... - # all(x in string.ascii_letters for x in d) - # ) - # # TODO: This could block if there are not enough bytes available? - # d = top_layer.client_conn.rfile.peek(len(HTTP2Protocol.CLIENT_CONNECTION_PREFACE)) - # is_http2_magic = (d == HTTP2Protocol.CLIENT_CONNECTION_PREFACE) + # 7. Assume HTTP1 by default + return Http1Layer(top_layer, 'transparent') def log(self, msg, level, subs=()): """ diff --git a/libmproxy/proxy/server.py b/libmproxy/proxy/server.py index c12bbbfa..88448172 100644 --- a/libmproxy/proxy/server.py +++ b/libmproxy/proxy/server.py @@ -3,11 +3,12 @@ from __future__ import (absolute_import, print_function, division) import traceback import sys import socket +import six from netlib import tcp from netlib.http.http1 import HTTP1Protocol from netlib.tcp import NetLibError -from ..exceptions import ProtocolException, ServerException +from ..exceptions import ProtocolException, ServerException, ClientHandshakeException from ..protocol import Kill from ..models import ClientConnection, make_error_response from .modes import HttpUpstreamProxy, HttpProxy, ReverseProxy, TransparentProxy, Socks5Proxy @@ -39,7 +40,11 @@ class ProxyServer(tcp.TCPServer): try: super(ProxyServer, self).__init__((config.host, config.port)) except socket.error as e: - raise ServerException('Error starting proxy server: ' + repr(e), e) + six.reraise( + ServerException, + ServerException('Error starting proxy server: ' + repr(e)), + sys.exc_info()[2] + ) self.channel = None def start_slave(self, klass, channel): @@ -116,7 +121,18 @@ class ConnectionHandler(object): except Kill: self.log("Connection killed", "info") except ProtocolException as e: - self.log(e, "info") + + if isinstance(e, ClientHandshakeException): + self.log( + "Client Handshake failed. " + "The client may not trust the proxy's certificate for {}.".format(e.server), + "error" + ) + self.log(repr(e), "debug") + else: + self.log(repr(e), "error") + + self.log(traceback.format_exc(), "debug") # If an error propagates to the topmost level, # we send an HTTP error response, which is both # understandable by HTTP clients and humans. diff --git a/libmproxy/script.py b/libmproxy/script.py index 9526d3af..6dd79199 100644 --- a/libmproxy/script.py +++ b/libmproxy/script.py @@ -99,8 +99,8 @@ class Script: """ if self.ns is not None: self.unload() - ns = {} script_dir = os.path.dirname(os.path.abspath(self.args[0])) + ns = {'__file__': os.path.abspath(self.args[0])} sys.path.append(script_dir) try: execfile(self.args[0], ns, ns) diff --git a/libmproxy/utils.py b/libmproxy/utils.py index a6ca55f7..8bd843a0 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import (absolute_import, print_function, division) import os import datetime import re @@ -30,15 +30,16 @@ def isBin(s): """ for i in s: i = ord(i) - if i < 9: - return True - elif i > 13 and i < 32: - return True - elif i > 126: + if i < 9 or 13 < i < 32 or 126 < i: return True return False +def isMostlyBin(s): + s = s[:100] + return sum(isBin(ch) for ch in s)/len(s) > 0.3 + + def isXML(s): for i in s: if i in "\n \t": @@ -54,7 +55,7 @@ def pretty_json(s): p = json.loads(s) except ValueError: return None - return json.dumps(p, sort_keys=True, indent=4).split("\n") + return json.dumps(p, sort_keys=True, indent=4) def pretty_duration(secs): @@ -23,15 +23,17 @@ deps = { "html2text>=2015.4.14", "construct>=2.5.2", "six>=1.9.0", + "lxml>=3.3.6", + "Pillow>=2.3.0", } # A script -> additional dependencies dict. scripts = { "mitmproxy": { "urwid>=1.3", - "lxml>=3.3.6", - "Pillow>=2.3.0", }, - "mitmdump": set(), + "mitmdump": { + "click>=5.1", + }, "mitmweb": set() } # Developer dependencies diff --git a/test/test_console_common.py b/test/test_console_common.py index 57cbef98..459539c5 100644 --- a/test/test_console_common.py +++ b/test/test_console_common.py @@ -3,10 +3,8 @@ from nose.plugins.skip import SkipTest if os.name == "nt": raise SkipTest("Skipped on Windows.") -from netlib import encoding import libmproxy.console.common as common -from libmproxy import utils, flow import tutils diff --git a/test/test_console_contentview.py b/test/test_contentview.py index 6a93346a..97608520 100644 --- a/test/test_console_contentview.py +++ b/test/test_contentview.py @@ -1,16 +1,9 @@ -import os -from nose.plugins.skip import SkipTest +from libmproxy.exceptions import ContentViewException from netlib.http import Headers - -if os.name == "nt": - raise SkipTest("Skipped on Windows.") -import sys - import netlib.utils from netlib import encoding -import libmproxy.console.contentview as cv -from libmproxy import utils, flow +import libmproxy.contentviews as cv import tutils try: @@ -25,76 +18,65 @@ except: class TestContentView: - def test_trailer(self): - txt = [] - cv.trailer(5, txt, 1000) - assert not txt - cv.trailer(cv.VIEW_CUTOFF + 10, txt, cv.VIEW_CUTOFF) - assert txt def test_view_auto(self): v = cv.ViewAuto() f = v( - Headers(), "foo", - 1000 + headers=Headers() ) assert f[0] == "Raw" f = v( - Headers(content_type="text/html"), "<html></html>", - 1000 + headers=Headers(content_type="text/html") ) assert f[0] == "HTML" f = v( - Headers(content_type="text/flibble"), "foo", - 1000 + headers=Headers(content_type="text/flibble") ) assert f[0] == "Raw" f = v( - Headers(content_type="text/flibble"), "<xml></xml>", - 1000 + headers=Headers(content_type="text/flibble") ) assert f[0].startswith("XML") def test_view_urlencoded(self): d = netlib.utils.urlencode([("one", "two"), ("three", "four")]) v = cv.ViewURLEncoded() - assert v([], d, 100) + assert v(d) d = netlib.utils.urlencode([("adsfa", "")]) v = cv.ViewURLEncoded() - assert v([], d, 100) + assert v(d) def test_view_html(self): v = cv.ViewHTML() s = "<html><br><br></br><p>one</p></html>" - assert v([], s, 1000) + assert v(s) s = "gobbledygook" - assert not v([], s, 1000) + assert not v(s) def test_view_html_outline(self): v = cv.ViewHTMLOutline() s = "<html><br><br></br><p>one</p></html>" - assert v([], s, 1000) + assert v(s) def test_view_json(self): cv.VIEW_CUTOFF = 100 v = cv.ViewJSON() - assert v([], "{}", 1000) - assert not v([], "{", 1000) - assert v([], "[" + ",".join(["0"] * cv.VIEW_CUTOFF) + "]", 1000) - assert v([], "[1, 2, 3, 4, 5]", 5) + assert v("{}") + assert not v("{") + assert v("[1, 2, 3, 4, 5]") def test_view_xml(self): v = cv.ViewXML() - assert v([], "<foo></foo>", 1000) - assert not v([], "<foo>", 1000) + assert v("<foo></foo>") + assert not v("<foo>") s = """<?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet title="XSL_formatting"?> <rss @@ -103,17 +85,17 @@ class TestContentView: version="2.0"> </rss> """ - assert v([], s, 1000) + assert v(s) def test_view_raw(self): v = cv.ViewRaw() - assert v([], "foo", 1000) + assert v("foo") def test_view_javascript(self): v = cv.ViewJavaScript() - assert v([], "[1, 2, 3]", 100) - assert v([], "[1, 2, 3", 100) - assert v([], "function(a){[1, 2, 3]}", 100) + assert v("[1, 2, 3]") + assert v("[1, 2, 3") + assert v("function(a){[1, 2, 3]}") def test_view_css(self): v = cv.ViewCSS() @@ -121,39 +103,39 @@ class TestContentView: with open(tutils.test_data.path('data/1.css'), 'r') as fp: fixture_1 = fp.read() - result = v([], 'a', 100) + result = v('a') if cssutils: - assert len(result[1]) == 0 + assert len(list(result[1])) == 0 else: - assert len(result[1]) == 1 + assert len(list(result[1])) == 1 - result = v([], fixture_1, 100) + result = v(fixture_1) if cssutils: - assert len(result[1]) > 1 + assert len(list(result[1])) > 1 else: - assert len(result[1]) == 1 + assert len(list(result[1])) == 1 def test_view_hex(self): v = cv.ViewHex() - assert v([], "foo", 1000) + assert v("foo") def test_view_image(self): v = cv.ViewImage() p = tutils.test_data.path("data/image.png") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) p = tutils.test_data.path("data/image.gif") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) p = tutils.test_data.path("data/image-err1.jpg") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) p = tutils.test_data.path("data/image.ico") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) - assert not v([], "flibble", sys.maxsize) + assert not v("flibble") def test_view_multipart(self): view = cv.ViewMultipart() @@ -165,76 +147,65 @@ Larry --AaB03x """.strip() h = Headers(content_type="multipart/form-data; boundary=AaB03x") - assert view(h, v, 1000) + assert view(v, headers=h) h = Headers() - assert not view(h, v, 1000) + assert not view(v, headers=h) h = Headers(content_type="multipart/form-data") - assert not view(h, v, 1000) + assert not view(v, headers=h) h = Headers(content_type="unparseable") - assert not view(h, v, 1000) + assert not view(v, headers=h) def test_get_content_view(self): r = cv.get_content_view( cv.get("Raw"), - Headers(content_type="application/json"), "[1, 2, 3]", - 1000, - False + headers=Headers(content_type="application/json") ) assert "Raw" in r[0] r = cv.get_content_view( cv.get("Auto"), - Headers(content_type="application/json"), "[1, 2, 3]", - 1000, - False + headers=Headers(content_type="application/json") ) assert r[0] == "JSON" r = cv.get_content_view( cv.get("Auto"), - Headers(content_type="application/json"), "[1, 2", - 1000, - False + headers=Headers(content_type="application/json") ) assert "Raw" in r[0] - r = cv.get_content_view( + tutils.raises( + ContentViewException, + cv.get_content_view, cv.get("AMF"), - Headers(), "[1, 2", - 1000, - False + headers=Headers() ) - assert "Raw" in r[0] r = cv.get_content_view( cv.get("Auto"), - Headers( + encoding.encode('gzip', "[1, 2, 3]"), + headers=Headers( content_type="application/json", content_encoding="gzip" - ), - encoding.encode('gzip', "[1, 2, 3]"), - 1000, - False + ) ) assert "decoded gzip" in r[0] assert "JSON" in r[0] r = cv.get_content_view( cv.get("XML"), - Headers( + encoding.encode('gzip', "[1, 2, 3]"), + headers=Headers( content_type="application/json", content_encoding="gzip" - ), - encoding.encode('gzip', "[1, 2, 3]"), - 1000, - False + ) ) assert "decoded gzip" in r[0] assert "Raw" in r[0] @@ -245,22 +216,22 @@ if pyamf: v = cv.ViewAMF() p = tutils.test_data.path("data/amf01") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) p = tutils.test_data.path("data/amf02") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) def test_view_amf_response(): v = cv.ViewAMF() p = tutils.test_data.path("data/amf03") - assert v([], file(p, "rb").read(), sys.maxsize) + assert v(file(p, "rb").read()) if cv.ViewProtobuf.is_available(): def test_view_protobuf_request(): v = cv.ViewProtobuf() p = tutils.test_data.path("data/protobuf01") - content_type, output = v([], file(p, "rb").read(), sys.maxsize) + content_type, output = v(file(p, "rb").read()) assert content_type == "Protobuf" assert output[0].text == '1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"' diff --git a/test/test_dump.py b/test/test_dump.py index c76f555f..29931759 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -1,5 +1,6 @@ import os from cStringIO import StringIO +from libmproxy.exceptions import ContentViewException from libmproxy.models import HTTPResponse import netlib.tutils @@ -12,17 +13,51 @@ import mock def test_strfuncs(): - t = HTTPResponse.wrap(netlib.tutils.tresp()) - t.is_replay = True - dump.str_response(t) - - f = tutils.tflow() - f.client_conn = None - f.request.stickycookie = True - assert "stickycookie" in dump.str_request(f, False) - assert "stickycookie" in dump.str_request(f, True) - assert "replay" in dump.str_request(f, False) - assert "replay" in dump.str_request(f, True) + o = dump.Options() + m = dump.DumpMaster(None, o) + + m.outfile = StringIO() + m.o.flow_detail = 0 + m.echo_flow(tutils.tflow()) + assert not m.outfile.getvalue() + + m.o.flow_detail = 4 + m.echo_flow(tutils.tflow()) + assert m.outfile.getvalue() + + m.outfile = StringIO() + m.echo_flow(tutils.tflow(resp=True)) + assert "<<" in m.outfile.getvalue() + + m.outfile = StringIO() + m.echo_flow(tutils.tflow(err=True)) + assert "<<" in m.outfile.getvalue() + + flow = tutils.tflow() + flow.request = netlib.tutils.treq() + flow.request.stickycookie = True + flow.client_conn = mock.MagicMock() + flow.client_conn.address.host = "foo" + flow.response = netlib.tutils.tresp(content=CONTENT_MISSING) + flow.response.is_replay = True + flow.response.code = 300 + m.echo_flow(flow) + + + flow = tutils.tflow(resp=netlib.tutils.tresp("{")) + flow.response.headers["content-type"] = "application/json" + flow.response.code = 400 + m.echo_flow(flow) + + +@mock.patch("libmproxy.contentviews.get_content_view") +def test_contentview(get_content_view): + get_content_view.side_effect = ContentViewException(""), ("x", iter([])) + + o = dump.Options(flow_detail=4, verbosity=3) + m = dump.DumpMaster(None, o, StringIO()) + m.echo_flow(tutils.tflow()) + assert "Content viewer failed" in m.outfile.getvalue() class TestDumpMaster: diff --git a/test/test_server.py b/test/test_server.py index 829b5f0a..4a5dd7c2 100644 --- a/test/test_server.py +++ b/test/test_server.py @@ -522,13 +522,13 @@ class TestProxy(tservers.HTTPProxTest): assert f.response.code == 304 def test_response_timestamps(self): - # test that we notice at least 2 sec delay between timestamps + # test that we notice at least 1 sec delay between timestamps # in response object f = self.pathod("304:b@1k:p50,1") assert f.status_code == 304 response = self.master.state.view[0].response - assert 1 <= response.timestamp_end - response.timestamp_start <= 1.2 + assert 0.9 <= response.timestamp_end - response.timestamp_start <= 1.2 def test_request_timestamps(self): # test that we notice a delay between timestamps in request object @@ -547,8 +547,9 @@ class TestProxy(tservers.HTTPProxTest): request, response = self.master.state.view[ 0].request, self.master.state.view[0].response assert response.code == 304 # sanity test for our low level request - # time.sleep might be a little bit shorter than a second - assert 0.95 < (request.timestamp_end - request.timestamp_start) < 1.2 + # time.sleep might be a little bit shorter than a second, + # we observed up to 0.93s on appveyor. + assert 0.8 < (request.timestamp_end - request.timestamp_start) < 1.2 def test_request_timestamps_not_affected_by_client_time(self): # test that don't include user wait time in request's timestamps @@ -711,7 +712,7 @@ class TestStreamRequest(tservers.HTTPProxTest): connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) connection.connect(("127.0.0.1", self.proxy.port)) fconn = connection.makefile() - spec = '200:h"Transfer-Encoding"="chunked":r:b"4\\r\\nthis\\r\\n7\\r\\nisatest\\r\\n0\\r\\n\\r\\n"' + spec = '200:h"Transfer-Encoding"="chunked":r:b"4\\r\\nthis\\r\\n11\\r\\nisatest__reachhex\\r\\n0\\r\\n\\r\\n"' connection.send( "GET %s/p/%s HTTP/1.1\r\n" % (self.server.urlbase, spec)) @@ -726,7 +727,7 @@ class TestStreamRequest(tservers.HTTPProxTest): chunks = list(protocol.read_http_body_chunked( resp.headers, None, "GET", 200, False )) - assert chunks == ["this", "isatest", ""] + assert chunks == ["this", "isatest__reachhex"] connection.close() |