diff options
58 files changed, 1322 insertions, 1078 deletions
diff --git a/.appveyor.yml b/.appveyor.yml index 5421eb5a..5cf194a9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -27,7 +27,7 @@ test_script: - ps: | $Env:VERSION = $(python mitmproxy/version.py) $Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`"" - tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl + tox -e wheel tox -e rtool -- bdist deploy_script: diff --git a/docs/install.rst b/docs/install.rst index 5c823ef4..1fe09aca 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -9,25 +9,19 @@ Installation On Ubuntu ---------------------- Ubuntu comes with Python but we need to install pip, python-dev and several libraries. -This was tested on a fully patched installation of Ubuntu 14.04. +This was tested on a fully patched installation of Ubuntu 16.04. .. code:: bash - sudo apt-get install python-pip python-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev libjpeg8-dev zlib1g-dev g++ - sudo pip install mitmproxy # or pip install --user mitmproxy + sudo apt-get install python3-pip python3-dev libffi-dev libssl-dev libtiff5-dev libjpeg8-dev zlib1g-dev libwebp-dev + sudo pip3 install mitmproxy # or pip install --user mitmproxy -Once installation is complete you can run :ref:`mitmproxy` or :ref:`mitmdump` from a terminal. - -On **Ubuntu 12.04** (and other systems with an outdated version of pip), -you may need to update pip using ``pip install -U pip`` before installing mitmproxy. +On older Ubuntu versions, e.g., **12.04** and **14.04**, you may need to install a newer version of Python. +mitmproxy requires Python 3.5 or higher. Please take a look at pyenv_. +Make sure to have an up-to-date version of pip by running ``pip3 install -U pip``. -Installation From Source (Ubuntu) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Once installation is complete you can run :ref:`mitmproxy` or :ref:`mitmdump` from a terminal. 
-If you would like to install mitmproxy directly from the master branch on GitHub or would like to -get set up to contribute to the project, install the dependencies as you would for a regular -mitmproxy installation (see :ref:`install-ubuntu`). -Then see the Hacking_ section of the README on GitHub. .. _install-fedora: @@ -35,16 +29,19 @@ Installation On Fedora ---------------------- Fedora comes with Python but we need to install pip, python-dev and several libraries. -This was tested on a fully patched installation of Fedora 23. +This was tested on a fully patched installation of Fedora 24. .. code:: bash - sudo dnf install -y python-pip python-devel libffi-devel openssl-devel libxml2-devel libxslt-devel libpng-devel libjpeg-devel - sudo pip install mitmproxy # or pip install --user mitmproxy + sudo dnf install make gcc redhat-rpm-config python3-pip python3-devel libffi-devel openssl-devel libtiff-devel libjpeg-devel zlib-devel libwebp-devel openjpeg2-devel + sudo pip3 install mitmproxy # or pip install --user mitmproxy + +Make sure to have an up-to-date version of pip by running ``pip3 install -U pip``. Once installation is complete you can run :ref:`mitmproxy` or :ref:`mitmdump` from a terminal. + .. _install-arch: Installation On Arch Linux @@ -54,42 +51,26 @@ mitmproxy has been added into the [community] repository. Use pacman to install >>> sudo pacman -S mitmproxy +Once installation is complete you can run :ref:`mitmproxy` or :ref:`mitmdump` from a terminal. -Installation On Mac OS X ------------------------- +.. _install-macos: -The easiest way to get up and running on OSX is to download the pre-built binary packages from -`mitmproxy.org`_. +Installation On macOS +------------------------ -There are a few bits of customization you might want to do to make mitmproxy comfortable to use on -OSX. The default color scheme is optimized for a dark background terminal, but you can select a -palette for a light terminal background with the ``--palette`` option. 
-You can use the OSX **open** program to create a simple and effective ``~/.mailcap`` file to view -request and response bodies: +You can use Homebrew to install everything: +.. code:: bash -.. code-block:: none + brew install mitmproxy - application/*; /usr/bin/open -Wn %s - audio/*; /usr/bin/open -Wn %s - image/*; /usr/bin/open -Wn %s - video/*; /usr/bin/open -Wn %s +Or you can download the pre-built binary packages from `mitmproxy.org`_. Once installation is complete you can run :ref:`mitmproxy` or :ref:`mitmdump` from a terminal. -Installation From Source (Mac OS X) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you would like to install mitmproxy directly from the master branch on GitHub or would like to -get set up to contribute to the project, there are a few OS X specific things to keep in mind. - -- Make sure that XCode is installed from the App Store, and that the command-line tools have been - downloaded (XCode/Preferences/Downloads). -- If you're running a Python interpreter installed with homebrew (or similar), you may have to - install some dependencies by hand. - -Then see the Hacking_ section of the README on GitHub. +.. _install-windows: Installation On Windows ----------------------- @@ -105,28 +86,39 @@ First, install the latest version of Python 3.5 from the `Python website`_. If you already have an older version of Python 3.5 installed, make sure to install pip_ (pip is included in Python by default). If pip aborts with an error, make sure you are using the current version of pip. ->>> python -m pip install --upgrade pip +.. code:: powershell + + python -m pip install --upgrade pip Next, add Python and the Python Scripts directory to your **PATH** variable. You can do this easily by running the following in powershell: ->>> [Environment]::SetEnvironmentVariable("Path", "$env:Path;C:\Python27;C:\Python27\Scripts", "User") +.. 
code:: powershell + + [Environment]::SetEnvironmentVariable("Path", "$env:Path;C:\Python27;C:\Python27\Scripts", "User") Now, you can install mitmproxy by running ->>> pip install mitmproxy +.. code:: powershell + + pip install mitmproxy Once the installation is complete, you can run :ref:`mitmdump` from a command prompt. -Installation From Source (Windows) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _install-source: + +Installation From Source +------------------------ If you would like to install mitmproxy directly from the master branch on GitHub or would like to -get set up to contribute to the project, install Python as outlined above, then see the -Hacking_ section of the README on GitHub. +get set up to contribute to the project, install the dependencies as you would for a regular +mitmproxy installation. Then see the Hacking_ section of the README on GitHub. +You can check your system information by running: ``mitmproxy --sysinfo`` .. _Hacking: https://github.com/mitmproxy/mitmproxy/blob/master/README.rst#hacking .. _mitmproxy.org: https://mitmproxy.org/ .. _`Python website`: https://www.python.org/downloads/windows/ .. _pip: https://pip.pypa.io/en/latest/installing.html +.. 
_pyenv: https://github.com/yyuu/pyenv diff --git a/examples/simple/modify_body_inject_iframe.py b/examples/simple/modify_body_inject_iframe.py index 33d18bbd..e3d5fee9 100644 --- a/examples/simple/modify_body_inject_iframe.py +++ b/examples/simple/modify_body_inject_iframe.py @@ -11,7 +11,7 @@ class Injector: def response(self, flow): if flow.request.host in self.iframe_url: return - html = BeautifulSoup(flow.response.content, "lxml") + html = BeautifulSoup(flow.response.content, "html.parser") if html.body: iframe = html.new_tag( "iframe", diff --git a/mitmproxy/addons/view.py b/mitmproxy/addons/view.py index b8b6093f..dd5b745d 100644 --- a/mitmproxy/addons/view.py +++ b/mitmproxy/addons/view.py @@ -145,9 +145,9 @@ class View(collections.Sequence): def inbounds(self, index: int) -> bool: """ - Is this index >= 0 and < len(self) + Is this 0 <= index < len(self) """ - return index >= 0 and index < len(self) + return 0 <= index < len(self) def _rev(self, idx: int) -> int: """ @@ -359,7 +359,7 @@ class Focus: return self.view.index(self.flow) @index.setter - def index(self, idx) -> typing.Optional[int]: + def index(self, idx): if idx < 0 or idx > len(self.view) - 1: raise ValueError("Index out of view bounds") self.flow = self.view[idx] diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py deleted file mode 100644 index ef0c80e0..00000000 --- a/mitmproxy/contentviews.py +++ /dev/null @@ -1,672 +0,0 @@ -""" -Mitmproxy Content Views -======================= - -mitmproxy includes a set of content views which can be used to -format/decode/highlight data. While they are currently used for HTTP message -bodies only, the may be used in other contexts in the future, e.g. to decode -protobuf messages sent as WebSocket frames. - -Thus, the View API is very minimalistic. The only arguments are `data` and -`**metadata`, where `data` is the actual content (as bytes). The contents on -metadata depend on the protocol in use. 
For HTTP, the message headers are -passed as the ``headers`` keyword argument. For HTTP requests, the query -parameters are passed as the ``query`` keyword argument. -""" - -import datetime -import io -import json -import logging -import subprocess -import traceback -from typing import Generator -from typing import Mapping -from typing import Tuple -from typing import Union - -import cssutils -import html2text -import jsbeautifier -import lxml.etree -import lxml.html -from PIL import ExifTags -from PIL import Image -from mitmproxy import exceptions -from mitmproxy.contrib.wbxml import ASCommandResponse -from mitmproxy.net import http -from mitmproxy.types import multidict -from mitmproxy.utils import strutils -from mitmproxy.net.http import url - -try: - import pyamf - from pyamf import remoting, flex -except ImportError: # pragma no cover - pyamf = None - -# Default view cutoff *in lines* -VIEW_CUTOFF = 512 - -KEY_MAX = 30 - - -def pretty_json(s: bytes) -> bytes: - try: - p = json.loads(s.decode('utf-8')) - except ValueError: - return None - pretty = json.dumps(p, sort_keys=True, indent=4, ensure_ascii=False) - if isinstance(pretty, str): - # json.dumps _may_ decide to return unicode, if the JSON object is not ascii. - # From limited testing this is always valid utf8 (otherwise json.loads will fail earlier), - # so we can just re-encode it here. - return pretty.encode("utf8", "strict") - return pretty - - -def format_dict( - d: Mapping[Union[str, bytes], Union[str, bytes]] -) -> Generator[Tuple[Union[str, bytes], Union[str, bytes]], None, None]: - """ - Helper function that transforms the given dictionary into a list of - ("key", key ) - ("value", value) - tuples, where key is padded to a uniform width. 
- """ - max_key_len = max(len(k) for k in d.keys()) - max_key_len = min(max_key_len, KEY_MAX) - for key, value in d.items(): - key += b":" if isinstance(key, bytes) else u":" - key = key.ljust(max_key_len + 2) - yield [ - ("header", key), - ("text", value) - ] - - -def format_text(text): - """ - Helper function that transforms bytes into the view output format. - """ - for line in text.splitlines(): - yield [("text", line)] - - -class View: - name = None - prompt = () - content_types = [] - - def __call__(self, data: bytes, **metadata): - """ - Transform raw data into human-readable output. - - Args: - data: the data to decode/format. - metadata: optional keyword-only arguments for metadata. Implementations must not - rely on a given argument being present. - - Returns: - A (description, content generator) tuple. - - The content generator yields lists of (style, text) tuples, where each list represents - a single line. ``text`` is a unfiltered byte string which may need to be escaped, - depending on the used output. - - Caveats: - The content generator must not yield tuples of tuples, - because urwid cannot process that. You have to yield a *list* of tuples per line. 
- """ - raise NotImplementedError() - - -class ViewAuto(View): - name = "Auto" - prompt = ("auto", "a") - content_types = [] - - def __call__(self, data, **metadata): - headers = metadata.get("headers", {}) - ctype = headers.get("content-type") - if data and ctype: - ct = http.parse_content_type(ctype) if ctype else None - ct = "%s/%s" % (ct[0], ct[1]) - if ct in content_types_map: - return content_types_map[ct][0](data, **metadata) - elif strutils.is_xml(data): - return get("XML")(data, **metadata) - if metadata.get("query"): - return get("Query")(data, **metadata) - if data and strutils.is_mostly_bin(data): - return get("Hex")(data) - if not data: - return "No content", [] - return get("Raw")(data) - - -class ViewRaw(View): - name = "Raw" - prompt = ("raw", "r") - content_types = [] - - def __call__(self, data, **metadata): - return "Raw", format_text(strutils.bytes_to_escaped_str(data, True)) - - -class ViewHex(View): - name = "Hex" - prompt = ("hex", "e") - content_types = [] - - @staticmethod - def _format(data): - for offset, hexa, s in strutils.hexdump(data): - yield [ - ("offset", offset + " "), - ("text", hexa + " "), - ("text", s) - ] - - def __call__(self, data, **metadata): - return "Hex", self._format(data) - - -class ViewXML(View): - name = "XML" - prompt = ("xml", "x") - content_types = ["text/xml"] - - def __call__(self, data, **metadata): - parser = lxml.etree.XMLParser( - remove_blank_text=True, - resolve_entities=False, - strip_cdata=False, - recover=False - ) - try: - document = lxml.etree.fromstring(data, parser) - except lxml.etree.XMLSyntaxError: - return None - docinfo = document.getroottree().docinfo - - prev = [] - p = document.getroottree().getroot().getprevious() - while p is not None: - prev.insert( - 0, - lxml.etree.tostring(p) - ) - p = p.getprevious() - doctype = docinfo.doctype - if prev: - doctype += "\n".join(p.decode() for p in prev).strip() - doctype = doctype.strip() - - s = lxml.etree.tostring( - document, - pretty_print=True, 
- xml_declaration=True, - doctype=doctype or None, - encoding=docinfo.encoding - ) - - return "XML-like data", format_text(s) - - -class ViewJSON(View): - name = "JSON" - prompt = ("json", "s") - content_types = [ - "application/json", - "application/vnd.api+json" - ] - - def __call__(self, data, **metadata): - pj = pretty_json(data) - if pj: - return "JSON", format_text(pj) - - -class ViewHTML(View): - name = "HTML" - prompt = ("html", "h") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - if strutils.is_xml(data): - parser = lxml.etree.HTMLParser( - strip_cdata=True, - remove_blank_text=True - ) - d = lxml.html.fromstring(data, parser=parser) - docinfo = d.getroottree().docinfo - s = lxml.etree.tostring( - d, - pretty_print=True, - doctype=docinfo.doctype, - encoding='utf8' - ) - return "HTML", format_text(s) - - -class ViewHTMLOutline(View): - name = "HTML Outline" - prompt = ("html outline", "o") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - data = data.decode("utf-8", "replace") - h = html2text.HTML2Text(baseurl="") - h.ignore_images = True - h.body_width = 0 - outline = h.handle(data) - return "HTML Outline", format_text(outline) - - -class ViewURLEncoded(View): - name = "URL-encoded" - prompt = ("urlencoded", "u") - content_types = ["application/x-www-form-urlencoded"] - - def __call__(self, data, **metadata): - try: - data = data.decode("ascii", "strict") - except ValueError: - return None - d = url.decode(data) - return "URLEncoded form", format_dict(multidict.MultiDict(d)) - - -class ViewMultipart(View): - name = "Multipart Form" - prompt = ("multipart", "m") - content_types = ["multipart/form-data"] - - @staticmethod - def _format(v): - yield [("highlight", "Form data:\n")] - for message in format_dict(multidict.MultiDict(v)): - yield message - - def __call__(self, data, **metadata): - headers = metadata.get("headers", {}) - v = http.multipart.decode(headers, data) - if v: - return "Multipart form", 
self._format(v) - - -if pyamf: - class DummyObject(dict): - def __init__(self, alias): - dict.__init__(self) - - def __readamf__(self, input): - data = input.readObject() - self["data"] = data - - def pyamf_class_loader(s): - for i in pyamf.CLASS_LOADERS: - if i != pyamf_class_loader: - v = i(s) - if v: - return v - return DummyObject - - pyamf.register_class_loader(pyamf_class_loader) - - class ViewAMF(View): - name = "AMF" - prompt = ("amf", "f") - content_types = ["application/x-amf"] - - def unpack(self, b, seen=None): - if seen is None: - seen = set([]) - - if hasattr(b, "body"): - return self.unpack(b.body, seen) - if isinstance(b, DummyObject): - if id(b) in seen: - return "<recursion>" - else: - seen.add(id(b)) - for k, v in b.items(): - b[k] = self.unpack(v, seen) - return b - elif isinstance(b, dict): - for k, v in b.items(): - b[k] = self.unpack(v, seen) - return b - elif isinstance(b, list): - return [self.unpack(i) for i in b] - elif isinstance(b, datetime.datetime): - return str(b) - elif isinstance(b, flex.ArrayCollection): - return [self.unpack(i, seen) for i in b] - else: - return b - - def _format(self, envelope): - for target, message in iter(envelope): - if isinstance(message, pyamf.remoting.Request): - yield [ - ("header", "Request: "), - ("text", str(target)), - ] - else: - yield [ - ("header", "Response: "), - ("text", "%s, code %s" % (target, message.status)), - ] - - s = json.dumps(self.unpack(message), indent=4) - for msg in format_text(s): - yield msg - - def __call__(self, data, **metadata): - envelope = remoting.decode(data, strict=False) - if envelope: - return "AMF v%s" % envelope.amfVersion, self._format(envelope) - - -class ViewJavaScript(View): - name = "JavaScript" - prompt = ("javascript", "j") - content_types = [ - "application/x-javascript", - "application/javascript", - "text/javascript" - ] - - def __call__(self, data, **metadata): - opts = jsbeautifier.default_options() - opts.indent_size = 2 - data = data.decode("utf-8", 
"replace") - res = jsbeautifier.beautify(data, opts) - return "JavaScript", format_text(res) - - -class ViewCSS(View): - name = "CSS" - prompt = ("css", "c") - content_types = [ - "text/css" - ] - - def __call__(self, data, **metadata): - cssutils.log.setLevel(logging.CRITICAL) - cssutils.ser.prefs.keepComments = True - cssutils.ser.prefs.omitLastSemicolon = False - cssutils.ser.prefs.indentClosingBrace = False - cssutils.ser.prefs.validOnly = False - - sheet = cssutils.parseString(data) - beautified = sheet.cssText - - return "CSS", format_text(beautified) - - -class ViewImage(View): - name = "Image" - prompt = ("image", "i") - content_types = [ - "image/png", - "image/jpeg", - "image/gif", - "image/vnd.microsoft.icon", - "image/x-icon", - ] - - def __call__(self, data, **metadata): - try: - img = Image.open(io.BytesIO(data)) - except IOError: - return None - parts = [ - ("Format", str(img.format_description)), - ("Size", "%s x %s px" % img.size), - ("Mode", str(img.mode)), - ] - for i in sorted(img.info.keys()): - if i != "exif": - parts.append( - (str(i), str(img.info[i])) - ) - if hasattr(img, "_getexif"): - ex = img._getexif() - if ex: - for i in sorted(ex.keys()): - tag = ExifTags.TAGS.get(i, i) - parts.append( - (str(tag), str(ex[i])) - ) - fmt = format_dict(multidict.MultiDict(parts)) - return "%s image" % img.format, fmt - - -class ViewProtobuf(View): - """Human friendly view of protocol buffers - The view uses the protoc compiler to decode the binary - """ - - name = "Protocol Buffer" - prompt = ("protobuf", "p") - content_types = [ - "application/x-protobuf", - "application/x-protobuffer", - ] - - @staticmethod - def is_available(): - try: - p = subprocess.Popen( - ["protoc", "--version"], - stdout=subprocess.PIPE - ) - out, _ = p.communicate() - return out.startswith("libprotoc") - except: - return False - - def decode_protobuf(self, content): - # if Popen raises OSError, it will be caught in - # get_content_view and fall back to Raw - p = 
subprocess.Popen(['protoc', '--decode_raw'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = p.communicate(input=content) - if out: - return out - else: - return err - - def __call__(self, data, **metadata): - decoded = self.decode_protobuf(data) - return "Protobuf", format_text(decoded) - - -class ViewQuery(View): - name = "Query" - prompt = ("query", "q") - content_types = [] - - def __call__(self, data, **metadata): - query = metadata.get("query") - if query: - return "Query", format_dict(query) - else: - return "Query", format_text("") - - -class ViewWBXML(View): - name = "WBXML" - prompt = ("wbxml", "w") - content_types = [ - "application/vnd.wap.wbxml", - "application/vnd.ms-sync.wbxml" - ] - - def __call__(self, data, **metadata): - try: - parser = ASCommandResponse.ASCommandResponse(data) - parsedContent = parser.xmlString - if parsedContent: - return "WBXML", format_text(parsedContent) - except: - return None - - -views = [] -content_types_map = {} -view_prompts = [] - - -def get(name): - for i in views: - if i.name.lower() == name.lower(): - return i - - -def get_by_shortcut(c): - for i in views: - if i.prompt[1] == c: - return i - - -def add(view): - # TODO: auto-select a different name (append an integer?) 
- for i in views: - if i.name == view.name: - raise exceptions.ContentViewException("Duplicate view: " + view.name) - - # TODO: the UI should auto-prompt for a replacement shortcut - for prompt in view_prompts: - if prompt[1] == view.prompt[1]: - raise exceptions.ContentViewException("Duplicate view shortcut: " + view.prompt[1]) - - views.append(view) - - for ct in view.content_types: - l = content_types_map.setdefault(ct, []) - l.append(view) - - view_prompts.append(view.prompt) - - -def remove(view): - for ct in view.content_types: - l = content_types_map.setdefault(ct, []) - l.remove(view) - - if not len(l): - del content_types_map[ct] - - view_prompts.remove(view.prompt) - views.remove(view) - - -add(ViewAuto()) -add(ViewRaw()) -add(ViewHex()) -add(ViewJSON()) -add(ViewXML()) -add(ViewWBXML()) -add(ViewHTML()) -add(ViewHTMLOutline()) -add(ViewJavaScript()) -add(ViewCSS()) -add(ViewURLEncoded()) -add(ViewMultipart()) -add(ViewImage()) -add(ViewQuery()) - -if pyamf: - add(ViewAMF()) - -if ViewProtobuf.is_available(): - add(ViewProtobuf()) - - -def safe_to_print(lines, encoding="utf8"): - """ - Wraps a content generator so that each text portion is a *safe to print* unicode string. - """ - for line in lines: - clean_line = [] - for (style, text) in line: - if isinstance(text, bytes): - text = text.decode(encoding, "replace") - text = strutils.escape_control_characters(text) - clean_line.append((style, text)) - yield clean_line - - -def get_message_content_view(viewname, message): - """ - Like get_content_view, but also handles message encoding. 
- """ - viewmode = get(viewname) - if not viewmode: - viewmode = get("auto") - try: - content = message.content - except ValueError: - content = message.raw_content - enc = "[cannot decode]" - else: - if isinstance(message, http.Message) and content != message.raw_content: - enc = "[decoded {}]".format( - message.headers.get("content-encoding") - ) - else: - enc = None - - if content is None: - return "", iter([[("error", "content missing")]]), None - - metadata = {} - if isinstance(message, http.Request): - metadata["query"] = message.query - if isinstance(message, http.Message): - metadata["headers"] = message.headers - - description, lines, error = get_content_view( - viewmode, content, **metadata - ) - - if enc: - description = "{} {}".format(enc, description) - - return description, lines, error - - -def get_content_view(viewmode, data, **metadata): - """ - Args: - viewmode: the view to use. - data, **metadata: arguments passed to View instance. - - Returns: - A (description, content generator, error) tuple. - If the content view raised an exception generating the view, - the exception is returned in error and the flow is formatted in raw mode. - In contrast to calling the views directly, text is always safe-to-print unicode. - """ - try: - ret = viewmode(data, **metadata) - if ret is None: - ret = "Couldn't parse: falling back to Raw", get("Raw")(data, **metadata)[1] - desc, content = ret - error = None - # Third-party viewers can fail in unexpected ways... 
- except Exception: - desc = "Couldn't parse: falling back to Raw" - _, content = get("Raw")(data, **metadata) - error = "{} Content viewer failed: \n{}".format( - getattr(viewmode, "name"), - traceback.format_exc() - ) - - return desc, safe_to_print(content), error diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py new file mode 100644 index 00000000..357172e3 --- /dev/null +++ b/mitmproxy/contentviews/__init__.py @@ -0,0 +1,183 @@ +""" +Mitmproxy Content Views +======================= + +mitmproxy includes a set of content views which can be used to +format/decode/highlight data. While they are currently used for HTTP message +bodies only, the may be used in other contexts in the future, e.g. to decode +protobuf messages sent as WebSocket frames. + +Thus, the View API is very minimalistic. The only arguments are `data` and +`**metadata`, where `data` is the actual content (as bytes). The contents on +metadata depend on the protocol in use. For HTTP, the message headers are +passed as the ``headers`` keyword argument. For HTTP requests, the query +parameters are passed as the ``query`` keyword argument. +""" +import traceback +from typing import Dict, Optional # noqa +from typing import List # noqa +from typing import Tuple # noqa + +from mitmproxy import exceptions +from mitmproxy.net import http +from mitmproxy.utils import strutils +from . 
import ( + auto, raw, hex, json, xml_html, html_outline, wbxml, javascript, css, + urlencoded, multipart, image, query, protobuf +) +from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict + +views = [] # type: List[View] +content_types_map = {} # type: Dict[str, List[View]] +view_prompts = [] # type: List[Tuple[str, str]] + + +def get(name: str) -> Optional[View]: + for i in views: + if i.name.lower() == name.lower(): + return i + + +def get_by_shortcut(c: str) -> Optional[View]: + for i in views: + if i.prompt[1] == c: + return i + + +def add(view: View) -> None: + # TODO: auto-select a different name (append an integer?) + for i in views: + if i.name == view.name: + raise exceptions.ContentViewException("Duplicate view: " + view.name) + + # TODO: the UI should auto-prompt for a replacement shortcut + for prompt in view_prompts: + if prompt[1] == view.prompt[1]: + raise exceptions.ContentViewException("Duplicate view shortcut: " + view.prompt[1]) + + views.append(view) + + for ct in view.content_types: + l = content_types_map.setdefault(ct, []) + l.append(view) + + view_prompts.append(view.prompt) + + +def remove(view: View) -> None: + for ct in view.content_types: + l = content_types_map.setdefault(ct, []) + l.remove(view) + + if not len(l): + del content_types_map[ct] + + view_prompts.remove(view.prompt) + views.remove(view) + + +def safe_to_print(lines, encoding="utf8"): + """ + Wraps a content generator so that each text portion is a *safe to print* unicode string. + """ + for line in lines: + clean_line = [] + for (style, text) in line: + if isinstance(text, bytes): + text = text.decode(encoding, "replace") + text = strutils.escape_control_characters(text) + clean_line.append((style, text)) + yield clean_line + + +def get_message_content_view(viewname, message): + """ + Like get_content_view, but also handles message encoding. 
+ """ + viewmode = get(viewname) + if not viewmode: + viewmode = get("auto") + try: + content = message.content + except ValueError: + content = message.raw_content + enc = "[cannot decode]" + else: + if isinstance(message, http.Message) and content != message.raw_content: + enc = "[decoded {}]".format( + message.headers.get("content-encoding") + ) + else: + enc = None + + if content is None: + return "", iter([[("error", "content missing")]]), None + + metadata = {} + if isinstance(message, http.Request): + metadata["query"] = message.query + if isinstance(message, http.Message): + metadata["headers"] = message.headers + + description, lines, error = get_content_view( + viewmode, content, **metadata + ) + + if enc: + description = "{} {}".format(enc, description) + + return description, lines, error + + +def get_content_view(viewmode: View, data: bytes, **metadata): + """ + Args: + viewmode: the view to use. + data, **metadata: arguments passed to View instance. + + Returns: + A (description, content generator, error) tuple. + If the content view raised an exception generating the view, + the exception is returned in error and the flow is formatted in raw mode. + In contrast to calling the views directly, text is always safe-to-print unicode. + """ + try: + ret = viewmode(data, **metadata) + if ret is None: + ret = "Couldn't parse: falling back to Raw", get("Raw")(data, **metadata)[1] + desc, content = ret + error = None + # Third-party viewers can fail in unexpected ways... 
+ except Exception: + desc = "Couldn't parse: falling back to Raw" + _, content = get("Raw")(data, **metadata) + error = "{} Content viewer failed: \n{}".format( + getattr(viewmode, "name"), + traceback.format_exc() + ) + + return desc, safe_to_print(content), error + + +add(auto.ViewAuto()) +add(raw.ViewRaw()) +add(hex.ViewHex()) +add(json.ViewJSON()) +add(xml_html.ViewXmlHtml()) +add(wbxml.ViewWBXML()) +add(html_outline.ViewHTMLOutline()) +add(javascript.ViewJavaScript()) +add(css.ViewCSS()) +add(urlencoded.ViewURLEncoded()) +add(multipart.ViewMultipart()) +add(image.ViewImage()) +add(query.ViewQuery()) + +if protobuf.ViewProtobuf.is_available(): + add(protobuf.ViewProtobuf()) + +__all__ = [ + "View", "VIEW_CUTOFF", "KEY_MAX", "format_text", "format_dict", + "get", "get_by_shortcut", "add", "remove", + "get_content_view", "get_message_content_view", +] diff --git a/mitmproxy/contentviews/auto.py b/mitmproxy/contentviews/auto.py new file mode 100644 index 00000000..7b3cbd78 --- /dev/null +++ b/mitmproxy/contentviews/auto.py @@ -0,0 +1,27 @@ +from mitmproxy import contentviews +from mitmproxy.net import http +from mitmproxy.utils import strutils +from . 
import base + + +class ViewAuto(base.View): + name = "Auto" + prompt = ("auto", "a") + + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + ctype = headers.get("content-type") + if data and ctype: + ct = http.parse_content_type(ctype) if ctype else None + ct = "%s/%s" % (ct[0], ct[1]) + if ct in contentviews.content_types_map: + return contentviews.content_types_map[ct][0](data, **metadata) + elif strutils.is_xml(data): + return contentviews.get("XML/HTML")(data, **metadata) + if metadata.get("query"): + return contentviews.get("Query")(data, **metadata) + if data and strutils.is_mostly_bin(data): + return contentviews.get("Hex")(data) + if not data: + return "No content", [] + return contentviews.get("Raw")(data) diff --git a/mitmproxy/contentviews/base.py b/mitmproxy/contentviews/base.py new file mode 100644 index 00000000..0de4f786 --- /dev/null +++ b/mitmproxy/contentviews/base.py @@ -0,0 +1,65 @@ +# Default view cutoff *in lines* + +from typing import Iterable, AnyStr, List +from typing import Mapping +from typing import Tuple + +VIEW_CUTOFF = 512 + +KEY_MAX = 30 + + +class View: + name = None # type: str + prompt = None # type: Tuple[str,str] + content_types = [] # type: List[str] + + def __call__(self, data: bytes, **metadata): + """ + Transform raw data into human-readable output. + + Args: + data: the data to decode/format. + metadata: optional keyword-only arguments for metadata. Implementations must not + rely on a given argument being present. + + Returns: + A (description, content generator) tuple. + + The content generator yields lists of (style, text) tuples, where each list represents + a single line. ``text`` is a unfiltered byte string which may need to be escaped, + depending on the used output. + + Caveats: + The content generator must not yield tuples of tuples, + because urwid cannot process that. You have to yield a *list* of tuples per line. 
+ """ + raise NotImplementedError() # pragma: no cover + + +def format_dict( + d: Mapping[AnyStr, AnyStr] +) -> Iterable[List[Tuple[str, AnyStr]]]: + """ + Helper function that transforms the given dictionary into a list of + ("key", key ) + ("value", value) + tuples, where key is padded to a uniform width. + """ + max_key_len = max(len(k) for k in d.keys()) + max_key_len = min(max_key_len, KEY_MAX) + for key, value in d.items(): + key += b":" if isinstance(key, bytes) else u":" + key = key.ljust(max_key_len + 2) + yield [ + ("header", key), + ("text", value) + ] + + +def format_text(text: AnyStr) -> Iterable[List[Tuple[str, AnyStr]]]: + """ + Helper function that transforms bytes into the view output format. + """ + for line in text.splitlines(): + yield [("text", line)] diff --git a/mitmproxy/contentviews/css.py b/mitmproxy/contentviews/css.py new file mode 100644 index 00000000..353a3257 --- /dev/null +++ b/mitmproxy/contentviews/css.py @@ -0,0 +1,25 @@ +import logging + +import cssutils + +from . import base + + +class ViewCSS(base.View): + name = "CSS" + prompt = ("css", "c") + content_types = [ + "text/css" + ] + + def __call__(self, data, **metadata): + cssutils.log.setLevel(logging.CRITICAL) + cssutils.ser.prefs.keepComments = True + cssutils.ser.prefs.omitLastSemicolon = False + cssutils.ser.prefs.indentClosingBrace = False + cssutils.ser.prefs.validOnly = False + + sheet = cssutils.parseString(data) + beautified = sheet.cssText + + return "CSS", base.format_text(beautified) diff --git a/mitmproxy/contentviews/hex.py b/mitmproxy/contentviews/hex.py new file mode 100644 index 00000000..6251a8f3 --- /dev/null +++ b/mitmproxy/contentviews/hex.py @@ -0,0 +1,19 @@ +from mitmproxy.utils import strutils +from . 
import base + + +class ViewHex(base.View): + name = "Hex" + prompt = ("hex", "e") + + @staticmethod + def _format(data): + for offset, hexa, s in strutils.hexdump(data): + yield [ + ("offset", offset + " "), + ("text", hexa + " "), + ("text", s) + ] + + def __call__(self, data, **metadata): + return "Hex", self._format(data) diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py new file mode 100644 index 00000000..d6c51b29 --- /dev/null +++ b/mitmproxy/contentviews/html_outline.py @@ -0,0 +1,17 @@ +import html2text + +from mitmproxy.contentviews import base + + +class ViewHTMLOutline(base.View): + name = "HTML Outline" + prompt = ("html outline", "o") + content_types = ["text/html"] + + def __call__(self, data, **metadata): + data = data.decode("utf-8", "replace") + h = html2text.HTML2Text(baseurl="") + h.ignore_images = True + h.body_width = 0 + outline = h.handle(data) + return "HTML Outline", base.format_text(outline) diff --git a/mitmproxy/contentviews/image.py b/mitmproxy/contentviews/image.py new file mode 100644 index 00000000..57b1fffb --- /dev/null +++ b/mitmproxy/contentviews/image.py @@ -0,0 +1,45 @@ +import io + +from PIL import ExifTags +from PIL import Image + +from mitmproxy.types import multidict +from . 
import base + + +class ViewImage(base.View): + name = "Image" + prompt = ("image", "i") + content_types = [ + "image/png", + "image/jpeg", + "image/gif", + "image/vnd.microsoft.icon", + "image/x-icon", + ] + + def __call__(self, data, **metadata): + try: + img = Image.open(io.BytesIO(data)) + except IOError: + return None + parts = [ + ("Format", str(img.format_description)), + ("Size", "%s x %s px" % img.size), + ("Mode", str(img.mode)), + ] + for i in sorted(img.info.keys()): + if i != "exif": + parts.append( + (str(i), str(img.info[i])) + ) + if hasattr(img, "_getexif"): + ex = img._getexif() + if ex: + for i in sorted(ex.keys()): + tag = ExifTags.TAGS.get(i, i) + parts.append( + (str(tag), str(ex[i])) + ) + fmt = base.format_dict(multidict.MultiDict(parts)) + return "%s image" % img.format, fmt diff --git a/mitmproxy/contentviews/javascript.py b/mitmproxy/contentviews/javascript.py new file mode 100644 index 00000000..c2fab875 --- /dev/null +++ b/mitmproxy/contentviews/javascript.py @@ -0,0 +1,20 @@ +import jsbeautifier + +from . 
import base + + +class ViewJavaScript(base.View): + name = "JavaScript" + prompt = ("javascript", "j") + content_types = [ + "application/x-javascript", + "application/javascript", + "text/javascript" + ] + + def __call__(self, data, **metadata): + opts = jsbeautifier.default_options() + opts.indent_size = 2 + data = data.decode("utf-8", "replace") + res = jsbeautifier.beautify(data, opts) + return "JavaScript", base.format_text(res) diff --git a/mitmproxy/contentviews/json.py b/mitmproxy/contentviews/json.py new file mode 100644 index 00000000..de7f1093 --- /dev/null +++ b/mitmproxy/contentviews/json.py @@ -0,0 +1,27 @@ +import json +from typing import Optional + +from mitmproxy.contentviews import base + + +def pretty_json(s: bytes) -> Optional[bytes]: + try: + p = json.loads(s.decode('utf-8')) + except ValueError: + return None + pretty = json.dumps(p, sort_keys=True, indent=4, ensure_ascii=False) + return pretty.encode("utf8", "strict") + + +class ViewJSON(base.View): + name = "JSON" + prompt = ("json", "s") + content_types = [ + "application/json", + "application/vnd.api+json" + ] + + def __call__(self, data, **metadata): + pj = pretty_json(data) + if pj: + return "JSON", base.format_text(pj) diff --git a/mitmproxy/contentviews/multipart.py b/mitmproxy/contentviews/multipart.py new file mode 100644 index 00000000..0b0e51e2 --- /dev/null +++ b/mitmproxy/contentviews/multipart.py @@ -0,0 +1,21 @@ +from mitmproxy.net import http +from mitmproxy.types import multidict +from . 
import base + + +class ViewMultipart(base.View): + name = "Multipart Form" + prompt = ("multipart", "m") + content_types = ["multipart/form-data"] + + @staticmethod + def _format(v): + yield [("highlight", "Form data:\n")] + for message in base.format_dict(multidict.MultiDict(v)): + yield message + + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + v = http.multipart.decode(headers, data) + if v: + return "Multipart form", self._format(v) diff --git a/mitmproxy/contentviews/protobuf.py b/mitmproxy/contentviews/protobuf.py new file mode 100644 index 00000000..620d9444 --- /dev/null +++ b/mitmproxy/contentviews/protobuf.py @@ -0,0 +1,45 @@ +import subprocess + +from . import base + + +class ViewProtobuf(base.View): + """Human friendly view of protocol buffers + The view uses the protoc compiler to decode the binary + """ + + name = "Protocol Buffer" + prompt = ("protobuf", "p") + content_types = [ + "application/x-protobuf", + "application/x-protobuffer", + ] + + @staticmethod + def is_available(): + try: + p = subprocess.Popen( + ["protoc", "--version"], + stdout=subprocess.PIPE + ) + out, _ = p.communicate() + return out.startswith("libprotoc") + except: + return False + + def decode_protobuf(self, content): + # if Popen raises OSError, it will be caught in + # get_content_view and fall back to Raw + p = subprocess.Popen(['protoc', '--decode_raw'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate(input=content) + if out: + return out + else: + return err + + def __call__(self, data, **metadata): + decoded = self.decode_protobuf(data) + return "Protobuf", base.format_text(decoded) diff --git a/mitmproxy/contentviews/query.py b/mitmproxy/contentviews/query.py new file mode 100644 index 00000000..93f47829 --- /dev/null +++ b/mitmproxy/contentviews/query.py @@ -0,0 +1,15 @@ +from typing import List # noqa + +from . 
import base + + +class ViewQuery(base.View): + name = "Query" + prompt = ("query", "q") + + def __call__(self, data, **metadata): + query = metadata.get("query") + if query: + return "Query", base.format_dict(query) + else: + return "Query", base.format_text("") diff --git a/mitmproxy/contentviews/raw.py b/mitmproxy/contentviews/raw.py new file mode 100644 index 00000000..dcc53aa7 --- /dev/null +++ b/mitmproxy/contentviews/raw.py @@ -0,0 +1,12 @@ +from typing import List # noqa + +from mitmproxy.utils import strutils +from . import base + + +class ViewRaw(base.View): + name = "Raw" + prompt = ("raw", "r") + + def __call__(self, data, **metadata): + return "Raw", base.format_text(strutils.bytes_to_escaped_str(data, True)) diff --git a/mitmproxy/contentviews/urlencoded.py b/mitmproxy/contentviews/urlencoded.py new file mode 100644 index 00000000..79fe9c1c --- /dev/null +++ b/mitmproxy/contentviews/urlencoded.py @@ -0,0 +1,17 @@ +from mitmproxy.net.http import url +from mitmproxy.types import multidict +from . import base + + +class ViewURLEncoded(base.View): + name = "URL-encoded" + prompt = ("urlencoded", "u") + content_types = ["application/x-www-form-urlencoded"] + + def __call__(self, data, **metadata): + try: + data = data.decode("ascii", "strict") + except ValueError: + return None + d = url.decode(data) + return "URLEncoded form", base.format_dict(multidict.MultiDict(d)) diff --git a/mitmproxy/contentviews/wbxml.py b/mitmproxy/contentviews/wbxml.py new file mode 100644 index 00000000..d626e188 --- /dev/null +++ b/mitmproxy/contentviews/wbxml.py @@ -0,0 +1,20 @@ +from mitmproxy.contrib.wbxml import ASCommandResponse +from . 
import base + + +class ViewWBXML(base.View): + name = "WBXML" + prompt = ("wbxml", "w") + content_types = [ + "application/vnd.wap.wbxml", + "application/vnd.ms-sync.wbxml" + ] + + def __call__(self, data, **metadata): + try: + parser = ASCommandResponse.ASCommandResponse(data) + parsedContent = parser.xmlString + if parsedContent: + return "WBXML", base.format_text(parsedContent) + except: + return None diff --git a/mitmproxy/contentviews/xml_html.py b/mitmproxy/contentviews/xml_html.py new file mode 100644 index 00000000..0f2ce57d --- /dev/null +++ b/mitmproxy/contentviews/xml_html.py @@ -0,0 +1,234 @@ +import io +import re +import textwrap +from typing import Iterable + +from mitmproxy.contentviews import base +from mitmproxy.utils import sliding_window + +""" +A custom XML/HTML prettifier. Compared to other prettifiers, its main features are: + +- Implemented in pure Python. +- Modifies whitespace only. +- Works with any input. +- Lazy evaluation. + +The implementation is split into two main parts: tokenization and formatting of tokens. +""" + +# http://www.xml.com/pub/a/2001/07/25/namingparts.html - this is close enough for what we do. 
# Raw string: "\-" is not a valid escape sequence in a normal string literal.
REGEX_TAG = re.compile(r"[a-zA-Z0-9._:\-]+(?!=)")
# https://www.w3.org/TR/html5/syntax.html#void-elements
HTML_VOID_ELEMENTS = {
    "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
    "source", "track", "wbr"
}
# Tags that are tracked on the element stack but do not increase indentation.
NO_INDENT_TAGS = {"xml", "doctype", "html"}
# Number of spaces added per indentation level.
INDENT = 2


class Token:
    """Base class for tokenizer output; ``data`` holds the raw source text."""

    def __init__(self, data):
        self.data = data

    def __repr__(self):
        return "{}({})".format(
            type(self).__name__,
            self.data
        )


class Text(Token):
    """A run of character data between two tags."""

    @property
    def text(self):
        """The token data with surrounding whitespace stripped."""
        return self.data.strip()


class Tag(Token):
    """Markup starting with ``<``: element tags, comments and CDATA sections."""

    @property
    def tag(self):
        """Lower-cased first name-like match in the data, or ``"<empty>"`` if none."""
        t = REGEX_TAG.search(self.data)
        if t is not None:
            return t.group(0).lower()
        return "<empty>"

    @property
    def is_comment(self) -> bool:
        return self.data.startswith("<!--")

    @property
    def is_cdata(self) -> bool:
        return self.data.startswith("<![CDATA[")

    @property
    def is_closing(self):
        return self.data.startswith("</")

    @property
    def is_self_closing(self):
        """True for comments, CDATA, ``<x/>`` and HTML void elements."""
        return (
            self.is_comment or
            self.is_cdata or
            self.data.endswith("/>") or
            self.tag in HTML_VOID_ELEMENTS
        )

    @property
    def is_opening(self):
        return not self.is_closing and not self.is_self_closing

    @property
    def done(self):
        """Whether the data collected so far ends with this tag's terminator."""
        if self.is_comment:
            return self.data.endswith("-->")
        elif self.is_cdata:
            return self.data.endswith("]]>")
        else:
            # This fails for attributes that contain an unescaped ">"
            return self.data.endswith(">")


def tokenize(data: str) -> Iterable[Token]:
    """
    Lazily split ``data`` into alternating Text and Tag tokens.

    Text tokens consisting only of whitespace are not yielded. A tag is extended
    ">" by ">" until ``Tag.done`` is true, so comments and CDATA sections may
    contain ">". If the input ends inside a token, that token is yielded as
    long as it is not blank.
    """
    token = Text("")  # type: Token

    i = 0

    def readuntil(char, start, include=1):
        # Return data[i:pos + include], where pos is the next occurrence of
        # char at or after start (or the end of data), and advance i past it.
        nonlocal i
        end = data.find(char, start)
        if end == -1:
            end = len(data)
        ret = data[i:end + include]
        i = end + include
        return ret

    while i < len(data):
        if isinstance(token, Text):
            # Everything up to, but not including, the next "<".
            token.data = readuntil("<", i, 0)
            if token.text:
                yield token
            token = Tag("")
        elif isinstance(token, Tag):
            # Everything up to and including the next ">".
            token.data += readuntil(">", i, 1)
            if token.done:
                yield token
                token = Text("")
    if token.data.strip():
        yield token


def indent_text(data: str, prefix: str) -> str:
    """
    Dedent ``data``, strip it, and prefix every line with ``prefix``.

    At most the first 32 characters of ``prefix`` are used.
    """
    # Add spacing to first line so that we dedent in cases like this:
    # <li>This is
    #     example text
    #     over multiple lines
    # </li>
    dedented = textwrap.dedent(" " * 32 + data).strip()
    return textwrap.indent(dedented, prefix[:32])


def is_inline_text(a: Token, b: Token, c: Token) -> bool:
    """
    Return True if (a, b, c) is ``<tag>single-line text</tag>``: an opening tag,
    a Text token without a newline, and the closing tag of the same name.
    """
    if isinstance(a, Tag) and isinstance(b, Text) and isinstance(c, Tag):
        if a.is_opening and "\n" not in b.data and c.is_closing and a.tag == c.tag:
            return True
    return False


def is_inline(prev2: Token, prev1: Token, t: Token, next1: Token, next2: Token) -> bool:
    """
    Decide whether token ``t`` should stay on the same line as its neighbours.

    The neighbours may be None at the edges of the token stream.
    """
    if isinstance(t, Text):
        return is_inline_text(prev1, t, next1)
    elif isinstance(t, Tag):
        if is_inline_text(prev2, prev1, t) or is_inline_text(t, next1, next2):
            return True
        if isinstance(next1, Tag) and t.is_opening and next1.is_closing and t.tag == next1.tag:
            return True  # <div></div> (start tag)
        if isinstance(prev1, Tag) and prev1.is_opening and t.is_closing and prev1.tag == t.tag:
            return True  # <div></div> (end tag)
    return False


class ElementStack:
    """
    Keep track of how deeply nested our document is.
    """

    def __init__(self):
        self.open_tags = []
        self.indent = ""

    def push_tag(self, tag: str):
        """Record an opening tag; once more than 16 tags are open, further tags are ignored."""
        if len(self.open_tags) > 16:
            return
        self.open_tags.append(tag)
        if tag not in NO_INDENT_TAGS:
            self.indent += " " * INDENT

    def pop_tag(self, tag: str):
        """
        Close ``tag`` together with every tag opened after it and reduce the
        indentation accordingly. A closing tag without a matching open tag
        leaves the stack and the indentation untouched.
        """
        if tag not in self.open_tags:
            return  # this closing tag has no start tag. let's keep indentation as-is.
        remove_indent = 0
        while True:
            t = self.open_tags.pop()
            if t not in NO_INDENT_TAGS:
                remove_indent += INDENT
            if t == tag:
                break
        # Slice with an explicit end index: indent[:-0] would be indent[:0] and
        # wipe the indentation whenever only no-indent tags were popped.
        self.indent = self.indent[:len(self.indent) - remove_indent]
+ + +def format_xml(tokens: Iterable[Token]) -> str: + out = io.StringIO() + + context = ElementStack() + + for prev2, prev1, token, next1, next2 in sliding_window.window(tokens, 2, 2): + if isinstance(token, Tag): + if token.is_opening: + out.write(indent_text(token.data, context.indent)) + + if not is_inline(prev2, prev1, token, next1, next2): + out.write("\n") + + context.push_tag(token.tag) + elif token.is_closing: + context.pop_tag(token.tag) + + if is_inline(prev2, prev1, token, next1, next2): + out.write(token.data) + else: + out.write(indent_text(token.data, context.indent)) + out.write("\n") + + else: # self-closing + out.write(indent_text(token.data, context.indent)) + out.write("\n") + elif isinstance(token, Text): + if is_inline(prev2, prev1, token, next1, next2): + out.write(token.text) + else: + out.write(indent_text(token.data, context.indent)) + out.write("\n") + else: # pragma: no cover + raise RuntimeError() + + return out.getvalue() + + +class ViewXmlHtml(base.View): + name = "XML/HTML" + prompt = ("xml/html", "x") + content_types = ["text/xml", "text/html"] + + def __call__(self, data, **metadata): + # TODO: + # We should really have the message text as str here, + # not the message content as bytes. + # https://github.com/mitmproxy/mitmproxy/issues/1662#issuecomment-266192578 + data = data.decode("utf8", "xmlcharrefreplace") + tokens = tokenize(data) + # TODO: + # Performance: Don't render the whole document right away. + # Let's wait with this until we have a sequence-like interface, + # this thing is reasonably fast right now anyway. 
+ pretty = base.format_text(format_xml(tokens)) + if "html" in data.lower(): + t = "HTML" + else: + t = "XML" + return t, pretty diff --git a/mitmproxy/tools/console/flowlist.py b/mitmproxy/tools/console/flowlist.py index d7c312e5..fee215c6 100644 --- a/mitmproxy/tools/console/flowlist.py +++ b/mitmproxy/tools/console/flowlist.py @@ -355,9 +355,11 @@ class FlowListBox(urwid.ListBox): elif key == "e": self.master.toggle_eventlog() elif key == "g": - self.master.view.focus.index = 0 + if len(self.master.view): + self.master.view.focus.index = 0 elif key == "G": - self.master.view.focus.index = len(self.master.view) - 1 + if len(self.master.view): + self.master.view.focus.index = len(self.master.view) - 1 elif key == "f": signals.status_prompt.send( prompt = "Filter View", diff --git a/mitmproxy/tools/console/flowview.py b/mitmproxy/tools/console/flowview.py index ecb070d8..efeab647 100644 --- a/mitmproxy/tools/console/flowview.py +++ b/mitmproxy/tools/console/flowview.py @@ -1,23 +1,23 @@ import math import os import sys +from functools import lru_cache +from typing import Optional, Union # noqa import urwid -from mitmproxy import exceptions -from typing import Optional, Union # noqa from mitmproxy import contentviews +from mitmproxy import exceptions +from mitmproxy import export from mitmproxy import http +from mitmproxy.net.http import Headers +from mitmproxy.net.http import status_codes from mitmproxy.tools.console import common from mitmproxy.tools.console import flowdetailview from mitmproxy.tools.console import grideditor from mitmproxy.tools.console import searchable from mitmproxy.tools.console import signals from mitmproxy.tools.console import tabs -from mitmproxy import export -from mitmproxy.net.http import Headers -from mitmproxy.net.http import status_codes -from functools import lru_cache class SearchError(Exception): @@ -483,9 +483,12 @@ class FlowView(tabs.Tabs): return self._view_nextprev_flow(self.view.index(flow) - 1, flow) def 
change_this_display_mode(self, t): - name = contentviews.get_by_shortcut(t).name - self.view.settings[self.flow][(self.tab_offset, "prettyview")] = name - signals.flow_change.send(self, flow = self.flow) + view = contentviews.get_by_shortcut(t) + if view: + self.view.settings[self.flow][(self.tab_offset, "prettyview")] = view.name + else: + self.view.settings[self.flow][(self.tab_offset, "prettyview")] = None + signals.flow_change.send(self, flow=self.flow) def keypress(self, size, key): conn = None # type: Optional[Union[http.HTTPRequest, http.HTTPResponse]] diff --git a/mitmproxy/utils/sliding_window.py b/mitmproxy/utils/sliding_window.py new file mode 100644 index 00000000..4714b8e3 --- /dev/null +++ b/mitmproxy/utils/sliding_window.py @@ -0,0 +1,30 @@ +import itertools +from typing import TypeVar, Iterator, Tuple, Optional + +T = TypeVar('T') + + +def window(iterator: Iterator[T], behind: int = 0, ahead: int = 0) -> Iterator[Tuple[Optional[T]]]: + """ + Sliding window for an iterator. + + Example: + >>> for prev, i, nxt in window(range(10), 1, 1): + >>> print(prev, i, nxt) + + None 0 1 + 0 1 2 + 1 2 3 + 2 3 None + """ + # TODO: move into utils + iters = list(itertools.tee(iterator, behind + 1 + ahead)) + for i in range(behind): + iters[i] = itertools.chain((behind - i) * [None], iters[i]) + for i in range(ahead): + iters[-1 - i] = itertools.islice( + itertools.chain(iters[-1 - i], (ahead - i) * [None]), + (ahead - i), + None + ) + return zip(*iters) diff --git a/requirements.txt b/requirements.txt index 67a02a97..ab8e8a0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5' -e .[dev,examples,contentviews] @@ -61,7 +61,7 @@ setup( # It is not considered best practice to use install_requires to pin dependencies to specific versions. 
install_requires=[ "blinker>=1.4, <1.5", - "click>=6.2, <7.0", + "click>=6.2, <7", "certifi>=2015.11.20.1", # no semver here - this should always be on the last release! "construct>=2.8, <2.9", "cryptography>=1.3, <1.7", @@ -71,7 +71,6 @@ setup( "html2text>=2016.1.8, <=2016.9.19", "hyperframe>=4.0.1, <5", "jsbeautifier>=1.6.3, <1.7", - "lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet. "Pillow>=3.2, <3.5", "passlib>=1.6.5, <1.8", "pyasn1>=0.1.9, <0.2", @@ -96,8 +95,8 @@ setup( ':sys_platform != "win32"': [ ], 'dev': [ - "flake8>=2.6.2, <3.3", - "mypy-lang>=0.4.5, <0.5", + "flake8>=3.2.1, <3.3", + "mypy-lang>=0.4.6, <0.5", "rstcheck>=2.2, <3.0", "tox>=2.3, <3", "mock>=2.0, <2.1", @@ -106,9 +105,10 @@ setup( "pytest-timeout>=1.0.0, <2", "pytest-xdist>=1.14, <2", "pytest-faulthandler>=1.3.0, <2", - "sphinx>=1.3.5, <1.5", + "sphinx>=1.3.5, <1.6", "sphinx-autobuild>=0.5.2, <0.7", "sphinxcontrib-documentedlist>=0.4.0, <0.5", + "docutils==0.12", # temporary pin, https://github.com/chintal/sphinxcontrib-documentedlist/pull/3 "sphinx_rtd_theme>=0.1.9, <0.2", ], 'contentviews': [ @@ -118,7 +118,7 @@ setup( ], 'examples': [ "beautifulsoup4>=4.4.1, <4.6", - "pytz>=2015.07.0, <=2016.7", + "pytz>=2015.07.0, <=2016.10", ] } ) diff --git a/test/mitmproxy/addons/test_dumper.py b/test/mitmproxy/addons/test_dumper.py index 760efa08..8b15c85b 100644 --- a/test/mitmproxy/addons/test_dumper.py +++ b/test/mitmproxy/addons/test_dumper.py @@ -131,7 +131,7 @@ def test_echo_request_line(): class TestContentView: - @mock.patch("mitmproxy.contentviews.ViewAuto.__call__") + @mock.patch("mitmproxy.contentviews.auto.ViewAuto.__call__") def test_contentview(self, view_auto): view_auto.side_effect = exceptions.ContentViewException("") sio = io.StringIO() diff --git a/test/mitmproxy/contentviews/__init__.py b/test/mitmproxy/contentviews/__init__.py new file mode 100644 index 00000000..9adc57ec --- /dev/null +++ b/test/mitmproxy/contentviews/__init__.py @@ -0,0 +1,9 @@ +def full_eval(instance): + def 
call(data, **metadata): + x = instance(data, **metadata) + if x is None: + return None + name, generator = x + return name, list(generator) + + return call diff --git a/test/mitmproxy/contentviews/test_api.py b/test/mitmproxy/contentviews/test_api.py new file mode 100644 index 00000000..8e6c3427 --- /dev/null +++ b/test/mitmproxy/contentviews/test_api.py @@ -0,0 +1,85 @@ +import mock + +from mitmproxy import contentviews +from mitmproxy.exceptions import ContentViewException +from mitmproxy.net.http import Headers +from mitmproxy.test import tutils + + +class TestContentView(contentviews.View): + name = "test" + prompt = ("t", "test") + content_types = ["test/123"] + + +def test_add_remove(): + tcv = TestContentView() + contentviews.add(tcv) + + # repeated addition causes exception + with tutils.raises(ContentViewException): + contentviews.add(tcv) + + # Same shortcut doesn't work either. + with tutils.raises(ContentViewException): + contentviews.add(TestContentView()) + + contentviews.remove(tcv) + + +def test_get_content_view(): + desc, lines, err = contentviews.get_content_view( + contentviews.get("Raw"), + b"[1, 2, 3]", + ) + assert "Raw" in desc + assert list(lines) + assert not err + + desc, lines, err = contentviews.get_content_view( + contentviews.get("Auto"), + b"[1, 2, 3]", + headers=Headers(content_type="application/json") + ) + assert desc == "JSON" + + desc, lines, err = contentviews.get_content_view( + contentviews.get("JSON"), + b"[1, 2", + ) + assert "Couldn't parse" in desc + + with mock.patch("mitmproxy.contentviews.auto.ViewAuto.__call__") as view_auto: + view_auto.side_effect = ValueError + + desc, lines, err = contentviews.get_content_view( + contentviews.get("Auto"), + b"[1, 2", + ) + assert err + assert "Couldn't parse" in desc + + +def test_get_message_content_view(): + r = tutils.treq() + desc, lines, err = contentviews.get_message_content_view("raw", r) + assert desc == "Raw" + + desc, lines, err = 
contentviews.get_message_content_view("unknown", r) + assert desc == "Raw" + + r.encode("gzip") + desc, lines, err = contentviews.get_message_content_view("raw", r) + assert desc == "[decoded gzip] Raw" + + r.headers["content-encoding"] = "deflate" + desc, lines, err = contentviews.get_message_content_view("raw", r) + assert desc == "[cannot decode] Raw" + + r.content = None + desc, lines, err = contentviews.get_message_content_view("raw", r) + assert list(lines) == [[("error", "content missing")]] + + +def test_get_by_shortcut(): + assert contentviews.get_by_shortcut("s") diff --git a/test/mitmproxy/contentviews/test_auto.py b/test/mitmproxy/contentviews/test_auto.py new file mode 100644 index 00000000..a077affa --- /dev/null +++ b/test/mitmproxy/contentviews/test_auto.py @@ -0,0 +1,47 @@ +from mitmproxy.contentviews import auto +from mitmproxy.net import http +from mitmproxy.types import multidict +from . import full_eval + + +def test_view_auto(): + v = full_eval(auto.ViewAuto()) + f = v( + b"foo", + headers=http.Headers() + ) + assert f[0] == "Raw" + + f = v( + b"<html></html>", + headers=http.Headers(content_type="text/html") + ) + assert f[0] == "HTML" + + f = v( + b"foo", + headers=http.Headers(content_type="text/flibble") + ) + assert f[0] == "Raw" + + f = v( + b"<xml></xml>", + headers=http.Headers(content_type="text/flibble") + ) + assert f[0].startswith("XML") + + f = v(b"\xFF" * 30) + assert f[0] == "Hex" + + f = v( + b"", + headers=http.Headers() + ) + assert f[0] == "No content" + + f = v( + b"", + headers=http.Headers(), + query=multidict.MultiDict([("foo", "bar")]), + ) + assert f[0] == "Query" diff --git a/test/mitmproxy/contentviews/test_css.py b/test/mitmproxy/contentviews/test_css.py new file mode 100644 index 00000000..ecb9259b --- /dev/null +++ b/test/mitmproxy/contentviews/test_css.py @@ -0,0 +1,29 @@ +from mitmproxy.contentviews import css +from mitmproxy.test import tutils +from . 
import full_eval + +try: + import cssutils +except: + cssutils = None + + +def test_view_css(): + v = full_eval(css.ViewCSS()) + + with open(tutils.test_data.path('mitmproxy/data/1.css'), 'r') as fp: + fixture_1 = fp.read() + + result = v('a') + + if cssutils: + assert len(list(result[1])) == 0 + else: + assert len(list(result[1])) == 1 + + result = v(fixture_1) + + if cssutils: + assert len(list(result[1])) > 1 + else: + assert len(list(result[1])) == 1 diff --git a/test/mitmproxy/contentviews/test_hex.py b/test/mitmproxy/contentviews/test_hex.py new file mode 100644 index 00000000..4292007e --- /dev/null +++ b/test/mitmproxy/contentviews/test_hex.py @@ -0,0 +1,7 @@ +from mitmproxy.contentviews import hex +from . import full_eval + + +def test_view_hex(): + v = full_eval(hex.ViewHex()) + assert v(b"foo") diff --git a/test/mitmproxy/contentviews/test_html_outline.py b/test/mitmproxy/contentviews/test_html_outline.py new file mode 100644 index 00000000..9e664e52 --- /dev/null +++ b/test/mitmproxy/contentviews/test_html_outline.py @@ -0,0 +1,9 @@ +from mitmproxy.contentviews import html_outline +from test.mitmproxy.contentviews import full_eval + + +def test_view_html_outline(): + v = full_eval(html_outline.ViewHTMLOutline()) + s = b"<html><br><br></br><p>one</p></html>" + assert v(s) + assert v(b'\xfe') diff --git a/test/mitmproxy/contentviews/test_image.py b/test/mitmproxy/contentviews/test_image.py new file mode 100644 index 00000000..9e7e28f5 --- /dev/null +++ b/test/mitmproxy/contentviews/test_image.py @@ -0,0 +1,17 @@ +from mitmproxy.contentviews import image +from mitmproxy.test import tutils +from . 
import full_eval + + +def test_view_image(): + v = full_eval(image.ViewImage()) + for img in [ + "mitmproxy/data/image.png", + "mitmproxy/data/image.gif", + "mitmproxy/data/image-err1.jpg", + "mitmproxy/data/image.ico" + ]: + with open(tutils.test_data.path(img), "rb") as f: + assert v(f.read()) + + assert not v(b"flibble") diff --git a/test/mitmproxy/contentviews/test_javascript.py b/test/mitmproxy/contentviews/test_javascript.py new file mode 100644 index 00000000..43039c93 --- /dev/null +++ b/test/mitmproxy/contentviews/test_javascript.py @@ -0,0 +1,10 @@ +from mitmproxy.contentviews import javascript +from . import full_eval + + +def test_view_javascript(): + v = full_eval(javascript.ViewJavaScript()) + assert v(b"[1, 2, 3]") + assert v(b"[1, 2, 3") + assert v(b"function(a){[1, 2, 3]}") + assert v(b"\xfe") # invalid utf-8 diff --git a/test/mitmproxy/contentviews/test_json.py b/test/mitmproxy/contentviews/test_json.py new file mode 100644 index 00000000..5e87b570 --- /dev/null +++ b/test/mitmproxy/contentviews/test_json.py @@ -0,0 +1,16 @@ +from mitmproxy.contentviews import json +from . import full_eval + + +def test_pretty_json(): + assert json.pretty_json(b'{"foo": 1}') + assert not json.pretty_json(b"moo") + assert json.pretty_json(b'{"foo" : "\xe4\xb8\x96\xe7\x95\x8c"}') # utf8 with chinese characters + assert not json.pretty_json(b'{"foo" : "\xFF"}') + + +def test_view_json(): + v = full_eval(json.ViewJSON()) + assert v(b"{}") + assert not v(b"{") + assert v(b"[1, 2, 3, 4, 5]") diff --git a/test/mitmproxy/contentviews/test_multipart.py b/test/mitmproxy/contentviews/test_multipart.py new file mode 100644 index 00000000..48a5ccc9 --- /dev/null +++ b/test/mitmproxy/contentviews/test_multipart.py @@ -0,0 +1,25 @@ +from mitmproxy.contentviews import multipart +from mitmproxy.net import http +from . 
import full_eval + + +def test_view_multipart(): + view = full_eval(multipart.ViewMultipart()) + v = b""" +--AaB03x +Content-Disposition: form-data; name="submit-name" + +Larry +--AaB03x + """.strip() + h = http.Headers(content_type="multipart/form-data; boundary=AaB03x") + assert view(v, headers=h) + + h = http.Headers() + assert not view(v, headers=h) + + h = http.Headers(content_type="multipart/form-data") + assert not view(v, headers=h) + + h = http.Headers(content_type="unparseable") + assert not view(v, headers=h) diff --git a/test/mitmproxy/contentviews/test_protobuf.py b/test/mitmproxy/contentviews/test_protobuf.py new file mode 100644 index 00000000..1224b8db --- /dev/null +++ b/test/mitmproxy/contentviews/test_protobuf.py @@ -0,0 +1,12 @@ +from mitmproxy.contentviews import protobuf +from mitmproxy.test import tutils +from . import full_eval + +if protobuf.ViewProtobuf.is_available(): + def test_view_protobuf_request(): + v = full_eval(protobuf.ViewProtobuf()) + + p = tutils.test_data.path("mitmproxy/data/protobuf01") + content_type, output = v(open(p, "rb").read()) + assert content_type == "Protobuf" + assert output.next()[0][1] == '1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"' diff --git a/test/mitmproxy/contentviews/test_query.py b/test/mitmproxy/contentviews/test_query.py new file mode 100644 index 00000000..d2bddd05 --- /dev/null +++ b/test/mitmproxy/contentviews/test_query.py @@ -0,0 +1,13 @@ +from mitmproxy.contentviews import query +from mitmproxy.types import multidict +from . 
import full_eval + + +def test_view_query(): + d = "" + v = full_eval(query.ViewQuery()) + f = v(d, query=multidict.MultiDict([("foo", "bar")])) + assert f[0] == "Query" + assert f[1] == [[("header", "foo: "), ("text", "bar")]] + + assert v(d) == ("Query", []) diff --git a/test/mitmproxy/contentviews/test_raw.py b/test/mitmproxy/contentviews/test_raw.py new file mode 100644 index 00000000..0e6e1b34 --- /dev/null +++ b/test/mitmproxy/contentviews/test_raw.py @@ -0,0 +1,7 @@ +from mitmproxy.contentviews import raw +from . import full_eval + + +def test_view_raw(): + v = full_eval(raw.ViewRaw()) + assert v(b"foo") diff --git a/test/mitmproxy/contentviews/test_urlencoded.py b/test/mitmproxy/contentviews/test_urlencoded.py new file mode 100644 index 00000000..d01f9aaa --- /dev/null +++ b/test/mitmproxy/contentviews/test_urlencoded.py @@ -0,0 +1,15 @@ +from mitmproxy.contentviews import urlencoded +from mitmproxy.net.http import url +from . import full_eval + + +def test_view_urlencoded(): + v = full_eval(urlencoded.ViewURLEncoded()) + + d = url.encode([("one", "two"), ("three", "four")]).encode() + assert v(d) + + d = url.encode([("adsfa", "")]).encode() + assert v(d) + + assert not v(b"\xFF\x00") diff --git a/test/mitmproxy/contentviews/test_xml_html.py b/test/mitmproxy/contentviews/test_xml_html.py new file mode 100644 index 00000000..899ecfde --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html.py @@ -0,0 +1,29 @@ +import pytest + +from mitmproxy.contentviews import xml_html +from mitmproxy.test import tutils +from . 
import full_eval + +data = tutils.test_data.push("mitmproxy/contentviews/test_xml_html_data/") + + +def test_simple(): + v = full_eval(xml_html.ViewXmlHtml()) + assert v(b"foo") == ('XML', [[('text', 'foo')]]) + assert v(b"<html></html>") == ('HTML', [[('text', '<html></html>')]]) + + +@pytest.mark.parametrize("filename", [ + "simple.html", + "cdata.xml", + "comment.xml", + "inline.html", +]) +def test_format_xml(filename): + path = data.path(filename) + with open(path) as f: + input = f.read() + with open(path.replace(".", "-formatted.")) as f: + expected = f.read() + tokens = xml_html.tokenize(input) + assert xml_html.format_xml(tokens) == expected diff --git a/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml b/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml new file mode 100644 index 00000000..44a81a83 --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/cdata-formatted.xml @@ -0,0 +1,10 @@ +<exampleOfACDATA> + <![CDATA[ + Since this is a CDATA section + I can use all sorts of reserved characters + like > < " and & + or write things like + <foo></bar> + but my document is still well formed! + ]]> +</exampleOfACDATA> diff --git a/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml b/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml new file mode 100644 index 00000000..b4c5dfca --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/cdata.xml @@ -0,0 +1,10 @@ +<exampleOfACDATA> +<![CDATA[ + Since this is a CDATA section + I can use all sorts of reserved characters + like > < " and & +or write things like + <foo></bar> + but my document is still well formed! +]]> +</exampleOfACDATA>
\ No newline at end of file diff --git a/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml b/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml new file mode 100644 index 00000000..d0da6665 --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/comment-formatted.xml @@ -0,0 +1,10 @@ +<exampleOfAComment> + <!-- + Since this is a comment + I can use all sorts of reserved characters + like > < " and & + or write things like + <foo></bar> + but my document is still well formed! + --> +</exampleOfAComment> diff --git a/test/mitmproxy/contentviews/test_xml_html_data/comment.xml b/test/mitmproxy/contentviews/test_xml_html_data/comment.xml new file mode 100644 index 00000000..3f54ddba --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/comment.xml @@ -0,0 +1,10 @@ +<exampleOfAComment> +<!-- + Since this is a comment + I can use all sorts of reserved characters + like > < " and & + or write things like + <foo></bar> + but my document is still well formed! +--> +</exampleOfAComment>
\ No newline at end of file diff --git a/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html b/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html new file mode 100644 index 00000000..5253bf4f --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/inline-formatted.html @@ -0,0 +1,14 @@ +<html> +<head> + <title>Test Page</title> +</head> +<body> + <p> + <i class="fa fa-alert"></i> + Some things should be + <b>inline</b> + , some things shouldn't! + </p> + <i class="fa fa-warning"/> +</body> +</html> diff --git a/test/mitmproxy/contentviews/test_xml_html_data/inline.html b/test/mitmproxy/contentviews/test_xml_html_data/inline.html new file mode 100644 index 00000000..3e4b16b9 --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/inline.html @@ -0,0 +1,7 @@ +<html> +<head><title>Test Page</title></head> +<body> + <p><i class="fa fa-alert"></i>Some things should be <b>inline</b>, some things shouldn't!</p> + <i class="fa fa-warning"/> +</body> +</html>
\ No newline at end of file diff --git a/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html b/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html new file mode 100644 index 00000000..23438428 --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/simple-formatted.html @@ -0,0 +1,10 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <title>title</title> +</head> +<body> + <h1>Hello World</h1> + <!-- page content --> +</body> +</html> diff --git a/test/mitmproxy/contentviews/test_xml_html_data/simple.html b/test/mitmproxy/contentviews/test_xml_html_data/simple.html new file mode 100644 index 00000000..73e81a5e --- /dev/null +++ b/test/mitmproxy/contentviews/test_xml_html_data/simple.html @@ -0,0 +1 @@ +<!DOCTYPE html><html lang="en"><head><title>title</title></head><body><h1>Hello World</h1><!-- page content --></body></html> diff --git a/test/mitmproxy/data/amf01 b/test/mitmproxy/data/amf01 Binary files differdeleted file mode 100644 index c8fc261d..00000000 --- a/test/mitmproxy/data/amf01 +++ /dev/null diff --git a/test/mitmproxy/data/amf02 b/test/mitmproxy/data/amf02 Binary files differdeleted file mode 100644 index ba69f130..00000000 --- a/test/mitmproxy/data/amf02 +++ /dev/null diff --git a/test/mitmproxy/data/amf03 b/test/mitmproxy/data/amf03 Binary files differdeleted file mode 100644 index d9fa736a..00000000 --- a/test/mitmproxy/data/amf03 +++ /dev/null diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py deleted file mode 100644 index 1f16765b..00000000 --- a/test/mitmproxy/test_contentview.py +++ /dev/null @@ -1,284 +0,0 @@ -import mock -from mitmproxy.exceptions import ContentViewException -from mitmproxy.net.http import Headers -from mitmproxy.net.http import url -from mitmproxy.types import multidict - -import mitmproxy.contentviews as cv -from mitmproxy.test import tutils - -try: - import pyamf -except ImportError: - pyamf = None - -try: - import cssutils -except: - cssutils = 
None - - -class TestContentView: - - def test_view_auto(self): - v = cv.ViewAuto() - f = v( - b"foo", - headers=Headers() - ) - assert f[0] == "Raw" - - f = v( - b"<html></html>", - headers=Headers(content_type="text/html") - ) - assert f[0] == "HTML" - - f = v( - b"foo", - headers=Headers(content_type="text/flibble") - ) - assert f[0] == "Raw" - - f = v( - b"<xml></xml>", - headers=Headers(content_type="text/flibble") - ) - assert f[0].startswith("XML") - - f = v( - b"", - headers=Headers() - ) - assert f[0] == "No content" - - f = v( - b"", - headers=Headers(), - query=multidict.MultiDict([("foo", "bar")]), - ) - assert f[0] == "Query" - - def test_view_urlencoded(self): - d = url.encode([("one", "two"), ("three", "four")]).encode() - v = cv.ViewURLEncoded() - assert v(d) - d = url.encode([("adsfa", "")]).encode() - v = cv.ViewURLEncoded() - assert v(d) - - def test_view_html(self): - v = cv.ViewHTML() - s = b"<html><br><br></br><p>one</p></html>" - assert v(s) - - s = b"gobbledygook" - assert not v(s) - - def test_view_html_outline(self): - v = cv.ViewHTMLOutline() - s = b"<html><br><br></br><p>one</p></html>" - assert v(s) - assert v(b'\xfe') - - def test_view_json(self): - cv.VIEW_CUTOFF = 100 - v = cv.ViewJSON() - assert v(b"{}") - assert not v(b"{") - assert v(b"[1, 2, 3, 4, 5]") - - def test_view_xml(self): - v = cv.ViewXML() - assert v(b"<foo></foo>") - assert not v(b"<foo>") - s = b"""<?xml version="1.0" encoding="UTF-8"?> - <?xml-stylesheet title="XSL_formatting"?> - <rss - xmlns:media="http://search.yahoo.com/mrss/" - xmlns:atom="http://www.w3.org/2005/Atom" - version="2.0"> - </rss> - """ - assert v(s) - - def test_view_raw(self): - v = cv.ViewRaw() - assert v(b"foo") - - def test_view_javascript(self): - v = cv.ViewJavaScript() - assert v(b"[1, 2, 3]") - assert v(b"[1, 2, 3") - assert v(b"function(a){[1, 2, 3]}") - assert v(b"\xfe") # invalid utf-8 - - def test_view_css(self): - v = cv.ViewCSS() - - with 
open(tutils.test_data.path('mitmproxy/data/1.css'), 'r') as fp: - fixture_1 = fp.read() - - result = v('a') - - if cssutils: - assert len(list(result[1])) == 0 - else: - assert len(list(result[1])) == 1 - - result = v(fixture_1) - - if cssutils: - assert len(list(result[1])) > 1 - else: - assert len(list(result[1])) == 1 - - def test_view_hex(self): - v = cv.ViewHex() - assert v(b"foo") - - def test_view_image(self): - v = cv.ViewImage() - p = tutils.test_data.path("mitmproxy/data/image.png") - assert v(open(p, "rb").read()) - - p = tutils.test_data.path("mitmproxy/data/image.gif") - assert v(open(p, "rb").read()) - - p = tutils.test_data.path("mitmproxy/data/image-err1.jpg") - assert v(open(p, "rb").read()) - - p = tutils.test_data.path("mitmproxy/data/image.ico") - assert v(open(p, "rb").read()) - - assert not v(b"flibble") - - def test_view_multipart(self): - view = cv.ViewMultipart() - v = b""" ---AaB03x -Content-Disposition: form-data; name="submit-name" - -Larry ---AaB03x - """.strip() - h = Headers(content_type="multipart/form-data; boundary=AaB03x") - assert view(v, headers=h) - - h = Headers() - assert not view(v, headers=h) - - h = Headers(content_type="multipart/form-data") - assert not view(v, headers=h) - - h = Headers(content_type="unparseable") - assert not view(v, headers=h) - - def test_view_query(self): - d = "" - v = cv.ViewQuery() - f = v(d, query=multidict.MultiDict([("foo", "bar")])) - assert f[0] == "Query" - assert [x for x in f[1]] == [[("header", "foo: "), ("text", "bar")]] - - def test_add_cv(self): - class TestContentView(cv.View): - name = "test" - prompt = ("t", "test") - - tcv = TestContentView() - cv.add(tcv) - - # repeated addition causes exception - tutils.raises( - ContentViewException, - cv.add, - tcv - ) - - -def test_get_content_view(): - desc, lines, err = cv.get_content_view( - cv.get("Raw"), - b"[1, 2, 3]", - ) - assert "Raw" in desc - assert list(lines) - assert not err - - desc, lines, err = cv.get_content_view( - 
cv.get("Auto"), - b"[1, 2, 3]", - headers=Headers(content_type="application/json") - ) - assert desc == "JSON" - - desc, lines, err = cv.get_content_view( - cv.get("JSON"), - b"[1, 2", - ) - assert "Couldn't parse" in desc - - with mock.patch("mitmproxy.contentviews.ViewAuto.__call__") as view_auto: - view_auto.side_effect = ValueError - - desc, lines, err = cv.get_content_view( - cv.get("Auto"), - b"[1, 2", - ) - assert err - assert "Couldn't parse" in desc - - -def test_get_message_content_view(): - r = tutils.treq() - desc, lines, err = cv.get_message_content_view("raw", r) - assert desc == "Raw" - - r.encode("gzip") - desc, lines, err = cv.get_message_content_view("raw", r) - assert desc == "[decoded gzip] Raw" - - r.headers["content-encoding"] = "deflate" - desc, lines, err = cv.get_message_content_view("raw", r) - assert desc == "[cannot decode] Raw" - - r.content = None - desc, lines, err = cv.get_message_content_view("raw", r) - assert list(lines) == [[("error", "content missing")]] - - -if pyamf: - def test_view_amf_request(): - v = cv.ViewAMF() - - p = tutils.test_data.path("mitmproxy/data/amf01") - assert v(open(p, "rb").read()) - - p = tutils.test_data.path("mitmproxy/data/amf02") - assert v(open(p, "rb").read()) - - def test_view_amf_response(): - v = cv.ViewAMF() - p = tutils.test_data.path("mitmproxy/data/amf03") - assert v(open(p, "rb").read()) - -if cv.ViewProtobuf.is_available(): - def test_view_protobuf_request(): - v = cv.ViewProtobuf() - - p = tutils.test_data.path("mitmproxy/data/protobuf01") - content_type, output = v(open(p, "rb").read()) - assert content_type == "Protobuf" - assert output.next()[0][1] == '1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"' - - -def test_get_by_shortcut(): - assert cv.get_by_shortcut("h") - - -def test_pretty_json(): - assert cv.pretty_json(b'{"foo": 1}') - assert not cv.pretty_json(b"moo") - assert cv.pretty_json(b'{"foo" : "\xe4\xb8\x96\xe7\x95\x8c"}') # utf8 with chinese characters - assert not 
cv.pretty_json(b'{"foo" : "\xFF"}') diff --git a/test/mitmproxy/test_custom_contentview.py b/test/mitmproxy/test_custom_contentview.py deleted file mode 100644 index 28f7fb33..00000000 --- a/test/mitmproxy/test_custom_contentview.py +++ /dev/null @@ -1,48 +0,0 @@ -import mitmproxy.contentviews as cv -from mitmproxy.net.http import Headers - - -def test_custom_views(): - class ViewNoop(cv.View): - name = "noop" - prompt = ("noop", "n") - content_types = ["text/none"] - - def __call__(self, data, **metadata): - return "noop", cv.format_text(data) - - view_obj = ViewNoop() - - cv.add(view_obj) - - assert cv.get("noop") - - r = cv.get_content_view( - cv.get("noop"), - "[1, 2, 3]", - headers=Headers( - content_type="text/plain" - ) - ) - assert "noop" in r[0] - - # now try content-type matching - r = cv.get_content_view( - cv.get("Auto"), - "[1, 2, 3]", - headers=Headers( - content_type="text/none" - ) - ) - assert "noop" in r[0] - - # now try removing the custom view - cv.remove(view_obj) - r = cv.get_content_view( - cv.get("Auto"), - b"[1, 2, 3]", - headers=Headers( - content_type="text/none" - ) - ) - assert "noop" not in r[0] diff --git a/test/mitmproxy/test_examples.py b/test/mitmproxy/test_examples.py index 94637350..8db2507f 100644 --- a/test/mitmproxy/test_examples.py +++ b/test/mitmproxy/test_examples.py @@ -56,7 +56,7 @@ class TestScripts(mastertest.MasterTest): tscript("simple/modify_body_inject_iframe.py") m, sc = tscript("simple/modify_body_inject_iframe.py", "http://example.org/evil_iframe") - f = tflow.tflow(resp=tutils.tresp(content=b"<html>mitmproxy</html>")) + f = tflow.tflow(resp=tutils.tresp(content=b"<html><body>mitmproxy</body></html>")) m.response(f) content = f.response.content assert b'iframe' in content and b'evil_iframe' in content diff --git a/test/mitmproxy/utils/test_sliding_window.py b/test/mitmproxy/utils/test_sliding_window.py new file mode 100644 index 00000000..23c76032 --- /dev/null +++ b/test/mitmproxy/utils/test_sliding_window.py @@ 
-0,0 +1,27 @@ +from mitmproxy.utils import sliding_window + + +def test_simple(): + y = list(sliding_window.window(range(1000, 1005), 1, 2)) + assert y == [ + # prev this next next2 + (None, 1000, 1001, 1002), + (1000, 1001, 1002, 1003), + (1001, 1002, 1003, 1004), + (1002, 1003, 1004, None), + (1003, 1004, None, None) + ] + + +def test_is_lazy(): + done = False + + def gen(): + nonlocal done + done = True + yield 42 + + x = sliding_window.window(gen(), 1, 1) + assert not done + assert list(x) + assert done @@ -29,7 +29,8 @@ commands = mitmproxy/addonmanager.py \ mitmproxy/proxy/protocol/ \ mitmproxy/log.py \ - mitmproxy/tools/dump.py mitmproxy/tools/web + mitmproxy/tools/dump.py mitmproxy/tools/web \ + mitmproxy/contentviews [testenv:wheel] recreate = True |