From 11c63dcb9f2e88f17704898efc16fb289d877196 Mon Sep 17 00:00:00 2001
From: Aldo Cortesi <aldo@nullcube.com>
Date: Sat, 18 Aug 2012 17:08:17 +1200
Subject: Huge cleanup of content viewers.

---
 libmproxy/console/__init__.py    |  10 +-
 libmproxy/console/contentview.py | 580 ++++++++++++++++++++-------------------
 libmproxy/console/flowview.py    |   6 +-
 3 files changed, 301 insertions(+), 295 deletions(-)

(limited to 'libmproxy/console')

diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py
index 9327ad2c..c968fb1c 100644
--- a/libmproxy/console/__init__.py
+++ b/libmproxy/console/__init__.py
@@ -159,10 +159,10 @@ class StatusBar(common.WWrap):
             r.append("[")
             r.append(("heading_key", "P"))
             r.append(":%s]"%utils.unparse_url(*self.master.server.config.reverse_proxy))
-        if self.master.state.default_body_view != contentview.VIEW_AUTO:
+        if self.master.state.default_body_view.name != "Auto":
             r.append("[")
             r.append(("heading_key", "M"))
-            r.append(":%s]"%contentview.VIEW_NAMES[self.master.state.default_body_view])
+            r.append(":%s]"%self.master.state.default_body_view.name)
 
         opts = []
         if self.master.anticache:
@@ -255,7 +255,7 @@ class ConsoleState(flow.State):
         flow.State.__init__(self)
         self.focus = None
         self.follow_focus = None
-        self.default_body_view = contentview.VIEW_AUTO
+        self.default_body_view = contentview.ViewAuto
         self.view_flow_mode = common.VIEW_FLOW_REQUEST
         self.last_script = ""
         self.last_saveload = ""
@@ -736,7 +736,7 @@ class ConsoleMaster(flow.FlowMaster):
         return self.state.set_intercept(txt)
 
     def change_default_display_mode(self, t):
-        v = contentview.VIEW_SHORTCUTS.get(t)
+        v = contentview.get_by_shortcut(t)
         self.state.default_body_view = v
         if self.currentflow:
             self.refresh_flow(self.currentflow)
@@ -835,7 +835,7 @@ class ConsoleMaster(flow.FlowMaster):
                             elif k == "M":
                                 self.prompt_onekey(
                                     "Global default display mode",
-                                    contentview.VIEW_PROMPT,
+                                    contentview.view_prompts,
                                     self.change_default_display_mode
                                 )
                             elif k == "P":
diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py
index 4efbb2b1..ddc82540 100644
--- a/libmproxy/console/contentview.py
+++ b/libmproxy/console/contentview.py
@@ -9,92 +9,6 @@ from ..contrib import jsbeautifier, html2text
 
 VIEW_CUTOFF = 1024*50
 
-VIEW_AUTO = 0
-VIEW_JSON = 1
-VIEW_XML = 2
-VIEW_URLENCODED = 3
-VIEW_MULTIPART = 4
-VIEW_JAVASCRIPT = 5
-VIEW_IMAGE = 6
-VIEW_RAW = 7
-VIEW_HEX = 8
-VIEW_HTML = 9
-VIEW_OUTLINE = 10
-VIEW_AMF = 11
-
-VIEW_NAMES = {
-    VIEW_AUTO: "Auto",
-    VIEW_JSON: "JSON",
-    VIEW_XML: "XML",
-    VIEW_URLENCODED: "URL-encoded",
-    VIEW_MULTIPART: "Multipart Form",
-    VIEW_JAVASCRIPT: "JavaScript",
-    VIEW_IMAGE: "Image",
-    VIEW_RAW: "Raw",
-    VIEW_HEX: "Hex",
-    VIEW_HTML: "HTML",
-    VIEW_OUTLINE: "HTML Outline",
-}
-
-
-VIEW_PROMPT = [
-    ("auto detect", "a"),
-    ("hex", "e"),
-    ("html", "h"),
-    ("image", "i"),
-    ("javascript", "j"),
-    ("html outline", "o"),
-    ("json", "s"),
-    ("raw", "r"),
-    ("multipart", "m"),
-    ("urlencoded", "u"),
-    ("xml", "x"),
-]
-
-VIEW_SHORTCUTS = {
-    "a": VIEW_AUTO,
-    "x": VIEW_XML,
-    "h": VIEW_HTML,
-    "i": VIEW_IMAGE,
-    "j": VIEW_JAVASCRIPT,
-    "s": VIEW_JSON,
-    "u": VIEW_URLENCODED,
-    "m": VIEW_MULTIPART,
-    "o": VIEW_OUTLINE,
-    "r": VIEW_RAW,
-    "e": VIEW_HEX,
-}
-
-CONTENT_TYPES_MAP = {
-    "text/html": VIEW_HTML,
-    "application/json": VIEW_JSON,
-    "text/xml": VIEW_XML,
-    "multipart/form-data": VIEW_MULTIPART,
-    "application/x-www-form-urlencoded": VIEW_URLENCODED,
-    "application/x-javascript": VIEW_JAVASCRIPT,
-    "application/javascript": VIEW_JAVASCRIPT,
-    "text/javascript": VIEW_JAVASCRIPT,
-    "image/png": VIEW_IMAGE,
-    "image/jpeg": VIEW_IMAGE,
-    "image/gif": VIEW_IMAGE,
-    "image/vnd.microsoft.icon": VIEW_IMAGE,
-    "image/x-icon": VIEW_IMAGE,
-}
-
-def trailer(clen, txt, limit):
-    rem = clen - limit
-    if rem > 0:
-        txt.append(urwid.Text(""))
-        txt.append(
-            urwid.Text(
-                [
-                    ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)),
-                    ("key", "f"),
-                    ("highlight", " to load all data.")
-                ]
-            )
-        )
-
 
 def _view_text(content, total, limit):
     """
@@ -109,215 +23,325 @@ def _view_text(content, total, limit):
     return txt
 
 
-def view_raw(hdrs, content, limit):
-    txt = _view_text(content[:limit], len(content), limit)
-    return "Raw", txt
-
-
-def view_hex(hdrs, content, limit):
-    txt = []
-    for offset, hexa, s in utils.hexdump(content[:limit]):
-        txt.append(urwid.Text([
-            ("offset", offset),
-            " ",
-            ("text", hexa),
-            "   ",
-            ("text", s),
-        ]))
-    trailer(len(content), txt, limit)
-    return "Hex", txt
-
-
-def view_xml(hdrs, content, limit):
-    parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)
-    try:
-        document = lxml.etree.fromstring(content, parser)
-    except lxml.etree.XMLSyntaxError:
-        return None
-    docinfo = document.getroottree().docinfo
-
-    prev = []
-    p = document.getroottree().getroot().getprevious()
-    while p is not None:
-        prev.insert(
-            0,
-            lxml.etree.tostring(p)
-        )
-        p = p.getprevious()
-    doctype=docinfo.doctype
-    if prev:
-        doctype += "\n".join(prev).strip()
-    doctype = doctype.strip()
-
-    s = lxml.etree.tostring(
-            document,
-            pretty_print=True,
-            xml_declaration=True,
-            doctype=doctype or None,
-            encoding = docinfo.encoding
-        )
-
-    txt = []
-    for i in s[:limit].strip().split("\n"):
+def trailer(clen, txt, limit):
+    rem = clen - limit
+    if rem > 0:
+        txt.append(urwid.Text(""))
         txt.append(
-            urwid.Text(("text", i)),
+            urwid.Text(
+                [
+                    ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)),
+                    ("key", "f"),
+                    ("highlight", " to load all data.")
+                ]
+            )
         )
-    trailer(len(content), txt, limit)
-    return "XML-like data", txt
 
 
-def view_html(hdrs, content, limit):
-    if utils.isXML(content):
-        parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
-        d = lxml.html.fromstring(content, parser=parser)
-        docinfo = d.getroottree().docinfo
-        s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)
-        return "HTML", _view_text(s[:limit], len(s), limit)
+class ViewAuto:
+    name = "Auto"
+    prompt = ("auto", "a")
+    content_types = []
 
 
-def view_outline(hdrs, content, limit):
-    content = content.decode("utf-8")
-    h = html2text.HTML2Text(baseurl="")
-    h.ignore_images = True
-    h.body_width = 0
-    content = h.handle(content)
-    txt = _view_text(content[:limit], len(content), limit)
-    return "HTML Outline", txt
+class ViewRaw:
+    name = "Raw"
+    prompt = ("raw", "r")
+    content_types = []
+    def __call__(self, hdrs, content, limit):
+        txt = _view_text(content[:limit], len(content), limit)
+        return "Raw", txt
 
 
-def view_json(hdrs, content, limit):
-    lines = utils.pretty_json(content)
-    if lines:
+class ViewHex:
+    name = "Hex"
+    prompt = ("hex", "e")
+    content_types = []
+    def __call__(self, hdrs, content, limit):
         txt = []
-        sofar = 0
-        for i in lines:
-            sofar += len(i)
+        for offset, hexa, s in utils.hexdump(content[:limit]):
+            txt.append(urwid.Text([
+                ("offset", offset),
+                " ",
+                ("text", hexa),
+                "   ",
+                ("text", s),
+            ]))
+        trailer(len(content), txt, limit)
+        return "Hex", txt
+
+
+class ViewXML:
+    name = "XML"
+    prompt = ("xml", "x")
+    content_types = ["text/xml"]
+    def __call__(self, hdrs, content, limit):
+        parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)
+        try:
+            document = lxml.etree.fromstring(content, parser)
+        except lxml.etree.XMLSyntaxError:
+            return None
+        docinfo = document.getroottree().docinfo
+
+        prev = []
+        p = document.getroottree().getroot().getprevious()
+        while p is not None:
+            prev.insert(
+                0,
+                lxml.etree.tostring(p)
+            )
+            p = p.getprevious()
+        doctype=docinfo.doctype
+        if prev:
+            doctype += "\n".join(prev).strip()
+        doctype = doctype.strip()
+
+        s = lxml.etree.tostring(
+                document,
+                pretty_print=True,
+                xml_declaration=True,
+                doctype=doctype or None,
+                encoding = docinfo.encoding
+            )
+
+        txt = []
+        for i in s[:limit].strip().split("\n"):
             txt.append(
                 urwid.Text(("text", i)),
             )
-            if sofar > limit:
-                break
-        trailer(sum(len(i) for i in lines), txt, limit)
-        return "JSON", txt
-
-
-def view_multipart(hdrs, content, limit):
-    v = hdrs.get("content-type")
-    if v:
-        v = utils.parse_content_type(v[0])
-        if not v:
-            return
-        boundary = v[2].get("boundary")
-        if not boundary:
-            return
-
-        rx = re.compile(r'\bname="([^"]+)"')
-        keys = []
-        vals = []
-
-        for i in content.split("--" + boundary):
-            parts = i.splitlines()
-            if len(parts) > 1 and parts[0][0:2] != "--":
-                match = rx.search(parts[1])
-                if match:
-                    keys.append(match.group(1) + ":")
-                    vals.append(utils.cleanBin(
-                        "\n".join(parts[3+parts[2:].index(""):])
-                    ))
-        r = [
-            urwid.Text(("highlight", "Form data:\n")),
+        trailer(len(content), txt, limit)
+        return "XML-like data", txt
+
+
+class ViewJSON:
+    name = "JSON"
+    # "s", not "j": "j" is ViewJavaScript's key and get_by_shortcut takes the first match.
+    prompt = ("json", "s")
+    content_types = ["application/json"]
+    def __call__(self, hdrs, content, limit):
+        # Falls through (returns None) when utils.pretty_json gives nothing.
+        lines = utils.pretty_json(content)
+        if lines:
+            txt = []
+            sofar = 0
+            for i in lines:
+                sofar += len(i)
+                txt.append(urwid.Text(("text", i)))
+                if sofar > limit:
+                    break
+            trailer(sum(len(i) for i in lines), txt, limit)
+            return "JSON", txt
+
+
+class ViewHTML:
+    name = "HTML"
+    prompt = ("html", "h")
+    content_types = ["text/html"]
+    def __call__(self, hdrs, content, limit):
+        if utils.isXML(content):
+            parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
+            d = lxml.html.fromstring(content, parser=parser)
+            docinfo = d.getroottree().docinfo
+            s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)
+            return "HTML", _view_text(s[:limit], len(s), limit)
+
+
+class ViewHTMLOutline:
+    name = "HTML Outline"
+    prompt = ("html outline", "o")
+    content_types = ["text/html"]
+    def __call__(self, hdrs, content, limit):
+        content = content.decode("utf-8")
+        h = html2text.HTML2Text(baseurl="")
+        h.ignore_images = True
+        h.body_width = 0
+        content = h.handle(content)
+        txt = _view_text(content[:limit], len(content), limit)
+        return "HTML Outline", txt
+
+
+class ViewURLEncoded:
+    name = "URL-encoded"
+    prompt = ("urlencoded", "u")
+    content_types = ["application/x-www-form-urlencoded"]
+    def __call__(self, hdrs, content, limit):
+        lines = utils.urldecode(content)
+        if lines:
+            body = common.format_keyvals(
+                        [(k+":", v) for (k, v) in lines],
+                        key = "header",
+                        val = "text"
+                   )
+            return "URLEncoded form", body
+
+
+class ViewMultipart:
+    name = "Multipart Form"
+    prompt = ("multipart", "m")
+    content_types = ["multipart/form-data"]
+    def __call__(self, hdrs, content, limit):
+        v = hdrs.get("content-type")
+        if v:
+            v = utils.parse_content_type(v[0])
+            if not v:
+                return
+            boundary = v[2].get("boundary")
+            if not boundary:
+                return
+
+            rx = re.compile(r'\bname="([^"]+)"')
+            keys = []
+            vals = []
+
+            for i in content.split("--" + boundary):
+                parts = i.splitlines()
+                if len(parts) > 1 and parts[0][0:2] != "--":
+                    match = rx.search(parts[1])
+                    if match:
+                        keys.append(match.group(1) + ":")
+                        vals.append(utils.cleanBin(
+                            "\n".join(parts[3+parts[2:].index(""):])
+                        ))
+            r = [
+                urwid.Text(("highlight", "Form data:\n")),
+            ]
+            r.extend(common.format_keyvals(
+                zip(keys, vals),
+                key = "header",
+                val = "text"
+            ))
+            return "Multipart form", r
+
+
+class ViewAMF:
+    name = "AMF"
+    prompt = ("amf", "f")
+    content_types = ["application/x-amf"]
+    def __call__(self, hdrs, content, limit):
+        s = utils.pretty_amf(content)
+        if s:
+            return "AMF", _view_text(s[:limit], len(s), limit)
+
+
+class ViewJavaScript:
+    name = "JavaScript"
+    prompt = ("javascript", "j")
+    content_types = [
+        "application/x-javascript",
+        "application/javascript",
+        "text/javascript"
+    ]
+    def __call__(self, hdrs, content, limit):
+        opts = jsbeautifier.default_options()
+        opts.indent_size = 2
+        res = jsbeautifier.beautify(content[:limit], opts)
+        return "JavaScript", _view_text(res, len(content), limit)
+
+
+class ViewImage:
+    name = "Image"
+    prompt = ("image", "i")
+    content_types = [
+        "image/png",
+        "image/jpeg",
+        "image/gif",
+        "image/vnd.microsoft.icon",
+        "image/x-icon",
+    ]
+    def __call__(self, hdrs, content, limit):
+        try:
+            img = Image.open(cStringIO.StringIO(content))
+        except IOError:
+            return None
+        parts = [
+            ("Format", str(img.format_description)),
+            ("Size", "%s x %s px"%img.size),
+            ("Mode", str(img.mode)),
         ]
-        r.extend(common.format_keyvals(
-            zip(keys, vals),
-            key = "header",
-            val = "text"
-        ))
-        return "Multipart form", r
+        for i in sorted(img.info.keys()):
+            if i != "exif":
+                parts.append(
+                    (str(i), str(img.info[i]))
+                )
+        if hasattr(img, "_getexif"):
+            ex = img._getexif()
+            if ex:
+                for i in sorted(ex.keys()):
+                    tag = TAGS.get(i, i)
+                    parts.append(
+                        (str(tag), str(ex[i]))
+                    )
+        clean = []
+        for i in parts:
+            clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])])
+        fmt = common.format_keyvals(
+                clean,
+                key = "header",
+                val = "text"
+            )
+        return "%s image"%img.format, fmt
+
+
+views = [
+    ViewAuto(),
+    ViewRaw(),
+    ViewHex(),
+    ViewJSON(),
+    ViewXML(),
+    ViewHTML(),
+    ViewHTMLOutline(),
+    ViewJavaScript(),
+    ViewURLEncoded(),
+    ViewMultipart(),
+    ViewImage(),
+]
+try:
+    import pyamf
+    views.append(ViewAMF())
+except ImportError: # pragma nocover
+    pass
 
 
-def view_urlencoded(hdrs, content, limit):
-    lines = utils.urldecode(content)
-    if lines:
-        body = common.format_keyvals(
-                    [(k+":", v) for (k, v) in lines],
-                    key = "header",
-                    val = "text"
-               )
-        return "URLEncoded form", body
+content_types_map = {}
+for i in views:
+    for ct in i.content_types:
+        l = content_types_map.setdefault(ct, [])
+        l.append(i)
 
 
-def view_javascript(hdrs, content, limit):
-    opts = jsbeautifier.default_options()
-    opts.indent_size = 2
-    res = jsbeautifier.beautify(content[:limit], opts)
-    return "JavaScript", _view_text(res, len(content), limit)
+view_prompts = [i.prompt for i in views]
 
 
-def view_image(hdrs, content, limit):
-    try:
-        img = Image.open(cStringIO.StringIO(content))
-    except IOError:
-        return None
-    parts = [
-        ("Format", str(img.format_description)),
-        ("Size", "%s x %s px"%img.size),
-        ("Mode", str(img.mode)),
-    ]
-    for i in sorted(img.info.keys()):
-        if i != "exif":
-            parts.append(
-                (str(i), str(img.info[i]))
-            )
-    if hasattr(img, "_getexif"):
-        ex = img._getexif()
-        if ex:
-            for i in sorted(ex.keys()):
-                tag = TAGS.get(i, i)
-                parts.append(
-                    (str(tag), str(ex[i]))
-                )
-    clean = []
-    for i in parts:
-        clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])])
-    fmt = common.format_keyvals(
-            clean,
-            key = "header",
-            val = "text"
-        )
-    return "%s image"%img.format, fmt
-
-def view_amf(hdrs, content, limit):
-    s = utils.pretty_amf(content)
-    if s:
-        return "AMF", _view_text(s[:limit], len(s), limit)
-
-PRETTY_FUNCTION_MAP = {
-    VIEW_XML: view_xml,
-    VIEW_HTML: view_html,
-    VIEW_JSON: view_json,
-    VIEW_URLENCODED: view_urlencoded,
-    VIEW_MULTIPART: view_multipart,
-    VIEW_JAVASCRIPT: view_javascript,
-    VIEW_IMAGE: view_image,
-    VIEW_HEX: view_hex,
-    VIEW_RAW: view_raw,
-    VIEW_OUTLINE: view_outline,
-}
+def get_by_shortcut(c):
+    for i in views:
+        if i.prompt[1] == c:
+            return i
+
+
+def get(name):
+    for i in views:
+        if i.name == name:
+            return i
+
 
 def get_view_func(viewmode, hdrs, content):
     """
         Returns a function object.
     """
-    if viewmode == VIEW_AUTO:
+    if viewmode.name == "Auto":
         ctype = hdrs.get("content-type")
         if ctype:
             ctype = ctype[0]
         ct = utils.parse_content_type(ctype) if ctype else None
         if ct:
-            viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1]))
-        if not viewmode and utils.isXML(content):
-            viewmode = VIEW_XML
-    return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)
+            ct = "%s/%s"%(ct[0], ct[1])
+            if ct in content_types_map:
+                return content_types_map[ct][0]
+        # No mapped content type: sniff for XML, else Raw (view instances, not classes).
+        if utils.isXML(content):
+            return get("XML")
+        return get("Raw")
+    return viewmode
 
 
 def get_content_view(viewmode, hdrItems, content, limit):
@@ -339,32 +363,14 @@ def get_content_view(viewmode, hdrItems, content, limit):
         ret = func(hdrs, content, limit)
     # Third-party viewers can fail in unexpected ways...
     except Exception, e:
-        s = traceback.format_exc()
-        return "", _view_text(s, len(s), len(s))
+        #s = traceback.format_exc()
+        #return "", _view_text(s, len(s), len(s))
         ret = None
     if not ret:
-        viewmode = VIEW_RAW
-        ret = view_raw(hdrs, content, limit)
+        ret = get("Raw")(hdrs, content, limit)
         msg.append("Couldn't parse: falling back to Raw")
     else:
         msg.append(ret[0])
     return " ".join(msg), ret[1]
 
 
-#
-# Enable optional decoding methods at runtime
-#
-
-# AMF decoding requires pyamf
-try:
-    import pyamf
-
-    VIEW_SHORTCUTS["f"] = VIEW_AMF
-    VIEW_PROMPT.append(("amf", "f"))
-    VIEW_NAMES[VIEW_AMF] = "AMF"
-    CONTENT_TYPES_MAP["application/x-amf"] = VIEW_AMF
-    PRETTY_FUNCTION_MAP[VIEW_AMF] = view_amf
-except ImportError:
-    pass
-
-
diff --git a/libmproxy/console/flowview.py b/libmproxy/console/flowview.py
index 45b1a58b..f57ab3e4 100644
--- a/libmproxy/console/flowview.py
+++ b/libmproxy/console/flowview.py
@@ -180,7 +180,7 @@ class FlowView(common.WWrap):
                             " ",
                             ('heading', "["),
                             ('heading_key', "m"),
-                            ('heading', (":%s]"%contentview.VIEW_NAMES[viewmode])),
+                            ('heading', (":%s]"%viewmode.name)),
                         ],
                         align="right"
                     )
@@ -392,7 +392,7 @@ class FlowView(common.WWrap):
         self.state.add_flow_setting(
             self.flow,
             (self.state.view_flow_mode, "prettyview"),
-            contentview.VIEW_SHORTCUTS.get(t)
+            contentview.get_by_shortcut(t)
         )
         self.master.refresh_flow(self.flow)
 
@@ -500,7 +500,7 @@ class FlowView(common.WWrap):
             self.master.refresh_flow(self.flow)
             self.master.statusbar.message("")
         elif key == "m":
-            p = list(contentview.VIEW_PROMPT)
+            p = list(contentview.view_prompts)
             p.insert(0, ("clear", "c"))
             self.master.prompt_onekey(
                 "Display mode",
-- 
cgit v1.2.3