From 11c63dcb9f2e88f17704898efc16fb289d877196 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sat, 18 Aug 2012 17:08:17 +1200 Subject: Huge cleanup of content viewers. --- libmproxy/console/__init__.py | 10 +- libmproxy/console/contentview.py | 580 ++++++++++++++++++++------------------- libmproxy/console/flowview.py | 6 +- 3 files changed, 301 insertions(+), 295 deletions(-) (limited to 'libmproxy/console') diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index 9327ad2c..c968fb1c 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -159,10 +159,10 @@ class StatusBar(common.WWrap): r.append("[") r.append(("heading_key", "P")) r.append(":%s]"%utils.unparse_url(*self.master.server.config.reverse_proxy)) - if self.master.state.default_body_view != contentview.VIEW_AUTO: + if self.master.state.default_body_view.name != "Auto": r.append("[") r.append(("heading_key", "M")) - r.append(":%s]"%contentview.VIEW_NAMES[self.master.state.default_body_view]) + r.append(":%s]"%self.master.state.default_body_view.name) opts = [] if self.master.anticache: @@ -255,7 +255,7 @@ class ConsoleState(flow.State): flow.State.__init__(self) self.focus = None self.follow_focus = None - self.default_body_view = contentview.VIEW_AUTO + self.default_body_view = contentview.ViewAuto self.view_flow_mode = common.VIEW_FLOW_REQUEST self.last_script = "" self.last_saveload = "" @@ -736,7 +736,7 @@ class ConsoleMaster(flow.FlowMaster): return self.state.set_intercept(txt) def change_default_display_mode(self, t): - v = contentview.VIEW_SHORTCUTS.get(t) + v = contentview.get_by_shortcut(t) self.state.default_body_view = v if self.currentflow: self.refresh_flow(self.currentflow) @@ -835,7 +835,7 @@ class ConsoleMaster(flow.FlowMaster): elif k == "M": self.prompt_onekey( "Global default display mode", - contentview.VIEW_PROMPT, + contentview.view_prompts, self.change_default_display_mode ) elif k == "P": diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py index 4efbb2b1..ddc82540 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/console/contentview.py @@ -9,92 +9,6 @@ from ..contrib import jsbeautifier, html2text VIEW_CUTOFF = 1024*50 -VIEW_AUTO = 0 -VIEW_JSON = 1 -VIEW_XML = 2 -VIEW_URLENCODED = 3 -VIEW_MULTIPART = 4 -VIEW_JAVASCRIPT = 5 -VIEW_IMAGE = 6 -VIEW_RAW = 7 -VIEW_HEX = 8 -VIEW_HTML = 9 -VIEW_OUTLINE = 10 -VIEW_AMF = 11 - -VIEW_NAMES = { - VIEW_AUTO: "Auto", - VIEW_JSON: "JSON", - VIEW_XML: "XML", - VIEW_URLENCODED: "URL-encoded", - VIEW_MULTIPART: "Multipart Form", - VIEW_JAVASCRIPT: "JavaScript", - VIEW_IMAGE: "Image", - VIEW_RAW: "Raw", - VIEW_HEX: "Hex", - VIEW_HTML: "HTML", - VIEW_OUTLINE: "HTML Outline", -} - - -VIEW_PROMPT = [ - ("auto detect", "a"), - ("hex", "e"), - ("html", "h"), - ("image", "i"), - ("javascript", "j"), - ("html outline", "o"), - ("json", "s"), - ("raw", "r"), - ("multipart", "m"), - ("urlencoded", "u"), - ("xml", "x"), -] - -VIEW_SHORTCUTS = { - "a": VIEW_AUTO, - "x": VIEW_XML, - "h": VIEW_HTML, - "i": VIEW_IMAGE, - "j": VIEW_JAVASCRIPT, - "s": VIEW_JSON, - "u": VIEW_URLENCODED, - "m": VIEW_MULTIPART, - "o": VIEW_OUTLINE, - "r": VIEW_RAW, - "e": VIEW_HEX, -} - -CONTENT_TYPES_MAP = { - "text/html": VIEW_HTML, - "application/json": VIEW_JSON, - "text/xml": VIEW_XML, - "multipart/form-data": VIEW_MULTIPART, - "application/x-www-form-urlencoded": VIEW_URLENCODED, - "application/x-javascript": VIEW_JAVASCRIPT, - "application/javascript": VIEW_JAVASCRIPT, - "text/javascript": VIEW_JAVASCRIPT, - "image/png": VIEW_IMAGE, - "image/jpeg": VIEW_IMAGE, - "image/gif": VIEW_IMAGE, - "image/vnd.microsoft.icon": VIEW_IMAGE, - "image/x-icon": VIEW_IMAGE, -} - -def trailer(clen, txt, limit): - rem = clen - limit - if rem > 0: - txt.append(urwid.Text("")) - txt.append( - urwid.Text( - [ - ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)), - ("key", "f"), - ("highlight", " to load all data.") - ] - ) - ) - def _view_text(content, total, limit): """ @@ -109,215 +23,325 @@ def _view_text(content, total, limit): return txt -def view_raw(hdrs, content, limit): - txt = _view_text(content[:limit], len(content), limit) - return "Raw", txt - - -def view_hex(hdrs, content, limit): - txt = [] - for offset, hexa, s in utils.hexdump(content[:limit]): - txt.append(urwid.Text([ - ("offset", offset), - " ", - ("text", hexa), - " ", - ("text", s), - ])) - trailer(len(content), txt, limit) - return "Hex", txt - - -def view_xml(hdrs, content, limit): - parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False) - try: - document = lxml.etree.fromstring(content, parser) - except lxml.etree.XMLSyntaxError: - return None - docinfo = document.getroottree().docinfo - - prev = [] - p = document.getroottree().getroot().getprevious() - while p is not None: - prev.insert( - 0, - lxml.etree.tostring(p) - ) - p = p.getprevious() - doctype=docinfo.doctype - if prev: - doctype += "\n".join(prev).strip() - doctype = doctype.strip() - - s = lxml.etree.tostring( - document, - pretty_print=True, - xml_declaration=True, - doctype=doctype or None, - encoding = docinfo.encoding - ) - - txt = [] - for i in s[:limit].strip().split("\n"): +def trailer(clen, txt, limit): + rem = clen - limit + if rem > 0: + txt.append(urwid.Text("")) txt.append( - urwid.Text(("text", i)), + urwid.Text( + [ + ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)), + ("key", "f"), + ("highlight", " to load all data.") + ] + ) ) - trailer(len(content), txt, limit) - return "XML-like data", txt -def view_html(hdrs, content, limit): - if utils.isXML(content): - parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True) - d = lxml.html.fromstring(content, parser=parser) - docinfo = d.getroottree().docinfo - s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype) - return "HTML", _view_text(s[:limit], len(s), limit) +class ViewAuto: + name = "Auto" + prompt = ("auto", "a") + content_types = [] -def view_outline(hdrs, content, limit): - content = content.decode("utf-8") - h = html2text.HTML2Text(baseurl="") - h.ignore_images = True - h.body_width = 0 - content = h.handle(content) - txt = _view_text(content[:limit], len(content), limit) - return "HTML Outline", txt +class ViewRaw: + name = "Raw" + prompt = ("raw", "r") + content_types = [] + def __call__(self, hdrs, content, limit): + txt = _view_text(content[:limit], len(content), limit) + return "Raw", txt -def view_json(hdrs, content, limit): - lines = utils.pretty_json(content) - if lines: +class ViewHex: + name = "Hex" + prompt = ("hex", "e") + content_types = [] + def __call__(self, hdrs, content, limit): txt = [] - sofar = 0 - for i in lines: - sofar += len(i) + for offset, hexa, s in utils.hexdump(content[:limit]): + txt.append(urwid.Text([ + ("offset", offset), + " ", + ("text", hexa), + " ", + ("text", s), + ])) + trailer(len(content), txt, limit) + return "Hex", txt + + +class ViewXML: + name = "XML" + prompt = ("xml", "x") + content_types = ["text/xml"] + def __call__(self, hdrs, content, limit): + parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False) + try: + document = lxml.etree.fromstring(content, parser) + except lxml.etree.XMLSyntaxError: + return None + docinfo = document.getroottree().docinfo + + prev = [] + p = document.getroottree().getroot().getprevious() + while p is not None: + prev.insert( + 0, + lxml.etree.tostring(p) + ) + p = p.getprevious() + doctype=docinfo.doctype + if prev: + doctype += "\n".join(prev).strip() + doctype = doctype.strip() + + s = lxml.etree.tostring( + document, + pretty_print=True, + xml_declaration=True, + doctype=doctype or None, + encoding = docinfo.encoding + ) + + txt = [] + for i in s[:limit].strip().split("\n"): txt.append( urwid.Text(("text", i)), ) - if sofar > limit: - break - trailer(sum(len(i) for i in lines), txt, limit) - return "JSON", txt - - -def view_multipart(hdrs, content, limit): - v = hdrs.get("content-type") - if v: - v = utils.parse_content_type(v[0]) - if not v: - return - boundary = v[2].get("boundary") - if not boundary: - return - - rx = re.compile(r'\bname="([^"]+)"') - keys = [] - vals = [] - - for i in content.split("--" + boundary): - parts = i.splitlines() - if len(parts) > 1 and parts[0][0:2] != "--": - match = rx.search(parts[1]) - if match: - keys.append(match.group(1) + ":") - vals.append(utils.cleanBin( - "\n".join(parts[3+parts[2:].index(""):]) - )) - r = [ - urwid.Text(("highlight", "Form data:\n")), + trailer(len(content), txt, limit) + return "XML-like data", txt + + +class ViewJSON: + name = "JSON" + prompt = ("json", "j") + content_types = ["application/json"] + def __call__(self, hdrs, content, limit): + lines = utils.pretty_json(content) + if lines: + txt = [] + sofar = 0 + for i in lines: + sofar += len(i) + txt.append( + urwid.Text(("text", i)), + ) + if sofar > limit: + break + trailer(sum(len(i) for i in lines), txt, limit) + return "JSON", txt + + +class ViewHTML: + name = "HTML" + prompt = ("html", "h") + content_types = ["text/html"] + def __call__(self, hdrs, content, limit): + if utils.isXML(content): + parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True) + d = lxml.html.fromstring(content, parser=parser) + docinfo = d.getroottree().docinfo + s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype) + return "HTML", _view_text(s[:limit], len(s), limit) + + +class ViewHTMLOutline: + name = "HTML Outline" + prompt = ("html outline", "o") + content_types = ["text/html"] + def __call__(self, hdrs, content, limit): + content = content.decode("utf-8") + h = html2text.HTML2Text(baseurl="") + h.ignore_images = True + h.body_width = 0 + content = h.handle(content) + txt = _view_text(content[:limit], len(content), limit) + return "HTML Outline", txt + + +class ViewURLEncoded: + name = "URL-encoded" + prompt = ("urlencoded", "u") + content_types = ["application/x-www-form-urlencoded"] + def __call__(self, hdrs, content, limit): + lines = utils.urldecode(content) + if lines: + body = common.format_keyvals( + [(k+":", v) for (k, v) in lines], + key = "header", + val = "text" + ) + return "URLEncoded form", body + + +class ViewMultipart: + name = "Multipart Form" + prompt = ("multipart", "m") + content_types = ["multipart/form-data"] + def __call__(self, hdrs, content, limit): + v = hdrs.get("content-type") + if v: + v = utils.parse_content_type(v[0]) + if not v: + return + boundary = v[2].get("boundary") + if not boundary: + return + + rx = re.compile(r'\bname="([^"]+)"') + keys = [] + vals = [] + + for i in content.split("--" + boundary): + parts = i.splitlines() + if len(parts) > 1 and parts[0][0:2] != "--": + match = rx.search(parts[1]) + if match: + keys.append(match.group(1) + ":") + vals.append(utils.cleanBin( + "\n".join(parts[3+parts[2:].index(""):]) + )) + r = [ + urwid.Text(("highlight", "Form data:\n")), + ] + r.extend(common.format_keyvals( + zip(keys, vals), + key = "header", + val = "text" + )) + return "Multipart form", r + + +class ViewAMF: + name = "AMF" + prompt = ("amf", "f") + content_types = ["application/x-amf"] + def __call__(self, hdrs, content, limit): + s = utils.pretty_amf(content) + if s: + return "AMF", _view_text(s[:limit], len(s), limit) + + +class ViewJavaScript: + name = "JavaScript" + prompt = ("javascript", "j") + content_types = [ + "application/x-javascript", + "application/javascript", + "text/javascript" + ] + def __call__(self, hdrs, content, limit): + opts = jsbeautifier.default_options() + opts.indent_size = 2 + res = jsbeautifier.beautify(content[:limit], opts) + return "JavaScript", _view_text(res, len(content), limit) + + +class ViewImage: + name = "Image" + prompt = ("image", "i") + content_types = [ + "image/png", + "image/jpeg", + "image/gif", + "image/vnd.microsoft.icon", + "image/x-icon", + ] + def __call__(self, hdrs, content, limit): + try: + img = Image.open(cStringIO.StringIO(content)) + except IOError: + return None + parts = [ + ("Format", str(img.format_description)), + ("Size", "%s x %s px"%img.size), + ("Mode", str(img.mode)), ] - r.extend(common.format_keyvals( - zip(keys, vals), - key = "header", - val = "text" - )) - return "Multipart form", r + for i in sorted(img.info.keys()): + if i != "exif": + parts.append( + (str(i), str(img.info[i])) + ) + if hasattr(img, "_getexif"): + ex = img._getexif() + if ex: + for i in sorted(ex.keys()): + tag = TAGS.get(i, i) + parts.append( + (str(tag), str(ex[i])) + ) + clean = [] + for i in parts: + clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])]) + fmt = common.format_keyvals( + clean, + key = "header", + val = "text" + ) + return "%s image"%img.format, fmt + + +views = [ + ViewAuto(), + ViewRaw(), + ViewHex(), + ViewJSON(), + ViewXML(), + ViewHTML(), + ViewHTMLOutline(), + ViewJavaScript(), + ViewURLEncoded(), + ViewMultipart(), + ViewImage(), +] +try: + import pyamf + views.append(ViewAMF()) +except ImportError: # pragma nocover + pass -def view_urlencoded(hdrs, content, limit): - lines = utils.urldecode(content) - if lines: - body = common.format_keyvals( - [(k+":", v) for (k, v) in lines], - key = "header", - val = "text" - ) - return "URLEncoded form", body +content_types_map = {} +for i in views: + for ct in i.content_types: + l = content_types_map.setdefault(ct, []) + l.append(i) -def view_javascript(hdrs, content, limit): - opts = jsbeautifier.default_options() - opts.indent_size = 2 - res = jsbeautifier.beautify(content[:limit], opts) - return "JavaScript", _view_text(res, len(content), limit) +view_prompts = [i.prompt for i in views] -def view_image(hdrs, content, limit): - try: - img = Image.open(cStringIO.StringIO(content)) - except IOError: - return None - parts = [ - ("Format", str(img.format_description)), - ("Size", "%s x %s px"%img.size), - ("Mode", str(img.mode)), - ] - for i in sorted(img.info.keys()): - if i != "exif": - parts.append( - (str(i), str(img.info[i])) - ) - if hasattr(img, "_getexif"): - ex = img._getexif() - if ex: - for i in sorted(ex.keys()): - tag = TAGS.get(i, i) - parts.append( - (str(tag), str(ex[i])) - ) - clean = [] - for i in parts: - clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])]) - fmt = common.format_keyvals( - clean, - key = "header", - val = "text" - ) - return "%s image"%img.format, fmt - -def view_amf(hdrs, content, limit): - s = utils.pretty_amf(content) - if s: - return "AMF", _view_text(s[:limit], len(s), limit) - -PRETTY_FUNCTION_MAP = { - VIEW_XML: view_xml, - VIEW_HTML: view_html, - VIEW_JSON: view_json, - VIEW_URLENCODED: view_urlencoded, - VIEW_MULTIPART: view_multipart, - VIEW_JAVASCRIPT: view_javascript, - VIEW_IMAGE: view_image, - VIEW_HEX: view_hex, - VIEW_RAW: view_raw, - VIEW_OUTLINE: view_outline, -} +def get_by_shortcut(c): + for i in views: + if i.prompt[1] == c: + return i + + +def get(name): + for i in views: + if i.name == name: + return i + def get_view_func(viewmode, hdrs, content): """ Returns a function object. """ - if viewmode == VIEW_AUTO: + if viewmode.name == "Auto": ctype = hdrs.get("content-type") if ctype: ctype = ctype[0] ct = utils.parse_content_type(ctype) if ctype else None if ct: - viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1])) - if not viewmode and utils.isXML(content): - viewmode = VIEW_XML - return PRETTY_FUNCTION_MAP.get(viewmode, view_raw) + ct = "%s/%s"%(ct[0], ct[1]) + if ct in content_types_map: + return content_types_map[ct][0] + elif utils.isXML(content): + return ViewXML + return ViewRaw + else: + return viewmode def get_content_view(viewmode, hdrItems, content, limit): @@ -339,32 +363,14 @@ def get_content_view(viewmode, hdrItems, content, limit): ret = func(hdrs, content, limit) # Third-party viewers can fail in unexpected ways... except Exception, e: - s = traceback.format_exc() - return "", _view_text(s, len(s), len(s)) + #s = traceback.format_exc() + #return "", _view_text(s, len(s), len(s)) ret = None if not ret: - viewmode = VIEW_RAW - ret = view_raw(hdrs, content, limit) + ret = get("Raw")(hdrs, content, limit) msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) return " ".join(msg), ret[1] -# -# Enable optional decoding methods at runtime -# - -# AMF decoding requires pyamf -try: - import pyamf - - VIEW_SHORTCUTS["f"] = VIEW_AMF - VIEW_PROMPT.append(("amf", "f")) - VIEW_NAMES[VIEW_AMF] = "AMF" - CONTENT_TYPES_MAP["application/x-amf"] = VIEW_AMF - PRETTY_FUNCTION_MAP[VIEW_AMF] = view_amf -except ImportError: - pass - - diff --git a/libmproxy/console/flowview.py b/libmproxy/console/flowview.py index 45b1a58b..f57ab3e4 100644 --- a/libmproxy/console/flowview.py +++ b/libmproxy/console/flowview.py @@ -180,7 +180,7 @@ class FlowView(common.WWrap): " ", ('heading', "["), ('heading_key', "m"), - ('heading', (":%s]"%contentview.VIEW_NAMES[viewmode])), + ('heading', (":%s]"%viewmode.name)), ], align="right" ) @@ -392,7 +392,7 @@ class FlowView(common.WWrap): self.state.add_flow_setting( self.flow, (self.state.view_flow_mode, "prettyview"), - contentview.VIEW_SHORTCUTS.get(t) + contentview.get_by_shortcut(t) ) self.master.refresh_flow(self.flow) @@ -500,7 +500,7 @@ class FlowView(common.WWrap): self.master.refresh_flow(self.flow) self.master.statusbar.message("") elif key == "m": - p = list(contentview.VIEW_PROMPT) + p = list(contentview.view_prompts) p.insert(0, ("clear", "c")) self.master.prompt_onekey( "Display mode", -- cgit v1.2.3