aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/console
diff options
context:
space:
mode:
authorAldo Cortesi <aldo@nullcube.com>2012-04-07 13:47:03 +1200
committerAldo Cortesi <aldo@nullcube.com>2012-04-07 13:47:03 +1200
commitf1dc3f2ab2d78ce2bc1a0418239fa6fa1f6c4429 (patch)
treefd1de610fc9a75bf962c18dedc83d4497a85efe9 /libmproxy/console
parent549512e93e7c0accf161b76b54f1338eb7aa5921 (diff)
downloadmitmproxy-f1dc3f2ab2d78ce2bc1a0418239fa6fa1f6c4429.tar.gz
mitmproxy-f1dc3f2ab2d78ce2bc1a0418239fa6fa1f6c4429.tar.bz2
mitmproxy-f1dc3f2ab2d78ce2bc1a0418239fa6fa1f6c4429.zip
Integrate lxml for pretty-printing HTML and XML.
Tackling the pretty-printing performance problem head-on, at the cost of a major dependency.
Diffstat (limited to 'libmproxy/console')
-rw-r--r--libmproxy/console/contentview.py67
1 files changed, 57 insertions, 10 deletions
diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py
index 0d725c9d..02394c6f 100644
--- a/libmproxy/console/contentview.py
+++ b/libmproxy/console/contentview.py
@@ -2,11 +2,12 @@ import re, cStringIO
import urwid
from PIL import Image
from PIL.ExifTags import TAGS
+import lxml.html, lxml.etree
import common
from .. import utils, encoding, flow
from ..contrib import jsbeautifier
-VIEW_CUTOFF = 1024*20
+VIEW_CUTOFF = 1024*200
VIEW_AUTO = 0
VIEW_JSON = 1
@@ -17,6 +18,7 @@ VIEW_JAVASCRIPT = 5
VIEW_IMAGE = 6
VIEW_RAW = 7
VIEW_HEX = 8
+VIEW_HTML = 9
VIEW_NAMES = {
VIEW_AUTO: "Auto",
@@ -28,35 +30,38 @@ VIEW_NAMES = {
VIEW_IMAGE: "Image",
VIEW_RAW: "Raw",
VIEW_HEX: "Hex",
+ VIEW_HTML: "HTML",
}
VIEW_PROMPT = (
("auto detect", "a"),
- ("hex", "h"),
+ ("hex", "e"),
+ ("html", "h"),
("image", "i"),
("javascript", "j"),
("json", "s"),
("raw", "r"),
("multipart", "m"),
("urlencoded", "u"),
- ("xmlish", "x"),
+ ("xml", "x"),
)
VIEW_SHORTCUTS = {
"a": VIEW_AUTO,
+ "x": VIEW_XML,
+ "h": VIEW_HTML,
"i": VIEW_IMAGE,
"j": VIEW_JAVASCRIPT,
"s": VIEW_JSON,
"u": VIEW_URLENCODED,
"m": VIEW_MULTIPART,
- "x": VIEW_XML,
"r": VIEW_RAW,
- "h": VIEW_HEX,
+ "e": VIEW_HEX,
}
CONTENT_TYPES_MAP = {
- "text/html": VIEW_XML,
+ "text/html": VIEW_HTML,
"application/json": VIEW_JSON,
"text/xml": VIEW_XML,
"multipart/form-data": VIEW_MULTIPART,
@@ -116,9 +121,34 @@ def view_hex(hdrs, content):
return "Hex", txt
-def view_xmlish(hdrs, content):
+def view_xml(hdrs, content):
+ parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)
+ try:
+ document = lxml.etree.fromstring(content, parser)
+ except lxml.etree.XMLSyntaxError, v:
+ print v
+ return None
+ docinfo = document.getroottree().docinfo
+
+ prev = []
+ p = document.getroottree().getroot().getprevious()
+ while p is not None:
+ prev.insert(
+ 0,
+ lxml.etree.tostring(p)
+ )
+ p = p.getprevious()
+
+ s = lxml.etree.tostring(
+ document,
+ pretty_print=True,
+ xml_declaration=True,
+ doctype=docinfo.doctype + "\n".join(prev),
+ encoding = docinfo.encoding
+ )
+
txt = []
- for i in utils.pretty_xmlish(content[:VIEW_CUTOFF]):
+ for i in s[:VIEW_CUTOFF].strip().split("\n"):
txt.append(
urwid.Text(("text", i)),
)
@@ -126,6 +156,22 @@ def view_xmlish(hdrs, content):
return "XML-like data", txt
+def view_html(hdrs, content):
+ if utils.isXML(content):
+ parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
+ d = lxml.html.fromstring(content, parser=parser)
+ docinfo = d.getroottree().docinfo
+ s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)
+
+ txt = []
+ for i in s[:VIEW_CUTOFF].strip().split("\n"):
+ txt.append(
+ urwid.Text(("text", i)),
+ )
+ trailer(len(content), txt)
+ return "HTML", txt
+
+
def view_json(hdrs, content):
lines = utils.pretty_json(content)
if lines:
@@ -229,7 +275,8 @@ def view_image(hdrs, content):
PRETTY_FUNCTION_MAP = {
- VIEW_XML: view_xmlish,
+ VIEW_XML: view_xml,
+ VIEW_HTML: view_html,
VIEW_JSON: view_json,
VIEW_URLENCODED: view_urlencoded,
VIEW_MULTIPART: view_multipart,
@@ -274,7 +321,7 @@ def get_content_view(viewmode, hdrItems, content):
if not ret:
viewmode = VIEW_RAW
ret = view_raw(hdrs, content)
- msg.append("Fallback to Raw")
+ msg.append("Couldn't parse: falling back to Raw")
else:
msg.append(ret[0])
return " ".join(msg), ret[1]