diff options
author | Maximilian Hils <git@maximilianhils.com> | 2016-12-10 10:19:05 +0100 |
---|---|---|
committer | Maximilian Hils <git@maximilianhils.com> | 2016-12-10 10:19:05 +0100 |
commit | 293b79af9120c3fc056db60492f88e21a5610ab6 (patch) | |
tree | 6e6607d9a82d5f9c5dd772f4a147fdd4e6a9bce4 /mitmproxy/contentviews | |
parent | a7ba2f7b46b98d8d688706adf5b1d9495218a91d (diff) | |
download | mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.gz mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.bz2 mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.zip |
remove lxml-dependent code
Diffstat (limited to 'mitmproxy/contentviews')
-rw-r--r-- | mitmproxy/contentviews/__init__.py | 6 | ||||
-rw-r--r-- | mitmproxy/contentviews/html.py | 42 | ||||
-rw-r--r-- | mitmproxy/contentviews/html_outline.py | 17 | ||||
-rw-r--r-- | mitmproxy/contentviews/xml.py | 45 |
4 files changed, 19 insertions, 91 deletions
diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py index b83e7aa6..3857d5e5 100644 --- a/mitmproxy/contentviews/__init__.py +++ b/mitmproxy/contentviews/__init__.py @@ -22,7 +22,7 @@ from mitmproxy import exceptions from mitmproxy.net import http from mitmproxy.utils import strutils from . import ( - auto, raw, hex, json, xml, wbxml, html, javascript, css, + auto, raw, hex, json, html_outline, wbxml, javascript, css, urlencoded, multipart, image, query, protobuf ) from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict @@ -163,10 +163,8 @@ add(auto.ViewAuto()) add(raw.ViewRaw()) add(hex.ViewHex()) add(json.ViewJSON()) -add(xml.ViewXML()) add(wbxml.ViewWBXML()) -add(html.ViewHTML()) -add(html.ViewHTMLOutline()) +add(html_outline.ViewHTMLOutline()) add(javascript.ViewJavaScript()) add(css.ViewCSS()) add(urlencoded.ViewURLEncoded()) diff --git a/mitmproxy/contentviews/html.py b/mitmproxy/contentviews/html.py deleted file mode 100644 index c625beef..00000000 --- a/mitmproxy/contentviews/html.py +++ /dev/null @@ -1,42 +0,0 @@ -import html2text -import lxml.etree -import lxml.html - -from mitmproxy.contentviews.base import View, format_text -from mitmproxy.utils import strutils - - -class ViewHTML(View): - name = "HTML" - prompt = ("html", "h") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - if strutils.is_xml(data): - parser = lxml.etree.HTMLParser( - strip_cdata=True, - remove_blank_text=True - ) - d = lxml.html.fromstring(data, parser=parser) - docinfo = d.getroottree().docinfo - s = lxml.etree.tostring( - d, - pretty_print=True, - doctype=docinfo.doctype, - encoding='utf8' - ) - return "HTML", format_text(s) - - -class ViewHTMLOutline(View): - name = "HTML Outline" - prompt = ("html outline", "o") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - data = data.decode("utf-8", "replace") - h = html2text.HTML2Text(baseurl="") - h.ignore_images = True - h.body_width = 0 - outline = h.handle(data) - return "HTML Outline", format_text(outline) diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py new file mode 100644 index 00000000..d6c51b29 --- /dev/null +++ b/mitmproxy/contentviews/html_outline.py @@ -0,0 +1,17 @@ +import html2text + +from mitmproxy.contentviews import base + + +class ViewHTMLOutline(base.View): + name = "HTML Outline" + prompt = ("html outline", "o") + content_types = ["text/html"] + + def __call__(self, data, **metadata): + data = data.decode("utf-8", "replace") + h = html2text.HTML2Text(baseurl="") + h.ignore_images = True + h.body_width = 0 + outline = h.handle(data) + return "HTML Outline", base.format_text(outline) diff --git a/mitmproxy/contentviews/xml.py b/mitmproxy/contentviews/xml.py deleted file mode 100644 index a382b09d..00000000 --- a/mitmproxy/contentviews/xml.py +++ /dev/null @@ -1,45 +0,0 @@ -import lxml.etree - -from . import base - - -class ViewXML(base.View): - name = "XML" - prompt = ("xml", "x") - content_types = ["text/xml"] - - def __call__(self, data, **metadata): - parser = lxml.etree.XMLParser( - remove_blank_text=True, - resolve_entities=False, - strip_cdata=False, - recover=False - ) - try: - document = lxml.etree.fromstring(data, parser) - except lxml.etree.XMLSyntaxError: - return None - docinfo = document.getroottree().docinfo - - prev = [] - p = document.getroottree().getroot().getprevious() - while p is not None: - prev.insert( - 0, - lxml.etree.tostring(p) - ) - p = p.getprevious() - doctype = docinfo.doctype - if prev: - doctype += "\n".join(p.decode() for p in prev).strip() - doctype = doctype.strip() - - s = lxml.etree.tostring( - document, - pretty_print=True, - xml_declaration=True, - doctype=doctype or None, - encoding=docinfo.encoding - ) - - return "XML-like data", base.format_text(s) |