about summary refs log tree commit diff stats
path: root/mitmproxy/contentviews
diff options
context:
space:
mode:
author Maximilian Hils <git@maximilianhils.com> 2016-12-10 10:19:05 +0100
committer Maximilian Hils <git@maximilianhils.com> 2016-12-10 10:19:05 +0100
commit 293b79af9120c3fc056db60492f88e21a5610ab6 (patch)
tree 6e6607d9a82d5f9c5dd772f4a147fdd4e6a9bce4 /mitmproxy/contentviews
parent a7ba2f7b46b98d8d688706adf5b1d9495218a91d (diff)
download mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.gz
mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.bz2
mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.zip
remove lxml-dependent code
Diffstat (limited to 'mitmproxy/contentviews')
-rw-r--r-- mitmproxy/contentviews/__init__.py 6
-rw-r--r-- mitmproxy/contentviews/html.py 42
-rw-r--r-- mitmproxy/contentviews/html_outline.py 17
-rw-r--r-- mitmproxy/contentviews/xml.py 45
4 files changed, 19 insertions, 91 deletions
diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py
index b83e7aa6..3857d5e5 100644
--- a/mitmproxy/contentviews/__init__.py
+++ b/mitmproxy/contentviews/__init__.py
@@ -22,7 +22,7 @@ from mitmproxy import exceptions
from mitmproxy.net import http
from mitmproxy.utils import strutils
from . import (
- auto, raw, hex, json, xml, wbxml, html, javascript, css,
+ auto, raw, hex, json, html_outline, wbxml, javascript, css,
urlencoded, multipart, image, query, protobuf
)
from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
@@ -163,10 +163,8 @@ add(auto.ViewAuto())
add(raw.ViewRaw())
add(hex.ViewHex())
add(json.ViewJSON())
-add(xml.ViewXML())
add(wbxml.ViewWBXML())
-add(html.ViewHTML())
-add(html.ViewHTMLOutline())
+add(html_outline.ViewHTMLOutline())
add(javascript.ViewJavaScript())
add(css.ViewCSS())
add(urlencoded.ViewURLEncoded())
diff --git a/mitmproxy/contentviews/html.py b/mitmproxy/contentviews/html.py
deleted file mode 100644
index c625beef..00000000
--- a/mitmproxy/contentviews/html.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import html2text
-import lxml.etree
-import lxml.html
-
-from mitmproxy.contentviews.base import View, format_text
-from mitmproxy.utils import strutils
-
-
-class ViewHTML(View):
- name = "HTML"
- prompt = ("html", "h")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- if strutils.is_xml(data):
- parser = lxml.etree.HTMLParser(
- strip_cdata=True,
- remove_blank_text=True
- )
- d = lxml.html.fromstring(data, parser=parser)
- docinfo = d.getroottree().docinfo
- s = lxml.etree.tostring(
- d,
- pretty_print=True,
- doctype=docinfo.doctype,
- encoding='utf8'
- )
- return "HTML", format_text(s)
-
-
-class ViewHTMLOutline(View):
- name = "HTML Outline"
- prompt = ("html outline", "o")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- data = data.decode("utf-8", "replace")
- h = html2text.HTML2Text(baseurl="")
- h.ignore_images = True
- h.body_width = 0
- outline = h.handle(data)
- return "HTML Outline", format_text(outline)
diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py
new file mode 100644
index 00000000..d6c51b29
--- /dev/null
+++ b/mitmproxy/contentviews/html_outline.py
@@ -0,0 +1,17 @@
+import html2text
+
+from mitmproxy.contentviews import base
+
+
+class ViewHTMLOutline(base.View):
+ name = "HTML Outline"
+ prompt = ("html outline", "o")
+ content_types = ["text/html"]
+
+ def __call__(self, data, **metadata):
+ data = data.decode("utf-8", "replace")
+ h = html2text.HTML2Text(baseurl="")
+ h.ignore_images = True
+ h.body_width = 0
+ outline = h.handle(data)
+ return "HTML Outline", base.format_text(outline)
diff --git a/mitmproxy/contentviews/xml.py b/mitmproxy/contentviews/xml.py
deleted file mode 100644
index a382b09d..00000000
--- a/mitmproxy/contentviews/xml.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import lxml.etree
-
-from . import base
-
-
-class ViewXML(base.View):
- name = "XML"
- prompt = ("xml", "x")
- content_types = ["text/xml"]
-
- def __call__(self, data, **metadata):
- parser = lxml.etree.XMLParser(
- remove_blank_text=True,
- resolve_entities=False,
- strip_cdata=False,
- recover=False
- )
- try:
- document = lxml.etree.fromstring(data, parser)
- except lxml.etree.XMLSyntaxError:
- return None
- docinfo = document.getroottree().docinfo
-
- prev = []
- p = document.getroottree().getroot().getprevious()
- while p is not None:
- prev.insert(
- 0,
- lxml.etree.tostring(p)
- )
- p = p.getprevious()
- doctype = docinfo.doctype
- if prev:
- doctype += "\n".join(p.decode() for p in prev).strip()
- doctype = doctype.strip()
-
- s = lxml.etree.tostring(
- document,
- pretty_print=True,
- xml_declaration=True,
- doctype=doctype or None,
- encoding=docinfo.encoding
- )
-
- return "XML-like data", base.format_text(s)