about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Maximilian Hils <git@maximilianhils.com> 2016-12-10 10:19:05 +0100
committer Maximilian Hils <git@maximilianhils.com> 2016-12-10 10:19:05 +0100
commit 293b79af9120c3fc056db60492f88e21a5610ab6 (patch)
tree 6e6607d9a82d5f9c5dd772f4a147fdd4e6a9bce4
parent a7ba2f7b46b98d8d688706adf5b1d9495218a91d (diff)
download mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.gz
mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.tar.bz2
mitmproxy-293b79af9120c3fc056db60492f88e21a5610ab6.zip
remove lxml-dependent code
-rw-r--r-- .appveyor.yml 2
-rw-r--r-- examples/simple/modify_body_inject_iframe.py 2
-rw-r--r-- mitmproxy/contentviews/__init__.py 6
-rw-r--r-- mitmproxy/contentviews/html.py 42
-rw-r--r-- mitmproxy/contentviews/html_outline.py 17
-rw-r--r-- mitmproxy/contentviews/xml.py 45
-rw-r--r-- requirements.txt 1
-rw-r--r-- setup.py 1
-rw-r--r-- test/mitmproxy/contentviews/test_html.py 18
-rw-r--r-- test/mitmproxy/contentviews/test_html_outline.py 9
-rw-r--r-- test/mitmproxy/contentviews/test_xml.py 17
11 files changed, 30 insertions, 130 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 5421eb5a..5cf194a9 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -27,7 +27,7 @@ test_script:
- ps: |
$Env:VERSION = $(python mitmproxy/version.py)
$Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`""
- tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl
+ tox -e wheel
tox -e rtool -- bdist
deploy_script:
diff --git a/examples/simple/modify_body_inject_iframe.py b/examples/simple/modify_body_inject_iframe.py
index 33d18bbd..7f9cc958 100644
--- a/examples/simple/modify_body_inject_iframe.py
+++ b/examples/simple/modify_body_inject_iframe.py
@@ -11,7 +11,7 @@ class Injector:
def response(self, flow):
if flow.request.host in self.iframe_url:
return
- html = BeautifulSoup(flow.response.content, "lxml")
+ html = BeautifulSoup(flow.response.content)
if html.body:
iframe = html.new_tag(
"iframe",
diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py
index b83e7aa6..3857d5e5 100644
--- a/mitmproxy/contentviews/__init__.py
+++ b/mitmproxy/contentviews/__init__.py
@@ -22,7 +22,7 @@ from mitmproxy import exceptions
from mitmproxy.net import http
from mitmproxy.utils import strutils
from . import (
- auto, raw, hex, json, xml, wbxml, html, javascript, css,
+ auto, raw, hex, json, html_outline, wbxml, javascript, css,
urlencoded, multipart, image, query, protobuf
)
from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
@@ -163,10 +163,8 @@ add(auto.ViewAuto())
add(raw.ViewRaw())
add(hex.ViewHex())
add(json.ViewJSON())
-add(xml.ViewXML())
add(wbxml.ViewWBXML())
-add(html.ViewHTML())
-add(html.ViewHTMLOutline())
+add(html_outline.ViewHTMLOutline())
add(javascript.ViewJavaScript())
add(css.ViewCSS())
add(urlencoded.ViewURLEncoded())
diff --git a/mitmproxy/contentviews/html.py b/mitmproxy/contentviews/html.py
deleted file mode 100644
index c625beef..00000000
--- a/mitmproxy/contentviews/html.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import html2text
-import lxml.etree
-import lxml.html
-
-from mitmproxy.contentviews.base import View, format_text
-from mitmproxy.utils import strutils
-
-
-class ViewHTML(View):
- name = "HTML"
- prompt = ("html", "h")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- if strutils.is_xml(data):
- parser = lxml.etree.HTMLParser(
- strip_cdata=True,
- remove_blank_text=True
- )
- d = lxml.html.fromstring(data, parser=parser)
- docinfo = d.getroottree().docinfo
- s = lxml.etree.tostring(
- d,
- pretty_print=True,
- doctype=docinfo.doctype,
- encoding='utf8'
- )
- return "HTML", format_text(s)
-
-
-class ViewHTMLOutline(View):
- name = "HTML Outline"
- prompt = ("html outline", "o")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- data = data.decode("utf-8", "replace")
- h = html2text.HTML2Text(baseurl="")
- h.ignore_images = True
- h.body_width = 0
- outline = h.handle(data)
- return "HTML Outline", format_text(outline)
diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py
new file mode 100644
index 00000000..d6c51b29
--- /dev/null
+++ b/mitmproxy/contentviews/html_outline.py
@@ -0,0 +1,17 @@
+import html2text
+
+from mitmproxy.contentviews import base
+
+
+class ViewHTMLOutline(base.View):
+ name = "HTML Outline"
+ prompt = ("html outline", "o")
+ content_types = ["text/html"]
+
+ def __call__(self, data, **metadata):
+ data = data.decode("utf-8", "replace")
+ h = html2text.HTML2Text(baseurl="")
+ h.ignore_images = True
+ h.body_width = 0
+ outline = h.handle(data)
+ return "HTML Outline", base.format_text(outline)
diff --git a/mitmproxy/contentviews/xml.py b/mitmproxy/contentviews/xml.py
deleted file mode 100644
index a382b09d..00000000
--- a/mitmproxy/contentviews/xml.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import lxml.etree
-
-from . import base
-
-
-class ViewXML(base.View):
- name = "XML"
- prompt = ("xml", "x")
- content_types = ["text/xml"]
-
- def __call__(self, data, **metadata):
- parser = lxml.etree.XMLParser(
- remove_blank_text=True,
- resolve_entities=False,
- strip_cdata=False,
- recover=False
- )
- try:
- document = lxml.etree.fromstring(data, parser)
- except lxml.etree.XMLSyntaxError:
- return None
- docinfo = document.getroottree().docinfo
-
- prev = []
- p = document.getroottree().getroot().getprevious()
- while p is not None:
- prev.insert(
- 0,
- lxml.etree.tostring(p)
- )
- p = p.getprevious()
- doctype = docinfo.doctype
- if prev:
- doctype += "\n".join(p.decode() for p in prev).strip()
- doctype = doctype.strip()
-
- s = lxml.etree.tostring(
- document,
- pretty_print=True,
- xml_declaration=True,
- doctype=doctype or None,
- encoding=docinfo.encoding
- )
-
- return "XML-like data", base.format_text(s)
diff --git a/requirements.txt b/requirements.txt
index 67a02a97..ab8e8a0b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5'
-e .[dev,examples,contentviews]
diff --git a/setup.py b/setup.py
index 927fbc5e..4ef89e20 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,6 @@ setup(
"html2text>=2016.1.8, <=2016.9.19",
"hyperframe>=4.0.1, <5",
"jsbeautifier>=1.6.3, <1.7",
- "lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet.
"Pillow>=3.2, <3.5",
"passlib>=1.6.5, <1.8",
"pyasn1>=0.1.9, <0.2",
diff --git a/test/mitmproxy/contentviews/test_html.py b/test/mitmproxy/contentviews/test_html.py
deleted file mode 100644
index 8d5818e5..00000000
--- a/test/mitmproxy/contentviews/test_html.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from mitmproxy.contentviews import html
-from . import full_eval
-
-
-def test_view_html():
- v = full_eval(html.ViewHTML())
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
-
- s = b"gobbledygook"
- assert not v(s)
-
-
-def test_view_html_outline():
- v = full_eval(html.ViewHTMLOutline())
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
- assert v(b'\xfe')
diff --git a/test/mitmproxy/contentviews/test_html_outline.py b/test/mitmproxy/contentviews/test_html_outline.py
new file mode 100644
index 00000000..d9ccc406
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_html_outline.py
@@ -0,0 +1,9 @@
+from mitmproxy.contentviews import html_outline
+from test.mitmproxy.contentviews import full_eval
+
+
+def test_view_html_outline():
+ v = full_eval(html_outline.ViewHTMLOutline())
+ s = b"<html><br><br></br><p>one</p></html>"
+ assert v(s)
+ assert v(b'\xfe')
\ No newline at end of file
diff --git a/test/mitmproxy/contentviews/test_xml.py b/test/mitmproxy/contentviews/test_xml.py
deleted file mode 100644
index 680134cb..00000000
--- a/test/mitmproxy/contentviews/test_xml.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from mitmproxy.contentviews import xml
-from . import full_eval
-
-
-def test_view_xml():
- v = full_eval(xml.ViewXML())
- assert v(b"<foo></foo>")
- assert not v(b"<foo>")
- s = b"""<?xml version="1.0" encoding="UTF-8"?>
- <?xml-stylesheet title="XSL_formatting"?>
- <rss
- xmlns:media="http://search.yahoo.com/mrss/"
- xmlns:atom="http://www.w3.org/2005/Atom"
- version="2.0">
- </rss>
- """
- assert v(s)