aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/utils.py
diff options
context:
space:
mode:
authorAldo Cortesi <aldo@nullcube.com>2011-02-06 14:17:30 +1300
committerAldo Cortesi <aldo@nullcube.com>2011-02-06 14:17:30 +1300
commit7156d1a73ab6ce39ae8f8325bc8a62c0083cd054 (patch)
tree42baa74035d979deb122632a3e46b44ff0f08cf1 /libmproxy/utils.py
parent44dc3a052e724bdf10e9c04e1756db89615f5685 (diff)
downloadmitmproxy-7156d1a73ab6ce39ae8f8325bc8a62c0083cd054.tar.gz
mitmproxy-7156d1a73ab6ce39ae8f8325bc8a62c0083cd054.tar.bz2
mitmproxy-7156d1a73ab6ce39ae8f8325bc8a62c0083cd054.zip
Rip out BeautifulSoup, and use a custom XML-ish prettyprinter.
Diffstat (limited to 'libmproxy/utils.py')
-rw-r--r--libmproxy/utils.py50
1 files changed, 41 insertions, 9 deletions
diff --git a/libmproxy/utils.py b/libmproxy/utils.py
index ee0d9b43..2a878676 100644
--- a/libmproxy/utils.py
+++ b/libmproxy/utils.py
@@ -12,9 +12,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import re, os, subprocess, datetime
-from contrib import BeautifulSoup
+import re, os, subprocess, datetime, textwrap
def format_timestamp(s):
@@ -48,14 +46,48 @@ def cleanBin(s):
return "".join(parts)
-def prettybody(s):
+TAG = r"""
+ <\s*
+ (?!\s*[!"])
+ (?P<close>\s*\/)?
+ (?P<name>\w+)
+ (
+ [a-zA-Z0-9_#:=().%\/]+
+ |
+ "[^\"]*"['\"]*
+ |
+ '[^']*'['\"]*
+ |
+ \s+
+ )*
+ (?P<selfcont>\s*\/\s*)?
+ \s*>
+ """
+UNI = set(["br", "hr", "img", "input", "area", "link"])
+INDENT = " "*4
+def pretty_xmlish(s):
"""
- Return a list of pretty-printed lines.
+ This is a robust, general pretty-printer for XML-ish data.
+ Returns a list of lines.
"""
- s = BeautifulSoup.BeautifulStoneSoup(s)
- s = s.prettify().strip()
- parts = s.split("\n")
- return [repr(i)[1:-1] for i in parts]
+ data, offset, indent, prev = [], 0, 0, None
+ for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
+ start, end = i.span()
+ name = i.group("name")
+ if start > offset:
+ txt = []
+ for x in textwrap.dedent(s[offset:start]).split("\n"):
+ if x.strip():
+ txt.append(indent*INDENT + x)
+ data.extend(txt)
+ if i.group("close") and not (name in UNI and name==prev):
+ indent = max(indent - 1, 0)
+ data.append(indent*INDENT + i.group().strip())
+ offset = end
+ if not any([i.group("close"), i.group("selfcont"), name in UNI]):
+ indent += 1
+ prev = name
+ return data
def hexdump(s):