From 7156d1a73ab6ce39ae8f8325bc8a62c0083cd054 Mon Sep 17 00:00:00 2001
From: Aldo Cortesi
Date: Sun, 6 Feb 2011 14:17:30 +1300
Subject: Rip out BeautifulSoup, and use a custom XML-ish prettyprinter.

---
 libmproxy/utils.py | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

(limited to 'libmproxy/utils.py')

diff --git a/libmproxy/utils.py b/libmproxy/utils.py
index ee0d9b43..2a878676 100644
--- a/libmproxy/utils.py
+++ b/libmproxy/utils.py
@@ -12,9 +12,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import re, os, subprocess, datetime
-from contrib import BeautifulSoup
+import re, os, subprocess, datetime, textwrap
 
 
 def format_timestamp(s):
@@ -48,14 +46,48 @@ def cleanBin(s):
     return "".join(parts)
 
 
-def prettybody(s):
+TAG = r"""
+    <\s*
+    (?!\s*[!"])
+    (?P<close>\s*\/)?
+    (?P<name>\w+)
+    (
+        [a-zA-Z0-9_#:=().%\/]+
+        |
+        "[^\"]*"['\"]*
+        |
+        '[^']*'['\"]*
+        |
+        \s+
+    )*
+    (?P<selfcont>\s*\/\s*)?
+    \s*>
+    """
+UNI = set(["br", "hr", "img", "input", "area", "link"])
+INDENT = " "*4
+def pretty_xmlish(s):
     """
-        Return a list of pretty-printed lines.
+        This is a robust, general pretty-printer for XML-ish data.
+        Returns a list of lines.
     """
-    s = BeautifulSoup.BeautifulStoneSoup(s)
-    s = s.prettify().strip()
-    parts = s.split("\n")
-    return [repr(i)[1:-1] for i in parts]
+    data, offset, indent, prev = [], 0, 0, None
+    for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
+        start, end = i.span()
+        name = i.group("name")
+        if start > offset:
+            txt = []
+            for x in textwrap.dedent(s[offset:start]).split("\n"):
+                if x.strip():
+                    txt.append(indent*INDENT + x)
+            data.extend(txt)
+        if i.group("close") and not (name in UNI and name==prev):
+            indent = max(indent - 1, 0)
+        data.append(indent*INDENT + i.group().strip())
+        offset = end
+        if not any([i.group("close"), i.group("selfcont"), name in UNI]):
+            indent += 1
+        prev = name
+    return data
 
 
 def hexdump(s):
-- 
cgit v1.2.3