diff options
Diffstat (limited to 'libmproxy/utils.py')
-rw-r--r-- | libmproxy/utils.py | 50 |
1 files changed, 41 insertions, 9 deletions
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import re, os, subprocess, datetime, textwrap


# Verbose-mode pattern matching a single opening, closing or self-closing
# tag. Comments, doctypes and anything else starting with "<!" are rejected
# by the negative lookahead and therefore end up being treated as text.
#
# Two details of the attribute section (the repeated group) matter:
#   * A "/" is only accepted there when it is NOT the final slash before
#     ">", otherwise the slash of "<foo/>" would be consumed as attribute
#     data and the "selfcont" group could never match.
#   * Ordinary characters and whitespace are consumed one at a time by
#     mutually exclusive alternatives. Repeating "+"-quantified alternatives
#     inside "(...)*" makes the matcher backtrack exponentially on input
#     such as "<a" followed by a long run of characters with no ">".
TAG = r"""
    <\s*
    (?!\s*[!"])
    (?P<close>\s*\/)?
    (?P<name>\w+)
    (
        [^'"\s>\/] |
        \/(?!\s*>) |
        "[^\"]*"['\"]* |
        '[^']*'['\"]* |
        \s
    )*
    (?P<selfcont>\s*\/\s*)?
    \s*>
    """
# Compiled once at import time rather than on every call.
_TAG_RE = re.compile(TAG, re.VERBOSE|re.MULTILINE)

# Tags that never have content, and so never open a new indentation level.
UNI = set(["br", "hr", "img", "input", "area", "link"])
INDENT = " "*4


def _text_lines(text, indent):
    """
        Dedent a run of character data and return its non-blank lines, each
        prefixed with indent levels of INDENT.
    """
    prefix = indent * INDENT
    return [
        prefix + line
        for line in textwrap.dedent(text).split("\n")
        if line.strip()
    ]


def pretty_xmlish(s):
    """
        A robust pretty-printer for XML-ish data.

        s: The markup to format, as a string. It does not have to be
        well-formed: anything that does not look like a tag is treated as
        text, and unbalanced closing tags never push the indentation below
        zero.

        Returns a list of lines. Every tag is placed on a line of its own,
        text between tags (and after the last tag) is dedented, stripped of
        blank lines and indented to the current nesting depth.
    """
    data, offset, indent, prev = [], 0, 0, None
    for match in _TAG_RE.finditer(s):
        start, end = match.span()
        name = match.group("name")
        closing = match.group("close")
        if start > offset:
            # Character data sitting between the previous tag and this one.
            data.extend(_text_lines(s[offset:start], indent))
        # A closing tag steps back out one level, except for the closing
        # half of a content-less tag written as a pair ("<br></br>"), whose
        # opening half never stepped in.
        if closing and not (name in UNI and name == prev):
            indent = max(indent - 1, 0)
        data.append(indent * INDENT + match.group().strip())
        offset = end
        # Only a genuine opening tag starts a new nesting level.
        if not (closing or match.group("selfcont") or name in UNI):
            indent += 1
        prev = name
    # Whatever follows the last tag is formatted like any other text, so
    # the result really is a list of single lines.
    data.extend(_text_lines(s[offset:], indent))
    return data