import re, cStringIO, traceback import urwid from PIL import Image from PIL.ExifTags import TAGS import lxml.html, lxml.etree import common from .. import utils, encoding, flow from ..contrib import jsbeautifier, html2text VIEW_CUTOFF = 1024*50 VIEW_AUTO = 0 VIEW_JSON = 1 VIEW_XML = 2 VIEW_URLENCODED = 3 VIEW_MULTIPART = 4 VIEW_JAVASCRIPT = 5 VIEW_IMAGE = 6 VIEW_RAW = 7 VIEW_HEX = 8 VIEW_HTML = 9 VIEW_OUTLINE = 10 VIEW_NAMES = { VIEW_AUTO: "Auto", VIEW_JSON: "JSON", VIEW_XML: "XML", VIEW_URLENCODED: "URL-encoded", VIEW_MULTIPART: "Multipart Form", VIEW_JAVASCRIPT: "JavaScript", VIEW_IMAGE: "Image", VIEW_RAW: "Raw", VIEW_HEX: "Hex", VIEW_HTML: "HTML", VIEW_OUTLINE: "HTML Outline", } VIEW_PROMPT = ( ("auto detect", "a"), ("hex", "e"), ("html", "h"), ("image", "i"), ("javascript", "j"), ("html outline", "o"), ("json", "s"), ("raw", "r"), ("multipart", "m"), ("urlencoded", "u"), ("xml", "x"), ) VIEW_SHORTCUTS = { "a": VIEW_AUTO, "x": VIEW_XML, "h": VIEW_HTML, "i": VIEW_IMAGE, "j": VIEW_JAVASCRIPT, "s": VIEW_JSON, "u": VIEW_URLENCODED, "m": VIEW_MULTIPART, "o": VIEW_OUTLINE, "r": VIEW_RAW, "e": VIEW_HEX, } CONTENT_TYPES_MAP = { "text/html": VIEW_HTML, "application/json": VIEW_JSON, "text/xml": VIEW_XML, "multipart/form-data": VIEW_MULTIPART, "application/x-www-form-urlencoded": VIEW_URLENCODED, "application/x-javascript": VIEW_JAVASCRIPT, "application/javascript": VIEW_JAVASCRIPT, "text/javascript": VIEW_JAVASCRIPT, "image/png": VIEW_IMAGE, "image/jpeg": VIEW_IMAGE, "image/gif": VIEW_IMAGE, "image/vnd.microsoft.icon": VIEW_IMAGE, "image/x-icon": VIEW_IMAGE, } def trailer(clen, txt, limit): rem = clen - limit if rem > 0: txt.append(urwid.Text("")) txt.append( urwid.Text( [ ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)), ("key", "f"), ("highlight", " to load all data.") ] ) ) def _view_text(content, total, limit): """ Generates a body for a chunk of text. """ txt = [] for i in utils.cleanBin(content).splitlines(): txt.append( urwid.Text(("text", i), wrap="any") ) trailer(total, txt, limit) return txt def view_raw(hdrs, content, limit): txt = _view_text(content[:limit], len(content), limit) return "Raw", txt def view_hex(hdrs, content, limit): txt = [] for offset, hexa, s in utils.hexdump(content[:limit]): txt.append(urwid.Text([ ("offset", offset), " ", ("text", hexa), " ", ("text", s), ])) trailer(len(content), txt, limit) return "Hex", txt def view_xml(hdrs, content, limit): parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False) try: document = lxml.etree.fromstring(content, parser) except lxml.etree.XMLSyntaxError: return None docinfo = document.getroottree().docinfo prev = [] p = document.getroottree().getroot().getprevious() while p is not None: prev.insert( 0, lxml.etree.tostring(p) ) p = p.getprevious() doctype=docinfo.doctype if prev: doctype += "\n".join(prev).strip() doctype = doctype.strip() s = lxml.etree.tostring( document, pretty_print=True, xml_declaration=True, doctype=doctype or None, encoding = docinfo.encoding ) txt = [] for i in s[:limit].strip().split("\n"): txt.append( urwid.Text(("text", i)), ) trailer(len(content), txt, limit) return "XML-like data", txt def view_html(hdrs, content, limit): if utils.isXML(content): parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True) d = lxml.html.fromstring(content, parser=parser) docinfo = d.getroottree().docinfo s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype) return "HTML", _view_text(s[:limit], len(s), limit) def view_outline(hdrs, content, limit): content = content.decode("utf-8") h = html2text.HTML2Text(baseurl="") h.ignore_images = True h.body_width = 0 content = h.handle(content) txt = _view_text(content[:limit], len(content), limit) return "HTML Outline", txt def view_json(hdrs, content, limit): lines = utils.pretty_json(content) if lines: txt = [] sofar = 0 for i in lines: sofar += len(i) txt.append( urwid.Text(("text", i)), ) if sofar > limit: break trailer(sum(len(i) for i in lines), txt, limit) return "JSON", txt def view_multipart(hdrs, content, limit): v = hdrs.get("content-type") if v: v = utils.parse_content_type(v[0]) if not v: return boundary = v[2].get("boundary") if not boundary: return rx = re.compile(r'\bname="([^"]+)"') keys = [] vals = [] for i in content.split("--" + boundary): parts = i.splitlines() if len(parts) > 1 and parts[0][0:2] != "--": match = rx.search(parts[1]) if match: keys.append(match.group(1) + ":") vals.append(utils.cleanBin( "\n".join(parts[3+parts[2:].index(""):]) )) r = [ urwid.Text(("highlight", "Form data:\n")), ] r.extend(common.format_keyvals( zip(keys, vals), key = "header", val = "text" )) return "Multipart form", r def view_urlencoded(hdrs, content, limit): lines = utils.urldecode(content) if lines: body = common.format_keyvals( [(k+":", v) for (k, v) in lines], key = "header", val = "text" ) return "URLEncoded form", body def view_javascript(hdrs, content, limit): opts = jsbeautifier.default_options() opts.indent_size = 2 res = jsbeautifier.beautify(content[:limit], opts) return "JavaScript", _view_text(res, len(content), limit) def view_image(hdrs, content, limit): try: img = Image.open(cStringIO.StringIO(content)) except IOError: return None parts = [ ("Format", str(img.format_description)), ("Size", "%s x %s px"%img.size), ("Mode", str(img.mode)), ] for i in sorted(img.info.keys()): if i != "exif": parts.append( (str(i), str(img.info[i])) ) if hasattr(img, "_getexif"): ex = img._getexif() if ex: for i in sorted(ex.keys()): tag = TAGS.get(i, i) parts.append( (str(tag), str(ex[i])) ) clean = [] for i in parts: clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])]) fmt = common.format_keyvals( clean, key = "header", val = "text" ) return "%s image"%img.format, fmt PRETTY_FUNCTION_MAP = { VIEW_XML: view_xml, VIEW_HTML: view_html, VIEW_JSON: view_json, VIEW_URLENCODED: view_urlencoded, VIEW_MULTIPART: view_multipart, VIEW_JAVASCRIPT: view_javascript, VIEW_IMAGE: view_image, VIEW_HEX: view_hex, VIEW_RAW: view_raw, VIEW_OUTLINE: view_outline, } def get_view_func(viewmode, hdrs, content): """ Returns a function object. """ if viewmode == VIEW_AUTO: ctype = hdrs.get("content-type") if ctype: ctype = ctype[0] ct = utils.parse_content_type(ctype) if ctype else None if ct: viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1])) if not viewmode and utils.isXML(content): viewmode = VIEW_XML return PRETTY_FUNCTION_MAP.get(viewmode, view_raw) def get_content_view(viewmode, hdrItems, content, limit): """ Returns a (msg, body) tuple. """ msg = [] hdrs = flow.ODictCaseless([list(i) for i in hdrItems]) enc = hdrs.get("content-encoding") if enc and enc[0] != "identity": decoded = encoding.decode(enc[0], content) if decoded: content = decoded msg.append("[decoded %s]"%enc[0]) func = get_view_func(viewmode, hdrs, content) try: ret = func(hdrs, content, limit) # Third-party viewers can fail in unexpected ways... except Exception, e: s = traceback.format_exc() return "", _view_text(s, len(s), len(s)) ret = None if not ret: viewmode = VIEW_RAW ret = view_raw(hdrs, content, limit) msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) return " ".join(msg), ret[1]