diff options
-rw-r--r-- | libmproxy/console/contentview.py | 25 | ||||
-rw-r--r-- | libmproxy/contrib/README | 10 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/__init__.py | 1153 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/README.specs.mkd | 25 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/__init__.py | 67 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/evalbased.py | 39 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/javascriptobfuscator.py | 58 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/myobfuscate.py | 86 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/packer.py | 104 | ||||
-rw-r--r-- | libmproxy/contrib/jsbeautifier/unpackers/urlencode.py | 34 | ||||
-rw-r--r-- | libmproxy/utils.py | 5 | ||||
-rw-r--r-- | test/test_console_contentview.py | 7 |
12 files changed, 1607 insertions, 6 deletions
diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py index 0d770b52..27addaff 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/console/contentview.py @@ -2,6 +2,7 @@ import re import urwid import common from .. import utils, encoding, flow +from ..contrib import jsbeautifier VIEW_CUTOFF = 1024*100 @@ -20,12 +21,14 @@ VIEW_CONTENT_PRETTY_TYPE_JSON = 1 VIEW_CONTENT_PRETTY_TYPE_XML = 2 VIEW_CONTENT_PRETTY_TYPE_URLENCODED = 3 VIEW_CONTENT_PRETTY_TYPE_MULTIPART = 4 +VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT = 5 CONTENT_PRETTY_NAMES = { VIEW_CONTENT_PRETTY_TYPE_JSON: "JSON", VIEW_CONTENT_PRETTY_TYPE_XML: "XML", VIEW_CONTENT_PRETTY_TYPE_URLENCODED: "URL-encoded", - VIEW_CONTENT_PRETTY_TYPE_MULTIPART: "Multipart Form" + VIEW_CONTENT_PRETTY_TYPE_MULTIPART: "Multipart Form", + VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT: "JavaScript", } CONTENT_TYPES_MAP = { @@ -34,6 +37,8 @@ CONTENT_TYPES_MAP = { "text/xml": VIEW_CONTENT_PRETTY_TYPE_XML, "multipart/form-data": VIEW_CONTENT_PRETTY_TYPE_MULTIPART, "application/x-www-form-urlencoded": VIEW_CONTENT_PRETTY_TYPE_URLENCODED, + "application/x-javascript": VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT, + "application/javascript": VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT, } def trailer(clen, txt): @@ -49,13 +54,21 @@ def trailer(clen, txt): ) -def view_raw(hdrs, content): +def _view_text(content): + """ + Generates a body for a chunk of text. 
+ """ txt = [] for i in utils.cleanBin(content[:VIEW_CUTOFF]).splitlines(): txt.append( urwid.Text(("text", i)) ) trailer(len(content), txt) + return txt + + +def view_raw(hdrs, content): + txt = _view_text(content) return "Raw", txt @@ -144,11 +157,19 @@ def view_urlencoded(hdrs, content): return "URLEncoded form", body +def view_javascript(hdrs, content): + opts = jsbeautifier.default_options() + opts.indent_size = 2 + res = jsbeautifier.beautify(content, opts) + return "JavaScript", _view_text(res) + + PRETTY_FUNCTION_MAP = { VIEW_CONTENT_PRETTY_TYPE_XML: view_xmlish, VIEW_CONTENT_PRETTY_TYPE_JSON: view_json, VIEW_CONTENT_PRETTY_TYPE_URLENCODED: view_urlencoded, VIEW_CONTENT_PRETTY_TYPE_MULTIPART: view_multipart, + VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT: view_javascript, } def get_view_func(viewmode, pretty_type, hdrs, content): diff --git a/libmproxy/contrib/README b/libmproxy/contrib/README new file mode 100644 index 00000000..f2e9907e --- /dev/null +++ b/libmproxy/contrib/README @@ -0,0 +1,10 @@ + +Contribs: + +pyparsing 1.5.2, MIT license + +jsbeautifier, git checkout 25/03/12, MIT license + - Removed test directories + - Disabled packers through a single-line modification (see "# CORTESI" + comment) + diff --git a/libmproxy/contrib/jsbeautifier/__init__.py b/libmproxy/contrib/jsbeautifier/__init__.py new file mode 100644 index 00000000..e319e8dd --- /dev/null +++ b/libmproxy/contrib/jsbeautifier/__init__.py @@ -0,0 +1,1153 @@ +import sys +import getopt +import re +import string + +# +# Originally written by Einar Lielmanis et al., +# Conversion to python by Einar Lielmanis, einar@jsbeautifier.org, +# MIT licence, enjoy. +# +# Python is not my native language, feel free to push things around. 
+# +# Use either from command line (script displays its usage when run +# without any parameters), +# +# +# or, alternatively, use it as a module: +# +# import jsbeautifier +# res = jsbeautifier.beautify('your javascript string') +# res = jsbeautifier.beautify_file('some_file.js') +# +# you may specify some options: +# +# opts = jsbeautifier.default_options() +# opts.indent_size = 2 +# res = jsbeautifier.beautify('some javascript', opts) +# +# +# Here are the available options: (read source) + + +class BeautifierOptions: + def __init__(self): + self.indent_size = 4 + self.indent_char = ' ' + self.indent_with_tabs = False + self.preserve_newlines = True + self.max_preserve_newlines = 10. + self.jslint_happy = False + self.brace_style = 'collapse' + self.keep_array_indentation = False + self.keep_function_indentation = False + self.eval_code = False + + + + def __repr__(self): + return \ +"""indent_size = %d +indent_char = [%s] +preserve_newlines = %s +max_preserve_newlines = %d +jslint_happy = %s +indent_with_tabs = %s +brace_style = %s +keep_array_indentation = %s +eval_code = %s +""" % ( self.indent_size, + self.indent_char, + self.preserve_newlines, + self.max_preserve_newlines, + self.jslint_happy, + self.indent_with_tabs, + self.brace_style, + self.keep_array_indentation, + self.eval_code, + ) + + +class BeautifierFlags: + def __init__(self, mode): + self.previous_mode = 'BLOCK' + self.mode = mode + self.var_line = False + self.var_line_tainted = False + self.var_line_reindented = False + self.in_html_comment = False + self.if_line = False + self.in_case = False + self.eat_next_space = False + self.indentation_baseline = -1 + self.indentation_level = 0 + self.ternary_depth = 0 + + +def default_options(): + return BeautifierOptions() + + +def beautify(string, opts = default_options() ): + b = Beautifier() + return b.beautify(string, opts) + +def beautify_file(file_name, opts = default_options() ): + + if file_name == '-': # stdin + f = sys.stdin + else: + try: + 
f = open(file_name) + except Exception as ex: + return 'The file could not be opened' + + b = Beautifier() + return b.beautify(''.join(f.readlines()), opts) + + +def usage(): + + print("""Javascript beautifier (http://jsbeautifier.org/) + +Usage: jsbeautifier.py [options] <infile> + + <infile> can be "-", which means stdin. + <outfile> defaults to stdout + +Input options: + + -i, --stdin read input from stdin + +Output options: + + -s, --indent-size=NUMBER indentation size. (default 4). + -c, --indent-char=CHAR character to indent with. (default space). + -t, --indent-with-tabs Indent with tabs, overrides -s and -c + -d, --disable-preserve-newlines do not preserve existing line breaks. + -j, --jslint-happy more jslint-compatible output + -b, --brace-style=collapse brace style (collapse, expand, end-expand) + -k, --keep-array-indentation keep array indentation. + -o, --outfile=FILE specify a file to output to (default stdout) + -f, --keep-function-indentation Do not re-indent function bodies defined in var lines. + +Rarely needed options: + + --eval-code evaluate code if a JS interpreter is + installed. May be useful with some obfuscated + script but poses a potential security issue. + + -l, --indent-level=NUMBER initial indentation level. (default 0). + + -h, --help, --usage prints this help statement. 
+ +""") + + + + + + +class Beautifier: + + def __init__(self, opts = default_options() ): + + self.opts = opts + self.blank_state() + + def blank_state(self): + + # internal flags + self.flags = BeautifierFlags('BLOCK') + self.flag_store = [] + self.wanted_newline = False + self.just_added_newline = False + self.do_block_just_closed = False + + if self.opts.indent_with_tabs: + self.indent_string = "\t" + else: + self.indent_string = self.opts.indent_char * self.opts.indent_size + + self.preindent_string = '' + self.last_word = '' # last TK_WORD seen + self.last_type = 'TK_START_EXPR' # last token type + self.last_text = '' # last token text + self.last_last_text = '' # pre-last token text + + self.input = None + self.output = [] # formatted javascript gets built here + + self.whitespace = ["\n", "\r", "\t", " "] + self.wordchar = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$' + self.digits = '0123456789' + self.punct = '+ - * / % & ++ -- = += -= *= /= %= == === != !== > < >= <= >> << >>> >>>= >>= <<= && &= | || ! !! , : ? ^ ^= |= ::' + self.punct += ' <?= <? 
?> <%= <% %>' + self.punct = self.punct.split(' ') + + + # Words which always should start on a new line + self.line_starters = 'continue,try,throw,return,var,if,switch,case,default,for,while,break,function'.split(',') + self.set_mode('BLOCK') + + global parser_pos + parser_pos = 0 + + + def beautify(self, s, opts = None ): + + if opts != None: + self.opts = opts + + + if self.opts.brace_style not in ['expand', 'collapse', 'end-expand']: + raise(Exception('opts.brace_style must be "expand", "collapse" or "end-expand".')) + + self.blank_state() + + while s and s[0] in [' ', '\t']: + self.preindent_string += s[0] + s = s[1:] + + #self.input = self.unpack(s, opts.eval_code) + # CORTESI + self.input = s + + parser_pos = 0 + while True: + token_text, token_type = self.get_next_token() + #print (token_text, token_type, self.flags.mode) + if token_type == 'TK_EOF': + break + + handlers = { + 'TK_START_EXPR': self.handle_start_expr, + 'TK_END_EXPR': self.handle_end_expr, + 'TK_START_BLOCK': self.handle_start_block, + 'TK_END_BLOCK': self.handle_end_block, + 'TK_WORD': self.handle_word, + 'TK_SEMICOLON': self.handle_semicolon, + 'TK_STRING': self.handle_string, + 'TK_EQUALS': self.handle_equals, + 'TK_OPERATOR': self.handle_operator, + 'TK_BLOCK_COMMENT': self.handle_block_comment, + 'TK_INLINE_COMMENT': self.handle_inline_comment, + 'TK_COMMENT': self.handle_comment, + 'TK_UNKNOWN': self.handle_unknown, + } + + handlers[token_type](token_text) + + self.last_last_text = self.last_text + self.last_type = token_type + self.last_text = token_text + + sweet_code = self.preindent_string + re.sub('[\n ]+$', '', ''.join(self.output)) + return sweet_code + + def unpack(self, source, evalcode=False): + import jsbeautifier.unpackers as unpackers + try: + return unpackers.run(source, evalcode) + except unpackers.UnpackingError as error: + print('error:', error) + return '' + + def trim_output(self, eat_newlines = False): + while len(self.output) \ + and ( + self.output[-1] == ' '\ + 
or self.output[-1] == self.indent_string \ + or self.output[-1] == self.preindent_string \ + or (eat_newlines and self.output[-1] in ['\n', '\r'])): + self.output.pop() + + def is_special_word(self, s): + return s in ['case', 'return', 'do', 'if', 'throw', 'else']; + + def is_array(self, mode): + return mode in ['[EXPRESSION]', '[INDENDED-EXPRESSION]'] + + + def is_expression(self, mode): + return mode in ['[EXPRESSION]', '[INDENDED-EXPRESSION]', '(EXPRESSION)', '(FOR-EXPRESSION)', '(COND-EXPRESSION)'] + + + def append_newline_forced(self): + old_array_indentation = self.opts.keep_array_indentation + self.opts.keep_array_indentation = False + self.append_newline() + self.opts.keep_array_indentation = old_array_indentation + + def append_newline(self, ignore_repeated = True): + + self.flags.eat_next_space = False + + if self.opts.keep_array_indentation and self.is_array(self.flags.mode): + return + + self.flags.if_line = False + self.trim_output() + + if len(self.output) == 0: + # no newline on start of file + return + + if self.output[-1] != '\n' or not ignore_repeated: + self.just_added_newline = True + self.output.append('\n') + + if self.preindent_string: + self.output.append(self.preindent_string) + + for i in range(self.flags.indentation_level): + self.output.append(self.indent_string) + + if self.flags.var_line and self.flags.var_line_reindented: + self.output.append(self.indent_string) + + + def append(self, s): + if s == ' ': + # do not add just a single space after the // comment, ever + if self.last_type == 'TK_COMMENT': + return self.append_newline() + + # make sure only single space gets drawn + if self.flags.eat_next_space: + self.flags.eat_next_space = False + elif len(self.output) and self.output[-1] not in [' ', '\n', self.indent_string]: + self.output.append(' ') + else: + self.just_added_newline = False + self.flags.eat_next_space = False + self.output.append(s) + + + def indent(self): + self.flags.indentation_level = self.flags.indentation_level 
+ 1 + + + def remove_indent(self): + if len(self.output) and self.output[-1] in [self.indent_string, self.preindent_string]: + self.output.pop() + + + def set_mode(self, mode): + + prev = BeautifierFlags('BLOCK') + + if self.flags: + self.flag_store.append(self.flags) + prev = self.flags + + self.flags = BeautifierFlags(mode) + + if len(self.flag_store) == 1: + self.flags.indentation_level = 0 + else: + self.flags.indentation_level = prev.indentation_level + if prev.var_line and prev.var_line_reindented: + self.flags.indentation_level = self.flags.indentation_level + 1 + self.flags.previous_mode = prev.mode + + + def restore_mode(self): + self.do_block_just_closed = self.flags.mode == 'DO_BLOCK' + if len(self.flag_store) > 0: + mode = self.flags.mode + self.flags = self.flag_store.pop() + self.flags.previous_mode = mode + + + def get_next_token(self): + + global parser_pos + + self.n_newlines = 0 + + if parser_pos >= len(self.input): + return '', 'TK_EOF' + + self.wanted_newline = False + c = self.input[parser_pos] + parser_pos += 1 + + keep_whitespace = self.opts.keep_array_indentation and self.is_array(self.flags.mode) + + if keep_whitespace: + # slight mess to allow nice preservation of array indentation and reindent that correctly + # first time when we get to the arrays: + # var a = [ + # ....'something' + # we make note of whitespace_count = 4 into flags.indentation_baseline + # so we know that 4 whitespaces in original source match indent_level of reindented source + # + # and afterwards, when we get to + # 'something, + # .......'something else' + # we know that this should be indented to indent_level + (7 - indentation_baseline) spaces + + whitespace_count = 0 + while c in self.whitespace: + if c == '\n': + self.trim_output() + self.output.append('\n') + self.just_added_newline = True + whitespace_count = 0 + elif c == '\t': + whitespace_count += 4 + elif c == '\r': + pass + else: + whitespace_count += 1 + + if parser_pos >= len(self.input): + return '', 
'TK_EOF' + + c = self.input[parser_pos] + parser_pos += 1 + + if self.flags.indentation_baseline == -1: + + self.flags.indentation_baseline = whitespace_count + + if self.just_added_newline: + for i in range(self.flags.indentation_level + 1): + self.output.append(self.indent_string) + + if self.flags.indentation_baseline != -1: + for i in range(whitespace_count - self.flags.indentation_baseline): + self.output.append(' ') + + else: # not keep_whitespace + while c in self.whitespace: + if c == '\n': + if self.opts.max_preserve_newlines == 0 or self.opts.max_preserve_newlines > self.n_newlines: + self.n_newlines += 1 + + if parser_pos >= len(self.input): + return '', 'TK_EOF' + + c = self.input[parser_pos] + parser_pos += 1 + + if self.opts.preserve_newlines and self.n_newlines > 1: + for i in range(self.n_newlines): + self.append_newline(i == 0) + self.just_added_newline = True + + self.wanted_newline = self.n_newlines > 0 + + + if c in self.wordchar: + if parser_pos < len(self.input): + while self.input[parser_pos] in self.wordchar: + c = c + self.input[parser_pos] + parser_pos += 1 + if parser_pos == len(self.input): + break + + # small and surprisingly unugly hack for 1E-10 representation + if parser_pos != len(self.input) and self.input[parser_pos] in '+-' \ + and re.match('^[0-9]+[Ee]$', c): + + sign = self.input[parser_pos] + parser_pos += 1 + t = self.get_next_token() + c += sign + t[0] + return c, 'TK_WORD' + + if c == 'in': # in is an operator, need to hack + return c, 'TK_OPERATOR' + + if self.wanted_newline and \ + self.last_type != 'TK_OPERATOR' and\ + self.last_type != 'TK_EQUALS' and\ + not self.flags.if_line and \ + (self.opts.preserve_newlines or self.last_text != 'var'): + self.append_newline() + + return c, 'TK_WORD' + + if c in '([': + return c, 'TK_START_EXPR' + + if c in ')]': + return c, 'TK_END_EXPR' + + if c == '{': + return c, 'TK_START_BLOCK' + + if c == '}': + return c, 'TK_END_BLOCK' + + if c == ';': + return c, 'TK_SEMICOLON' + + if c == 
'/': + comment = '' + inline_comment = True + comment_mode = 'TK_INLINE_COMMENT' + if self.input[parser_pos] == '*': # peek /* .. */ comment + parser_pos += 1 + if parser_pos < len(self.input): + while not (self.input[parser_pos] == '*' and \ + parser_pos + 1 < len(self.input) and \ + self.input[parser_pos + 1] == '/')\ + and parser_pos < len(self.input): + c = self.input[parser_pos] + comment += c + if c in '\r\n': + comment_mode = 'TK_BLOCK_COMMENT' + parser_pos += 1 + if parser_pos >= len(self.input): + break + parser_pos += 2 + return '/*' + comment + '*/', comment_mode + if self.input[parser_pos] == '/': # peek // comment + comment = c + while self.input[parser_pos] not in '\r\n': + comment += self.input[parser_pos] + parser_pos += 1 + if parser_pos >= len(self.input): + break + parser_pos += 1 + if self.wanted_newline: + self.append_newline() + return comment, 'TK_COMMENT' + + + + if c == "'" or c == '"' or \ + (c == '/' and ((self.last_type == 'TK_WORD' and self.is_special_word(self.last_text)) or \ + (self.last_type == 'TK_END_EXPR' and self.flags.previous_mode in ['(FOR-EXPRESSION)', '(COND-EXPRESSION)']) or \ + (self.last_type in ['TK_COMMENT', 'TK_START_EXPR', 'TK_START_BLOCK', 'TK_END_BLOCK', 'TK_OPERATOR', + 'TK_EQUALS', 'TK_EOF', 'TK_SEMICOLON']))): + sep = c + esc = False + resulting_string = c + in_char_class = False + + if parser_pos < len(self.input): + if sep == '/': + # handle regexp + in_char_class = False + while esc or in_char_class or self.input[parser_pos] != sep: + resulting_string += self.input[parser_pos] + if not esc: + esc = self.input[parser_pos] == '\\' + if self.input[parser_pos] == '[': + in_char_class = True + elif self.input[parser_pos] == ']': + in_char_class = False + else: + esc = False + parser_pos += 1 + if parser_pos >= len(self.input): + # incomplete regex when end-of-file reached + # bail out with what has received so far + return resulting_string, 'TK_STRING' + else: + # handle string + while esc or 
self.input[parser_pos] != sep: + resulting_string += self.input[parser_pos] + if not esc: + esc = self.input[parser_pos] == '\\' + else: + esc = False + parser_pos += 1 + if parser_pos >= len(self.input): + # incomplete string when end-of-file reached + # bail out with what has received so far + return resulting_string, 'TK_STRING' + + + parser_pos += 1 + resulting_string += sep + if sep == '/': + # regexps may have modifiers /regexp/MOD, so fetch those too + while parser_pos < len(self.input) and self.input[parser_pos] in self.wordchar: + resulting_string += self.input[parser_pos] + parser_pos += 1 + return resulting_string, 'TK_STRING' + + if c == '#': + + # she-bang + if len(self.output) == 0 and len(self.input) > 1 and self.input[parser_pos] == '!': + resulting_string = c + while parser_pos < len(self.input) and c != '\n': + c = self.input[parser_pos] + resulting_string += c + parser_pos += 1 + self.output.append(resulting_string.strip() + "\n") + self.append_newline() + return self.get_next_token() + + + # Spidermonkey-specific sharp variables for circular references + # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript + # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935 + sharp = '#' + if parser_pos < len(self.input) and self.input[parser_pos] in self.digits: + while True: + c = self.input[parser_pos] + sharp += c + parser_pos += 1 + if parser_pos >= len(self.input) or c == '#' or c == '=': + break + if c == '#' or parser_pos >= len(self.input): + pass + elif self.input[parser_pos] == '[' and self.input[parser_pos + 1] == ']': + sharp += '[]' + parser_pos += 2 + elif self.input[parser_pos] == '{' and self.input[parser_pos + 1] == '}': + sharp += '{}' + parser_pos += 2 + return sharp, 'TK_WORD' + + if c == '<' and self.input[parser_pos - 1 : parser_pos + 3] == '<!--': + parser_pos += 3 + c = '<!--' + while parser_pos < len(self.input) and self.input[parser_pos] != '\n': + c += self.input[parser_pos] + parser_pos += 
1 + self.flags.in_html_comment = True + return c, 'TK_COMMENT' + + if c == '-' and self.flags.in_html_comment and self.input[parser_pos - 1 : parser_pos + 2] == '-->': + self.flags.in_html_comment = False + parser_pos += 2 + if self.wanted_newline: + self.append_newline() + return '-->', 'TK_COMMENT' + + if c in self.punct: + while parser_pos < len(self.input) and c + self.input[parser_pos] in self.punct: + c += self.input[parser_pos] + parser_pos += 1 + if parser_pos >= len(self.input): + break + if c == '=': + return c, 'TK_EQUALS' + else: + return c, 'TK_OPERATOR' + return c, 'TK_UNKNOWN' + + + + def handle_start_expr(self, token_text): + if token_text == '[': + if self.last_type == 'TK_WORD' or self.last_text == ')': + if self.last_text in self.line_starters: + self.append(' ') + self.set_mode('(EXPRESSION)') + self.append(token_text) + return + + if self.flags.mode in ['[EXPRESSION]', '[INDENTED-EXPRESSION]']: + if self.last_last_text == ']' and self.last_text == ',': + # ], [ goes to a new line + if self.flags.mode == '[EXPRESSION]': + self.flags.mode = '[INDENTED-EXPRESSION]' + if not self.opts.keep_array_indentation: + self.indent() + self.set_mode('[EXPRESSION]') + if not self.opts.keep_array_indentation: + self.append_newline() + elif self.last_text == '[': + if self.flags.mode == '[EXPRESSION]': + self.flags.mode = '[INDENTED-EXPRESSION]' + if not self.opts.keep_array_indentation: + self.indent() + self.set_mode('[EXPRESSION]') + + if not self.opts.keep_array_indentation: + self.append_newline() + else: + self.set_mode('[EXPRESSION]') + else: + self.set_mode('[EXPRESSION]') + else: + if self.last_text == 'for': + self.set_mode('(FOR-EXPRESSION)') + elif self.last_text in ['if', 'while']: + self.set_mode('(COND-EXPRESSION)') + else: + self.set_mode('(EXPRESSION)') + + + if self.last_text == ';' or self.last_type == 'TK_START_BLOCK': + self.append_newline() + elif self.last_type in ['TK_END_EXPR', 'TK_START_EXPR', 'TK_END_BLOCK'] or self.last_text == '.': 
+ # do nothing on (( and )( and ][ and ]( and .( + if self.wanted_newline: + self.append_newline(); + elif self.last_type not in ['TK_WORD', 'TK_OPERATOR']: + self.append(' ') + elif self.last_word == 'function' or self.last_word == 'typeof': + # function() vs function (), typeof() vs typeof () + if self.opts.jslint_happy: + self.append(' ') + elif self.last_text in self.line_starters or self.last_text == 'catch': + self.append(' ') + + self.append(token_text) + + + def handle_end_expr(self, token_text): + if token_text == ']': + if self.opts.keep_array_indentation: + if self.last_text == '}': + self.remove_indent() + self.append(token_text) + self.restore_mode() + return + else: + if self.flags.mode == '[INDENTED-EXPRESSION]': + if self.last_text == ']': + self.restore_mode() + self.append_newline() + self.append(token_text) + return + self.restore_mode() + self.append(token_text) + + + def handle_start_block(self, token_text): + if self.last_word == 'do': + self.set_mode('DO_BLOCK') + else: + self.set_mode('BLOCK') + + if self.opts.brace_style == 'expand': + if self.last_type != 'TK_OPERATOR': + if self.last_text == '=' or (self.is_special_word(self.last_text) and self.last_text != 'else'): + self.append(' ') + else: + self.append_newline(True) + + self.append(token_text) + self.indent() + else: + if self.last_type not in ['TK_OPERATOR', 'TK_START_EXPR']: + if self.last_type == 'TK_START_BLOCK': + self.append_newline() + else: + self.append(' ') + else: + # if TK_OPERATOR or TK_START_EXPR + if self.is_array(self.flags.previous_mode) and self.last_text == ',': + if self.last_last_text == '}': + self.append(' ') + else: + self.append_newline() + self.indent() + self.append(token_text) + + + def handle_end_block(self, token_text): + self.restore_mode() + if self.opts.brace_style == 'expand': + if self.last_text != '{': + self.append_newline() + else: + if self.last_type == 'TK_START_BLOCK': + if self.just_added_newline: + self.remove_indent() + else: + # {} + 
self.trim_output() + else: + if self.is_array(self.flags.mode) and self.opts.keep_array_indentation: + self.opts.keep_array_indentation = False + self.append_newline() + self.opts.keep_array_indentation = True + else: + self.append_newline() + + self.append(token_text) + + + def handle_word(self, token_text): + if self.do_block_just_closed: + self.append(' ') + self.append(token_text) + self.append(' ') + self.do_block_just_closed = False + return + + if token_text == 'function': + + if self.flags.var_line: + self.flags.var_line_reindented = not self.opts.keep_function_indentation + if (self.just_added_newline or self.last_text == ';') and self.last_text != '{': + # make sure there is a nice clean space of at least one blank line + # before a new function definition + have_newlines = self.n_newlines + if not self.just_added_newline: + have_newlines = 0 + if not self.opts.preserve_newlines: + have_newlines = 1 + for i in range(2 - have_newlines): + self.append_newline(False) + + if token_text in ['case', 'default']: + if self.last_text == ':': + self.remove_indent() + else: + self.flags.indentation_level -= 1 + self.append_newline() + self.flags.indentation_level += 1 + self.append(token_text) + self.flags.in_case = True + return + + prefix = 'NONE' + + if self.last_type == 'TK_END_BLOCK': + if token_text not in ['else', 'catch', 'finally']: + prefix = 'NEWLINE' + else: + if self.opts.brace_style in ['expand', 'end-expand']: + prefix = 'NEWLINE' + else: + prefix = 'SPACE' + self.append(' ') + elif self.last_type == 'TK_SEMICOLON' and self.flags.mode in ['BLOCK', 'DO_BLOCK']: + prefix = 'NEWLINE' + elif self.last_type == 'TK_SEMICOLON' and self.is_expression(self.flags.mode): + prefix = 'SPACE' + elif self.last_type == 'TK_STRING': + prefix = 'NEWLINE' + elif self.last_type == 'TK_WORD': + if self.last_text == 'else': + # eat newlines between ...else *** some_op... 
+ # won't preserve extra newlines in this place (if any), but don't care that much + self.trim_output(True) + prefix = 'SPACE' + elif self.last_type == 'TK_START_BLOCK': + prefix = 'NEWLINE' + elif self.last_type == 'TK_END_EXPR': + self.append(' ') + prefix = 'NEWLINE' + + if self.flags.if_line and self.last_type == 'TK_END_EXPR': + self.flags.if_line = False + + if token_text in self.line_starters: + if self.last_text == 'else': + prefix = 'SPACE' + else: + prefix = 'NEWLINE' + + if token_text == 'function' and self.last_text in ['get', 'set']: + prefix = 'SPACE' + + if token_text in ['else', 'catch', 'finally']: + if self.last_type != 'TK_END_BLOCK' \ + or self.opts.brace_style == 'expand' \ + or self.opts.brace_style == 'end-expand': + self.append_newline() + else: + self.trim_output(True) + self.append(' ') + elif prefix == 'NEWLINE': + if token_text == 'function' and (self.last_type == 'TK_START_EXPR' or self.last_text in '=,'): + # no need to force newline on "function" - + # (function... + pass + elif token_text == 'function' and self.last_text == 'new': + self.append(' ') + elif self.is_special_word(self.last_text): + # no newline between return nnn + self.append(' ') + elif self.last_type != 'TK_END_EXPR': + if (self.last_type != 'TK_START_EXPR' or token_text != 'var') and self.last_text != ':': + # no need to force newline on VAR - + # for (var x = 0... 
+ if token_text == 'if' and self.last_word == 'else' and self.last_text != '{': + self.append(' ') + else: + self.flags.var_line = False + self.flags.var_line_reindented = False + self.append_newline() + elif token_text in self.line_starters and self.last_text != ')': + self.flags.var_line = False + self.flags.var_line_reindented = False + self.append_newline() + elif self.is_array(self.flags.mode) and self.last_text == ',' and self.last_last_text == '}': + self.append_newline() # }, in lists get a newline + elif prefix == 'SPACE': + self.append(' ') + + + self.append(token_text) + self.last_word = token_text + + if token_text == 'var': + self.flags.var_line = True + self.flags.var_line_reindented = False + self.flags.var_line_tainted = False + + + if token_text == 'if': + self.flags.if_line = True + + if token_text == 'else': + self.flags.if_line = False + + + def handle_semicolon(self, token_text): + self.append(token_text) + self.flags.var_line = False + self.flags.var_line_reindented = False + if self.flags.mode == 'OBJECT': + # OBJECT mode is weird and doesn't get reset too well. + self.flags.mode = 'BLOCK' + + + def handle_string(self, token_text): + if self.last_type == 'TK_END_EXPR' and self.flags.previous_mode in ['(COND-EXPRESSION)', '(FOR-EXPRESSION)']: + self.append(' ') + if self.last_type in ['TK_STRING', 'TK_START_BLOCK', 'TK_END_BLOCK', 'TK_SEMICOLON']: + self.append_newline() + elif self.last_type == 'TK_WORD': + self.append(' ') + + # Try to replace readable \x-encoded characters with their equivalent, + # if it is possible (e.g. '\x41\x42\x43\x01' becomes 'ABC\x01'). 
+ def unescape(match): + block, code = match.group(0, 1) + char = chr(int(code, 16)) + if block.count('\\') == 1 and char in string.printable: + return char + return block + + token_text = re.sub(r'\\{1,2}x([a-fA-F0-9]{2})', unescape, token_text) + + self.append(token_text) + + def handle_equals(self, token_text): + if self.flags.var_line: + # just got an '=' in a var-line, different line breaking rules will apply + self.flags.var_line_tainted = True + + self.append(' ') + self.append(token_text) + self.append(' ') + + + def handle_operator(self, token_text): + space_before = True + space_after = True + + if self.flags.var_line and token_text == ',' and self.is_expression(self.flags.mode): + # do not break on comma, for ( var a = 1, b = 2 + self.flags.var_line_tainted = False + + if self.flags.var_line and token_text == ',': + if self.flags.var_line_tainted: + self.append(token_text) + self.flags.var_line_reindented = True + self.flags.var_line_tainted = False + self.append_newline() + return + else: + self.flags.var_line_tainted = False + + if self.is_special_word(self.last_text): + # return had a special handling in TK_WORD + self.append(' ') + self.append(token_text) + return + + if token_text == ':' and self.flags.in_case: + self.append(token_text) + self.append_newline() + self.flags.in_case = False + return + + if token_text == '::': + # no spaces around the exotic namespacing syntax operator + self.append(token_text) + return + + if token_text == ',': + if self.flags.var_line: + if self.flags.var_line_tainted: + # This never happens, as it's handled previously, right? 
+ self.append(token_text) + self.append_newline() + self.flags.var_line_tainted = False + else: + self.append(token_text) + self.append(' ') + elif self.last_type == 'TK_END_BLOCK' and self.flags.mode != '(EXPRESSION)': + self.append(token_text) + if self.flags.mode == 'OBJECT' and self.last_text == '}': + self.append_newline() + else: + self.append(' ') + else: + if self.flags.mode == 'OBJECT': + self.append(token_text) + self.append_newline() + else: + # EXPR or DO_BLOCK + self.append(token_text) + self.append(' ') + # comma handled + return + elif token_text in ['--', '++', '!'] \ + or (token_text in ['+', '-'] \ + and self.last_type in ['TK_START_BLOCK', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR']) \ + or self.last_text in self.line_starters: + + space_before = False + space_after = False + + if self.last_text == ';' and self.is_expression(self.flags.mode): + # for (;; ++i) + # ^^ + space_before = True + + if self.last_type == 'TK_WORD' and self.last_text in self.line_starters: + space_before = True + + if self.flags.mode == 'BLOCK' and self.last_text in ['{', ';']: + # { foo: --i } + # foo(): --bar + self.append_newline() + + elif token_text == '.': + # decimal digits or object.property + space_before = False + + elif token_text == ':': + if self.flags.ternary_depth == 0: + self.flags.mode = 'OBJECT' + space_before = False + else: + self.flags.ternary_depth -= 1 + elif token_text == '?': + self.flags.ternary_depth += 1 + + if space_before: + self.append(' ') + + self.append(token_text) + + if space_after: + self.append(' ') + + + + def handle_block_comment(self, token_text): + + lines = token_text.replace('\x0d', '').split('\x0a') + # all lines start with an asterisk? 
def main():
    """Command-line entry point: parse options and beautify one input.

    Options are read from ``sys.argv[1:]``. Exactly one positional argument
    names the input file; ``-i``/``--stdin`` selects ``'-'`` instead. The
    beautified text is printed, or written (with a trailing newline) to the
    file given by ``-o``/``--outfile``.

    Returns the result of ``usage()`` when the options are invalid, help is
    requested, or no input was selected; otherwise returns ``None``.
    """
    long_options = [
        'indent-size=', 'indent-char=', 'outfile=',
        'disable-preserve-newlines', 'jslint-happy', 'brace-style=',
        'keep-array-indentation', 'indent-level=', 'help', 'usage',
        'stdin', 'eval-code', 'indent-with-tabs',
        'keep-function-indentation',
    ]

    try:
        # 'b' carries a trailing colon because --brace-style takes a value;
        # without it "-b <style>" was parsed as a bare flag plus a stray
        # positional argument.
        opts, args = getopt.getopt(sys.argv[1:], "s:c:o:djb:kil:htf",
                                   long_options)
    except getopt.GetoptError:
        return usage()

    js_options = default_options()

    infile = None
    outfile = 'stdout'
    if len(args) == 1:
        infile = args[0]

    # NOTE: -l/--indent-level is accepted by the parser but has no handler
    # here, so it is silently ignored (unchanged from the original).
    for opt, arg in opts:
        if opt in ('--keep-array-indentation', '-k'):
            js_options.keep_array_indentation = True
        elif opt in ('--keep-function-indentation', '-f'):
            js_options.keep_function_indentation = True
        elif opt in ('--outfile', '-o'):
            outfile = arg
        elif opt in ('--indent-size', '-s'):
            js_options.indent_size = int(arg)
        elif opt in ('--indent-char', '-c'):
            js_options.indent_char = arg
        elif opt in ('--indent-with-tabs', '-t'):
            js_options.indent_with_tabs = True
        elif opt in ('--disable-preserve-newlines', '-d'):
            # Spelled with hyphens to match the declared long option; the
            # old underscore spelling could never match.
            js_options.preserve_newlines = False
        elif opt in ('--jslint-happy', '-j'):
            js_options.jslint_happy = True
        elif opt in ('--eval-code',):
            # One-element tuple: a bare string here would perform a
            # substring test instead of a membership test.
            js_options.eval_code = True
        elif opt in ('--brace-style', '-b'):
            js_options.brace_style = arg
        elif opt in ('--stdin', '-i'):
            infile = '-'
        elif opt in ('--help', '--usage', '-h'):
            return usage()

    if not infile:
        return usage()

    if outfile == 'stdout':
        print(beautify_file(infile, js_options))
    else:
        with open(outfile, 'w') as f:
            f.write(beautify_file(infile, js_options) + '\n')
class UnpackingError(Exception):
    """Raised for badly packed source or any other unpacking failure.

    The single argument is a meaningful, human-readable description of
    what went wrong.
    """
def filtercomments(source):
    """NOT USED: strip comments from the start of `source` and re-emit them.

    Leading ``/* ... */`` blocks (and ``//`` markers) are peeled off the
    front of `source` one at a time, along with the whitespace that follows
    each of them. The collected pieces are joined with newlines and the
    remaining source is appended directly after them.

    Only the ``//`` marker itself (plus any whitespace before it) is
    captured for line comments; the comment text stays in the remaining
    source, as in the original implementation.

    An unterminated ``/*`` stops the scan and leaves the rest of `source`
    untouched instead of raising.
    """
    collected = []

    while True:
        block_start = re.match(r'\s*\/\*', source)
        if block_start:
            # Look for the terminator only after the opening marker, so
            # that "/*/" is not mistaken for a complete comment.
            close = source.find('*/', block_start.end())
            if close == -1:
                break
            # Slice, not a tuple index: the old `source[0, n]` raised
            # TypeError on every block comment.
            comment = source[:close + 2]
        else:
            line_start = re.match(r'\s*\/\/', source)
            if not line_start:
                break
            comment = line_start.group(0)

        source = re.sub(r'^\s+', '', source[len(comment):])
        collected.append(comment)

    return '\n'.join(collected) + source
def jseval(script):
    """Run `script` in the external ``js`` interpreter and return its output.

    The script is fed to the interpreter on standard input and whatever it
    writes to standard output is returned. On any failure -- the ``js``
    executable cannot be started, it exits with a non-zero status, or it
    writes anything to standard error -- the original `script` is returned
    unchanged, so callers never crash on a missing or broken interpreter.
    """
    # SECURITY: this executes the given JavaScript in a local interpreter.
    # Only call it on input you are prepared to run.
    try:
        # stderr is piped so that `errors` below actually reflects what the
        # interpreter reported; without it `errors` was always None and the
        # diagnostics leaked to the terminal.
        interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except OSError:
        return script
    result, errors = interpreter.communicate(script)
    if interpreter.returncode or errors:
        return script
    return result
def smartsplit(code):
    """Split `code` into its double-quoted strings, honouring escapes.

    Scans `code` for ``"``-delimited strings. A backslash inside a string
    keeps both itself and the character that follows it, so an escaped
    quote does not end the string. Text outside strings is discarded.

    Returns a list of the strings found, each re-wrapped in double quotes.
    A string that is still open when `code` ends is returned as if it had
    been closed there; a backslash that is the very last character is kept
    as-is rather than raising IndexError.
    """
    strings = []
    pos = 0
    end = len(code)
    while pos < end:
        if code[pos] == '"':
            chars = []  # characters of the string being collected
            pos += 1
            while pos < end:
                char = code[pos]
                if char == '"':
                    break
                if char == '\\':
                    chars.append(char)
                    pos += 1
                    if pos >= end:
                        # Dangling backslash at end of input: nothing
                        # follows it, so stop here.
                        break
                    char = code[pos]
                chars.append(char)
                pos += 1
            strings.append('"%s"' % ''.join(chars))
        pos += 1
    return strings
def _filter(source):
    """Extract and decode the payload (original file) from `source`.

    The name of the variable holding the payload is taken from the
    ``eval(f(g(name)));`` call, then that variable's single-quoted value is
    located, reversed, base64-decoded and returned as text.

    Raises UnpackingError if either pattern is missing from `source` or if
    the reversed value cannot be base64-decoded into UTF-8 text.
    """
    call = re.search(r'eval\(\w+\(\w+\((\w+)\)\)\);', source)
    if call is None:
        raise UnpackingError('Malformed MyObfuscate data.')
    varname = call.group(1)

    assignment = re.search(r"var +%s *\= *'(.*)';" % varname, source)
    if assignment is None:
        raise UnpackingError('Malformed MyObfuscate data.')
    reverse = assignment.group(1)

    try:
        return base64.b64decode(reverse[::-1].encode('utf8')).decode('utf8')
    except (TypeError, ValueError):
        # Python 2 signals bad base64 with TypeError; Python 3 raises
        # binascii.Error, and a bad byte sequence raises UnicodeDecodeError
        # -- both of which are ValueError subclasses.
        raise UnpackingError('MyObfuscate payload is not base64-encoded.')
class Unbaser(object):
    """Functor converting strings in a given base to natural numbers.

    Bases 2 through 36 are delegated to the ``int()`` builtin. Bases 62 and
    95 are decoded with the alphabets in ``ALPHABET``. Any other base makes
    the constructor raise ``TypeError``.
    """

    # Digit alphabets for the bases int() cannot handle; a character's
    # position in the string is its digit value.
    ALPHABET = {
        62: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        # The backslash is written as an explicit '\\' escape; the former
        # '\]' relied on an invalid escape sequence being passed through.
        95: (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
             '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'),
    }

    def __init__(self, base):
        """Prepare a decoder for `base`; raise TypeError if unsupported."""
        self.base = base

        # If base can be handled by the int() builtin, let it do the work.
        if 2 <= base <= 36:
            self.unbase = lambda value: int(value, base)
        else:
            try:
                alphabet = self.ALPHABET[base]
            except KeyError:
                raise TypeError('Unsupported base encoding.')

            # Cache the digit -> value mapping once per instance.
            self.dictionary = dict(
                (cipher, index) for index, cipher in enumerate(alphabet))
            self.unbase = self._dictunbaser

    def __call__(self, string):
        """Return the integer value of `string` in this instance's base."""
        return self.unbase(string)

    def _dictunbaser(self, string):
        """Decode `string` to an integer using the cached alphabet."""
        ret = 0
        # Walk the digits least-significant first.
        for index, cipher in enumerate(string[::-1]):
            ret += (self.base ** index) * self.dictionary[cipher]
        return ret
def detect(code):
    """Report whether a scriptlet looks URL-encoded.

    The heuristic: the script contains no literal space, and it either
    contains ``%20`` or has more than three ``%`` characters.
    """
    # A literal space means the script was not escaped.
    if ' ' in code:
        return False
    # An encoded space is the strongest hint.
    if '%20' in code:
        return True
    # Otherwise fall back to counting percent signs.
    return code.count('%') > 3