diff options
author | Shadab Zafar <dufferzafar0@gmail.com> | 2016-06-21 23:53:59 +0530 |
---|---|---|
committer | Shadab Zafar <dufferzafar0@gmail.com> | 2016-06-21 23:53:59 +0530 |
commit | 0a535509555ec76569ff69bc1553adbc501918d3 (patch) | |
tree | 60a403429f83070cc82c4f5cc974508f870248fd /mitmproxy/contrib | |
parent | 10f4a36a5a845fbf49b519ff674acd1e6f132078 (diff) | |
download | mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.gz mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.bz2 mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.zip |
mitmproxy.tnetstring -> mitmproxy.contrib.tnetstring
Diffstat (limited to 'mitmproxy/contrib')
-rw-r--r-- | mitmproxy/contrib/tnetstring.py | 375 |
1 files changed, 375 insertions, 0 deletions
# imported from the tnetstring project: https://github.com/rfk/tnetstring
#
# Copyright (c) 2011 Ryan Kelly
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
tnetstring:  data serialization using typed netstrings
======================================================


This is a data serialization library. It's a lot like JSON but it uses a
new syntax called "typed netstrings" that Zed has proposed for use in the
Mongrel2 webserver.  It's designed to be simpler and easier to implement
than JSON, with a happy consequence of also being faster in many cases.

An ordinary netstring is a blob of data prefixed with its length and postfixed
with a sanity-checking comma.  The string "hello world" encodes like this::

    11:hello world,

Typed netstrings add other datatypes by replacing the comma with a type tag.
Here's the integer 12345 encoded as a tnetstring::

    5:12345#

And here's the list [12345,True,0] which mixes integers and bools::

    19:5:12345#4:true!1:0#]

Simple enough?  This module gives you the following functions:

    :dump:    dump an object as a tnetstring to a file
    :dumps:   dump an object as a tnetstring to a string
    :load:    load a tnetstring-encoded object from a file
    :loads:   load a tnetstring-encoded object from a string
    :pop:     pop a tnetstring-encoded object from the front of a string

Note that since parsing a tnetstring requires reading all the data into memory
at once, there's no efficiency gain from using the file-based versions of these
functions.  They're only here so you can use load() to read precisely one
item from a file or socket without consuming any extra data.

tnetstrings work only with byte strings, not unicode, and the functions in
this module do not take an encoding argument.  If you want unicode strings
then encode them before dumping and decode them after loading, like so::

    >>> tnetstring.dumps(u"\\u03b1".encode("utf8"))
    b'2:\\xce\\xb1,'
    >>> tnetstring.loads(b"2:\\xce\\xb1,").decode("utf8") == u"\\u03b1"
    True

"""
from collections import deque

try:
    import six
except ImportError:
    # Keep this vendored module importable where six is not installed.
    # On Python 3 this is exactly what six.integer_types evaluates to.
    _INTEGER_TYPES = (int,)
else:
    _INTEGER_TYPES = six.integer_types

__ver_major__ = 0
__ver_minor__ = 2
__ver_patch__ = 0
__ver_sub__ = ""
__version__ = "%d.%d.%d%s" % (
    __ver_major__, __ver_minor__, __ver_patch__, __ver_sub__)

# Largest payload length that load() accepts in a length prefix.
_MAX_DATA_LENGTH = 999999999


def dumps(value):
    """
    This function dumps a python object as a tnetstring.

    Supported values are None, bools, integers, floats, byte strings,
    lists/tuples and dicts of those.  Anything else raises ValueError.
    """
    #  This uses a deque to collect output fragments in reverse order,
    #  then joins them together at the end.  It's measurably faster
    #  than creating all the intermediate strings.
    #  If you're reading this to get a handle on the tnetstring format,
    #  consider the _gdumps() function instead; it's a standard top-down
    #  generator that's simpler to understand but much less efficient.
    q = deque()
    _rdumpq(q, 0, value)
    return b''.join(q)


def dump(value, file_handle):
    """
    This function dumps a python object as a tnetstring and
    writes it to the given file.

    The file must accept bytes in its write() method.
    """
    file_handle.write(dumps(value))


def _rdumpq(q, size, value):
    """
    Dump value as a tnetstring, to a deque instance, last chunks first.

    This function generates the tnetstring representation of the given value,
    pushing chunks of the output onto the given deque instance.  It pushes
    the last chunk first, then recursively generates more chunks.

    When passed in the current size of the string in the queue, it will return
    the new size of the string in the queue.

    Operating last-chunk-first makes it easy to calculate the size written
    for recursive structures without having to build their representation as
    a string.  This is measurably faster than generating the intermediate
    strings, especially on deeply nested structures.

    Raises ValueError if value (or anything nested in it) is of a type that
    cannot be serialized.
    """
    write = q.appendleft
    # The bool singletons must be tested before the integer branch, since
    # bool is a subclass of int.
    if value is None:
        write(b'0:~')
        return size + 3
    elif value is True:
        write(b'4:true!')
        return size + 7
    elif value is False:
        write(b'5:false!')
        return size + 8
    elif isinstance(value, _INTEGER_TYPES):
        data = str(value).encode()
        ldata = len(data)
        span = str(ldata).encode()
        write(b'#')
        write(data)
        write(b':')
        write(span)
        return size + 2 + len(span) + ldata
    elif isinstance(value, float):
        #  Use repr() for float rather than str().
        #  It round-trips more accurately.
        #  Probably unnecessary in later python versions that
        #  use David Gay's ftoa routines.
        data = repr(value).encode()
        ldata = len(data)
        span = str(ldata).encode()
        write(b'^')
        write(data)
        write(b':')
        write(span)
        return size + 2 + len(span) + ldata
    elif isinstance(value, bytes):
        lvalue = len(value)
        span = str(lvalue).encode()
        write(b',')
        write(value)
        write(b':')
        write(span)
        return size + 2 + len(span) + lvalue
    elif isinstance(value, (list, tuple)):
        write(b']')
        init_size = size = size + 1
        for item in reversed(value):
            size = _rdumpq(q, size, item)
        span = str(size - init_size).encode()
        write(b':')
        write(span)
        return size + 1 + len(span)
    elif isinstance(value, dict):
        write(b'}')
        init_size = size = size + 1
        # Chunks are pushed back-to-front, so walk the items in reverse to
        # emit the pairs in the dict's own iteration order.  This keeps the
        # output identical to _gdumps() and preserves key order on a
        # round-trip through loads().
        for (k, v) in reversed(list(value.items())):
            size = _rdumpq(q, size, v)
            size = _rdumpq(q, size, k)
        span = str(size - init_size).encode()
        write(b':')
        write(span)
        return size + 1 + len(span)
    else:
        raise ValueError("unserializable object: {} ({})".format(value, type(value)))


def _gdumps(value):
    """
    Generate fragments of value dumped as a tnetstring.

    This is the naive dumping algorithm, implemented as a generator so that
    it's easy to pass to b"".join() without building a new list.

    This is mainly here for comparison purposes; the _rdumpq version is
    measurably faster as it doesn't have to build intermediate strings.

    Raises ValueError (once iterated) if value is of a type that cannot be
    serialized.
    """
    if value is None:
        yield b'0:~'
    elif value is True:
        yield b'4:true!'
    elif value is False:
        yield b'5:false!'
    elif isinstance(value, _INTEGER_TYPES):
        data = str(value).encode()
        yield str(len(data)).encode()
        yield b':'
        yield data
        yield b'#'
    elif isinstance(value, float):
        data = repr(value).encode()
        yield str(len(data)).encode()
        yield b':'
        yield data
        yield b'^'
    elif isinstance(value, bytes):
        yield str(len(value)).encode()
        yield b':'
        yield value
        yield b','
    elif isinstance(value, (list, tuple)):
        sub = []
        for item in value:
            sub.extend(_gdumps(item))
        sub = b''.join(sub)
        yield str(len(sub)).encode()
        yield b':'
        yield sub
        yield b']'
    elif isinstance(value, dict):
        sub = []
        for (k, v) in value.items():
            sub.extend(_gdumps(k))
            sub.extend(_gdumps(v))
        sub = b''.join(sub)
        yield str(len(sub)).encode()
        yield b':'
        yield sub
        yield b'}'
    else:
        raise ValueError("unserializable object: {} ({})".format(value, type(value)))


def loads(string):
    """
    This function parses a tnetstring into a python object.

    Any data following the first tnetstring in the input is ignored.
    Raises ValueError if the input is not a valid tnetstring.
    """
    #  No point duplicating effort here.  In the C-extension version,
    #  loads() is measurably faster than pop() since it can avoid
    #  the overhead of building a second string.
    return pop(string)[0]


def load(file_handle):
    """load(file) -> object

    This function reads a tnetstring from a file and parses it into a
    python object.  The file must support the read() method and return
    bytes, and this function promises not to read more data than necessary.

    Raises ValueError if the data read is not a valid tnetstring.
    """
    #  Read the length prefix one char at a time.
    #  Note that the netstring spec explicitly forbids padding zeros.
    c = file_handle.read(1)
    if not c.isdigit():
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    datalen = ord(c) - ord('0')
    c = file_handle.read(1)
    if datalen != 0:
        while c.isdigit():
            datalen = (10 * datalen) + (ord(c) - ord('0'))
            if datalen > _MAX_DATA_LENGTH:
                errmsg = "not a tnetstring: absurdly large length prefix"
                raise ValueError(errmsg)
            c = file_handle.read(1)
    if c != b':':
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    #  Now we can read and parse the payload.  Only the outermost value is
    #  read from the file; nested values are parsed from the payload bytes.
    data = file_handle.read(datalen)
    if len(data) != datalen:
        raise ValueError("not a tnetstring: length prefix too big")
    tns_type = file_handle.read(1)
    return _parse_payload(data, tns_type)


def pop(string):
    """pop(string) -> (object, remain)

    This function parses a tnetstring into a python object.
    It returns a tuple giving the parsed object and a string
    containing any unparsed data from the end of the string.

    Raises ValueError if the input does not start with a valid tnetstring.
    """
    #  Parse out data length, type and remaining string.
    try:
        dlen, rest = string.split(b':', 1)
        dlen = int(dlen)
    except ValueError:
        raise ValueError("not a tnetstring: missing or invalid length prefix: {}".format(string))
    #  Slicing never raises, so check explicitly that the payload and its
    #  one-byte type tag are really there (and that the length is sane).
    if dlen < 0 or len(rest) <= dlen:
        raise ValueError("not a tnetstring: invalid length prefix: {}".format(dlen))
    data, tns_type, remain = rest[:dlen], rest[dlen:dlen + 1], rest[dlen + 1:]
    return _parse_payload(data, tns_type), remain


def _parse_payload(data, tns_type):
    """
    Convert a tnetstring payload into a python object.

    `data` is the payload as bytes and `tns_type` is the single-byte type
    tag that followed it.  Shared by load() and pop() so that both apply
    the same rules and report the same errors.

    Raises ValueError if the payload is invalid for its type tag, or if the
    type tag is unknown (including an empty tag from truncated input).
    """
    if tns_type == b',':
        return data
    if tns_type == b'#':
        try:
            return int(data)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal: {}".format(data))
    if tns_type == b'^':
        try:
            return float(data)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal: {}".format(data))
    if tns_type == b'!':
        if data == b'true':
            return True
        elif data == b'false':
            return False
        else:
            raise ValueError("not a tnetstring: invalid boolean literal: {}".format(data))
    if tns_type == b'~':
        if data:
            raise ValueError("not a tnetstring: invalid null literal")
        return None
    if tns_type == b']':
        items = []
        while data:
            item, data = pop(data)
            items.append(item)
        return items
    if tns_type == b'}':
        mapping = {}
        while data:
            key, data = pop(data)
            val, data = pop(data)
            try:
                mapping[key] = val
            except TypeError:
                #  A list or dict key is unhashable; report it as a parse
                #  error like every other kind of malformed input.
                raise ValueError("not a tnetstring: unhashable dict key: {}".format(key))
        return mapping
    raise ValueError("unknown type tag: {}".format(tns_type))