mitmproxy.tnetstring -> mitmproxy.contrib.tnetstring

author: Shadab Zafar <dufferzafar0@gmail.com> 2016-06-21 23:53:59 +0530
committer: Shadab Zafar <dufferzafar0@gmail.com> 2016-06-21 23:53:59 +0530
commit: 0a535509555ec76569ff69bc1553adbc501918d3 (patch)
tree: 60a403429f83070cc82c4f5cc974508f870248fd /mitmproxy/contrib
parent: 10f4a36a5a845fbf49b519ff674acd1e6f132078 (diff)
download: mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.gz
mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.bz2
mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.zip
1 files changed, 375 insertions, 0 deletions
diff --git a/mitmproxy/contrib/tnetstring.py b/mitmproxy/contrib/tnetstring.py
new file mode 100644
index 00000000..9bf20b09
--- /dev/null
+++ b/mitmproxy/contrib/tnetstring.py
@@ -0,0 +1,375 @@
+# imported from the tnetstring project: https://github.com/rfk/tnetstring
+#
+# Copyright (c) 2011 Ryan Kelly
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+"""
+tnetstring:  data serialization using typed netstrings
+======================================================
+
+
+This is a data serialization library. It's a lot like JSON but it uses a
+new syntax called "typed netstrings" that Zed has proposed for use in the
+Mongrel2 webserver.  It's designed to be simpler and easier to implement
+than JSON, with a happy consequence of also being faster in many cases.
+
+An ordinary netstring is a blob of data prefixed with its length and postfixed
+with a sanity-checking comma.  The string "hello world" encodes like this::
+
+    11:hello world,
+
+Typed netstrings add other datatypes by replacing the comma with a type tag.
+Here's the integer 12345 encoded as a tnetstring::
+
+    5:12345#
+
+And here's the list [12345,True,0] which mixes integers and bools::
+
+    19:5:12345#4:true!1:0#]
+
+Simple enough?  This module gives you the following functions:
+
+    :dump:    dump an object as a tnetstring to a file
+    :dumps:   dump an object as a tnetstring to a string
+    :load:    load a tnetstring-encoded object from a file
+    :loads:   load a tnetstring-encoded object from a string
+    :pop:     pop a tnetstring-encoded object from the front of a string
+
+Note that since parsing a tnetstring requires reading all the data into memory
+at once, there's no efficiency gain from using the file-based versions of these
+functions.  They're only here so you can use load() to read precisely one
+item from a file or socket without consuming any extra data.
+
+This version of the module works only with byte strings, not unicode: the
+functions take no encoding argument, so string payloads come back as bytes
+and text must be encoded before dumping and decoded after loading, like so::
+
+    >>> data = tnetstring.loads(b"2:\\xce\\xb1,")
+    >>> isinstance(data, bytes)
+    True
+    >>> data.decode("utf8") == u"\\u03b1"
+    True
+
+"""
+from collections import deque
+
+import six
+
+__ver_major__ = 0
+__ver_minor__ = 2
+__ver_patch__ = 0
+__ver_sub__ = ""
+__version__ = "%d.%d.%d%s" % (
+    __ver_major__, __ver_minor__, __ver_patch__, __ver_sub__)
+
+
+def dumps(value):
+    """
+    This function dumps a python object as a tnetstring.
+    """
+    #  This uses a deque to collect output fragments in reverse order,
+    #  then joins them together at the end.  It's measurably faster
+    #  than creating all the intermediate strings.
+    #  If you're reading this to get a handle on the tnetstring format,
+    #  consider the _gdumps() function instead; it's a standard top-down
+    #  generator that's simpler to understand but much less efficient.
+    q = deque()
+    _rdumpq(q, 0, value)
+    return b''.join(q)
+
+
+def dump(value, file_handle):
+    """
+    This function dumps a python object as a tnetstring and
+    writes it to the given file.
+    """
+    file_handle.write(dumps(value))
+
+
+def _rdumpq(q, size, value):
+    """
+    Dump value as a tnetstring, to a deque instance, last chunks first.
+
+    This function generates the tnetstring representation of the given value,
+    pushing chunks of the output onto the given deque instance.  It pushes
+    the last chunk first, then recursively generates more chunks.
+
+    When passed in the current size of the string in the queue, it will return
+    the new size of the string in the queue.
+
+    Operating last-chunk-first makes it easy to calculate the size written
+    for recursive structures without having to build their representation as
+    a string.  This is measurably faster than generating the intermediate
+    strings, especially on deeply nested structures.
+    """
+    write = q.appendleft
+    if value is None:
+        write(b'0:~')
+        return size + 3
+    elif value is True:
+        write(b'4:true!')
+        return size + 7
+    elif value is False:
+        write(b'5:false!')
+        return size + 8
+    elif isinstance(value, six.integer_types):
+        data = str(value).encode()
+        ldata = len(data)
+        span = str(ldata).encode()
+        write(b'#')
+        write(data)
+        write(b':')
+        write(span)
+        return size + 2 + len(span) + ldata
+    elif isinstance(value, float):
+        #  Use repr() for float rather than str().
+        #  It round-trips more accurately.
+        #  Probably unnecessary in later python versions that
+        #  use David Gay's ftoa routines.
+        data = repr(value).encode()
+        ldata = len(data)
+        span = str(ldata).encode()
+        write(b'^')
+        write(data)
+        write(b':')
+        write(span)
+        return size + 2 + len(span) + ldata
+    elif isinstance(value, bytes):
+        lvalue = len(value)
+        span = str(lvalue).encode()
+        write(b',')
+        write(value)
+        write(b':')
+        write(span)
+        return size + 2 + len(span) + lvalue
+    elif isinstance(value, (list, tuple)):
+        write(b']')
+        init_size = size = size + 1
+        for item in reversed(value):
+            size = _rdumpq(q, size, item)
+        span = str(size - init_size).encode()
+        write(b':')
+        write(span)
+        return size + 1 + len(span)
+    elif isinstance(value, dict):
+        write(b'}')
+        init_size = size = size + 1
+        for (k, v) in value.items():
+            size = _rdumpq(q, size, v)
+            size = _rdumpq(q, size, k)
+        span = str(size - init_size).encode()
+        write(b':')
+        write(span)
+        return size + 1 + len(span)
+    else:
+        raise ValueError("unserializable object: {} ({})".format(value, type(value)))
+
+
+def _gdumps(value):
+    """
+    Generate fragments of value dumped as a tnetstring.
+
+    This is the naive dumping algorithm, implemented as a generator so that
+    it's easy to pass to b"".join() without building a new list.
+
+    This is mainly here for comparison purposes; the _rdumpq version is
+    measurably faster as it doesn't have to build intermediate strings.
+    """
+    if value is None:
+        yield b'0:~'
+    elif value is True:
+        yield b'4:true!'
+    elif value is False:
+        yield b'5:false!'
+    elif isinstance(value, six.integer_types):
+        data = str(value).encode()
+        yield str(len(data)).encode()
+        yield b':'
+        yield data
+        yield b'#'
+    elif isinstance(value, float):
+        data = repr(value).encode()
+        yield str(len(data)).encode()
+        yield b':'
+        yield data
+        yield b'^'
+    elif isinstance(value, bytes):
+        yield str(len(value)).encode()
+        yield b':'
+        yield value
+        yield b','
+    elif isinstance(value, (list, tuple)):
+        sub = []
+        for item in value:
+            sub.extend(_gdumps(item))
+        sub = b''.join(sub)
+        yield str(len(sub)).encode()
+        yield b':'
+        yield sub
+        yield b']'
+    elif isinstance(value, (dict,)):
+        sub = []
+        for (k, v) in value.items():
+            sub.extend(_gdumps(k))
+            sub.extend(_gdumps(v))
+        sub = b''.join(sub)
+        yield str(len(sub)).encode()
+        yield b':'
+        yield sub
+        yield b'}'
+    else:
+        raise ValueError("unserializable object")
+
+
+def loads(string):
+    """
+    This function parses a tnetstring into a python object.
+    """
+    #  No point duplicating effort here.  In the C-extension version,
+    #  loads() is measurably faster than pop() since it can avoid
+    #  the overhead of building a second string.
+    return pop(string)[0]
+
+
+def load(file_handle):
+    """load(file) -> object
+
+    This function reads a tnetstring from a file and parses it into a
+    python object.  The file must support the read() method, and this
+    function promises not to read more data than necessary.
+    """
+    #  Read the length prefix one char at a time.
+    #  Note that the netstring spec explicitly forbids padding zeros.
+    c = file_handle.read(1)
+    if not c.isdigit():
+        raise ValueError("not a tnetstring: missing or invalid length prefix")
+    datalen = ord(c) - ord('0')
+    c = file_handle.read(1)
+    if datalen != 0:
+        while c.isdigit():
+            datalen = (10 * datalen) + (ord(c) - ord('0'))
+            if datalen > 999999999:
+                errmsg = "not a tnetstring: absurdly large length prefix"
+                raise ValueError(errmsg)
+            c = file_handle.read(1)
+    if c != b':':
+        raise ValueError("not a tnetstring: missing or invalid length prefix")
+    #  Now we can read and parse the payload.
+    #  This repeats the dispatch logic of pop() so we can avoid
+    #  re-constructing the outermost tnetstring.
+    data = file_handle.read(datalen)
+    if len(data) != datalen:
+        raise ValueError("not a tnetstring: length prefix too big")
+    tns_type = file_handle.read(1)
+    if tns_type == b',':
+        return data
+    if tns_type == b'#':
+        try:
+            return int(data)
+        except ValueError:
+            raise ValueError("not a tnetstring: invalid integer literal")
+    if tns_type == b'^':
+        try:
+            return float(data)
+        except ValueError:
+            raise ValueError("not a tnetstring: invalid float literal")
+    if tns_type == b'!':
+        if data == b'true':
+            return True
+        elif data == b'false':
+            return False
+        else:
+            raise ValueError("not a tnetstring: invalid boolean literal")
+    if tns_type == b'~':
+        if data:
+            raise ValueError("not a tnetstring: invalid null literal")
+        return None
+    if tns_type == b']':
+        l = []
+        while data:
+            item, data = pop(data)
+            l.append(item)
+        return l
+    if tns_type == b'}':
+        d = {}
+        while data:
+            key, data = pop(data)
+            val, data = pop(data)
+            d[key] = val
+        return d
+    raise ValueError("unknown type tag")
+
+
+def pop(string):
+    """pop(string) -> (object, remain)
+
+    This function parses a tnetstring into a python object.
+    It returns a tuple giving the parsed object and a string
+    containing any unparsed data from the end of the string.
+    """
+    #  Parse out data length, type and remaining string.
+    try:
+        dlen, rest = string.split(b':', 1)
+        dlen = int(dlen)
+    except ValueError:
+        raise ValueError("not a tnetstring: missing or invalid length prefix: {}".format(string))
+    try:
+        data, tns_type, remain = rest[:dlen], rest[dlen:dlen + 1], rest[dlen + 1:]
+    except IndexError:
+        #  NOTE: slicing never raises IndexError, so this handler is unreachable;
+        #  a too-short payload leaves tns_type empty and fails as an unknown tag.
+        raise ValueError("not a tnetstring: invalid length prefix: {}".format(dlen))
+    #  Parse the data based on the type tag.
+    if tns_type == b',':
+        return data, remain
+    if tns_type == b'#':
+        try:
+            return int(data), remain
+        except ValueError:
+            raise ValueError("not a tnetstring: invalid integer literal: {}".format(data))
+    if tns_type == b'^':
+        try:
+            return float(data), remain
+        except ValueError:
+            raise ValueError("not a tnetstring: invalid float literal: {}".format(data))
+    if tns_type == b'!':
+        if data == b'true':
+            return True, remain
+        elif data == b'false':
+            return False, remain
+        else:
+            raise ValueError("not a tnetstring: invalid boolean literal: {}".format(data))
+    if tns_type == b'~':
+        if data:
+            raise ValueError("not a tnetstring: invalid null literal")
+        return None, remain
+    if tns_type == b']':
+        l = []
+        while data:
+            item, data = pop(data)
+            l.append(item)
+        return (l, remain)
+    if tns_type == b'}':
+        d = {}
+        while data:
+            key, data = pop(data)
+            val, data = pop(data)
+            d[key] = val
+        return d, remain
+    raise ValueError("unknown type tag: {}".format(tns_type))
author	Shadab Zafar <dufferzafar0@gmail.com>	2016-06-21 23:53:59 +0530
committer	Shadab Zafar <dufferzafar0@gmail.com>	2016-06-21 23:53:59 +0530
commit	0a535509555ec76569ff69bc1553adbc501918d3 (patch)
tree	60a403429f83070cc82c4f5cc974508f870248fd /mitmproxy/contrib
parent	10f4a36a5a845fbf49b519ff674acd1e6f132078 (diff)
download	mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.gz mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.tar.bz2 mitmproxy-0a535509555ec76569ff69bc1553adbc501918d3.zip