aboutsummaryrefslogtreecommitdiffstats
path: root/mitmproxy/contrib
diff options
context:
space:
mode:
authorUjjwal Verma <ujjwalverma1111@gmail.com>2017-07-10 01:24:43 +0530
committerUjjwal Verma <ujjwalverma1111@gmail.com>2017-07-10 01:24:43 +0530
commit3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0 (patch)
tree0a27052ae11b25d92bbd44fca99819e75d1690aa /mitmproxy/contrib
parentf3231ed758324a7de465ee5a377f9c40b0a8df34 (diff)
downloadmitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.gz
mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.bz2
mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.zip
Kaitai parser for protobuf
Diffstat (limited to 'mitmproxy/contrib')
-rw-r--r--mitmproxy/contrib/kaitaistruct/google_protobuf.py124
-rwxr-xr-xmitmproxy/contrib/kaitaistruct/make.sh2
-rw-r--r--mitmproxy/contrib/kaitaistruct/vlq_base128_le.py94
3 files changed, 220 insertions, 0 deletions
diff --git a/mitmproxy/contrib/kaitaistruct/google_protobuf.py b/mitmproxy/contrib/kaitaistruct/google_protobuf.py
new file mode 100644
index 00000000..fe2336cc
--- /dev/null
+++ b/mitmproxy/contrib/kaitaistruct/google_protobuf.py
@@ -0,0 +1,124 @@
+# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
+
+from pkg_resources import parse_version
+from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO
+from enum import Enum
+
+
+if parse_version(ks_version) < parse_version('0.7'):
+ raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))
+
+from .vlq_base128_le import VlqBase128Le
+class GoogleProtobuf(KaitaiStruct):
+ """Google Protocol Buffers (AKA protobuf) is a popular data
+ serialization scheme used for communication protocols, data storage,
+ etc. There are implementations are available for almost every
+ popular language. The focus points of this scheme are brevity (data
+ is encoded in a very size-efficient manner) and extensibility (one
+ can add keys to the structure, while keeping it readable in previous
+ version of software).
+
+ Protobuf uses semi-self-describing encoding scheme for its
+ messages. It means that it is possible to parse overall structure of
+ the message (skipping over fields one can't understand), but to
+ fully understand the message, one needs a protocol definition file
+ (`.proto`). To be specific:
+
+ * "Keys" in key-value pairs provided in the message are identified
+ only with an integer "field tag". `.proto` file provides info on
+ which symbolic field names these field tags map to.
+ * "Keys" also provide something called "wire type". It's not a data
+ type in its common sense (i.e. you can't, for example, distinguish
+ `sint32` vs `uint32` vs some enum, or `string` from `bytes`), but
+ it's enough information to determine how many bytes to
+ parse. Interpretation of the value should be done according to the
+ type specified in `.proto` file.
+ * There's no direct information on which fields are optional /
+ required, which fields may be repeated or constitute a map, what
+ restrictions are placed on fields usage in a single message, what
+ are the fields' default values, etc, etc.
+
+ .. seealso::
+ Source - https://developers.google.com/protocol-buffers/docs/encoding
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.pairs = []
+ while not self._io.is_eof():
+ self.pairs.append(self._root.Pair(self._io, self, self._root))
+
+
+ class Pair(KaitaiStruct):
+ """Key-value pair."""
+
+ class WireTypes(Enum):
+ varint = 0
+ bit_64 = 1
+ len_delimited = 2
+ group_start = 3
+ group_end = 4
+ bit_32 = 5
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.key = VlqBase128Le(self._io)
+ _on = self.wire_type
+ if _on == self._root.Pair.WireTypes.varint:
+ self.value = VlqBase128Le(self._io)
+ elif _on == self._root.Pair.WireTypes.len_delimited:
+ self.value = self._root.DelimitedBytes(self._io, self, self._root)
+ elif _on == self._root.Pair.WireTypes.bit_64:
+ self.value = self._io.read_u8le()
+ elif _on == self._root.Pair.WireTypes.bit_32:
+ self.value = self._io.read_u4le()
+
+ @property
+ def wire_type(self):
+ """"Wire type" is a part of the "key" that carries enough
+ information to parse value from the wire, i.e. read correct
+ amount of bytes, but there's not enough informaton to
+ interprete in unambiguously. For example, one can't clearly
+ distinguish 64-bit fixed-sized integers from 64-bit floats,
+ signed zigzag-encoded varints from regular unsigned varints,
+ arbitrary bytes from UTF-8 encoded strings, etc.
+ """
+ if hasattr(self, '_m_wire_type'):
+ return self._m_wire_type if hasattr(self, '_m_wire_type') else None
+
+ self._m_wire_type = self._root.Pair.WireTypes((self.key.value & 7))
+ return self._m_wire_type if hasattr(self, '_m_wire_type') else None
+
+ @property
+ def field_tag(self):
+ """Identifies a field of protocol. One can look up symbolic
+ field name in a `.proto` file by this field tag.
+ """
+ if hasattr(self, '_m_field_tag'):
+ return self._m_field_tag if hasattr(self, '_m_field_tag') else None
+
+ self._m_field_tag = (self.key.value >> 3)
+ return self._m_field_tag if hasattr(self, '_m_field_tag') else None
+
+
+ class DelimitedBytes(KaitaiStruct):
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.len = VlqBase128Le(self._io)
+ self.body = self._io.read_bytes(self.len.value)
+
+
+
diff --git a/mitmproxy/contrib/kaitaistruct/make.sh b/mitmproxy/contrib/kaitaistruct/make.sh
index 789829cf..0a30358a 100755
--- a/mitmproxy/contrib/kaitaistruct/make.sh
+++ b/mitmproxy/contrib/kaitaistruct/make.sh
@@ -7,5 +7,7 @@ wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/jpeg.ksy
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/png.ksy
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/ico.ksy
+wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/common/vlq_base128_le.ksy
+wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/serialization/google_protobuf.ksy
kaitai-struct-compiler --target python --opaque-types=true *.ksy
diff --git a/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py
new file mode 100644
index 00000000..235759b7
--- /dev/null
+++ b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py
@@ -0,0 +1,94 @@
+# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
+
+from pkg_resources import parse_version
+from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO
+
+
+if parse_version(ks_version) < parse_version('0.7'):
+ raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))
+
+class VlqBase128Le(KaitaiStruct):
+ """A variable-length unsigned integer using base128 encoding. 1-byte groups
+ consists of 1-bit flag of continuation and 7-bit value, and are ordered
+ "least significant group first", i.e. in "little-endian" manner.
+
+ This particular encoding is specified and used in:
+
+ * DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128".
+ http://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139
+ * Google Protocol Buffers, where it's called "Base 128 Varints".
+ https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
+ * Apache Lucene, where it's called "VInt"
+ http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
+ * Apache Avro uses this as a basis for integer encoding, adding ZigZag on
+ top of it for signed ints
+ http://avro.apache.org/docs/current/spec.html#binary_encode_primitive
+
+ More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128
+
+ This particular implementation supports serialized values to up 8 bytes long.
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.groups = []
+ while True:
+ _ = self._root.Group(self._io, self, self._root)
+ self.groups.append(_)
+ if not (_.has_next):
+ break
+
+ class Group(KaitaiStruct):
+ """One byte group, clearly divided into 7-bit "value" and 1-bit "has continuation
+ in the next byte" flag.
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.b = self._io.read_u1()
+
+ @property
+ def has_next(self):
+ """If true, then we have more bytes to read."""
+ if hasattr(self, '_m_has_next'):
+ return self._m_has_next if hasattr(self, '_m_has_next') else None
+
+ self._m_has_next = (self.b & 128) != 0
+ return self._m_has_next if hasattr(self, '_m_has_next') else None
+
+ @property
+ def value(self):
+ """The 7-bit (base128) numeric value of this group."""
+ if hasattr(self, '_m_value'):
+ return self._m_value if hasattr(self, '_m_value') else None
+
+ self._m_value = (self.b & 127)
+ return self._m_value if hasattr(self, '_m_value') else None
+
+
+ @property
+ def len(self):
+ if hasattr(self, '_m_len'):
+ return self._m_len if hasattr(self, '_m_len') else None
+
+ self._m_len = len(self.groups)
+ return self._m_len if hasattr(self, '_m_len') else None
+
+ @property
+ def value(self):
+ """Resulting value as normal integer."""
+ if hasattr(self, '_m_value'):
+ return self._m_value if hasattr(self, '_m_value') else None
+
+ self._m_value = (((((((self.groups[0].value + ((self.groups[1].value << 7) if self.len >= 2 else 0)) + ((self.groups[2].value << 14) if self.len >= 3 else 0)) + ((self.groups[3].value << 21) if self.len >= 4 else 0)) + ((self.groups[4].value << 28) if self.len >= 5 else 0)) + ((self.groups[5].value << 35) if self.len >= 6 else 0)) + ((self.groups[6].value << 42) if self.len >= 7 else 0)) + ((self.groups[7].value << 49) if self.len >= 8 else 0))
+ return self._m_value if hasattr(self, '_m_value') else None
+
+