diff options
author | Ujjwal Verma <ujjwalverma1111@gmail.com> | 2017-07-10 01:24:43 +0530 |
---|---|---|
committer | Ujjwal Verma <ujjwalverma1111@gmail.com> | 2017-07-10 01:24:43 +0530 |
commit | 3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0 (patch) | |
tree | 0a27052ae11b25d92bbd44fca99819e75d1690aa /mitmproxy/contrib | |
parent | f3231ed758324a7de465ee5a377f9c40b0a8df34 (diff) | |
download | mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.gz mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.bz2 mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.zip |
Kaitai parser for protobuf
Diffstat (limited to 'mitmproxy/contrib')
-rw-r--r-- | mitmproxy/contrib/kaitaistruct/google_protobuf.py | 124 | ||||
-rwxr-xr-x | mitmproxy/contrib/kaitaistruct/make.sh | 2 | ||||
-rw-r--r-- | mitmproxy/contrib/kaitaistruct/vlq_base128_le.py | 94 |
3 files changed, 220 insertions, 0 deletions
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

from pkg_resources import parse_version
from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO
from enum import Enum


if parse_version(ks_version) < parse_version('0.7'):
    raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))

from .vlq_base128_le import VlqBase128Le


class GoogleProtobuf(KaitaiStruct):
    """Google Protocol Buffers (AKA protobuf) is a popular data
    serialization scheme used for communication protocols, data storage,
    etc. Implementations are available for almost every popular
    language. The focus points of this scheme are brevity (data is
    encoded in a very size-efficient manner) and extensibility (one can
    add keys to the structure, while keeping it readable in previous
    versions of software).

    Protobuf uses a semi-self-describing encoding scheme for its
    messages. It means that it is possible to parse the overall structure
    of the message (skipping over fields one can't understand), but to
    fully understand the message, one needs a protocol definition file
    (`.proto`). To be specific:

    * "Keys" in key-value pairs provided in the message are identified
      only with an integer "field tag". The `.proto` file provides info
      on which symbolic field names these field tags map to.
    * "Keys" also provide something called "wire type". It's not a data
      type in its common sense (i.e. you can't, for example, distinguish
      `sint32` vs `uint32` vs some enum, or `string` from `bytes`), but
      it's enough information to determine how many bytes to
      parse. Interpretation of the value should be done according to the
      type specified in the `.proto` file.
    * There's no direct information on which fields are optional /
      required, which fields may be repeated or constitute a map, what
      restrictions are placed on fields usage in a single message, what
      are the fields' default values, etc, etc.

    After construction, `pairs` holds one `Pair` per key-value pair read
    from the stream, in stream order.

    .. seealso::
       Source - https://developers.google.com/protocol-buffers/docs/encoding
    """

    def __init__(self, _io, _parent=None, _root=None):
        self._io = _io
        self._parent = _parent
        self._root = _root if _root else self
        self._read()

    def _read(self):
        """Read key-value pairs until the stream reports end-of-file."""
        self.pairs = []
        while not self._io.is_eof():
            self.pairs.append(self._root.Pair(self._io, self, self._root))

    class Pair(KaitaiStruct):
        """Key-value pair.

        `key` is the raw varint key; `wire_type` and `field_tag` are
        derived from it. `value` holds the parsed payload, whose Python
        type depends on the wire type (see `_read`).
        """

        class WireTypes(Enum):
            varint = 0
            bit_64 = 1
            len_delimited = 2
            group_start = 3
            group_end = 4
            bit_32 = 5

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            """Read the key, then the value selected by the key's wire type.

            `value` becomes a `VlqBase128Le` for `varint`, a
            `DelimitedBytes` for `len_delimited`, the result of
            `read_u8le()` / `read_u4le()` for `bit_64` / `bit_32`, and
            `None` for `group_start` / `group_end`.
            """
            self.key = VlqBase128Le(self._io)
            wire_types = self._root.Pair.WireTypes
            wire_type = self.wire_type
            if wire_type == wire_types.varint:
                self.value = VlqBase128Le(self._io)
            elif wire_type == wire_types.len_delimited:
                self.value = self._root.DelimitedBytes(self._io, self, self._root)
            elif wire_type == wire_types.bit_64:
                self.value = self._io.read_u8le()
            elif wire_type == wire_types.bit_32:
                self.value = self._io.read_u4le()
            else:
                # No bytes are consumed for group_start / group_end. Define
                # `value` anyway so reading the attribute on such a pair
                # does not raise AttributeError.
                self.value = None

        @property
        def wire_type(self):
            """The "wire type" is the part of the key (its low three bits)
            that carries enough information to parse the value from the
            wire, i.e. read the correct amount of bytes, but there's not
            enough information to interpret it unambiguously. For example,
            one can't clearly distinguish 64-bit fixed-sized integers from
            64-bit floats, signed zigzag-encoded varints from regular
            unsigned varints, arbitrary bytes from UTF-8 encoded strings,
            etc.

            The result is computed on first access and cached. Raises
            ValueError if the low three bits of the key do not match any
            `WireTypes` member (i.e. 6 or 7).
            """
            if not hasattr(self, '_m_wire_type'):
                self._m_wire_type = self._root.Pair.WireTypes(self.key.value & 7)
            return self._m_wire_type

        @property
        def field_tag(self):
            """Identifies a field of the protocol (the key shifted right
            by three bits). One can look up the symbolic field name in a
            `.proto` file by this field tag. Computed on first access and
            cached.
            """
            if not hasattr(self, '_m_field_tag'):
                self._m_field_tag = self.key.value >> 3
            return self._m_field_tag

    class DelimitedBytes(KaitaiStruct):
        """Length-prefixed byte string: a varint `len` followed by
        `len.value` bytes stored in `body`.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            """Read the varint length, then exactly that many bytes."""
            self.len = VlqBase128Le(self._io)
            self.body = self._io.read_bytes(self.len.value)
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

from pkg_resources import parse_version
from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO


if parse_version(ks_version) < parse_version('0.7'):
    raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))


class VlqBase128Le(KaitaiStruct):
    """A variable-length unsigned integer using base128 encoding. 1-byte groups
    consist of a 1-bit continuation flag and a 7-bit value, and are ordered
    "least significant group first", i.e. in "little-endian" manner.

    This particular encoding is specified and used in:

    * DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128".
      http://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139
    * Google Protocol Buffers, where it's called "Base 128 Varints".
      https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
    * Apache Lucene, where it's called "VInt"
      http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
    * Apache Avro uses this as a basis for integer encoding, adding ZigZag on
      top of it for signed ints
      http://avro.apache.org/docs/current/spec.html#binary_encode_primitive

    More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128

    `value` combines every group that was read, so encodings longer than
    8 bytes (for example the 10-byte form protobuf uses for 64-bit values)
    are decoded in full rather than truncated.
    """

    def __init__(self, _io, _parent=None, _root=None):
        self._io = _io
        self._parent = _parent
        self._root = _root if _root else self
        self._read()

    def _read(self):
        """Read groups until one has its continuation flag cleared."""
        self.groups = []
        while True:
            group = self._root.Group(self._io, self, self._root)
            self.groups.append(group)
            if not group.has_next:
                break

    class Group(KaitaiStruct):
        """One byte group, clearly divided into 7-bit "value" and 1-bit "has continuation
        in the next byte" flag.
        """

        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            """Read the single raw byte of this group into `b`."""
            self.b = self._io.read_u1()

        @property
        def has_next(self):
            """If true, then we have more bytes to read."""
            if not hasattr(self, '_m_has_next'):
                self._m_has_next = (self.b & 128) != 0
            return self._m_has_next

        @property
        def value(self):
            """The 7-bit (base128) numeric value of this group."""
            if not hasattr(self, '_m_value'):
                self._m_value = self.b & 127
            return self._m_value

    @property
    def len(self):
        """Number of byte groups that make up this integer."""
        if not hasattr(self, '_m_len'):
            # Inside a method body `len` resolves to the builtin, not to
            # this property, because class scope is not searched here.
            self._m_len = len(self.groups)
        return self._m_len

    @property
    def value(self):
        """Resulting value as normal integer.

        Group `i` contributes its 7 bits shifted left by `7 * i`. All
        groups are included, however many were read. Computed on first
        access and cached.
        """
        if not hasattr(self, '_m_value'):
            self._m_value = sum(
                group.value << (7 * index)
                for index, group in enumerate(self.groups)
            )
        return self._m_value