aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorUjjwal Verma <ujjwalverma1111@gmail.com>2017-07-10 01:24:43 +0530
committerUjjwal Verma <ujjwalverma1111@gmail.com>2017-07-10 01:24:43 +0530
commit3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0 (patch)
tree0a27052ae11b25d92bbd44fca99819e75d1690aa
parentf3231ed758324a7de465ee5a377f9c40b0a8df34 (diff)
downloadmitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.gz
mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.tar.bz2
mitmproxy-3f269d2b68f1d1a09bd31b0e0f9c550d095d5fc0.zip
Kaitai parser for protobuf
-rw-r--r--mitmproxy/contentviews/protobuf.py82
-rw-r--r--mitmproxy/contrib/kaitaistruct/google_protobuf.py124
-rwxr-xr-xmitmproxy/contrib/kaitaistruct/make.sh2
-rw-r--r--mitmproxy/contrib/kaitaistruct/vlq_base128_le.py94
-rw-r--r--test/mitmproxy/contentviews/test_protobuf.py65
-rw-r--r--test/mitmproxy/contentviews/test_protobuf_data/protobuf01 (renamed from test/mitmproxy/data/protobuf01)0
-rw-r--r--test/mitmproxy/contentviews/test_protobuf_data/protobuf02bin0 -> 213 bytes
-rw-r--r--test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded65
-rw-r--r--test/mitmproxy/contentviews/test_protobuf_data/protobuf031
-rw-r--r--test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded4
10 files changed, 372 insertions, 65 deletions
diff --git a/mitmproxy/contentviews/protobuf.py b/mitmproxy/contentviews/protobuf.py
index 4bbb1580..abd3985a 100644
--- a/mitmproxy/contentviews/protobuf.py
+++ b/mitmproxy/contentviews/protobuf.py
@@ -1,6 +1,63 @@
-import subprocess
+import io
+from kaitaistruct import KaitaiStream
from . import base
+from mitmproxy.contrib.kaitaistruct import google_protobuf
+
+
+def write_buf(out, field_tag, body, indent_level):
+ if body is not None:
+ out.write("{: <{level}}{}: {}\n".format('', field_tag, body if isinstance(body, int) else str(body, 'utf-8'),
+ level=indent_level))
+ elif field_tag is not None:
+ out.write(' ' * indent_level + str(field_tag) + " {\n")
+ else:
+ out.write(' ' * indent_level + "}\n")
+
+
+def format_pbuf(raw):
+ out = io.StringIO()
+ stack = []
+
+ try:
+ buf = google_protobuf.GoogleProtobuf(KaitaiStream(io.BytesIO(raw)))
+ except:
+ return False
+ stack.extend([(pair, 0) for pair in buf.pairs[::-1]])
+
+ while len(stack):
+ pair, indent_level = stack.pop()
+
+ if pair.wire_type == pair.WireTypes.group_start:
+ body = None
+ elif pair.wire_type == pair.WireTypes.group_end:
+ body = None
+ pair._m_field_tag = None
+ elif pair.wire_type == pair.WireTypes.len_delimited:
+ body = pair.value.body
+ elif pair.wire_type == pair.WireTypes.varint:
+ body = pair.value.value
+ else:
+ body = pair.value
+
+ try:
+ next_buf = google_protobuf.GoogleProtobuf(KaitaiStream(io.BytesIO(body)))
+ stack.extend([(pair, indent_level + 2) for pair in next_buf.pairs[::-1]])
+ write_buf(out, pair.field_tag, None, indent_level)
+ except:
+ write_buf(out, pair.field_tag, body, indent_level)
+
+ if stack:
+ prev_level = stack[-1][1]
+ else:
+ prev_level = 0
+
+ if prev_level < indent_level:
+ levels = int((indent_level - prev_level) / 2)
+ for i in range(1, levels + 1):
+ write_buf(out, None, None, indent_level - i * 2)
+
+ return out.getvalue()
class ViewProtobuf(base.View):
@@ -15,28 +72,9 @@ class ViewProtobuf(base.View):
"application/x-protobuffer",
]
- def is_available(self):
- try:
- p = subprocess.Popen(
- ["protoc", "--version"],
- stdout=subprocess.PIPE
- )
- out, _ = p.communicate()
- return out.startswith(b"libprotoc")
- except:
- return False
-
def __call__(self, data, **metadata):
- if not self.is_available():
- raise NotImplementedError("protoc not found. Please make sure 'protoc' is available in $PATH.")
-
- # if Popen raises OSError, it will be caught in
- # get_content_view and fall back to Raw
- p = subprocess.Popen(['protoc', '--decode_raw'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- decoded, _ = p.communicate(input=data)
+ decoded = format_pbuf(data)
if not decoded:
raise ValueError("Failed to parse input.")
+
return "Protobuf", base.format_text(decoded)
diff --git a/mitmproxy/contrib/kaitaistruct/google_protobuf.py b/mitmproxy/contrib/kaitaistruct/google_protobuf.py
new file mode 100644
index 00000000..fe2336cc
--- /dev/null
+++ b/mitmproxy/contrib/kaitaistruct/google_protobuf.py
@@ -0,0 +1,124 @@
+# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
+
+from pkg_resources import parse_version
+from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO
+from enum import Enum
+
+
+if parse_version(ks_version) < parse_version('0.7'):
+ raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))
+
+from .vlq_base128_le import VlqBase128Le
+class GoogleProtobuf(KaitaiStruct):
+ """Google Protocol Buffers (AKA protobuf) is a popular data
+ serialization scheme used for communication protocols, data storage,
+ etc. Implementations are available for almost every
+ popular language. The focus points of this scheme are brevity (data
+ is encoded in a very size-efficient manner) and extensibility (one
+ can add keys to the structure, while keeping it readable in previous
+ version of software).
+
+ Protobuf uses semi-self-describing encoding scheme for its
+ messages. It means that it is possible to parse overall structure of
+ the message (skipping over fields one can't understand), but to
+ fully understand the message, one needs a protocol definition file
+ (`.proto`). To be specific:
+
+ * "Keys" in key-value pairs provided in the message are identified
+ only with an integer "field tag". `.proto` file provides info on
+ which symbolic field names these field tags map to.
+ * "Keys" also provide something called "wire type". It's not a data
+ type in its common sense (i.e. you can't, for example, distinguish
+ `sint32` vs `uint32` vs some enum, or `string` from `bytes`), but
+ it's enough information to determine how many bytes to
+ parse. Interpretation of the value should be done according to the
+ type specified in `.proto` file.
+ * There's no direct information on which fields are optional /
+ required, which fields may be repeated or constitute a map, what
+ restrictions are placed on fields usage in a single message, what
+ are the fields' default values, etc, etc.
+
+ .. seealso::
+ Source - https://developers.google.com/protocol-buffers/docs/encoding
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.pairs = []
+ while not self._io.is_eof():
+ self.pairs.append(self._root.Pair(self._io, self, self._root))
+
+
+ class Pair(KaitaiStruct):
+ """Key-value pair."""
+
+ class WireTypes(Enum):
+ varint = 0
+ bit_64 = 1
+ len_delimited = 2
+ group_start = 3
+ group_end = 4
+ bit_32 = 5
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.key = VlqBase128Le(self._io)
+ _on = self.wire_type
+ if _on == self._root.Pair.WireTypes.varint:
+ self.value = VlqBase128Le(self._io)
+ elif _on == self._root.Pair.WireTypes.len_delimited:
+ self.value = self._root.DelimitedBytes(self._io, self, self._root)
+ elif _on == self._root.Pair.WireTypes.bit_64:
+ self.value = self._io.read_u8le()
+ elif _on == self._root.Pair.WireTypes.bit_32:
+ self.value = self._io.read_u4le()
+
+ @property
+ def wire_type(self):
+ """"Wire type" is a part of the "key" that carries enough
+ information to parse value from the wire, i.e. read correct
+ amount of bytes, but there's not enough information to
+ interpret it unambiguously. For example, one can't clearly
+ distinguish 64-bit fixed-sized integers from 64-bit floats,
+ signed zigzag-encoded varints from regular unsigned varints,
+ arbitrary bytes from UTF-8 encoded strings, etc.
+ """
+ if hasattr(self, '_m_wire_type'):
+ return self._m_wire_type if hasattr(self, '_m_wire_type') else None
+
+ self._m_wire_type = self._root.Pair.WireTypes((self.key.value & 7))
+ return self._m_wire_type if hasattr(self, '_m_wire_type') else None
+
+ @property
+ def field_tag(self):
+ """Identifies a field of protocol. One can look up symbolic
+ field name in a `.proto` file by this field tag.
+ """
+ if hasattr(self, '_m_field_tag'):
+ return self._m_field_tag if hasattr(self, '_m_field_tag') else None
+
+ self._m_field_tag = (self.key.value >> 3)
+ return self._m_field_tag if hasattr(self, '_m_field_tag') else None
+
+
+ class DelimitedBytes(KaitaiStruct):
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.len = VlqBase128Le(self._io)
+ self.body = self._io.read_bytes(self.len.value)
+
+
+
diff --git a/mitmproxy/contrib/kaitaistruct/make.sh b/mitmproxy/contrib/kaitaistruct/make.sh
index 789829cf..0a30358a 100755
--- a/mitmproxy/contrib/kaitaistruct/make.sh
+++ b/mitmproxy/contrib/kaitaistruct/make.sh
@@ -7,5 +7,7 @@ wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/jpeg.ksy
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/png.ksy
wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/ico.ksy
+wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/common/vlq_base128_le.ksy
+wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/serialization/google_protobuf.ksy
kaitai-struct-compiler --target python --opaque-types=true *.ksy
diff --git a/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py
new file mode 100644
index 00000000..235759b7
--- /dev/null
+++ b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py
@@ -0,0 +1,94 @@
+# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
+
+from pkg_resources import parse_version
+from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO
+
+
+if parse_version(ks_version) < parse_version('0.7'):
+ raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version))
+
+class VlqBase128Le(KaitaiStruct):
+ """A variable-length unsigned integer using base128 encoding. 1-byte groups
+ consist of a 1-bit continuation flag and a 7-bit value, and are ordered
+ "least significant group first", i.e. in "little-endian" manner.
+
+ This particular encoding is specified and used in:
+
+ * DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128".
+ http://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139
+ * Google Protocol Buffers, where it's called "Base 128 Varints".
+ https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
+ * Apache Lucene, where it's called "VInt"
+ http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
+ * Apache Avro uses this as a basis for integer encoding, adding ZigZag on
+ top of it for signed ints
+ http://avro.apache.org/docs/current/spec.html#binary_encode_primitive
+
+ More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128
+
+ This particular implementation supports serialized values up to 8 bytes long.
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.groups = []
+ while True:
+ _ = self._root.Group(self._io, self, self._root)
+ self.groups.append(_)
+ if not (_.has_next):
+ break
+
+ class Group(KaitaiStruct):
+ """One byte group, clearly divided into 7-bit "value" and 1-bit "has continuation
+ in the next byte" flag.
+ """
+ def __init__(self, _io, _parent=None, _root=None):
+ self._io = _io
+ self._parent = _parent
+ self._root = _root if _root else self
+ self._read()
+
+ def _read(self):
+ self.b = self._io.read_u1()
+
+ @property
+ def has_next(self):
+ """If true, then we have more bytes to read."""
+ if hasattr(self, '_m_has_next'):
+ return self._m_has_next if hasattr(self, '_m_has_next') else None
+
+ self._m_has_next = (self.b & 128) != 0
+ return self._m_has_next if hasattr(self, '_m_has_next') else None
+
+ @property
+ def value(self):
+ """The 7-bit (base128) numeric value of this group."""
+ if hasattr(self, '_m_value'):
+ return self._m_value if hasattr(self, '_m_value') else None
+
+ self._m_value = (self.b & 127)
+ return self._m_value if hasattr(self, '_m_value') else None
+
+
+ @property
+ def len(self):
+ if hasattr(self, '_m_len'):
+ return self._m_len if hasattr(self, '_m_len') else None
+
+ self._m_len = len(self.groups)
+ return self._m_len if hasattr(self, '_m_len') else None
+
+ @property
+ def value(self):
+ """Resulting value as normal integer."""
+ if hasattr(self, '_m_value'):
+ return self._m_value if hasattr(self, '_m_value') else None
+
+ self._m_value = (((((((self.groups[0].value + ((self.groups[1].value << 7) if self.len >= 2 else 0)) + ((self.groups[2].value << 14) if self.len >= 3 else 0)) + ((self.groups[3].value << 21) if self.len >= 4 else 0)) + ((self.groups[4].value << 28) if self.len >= 5 else 0)) + ((self.groups[5].value << 35) if self.len >= 6 else 0)) + ((self.groups[6].value << 42) if self.len >= 7 else 0)) + ((self.groups[7].value << 49) if self.len >= 8 else 0))
+ return self._m_value if hasattr(self, '_m_value') else None
+
+
diff --git a/test/mitmproxy/contentviews/test_protobuf.py b/test/mitmproxy/contentviews/test_protobuf.py
index 71e51576..6c6e37f2 100644
--- a/test/mitmproxy/contentviews/test_protobuf.py
+++ b/test/mitmproxy/contentviews/test_protobuf.py
@@ -1,52 +1,31 @@
-from unittest import mock
import pytest
from mitmproxy.contentviews import protobuf
from mitmproxy.test import tutils
from . import full_eval
+data = tutils.test_data.push("mitmproxy/contentviews/test_protobuf_data/")
+
def test_view_protobuf_request():
v = full_eval(protobuf.ViewProtobuf())
- p = tutils.test_data.path("mitmproxy/data/protobuf01")
-
- with mock.patch('mitmproxy.contentviews.protobuf.ViewProtobuf.is_available'):
- with mock.patch('subprocess.Popen') as n:
- m = mock.Mock()
- attrs = {'communicate.return_value': (b'1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"', True)}
- m.configure_mock(**attrs)
- n.return_value = m
-
- with open(p, "rb") as f:
- data = f.read()
- content_type, output = v(data)
- assert content_type == "Protobuf"
- assert output[0] == [('text', b'1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"')]
-
- m.communicate = mock.MagicMock()
- m.communicate.return_value = (None, None)
- with pytest.raises(ValueError, matches="Failed to parse input."):
- v(b'foobar')
-
-
-def test_view_protobuf_availability():
- with mock.patch('subprocess.Popen') as n:
- m = mock.Mock()
- attrs = {'communicate.return_value': (b'libprotoc fake version', True)}
- m.configure_mock(**attrs)
- n.return_value = m
- assert protobuf.ViewProtobuf().is_available()
-
- m = mock.Mock()
- attrs = {'communicate.return_value': (b'command not found', True)}
- m.configure_mock(**attrs)
- n.return_value = m
- assert not protobuf.ViewProtobuf().is_available()
-
-
-def test_view_protobuf_fallback():
- with mock.patch('subprocess.Popen.communicate') as m:
- m.side_effect = OSError()
- v = full_eval(protobuf.ViewProtobuf())
- with pytest.raises(NotImplementedError, matches='protoc not found'):
- v(b'foobar')
+ p = data.path("protobuf01")
+
+ with open(p, "rb") as f:
+ raw = f.read()
+ content_type, output = v(raw)
+ assert content_type == "Protobuf"
+ assert output == [[('text', '1: 3bbc333c-e61c-433b-819a-0b9a8cc103b8')]]
+ with pytest.raises(ValueError, matches="Failed to parse input."):
+ v(b'foobar')
+
+
+@pytest.mark.parametrize("filename", ["protobuf02", "protobuf03"])
+def test_format_pbuf(filename):
+ path = data.path(filename)
+ with open(path, "rb") as f:
+ input = f.read()
+ with open(path + "-decoded") as f:
+ expected = f.read()
+
+ assert protobuf.format_pbuf(input) == expected
diff --git a/test/mitmproxy/data/protobuf01 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf01
index fbfdbff3..fbfdbff3 100644
--- a/test/mitmproxy/data/protobuf01
+++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf01
diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf02 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02
new file mode 100644
index 00000000..a47c45d5
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02
Binary files differ
diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded
new file mode 100644
index 00000000..9be61e28
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded
@@ -0,0 +1,65 @@
+1 {
+ 1: tpbuf
+ 4 {
+ 1: Person
+ 2 {
+ 1: name
+ 3: 1
+ 4: 2
+ 5: 9
+ }
+ 2 {
+ 1: id
+ 3: 2
+ 4: 2
+ 5: 5
+ }
+ 2 {
+ 1 {
+ 12: 1818845549
+ }
+ 3: 3
+ 4: 1
+ 5: 9
+ }
+ 2 {
+ 1: phone
+ 3: 4
+ 4: 3
+ 5: 11
+ 6: .Person.PhoneNumber
+ }
+ 3 {
+ 1: PhoneNumber
+ 2 {
+ 1: number
+ 3: 1
+ 4: 2
+ 5: 9
+ }
+ 2 {
+ 1: type
+ 3: 2
+ 4: 1
+ 5: 14
+ 6: .Person.PhoneType
+ 7: HOME
+ }
+ }
+ 4 {
+ 1: PhoneType
+ 2 {
+ 1: MOBILE
+ 2: 0
+ }
+ 2 {
+ 1: HOME
+ 2: 1
+ }
+ 2 {
+ 1: WORK
+ 2: 2
+ }
+ }
+ }
+}
diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf03 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03
new file mode 100644
index 00000000..9fb230b3
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03
@@ -0,0 +1 @@
+€ ð \ No newline at end of file
diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded
new file mode 100644
index 00000000..3d3392e1
--- /dev/null
+++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded
@@ -0,0 +1,4 @@
+2 {
+3: 3840
+4: 2160
+}