aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralts <stephen@evilrobotstuff.com>2011-07-16 02:47:06 -0700
committeralts <stephen@evilrobotstuff.com>2011-07-16 02:47:06 -0700
commit6dc0f105ccabeb10f557dc8baa51d3ce08b3c8ee (patch)
tree977c6339d1562a333b1d2087853248ea5ed32771
parent94ae720a220da4beaa2fc6111b4cafb60b41d33b (diff)
downloadmitmproxy-6dc0f105ccabeb10f557dc8baa51d3ce08b3c8ee.tar.gz
mitmproxy-6dc0f105ccabeb10f557dc8baa51d3ce08b3c8ee.tar.bz2
mitmproxy-6dc0f105ccabeb10f557dc8baa51d3ce08b3c8ee.zip
Adds support for content encoding, namely gzip and deflate
-rw-r--r--libmproxy/cmdline.py2
-rw-r--r--libmproxy/console.py40
-rw-r--r--libmproxy/encoding.py43
-rw-r--r--libmproxy/flow.py9
-rw-r--r--libmproxy/proxy.py21
-rw-r--r--test/test_encoding.py31
-rw-r--r--test/test_utils.py2
7 files changed, 122 insertions, 26 deletions
diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py
index e3e6ef4e..2d78e5d1 100644
--- a/libmproxy/cmdline.py
+++ b/libmproxy/cmdline.py
@@ -111,7 +111,7 @@ def common_options(parser):
)
parser.add_option(
"-z",
- action="store_false", dest="anticomp", default=True,
+ action="store_false", dest="anticomp", default=False,
help="Try to convince servers to send us un-compressed data."
)
diff --git a/libmproxy/console.py b/libmproxy/console.py
index 09149186..c1e14b33 100644
--- a/libmproxy/console.py
+++ b/libmproxy/console.py
@@ -1,15 +1,15 @@
# Copyright (C) 2010 Aldo Cortesi
-#
+#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -18,7 +18,7 @@ import os.path, sys
import cStringIO
import urwid.raw_display
import urwid
-import controller, utils, filt, proxy, flow
+import controller, utils, filt, proxy, flow, encoding
VIEW_CUTOFF = 1024*100
@@ -77,7 +77,7 @@ def format_flow(f, focus, extended=False, padding=2):
else:
ts = " "
- txt.append("\n")
+ txt.append("\n")
txt.append(("text", ts))
txt.append(" "*(padding+2))
met = ""
@@ -97,6 +97,11 @@ def format_flow(f, focus, extended=False, padding=2):
if t:
t = t[0].split(";")[0]
txt.append(("text", " %s"%t))
+ e = f.response.headers["content-encoding"]
+ if e:
+ e = e[0]
+ else:
+ e = "identity"
if f.response.content:
txt.append(", %s"%utils.pretty_size(len(f.response.content)))
elif f.error:
@@ -121,7 +126,7 @@ def int_version(v):
for i in range(min(SIG, len(v))):
x += int(v[i]) * 10**(SIG-i)
return x
-
+
# We have to do this to be portable over 0.9.8 and 0.9.9 If compatibility
# becomes a pain to maintain, we'll just mandate 0.9.9 or newer.
@@ -295,8 +300,13 @@ class ConnectionView(WWrap):
def _conn_text(self, conn, viewmode):
if conn:
+ e = conn.headers["content-encoding"]
+ if e:
+ e = e[0]
+ else:
+ e = "identity"
return self.master._cached_conn_text(
- conn.content,
+ encoding.decode(e, conn.content),
tuple([tuple(i) for i in conn.headers.lst]),
viewmode
)
@@ -395,7 +405,7 @@ class ConnectionView(WWrap):
response = self.flow.response
response.msg = msg
self.master.refresh_connection(self.flow)
-
+
def edit(self, part):
if self.state.view_flow_mode == VIEW_FLOW_REQUEST:
conn = self.flow.request
@@ -577,7 +587,7 @@ class PathEdit(urwid.Edit, _PathCompleter):
else:
self.reset()
return urwid.Edit.keypress(self, size, key)
-
+
class ActionBar(WWrap):
def __init__(self):
@@ -656,7 +666,7 @@ class StatusBar(WWrap):
('statusbar_text', ("[%s]"%len(self.master.state.flow_list)).ljust(7)),
]
t.extend(self.get_status())
-
+
if self.master.server:
boundaddr = "[%s:%s]"%(self.master.server.address or "*", self.master.server.port)
else:
@@ -821,9 +831,9 @@ class ConsoleMaster(flow.FlowMaster):
self.set_palette()
if options.response_script:
- self.set_response_script(options.response_script)
+ self.set_response_script(options.response_script)
if options.request_script:
- self.set_request_script(options.request_script)
+ self.set_request_script(options.request_script)
r = self.set_limit(options.limit)
if r:
@@ -1157,7 +1167,7 @@ class ConsoleMaster(flow.FlowMaster):
def _write_flows(self, path, flows):
self.state.last_saveload = path
if not path:
- return
+ return
path = os.path.expanduser(path)
try:
f = file(path, "wb")
@@ -1176,7 +1186,7 @@ class ConsoleMaster(flow.FlowMaster):
def load_flows(self, path):
if not path:
- return
+ return
self.state.last_saveload = path
path = os.path.expanduser(path)
try:
@@ -1307,7 +1317,7 @@ class ConsoleMaster(flow.FlowMaster):
def prompt_onekey(self, prompt, keys, callback):
"""
Keys are a set of (word, key) tuples. The appropriate key in the
- word is highlighted.
+ word is highlighted.
"""
prompt = [prompt, " ("]
mkup = []
diff --git a/libmproxy/encoding.py b/libmproxy/encoding.py
new file mode 100644
index 00000000..f280ed9f
--- /dev/null
+++ b/libmproxy/encoding.py
@@ -0,0 +1,43 @@
+"""
+ Utility functions for decoding response bodies.
+"""
+import cStringIO
+import gzip, zlib
+
+__ALL__ = ["ENCODINGS"]
+
+ENCODINGS = set(["identity", "gzip", "deflate"])
+
+def decode(encoding, content):
+ encoding_map = {
+ "identity": decode_identity,
+ "gzip": decode_gzip,
+ "deflate": decode_deflate,
+ }
+
+ return encoding_map.get(encoding, decode_identity)(content)
+
+def decode_identity(content):
+ """
+ Returns content unchanged. Identity is the default value of
+ Accept-Encoding headers.
+ """
+ return content
+
+def decode_gzip(content):
+ gfile = gzip.GzipFile(fileobj=cStringIO.StringIO(content))
+ return gfile.read()
+
+def decode_deflate(content):
+ """
+ Returns decompressed data for DEFLATE. Some servers may respond with
+ compressed data without a zlib header or checksum. An undocumented
+ feature of zlib permits the lenient decompression of data missing both
+ values.
+
+ http://bugs.python.org/issue5784
+ """
+ try:
+ return zlib.decompress(content)
+ except zlib.error:
+ return zlib.decompress(content, -15) \ No newline at end of file
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index bd07cfa7..568ec3d1 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -84,10 +84,10 @@ class ServerPlaybackState:
def count(self):
return sum([len(i) for i in self.fmap.values()])
-
+
def _hash(self, flow):
"""
- Calculates a loose hash of the flow request.
+ Calculates a loose hash of the flow request.
"""
r = flow.request
key = [
@@ -130,7 +130,7 @@ class StickyCookieState:
def ckey(self, m, f):
"""
- Returns a (domain, port, path) tuple.
+ Returns a (domain, port, path) tuple.
"""
return (
m["domain"] or f.request.host,
@@ -568,6 +568,9 @@ class FlowMaster(controller.Master):
f.request.anticache()
if self.anticomp:
f.request.anticomp()
+ else:
+ f.request.constrain_encoding()
+
if self.server_playback:
pb = self.do_server_playback(f)
if not pb:
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
index fe545335..a7cc31e8 100644
--- a/libmproxy/proxy.py
+++ b/libmproxy/proxy.py
@@ -1,7 +1,7 @@
"""
A simple proxy server implementation, which always reads all of a server
response into memory, performs some transformation, and then writes it back
- to the client.
+ to the client.
Development started from Neil Schemenauer's munchy.py
"""
@@ -9,7 +9,7 @@ import sys, os, string, socket, urlparse, re, select, copy, base64, time, Cookie
from email.utils import parsedate_tz, formatdate, mktime_tz
import shutil, tempfile
import optparse, SocketServer, ssl
-import utils, controller
+import utils, controller, encoding
NAME = "mitmproxy"
@@ -53,7 +53,7 @@ def read_chunked(fp):
if line == '\r\n' or line == '\n':
break
return content
-
+
def read_http_body(rfile, connection, headers, all):
if 'transfer-encoding' in headers:
@@ -156,11 +156,21 @@ class Request(controller.Msg):
def anticomp(self):
"""
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
+ Modifies this request to remove headers that will compress the
+ resource's data.
"""
self.headers["accept-encoding"] = ["identity"]
+ def constrain_encoding(self):
+ """
+ Limits the permissible Accept-Encoding values, based on what we can
+ decode appropriately.
+ """
+ if self.headers["accept-encoding"]:
+ self.headers["accept-encoding"] = [', '.join([
+ e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
+ ])]
+
def set_replay(self):
self.client_conn = None
@@ -381,7 +391,6 @@ class Response(controller.Msg):
modifications to make sure interception works properly.
"""
headers = self.headers.copy()
- utils.try_del(headers, 'accept-encoding')
utils.try_del(headers, 'proxy-connection')
utils.try_del(headers, 'connection')
utils.try_del(headers, 'keep-alive')
diff --git a/test/test_encoding.py b/test/test_encoding.py
new file mode 100644
index 00000000..ba0755d6
--- /dev/null
+++ b/test/test_encoding.py
@@ -0,0 +1,31 @@
+from libmproxy import encoding
+import libpry
+
+import cStringIO
+import gzip, zlib
+
+class udecode_identity(libpry.AutoTree):
+ def test_decode(self):
+ assert 'string' == encoding.decode('identity', 'string')
+
+ def test_fallthrough(self):
+ assert 'string' == encoding.decode('nonexistent encoding', 'string')
+
+class udecode_gzip(libpry.AutoTree):
+ def test_simple(self):
+ s = cStringIO.StringIO()
+ gf = gzip.GzipFile(fileobj=s, mode='wb')
+ gf.write('string')
+ gf.close()
+ assert 'string' == encoding.decode('gzip', s.getvalue())
+
+class udecode_deflate(libpry.AutoTree):
+ def test_simple(self):
+ assert 'string' == encoding.decode('deflate', zlib.compress('string'))
+ assert 'string' == encoding.decode('deflate', zlib.compress('string')[2:-4])
+
+tests = [
+ udecode_identity(),
+ udecode_gzip(),
+ udecode_deflate()
+]
diff --git a/test/test_utils.py b/test/test_utils.py
index 2b0f4342..2ff951d4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -98,7 +98,7 @@ class uHeaders(libpry.AutoTree):
out = repr(self.hd)
for i in expected:
assert out.find(i) >= 0
-
+
def test_dictToHeader2(self):
self.hd["one"] = ["uno"]
expected1 = "one: uno\r\n"