From 244ef243d75145a01d9029589de65be51299b3f3 Mon Sep 17 00:00:00 2001 From: Krzysztof Bielicki Date: Tue, 10 Mar 2015 10:44:06 +0100 Subject: [#514] Add support for ignoring payload params in multipart/form-data --- libmproxy/console/contentview.py | 24 ++---------------------- libmproxy/flow.py | 2 +- libmproxy/protocol/http.py | 21 ++++++++++++++++++++- libmproxy/utils.py | 27 +++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 24 deletions(-) diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py index 582723bb..84e9946d 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/console/contentview.py @@ -210,33 +210,13 @@ class ViewMultipart: prompt = ("multipart", "m") content_types = ["multipart/form-data"] def __call__(self, hdrs, content, limit): - v = hdrs.get_first("content-type") + v = utils.multipartdecode(hdrs, content) if v: - v = utils.parse_content_type(v) - if not v: - return - boundary = v[2].get("boundary") - if not boundary: - return - - rx = re.compile(r'\bname="([^"]+)"') - keys = [] - vals = [] - - for i in content.split("--" + boundary): - parts = i.splitlines() - if len(parts) > 1 and parts[0][0:2] != "--": - match = rx.search(parts[1]) - if match: - keys.append(match.group(1) + ":") - vals.append(netlib.utils.cleanBin( - "\n".join(parts[3+parts[2:].index(""):]) - )) r = [ urwid.Text(("highlight", "Form data:\n")), ] r.extend(common.format_keyvals( - zip(keys, vals), + v, key = "header", val = "text" )) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 43580109..0e9e481c 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -236,7 +236,7 @@ class ServerPlaybackState: ] if not self.ignore_content: - form_contents = r.get_form_urlencoded() + form_contents = r.get_form() if self.ignore_payload_params and form_contents: key.extend( p for p in form_contents diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 49310ec3..512cf75b 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -15,6 +15,7 @@ from ..proxy.connection import ServerConnection from .. import encoding, utils, controller, stateobject, proxy HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" +HDR_FORM_MULTIPART = "multipart/form-data" CONTENT_MISSING = 0 @@ -507,6 +508,19 @@ class HTTPRequest(HTTPMessage): """ self.headers["Host"] = [self.host] + def get_form(self): + """ + Retrieves the URL-encoded or multipart form data, returning an ODict object. + Returns an empty ODict if there is no data or the content-type + indicates non-form data. + """ + if self.content: + if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): + return self.get_form_urlencoded() + elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return self.get_form_multipart() + return ODict([]) + def get_form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. @@ -514,7 +528,12 @@ class HTTPRequest(HTTPMessage): indicates non-form data. """ if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return ODict(utils.urldecode(self.content)) + return ODict(utils.urldecode(self.content)) + return ODict([]) + + def get_form_multipart(self): + if self.content and self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return ODict(utils.multipartdecode(self.headers, self.content)) return ODict([]) def set_form_urlencoded(self, odict): diff --git a/libmproxy/utils.py b/libmproxy/utils.py index 51f2dc26..b84c589a 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -69,6 +69,33 @@ def urlencode(s): return urllib.urlencode(s, False) +def multipartdecode(hdrs, content): + """ + Takes a multipart boundary encoded string and returns list of (key, value) tuples. + """ + v = hdrs.get_first("content-type") + if v: + v = parse_content_type(v) + if not v: + return [] + boundary = v[2].get("boundary") + if not boundary: + return [] + + rx = re.compile(r'\bname="([^"]+)"') + r = [] + + for i in content.split("--" + boundary): + parts = i.splitlines() + if len(parts) > 1 and parts[0][0:2] != "--": + match = rx.search(parts[1]) + if match: + key = match.group(1) + value = "".join(parts[3+parts[2:].index(""):]) + r.append((key, value)) + return r + return [] + def pretty_size(size): suffixes = [ ("B", 2**10), -- cgit v1.2.3 From 953f9aa64166451a07502f05c15db47c053e6081 Mon Sep 17 00:00:00 2001 From: Krzysztof Bielicki Date: Mon, 16 Mar 2015 10:23:50 +0100 Subject: Added tests --- test/test_protocol_http.py | 21 +++++++++++++++++++++ test/test_utils.py | 19 ++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/test/test_protocol_http.py b/test/test_protocol_http.py index 16870777..23c3f469 100644 --- a/test/test_protocol_http.py +++ b/test/test_protocol_http.py @@ -1,3 +1,4 @@ +from mock import MagicMock from libmproxy.protocol.http import * from cStringIO import StringIO import tutils, tservers @@ -112,6 +113,26 @@ class TestHTTPRequest: r = tutils.treq() assert repr(r) + def test_get_form_for_urlencoded(self): + r = tutils.treq() + r.headers.add("content-type", "application/x-www-form-urlencoded") + r.get_form_urlencoded = MagicMock() + + r.get_form() + + assert r.get_form_urlencoded.called + + def test_get_form_for_multipart(self): + r = tutils.treq() + r.headers.add("content-type", "multipart/form-data") + r.get_form_multipart = MagicMock() + + r.get_form() + + assert r.get_form_multipart.called + + + class TestHTTPResponse: def test_read_from_stringio(self): diff --git a/test/test_utils.py b/test/test_utils.py index 78d1c072..a7902910 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,5 +1,5 @@ import json -from libmproxy import utils +from libmproxy import utils, flow import tutils utils.CERT_SLEEP_TIME = 0 @@ -52,6 +52,23 @@ def test_urldecode(): s = "one=two&three=four" assert len(utils.urldecode(s)) == 2 +def test_multipartdecode(): + boundary = 'somefancyboundary' + headers = flow.ODict([('content-type', ('multipart/form-data; boundary=%s' % boundary))]) + content = "--{0}\n" \ + "Content-Disposition: form-data; name=\"field1\"\n\n" \ + "value1\n" \ + "--{0}\n" \ + "Content-Disposition: form-data; name=\"field2\"\n\n" \ + "value2\n" \ + "--{0}--".format(boundary) + + form = utils.multipartdecode(headers, content) + + assert len(form) == 2 + assert form[0] == ('field1', 'value1') + assert form[1] == ('field2', 'value2') + def test_pretty_duration(): assert utils.pretty_duration(0.00001) == "0ms" assert utils.pretty_duration(0.0001) == "0ms" -- cgit v1.2.3