about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Aldo Cortesi <aldo@nullcube.com> 2012-05-25 18:10:31 -0700
committer Aldo Cortesi <aldo@nullcube.com> 2012-05-25 18:10:31 -0700
commit ee2950cd1979e7f5fdc6d5df43d83d0a6a0ce5dc (patch)
tree f23ec4f984f9b15a8ce656113bf17c4cfa0f8d7b
parent a0c63b6108e76d24222b51def69c74fb88d72b0c (diff)
download mitmproxy-ee2950cd1979e7f5fdc6d5df43d83d0a6a0ce5dc.tar.gz
mitmproxy-ee2950cd1979e7f5fdc6d5df43d83d0a6a0ce5dc.tar.bz2
mitmproxy-ee2950cd1979e7f5fdc6d5df43d83d0a6a0ce5dc.zip
Fix a crashing bug when replacing text in a flow with unicode bodies.
-rw-r--r-- libmproxy/flow.py 12
-rw-r--r-- libmproxy/utils.py 14
-rw-r--r-- test/test_flow.py 5
-rw-r--r-- test/test_utils.py 5
4 files changed, 29 insertions, 7 deletions
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index 2258868b..6b3d868e 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -249,9 +249,9 @@ class ODict:
"""
nlst, count = [], 0
for i in self.lst:
- k, c = re.subn(pattern, repl, i[0], *args, **kwargs)
+ k, c = utils.safe_subn(pattern, repl, i[0], *args, **kwargs)
count += c
- v, c = re.subn(pattern, repl, i[1], *args, **kwargs)
+ v, c = utils.safe_subn(pattern, repl, i[1], *args, **kwargs)
count += c
nlst.append([k, v])
self.lst = nlst
@@ -560,8 +560,8 @@ class Request(HTTPMsg):
Returns the number of replacements made.
"""
with decoded(self):
- self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs)
- self.path, pc = re.subn(pattern, repl, self.path, *args, **kwargs)
+ self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
+ self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
c += pc
c += self.headers.replace(pattern, repl, *args, **kwargs)
return c
@@ -740,7 +740,7 @@ class Response(HTTPMsg):
Returns the number of replacements made.
"""
with decoded(self):
- self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs)
+ self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
c += self.headers.replace(pattern, repl, *args, **kwargs)
return c
@@ -869,7 +869,7 @@ class Error(controller.Msg):
FIXME: Is replace useful on an Error object??
"""
- self.msg, c = re.subn(pattern, repl, self.msg, *args, **kwargs)
+ self.msg, c = utils.safe_subn(pattern, repl, self.msg, *args, **kwargs)
return c
diff --git a/libmproxy/utils.py b/libmproxy/utils.py
index e9c90320..337d4378 100644
--- a/libmproxy/utils.py
+++ b/libmproxy/utils.py
@@ -12,7 +12,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import os, datetime, urlparse, string, urllib
+import os, datetime, urlparse, string, urllib, re
import time, functools, cgi
import json
@@ -309,3 +309,15 @@ def parse_size(s):
return int(s) * mult
except ValueError:
raise ValueError("Invalid size specification: %s"%s)
+
+
+def safe_subn(pattern, repl, target, *args, **kwargs):
+ """
+ There are Unicode conversion problems with re.subn. We try to smooth
+ that over by casting the pattern and replacement to strings. We really
+ need a better solution that is aware of the actual content encoding.
+ """
+ return re.subn(str(pattern), str(repl), target, *args, **kwargs)
+
+
+
diff --git a/test/test_flow.py b/test/test_flow.py
index 627d9cd0..0b7fee4f 100644
--- a/test/test_flow.py
+++ b/test/test_flow.py
@@ -259,6 +259,11 @@ class uFlow(libpry.AutoTree):
f = flow.Flow(None)
f.request = tutils.treq()
+ def test_replace_unicode(self):
+ f = tutils.tflow_full()
+ f.response.content = "\xc2foo"
+ f.replace("foo", u"bar")
+
def test_replace(self):
f = tutils.tflow_full()
f.request.headers["foo"] = ["foo"]
diff --git a/test/test_utils.py b/test/test_utils.py
index f279ce65..8bff0303 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -175,8 +175,13 @@ class u_cleanBin(libpry.AutoTree):
assert utils.cleanBin("\nne", True) == ".ne"
+class u_safe_subn(libpry.AutoTree):
+ def test_simple(self):
+ assert utils.safe_subn("foo", u"bar", "\xc2foo")
+
tests = [
+ u_safe_subn(),
u_cleanBin(),
u_parse_content_type(),
uformat_timestamp(),