From ee2950cd1979e7f5fdc6d5df43d83d0a6a0ce5dc Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Fri, 25 May 2012 18:10:31 -0700 Subject: Fix a crashing bug when replacing text in a flow with unicode bodies. --- libmproxy/flow.py | 12 ++++++------ libmproxy/utils.py | 14 +++++++++++++- test/test_flow.py | 5 +++++ test/test_utils.py | 5 +++++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 2258868b..6b3d868e 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -249,9 +249,9 @@ class ODict: """ nlst, count = [], 0 for i in self.lst: - k, c = re.subn(pattern, repl, i[0], *args, **kwargs) + k, c = utils.safe_subn(pattern, repl, i[0], *args, **kwargs) count += c - v, c = re.subn(pattern, repl, i[1], *args, **kwargs) + v, c = utils.safe_subn(pattern, repl, i[1], *args, **kwargs) count += c nlst.append([k, v]) self.lst = nlst @@ -560,8 +560,8 @@ class Request(HTTPMsg): Returns the number of replacements made. """ with decoded(self): - self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs) - self.path, pc = re.subn(pattern, repl, self.path, *args, **kwargs) + self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs) + self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs) c += pc c += self.headers.replace(pattern, repl, *args, **kwargs) return c @@ -740,7 +740,7 @@ class Response(HTTPMsg): Returns the number of replacements made. """ with decoded(self): - self.content, c = re.subn(pattern, repl, self.content, *args, **kwargs) + self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs) c += self.headers.replace(pattern, repl, *args, **kwargs) return c @@ -869,7 +869,7 @@ class Error(controller.Msg): FIXME: Is replace useful on an Error object?? 
""" - self.msg, c = re.subn(pattern, repl, self.msg, *args, **kwargs) + self.msg, c = utils.safe_subn(pattern, repl, self.msg, *args, **kwargs) return c diff --git a/libmproxy/utils.py b/libmproxy/utils.py index e9c90320..337d4378 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -12,7 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import os, datetime, urlparse, string, urllib +import os, datetime, urlparse, string, urllib, re import time, functools, cgi import json @@ -309,3 +309,15 @@ def parse_size(s): return int(s) * mult except ValueError: raise ValueError("Invalid size specification: %s"%s) + + +def safe_subn(pattern, repl, target, *args, **kwargs): + """ + There are Unicode conversion problems with re.subn. We try to smooth + that over by casting the pattern and replacement to strings. We really + need a better solution that is aware of the actual content encoding. + """ + return re.subn(str(pattern), str(repl), target, *args, **kwargs) + + + diff --git a/test/test_flow.py b/test/test_flow.py index 627d9cd0..0b7fee4f 100644 --- a/test/test_flow.py +++ b/test/test_flow.py @@ -259,6 +259,11 @@ class uFlow(libpry.AutoTree): f = flow.Flow(None) f.request = tutils.treq() + def test_replace_unicode(self): + f = tutils.tflow_full() + f.response.content = "\xc2foo" + f.replace("foo", u"bar") + def test_replace(self): f = tutils.tflow_full() f.request.headers["foo"] = ["foo"] diff --git a/test/test_utils.py b/test/test_utils.py index f279ce65..8bff0303 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -175,8 +175,13 @@ class u_cleanBin(libpry.AutoTree): assert utils.cleanBin("\nne", True) == ".ne" +class u_safe_subn(libpry.AutoTree): + def test_simple(self): + assert utils.safe_subn("foo", u"bar", "\xc2foo") + tests = [ + u_safe_subn(), u_cleanBin(), u_parse_content_type(), uformat_timestamp(), -- cgit v1.2.3