about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Maximilian Hils <git@maximilianhils.com> 2015-09-12 17:03:09 +0200
committer Maximilian Hils <git@maximilianhils.com> 2015-09-12 17:03:09 +0200
commit 997fcde8ce94be9d8decddd4bc783106dbb41ab3 (patch)
tree c668d1e49dcc9298bed2a85de136e7f053d72b17
parent a38142d5950a899c6e3f854841a45f4785515761 (diff)
download mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.gz
mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.bz2
mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.zip
make clean_bin unicode-aware
-rw-r--r-- netlib/utils.py 39
-rw-r--r-- netlib/websockets/frame.py 2
-rw-r--r-- setup.py 1
-rw-r--r-- test/test_utils.py 15
4 files changed, 38 insertions, 19 deletions
diff --git a/netlib/utils.py b/netlib/utils.py
index aae187da..d6774419 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -5,6 +5,8 @@ import urllib
import urlparse
import string
import re
+import six
+import unicodedata
def isascii(s):
@@ -20,22 +22,31 @@ def bytes_to_int(i):
return int(i.encode('hex'), 16)
-def cleanBin(s, fixspacing=False):
+def clean_bin(s, keep_spacing=True):
"""
- Cleans binary data to make it safe to display. If fixspacing is True,
- tabs, newlines and so forth will be maintained, if not, they will be
- replaced with a placeholder.
+ Cleans binary data to make it safe to display.
+
+ Args:
+ keep_spacing: If False, tabs and newlines will also be replaced.
"""
- parts = []
- for i in s:
- o = ord(i)
- if (o > 31 and o < 127):
- parts.append(i)
- elif i in "\n\t" and not fixspacing:
- parts.append(i)
+ if isinstance(s, six.text_type):
+ if keep_spacing:
+ keep = u" \n\r\t"
+ else:
+ keep = u" "
+ return u"".join(
+ ch if (unicodedata.category(ch)[0] not in "CZ" or ch in keep) else u"."
+ for ch in s
+ )
+ else:
+ if keep_spacing:
+ keep = b"\n\r\t"
else:
- parts.append(".")
- return "".join(parts)
+ keep = b""
+ return b"".join(
+ ch if (31 < ord(ch) < 127 or ch in keep) else b"."
+ for ch in s
+ )
def hexdump(s):
@@ -52,7 +63,7 @@ def hexdump(s):
x += " "
x += " ".join(" " for i in range(16 - len(part)))
parts.append(
- (o, x, cleanBin(part, True))
+ (o, x, clean_bin(part, False))
)
return parts
diff --git a/netlib/websockets/frame.py b/netlib/websockets/frame.py
index 1c4a03b2..e3ff1405 100644
--- a/netlib/websockets/frame.py
+++ b/netlib/websockets/frame.py
@@ -236,7 +236,7 @@ class Frame(object):
def human_readable(self):
ret = self.header.human_readable()
if self.payload:
- ret = ret + "\nPayload:\n" + utils.cleanBin(self.payload)
+ ret = ret + "\nPayload:\n" + utils.clean_bin(self.payload)
return ret
def __repr__(self):
diff --git a/setup.py b/setup.py
index a4da6e69..c24d37c0 100644
--- a/setup.py
+++ b/setup.py
@@ -46,6 +46,7 @@ setup(
"cryptography>=1.0",
"passlib>=1.6.2",
"hpack>=1.0.1",
+ "six>=1.9.0",
"certifi"
],
extras_require={
diff --git a/test/test_utils.py b/test/test_utils.py
index 374d09ba..9dba5d35 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -15,10 +15,17 @@ def test_hexdump():
def test_cleanBin():
- assert utils.cleanBin("one") == "one"
- assert utils.cleanBin("\00ne") == ".ne"
- assert utils.cleanBin("\nne") == "\nne"
- assert utils.cleanBin("\nne", True) == ".ne"
+ assert utils.clean_bin(b"one") == b"one"
+ assert utils.clean_bin(b"\00ne") == b".ne"
+ assert utils.clean_bin(b"\nne") == b"\nne"
+ assert utils.clean_bin(b"\nne", False) == b".ne"
+ assert utils.clean_bin(u"\u2605".encode("utf8")) == b"..."
+
+ assert utils.clean_bin(u"one") == u"one"
+ assert utils.clean_bin(u"\00ne") == u".ne"
+ assert utils.clean_bin(u"\nne") == u"\nne"
+ assert utils.clean_bin(u"\nne", False) == u".ne"
+ assert utils.clean_bin(u"\u2605") == u"\u2605"
def test_pretty_size():