about | summary | refs | log | tree | commit | diff | stats
path: root/mitmproxy/utils
diff options
context:
space:
mode:
author: Maximilian Hils <git@maximilianhils.com>, 2017-01-06 00:58:21 +0100
committer: Maximilian Hils <git@maximilianhils.com>, 2017-01-07 23:08:50 +0100
commit: c21ee90debe13154f4d34aed1f088796a2d0c02c (patch)
tree: 07e606a175a0e66d508356b40526d33b89b385d8 /mitmproxy/utils
parent: 042261266f5b901b2b0745fd108c9a92525e9087 (diff)
download: mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.tar.gz
mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.tar.bz2
mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.zip
add strutils.replace_surrogates
Diffstat (limited to 'mitmproxy/utils')
-rw-r--r-- mitmproxy/utils/strutils.py 11
1 file changed, 11 insertions, 0 deletions
diff --git a/mitmproxy/utils/strutils.py b/mitmproxy/utils/strutils.py
index 29465615..9c5e6bc8 100644
--- a/mitmproxy/utils/strutils.py
+++ b/mitmproxy/utils/strutils.py
@@ -25,6 +25,17 @@ def always_str(str_or_bytes: Optional[AnyStr], *decode_args) -> Optional[str]:
raise TypeError("Expected str or bytes, but got {}.".format(type(str_or_bytes).__name__))
+def replace_surrogates(text: str, errors='replace') -> str:
+ """Convert surrogates to replacement characters (e.g., "\udc80" becomes "�")
+ by applying a different error handler.
+
+ Uses the "replace" error handler by default, but any input
+ error handler may be specified.
+
+ For an introduction to surrogateescape, see https://www.python.org/dev/peps/pep-0383/.
+ """
+ return text.encode('utf-8', 'surrogateescape').decode('utf-8', errors)
+
# Translate control characters to "safe" characters. This implementation initially
# replaced them with the matching control pictures (http://unicode.org/charts/PDF/U2400.pdf),
# but that turned out to render badly with monospace fonts. We are back to "." therefore.