diff options
author | Maximilian Hils <git@maximilianhils.com> | 2017-01-06 00:58:21 +0100 |
---|---|---|
committer | Maximilian Hils <git@maximilianhils.com> | 2017-01-07 23:08:50 +0100 |
commit | c21ee90debe13154f4d34aed1f088796a2d0c02c (patch) | |
tree | 07e606a175a0e66d508356b40526d33b89b385d8 /mitmproxy/utils | |
parent | 042261266f5b901b2b0745fd108c9a92525e9087 (diff) | |
download | mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.tar.gz mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.tar.bz2 mitmproxy-c21ee90debe13154f4d34aed1f088796a2d0c02c.zip |
add strutils.replace_surrogates
Diffstat (limited to 'mitmproxy/utils')
-rw-r--r-- | mitmproxy/utils/strutils.py | 11 |
1 file changed, 11 insertions, 0 deletions
diff --git a/mitmproxy/utils/strutils.py b/mitmproxy/utils/strutils.py
index 29465615..9c5e6bc8 100644
--- a/mitmproxy/utils/strutils.py
+++ b/mitmproxy/utils/strutils.py
@@ -25,6 +25,17 @@ def always_str(str_or_bytes: Optional[AnyStr], *decode_args) -> Optional[str]:
         raise TypeError("Expected str or bytes, but got {}.".format(type(str_or_bytes).__name__))
 
 
+def replace_surrogates(text: str, errors='replace') -> str:
+    """Convert surrogates to replacement characters (e.g., "\udc80" becomes "�")
+    by applying a different error handler.
+
+    Uses the "replace" error handler by default, but any input
+    error handler may be specified.
+
+    For an introduction to surrogateescape, see https://www.python.org/dev/peps/pep-0383/.
+    """
+    return text.encode('utf-8', 'surrogateescape').decode('utf-8', errors)
+
 # Translate control characters to "safe" characters. This implementation initially
 # replaced them with the matching control pictures (http://unicode.org/charts/PDF/U2400.pdf),
 # but that turned out to render badly with monospace fonts. We are back to "." therefore.