diff options
| author | Aldo Cortesi <aldo@nullcube.com> | 2016-10-20 10:11:58 +1300 |
|---|---|---|
| committer | Aldo Cortesi <aldo@nullcube.com> | 2016-10-20 10:11:58 +1300 |
| commit | f45f4e677e8cddba8160d1e4e02ca8a4515e3456 (patch) | |
| tree | a48ce5978fa24b2e92d770b1263fd3952055d9d7 /mitmproxy | |
| parent | 1407830280383e50a8af848a0c564c4912df5a52 (diff) | |
| download | mitmproxy-f45f4e677e8cddba8160d1e4e02ca8a4515e3456.tar.gz mitmproxy-f45f4e677e8cddba8160d1e4e02ca8a4515e3456.tar.bz2 mitmproxy-f45f4e677e8cddba8160d1e4e02ca8a4515e3456.zip | |
netlib.strutils -> mitmproxy.utils.strutils
Diffstat (limited to 'mitmproxy')
| -rw-r--r-- | mitmproxy/addons/dumper.py | 2 | ||||
| -rw-r--r-- | mitmproxy/addons/serverplayback.py | 2 | ||||
| -rw-r--r-- | mitmproxy/contentviews.py | 2 | ||||
| -rw-r--r-- | mitmproxy/flowfilter.py | 2 | ||||
| -rw-r--r-- | mitmproxy/io_compat.py | 2 | ||||
| -rw-r--r-- | mitmproxy/proxy/config.py | 2 | ||||
| -rw-r--r-- | mitmproxy/proxy/protocol/websockets.py | 2 | ||||
| -rw-r--r-- | mitmproxy/tools/console/grideditor/col_bytes.py | 2 | ||||
| -rw-r--r-- | mitmproxy/tools/console/master.py | 4 | ||||
| -rw-r--r-- | mitmproxy/utils/strutils.py | 142 |
10 files changed, 153 insertions, 9 deletions
diff --git a/mitmproxy/addons/dumper.py b/mitmproxy/addons/dumper.py index 013fa337..d690c000 100644 --- a/mitmproxy/addons/dumper.py +++ b/mitmproxy/addons/dumper.py @@ -9,7 +9,7 @@ from mitmproxy import ctx from mitmproxy import exceptions from mitmproxy import flowfilter from mitmproxy.utils import human -from netlib import strutils +from mitmproxy.utils import strutils def indent(n, text): diff --git a/mitmproxy/addons/serverplayback.py b/mitmproxy/addons/serverplayback.py index 1161ce23..0b52918c 100644 --- a/mitmproxy/addons/serverplayback.py +++ b/mitmproxy/addons/serverplayback.py @@ -1,7 +1,7 @@ import urllib import hashlib -from netlib import strutils +from mitmproxy.utils import strutils from mitmproxy import exceptions from mitmproxy import ctx from mitmproxy import io diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 9a70b104..07bf09f5 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -35,7 +35,7 @@ from mitmproxy import exceptions from mitmproxy.contrib.wbxml import ASCommandResponse from netlib import http from netlib import multidict -from netlib import strutils +from mitmproxy.utils import strutils from netlib.http import url try: diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py index f1454fd1..ee5224c6 100644 --- a/mitmproxy/flowfilter.py +++ b/mitmproxy/flowfilter.py @@ -40,7 +40,7 @@ from mitmproxy import http from mitmproxy import tcp from mitmproxy import flow -from netlib import strutils +from mitmproxy.utils import strutils import pyparsing as pp from typing import Callable diff --git a/mitmproxy/io_compat.py b/mitmproxy/io_compat.py index 7f8f41b3..68c747ea 100644 --- a/mitmproxy/io_compat.py +++ b/mitmproxy/io_compat.py @@ -5,7 +5,7 @@ This module handles the import of mitmproxy flows generated by old versions. 
from typing import Any from mitmproxy import version -from netlib import strutils +from mitmproxy.utils import strutils def convert_011_012(data): diff --git a/mitmproxy/proxy/config.py b/mitmproxy/proxy/config.py index 201dfdf7..a6fc739b 100644 --- a/mitmproxy/proxy/config.py +++ b/mitmproxy/proxy/config.py @@ -4,7 +4,7 @@ import os import re from typing import Any -from netlib import strutils +from mitmproxy.utils import strutils from OpenSSL import SSL, crypto diff --git a/mitmproxy/proxy/protocol/websockets.py b/mitmproxy/proxy/protocol/websockets.py index 816ec92d..636748a1 100644 --- a/mitmproxy/proxy/protocol/websockets.py +++ b/mitmproxy/proxy/protocol/websockets.py @@ -4,7 +4,7 @@ import struct from OpenSSL import SSL from mitmproxy import exceptions from mitmproxy.proxy.protocol import base -from netlib import strutils +from mitmproxy.utils import strutils from netlib import tcp from netlib import websockets diff --git a/mitmproxy/tools/console/grideditor/col_bytes.py b/mitmproxy/tools/console/grideditor/col_bytes.py index c951ce44..f580e947 100644 --- a/mitmproxy/tools/console/grideditor/col_bytes.py +++ b/mitmproxy/tools/console/grideditor/col_bytes.py @@ -4,7 +4,7 @@ from typing import Callable, Optional import urwid from mitmproxy.tools.console import signals from mitmproxy.tools.console.grideditor import base -from netlib import strutils +from mitmproxy.utils import strutils def read_file(filename: str, callback: Callable[..., None], escaped: bool) -> Optional[str]: diff --git a/mitmproxy/tools/console/master.py b/mitmproxy/tools/console/master.py index 3cc721b2..7ff0026e 100644 --- a/mitmproxy/tools/console/master.py +++ b/mitmproxy/tools/console/master.py @@ -35,7 +35,9 @@ from mitmproxy.tools.console import signals from mitmproxy.tools.console import statusbar from mitmproxy.tools.console import window from mitmproxy.flowfilter import FMarked -from netlib import tcp, strutils +from mitmproxy.utils import strutils + +from netlib import tcp 
# Trailing context of the preceding mitmproxy/tools/console/master.py hunk:
#   EVENTLOG_SIZE = 500
# diff --git a/mitmproxy/utils/strutils.py b/mitmproxy/utils/strutils.py
# new file mode 100644, index 00000000..57cfbc79, @@ -0,0 +1,142 @@
import re
import codecs


def always_bytes(unicode_or_bytes, *encode_args):
    """
    Return the argument as :py:class:`bytes`.

    Args:
        unicode_or_bytes: A str (encoded with ``encode_args``), or bytes/None
            (both returned unchanged).
        encode_args: Positional arguments forwarded to :py:meth:`str.encode`.

    Raises:
        TypeError, if the argument is neither str, bytes nor None.
    """
    if isinstance(unicode_or_bytes, str):
        return unicode_or_bytes.encode(*encode_args)
    if isinstance(unicode_or_bytes, bytes) or unicode_or_bytes is None:
        return unicode_or_bytes
    raise TypeError("Expected str or bytes, but got {}.".format(type(unicode_or_bytes).__name__))


def native(s, *encoding_opts):
    """
    Convert :py:class:`bytes` or :py:class:`str` to the native
    :py:class:`str` type, decoding bytes with ``encoding_opts`` (which are
    forwarded to :py:meth:`bytes.decode`).

    https://www.python.org/dev/peps/pep-3333/#a-note-on-string-types

    Raises:
        TypeError, if the argument is neither bytes nor str.
    """
    if not isinstance(s, (bytes, str)):
        # Wrap in a 1-tuple: a bare "%r" % s would unpack s if it is a tuple
        # and produce a misleading formatting error instead of this message.
        raise TypeError("%r is neither bytes nor unicode" % (s,))
    if isinstance(s, bytes):
        return s.decode(*encoding_opts)
    return s


# Translate control characters to "safe" characters. This implementation initially
# replaced them with the matching control pictures (http://unicode.org/charts/PDF/U2400.pdf),
# but that turned out to render badly with monospace fonts. We are back to "." therefore.
_control_char_trans = {
    code: ord(".")  # code + 0x2400 for unicode control group pictures
    for code in range(32)
}
_control_char_trans[127] = ord(".")  # 0x2421
# Same table, but leaving carriage return, newline and tab untouched.
_control_char_trans_newline = {
    code: replacement
    for code, replacement in _control_char_trans.items()
    if chr(code) not in "\r\n\t"
}

_control_char_trans = str.maketrans(_control_char_trans)
_control_char_trans_newline = str.maketrans(_control_char_trans_newline)


def escape_control_characters(text: str, keep_spacing=True) -> str:
    """
    Replace all C0 control characters (U+0000 - U+001F) and DEL (U+007F) in
    the given text with a single "." each.

    Args:
        keep_spacing: If True, tabs, carriage returns and newlines will not
            be replaced.

    Raises:
        ValueError, if text is not a str.
    """
    if not isinstance(text, str):
        raise ValueError("text type must be unicode but is {}".format(type(text).__name__))

    trans = _control_char_trans_newline if keep_spacing else _control_char_trans
    return text.translate(trans)


# An escaped single quote (\') preceded by an even number of backslashes,
# i.e. one where the backslash really belongs to the quote.
_ESCAPED_QUOTE_RE = re.compile(r"(?<!\\)(\\\\)*\\'")
# Same idea for the \n, \r and \t escape sequences.
_ESCAPED_SPACING_RE = re.compile(r"(?<!\\)(\\\\)*\\([nrt])")
_SPACING_BY_ESCAPE = {"n": "\n", "r": "\r", "t": "\t"}


def bytes_to_escaped_str(data, keep_spacing=False, escape_single_quotes=False):
    """
    Take bytes and return a safe string that can be displayed to the user.

    Double quotes are never escaped. Single quotes are only escaped if
    escape_single_quotes is True, in which case
        "'" + bytes_to_escaped_str(..., escape_single_quotes=True) + "'"
    gives a valid Python string.

    Args:
        keep_spacing: If True, tabs, carriage returns and newlines will not
            be escaped.
        escape_single_quotes: If True, single quotes are returned as ``\\'``.

    Raises:
        ValueError, if data is not bytes.
    """
    if not isinstance(data, bytes):
        raise ValueError("data must be bytes, but is {}".format(data.__class__.__name__))
    # We always insert a double-quote here so that we get a single-quoted string back
    # https://stackoverflow.com/questions/29019340/why-does-python-use-different-quotes-for-representing-strings-depending-on-their
    # The repr looks like b'"...' - strip the leading b'" and the trailing '.
    ret = repr(b'"' + data)[3:-1]
    if not escape_single_quotes:
        ret = _ESCAPED_QUOTE_RE.sub(lambda m: (m.group(1) or "") + "'", ret)
    if keep_spacing:
        ret = _ESCAPED_SPACING_RE.sub(
            lambda m: (m.group(1) or "") + _SPACING_BY_ESCAPE[m.group(2)],
            ret
        )
    return ret


def escaped_str_to_bytes(data):
    """
    Take an escaped string and return the unescaped bytes equivalent.

    Raises:
        ValueError, if data is not a str or the escape sequence is invalid.
    """
    if not isinstance(data, str):
        raise ValueError("data must be str, but is {}".format(data.__class__.__name__))

    # This one is difficult - we use an undocumented Python API here
    # as per http://stackoverflow.com/a/23151714/934719
    return codecs.escape_decode(data)[0]


def is_mostly_bin(s: bytes) -> bool:
    """
    Heuristically decide whether s is binary data.

    Only the first 100 bytes are inspected; the data counts as binary if more
    than 30% of them are neither printable ASCII nor common whitespace
    (bytes 9-13). Empty or None input is never binary.
    """
    if not s:
        return False

    sample = s[:100]
    suspicious = sum(
        byte < 9 or 13 < byte < 32 or 126 < byte
        for byte in sample
    )
    return suspicious / len(sample) > 0.3


def is_xml(s: bytes) -> bool:
    """Return True if s, ignoring leading whitespace, starts with "<"."""
    return s.strip().startswith(b"<")


def clean_hanging_newline(t):
    """
    Many editors will silently add a newline to the final line of a
    document (I'm looking at you, Vim). This function fixes this common
    problem at the risk of removing a hanging newline in the rare cases
    where the user actually intends it.
    """
    if t and t[-1] == "\n":
        return t[:-1]
    return t


def hexdump(s):
    """
    Args:
        s: The bytes to dump, 16 bytes per row.

    Returns:
        A generator of (offset, hex, str) tuples
    """
    for start in range(0, len(s), 16):
        offset = "{:0=10x}".format(start)
        part = s[start:start + 16]
        hex_column = " ".join("{:0=2x}".format(byte) for byte in part)
        hex_column = hex_column.ljust(47)  # 16*2 + 15
        # Non-ASCII bytes decode to U+FFFD, which is shown as "." just like
        # control characters.
        part_repr = escape_control_characters(
            part.decode("ascii", "replace").replace(u"\ufffd", u"."),
            False
        )
        yield (offset, hex_column, part_repr)
