From cccdc9842648518de7ee48ce461801954fc334c8 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Thu, 2 Jun 2016 12:31:41 +1200 Subject: Utils reorganisation: add netlib.strutils Extract a number of string and format-related functions to netlib.strutils. --- netlib/utils.py | 102 ++------------------------------------------------------ 1 file changed, 2 insertions(+), 100 deletions(-) (limited to 'netlib/utils.py') diff --git a/netlib/utils.py b/netlib/utils.py index b8408d1d..00e7e5d9 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -1,63 +1,12 @@ from __future__ import absolute_import, print_function, division import os.path import re -import codecs -import unicodedata import importlib import inspect import six - -def always_bytes(unicode_or_bytes, *encode_args): - if isinstance(unicode_or_bytes, six.text_type): - return unicode_or_bytes.encode(*encode_args) - return unicode_or_bytes - - -def native(s, *encoding_opts): - """ - Convert :py:class:`bytes` or :py:class:`unicode` to the native - :py:class:`str` type, using latin1 encoding if conversion is necessary. - - https://www.python.org/dev/peps/pep-3333/#a-note-on-string-types - """ - if not isinstance(s, (six.binary_type, six.text_type)): - raise TypeError("%r is neither bytes nor unicode" % s) - if six.PY3: - if isinstance(s, six.binary_type): - return s.decode(*encoding_opts) - else: - if isinstance(s, six.text_type): - return s.encode(*encoding_opts) - return s - - -def clean_bin(s, keep_spacing=True): - """ - Cleans binary data to make it safe to display. - - Args: - keep_spacing: If False, tabs and newlines will also be replaced. - """ - if isinstance(s, six.text_type): - if keep_spacing: - keep = u" \n\r\t" - else: - keep = u" " - return u"".join( - ch if (unicodedata.category(ch)[0] not in "CZ" or ch in keep) else u"." - for ch in s - ) - else: - if keep_spacing: - keep = (9, 10, 13) # \t, \n, \r, - else: - keep = () - return b"".join( - six.int2byte(ch) if (31 < ch < 127 or ch in keep) else b"." - for ch in six.iterbytes(s) - ) +from netlib import strutils def hexdump(s): @@ -70,7 +19,7 @@ def hexdump(s): part = s[i:i + 16] x = b" ".join("{:0=2x}".format(i).encode() for i in six.iterbytes(part)) x = x.ljust(47) # 16*2 + 15 - yield (offset, x, clean_bin(part, False)) + yield (offset, x, strutils.clean_bin(part, False)) def setbit(byte, offset, value): @@ -173,50 +122,3 @@ def hostport(scheme, host, port): return b"%s:%d" % (host, port) else: return "%s:%d" % (host, port) - - -def safe_subn(pattern, repl, target, *args, **kwargs): - """ - There are Unicode conversion problems with re.subn. We try to smooth - that over by casting the pattern and replacement to strings. We really - need a better solution that is aware of the actual content ecoding. - """ - return re.subn(str(pattern), str(repl), target, *args, **kwargs) - - -def bytes_to_escaped_str(data): - """ - Take bytes and return a safe string that can be displayed to the user. - - Single quotes are always escaped, double quotes are never escaped: - "'" + bytes_to_escaped_str(...) + "'" - gives a valid Python string. - """ - # TODO: We may want to support multi-byte characters without escaping them. - # One way to do would be calling .decode("utf8", "backslashreplace") first - # and then escaping UTF8 control chars (see clean_bin). - - if not isinstance(data, bytes): - raise ValueError("data must be bytes, but is {}".format(data.__class__.__name__)) - # We always insert a double-quote here so that we get a single-quoted string back - # https://stackoverflow.com/questions/29019340/why-does-python-use-different-quotes-for-representing-strings-depending-on-their - return repr(b'"' + data).lstrip("b")[2:-1] - - -def escaped_str_to_bytes(data): - """ - Take an escaped string and return the unescaped bytes equivalent. - """ - if not isinstance(data, six.string_types): - if six.PY2: - raise ValueError("data must be str or unicode, but is {}".format(data.__class__.__name__)) - raise ValueError("data must be str, but is {}".format(data.__class__.__name__)) - - if six.PY2: - if isinstance(data, unicode): - data = data.encode("utf8") - return data.decode("string-escape") - - # This one is difficult - we use an undocumented Python API here - # as per http://stackoverflow.com/a/23151714/934719 - return codecs.escape_decode(data)[0] -- cgit v1.2.3 From 09da1febbd9beac5ef5650274899439f5ce10e98 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Thu, 2 Jun 2016 13:03:37 +1200 Subject: Shift a bunch more string-related functions to strutils --- netlib/utils.py | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'netlib/utils.py') diff --git a/netlib/utils.py b/netlib/utils.py index 00e7e5d9..b4b99679 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -6,21 +6,6 @@ import inspect import six -from netlib import strutils - - -def hexdump(s): - """ - Returns: - A generator of (offset, hex, str) tuples - """ - for i in range(0, len(s), 16): - offset = "{:0=10x}".format(i).encode() - part = s[i:i + 16] - x = b" ".join("{:0=2x}".format(i).encode() for i in six.iterbytes(part)) - x = x.ljust(47) # 16*2 + 15 - yield (offset, x, strutils.clean_bin(part, False)) - def setbit(byte, offset, value): """ -- cgit v1.2.3