From 798759d2b3974eaa7afbaab7c9678e8f66dc1be6 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Thu, 21 Jul 2016 19:49:32 -0700 Subject: fix content view cache invalidation --- netlib/http/message.py | 6 ------ netlib/multidict.py | 6 +++--- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'netlib') diff --git a/netlib/http/message.py b/netlib/http/message.py index 34709f0a..a86e7489 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -32,9 +32,6 @@ class MessageData(basetypes.Serializable): def __ne__(self, other): return not self.__eq__(other) - def __hash__(self): - return hash(frozenset(self.__dict__.items())) - def set_state(self, state): for k, v in state.items(): if k == "headers": @@ -77,9 +74,6 @@ class Message(basetypes.Serializable): def __ne__(self, other): return not self.__eq__(other) - def __hash__(self): - return hash(self.data) ^ 1 - def get_state(self): return self.data.get_state() diff --git a/netlib/multidict.py b/netlib/multidict.py index 51053ff6..e9fec155 100644 --- a/netlib/multidict.py +++ b/netlib/multidict.py @@ -79,9 +79,6 @@ class _MultiDict(MutableMapping, basetypes.Serializable): def __ne__(self, other): return not self.__eq__(other) - def __hash__(self): - return hash(self.fields) - def get_all(self, key): """ Return the list of all values for a given key. @@ -241,6 +238,9 @@ class ImmutableMultiDict(MultiDict): __delitem__ = set_all = insert = _immutable + def __hash__(self): + return hash(self.fields) + def with_delitem(self, key): """ Returns: -- cgit v1.2.3 From 61de6fa1d65d4219c6798ab025e1beeca1247068 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 23 Jul 2016 11:55:27 -0700 Subject: fix test_view_urlencoded --- netlib/http/url.py | 1 + 1 file changed, 1 insertion(+) (limited to 'netlib') diff --git a/netlib/http/url.py b/netlib/http/url.py index 2fc6e7ee..1c8c007a 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -82,6 +82,7 @@ def unparse(scheme, host, port, path=""): def encode(s): + # type: (six.text_type, bytes) -> str """ Takes a list of (key, value) tuples and returns a urlencoded string. """ -- cgit v1.2.3 From fcb906dc97914ad7d852d7e0c04e68121946e350 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 23 Jul 2016 12:01:05 -0700 Subject: improve dumper addon text alignment (#1415) --- netlib/strutils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'netlib') diff --git a/netlib/strutils.py b/netlib/strutils.py index 32e77927..96c8b10f 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -51,8 +51,7 @@ else: def escape_control_characters(text, keep_spacing=True): """ - Replace all unicode C1 control characters from the given text with their respective control pictures. - For example, a null byte is replaced with the unicode character "\u2400". + Replace all unicode C1 control characters from the given text with a single "." Args: keep_spacing: If True, tabs and newlines will not be replaced. -- cgit v1.2.3 From e920c101e5b568962b645064dfbb58372931116c Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 23 Jul 2016 18:24:02 -0700 Subject: add single-element cache for netlib.encoding --- netlib/encoding.py | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'netlib') diff --git a/netlib/encoding.py b/netlib/encoding.py index e3cf5f30..29e2a420 100644 --- a/netlib/encoding.py +++ b/netlib/encoding.py @@ -4,6 +4,7 @@ Utility functions for decoding response bodies. from __future__ import absolute_import import codecs +import collections from io import BytesIO import gzip import zlib @@ -11,7 +12,15 @@ import zlib from typing import Union # noqa -def decode(obj, encoding, errors='strict'): +# We have a shared single-element cache for encoding and decoding. +# This is quite useful in practice, e.g. +# flow.request.content = flow.request.content.replace(b"foo", b"bar") +# does not require an .encode() call if content does not contain b"foo" +CachedDecode = collections.namedtuple("CachedDecode", "encoded encoding errors decoded") +_cache = CachedDecode(None, None, None, None) + + +def decode(encoded, encoding, errors='strict'): # type: (Union[str, bytes], str, str) -> Union[str, bytes] """ Decode the given input object @@ -22,20 +31,31 @@ def decode(obj, encoding, errors='strict'): Raises: ValueError, if decoding fails. """ + global _cache + cached = ( + _cache.encoded == encoded and + _cache.encoding == encoding and + _cache.errors == errors + ) + if cached: + return _cache.decoded try: try: - return custom_decode[encoding](obj) + decoded = custom_decode[encoding](encoded) except KeyError: - return codecs.decode(obj, encoding, errors) + decoded = codecs.decode(encoded, encoding, errors) + if encoding in ("gzip", "deflate"): + _cache = CachedDecode(encoded, encoding, errors, decoded) + return decoded except Exception as e: raise ValueError("{} when decoding {} with {}".format( type(e).__name__, - repr(obj)[:10], + repr(encoded)[:10], repr(encoding), )) -def encode(obj, encoding, errors='strict'): +def encode(decoded, encoding, errors='strict'): # type: (Union[str, bytes], str, str) -> Union[str, bytes] """ Encode the given input object @@ -46,15 +66,26 @@ def encode(obj, encoding, errors='strict'): Raises: ValueError, if encoding fails. """ + global _cache + cached = ( + _cache.decoded == decoded and + _cache.encoding == encoding and + _cache.errors == errors + ) + if cached: + return _cache.encoded try: try: - return custom_encode[encoding](obj) + encoded = custom_encode[encoding](decoded) except KeyError: - return codecs.encode(obj, encoding, errors) + encoded = codecs.encode(decoded, encoding, errors) + if encoding in ("gzip", "deflate"): + _cache = CachedDecode(encoded, encoding, errors, decoded) + return encoded except Exception as e: raise ValueError("{} when encoding {} with {}".format( type(e).__name__, - repr(obj)[:10], + repr(decoded)[:10], repr(encoding), )) -- cgit v1.2.3 From e07f515a208ff10f00abee6cfd9d984e676261b1 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 23 Jul 2016 18:47:35 -0700 Subject: remove content caching in netlib.http.Message --- netlib/http/message.py | 115 +++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 84 deletions(-) (limited to 'netlib') diff --git a/netlib/http/message.py b/netlib/http/message.py index a86e7489..be35b8d1 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -49,23 +49,7 @@ class MessageData(basetypes.Serializable): return cls(**state) -class CachedDecode(object): - __slots__ = ["encoded", "encoding", "strict", "decoded"] - - def __init__(self, object, encoding, strict, decoded): - self.encoded = object - self.encoding = encoding - self.strict = strict - self.decoded = decoded - -no_cached_decode = CachedDecode(None, None, None, None) - - class Message(basetypes.Serializable): - def __init__(self): - self._content_cache = no_cached_decode # type: CachedDecode - self._text_cache = no_cached_decode # type: CachedDecode - def __eq__(self, other): if isinstance(other, Message): return self.data == other.data @@ -126,25 +110,15 @@ class Message(basetypes.Serializable): if self.raw_content is None: return None ce = self.headers.get("content-encoding") - cached = ( - self._content_cache.encoded == self.raw_content and - (self._content_cache.strict or not strict) and - self._content_cache.encoding == ce - ) - if not cached: - is_strict = True - if ce: - try: - decoded = encoding.decode(self.raw_content, ce) - except ValueError: - if strict: - raise - is_strict = False - decoded = self.raw_content - else: - decoded = self.raw_content - self._content_cache = CachedDecode(self.raw_content, ce, is_strict, decoded) - return self._content_cache.decoded + if ce: + try: + return encoding.decode(self.raw_content, ce) + except ValueError: + if strict: + raise + return self.raw_content + else: + return self.raw_content def set_content(self, value): if value is None: @@ -157,22 +131,13 @@ class Message(basetypes.Serializable): .format(type(value).__name__) ) ce = self.headers.get("content-encoding") - cached = ( - self._content_cache.decoded == value and - self._content_cache.encoding == ce and - self._content_cache.strict - ) - if not cached: - try: - encoded = encoding.encode(value, ce or "identity") - except ValueError: - # So we have an invalid content-encoding? - # Let's remove it! - del self.headers["content-encoding"] - ce = None - encoded = value - self._content_cache = CachedDecode(encoded, ce, True, value) - self.raw_content = self._content_cache.encoded + try: + self.raw_content = encoding.encode(value, ce or "identity") + except ValueError: + # So we have an invalid content-encoding? + # Let's remove it! + del self.headers["content-encoding"] + self.raw_content = value self.headers["content-length"] = str(len(self.raw_content)) content = property(get_content, set_content) @@ -244,22 +209,12 @@ class Message(basetypes.Serializable): enc = self._guess_encoding() content = self.get_content(strict) - cached = ( - self._text_cache.encoded == content and - (self._text_cache.strict or not strict) and - self._text_cache.encoding == enc - ) - if not cached: - is_strict = self._content_cache.strict - try: - decoded = encoding.decode(content, enc) - except ValueError: - if strict: - raise - is_strict = False - decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape") - self._text_cache = CachedDecode(content, enc, is_strict, decoded) - return self._text_cache.decoded + try: + return encoding.decode(content, enc) + except ValueError: + if strict: + raise + return content.decode("utf8", "replace" if six.PY2 else "surrogateescape") def set_text(self, text): if text is None: @@ -267,23 +222,15 @@ class Message(basetypes.Serializable): return enc = self._guess_encoding() - cached = ( - self._text_cache.decoded == text and - self._text_cache.encoding == enc and - self._text_cache.strict - ) - if not cached: - try: - encoded = encoding.encode(text, enc) - except ValueError: - # Fall back to UTF-8 and update the content-type header. - ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {}) - ct[2]["charset"] = "utf-8" - self.headers["content-type"] = headers.assemble_content_type(*ct) - enc = "utf8" - encoded = text.encode(enc, "replace" if six.PY2 else "surrogateescape") - self._text_cache = CachedDecode(encoded, enc, True, text) - self.content = self._text_cache.encoded + try: + self.content = encoding.encode(text, enc) + except ValueError: + # Fall back to UTF-8 and update the content-type header. + ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {}) + ct[2]["charset"] = "utf-8" + self.headers["content-type"] = headers.assemble_content_type(*ct) + enc = "utf8" + self.content = text.encode(enc, "replace" if six.PY2 else "surrogateescape") text = property(get_text, set_text) -- cgit v1.2.3 From a682074e9ed5e94683389f67cc192e6547d6310e Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 24 Jul 2016 19:06:49 -0700 Subject: improve query/path_components getter/setter --- netlib/http/request.py | 26 ++++++++++++-------------- netlib/http/url.py | 42 +++++++++++++++++++++++++++++++++++++----- netlib/strutils.py | 3 +++ 3 files changed, 52 insertions(+), 19 deletions(-) (limited to 'netlib') diff --git a/netlib/http/request.py b/netlib/http/request.py index ecaa9b79..061217a3 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -253,14 +253,13 @@ class Request(message.Message): ) def _get_query(self): - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) + query = urllib.parse.urlparse(self.url).query return tuple(netlib.http.url.decode(query)) - def _set_query(self, value): - query = netlib.http.url.encode(value) - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + def _set_query(self, query_data): + query = netlib.http.url.encode(query_data) + _, _, path, params, _, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) @query.setter def query(self, value): @@ -296,19 +295,18 @@ class Request(message.Message): The URL's path components as a tuple of strings. Components are unquoted. """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) + path = urllib.parse.urlparse(self.url).path # This needs to be a tuple so that it's immutable. # Otherwise, this would fail silently: # request.path_components.append("foo") - return tuple(urllib.parse.unquote(i) for i in path.split("/") if i) + return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i) @path_components.setter def path_components(self, components): - components = map(lambda x: urllib.parse.quote(x, safe=""), components) + components = map(lambda x: netlib.http.url.quote(x, safe=""), components) path = "/" + "/".join(components) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = netlib.http.url.parse( - urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) + _, _, _, params, query, fragment = urllib.parse.urlparse(self.url) + self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) def anticache(self): """ @@ -365,13 +363,13 @@ class Request(message.Message): pass return () - def _set_urlencoded_form(self, value): + def _set_urlencoded_form(self, form_data): """ Sets the body to the URL-encoded form data, and adds the appropriate content-type header. This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = netlib.http.url.encode(value).encode() + self.content = netlib.http.url.encode(form_data).encode() @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py index 1c8c007a..076854b9 100644 --- a/netlib/http/url.py +++ b/netlib/http/url.py @@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""): def encode(s): - # type: (six.text_type, bytes) -> str + # type: Sequence[Tuple[str,str]] -> str """ Takes a list of (key, value) tuples and returns a urlencoded string. """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) + if six.PY2: + return urllib.parse.urlencode(s, False) + else: + return urllib.parse.urlencode(s, False, errors="surrogateescape") def decode(s): """ - Takes a urlencoded string and returns a list of (key, value) tuples. + Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples. + """ + if six.PY2: + return urllib.parse.parse_qsl(s, keep_blank_values=True) + else: + return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape') + + +def quote(b, safe="/"): + """ + Returns: + An ascii-encodable str. + """ + # type: (str) -> str + if six.PY2: + return urllib.parse.quote(b, safe=safe) + else: + return urllib.parse.quote(b, safe=safe, errors="surrogateescape") + + +def unquote(s): """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) + Args: + s: A surrogate-escaped str + Returns: + A surrogate-escaped str + """ + # type: (str) -> str + + if six.PY2: + return urllib.parse.unquote(s) + else: + return urllib.parse.unquote(s, errors="surrogateescape") def hostport(scheme, host, port): diff --git a/netlib/strutils.py b/netlib/strutils.py index 96c8b10f..8f27ebb7 100644 --- a/netlib/strutils.py +++ b/netlib/strutils.py @@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False): def escaped_str_to_bytes(data): """ Take an escaped string and return the unescaped bytes equivalent. + + Raises: + ValueError, if the escape sequence is invalid. """ if not isinstance(data, six.string_types): if six.PY2: -- cgit v1.2.3 From f9edffc58e5198d7995c3652acee9116ae9fe7d8 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 24 Jul 2016 20:13:18 -0700 Subject: silence test warnings --- netlib/encoding.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'netlib') diff --git a/netlib/encoding.py b/netlib/encoding.py index 29e2a420..da282194 100644 --- a/netlib/encoding.py +++ b/netlib/encoding.py @@ -33,6 +33,7 @@ def decode(encoded, encoding, errors='strict'): """ global _cache cached = ( + isinstance(encoded, bytes) and _cache.encoded == encoded and _cache.encoding == encoding and _cache.errors == errors @@ -68,6 +69,7 @@ def encode(decoded, encoding, errors='strict'): """ global _cache cached = ( + isinstance(decoded, bytes) and _cache.decoded == decoded and _cache.encoding == encoding and _cache.errors == errors -- cgit v1.2.3