diff options
author | Maximilian Hils <git@maximilianhils.com> | 2016-07-02 01:51:47 -0700 |
---|---|---|
committer | Maximilian Hils <git@maximilianhils.com> | 2016-07-02 01:51:47 -0700 |
commit | 6032c4f2352260d32032800a2ff694339e2af6b2 (patch) | |
tree | e242ede8ebb828f424f270aeb5143516ed048939 /netlib/encoding.py | |
parent | 2c09e0416bcf94d9ebef7c11bb1883388e8e2c5d (diff) | |
download | mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.gz mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.tar.bz2 mitmproxy-6032c4f2352260d32032800a2ff694339e2af6b2.zip |
message.content -> .raw_content, implement .text
This PR improves our handling of HTTP message body encodings:
- The unaltered message body is now accessible as `.raw_content`
- The "content-encoding"-decoded content (i.e. gzip removed) content
is not `.content`, as this is what we want in 99% of the cases.
- `.text` now provides the "content-encoding"-decoded and then
"content-type charset"-decoded message body.
- The decoded values for `.content` and `.text` are cached,
so that repeated access and `x.text = x.text` is cheap.
- The `decoded()` decorator is now deprecated, as we can now just use
`.content`. Similarly `HTTPMessage.get_decoded_content()` is
deprecated.
Diffstat (limited to 'netlib/encoding.py')
-rw-r--r-- | netlib/encoding.py | 97 |
1 files changed, 63 insertions, 34 deletions
diff --git a/netlib/encoding.py b/netlib/encoding.py index 98502451..8b67b543 100644 --- a/netlib/encoding.py +++ b/netlib/encoding.py @@ -1,39 +1,62 @@ """ - Utility functions for decoding response bodies. +Utility functions for decoding response bodies. """ from __future__ import absolute_import + +import codecs from io import BytesIO import gzip import zlib +from typing import Union # noqa + -ENCODINGS = {"identity", "gzip", "deflate"} +def decode(obj, encoding, errors='strict'): + # type: (Union[str, bytes], str) -> Union[str, bytes] + """ + Decode the given input object + Returns: + The decoded value -def decode(e, content): - if not isinstance(content, bytes): - return None - encoding_map = { - "identity": identity, - "gzip": decode_gzip, - "deflate": decode_deflate, - } - if e not in encoding_map: - return None - return encoding_map[e](content) + Raises: + ValueError, if decoding fails. + """ + try: + try: + return custom_decode[encoding](obj) + except KeyError: + return codecs.decode(obj, encoding, errors) + except Exception as e: + raise ValueError("{} when decoding {} with {}".format( + type(e).__name__, + repr(obj)[:10], + repr(encoding), + )) + + +def encode(obj, encoding, errors='strict'): + # type: (Union[str, bytes], str) -> Union[str, bytes] + """ + Encode the given input object + Returns: + The encoded value -def encode(e, content): - if not isinstance(content, bytes): - return None - encoding_map = { - "identity": identity, - "gzip": encode_gzip, - "deflate": encode_deflate, - } - if e not in encoding_map: - return None - return encoding_map[e](content) + Raises: + ValueError, if encoding fails. + """ + try: + try: + return custom_encode[encoding](obj) + except KeyError: + return codecs.encode(obj, encoding, errors) + except Exception as e: + raise ValueError("{} when encoding {} with {}".format( + type(e).__name__, + repr(obj)[:10], + repr(encoding), + )) def identity(content): @@ -46,10 +69,7 @@ def identity(content): def decode_gzip(content): gfile = gzip.GzipFile(fileobj=BytesIO(content)) - try: - return gfile.read() - except (IOError, EOFError): - return None + return gfile.read() def encode_gzip(content): @@ -70,12 +90,9 @@ def decode_deflate(content): http://bugs.python.org/issue5784 """ try: - try: - return zlib.decompress(content) - except zlib.error: - return zlib.decompress(content, -15) + return zlib.decompress(content) except zlib.error: - return None + return zlib.decompress(content, -15) def encode_deflate(content): @@ -84,4 +101,16 @@ def encode_deflate(content): """ return zlib.compress(content) -__all__ = ["ENCODINGS", "encode", "decode"] + +custom_decode = { + "identity": identity, + "gzip": decode_gzip, + "deflate": decode_deflate, +} +custom_encode = { + "identity": identity, + "gzip": encode_gzip, + "deflate": encode_deflate, +} + +__all__ = ["encode", "decode"] |