diff options
| author | Maximilian Hils <git@maximilianhils.com> | 2015-09-22 01:48:35 +0200 | 
|---|---|---|
| committer | Maximilian Hils <git@maximilianhils.com> | 2015-09-22 01:48:35 +0200 | 
| commit | f93752277395d201fabefed8fae6d412f13da699 (patch) | |
| tree | 7f3b217b89b7d6b78725ea1a6d0185b13ab2876a /netlib | |
| parent | 9fbeac50ce3f6ae49b0f0270c508b6e81a1eaf17 (diff) | |
| download | mitmproxy-f93752277395d201fabefed8fae6d412f13da699.tar.gz mitmproxy-f93752277395d201fabefed8fae6d412f13da699.tar.bz2 mitmproxy-f93752277395d201fabefed8fae6d412f13da699.zip | |
Headers: return str on all Python versions
Diffstat (limited to 'netlib')
| -rw-r--r-- | netlib/http/__init__.py | 6 | ||||
| -rw-r--r-- | netlib/http/authentication.py | 10 | ||||
| -rw-r--r-- | netlib/http/headers.py | 205 | ||||
| -rw-r--r-- | netlib/http/http1/assemble.py | 6 | ||||
| -rw-r--r-- | netlib/http/http1/read.py | 14 | ||||
| -rw-r--r-- | netlib/http/models.py | 215 | ||||
| -rw-r--r-- | netlib/utils.py | 17 | ||||
| -rw-r--r-- | netlib/websockets/protocol.py | 14 | 
8 files changed, 257 insertions, 230 deletions
| diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py index d72884b3..0ccf6b32 100644 --- a/netlib/http/__init__.py +++ b/netlib/http/__init__.py @@ -1,11 +1,13 @@  from __future__ import absolute_import, print_function, division -from .models import Request, Response, Headers +from .headers import Headers +from .models import Request, Response  from .models import ALPN_PROTO_HTTP1, ALPN_PROTO_H2  from .models import HDR_FORM_MULTIPART, HDR_FORM_URLENCODED, CONTENT_MISSING  from . import http1, http2  __all__ = [ -    "Request", "Response", "Headers", +    "Headers", +    "Request", "Response",      "ALPN_PROTO_HTTP1", "ALPN_PROTO_H2",      "HDR_FORM_MULTIPART", "HDR_FORM_URLENCODED", "CONTENT_MISSING",      "http1", "http2", diff --git a/netlib/http/authentication.py b/netlib/http/authentication.py index 5831660b..d769abe5 100644 --- a/netlib/http/authentication.py +++ b/netlib/http/authentication.py @@ -9,18 +9,18 @@ def parse_http_basic_auth(s):          return None      scheme = words[0]      try: -        user = binascii.a2b_base64(words[1]) +        user = binascii.a2b_base64(words[1]).decode("utf8", "replace")      except binascii.Error:          return None -    parts = user.split(b':') +    parts = user.split(':')      if len(parts) != 2:          return None      return scheme, parts[0], parts[1]  def assemble_http_basic_auth(scheme, username, password): -    v = binascii.b2a_base64(username + b":" + password) -    return scheme + b" " + v +    v = binascii.b2a_base64((username + ":" + password).encode("utf8")).decode("ascii") +    return scheme + " " + v  class NullProxyAuth(object): @@ -69,7 +69,7 @@ class BasicProxyAuth(NullProxyAuth):          if not parts:              return False          scheme, username, password = parts -        if scheme.lower() != b'basic': +        if scheme.lower() != 'basic':              return False          if not self.password_manager.test(username, password):              return False diff --git a/netlib/http/headers.py b/netlib/http/headers.py new file mode 100644 index 00000000..1511ea2d --- /dev/null +++ b/netlib/http/headers.py @@ -0,0 +1,205 @@ +""" + +Unicode Handling +---------------- +See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/ +""" +from __future__ import absolute_import, print_function, division +import copy +try: +    from collections.abc import MutableMapping +except ImportError:  # Workaround for Python < 3.3 +    from collections import MutableMapping + + +import six + +from netlib.utils import always_byte_args + +if six.PY2: +    _native = lambda x: x +    _asbytes = lambda x: x +    _always_byte_args = lambda x: x +else: +    # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. +    _native = lambda x: x.decode("utf-8", "surrogateescape") +    _asbytes = lambda x: x.encode("utf-8", "surrogateescape") +    _always_byte_args = always_byte_args("utf-8", "surrogateescape") + + +class Headers(MutableMapping, object): +    """ +    Header class which allows both convenient access to individual headers as well as +    direct access to the underlying raw data. Provides a full dictionary interface. + +    Example: + +    .. code-block:: python + +        # Create header from a list of (header_name, header_value) tuples +        >>> h = Headers([ +                ["Host","example.com"], +                ["Accept","text/html"], +                ["accept","application/xml"] +            ]) + +        # Headers mostly behave like a normal dict. +        >>> h["Host"] +        "example.com" + +        # HTTP Headers are case insensitive +        >>> h["host"] +        "example.com" + +        # Multiple headers are folded into a single header as per RFC7230 +        >>> h["Accept"] +        "text/html, application/xml" + +        # Setting a header removes all existing headers with the same name. +        >>> h["Accept"] = "application/text" +        >>> h["Accept"] +        "application/text" + +        # str(h) returns a HTTP1 header block. +        >>> print(h) +        Host: example.com +        Accept: application/text + +        # For full control, the raw header fields can be accessed +        >>> h.fields + +        # Headers can also be crated from keyword arguments +        >>> h = Headers(host="example.com", content_type="application/xml") + +    Caveats: +        For use with the "Set-Cookie" header, see :py:meth:`get_all`. +    """ + +    @_always_byte_args +    def __init__(self, fields=None, **headers): +        """ +        Args: +            fields: (optional) list of ``(name, value)`` header tuples, +                e.g. ``[("Host","example.com")]``. All names and values must be bytes. +            **headers: Additional headers to set. Will overwrite existing values from `fields`. +                For convenience, underscores in header names will be transformed to dashes - +                this behaviour does not extend to other methods. +                If ``**headers`` contains multiple keys that have equal ``.lower()`` s, +                the behavior is undefined. +        """ +        self.fields = fields or [] + +        for name, value in self.fields: +            if not isinstance(name, bytes) or not isinstance(value, bytes): +                raise ValueError("Headers passed as fields must be bytes.") + +        # content_type -> content-type +        headers = { +            _asbytes(name).replace(b"_", b"-"): value +            for name, value in six.iteritems(headers) +        } +        self.update(headers) + +    def __bytes__(self): +        if self.fields: +            return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n" +        else: +            return b"" + +    if six.PY2: +        __str__ = __bytes__ + +    @_always_byte_args +    def __getitem__(self, name): +        values = self.get_all(name) +        if not values: +            raise KeyError(name) +        return ", ".join(values) + +    @_always_byte_args +    def __setitem__(self, name, value): +        idx = self._index(name) + +        # To please the human eye, we insert at the same position the first existing header occured. +        if idx is not None: +            del self[name] +            self.fields.insert(idx, [name, value]) +        else: +            self.fields.append([name, value]) + +    @_always_byte_args +    def __delitem__(self, name): +        if name not in self: +            raise KeyError(name) +        name = name.lower() +        self.fields = [ +            field for field in self.fields +            if name != field[0].lower() +        ] + +    def __iter__(self): +        seen = set() +        for name, _ in self.fields: +            name_lower = name.lower() +            if name_lower not in seen: +                seen.add(name_lower) +                yield _native(name) + +    def __len__(self): +        return len(set(name.lower() for name, _ in self.fields)) + +    # __hash__ = object.__hash__ + +    def _index(self, name): +        name = name.lower() +        for i, field in enumerate(self.fields): +            if field[0].lower() == name: +                return i +        return None + +    def __eq__(self, other): +        if isinstance(other, Headers): +            return self.fields == other.fields +        return False + +    def __ne__(self, other): +        return not self.__eq__(other) + +    @_always_byte_args +    def get_all(self, name): +        """ +        Like :py:meth:`get`, but does not fold multiple headers into a single one. +        This is useful for Set-Cookie headers, which do not support folding. + +        See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 +        """ +        name_lower = name.lower() +        values = [_native(value) for n, value in self.fields if n.lower() == name_lower] +        return values + +    @_always_byte_args +    def set_all(self, name, values): +        """ +        Explicitly set multiple headers for the given key. +        See: :py:meth:`get_all` +        """ +        values = map(_asbytes, values)  # _always_byte_args does not fix lists +        if name in self: +            del self[name] +        self.fields.extend( +            [name, value] for value in values +        ) + +    def copy(self): +        return Headers(copy.copy(self.fields)) + +    # Implement the StateObject protocol from mitmproxy +    def get_state(self, short=False): +        return tuple(tuple(field) for field in self.fields) + +    def load_state(self, state): +        self.fields = [list(field) for field in state] + +    @classmethod +    def from_state(cls, state): +        return cls([list(field) for field in state])
\ No newline at end of file diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py index c2b60a0f..88aeac05 100644 --- a/netlib/http/http1/assemble.py +++ b/netlib/http/http1/assemble.py @@ -35,7 +35,7 @@ def assemble_response_head(response):  def assemble_body(headers, body_chunks): -    if b"chunked" in headers.get(b"transfer-encoding", b"").lower(): +    if "chunked" in headers.get("transfer-encoding", "").lower():          for chunk in body_chunks:              if chunk:                  yield b"%x\r\n%s\r\n" % (len(chunk), chunk) @@ -76,8 +76,8 @@ def _assemble_request_line(request, form=None):  def _assemble_request_headers(request):      headers = request.headers.copy() -    if b"host" not in headers and request.scheme and request.host and request.port: -        headers[b"Host"] = utils.hostport( +    if "host" not in headers and request.scheme and request.host and request.port: +        headers["host"] = utils.hostport(              request.scheme,              request.host,              request.port diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index c6760ff3..4c898348 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -146,11 +146,11 @@ def connection_close(http_version, headers):          according to RFC 2616 Section 8.1.      """      # At first, check if we have an explicit Connection header. -    if b"connection" in headers: +    if "connection" in headers:          tokens = utils.get_header_tokens(headers, "connection") -        if b"close" in tokens: +        if "close" in tokens:              return True -        elif b"keep-alive" in tokens: +        elif "keep-alive" in tokens:              return False      # If we don't have a Connection header, HTTP 1.1 connections are assumed to @@ -181,7 +181,7 @@ def expected_http_body_size(request, response=None):          is_request = False      if is_request: -        if headers.get(b"expect", b"").lower() == b"100-continue": +        if headers.get("expect", "").lower() == "100-continue":              return 0      else:          if request.method.upper() == b"HEAD": @@ -193,11 +193,11 @@ def expected_http_body_size(request, response=None):          if response_code in (204, 304):              return 0 -    if b"chunked" in headers.get(b"transfer-encoding", b"").lower(): +    if "chunked" in headers.get("transfer-encoding", "").lower():          return None -    if b"content-length" in headers: +    if "content-length" in headers:          try: -            size = int(headers[b"content-length"]) +            size = int(headers["content-length"])              if size < 0:                  raise ValueError()              return size diff --git a/netlib/http/models.py b/netlib/http/models.py index 512a764d..55664533 100644 --- a/netlib/http/models.py +++ b/netlib/http/models.py @@ -1,201 +1,22 @@ -from __future__ import absolute_import, print_function, division -import copy +  from ..odict import ODict  from .. import utils, encoding -from ..utils import always_bytes, always_byte_args, native +from ..utils import always_bytes, native  from . import cookies +from .headers import Headers -import six  from six.moves import urllib -try: -    from collections import MutableMapping -except ImportError: -    from collections.abc import MutableMapping  # TODO: Move somewhere else?  ALPN_PROTO_HTTP1 = b'http/1.1'  ALPN_PROTO_H2 = b'h2' -HDR_FORM_URLENCODED = b"application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = b"multipart/form-data" +HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" +HDR_FORM_MULTIPART = "multipart/form-data"  CONTENT_MISSING = 0 -class Headers(MutableMapping, object): -    """ -    Header class which allows both convenient access to individual headers as well as -    direct access to the underlying raw data. Provides a full dictionary interface. - -    Example: - -    .. code-block:: python - -        # Create header from a list of (header_name, header_value) tuples -        >>> h = Headers([ -                ["Host","example.com"], -                ["Accept","text/html"], -                ["accept","application/xml"] -            ]) - -        # Headers mostly behave like a normal dict. -        >>> h["Host"] -        "example.com" - -        # HTTP Headers are case insensitive -        >>> h["host"] -        "example.com" - -        # Multiple headers are folded into a single header as per RFC7230 -        >>> h["Accept"] -        "text/html, application/xml" - -        # Setting a header removes all existing headers with the same name. -        >>> h["Accept"] = "application/text" -        >>> h["Accept"] -        "application/text" - -        # str(h) returns a HTTP1 header block. -        >>> print(h) -        Host: example.com -        Accept: application/text - -        # For full control, the raw header fields can be accessed -        >>> h.fields - -        # Headers can also be crated from keyword arguments -        >>> h = Headers(host="example.com", content_type="application/xml") - -    Caveats: -        For use with the "Set-Cookie" header, see :py:meth:`get_all`. -    """ - -    @always_byte_args("ascii") -    def __init__(self, fields=None, **headers): -        """ -        Args: -            fields: (optional) list of ``(name, value)`` header tuples, -                e.g. ``[("Host","example.com")]``. All names and values must be bytes. -            **headers: Additional headers to set. Will overwrite existing values from `fields`. -                For convenience, underscores in header names will be transformed to dashes - -                this behaviour does not extend to other methods. -                If ``**headers`` contains multiple keys that have equal ``.lower()`` s, -                the behavior is undefined. -        """ -        self.fields = fields or [] - -        # content_type -> content-type -        headers = { -            name.encode("ascii").replace(b"_", b"-"): value -            for name, value in six.iteritems(headers) -        } -        self.update(headers) - -    def __bytes__(self): -        if self.fields: -            return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n" -        else: -            return b"" - -    if six.PY2: -        __str__ = __bytes__ - -    @always_byte_args("ascii") -    def __getitem__(self, name): -        values = self.get_all(name) -        if not values: -            raise KeyError(name) -        return b", ".join(values) - -    @always_byte_args("ascii") -    def __setitem__(self, name, value): -        idx = self._index(name) - -        # To please the human eye, we insert at the same position the first existing header occured. -        if idx is not None: -            del self[name] -            self.fields.insert(idx, [name, value]) -        else: -            self.fields.append([name, value]) - -    @always_byte_args("ascii") -    def __delitem__(self, name): -        if name not in self: -            raise KeyError(name) -        name = name.lower() -        self.fields = [ -            field for field in self.fields -            if name != field[0].lower() -        ] - -    def __iter__(self): -        seen = set() -        for name, _ in self.fields: -            name_lower = name.lower() -            if name_lower not in seen: -                seen.add(name_lower) -                yield name - -    def __len__(self): -        return len(set(name.lower() for name, _ in self.fields)) - -    # __hash__ = object.__hash__ - -    def _index(self, name): -        name = name.lower() -        for i, field in enumerate(self.fields): -            if field[0].lower() == name: -                return i -        return None - -    def __eq__(self, other): -        if isinstance(other, Headers): -            return self.fields == other.fields -        return False - -    def __ne__(self, other): -        return not self.__eq__(other) - -    @always_byte_args("ascii") -    def get_all(self, name): -        """ -        Like :py:meth:`get`, but does not fold multiple headers into a single one. -        This is useful for Set-Cookie headers, which do not support folding. - -        See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 -        """ -        name_lower = name.lower() -        values = [value for n, value in self.fields if n.lower() == name_lower] -        return values - -    def set_all(self, name, values): -        """ -        Explicitly set multiple headers for the given key. -        See: :py:meth:`get_all` -        """ -        name = always_bytes(name, "ascii") -        values = (always_bytes(value, "ascii") for value in values) -        if name in self: -            del self[name] -        self.fields.extend( -            [name, value] for value in values -        ) - -    def copy(self): -        return Headers(copy.copy(self.fields)) - -    # Implement the StateObject protocol from mitmproxy -    def get_state(self, short=False): -        return tuple(tuple(field) for field in self.fields) - -    def load_state(self, state): -        self.fields = [list(field) for field in state] - -    @classmethod -    def from_state(cls, state): -        return cls([list(field) for field in state]) - -  class Message(object):      def __init__(self, http_version, headers, body, timestamp_start, timestamp_end):          self.http_version = http_version @@ -216,7 +37,7 @@ class Message(object):      def body(self, body):          self._body = body          if isinstance(body, bytes): -            self.headers[b"content-length"] = str(len(body)).encode() +            self.headers["content-length"] = str(len(body)).encode()      content = body @@ -268,8 +89,8 @@ class Request(Message):              response. That is, we remove ETags and If-Modified-Since headers.          """          delheaders = [ -            b"if-modified-since", -            b"if-none-match", +            "if-modified-since", +            "if-none-match",          ]          for i in delheaders:              self.headers.pop(i, None) @@ -279,14 +100,14 @@ class Request(Message):              Modifies this request to remove headers that will compress the              resource's data.          """ -        self.headers["accept-encoding"] = b"identity" +        self.headers["accept-encoding"] = "identity"      def constrain_encoding(self):          """              Limits the permissible Accept-Encoding values, based on what we can              decode appropriately.          """ -        accept_encoding = native(self.headers.get("accept-encoding"), "ascii") +        accept_encoding = self.headers.get("accept-encoding")          if accept_encoding:              self.headers["accept-encoding"] = (                  ', '.join( @@ -309,9 +130,9 @@ class Request(Message):              indicates non-form data.          """          if self.body: -            if HDR_FORM_URLENCODED in self.headers.get("content-type", b"").lower(): +            if HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower():                  return self.get_form_urlencoded() -            elif HDR_FORM_MULTIPART in self.headers.get("content-type", b"").lower(): +            elif HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower():                  return self.get_form_multipart()          return ODict([]) @@ -321,12 +142,12 @@ class Request(Message):              Returns an empty ODict if there is no data or the content-type              indicates non-form data.          """ -        if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type", b"").lower(): +        if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower():              return ODict(utils.urldecode(self.body))          return ODict([])      def get_form_multipart(self): -        if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type", b"").lower(): +        if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower():              return ODict(                  utils.multipartdecode(                      self.headers, @@ -341,7 +162,7 @@ class Request(Message):          """          # FIXME: If there's an existing content-type header indicating a          # url-encoded form, leave it alone. -        self.headers[b"content-type"] = HDR_FORM_URLENCODED +        self.headers["content-type"] = HDR_FORM_URLENCODED          self.body = utils.urlencode(odict.lst)      def get_path_components(self): @@ -400,7 +221,7 @@ class Request(Message):          """          if hostheader and "host" in self.headers:              try: -                return self.headers["host"].decode("idna") +                return self.headers["host"]              except ValueError:                  pass          if self.host: @@ -420,7 +241,7 @@ class Request(Message):          """          ret = ODict()          for i in self.headers.get_all("Cookie"): -            ret.extend(cookies.parse_cookie_header(native(i,"ascii"))) +            ret.extend(cookies.parse_cookie_header(i))          return ret      def set_cookies(self, odict): @@ -499,7 +320,7 @@ class Response(Message):          """          ret = []          for header in self.headers.get_all("set-cookie"): -            v = cookies.parse_set_cookie_header(native(header, "ascii")) +            v = cookies.parse_set_cookie_header(header)              if v:                  name, value, attrs = v                  ret.append([name, [value, attrs]]) diff --git a/netlib/utils.py b/netlib/utils.py index b9848038..d5b30128 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -269,7 +269,7 @@ def get_header_tokens(headers, key):      """      if key not in headers:          return [] -    tokens = headers[key].split(b",") +    tokens = headers[key].split(",")      return [token.strip() for token in tokens] @@ -320,14 +320,14 @@ def parse_content_type(c):              ("text", "html", {"charset": "UTF-8"})      """ -    parts = c.split(b";", 1) -    ts = parts[0].split(b"/", 1) +    parts = c.split(";", 1) +    ts = parts[0].split("/", 1)      if len(ts) != 2:          return None      d = {}      if len(parts) == 2: -        for i in parts[1].split(b";"): -            clause = i.split(b"=", 1) +        for i in parts[1].split(";"): +            clause = i.split("=", 1)              if len(clause) == 2:                  d[clause[0].strip()] = clause[1].strip()      return ts[0].lower(), ts[1].lower(), d @@ -337,13 +337,14 @@ def multipartdecode(headers, content):      """          Takes a multipart boundary encoded string and returns list of (key, value) tuples.      """ -    v = headers.get(b"Content-Type") +    v = headers.get("Content-Type")      if v:          v = parse_content_type(v)          if not v:              return [] -        boundary = v[2].get(b"boundary") -        if not boundary: +        try: +            boundary = v[2]["boundary"].encode("ascii") +        except (KeyError, UnicodeError):              return []          rx = re.compile(br'\bname="([^"]+)"') diff --git a/netlib/websockets/protocol.py b/netlib/websockets/protocol.py index 778fe7e7..e62f8df6 100644 --- a/netlib/websockets/protocol.py +++ b/netlib/websockets/protocol.py @@ -80,7 +80,7 @@ class WebsocketsProtocol(object):              Returns an instance of Headers          """          if not key: -            key = base64.b64encode(os.urandom(16)).decode('utf-8') +            key = base64.b64encode(os.urandom(16)).decode('ascii')          return Headers(**{              HEADER_WEBSOCKET_KEY: key,              HEADER_WEBSOCKET_VERSION: version, @@ -95,27 +95,25 @@ class WebsocketsProtocol(object):          """          return Headers(**{              HEADER_WEBSOCKET_ACCEPT: self.create_server_nonce(key), -            "Connection": "Upgrade", -            "Upgrade": "websocket", +            "connection": "Upgrade", +            "upgrade": "websocket",          })      @classmethod      def check_client_handshake(self, headers): -        if headers.get("upgrade") != b"websocket": +        if headers.get("upgrade") != "websocket":              return          return headers.get(HEADER_WEBSOCKET_KEY)      @classmethod      def check_server_handshake(self, headers): -        if headers.get("upgrade") != b"websocket": +        if headers.get("upgrade") != "websocket":              return          return headers.get(HEADER_WEBSOCKET_ACCEPT)      @classmethod      def create_server_nonce(self, client_nonce): -        return base64.b64encode( -            binascii.unhexlify(hashlib.sha1(client_nonce + websockets_magic).hexdigest()) -        ) +        return base64.b64encode(hashlib.sha1(client_nonce + websockets_magic).digest()) | 
