aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2015-09-22 01:48:35 +0200
committerMaximilian Hils <git@maximilianhils.com>2015-09-22 01:48:35 +0200
commitf93752277395d201fabefed8fae6d412f13da699 (patch)
tree7f3b217b89b7d6b78725ea1a6d0185b13ab2876a /netlib
parent9fbeac50ce3f6ae49b0f0270c508b6e81a1eaf17 (diff)
downloadmitmproxy-f93752277395d201fabefed8fae6d412f13da699.tar.gz
mitmproxy-f93752277395d201fabefed8fae6d412f13da699.tar.bz2
mitmproxy-f93752277395d201fabefed8fae6d412f13da699.zip
Headers: return str on all Python versions
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http/__init__.py6
-rw-r--r--netlib/http/authentication.py10
-rw-r--r--netlib/http/headers.py205
-rw-r--r--netlib/http/http1/assemble.py6
-rw-r--r--netlib/http/http1/read.py14
-rw-r--r--netlib/http/models.py215
-rw-r--r--netlib/utils.py17
-rw-r--r--netlib/websockets/protocol.py14
8 files changed, 257 insertions, 230 deletions
diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py
index d72884b3..0ccf6b32 100644
--- a/netlib/http/__init__.py
+++ b/netlib/http/__init__.py
@@ -1,11 +1,13 @@
from __future__ import absolute_import, print_function, division
-from .models import Request, Response, Headers
+from .headers import Headers
+from .models import Request, Response
from .models import ALPN_PROTO_HTTP1, ALPN_PROTO_H2
from .models import HDR_FORM_MULTIPART, HDR_FORM_URLENCODED, CONTENT_MISSING
from . import http1, http2
__all__ = [
- "Request", "Response", "Headers",
+ "Headers",
+ "Request", "Response",
"ALPN_PROTO_HTTP1", "ALPN_PROTO_H2",
"HDR_FORM_MULTIPART", "HDR_FORM_URLENCODED", "CONTENT_MISSING",
"http1", "http2",
diff --git a/netlib/http/authentication.py b/netlib/http/authentication.py
index 5831660b..d769abe5 100644
--- a/netlib/http/authentication.py
+++ b/netlib/http/authentication.py
@@ -9,18 +9,18 @@ def parse_http_basic_auth(s):
return None
scheme = words[0]
try:
- user = binascii.a2b_base64(words[1])
+ user = binascii.a2b_base64(words[1]).decode("utf8", "replace")
except binascii.Error:
return None
- parts = user.split(b':')
+ parts = user.split(':')
if len(parts) != 2:
return None
return scheme, parts[0], parts[1]
def assemble_http_basic_auth(scheme, username, password):
- v = binascii.b2a_base64(username + b":" + password)
- return scheme + b" " + v
+ v = binascii.b2a_base64((username + ":" + password).encode("utf8")).decode("ascii")
+ return scheme + " " + v
class NullProxyAuth(object):
@@ -69,7 +69,7 @@ class BasicProxyAuth(NullProxyAuth):
if not parts:
return False
scheme, username, password = parts
- if scheme.lower() != b'basic':
+ if scheme.lower() != 'basic':
return False
if not self.password_manager.test(username, password):
return False
diff --git a/netlib/http/headers.py b/netlib/http/headers.py
new file mode 100644
index 00000000..1511ea2d
--- /dev/null
+++ b/netlib/http/headers.py
@@ -0,0 +1,205 @@
+"""
+
+Unicode Handling
+----------------
+See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
+"""
+from __future__ import absolute_import, print_function, division
+import copy
+try:
+ from collections.abc import MutableMapping
+except ImportError: # Workaround for Python < 3.3
+ from collections import MutableMapping
+
+
+import six
+
+from netlib.utils import always_byte_args
+
+if six.PY2:
+ _native = lambda x: x
+ _asbytes = lambda x: x
+ _always_byte_args = lambda x: x
+else:
+ # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
+ _native = lambda x: x.decode("utf-8", "surrogateescape")
+ _asbytes = lambda x: x.encode("utf-8", "surrogateescape")
+ _always_byte_args = always_byte_args("utf-8", "surrogateescape")
+
+
+class Headers(MutableMapping, object):
+ """
+ Header class which allows both convenient access to individual headers as well as
+ direct access to the underlying raw data. Provides a full dictionary interface.
+
+ Example:
+
+ .. code-block:: python
+
+ # Create header from a list of (header_name, header_value) tuples
+ >>> h = Headers([
+ ["Host","example.com"],
+ ["Accept","text/html"],
+ ["accept","application/xml"]
+ ])
+
+ # Headers mostly behave like a normal dict.
+ >>> h["Host"]
+ "example.com"
+
+ # HTTP Headers are case insensitive
+ >>> h["host"]
+ "example.com"
+
+ # Multiple headers are folded into a single header as per RFC7230
+ >>> h["Accept"]
+ "text/html, application/xml"
+
+ # Setting a header removes all existing headers with the same name.
+ >>> h["Accept"] = "application/text"
+ >>> h["Accept"]
+ "application/text"
+
+ # str(h) returns a HTTP1 header block.
+ >>> print(h)
+ Host: example.com
+ Accept: application/text
+
+ # For full control, the raw header fields can be accessed
+ >>> h.fields
+
+ # Headers can also be crated from keyword arguments
+ >>> h = Headers(host="example.com", content_type="application/xml")
+
+ Caveats:
+ For use with the "Set-Cookie" header, see :py:meth:`get_all`.
+ """
+
+ @_always_byte_args
+ def __init__(self, fields=None, **headers):
+ """
+ Args:
+ fields: (optional) list of ``(name, value)`` header tuples,
+ e.g. ``[("Host","example.com")]``. All names and values must be bytes.
+ **headers: Additional headers to set. Will overwrite existing values from `fields`.
+ For convenience, underscores in header names will be transformed to dashes -
+ this behaviour does not extend to other methods.
+ If ``**headers`` contains multiple keys that have equal ``.lower()`` s,
+ the behavior is undefined.
+ """
+ self.fields = fields or []
+
+ for name, value in self.fields:
+ if not isinstance(name, bytes) or not isinstance(value, bytes):
+ raise ValueError("Headers passed as fields must be bytes.")
+
+ # content_type -> content-type
+ headers = {
+ _asbytes(name).replace(b"_", b"-"): value
+ for name, value in six.iteritems(headers)
+ }
+ self.update(headers)
+
+ def __bytes__(self):
+ if self.fields:
+ return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"
+ else:
+ return b""
+
+ if six.PY2:
+ __str__ = __bytes__
+
+ @_always_byte_args
+ def __getitem__(self, name):
+ values = self.get_all(name)
+ if not values:
+ raise KeyError(name)
+ return ", ".join(values)
+
+ @_always_byte_args
+ def __setitem__(self, name, value):
+ idx = self._index(name)
+
+ # To please the human eye, we insert at the same position the first existing header occured.
+ if idx is not None:
+ del self[name]
+ self.fields.insert(idx, [name, value])
+ else:
+ self.fields.append([name, value])
+
+ @_always_byte_args
+ def __delitem__(self, name):
+ if name not in self:
+ raise KeyError(name)
+ name = name.lower()
+ self.fields = [
+ field for field in self.fields
+ if name != field[0].lower()
+ ]
+
+ def __iter__(self):
+ seen = set()
+ for name, _ in self.fields:
+ name_lower = name.lower()
+ if name_lower not in seen:
+ seen.add(name_lower)
+ yield _native(name)
+
+ def __len__(self):
+ return len(set(name.lower() for name, _ in self.fields))
+
+ # __hash__ = object.__hash__
+
+ def _index(self, name):
+ name = name.lower()
+ for i, field in enumerate(self.fields):
+ if field[0].lower() == name:
+ return i
+ return None
+
+ def __eq__(self, other):
+ if isinstance(other, Headers):
+ return self.fields == other.fields
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ @_always_byte_args
+ def get_all(self, name):
+ """
+ Like :py:meth:`get`, but does not fold multiple headers into a single one.
+ This is useful for Set-Cookie headers, which do not support folding.
+
+ See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
+ """
+ name_lower = name.lower()
+ values = [_native(value) for n, value in self.fields if n.lower() == name_lower]
+ return values
+
+ @_always_byte_args
+ def set_all(self, name, values):
+ """
+ Explicitly set multiple headers for the given key.
+ See: :py:meth:`get_all`
+ """
+ values = map(_asbytes, values) # _always_byte_args does not fix lists
+ if name in self:
+ del self[name]
+ self.fields.extend(
+ [name, value] for value in values
+ )
+
+ def copy(self):
+ return Headers(copy.copy(self.fields))
+
+ # Implement the StateObject protocol from mitmproxy
+ def get_state(self, short=False):
+ return tuple(tuple(field) for field in self.fields)
+
+ def load_state(self, state):
+ self.fields = [list(field) for field in state]
+
+ @classmethod
+ def from_state(cls, state):
+ return cls([list(field) for field in state]) \ No newline at end of file
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
index c2b60a0f..88aeac05 100644
--- a/netlib/http/http1/assemble.py
+++ b/netlib/http/http1/assemble.py
@@ -35,7 +35,7 @@ def assemble_response_head(response):
def assemble_body(headers, body_chunks):
- if b"chunked" in headers.get(b"transfer-encoding", b"").lower():
+ if "chunked" in headers.get("transfer-encoding", "").lower():
for chunk in body_chunks:
if chunk:
yield b"%x\r\n%s\r\n" % (len(chunk), chunk)
@@ -76,8 +76,8 @@ def _assemble_request_line(request, form=None):
def _assemble_request_headers(request):
headers = request.headers.copy()
- if b"host" not in headers and request.scheme and request.host and request.port:
- headers[b"Host"] = utils.hostport(
+ if "host" not in headers and request.scheme and request.host and request.port:
+ headers["host"] = utils.hostport(
request.scheme,
request.host,
request.port
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index c6760ff3..4c898348 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -146,11 +146,11 @@ def connection_close(http_version, headers):
according to RFC 2616 Section 8.1.
"""
# At first, check if we have an explicit Connection header.
- if b"connection" in headers:
+ if "connection" in headers:
tokens = utils.get_header_tokens(headers, "connection")
- if b"close" in tokens:
+ if "close" in tokens:
return True
- elif b"keep-alive" in tokens:
+ elif "keep-alive" in tokens:
return False
# If we don't have a Connection header, HTTP 1.1 connections are assumed to
@@ -181,7 +181,7 @@ def expected_http_body_size(request, response=None):
is_request = False
if is_request:
- if headers.get(b"expect", b"").lower() == b"100-continue":
+ if headers.get("expect", "").lower() == "100-continue":
return 0
else:
if request.method.upper() == b"HEAD":
@@ -193,11 +193,11 @@ def expected_http_body_size(request, response=None):
if response_code in (204, 304):
return 0
- if b"chunked" in headers.get(b"transfer-encoding", b"").lower():
+ if "chunked" in headers.get("transfer-encoding", "").lower():
return None
- if b"content-length" in headers:
+ if "content-length" in headers:
try:
- size = int(headers[b"content-length"])
+ size = int(headers["content-length"])
if size < 0:
raise ValueError()
return size
diff --git a/netlib/http/models.py b/netlib/http/models.py
index 512a764d..55664533 100644
--- a/netlib/http/models.py
+++ b/netlib/http/models.py
@@ -1,201 +1,22 @@
-from __future__ import absolute_import, print_function, division
-import copy
+
from ..odict import ODict
from .. import utils, encoding
-from ..utils import always_bytes, always_byte_args, native
+from ..utils import always_bytes, native
from . import cookies
+from .headers import Headers
-import six
from six.moves import urllib
-try:
- from collections import MutableMapping
-except ImportError:
- from collections.abc import MutableMapping
# TODO: Move somewhere else?
ALPN_PROTO_HTTP1 = b'http/1.1'
ALPN_PROTO_H2 = b'h2'
-HDR_FORM_URLENCODED = b"application/x-www-form-urlencoded"
-HDR_FORM_MULTIPART = b"multipart/form-data"
+HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
+HDR_FORM_MULTIPART = "multipart/form-data"
CONTENT_MISSING = 0
-class Headers(MutableMapping, object):
- """
- Header class which allows both convenient access to individual headers as well as
- direct access to the underlying raw data. Provides a full dictionary interface.
-
- Example:
-
- .. code-block:: python
-
- # Create header from a list of (header_name, header_value) tuples
- >>> h = Headers([
- ["Host","example.com"],
- ["Accept","text/html"],
- ["accept","application/xml"]
- ])
-
- # Headers mostly behave like a normal dict.
- >>> h["Host"]
- "example.com"
-
- # HTTP Headers are case insensitive
- >>> h["host"]
- "example.com"
-
- # Multiple headers are folded into a single header as per RFC7230
- >>> h["Accept"]
- "text/html, application/xml"
-
- # Setting a header removes all existing headers with the same name.
- >>> h["Accept"] = "application/text"
- >>> h["Accept"]
- "application/text"
-
- # str(h) returns a HTTP1 header block.
- >>> print(h)
- Host: example.com
- Accept: application/text
-
- # For full control, the raw header fields can be accessed
- >>> h.fields
-
- # Headers can also be crated from keyword arguments
- >>> h = Headers(host="example.com", content_type="application/xml")
-
- Caveats:
- For use with the "Set-Cookie" header, see :py:meth:`get_all`.
- """
-
- @always_byte_args("ascii")
- def __init__(self, fields=None, **headers):
- """
- Args:
- fields: (optional) list of ``(name, value)`` header tuples,
- e.g. ``[("Host","example.com")]``. All names and values must be bytes.
- **headers: Additional headers to set. Will overwrite existing values from `fields`.
- For convenience, underscores in header names will be transformed to dashes -
- this behaviour does not extend to other methods.
- If ``**headers`` contains multiple keys that have equal ``.lower()`` s,
- the behavior is undefined.
- """
- self.fields = fields or []
-
- # content_type -> content-type
- headers = {
- name.encode("ascii").replace(b"_", b"-"): value
- for name, value in six.iteritems(headers)
- }
- self.update(headers)
-
- def __bytes__(self):
- if self.fields:
- return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"
- else:
- return b""
-
- if six.PY2:
- __str__ = __bytes__
-
- @always_byte_args("ascii")
- def __getitem__(self, name):
- values = self.get_all(name)
- if not values:
- raise KeyError(name)
- return b", ".join(values)
-
- @always_byte_args("ascii")
- def __setitem__(self, name, value):
- idx = self._index(name)
-
- # To please the human eye, we insert at the same position the first existing header occured.
- if idx is not None:
- del self[name]
- self.fields.insert(idx, [name, value])
- else:
- self.fields.append([name, value])
-
- @always_byte_args("ascii")
- def __delitem__(self, name):
- if name not in self:
- raise KeyError(name)
- name = name.lower()
- self.fields = [
- field for field in self.fields
- if name != field[0].lower()
- ]
-
- def __iter__(self):
- seen = set()
- for name, _ in self.fields:
- name_lower = name.lower()
- if name_lower not in seen:
- seen.add(name_lower)
- yield name
-
- def __len__(self):
- return len(set(name.lower() for name, _ in self.fields))
-
- # __hash__ = object.__hash__
-
- def _index(self, name):
- name = name.lower()
- for i, field in enumerate(self.fields):
- if field[0].lower() == name:
- return i
- return None
-
- def __eq__(self, other):
- if isinstance(other, Headers):
- return self.fields == other.fields
- return False
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- @always_byte_args("ascii")
- def get_all(self, name):
- """
- Like :py:meth:`get`, but does not fold multiple headers into a single one.
- This is useful for Set-Cookie headers, which do not support folding.
-
- See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
- """
- name_lower = name.lower()
- values = [value for n, value in self.fields if n.lower() == name_lower]
- return values
-
- def set_all(self, name, values):
- """
- Explicitly set multiple headers for the given key.
- See: :py:meth:`get_all`
- """
- name = always_bytes(name, "ascii")
- values = (always_bytes(value, "ascii") for value in values)
- if name in self:
- del self[name]
- self.fields.extend(
- [name, value] for value in values
- )
-
- def copy(self):
- return Headers(copy.copy(self.fields))
-
- # Implement the StateObject protocol from mitmproxy
- def get_state(self, short=False):
- return tuple(tuple(field) for field in self.fields)
-
- def load_state(self, state):
- self.fields = [list(field) for field in state]
-
- @classmethod
- def from_state(cls, state):
- return cls([list(field) for field in state])
-
-
class Message(object):
def __init__(self, http_version, headers, body, timestamp_start, timestamp_end):
self.http_version = http_version
@@ -216,7 +37,7 @@ class Message(object):
def body(self, body):
self._body = body
if isinstance(body, bytes):
- self.headers[b"content-length"] = str(len(body)).encode()
+ self.headers["content-length"] = str(len(body)).encode()
content = body
@@ -268,8 +89,8 @@ class Request(Message):
response. That is, we remove ETags and If-Modified-Since headers.
"""
delheaders = [
- b"if-modified-since",
- b"if-none-match",
+ "if-modified-since",
+ "if-none-match",
]
for i in delheaders:
self.headers.pop(i, None)
@@ -279,14 +100,14 @@ class Request(Message):
Modifies this request to remove headers that will compress the
resource's data.
"""
- self.headers["accept-encoding"] = b"identity"
+ self.headers["accept-encoding"] = "identity"
def constrain_encoding(self):
"""
Limits the permissible Accept-Encoding values, based on what we can
decode appropriately.
"""
- accept_encoding = native(self.headers.get("accept-encoding"), "ascii")
+ accept_encoding = self.headers.get("accept-encoding")
if accept_encoding:
self.headers["accept-encoding"] = (
', '.join(
@@ -309,9 +130,9 @@ class Request(Message):
indicates non-form data.
"""
if self.body:
- if HDR_FORM_URLENCODED in self.headers.get("content-type", b"").lower():
+ if HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower():
return self.get_form_urlencoded()
- elif HDR_FORM_MULTIPART in self.headers.get("content-type", b"").lower():
+ elif HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower():
return self.get_form_multipart()
return ODict([])
@@ -321,12 +142,12 @@ class Request(Message):
Returns an empty ODict if there is no data or the content-type
indicates non-form data.
"""
- if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type", b"").lower():
+ if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower():
return ODict(utils.urldecode(self.body))
return ODict([])
def get_form_multipart(self):
- if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type", b"").lower():
+ if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower():
return ODict(
utils.multipartdecode(
self.headers,
@@ -341,7 +162,7 @@ class Request(Message):
"""
# FIXME: If there's an existing content-type header indicating a
# url-encoded form, leave it alone.
- self.headers[b"content-type"] = HDR_FORM_URLENCODED
+ self.headers["content-type"] = HDR_FORM_URLENCODED
self.body = utils.urlencode(odict.lst)
def get_path_components(self):
@@ -400,7 +221,7 @@ class Request(Message):
"""
if hostheader and "host" in self.headers:
try:
- return self.headers["host"].decode("idna")
+ return self.headers["host"]
except ValueError:
pass
if self.host:
@@ -420,7 +241,7 @@ class Request(Message):
"""
ret = ODict()
for i in self.headers.get_all("Cookie"):
- ret.extend(cookies.parse_cookie_header(native(i,"ascii")))
+ ret.extend(cookies.parse_cookie_header(i))
return ret
def set_cookies(self, odict):
@@ -499,7 +320,7 @@ class Response(Message):
"""
ret = []
for header in self.headers.get_all("set-cookie"):
- v = cookies.parse_set_cookie_header(native(header, "ascii"))
+ v = cookies.parse_set_cookie_header(header)
if v:
name, value, attrs = v
ret.append([name, [value, attrs]])
diff --git a/netlib/utils.py b/netlib/utils.py
index b9848038..d5b30128 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -269,7 +269,7 @@ def get_header_tokens(headers, key):
"""
if key not in headers:
return []
- tokens = headers[key].split(b",")
+ tokens = headers[key].split(",")
return [token.strip() for token in tokens]
@@ -320,14 +320,14 @@ def parse_content_type(c):
("text", "html", {"charset": "UTF-8"})
"""
- parts = c.split(b";", 1)
- ts = parts[0].split(b"/", 1)
+ parts = c.split(";", 1)
+ ts = parts[0].split("/", 1)
if len(ts) != 2:
return None
d = {}
if len(parts) == 2:
- for i in parts[1].split(b";"):
- clause = i.split(b"=", 1)
+ for i in parts[1].split(";"):
+ clause = i.split("=", 1)
if len(clause) == 2:
d[clause[0].strip()] = clause[1].strip()
return ts[0].lower(), ts[1].lower(), d
@@ -337,13 +337,14 @@ def multipartdecode(headers, content):
"""
Takes a multipart boundary encoded string and returns list of (key, value) tuples.
"""
- v = headers.get(b"Content-Type")
+ v = headers.get("Content-Type")
if v:
v = parse_content_type(v)
if not v:
return []
- boundary = v[2].get(b"boundary")
- if not boundary:
+ try:
+ boundary = v[2]["boundary"].encode("ascii")
+ except (KeyError, UnicodeError):
return []
rx = re.compile(br'\bname="([^"]+)"')
diff --git a/netlib/websockets/protocol.py b/netlib/websockets/protocol.py
index 778fe7e7..e62f8df6 100644
--- a/netlib/websockets/protocol.py
+++ b/netlib/websockets/protocol.py
@@ -80,7 +80,7 @@ class WebsocketsProtocol(object):
Returns an instance of Headers
"""
if not key:
- key = base64.b64encode(os.urandom(16)).decode('utf-8')
+ key = base64.b64encode(os.urandom(16)).decode('ascii')
return Headers(**{
HEADER_WEBSOCKET_KEY: key,
HEADER_WEBSOCKET_VERSION: version,
@@ -95,27 +95,25 @@ class WebsocketsProtocol(object):
"""
return Headers(**{
HEADER_WEBSOCKET_ACCEPT: self.create_server_nonce(key),
- "Connection": "Upgrade",
- "Upgrade": "websocket",
+ "connection": "Upgrade",
+ "upgrade": "websocket",
})
@classmethod
def check_client_handshake(self, headers):
- if headers.get("upgrade") != b"websocket":
+ if headers.get("upgrade") != "websocket":
return
return headers.get(HEADER_WEBSOCKET_KEY)
@classmethod
def check_server_handshake(self, headers):
- if headers.get("upgrade") != b"websocket":
+ if headers.get("upgrade") != "websocket":
return
return headers.get(HEADER_WEBSOCKET_ACCEPT)
@classmethod
def create_server_nonce(self, client_nonce):
- return base64.b64encode(
- binascii.unhexlify(hashlib.sha1(client_nonce + websockets_magic).hexdigest())
- )
+ return base64.b64encode(hashlib.sha1(client_nonce + websockets_magic).digest())