aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http
diff options
context:
space:
mode:
Diffstat (limited to 'netlib/http')
-rw-r--r--netlib/http/__init__.py14
-rw-r--r--netlib/http/authentication.py167
-rw-r--r--netlib/http/cookies.py193
-rw-r--r--netlib/http/headers.py204
-rw-r--r--netlib/http/http1/__init__.py25
-rw-r--r--netlib/http/http1/assemble.py104
-rw-r--r--netlib/http/http1/read.py362
-rw-r--r--netlib/http/http2/__init__.py6
-rw-r--r--netlib/http/http2/connections.py426
-rw-r--r--netlib/http/message.py222
-rw-r--r--netlib/http/request.py356
-rw-r--r--netlib/http/response.py116
-rw-r--r--netlib/http/status_codes.py106
-rw-r--r--netlib/http/user_agents.py52
14 files changed, 2353 insertions, 0 deletions
diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py
new file mode 100644
index 00000000..fd632cd5
--- /dev/null
+++ b/netlib/http/__init__.py
@@ -0,0 +1,14 @@
+from __future__ import absolute_import, print_function, division
+from .request import Request
+from .response import Response
+from .headers import Headers
+from .message import decoded, CONTENT_MISSING
+from . import http1, http2
+
+__all__ = [
+ "Request",
+ "Response",
+ "Headers",
+ "decoded", "CONTENT_MISSING",
+ "http1", "http2",
+]
diff --git a/netlib/http/authentication.py b/netlib/http/authentication.py
new file mode 100644
index 00000000..d769abe5
--- /dev/null
+++ b/netlib/http/authentication.py
@@ -0,0 +1,167 @@
+from __future__ import (absolute_import, print_function, division)
+from argparse import Action, ArgumentTypeError
+import binascii
+
+
+def parse_http_basic_auth(s):
+ words = s.split()
+ if len(words) != 2:
+ return None
+ scheme = words[0]
+ try:
+ user = binascii.a2b_base64(words[1]).decode("utf8", "replace")
+ except binascii.Error:
+ return None
+ parts = user.split(':')
+ if len(parts) != 2:
+ return None
+ return scheme, parts[0], parts[1]
+
+
+def assemble_http_basic_auth(scheme, username, password):
+ v = binascii.b2a_base64((username + ":" + password).encode("utf8")).decode("ascii")
+ return scheme + " " + v
+
+
+class NullProxyAuth(object):
+
+ """
+    No proxy auth at all (returns empty challenge headers)
+ """
+
+ def __init__(self, password_manager):
+ self.password_manager = password_manager
+
+ def clean(self, headers_):
+ """
+ Clean up authentication headers, so they're not passed upstream.
+ """
+
+ def authenticate(self, headers_):
+ """
+ Tests that the user is allowed to use the proxy
+ """
+ return True
+
+ def auth_challenge_headers(self):
+ """
+        Returns a dictionary containing the headers required to challenge the user
+ """
+ return {}
+
+
+class BasicProxyAuth(NullProxyAuth):
+ CHALLENGE_HEADER = 'Proxy-Authenticate'
+ AUTH_HEADER = 'Proxy-Authorization'
+
+ def __init__(self, password_manager, realm):
+ NullProxyAuth.__init__(self, password_manager)
+ self.realm = realm
+
+ def clean(self, headers):
+ del headers[self.AUTH_HEADER]
+
+ def authenticate(self, headers):
+ auth_value = headers.get(self.AUTH_HEADER)
+ if not auth_value:
+ return False
+ parts = parse_http_basic_auth(auth_value)
+ if not parts:
+ return False
+ scheme, username, password = parts
+ if scheme.lower() != 'basic':
+ return False
+ if not self.password_manager.test(username, password):
+ return False
+ self.username = username
+ return True
+
+ def auth_challenge_headers(self):
+ return {self.CHALLENGE_HEADER: 'Basic realm="%s"' % self.realm}
+
+
+class PassMan(object):
+
+ def test(self, username_, password_token_):
+ return False
+
+
+class PassManNonAnon(PassMan):
+
+ """
+ Ensure the user specifies a username, accept any password.
+ """
+
+ def test(self, username, password_token_):
+ if username:
+ return True
+ return False
+
+
+class PassManHtpasswd(PassMan):
+
+ """
+ Read usernames and passwords from an htpasswd file
+ """
+
+ def __init__(self, path):
+ """
+ Raises ValueError if htpasswd file is invalid.
+ """
+ import passlib.apache
+ self.htpasswd = passlib.apache.HtpasswdFile(path)
+
+ def test(self, username, password_token):
+ return bool(self.htpasswd.check_password(username, password_token))
+
+
+class PassManSingleUser(PassMan):
+
+ def __init__(self, username, password):
+ self.username, self.password = username, password
+
+ def test(self, username, password_token):
+ return self.username == username and self.password == password_token
+
+
+class AuthAction(Action):
+
+ """
+    Helper class to allow seamless integration into argparse. Example usage:
+ parser.add_argument(
+ "--nonanonymous",
+ action=NonanonymousAuthAction, nargs=0,
+        help="Allow access to any user as long as credentials are specified."
+ )
+ """
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ passman = self.getPasswordManager(values)
+ authenticator = BasicProxyAuth(passman, "mitmproxy")
+ setattr(namespace, self.dest, authenticator)
+
+ def getPasswordManager(self, s): # pragma: nocover
+ raise NotImplementedError()
+
+
+class SingleuserAuthAction(AuthAction):
+
+ def getPasswordManager(self, s):
+ if len(s.split(':')) != 2:
+ raise ArgumentTypeError(
+ "Invalid single-user specification. Please use the format username:password"
+ )
+ username, password = s.split(':')
+ return PassManSingleUser(username, password)
+
+
+class NonanonymousAuthAction(AuthAction):
+
+ def getPasswordManager(self, s):
+ return PassManNonAnon()
+
+
+class HtpasswdAuthAction(AuthAction):
+
+ def getPasswordManager(self, s):
+ return PassManHtpasswd(s)
diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py
new file mode 100644
index 00000000..18544b5e
--- /dev/null
+++ b/netlib/http/cookies.py
@@ -0,0 +1,193 @@
+import re
+
+from .. import odict
+
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number
+of ways. We try to be as permissive as possible, and to retain even mal-formed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
+not parse the comma-separated variant of Set-Cookie that allows multiple
+cookies to be set in a single header. Technically this should be feasible, but
+it turns out that violations of RFC6265 that make the parsing problem
+indeterminate are much more common than genuine occurrences of the multi-cookie
+variants. Serialization follows RFC6265.
+
+ http://tools.ietf.org/html/rfc6265
+ http://tools.ietf.org/html/rfc2109
+ http://tools.ietf.org/html/rfc2965
+"""
+
+# TODO: Disallow LHS-only Cookie values
+
+
+def _read_until(s, start, term):
+ """
+ Read until one of the characters in term is reached.
+ """
+ if start == len(s):
+ return "", start + 1
+ for i in range(start, len(s)):
+ if s[i] in term:
+ return s[start:i], i
+ return s[start:i + 1], i + 1
+
+
+def _read_token(s, start):
+ """
+ Read a token - the LHS of a token/value pair in a cookie.
+ """
+ return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+ """
+ start: offset to the first quote of the string to be read
+
+ A sort of loose super-set of the various quoted string specifications.
+
+ RFC6265 disallows backslashes or double quotes within quoted strings.
+ Prior RFCs use backslashes to escape. This leaves us free to apply
+ backslash escaping by default and be compatible with everything.
+ """
+ escaping = False
+ ret = []
+ # Skip the first quote
+ i = start # initialize in case the loop doesn't run.
+ for i in range(start + 1, len(s)):
+ if escaping:
+ ret.append(s[i])
+ escaping = False
+ elif s[i] == '"':
+ break
+ elif s[i] == "\\":
+ escaping = True
+ else:
+ ret.append(s[i])
+ return "".join(ret), i + 1
+
+
+def _read_value(s, start, delims):
+ """
+ Reads a value - the RHS of a token/value pair in a cookie.
+
+    delims: Characters that terminate an unquoted value. Quoted values
+    (starting with '"') are read up to the closing quote instead.
+ """
+ if start >= len(s):
+ return "", start
+ elif s[start] == '"':
+ return _read_quoted_string(s, start)
+ else:
+ return _read_until(s, start, delims)
+
+
+def _read_pairs(s, off=0):
+ """
+ Read pairs of lhs=rhs values.
+
+ off: start offset
+    Returns a (pairs, offset) tuple.
+ """
+ vals = []
+ while True:
+ lhs, off = _read_token(s, off)
+ lhs = lhs.lstrip()
+ if lhs:
+ rhs = None
+ if off < len(s):
+ if s[off] == "=":
+ rhs, off = _read_value(s, off + 1, ";")
+ vals.append([lhs, rhs])
+ off += 1
+ if not off < len(s):
+ break
+ return vals, off
+
+
+def _has_special(s):
+ for i in s:
+ if i in '",;\\':
+ return True
+ o = ord(i)
+ if o < 0x21 or o > 0x7e:
+ return True
+ return False
+
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+def _format_pairs(lst, specials=(), sep="; "):
+ """
+ specials: A lower-cased list of keys that will not be quoted.
+ """
+ vals = []
+ for k, v in lst:
+ if v is None:
+ vals.append(k)
+ else:
+ if k.lower() not in specials and _has_special(v):
+ v = ESCAPE.sub(r"\\\1", v)
+ v = '"%s"' % v
+ vals.append("%s=%s" % (k, v))
+ return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+ return _format_pairs(
+ lst,
+ specials=("expires", "path")
+ )
+
+
+def _parse_set_cookie_pairs(s):
+ """
+ For Set-Cookie, we support multiple cookies as described in RFC2109.
+ This function therefore returns a list of lists.
+ """
+ pairs, off_ = _read_pairs(s)
+ return pairs
+
+
+def parse_set_cookie_header(line):
+ """
+ Parse a Set-Cookie header value
+
+ Returns a (name, value, attrs) tuple, or None, where attrs is an
+ ODictCaseless set of attributes. No attempt is made to parse attribute
+ values - they are treated purely as strings.
+ """
+ pairs = _parse_set_cookie_pairs(line)
+ if pairs:
+ return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
+
+
+def format_set_cookie_header(name, value, attrs):
+ """
+ Formats a Set-Cookie header value.
+ """
+ pairs = [[name, value]]
+ pairs.extend(attrs.lst)
+ return _format_set_cookie_pairs(pairs)
+
+
+def parse_cookie_header(line):
+ """
+ Parse a Cookie header value.
+ Returns a (possibly empty) ODict object.
+ """
+ pairs, off_ = _read_pairs(line)
+ return odict.ODict(pairs)
+
+
+def format_cookie_header(od):
+ """
+ Formats a Cookie header value.
+ """
+ return _format_pairs(od.lst)
diff --git a/netlib/http/headers.py b/netlib/http/headers.py
new file mode 100644
index 00000000..78404796
--- /dev/null
+++ b/netlib/http/headers.py
@@ -0,0 +1,204 @@
+"""
+
+Unicode Handling
+----------------
+See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
+"""
+from __future__ import absolute_import, print_function, division
+import copy
+try:
+ from collections.abc import MutableMapping
+except ImportError: # pragma: nocover
+ from collections import MutableMapping # Workaround for Python < 3.3
+
+
+import six
+
+from netlib.utils import always_byte_args, always_bytes, Serializable
+
+if six.PY2: # pragma: nocover
+ _native = lambda x: x
+ _always_bytes = lambda x: x
+ _always_byte_args = lambda x: x
+else:
+ # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
+ _native = lambda x: x.decode("utf-8", "surrogateescape")
+ _always_bytes = lambda x: always_bytes(x, "utf-8", "surrogateescape")
+ _always_byte_args = always_byte_args("utf-8", "surrogateescape")
+
+
+class Headers(MutableMapping, Serializable):
+ """
+ Header class which allows both convenient access to individual headers as well as
+ direct access to the underlying raw data. Provides a full dictionary interface.
+
+ Example:
+
+ .. code-block:: python
+
+ # Create headers with keyword arguments
+ >>> h = Headers(host="example.com", content_type="application/xml")
+
+ # Headers mostly behave like a normal dict.
+ >>> h["Host"]
+ "example.com"
+
+ # HTTP Headers are case insensitive
+ >>> h["host"]
+ "example.com"
+
+        # Headers can also be created from a list of raw (header_name, header_value) byte tuples
+ >>> h = Headers([
+ [b"Host",b"example.com"],
+ [b"Accept",b"text/html"],
+ [b"accept",b"application/xml"]
+ ])
+
+ # Multiple headers are folded into a single header as per RFC7230
+ >>> h["Accept"]
+ "text/html, application/xml"
+
+ # Setting a header removes all existing headers with the same name.
+ >>> h["Accept"] = "application/text"
+ >>> h["Accept"]
+ "application/text"
+
+ # bytes(h) returns a HTTP1 header block.
+ >>> print(bytes(h))
+ Host: example.com
+ Accept: application/text
+
+ # For full control, the raw header fields can be accessed
+ >>> h.fields
+
+ Caveats:
+ For use with the "Set-Cookie" header, see :py:meth:`get_all`.
+ """
+
+ @_always_byte_args
+ def __init__(self, fields=None, **headers):
+ """
+ Args:
+ fields: (optional) list of ``(name, value)`` header byte tuples,
+ e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes.
+ **headers: Additional headers to set. Will overwrite existing values from `fields`.
+ For convenience, underscores in header names will be transformed to dashes -
+ this behaviour does not extend to other methods.
+ If ``**headers`` contains multiple keys that have equal ``.lower()`` s,
+ the behavior is undefined.
+ """
+ self.fields = fields or []
+
+ for name, value in self.fields:
+ if not isinstance(name, bytes) or not isinstance(value, bytes):
+ raise ValueError("Headers passed as fields must be bytes.")
+
+ # content_type -> content-type
+ headers = {
+ _always_bytes(name).replace(b"_", b"-"): value
+ for name, value in six.iteritems(headers)
+ }
+ self.update(headers)
+
+ def __bytes__(self):
+ if self.fields:
+ return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"
+ else:
+ return b""
+
+ if six.PY2: # pragma: nocover
+ __str__ = __bytes__
+
+ @_always_byte_args
+ def __getitem__(self, name):
+ values = self.get_all(name)
+ if not values:
+ raise KeyError(name)
+ return ", ".join(values)
+
+ @_always_byte_args
+ def __setitem__(self, name, value):
+ idx = self._index(name)
+
+        # To please the human eye, we insert at the same position the first existing header occurred.
+ if idx is not None:
+ del self[name]
+ self.fields.insert(idx, [name, value])
+ else:
+ self.fields.append([name, value])
+
+ @_always_byte_args
+ def __delitem__(self, name):
+ if name not in self:
+ raise KeyError(name)
+ name = name.lower()
+ self.fields = [
+ field for field in self.fields
+ if name != field[0].lower()
+ ]
+
+ def __iter__(self):
+ seen = set()
+ for name, _ in self.fields:
+ name_lower = name.lower()
+ if name_lower not in seen:
+ seen.add(name_lower)
+ yield _native(name)
+
+ def __len__(self):
+ return len(set(name.lower() for name, _ in self.fields))
+
+ # __hash__ = object.__hash__
+
+ def _index(self, name):
+ name = name.lower()
+ for i, field in enumerate(self.fields):
+ if field[0].lower() == name:
+ return i
+ return None
+
+ def __eq__(self, other):
+ if isinstance(other, Headers):
+ return self.fields == other.fields
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ @_always_byte_args
+ def get_all(self, name):
+ """
+ Like :py:meth:`get`, but does not fold multiple headers into a single one.
+ This is useful for Set-Cookie headers, which do not support folding.
+
+ See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
+ """
+ name_lower = name.lower()
+ values = [_native(value) for n, value in self.fields if n.lower() == name_lower]
+ return values
+
+ @_always_byte_args
+ def set_all(self, name, values):
+ """
+ Explicitly set multiple headers for the given key.
+ See: :py:meth:`get_all`
+ """
+ values = map(_always_bytes, values) # _always_byte_args does not fix lists
+ if name in self:
+ del self[name]
+ self.fields.extend(
+ [name, value] for value in values
+ )
+
+ def copy(self):
+ return Headers(copy.copy(self.fields))
+
+ def get_state(self):
+ return tuple(tuple(field) for field in self.fields)
+
+ def set_state(self, state):
+ self.fields = [list(field) for field in state]
+
+ @classmethod
+ def from_state(cls, state):
+ return cls([list(field) for field in state]) \ No newline at end of file
diff --git a/netlib/http/http1/__init__.py b/netlib/http/http1/__init__.py
new file mode 100644
index 00000000..2aa7e26a
--- /dev/null
+++ b/netlib/http/http1/__init__.py
@@ -0,0 +1,25 @@
+from __future__ import absolute_import, print_function, division
+from .read import (
+ read_request, read_request_head,
+ read_response, read_response_head,
+ read_body,
+ connection_close,
+ expected_http_body_size,
+)
+from .assemble import (
+ assemble_request, assemble_request_head,
+ assemble_response, assemble_response_head,
+ assemble_body,
+)
+
+
+__all__ = [
+ "read_request", "read_request_head",
+ "read_response", "read_response_head",
+ "read_body",
+ "connection_close",
+ "expected_http_body_size",
+ "assemble_request", "assemble_request_head",
+ "assemble_response", "assemble_response_head",
+ "assemble_body",
+]
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
new file mode 100644
index 00000000..785ee8d3
--- /dev/null
+++ b/netlib/http/http1/assemble.py
@@ -0,0 +1,104 @@
+from __future__ import absolute_import, print_function, division
+
+from ... import utils
+import itertools
+from ...exceptions import HttpException
+from .. import CONTENT_MISSING
+
+
+def assemble_request(request):
+ if request.content == CONTENT_MISSING:
+ raise HttpException("Cannot assemble flow with CONTENT_MISSING")
+ head = assemble_request_head(request)
+ body = b"".join(assemble_body(request.data.headers, [request.data.content]))
+ return head + body
+
+
+def assemble_request_head(request):
+ first_line = _assemble_request_line(request.data)
+ headers = _assemble_request_headers(request.data)
+ return b"%s\r\n%s\r\n" % (first_line, headers)
+
+
+def assemble_response(response):
+ if response.content == CONTENT_MISSING:
+ raise HttpException("Cannot assemble flow with CONTENT_MISSING")
+ head = assemble_response_head(response)
+ body = b"".join(assemble_body(response.data.headers, [response.data.content]))
+ return head + body
+
+
+def assemble_response_head(response):
+ first_line = _assemble_response_line(response.data)
+ headers = _assemble_response_headers(response.data)
+ return b"%s\r\n%s\r\n" % (first_line, headers)
+
+
+def assemble_body(headers, body_chunks):
+ if "chunked" in headers.get("transfer-encoding", "").lower():
+ for chunk in body_chunks:
+ if chunk:
+ yield b"%x\r\n%s\r\n" % (len(chunk), chunk)
+ yield b"0\r\n\r\n"
+ else:
+ for chunk in body_chunks:
+ yield chunk
+
+
+def _assemble_request_line(request_data):
+ """
+ Args:
+ request_data (netlib.http.request.RequestData)
+ """
+ form = request_data.first_line_format
+ if form == "relative":
+ return b"%s %s %s" % (
+ request_data.method,
+ request_data.path,
+ request_data.http_version
+ )
+ elif form == "authority":
+ return b"%s %s:%d %s" % (
+ request_data.method,
+ request_data.host,
+ request_data.port,
+ request_data.http_version
+ )
+ elif form == "absolute":
+ return b"%s %s://%s:%d%s %s" % (
+ request_data.method,
+ request_data.scheme,
+ request_data.host,
+ request_data.port,
+ request_data.path,
+ request_data.http_version
+ )
+ else:
+ raise RuntimeError("Invalid request form")
+
+
+def _assemble_request_headers(request_data):
+ """
+ Args:
+ request_data (netlib.http.request.RequestData)
+ """
+ headers = request_data.headers.copy()
+ if "host" not in headers and request_data.scheme and request_data.host and request_data.port:
+ headers["host"] = utils.hostport(
+ request_data.scheme,
+ request_data.host,
+ request_data.port
+ )
+ return bytes(headers)
+
+
+def _assemble_response_line(response_data):
+ return b"%s %d %s" % (
+ response_data.http_version,
+ response_data.status_code,
+ response_data.reason,
+ )
+
+
+def _assemble_response_headers(response):
+ return bytes(response.headers)
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
new file mode 100644
index 00000000..6e3a1b93
--- /dev/null
+++ b/netlib/http/http1/read.py
@@ -0,0 +1,362 @@
+from __future__ import absolute_import, print_function, division
+import time
+import sys
+import re
+
+from ... import utils
+from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
+from .. import Request, Response, Headers
+
+
+def read_request(rfile, body_size_limit=None):
+ request = read_request_head(rfile)
+ expected_body_size = expected_http_body_size(request)
+ request.data.content = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit))
+ request.timestamp_end = time.time()
+ return request
+
+
+def read_request_head(rfile):
+ """
+ Parse an HTTP request head (request line + headers) from an input stream
+
+ Args:
+ rfile: The input stream
+
+ Returns:
+ The HTTP request object (without body)
+
+ Raises:
+ HttpReadDisconnect: No bytes can be read from rfile.
+ HttpSyntaxException: The input is malformed HTTP.
+        HttpException: Any other error occurred.
+ """
+ timestamp_start = time.time()
+ if hasattr(rfile, "reset_timestamps"):
+ rfile.reset_timestamps()
+
+ form, method, scheme, host, port, path, http_version = _read_request_line(rfile)
+ headers = _read_headers(rfile)
+
+ if hasattr(rfile, "first_byte_timestamp"):
+ # more accurate timestamp_start
+ timestamp_start = rfile.first_byte_timestamp
+
+ return Request(
+ form, method, scheme, host, port, path, http_version, headers, None, timestamp_start
+ )
+
+
+def read_response(rfile, request, body_size_limit=None):
+ response = read_response_head(rfile)
+ expected_body_size = expected_http_body_size(request, response)
+ response.data.content = b"".join(read_body(rfile, expected_body_size, body_size_limit))
+ response.timestamp_end = time.time()
+ return response
+
+
+def read_response_head(rfile):
+ """
+ Parse an HTTP response head (response line + headers) from an input stream
+
+ Args:
+ rfile: The input stream
+
+ Returns:
+ The HTTP request object (without body)
+
+ Raises:
+ HttpReadDisconnect: No bytes can be read from rfile.
+ HttpSyntaxException: The input is malformed HTTP.
+        HttpException: Any other error occurred.
+ """
+
+ timestamp_start = time.time()
+ if hasattr(rfile, "reset_timestamps"):
+ rfile.reset_timestamps()
+
+ http_version, status_code, message = _read_response_line(rfile)
+ headers = _read_headers(rfile)
+
+ if hasattr(rfile, "first_byte_timestamp"):
+ # more accurate timestamp_start
+ timestamp_start = rfile.first_byte_timestamp
+
+ return Response(http_version, status_code, message, headers, None, timestamp_start)
+
+
+def read_body(rfile, expected_size, limit=None, max_chunk_size=4096):
+ """
+ Read an HTTP message body
+
+ Args:
+ rfile: The input stream
+        expected_size: The expected body size (see :py:meth:`expected_http_body_size`)
+ limit: Maximum body size
+        max_chunk_size: Maximum chunk size that gets yielded
+
+ Returns:
+ A generator that yields byte chunks of the content.
+
+ Raises:
+ HttpException, if an error occurs
+
+ Caveats:
+ max_chunk_size is not considered if the transfer encoding is chunked.
+ """
+ if not limit or limit < 0:
+ limit = sys.maxsize
+ if not max_chunk_size:
+ max_chunk_size = limit
+
+ if expected_size is None:
+ for x in _read_chunked(rfile, limit):
+ yield x
+ elif expected_size >= 0:
+ if limit is not None and expected_size > limit:
+ raise HttpException(
+ "HTTP Body too large. "
+ "Limit is {}, content length was advertised as {}".format(limit, expected_size)
+ )
+ bytes_left = expected_size
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ content = rfile.read(chunk_size)
+ if len(content) < chunk_size:
+ raise HttpException("Unexpected EOF")
+ yield content
+ bytes_left -= chunk_size
+ else:
+ bytes_left = limit
+ while bytes_left:
+ chunk_size = min(bytes_left, max_chunk_size)
+ content = rfile.read(chunk_size)
+ if not content:
+ return
+ yield content
+ bytes_left -= chunk_size
+ not_done = rfile.read(1)
+ if not_done:
+ raise HttpException("HTTP body too large. Limit is {}.".format(limit))
+
+
+def connection_close(http_version, headers):
+ """
+ Checks the message to see if the client connection should be closed
+ according to RFC 2616 Section 8.1.
+ """
+ # At first, check if we have an explicit Connection header.
+ if "connection" in headers:
+ tokens = utils.get_header_tokens(headers, "connection")
+ if "close" in tokens:
+ return True
+ elif "keep-alive" in tokens:
+ return False
+
+ # If we don't have a Connection header, HTTP 1.1 connections are assumed to
+ # be persistent
+ return http_version != "HTTP/1.1" and http_version != b"HTTP/1.1" # FIXME: Remove one case.
+
+
+def expected_http_body_size(request, response=None):
+ """
+ Returns:
+ The expected body length:
+ - a positive integer, if the size is known in advance
+            - None, if the size is unknown in advance (chunked encoding)
+ - -1, if all data should be read until end of stream.
+
+ Raises:
+ HttpSyntaxException, if the content length header is invalid
+ """
+ # Determine response size according to
+ # http://tools.ietf.org/html/rfc7230#section-3.3
+ if not response:
+ headers = request.headers
+ response_code = None
+ is_request = True
+ else:
+ headers = response.headers
+ response_code = response.status_code
+ is_request = False
+
+ if is_request:
+ if headers.get("expect", "").lower() == "100-continue":
+ return 0
+ else:
+ if request.method.upper() == "HEAD":
+ return 0
+ if 100 <= response_code <= 199:
+ return 0
+ if response_code == 200 and request.method.upper() == "CONNECT":
+ return 0
+ if response_code in (204, 304):
+ return 0
+
+ if "chunked" in headers.get("transfer-encoding", "").lower():
+ return None
+ if "content-length" in headers:
+ try:
+ size = int(headers["content-length"])
+ if size < 0:
+ raise ValueError()
+ return size
+ except ValueError:
+ raise HttpSyntaxException("Unparseable Content Length")
+ if is_request:
+ return 0
+ return -1
+
+
+def _get_first_line(rfile):
+ try:
+ line = rfile.readline()
+ if line == b"\r\n" or line == b"\n":
+ # Possible leftover from previous message
+ line = rfile.readline()
+ except TcpDisconnect:
+ raise HttpReadDisconnect("Remote disconnected")
+ if not line:
+ raise HttpReadDisconnect("Remote disconnected")
+ return line.strip()
+
+
+def _read_request_line(rfile):
+ try:
+ line = _get_first_line(rfile)
+ except HttpReadDisconnect:
+ # We want to provide a better error message.
+ raise HttpReadDisconnect("Client disconnected")
+
+ try:
+ method, path, http_version = line.split(b" ")
+
+ if path == b"*" or path.startswith(b"/"):
+ form = "relative"
+ scheme, host, port = None, None, None
+ elif method == b"CONNECT":
+ form = "authority"
+ host, port = _parse_authority_form(path)
+ scheme, path = None, None
+ else:
+ form = "absolute"
+ scheme, host, port, path = utils.parse_url(path)
+
+ _check_http_version(http_version)
+ except ValueError:
+ raise HttpSyntaxException("Bad HTTP request line: {}".format(line))
+
+ return form, method, scheme, host, port, path, http_version
+
+
+def _parse_authority_form(hostport):
+ """
+ Returns (host, port) if hostport is a valid authority-form host specification.
+ http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
+
+ Raises:
+ ValueError, if the input is malformed
+ """
+ try:
+ host, port = hostport.split(b":")
+ port = int(port)
+ if not utils.is_valid_host(host) or not utils.is_valid_port(port):
+ raise ValueError()
+ except ValueError:
+ raise HttpSyntaxException("Invalid host specification: {}".format(hostport))
+
+ return host, port
+
+
+def _read_response_line(rfile):
+ try:
+ line = _get_first_line(rfile)
+ except HttpReadDisconnect:
+ # We want to provide a better error message.
+ raise HttpReadDisconnect("Server disconnected")
+
+ try:
+
+ parts = line.split(b" ", 2)
+ if len(parts) == 2: # handle missing message gracefully
+ parts.append(b"")
+
+ http_version, status_code, message = parts
+ status_code = int(status_code)
+ _check_http_version(http_version)
+
+ except ValueError:
+ raise HttpSyntaxException("Bad HTTP response line: {}".format(line))
+
+ return http_version, status_code, message
+
+
+def _check_http_version(http_version):
+ if not re.match(br"^HTTP/\d\.\d$", http_version):
+ raise HttpSyntaxException("Unknown HTTP version: {}".format(http_version))
+
+
+def _read_headers(rfile):
+ """
+ Read a set of headers.
+ Stop once a blank line is reached.
+
+ Returns:
+ A headers object
+
+ Raises:
+ HttpSyntaxException
+ """
+ ret = []
+ while True:
+ line = rfile.readline()
+ if not line or line == b"\r\n" or line == b"\n":
+ break
+ if line[0] in b" \t":
+ if not ret:
+ raise HttpSyntaxException("Invalid headers")
+ # continued header
+ ret[-1][1] = ret[-1][1] + b'\r\n ' + line.strip()
+ else:
+ try:
+ name, value = line.split(b":", 1)
+ value = value.strip()
+ if not name:
+ raise ValueError()
+ ret.append([name, value])
+ except ValueError:
+ raise HttpSyntaxException("Invalid headers")
+ return Headers(ret)
+
+
+def _read_chunked(rfile, limit=sys.maxsize):
+ """
+ Read a HTTP body with chunked transfer encoding.
+
+ Args:
+ rfile: the input file
+ limit: A positive integer
+ """
+ total = 0
+ while True:
+ line = rfile.readline(128)
+ if line == b"":
+ raise HttpException("Connection closed prematurely")
+ if line != b"\r\n" and line != b"\n":
+ try:
+ length = int(line, 16)
+ except ValueError:
+ raise HttpSyntaxException("Invalid chunked encoding length: {}".format(line))
+ total += length
+ if total > limit:
+ raise HttpException(
+ "HTTP Body too large. Limit is {}, "
+ "chunked content longer than {}".format(limit, total)
+ )
+ chunk = rfile.read(length)
+ suffix = rfile.readline(5)
+ if suffix != b"\r\n":
+ raise HttpSyntaxException("Malformed chunked body")
+ if length == 0:
+ return
+ yield chunk
diff --git a/netlib/http/http2/__init__.py b/netlib/http/http2/__init__.py
new file mode 100644
index 00000000..7043d36f
--- /dev/null
+++ b/netlib/http/http2/__init__.py
@@ -0,0 +1,6 @@
+from __future__ import absolute_import, print_function, division
+from .connections import HTTP2Protocol
+
+__all__ = [
+ "HTTP2Protocol"
+]
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
new file mode 100644
index 00000000..52fa7193
--- /dev/null
+++ b/netlib/http/http2/connections.py
@@ -0,0 +1,426 @@
+from __future__ import (absolute_import, print_function, division)
+import itertools
+import time
+
+from hpack.hpack import Encoder, Decoder
+from ... import utils
+from .. import Headers, Response, Request
+
+from hyperframe import frame
+
+
+class TCPHandler(object):
+    """
+    Minimal connection wrapper: pairs a read file object with an optional
+    write file object, mimicking the interface HTTP2Protocol expects from
+    a full TCP connection handler.
+    """
+
+    def __init__(self, rfile, wfile=None):
+        self.rfile = rfile
+        self.wfile = wfile
+
+
+class HTTP2Protocol(object):
+
+ ERROR_CODES = utils.BiDi(
+ NO_ERROR=0x0,
+ PROTOCOL_ERROR=0x1,
+ INTERNAL_ERROR=0x2,
+ FLOW_CONTROL_ERROR=0x3,
+ SETTINGS_TIMEOUT=0x4,
+ STREAM_CLOSED=0x5,
+ FRAME_SIZE_ERROR=0x6,
+ REFUSED_STREAM=0x7,
+ CANCEL=0x8,
+ COMPRESSION_ERROR=0x9,
+ CONNECT_ERROR=0xa,
+ ENHANCE_YOUR_CALM=0xb,
+ INADEQUATE_SECURITY=0xc,
+ HTTP_1_1_REQUIRED=0xd
+ )
+
+ CLIENT_CONNECTION_PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n'
+
+ HTTP2_DEFAULT_SETTINGS = {
+ frame.SettingsFrame.HEADER_TABLE_SIZE: 4096,
+ frame.SettingsFrame.ENABLE_PUSH: 1,
+ frame.SettingsFrame.MAX_CONCURRENT_STREAMS: None,
+ frame.SettingsFrame.INITIAL_WINDOW_SIZE: 2 ** 16 - 1,
+ frame.SettingsFrame.MAX_FRAME_SIZE: 2 ** 14,
+ frame.SettingsFrame.MAX_HEADER_LIST_SIZE: None,
+ }
+
+ def __init__(
+ self,
+ tcp_handler=None,
+ rfile=None,
+ wfile=None,
+ is_server=False,
+ dump_frames=False,
+ encoder=None,
+ decoder=None,
+ unhandled_frame_cb=None,
+ ):
+ self.tcp_handler = tcp_handler or TCPHandler(rfile, wfile)
+ self.is_server = is_server
+ self.dump_frames = dump_frames
+ self.encoder = encoder or Encoder()
+ self.decoder = decoder or Decoder()
+ self.unhandled_frame_cb = unhandled_frame_cb
+
+ self.http2_settings = self.HTTP2_DEFAULT_SETTINGS.copy()
+ self.current_stream_id = None
+ self.connection_preface_performed = False
+
+ def read_request(
+ self,
+ __rfile,
+ include_body=True,
+ body_size_limit=None,
+ allow_empty=False,
+ ):
+ if body_size_limit is not None:
+ raise NotImplementedError()
+
+ self.perform_connection_preface()
+
+ timestamp_start = time.time()
+ if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
+ self.tcp_handler.rfile.reset_timestamps()
+
+ stream_id, headers, body = self._receive_transmission(
+ include_body=include_body,
+ )
+
+ if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
+ # more accurate timestamp_start
+ timestamp_start = self.tcp_handler.rfile.first_byte_timestamp
+
+ timestamp_end = time.time()
+
+ authority = headers.get(':authority', b'')
+ method = headers.get(':method', 'GET')
+ scheme = headers.get(':scheme', 'https')
+ path = headers.get(':path', '/')
+ host = None
+ port = None
+
+ if path == '*' or path.startswith("/"):
+ form_in = "relative"
+ elif method == 'CONNECT':
+ form_in = "authority"
+ if ":" in authority:
+ host, port = authority.split(":", 1)
+ else:
+ host = authority
+ else:
+ form_in = "absolute"
+ # FIXME: verify if path or :host contains what we need
+ scheme, host, port, _ = utils.parse_url(path)
+ scheme = scheme.decode('ascii')
+ host = host.decode('ascii')
+
+ if host is None:
+ host = 'localhost'
+ if port is None:
+ port = 80 if scheme == 'http' else 443
+ port = int(port)
+
+ request = Request(
+ form_in,
+ method.encode('ascii'),
+ scheme.encode('ascii'),
+ host.encode('ascii'),
+ port,
+ path.encode('ascii'),
+ b"HTTP/2.0",
+ headers,
+ body,
+ timestamp_start,
+ timestamp_end,
+ )
+ request.stream_id = stream_id
+
+ return request
+
+ def read_response(
+ self,
+ __rfile,
+ request_method=b'',
+ body_size_limit=None,
+ include_body=True,
+ stream_id=None,
+ ):
+ if body_size_limit is not None:
+ raise NotImplementedError()
+
+ self.perform_connection_preface()
+
+ timestamp_start = time.time()
+ if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
+ self.tcp_handler.rfile.reset_timestamps()
+
+ stream_id, headers, body = self._receive_transmission(
+ stream_id=stream_id,
+ include_body=include_body,
+ )
+
+ if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
+ # more accurate timestamp_start
+ timestamp_start = self.tcp_handler.rfile.first_byte_timestamp
+
+ if include_body:
+ timestamp_end = time.time()
+ else:
+ timestamp_end = None
+
+ response = Response(
+ b"HTTP/2.0",
+ int(headers.get(':status', 502)),
+ b'',
+ headers,
+ body,
+ timestamp_start=timestamp_start,
+ timestamp_end=timestamp_end,
+ )
+ response.stream_id = stream_id
+
+ return response
+
+ def assemble(self, message):
+ if isinstance(message, Request):
+ return self.assemble_request(message)
+ elif isinstance(message, Response):
+ return self.assemble_response(message)
+ else:
+ raise ValueError("HTTP message not supported.")
+
+ def assemble_request(self, request):
+ assert isinstance(request, Request)
+
+ authority = self.tcp_handler.sni if self.tcp_handler.sni else self.tcp_handler.address.host
+ if self.tcp_handler.address.port != 443:
+ authority += ":%d" % self.tcp_handler.address.port
+
+ headers = request.headers.copy()
+
+ if ':authority' not in headers:
+ headers.fields.insert(0, (b':authority', authority.encode('ascii')))
+ if ':scheme' not in headers:
+ headers.fields.insert(0, (b':scheme', request.scheme.encode('ascii')))
+ if ':path' not in headers:
+ headers.fields.insert(0, (b':path', request.path.encode('ascii')))
+ if ':method' not in headers:
+ headers.fields.insert(0, (b':method', request.method.encode('ascii')))
+
+ if hasattr(request, 'stream_id'):
+ stream_id = request.stream_id
+ else:
+ stream_id = self._next_stream_id()
+
+ return list(itertools.chain(
+ self._create_headers(headers, stream_id, end_stream=(request.body is None or len(request.body) == 0)),
+ self._create_body(request.body, stream_id)))
+
+ def assemble_response(self, response):
+ assert isinstance(response, Response)
+
+ headers = response.headers.copy()
+
+ if ':status' not in headers:
+ headers.fields.insert(0, (b':status', str(response.status_code).encode('ascii')))
+
+ if hasattr(response, 'stream_id'):
+ stream_id = response.stream_id
+ else:
+ stream_id = self._next_stream_id()
+
+ return list(itertools.chain(
+ self._create_headers(headers, stream_id, end_stream=(response.body is None or len(response.body) == 0)),
+ self._create_body(response.body, stream_id),
+ ))
+
+ def perform_connection_preface(self, force=False):
+ if force or not self.connection_preface_performed:
+ if self.is_server:
+ self.perform_server_connection_preface(force)
+ else:
+ self.perform_client_connection_preface(force)
+
+ def perform_server_connection_preface(self, force=False):
+ if force or not self.connection_preface_performed:
+ self.connection_preface_performed = True
+
+ magic_length = len(self.CLIENT_CONNECTION_PREFACE)
+ magic = self.tcp_handler.rfile.safe_read(magic_length)
+ assert magic == self.CLIENT_CONNECTION_PREFACE
+
+ frm = frame.SettingsFrame(settings={
+ frame.SettingsFrame.ENABLE_PUSH: 0,
+ frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 1,
+ })
+ self.send_frame(frm, hide=True)
+ self._receive_settings(hide=True)
+
+ def perform_client_connection_preface(self, force=False):
+ if force or not self.connection_preface_performed:
+ self.connection_preface_performed = True
+
+ self.tcp_handler.wfile.write(self.CLIENT_CONNECTION_PREFACE)
+
+ self.send_frame(frame.SettingsFrame(), hide=True)
+ self._receive_settings(hide=True) # server announces own settings
+ self._receive_settings(hide=True) # server acks my settings
+
+ def send_frame(self, frm, hide=False):
+ raw_bytes = frm.serialize()
+ self.tcp_handler.wfile.write(raw_bytes)
+ self.tcp_handler.wfile.flush()
+ if not hide and self.dump_frames: # pragma no cover
+ print(frm.human_readable(">>"))
+
+ def read_frame(self, hide=False):
+ while True:
+ frm = utils.http2_read_frame(self.tcp_handler.rfile)
+ if not hide and self.dump_frames: # pragma no cover
+ print(frm.human_readable("<<"))
+
+ if isinstance(frm, frame.PingFrame):
+ raw_bytes = frame.PingFrame(flags=['ACK'], payload=frm.payload).serialize()
+ self.tcp_handler.wfile.write(raw_bytes)
+ self.tcp_handler.wfile.flush()
+ continue
+ if isinstance(frm, frame.SettingsFrame) and 'ACK' not in frm.flags:
+ self._apply_settings(frm.settings, hide)
+ if isinstance(frm, frame.DataFrame) and frm.flow_controlled_length > 0:
+ self._update_flow_control_window(frm.stream_id, frm.flow_controlled_length)
+ return frm
+
+ def check_alpn(self):
+ alp = self.tcp_handler.get_alpn_proto_negotiated()
+ if alp != b'h2':
+ raise NotImplementedError(
+ "HTTP2Protocol can not handle unknown ALP: %s" % alp)
+ return True
+
+ def _handle_unexpected_frame(self, frm):
+ if isinstance(frm, frame.SettingsFrame):
+ return
+ if self.unhandled_frame_cb:
+ self.unhandled_frame_cb(frm)
+
+ def _receive_settings(self, hide=False):
+ while True:
+ frm = self.read_frame(hide)
+ if isinstance(frm, frame.SettingsFrame):
+ break
+ else:
+ self._handle_unexpected_frame(frm)
+
+ def _next_stream_id(self):
+ if self.current_stream_id is None:
+ if self.is_server:
+ # servers must use even stream ids
+ self.current_stream_id = 2
+ else:
+ # clients must use odd stream ids
+ self.current_stream_id = 1
+ else:
+ self.current_stream_id += 2
+ return self.current_stream_id
+
+ def _apply_settings(self, settings, hide=False):
+ for setting, value in settings.items():
+ old_value = self.http2_settings[setting]
+ if not old_value:
+ old_value = '-'
+ self.http2_settings[setting] = value
+
+ frm = frame.SettingsFrame(flags=['ACK'])
+ self.send_frame(frm, hide)
+
+ def _update_flow_control_window(self, stream_id, increment):
+ frm = frame.WindowUpdateFrame(stream_id=0, window_increment=increment)
+ self.send_frame(frm)
+ frm = frame.WindowUpdateFrame(stream_id=stream_id, window_increment=increment)
+ self.send_frame(frm)
+
+ def _create_headers(self, headers, stream_id, end_stream=True):
+ def frame_cls(chunks):
+ for i in chunks:
+ if i == 0:
+ yield frame.HeadersFrame, i
+ else:
+ yield frame.ContinuationFrame, i
+
+ header_block_fragment = self.encoder.encode(headers.fields)
+
+ chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE]
+ chunks = range(0, len(header_block_fragment), chunk_size)
+ frms = [frm_cls(
+ flags=[],
+ stream_id=stream_id,
+ data=header_block_fragment[i:i+chunk_size]) for frm_cls, i in frame_cls(chunks)]
+
+ frms[-1].flags.add('END_HEADERS')
+ if end_stream:
+ frms[0].flags.add('END_STREAM')
+
+ if self.dump_frames: # pragma no cover
+ for frm in frms:
+ print(frm.human_readable(">>"))
+
+ return [frm.serialize() for frm in frms]
+
+ def _create_body(self, body, stream_id):
+ if body is None or len(body) == 0:
+ return b''
+
+ chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE]
+ chunks = range(0, len(body), chunk_size)
+ frms = [frame.DataFrame(
+ flags=[],
+ stream_id=stream_id,
+ data=body[i:i+chunk_size]) for i in chunks]
+ frms[-1].flags.add('END_STREAM')
+
+ if self.dump_frames: # pragma no cover
+ for frm in frms:
+ print(frm.human_readable(">>"))
+
+ return [frm.serialize() for frm in frms]
+
+ def _receive_transmission(self, stream_id=None, include_body=True):
+ if not include_body:
+ raise NotImplementedError()
+
+ body_expected = True
+
+ header_blocks = b''
+ body = b''
+
+ while True:
+ frm = self.read_frame()
+ if (
+ (isinstance(frm, frame.HeadersFrame) or isinstance(frm, frame.ContinuationFrame)) and
+ (stream_id is None or frm.stream_id == stream_id)
+ ):
+ stream_id = frm.stream_id
+ header_blocks += frm.data
+ if 'END_STREAM' in frm.flags:
+ body_expected = False
+ if 'END_HEADERS' in frm.flags:
+ break
+ else:
+ self._handle_unexpected_frame(frm)
+
+ while body_expected:
+ frm = self.read_frame()
+ if isinstance(frm, frame.DataFrame) and frm.stream_id == stream_id:
+ body += frm.data
+ if 'END_STREAM' in frm.flags:
+ break
+ else:
+ self._handle_unexpected_frame(frm)
+
+ headers = Headers(
+ [[k.encode('ascii'), v.encode('ascii')] for k, v in self.decoder.decode(header_blocks)]
+ )
+
+ return stream_id, headers, body
diff --git a/netlib/http/message.py b/netlib/http/message.py
new file mode 100644
index 00000000..e3d8ce37
--- /dev/null
+++ b/netlib/http/message.py
@@ -0,0 +1,222 @@
+from __future__ import absolute_import, print_function, division
+
+import warnings
+
+import six
+
+from .headers import Headers
+from .. import encoding, utils
+
+CONTENT_MISSING = 0
+
+if six.PY2: # pragma: nocover
+ _native = lambda x: x
+ _always_bytes = lambda x: x
+else:
+ # While the HTTP head _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
+ _native = lambda x: x.decode("utf-8", "surrogateescape")
+ _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape")
+
+
+class MessageData(utils.Serializable):
+ def __eq__(self, other):
+ if isinstance(other, MessageData):
+ return self.__dict__ == other.__dict__
+ return False
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def set_state(self, state):
+ for k, v in state.items():
+ if k == "headers":
+ v = Headers.from_state(v)
+ setattr(self, k, v)
+
+ def get_state(self):
+ state = vars(self).copy()
+ state["headers"] = state["headers"].get_state()
+ return state
+
+ @classmethod
+ def from_state(cls, state):
+ state["headers"] = Headers.from_state(state["headers"])
+ return cls(**state)
+
+
+class Message(utils.Serializable):
+    """
+    Base class for HTTP requests and responses: a thin facade over a
+    MessageData instance, exposing its fields as properties.
+    """
+    def __init__(self, data):
+        self.data = data
+
+    def __eq__(self, other):
+        if isinstance(other, Message):
+            return self.data == other.data
+        return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def get_state(self):
+        return self.data.get_state()
+
+    def set_state(self, state):
+        self.data.set_state(state)
+
+    @classmethod
+    def from_state(cls, state):
+        # NOTE(review): assumes the subclass __init__ accepts the state
+        # dict's keys as keyword arguments (as Request/Response do via
+        # *args/**kwargs); Message itself only takes `data`.
+        return cls(**state)
+
+    @property
+    def headers(self):
+        """
+        Message headers object
+
+        Returns:
+            netlib.http.Headers
+        """
+        return self.data.headers
+
+    @headers.setter
+    def headers(self, h):
+        self.data.headers = h
+
+    @property
+    def content(self):
+        """
+        The raw (encoded) HTTP message body
+
+        See also: :py:attr:`text`
+        """
+        return self.data.content
+
+    @content.setter
+    def content(self, content):
+        self.data.content = content
+        # Keep Content-Length in sync; non-bytes values (e.g. None or
+        # CONTENT_MISSING) leave the headers untouched.
+        if isinstance(content, bytes):
+            self.headers["content-length"] = str(len(content))
+
+    @property
+    def http_version(self):
+        """
+        Version string, e.g. "HTTP/1.1"
+        """
+        return _native(self.data.http_version)
+
+    @http_version.setter
+    def http_version(self, http_version):
+        self.data.http_version = _always_bytes(http_version)
+
+    @property
+    def timestamp_start(self):
+        """
+        First byte timestamp
+        """
+        return self.data.timestamp_start
+
+    @timestamp_start.setter
+    def timestamp_start(self, timestamp_start):
+        self.data.timestamp_start = timestamp_start
+
+    @property
+    def timestamp_end(self):
+        """
+        Last byte timestamp
+        """
+        return self.data.timestamp_end
+
+    @timestamp_end.setter
+    def timestamp_end(self, timestamp_end):
+        self.data.timestamp_end = timestamp_end
+
+    @property
+    def text(self):
+        """
+        The decoded HTTP message body.
+        Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive.
+
+        .. note::
+            This is not implemented yet.
+
+        See also: :py:attr:`content`, :py:class:`decoded`
+        """
+        # This attribute should be called text, because that's what requests does.
+        raise NotImplementedError()
+
+    @text.setter
+    def text(self, text):
+        raise NotImplementedError()
+
+    def decode(self):
+        """
+        Decodes body based on the current Content-Encoding header, then
+        removes the header. If there is no Content-Encoding header, no
+        action is taken.
+
+        Returns:
+            True, if decoding succeeded.
+            False, otherwise.
+        """
+        ce = self.headers.get("content-encoding")
+        data = encoding.decode(ce, self.content)
+        if data is None:
+            return False
+        self.content = data
+        self.headers.pop("content-encoding", None)
+        return True
+
+    def encode(self, e):
+        """
+        Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
+
+        Returns:
+            True, if encoding succeeded.
+            False, otherwise.
+        """
+        data = encoding.encode(e, self.content)
+        if data is None:
+            return False
+        self.content = data
+        self.headers["content-encoding"] = e
+        return True
+
+    # Legacy
+
+    @property
+    def body(self): # pragma: nocover
+        warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
+        return self.content
+
+    @body.setter
+    def body(self, body): # pragma: nocover
+        warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
+        self.content = body
+
+
+class decoded(object):
+ """
+ A context manager that decodes a request or response, and then
+ re-encodes it with the same encoding after execution of the block.
+
+ Example:
+
+ .. code-block:: python
+
+ with decoded(request):
+ request.content = request.content.replace("foo", "bar")
+ """
+
+ def __init__(self, message):
+ self.message = message
+ ce = message.headers.get("content-encoding")
+ if ce in encoding.ENCODINGS:
+ self.ce = ce
+ else:
+ self.ce = None
+
+ def __enter__(self):
+ if self.ce:
+ self.message.decode()
+
+ def __exit__(self, type, value, tb):
+ if self.ce:
+ self.message.encode(self.ce)
diff --git a/netlib/http/request.py b/netlib/http/request.py
new file mode 100644
index 00000000..b9076c0f
--- /dev/null
+++ b/netlib/http/request.py
@@ -0,0 +1,356 @@
+from __future__ import absolute_import, print_function, division
+
+import warnings
+
+import six
+from six.moves import urllib
+
+from netlib import utils
+from netlib.http import cookies
+from netlib.odict import ODict
+from .. import encoding
+from .headers import Headers
+from .message import Message, _native, _always_bytes, MessageData
+
+
+class RequestData(MessageData):
+    """
+    Plain data container holding all state of a single HTTP request.
+    """
+    def __init__(self, first_line_format, method, scheme, host, port, path, http_version, headers=None, content=None,
+                 timestamp_start=None, timestamp_end=None):
+        # Normalize plain sequences/None into a Headers instance.
+        if not isinstance(headers, Headers):
+            headers = Headers(headers)
+
+        self.first_line_format = first_line_format
+        self.method = method
+        self.scheme = scheme
+        self.host = host
+        self.port = port
+        self.path = path
+        self.http_version = http_version
+        self.headers = headers
+        self.content = content
+        self.timestamp_start = timestamp_start
+        self.timestamp_end = timestamp_end
+
+
+class Request(Message):
+    """
+    An HTTP request.
+    """
+    def __init__(self, *args, **kwargs):
+        data = RequestData(*args, **kwargs)
+        super(Request, self).__init__(data)
+
+    def __repr__(self):
+        if self.host and self.port:
+            hostport = "{}:{}".format(self.host, self.port)
+        else:
+            hostport = ""
+        path = self.path or ""
+        return "Request({} {}{})".format(
+            self.method, hostport, path
+        )
+
+    @property
+    def first_line_format(self):
+        """
+        HTTP request form as defined in `RFC7230 <https://tools.ietf.org/html/rfc7230#section-5.3>`_.
+
+        origin-form and asterisk-form are subsumed as "relative".
+        """
+        return self.data.first_line_format
+
+    @first_line_format.setter
+    def first_line_format(self, first_line_format):
+        self.data.first_line_format = first_line_format
+
+    @property
+    def method(self):
+        """
+        HTTP request method, e.g. "GET".
+        """
+        return _native(self.data.method).upper()
+
+    @method.setter
+    def method(self, method):
+        self.data.method = _always_bytes(method)
+
+    @property
+    def scheme(self):
+        """
+        HTTP request scheme, which should be "http" or "https".
+        """
+        return _native(self.data.scheme)
+
+    @scheme.setter
+    def scheme(self, scheme):
+        self.data.scheme = _always_bytes(scheme)
+
+    @property
+    def host(self):
+        """
+        Target host. This may be parsed from the raw request
+        (e.g. from a ``GET http://example.com/ HTTP/1.1`` request line)
+        or inferred from the proxy mode (e.g. an IP in transparent mode).
+
+        Setting the host attribute also updates the host header, if present.
+        """
+
+        if six.PY2: # pragma: nocover
+            return self.data.host
+
+        # Prefer IDNA decoding; fall back to utf8 so this never raises.
+        if not self.data.host:
+            return self.data.host
+        try:
+            return self.data.host.decode("idna")
+        except UnicodeError:
+            return self.data.host.decode("utf8", "surrogateescape")
+
+    @host.setter
+    def host(self, host):
+        if isinstance(host, six.text_type):
+            try:
+                # There's no non-strict mode for IDNA encoding.
+                # We don't want this operation to fail though, so we try
+                # utf8 as a last resort.
+                host = host.encode("idna", "strict")
+            except UnicodeError:
+                host = host.encode("utf8", "surrogateescape")
+
+        self.data.host = host
+
+        # Update host header
+        if "host" in self.headers:
+            if host:
+                self.headers["host"] = host
+            else:
+                self.headers.pop("host")
+
+    @property
+    def port(self):
+        """
+        Target port
+        """
+        return self.data.port
+
+    @port.setter
+    def port(self, port):
+        self.data.port = port
+
+    @property
+    def path(self):
+        """
+        HTTP request path, e.g. "/index.html".
+        Guaranteed to start with a slash.
+        """
+        return _native(self.data.path)
+
+    @path.setter
+    def path(self, path):
+        self.data.path = _always_bytes(path)
+
+    @property
+    def url(self):
+        """
+        The URL string, constructed from the request's URL components
+        """
+        return utils.unparse_url(self.scheme, self.host, self.port, self.path)
+
+    @url.setter
+    def url(self, url):
+        self.scheme, self.host, self.port, self.path = utils.parse_url(url)
+
+    @property
+    def pretty_host(self):
+        """
+        Similar to :py:attr:`host`, but using the Host headers as an additional preferred data source.
+        This is useful in transparent mode where :py:attr:`host` is only an IP address,
+        but may not reflect the actual destination as the Host header could be spoofed.
+        """
+        return self.headers.get("host", self.host)
+
+    @property
+    def pretty_url(self):
+        """
+        Like :py:attr:`url`, but using :py:attr:`pretty_host` instead of :py:attr:`host`.
+        """
+        if self.first_line_format == "authority":
+            return "%s:%d" % (self.pretty_host, self.port)
+        return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
+
+    @property
+    def query(self):
+        """
+        The request query string as an :py:class:`ODict` object.
+        None, if there is no query.
+        """
+        _, _, _, _, query, _ = urllib.parse.urlparse(self.url)
+        if query:
+            return ODict(utils.urldecode(query))
+        return None
+
+    @query.setter
+    def query(self, odict):
+        # Round-trip through parse_url so only self.path is rewritten.
+        query = utils.urlencode(odict.lst)
+        scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
+        _, _, _, self.path = utils.parse_url(
+            urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+
+    @property
+    def cookies(self):
+        """
+        The request cookies.
+        An empty :py:class:`ODict` object if the cookie monster ate them all.
+        """
+        # NOTE(review): "Cookie" vs "cookie" elsewhere — presumably Headers
+        # lookup is case-insensitive; confirm against the Headers class.
+        ret = ODict()
+        for i in self.headers.get_all("Cookie"):
+            ret.extend(cookies.parse_cookie_header(i))
+        return ret
+
+    @cookies.setter
+    def cookies(self, odict):
+        self.headers["cookie"] = cookies.format_cookie_header(odict)
+
+    @property
+    def path_components(self):
+        """
+        The URL's path components as a list of strings.
+        Components are unquoted.
+        """
+        _, _, path, _, _, _ = urllib.parse.urlparse(self.url)
+        return [urllib.parse.unquote(i) for i in path.split("/") if i]
+
+    @path_components.setter
+    def path_components(self, components):
+        components = map(lambda x: urllib.parse.quote(x, safe=""), components)
+        path = "/" + "/".join(components)
+        scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
+        _, _, _, self.path = utils.parse_url(
+            urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+
+    def anticache(self):
+        """
+        Modifies this request to remove headers that might produce a cached
+        response. That is, we remove ETags and If-Modified-Since headers.
+        """
+        delheaders = [
+            "if-modified-since",
+            "if-none-match",
+        ]
+        for i in delheaders:
+            self.headers.pop(i, None)
+
+    def anticomp(self):
+        """
+        Modifies this request to remove headers that will compress the
+        resource's data.
+        """
+        self.headers["accept-encoding"] = "identity"
+
+    def constrain_encoding(self):
+        """
+        Limits the permissible Accept-Encoding values, based on what we can
+        decode appropriately.
+        """
+        accept_encoding = self.headers.get("accept-encoding")
+        if accept_encoding:
+            self.headers["accept-encoding"] = (
+                ', '.join(
+                    e
+                    for e in encoding.ENCODINGS
+                    if e in accept_encoding
+                )
+            )
+
+    @property
+    def urlencoded_form(self):
+        """
+        The URL-encoded form data as an :py:class:`ODict` object.
+        None if there is no data or the content-type indicates non-form data.
+        """
+        is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
+        if self.content and is_valid_content_type:
+            return ODict(utils.urldecode(self.content))
+        return None
+
+    @urlencoded_form.setter
+    def urlencoded_form(self, odict):
+        """
+        Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
+        This will overwrite the existing content if there is one.
+        """
+        self.headers["content-type"] = "application/x-www-form-urlencoded"
+        self.content = utils.urlencode(odict.lst)
+
+    @property
+    def multipart_form(self):
+        """
+        The multipart form data as an :py:class:`ODict` object.
+        None if there is no data or the content-type indicates non-form data.
+        """
+        is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
+        if self.content and is_valid_content_type:
+            return ODict(utils.multipartdecode(self.headers,self.content))
+        return None
+
+    @multipart_form.setter
+    def multipart_form(self, value):
+        raise NotImplementedError()
+
+    # Legacy
+
+    def get_cookies(self): # pragma: nocover
+        warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning)
+        return self.cookies
+
+    def set_cookies(self, odict): # pragma: nocover
+        warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning)
+        self.cookies = odict
+
+    def get_query(self): # pragma: nocover
+        warnings.warn(".get_query is deprecated, use .query instead.", DeprecationWarning)
+        return self.query or ODict([])
+
+    def set_query(self, odict): # pragma: nocover
+        warnings.warn(".set_query is deprecated, use .query instead.", DeprecationWarning)
+        self.query = odict
+
+    def get_path_components(self): # pragma: nocover
+        warnings.warn(".get_path_components is deprecated, use .path_components instead.", DeprecationWarning)
+        return self.path_components
+
+    def set_path_components(self, lst): # pragma: nocover
+        warnings.warn(".set_path_components is deprecated, use .path_components instead.", DeprecationWarning)
+        self.path_components = lst
+
+    def get_form_urlencoded(self): # pragma: nocover
+        warnings.warn(".get_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning)
+        return self.urlencoded_form or ODict([])
+
+    def set_form_urlencoded(self, odict): # pragma: nocover
+        warnings.warn(".set_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning)
+        self.urlencoded_form = odict
+
+    def get_form_multipart(self): # pragma: nocover
+        warnings.warn(".get_form_multipart is deprecated, use .multipart_form instead.", DeprecationWarning)
+        return self.multipart_form or ODict([])
+
+    @property
+    def form_in(self): # pragma: nocover
+        warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning)
+        return self.first_line_format
+
+    @form_in.setter
+    def form_in(self, form_in): # pragma: nocover
+        warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning)
+        self.first_line_format = form_in
+
+    @property
+    def form_out(self): # pragma: nocover
+        warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning)
+        return self.first_line_format
+
+    @form_out.setter
+    def form_out(self, form_out): # pragma: nocover
+        warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning)
+        self.first_line_format = form_out
+
diff --git a/netlib/http/response.py b/netlib/http/response.py
new file mode 100644
index 00000000..8f4d6215
--- /dev/null
+++ b/netlib/http/response.py
@@ -0,0 +1,116 @@
+from __future__ import absolute_import, print_function, division
+
+import warnings
+
+from . import cookies
+from .headers import Headers
+from .message import Message, _native, _always_bytes, MessageData
+from .. import utils
+from ..odict import ODict
+
+
+class ResponseData(MessageData):
+    """
+    Plain data container holding all state of a single HTTP response.
+    """
+    def __init__(self, http_version, status_code, reason=None, headers=None, content=None,
+                 timestamp_start=None, timestamp_end=None):
+        # Normalize plain sequences/None into a Headers instance.
+        if not isinstance(headers, Headers):
+            headers = Headers(headers)
+
+        self.http_version = http_version
+        self.status_code = status_code
+        self.reason = reason
+        self.headers = headers
+        self.content = content
+        self.timestamp_start = timestamp_start
+        self.timestamp_end = timestamp_end
+
+
+class Response(Message):
+    """
+    An HTTP response.
+    """
+    def __init__(self, *args, **kwargs):
+        data = ResponseData(*args, **kwargs)
+        super(Response, self).__init__(data)
+
+    def __repr__(self):
+        if self.content:
+            details = "{}, {}".format(
+                self.headers.get("content-type", "unknown content type"),
+                utils.pretty_size(len(self.content))
+            )
+        else:
+            details = "no content"
+        return "Response({status_code} {reason}, {details})".format(
+            status_code=self.status_code,
+            reason=self.reason,
+            details=details
+        )
+
+    @property
+    def status_code(self):
+        """
+        HTTP Status Code, e.g. ``200``.
+        """
+        return self.data.status_code
+
+    @status_code.setter
+    def status_code(self, status_code):
+        self.data.status_code = status_code
+
+    @property
+    def reason(self):
+        """
+        HTTP Reason Phrase, e.g. "Not Found".
+        This is always :py:obj:`None` for HTTP2 requests, because HTTP2 responses do not contain a reason phrase.
+        """
+        # NOTE(review): _native would raise on a None reason — presumably
+        # callers always set at least b'' (as the HTTP/2 reader does); confirm.
+        return _native(self.data.reason)
+
+    @reason.setter
+    def reason(self, reason):
+        self.data.reason = _always_bytes(reason)
+
+    @property
+    def cookies(self):
+        """
+        Get the contents of all Set-Cookie headers.
+
+        A possibly empty :py:class:`ODict`, where keys are cookie name strings,
+        and values are [value, attr] lists. Value is a string, and attr is
+        an ODictCaseless containing cookie attributes. Within attrs, unary
+        attributes (e.g. HTTPOnly) are indicated by a Null value.
+        """
+        ret = []
+        for header in self.headers.get_all("set-cookie"):
+            v = cookies.parse_set_cookie_header(header)
+            if v:
+                name, value, attrs = v
+                ret.append([name, [value, attrs]])
+        return ODict(ret)
+
+    @cookies.setter
+    def cookies(self, odict):
+        values = []
+        for i in odict.lst:
+            header = cookies.format_set_cookie_header(i[0], i[1][0], i[1][1])
+            values.append(header)
+        self.headers.set_all("set-cookie", values)
+
+    # Legacy
+
+    def get_cookies(self): # pragma: nocover
+        warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning)
+        return self.cookies
+
+    def set_cookies(self, odict): # pragma: nocover
+        warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning)
+        self.cookies = odict
+
+    @property
+    def msg(self): # pragma: nocover
+        warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning)
+        return self.reason
+
+    @msg.setter
+    def msg(self, reason): # pragma: nocover
+        warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning)
+        self.reason = reason
diff --git a/netlib/http/status_codes.py b/netlib/http/status_codes.py
new file mode 100644
index 00000000..8a4dc1f5
--- /dev/null
+++ b/netlib/http/status_codes.py
@@ -0,0 +1,106 @@
+from __future__ import absolute_import, print_function, division
+
# Symbolic names for HTTP status codes, grouped by class.

# 1xx Informational
CONTINUE = 100
SWITCHING = 101

# 2xx Success
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207

# 3xx Redirection
MULTIPLE_CHOICE = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# 4xx Client Error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTH_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
IM_A_TEAPOT = 418

# 5xx Server Error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE_SPACE = 507
NOT_EXTENDED = 510

# Mapping of status code -> default reason phrase (per RFC 2616).
RESPONSES = {
    # 100
    CONTINUE: "Continue",
    SWITCHING: "Switching Protocols",

    # 200
    OK: "OK",
    CREATED: "Created",
    ACCEPTED: "Accepted",
    NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
    NO_CONTENT: "No Content",
    # Fixed: dropped the stray trailing period, inconsistent with all other
    # entries and not part of the RFC reason phrase.
    RESET_CONTENT: "Reset Content",
    PARTIAL_CONTENT: "Partial Content",
    MULTI_STATUS: "Multi-Status",

    # 300
    MULTIPLE_CHOICE: "Multiple Choices",
    MOVED_PERMANENTLY: "Moved Permanently",
    FOUND: "Found",
    SEE_OTHER: "See Other",
    NOT_MODIFIED: "Not Modified",
    USE_PROXY: "Use Proxy",
    # 306 is unused/reserved and has no reason phrase.
    TEMPORARY_REDIRECT: "Temporary Redirect",

    # 400
    BAD_REQUEST: "Bad Request",
    UNAUTHORIZED: "Unauthorized",
    PAYMENT_REQUIRED: "Payment Required",
    FORBIDDEN: "Forbidden",
    NOT_FOUND: "Not Found",
    NOT_ALLOWED: "Method Not Allowed",
    NOT_ACCEPTABLE: "Not Acceptable",
    PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
    REQUEST_TIMEOUT: "Request Time-out",
    CONFLICT: "Conflict",
    GONE: "Gone",
    LENGTH_REQUIRED: "Length Required",
    PRECONDITION_FAILED: "Precondition Failed",
    REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
    REQUEST_URI_TOO_LONG: "Request-URI Too Long",
    UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
    REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
    EXPECTATION_FAILED: "Expectation Failed",
    IM_A_TEAPOT: "I'm a teapot",

    # 500
    INTERNAL_SERVER_ERROR: "Internal Server Error",
    NOT_IMPLEMENTED: "Not Implemented",
    BAD_GATEWAY: "Bad Gateway",
    SERVICE_UNAVAILABLE: "Service Unavailable",
    GATEWAY_TIMEOUT: "Gateway Time-out",
    HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
    INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
    NOT_EXTENDED: "Not Extended"
}
diff --git a/netlib/http/user_agents.py b/netlib/http/user_agents.py
new file mode 100644
index 00000000..e8681908
--- /dev/null
+++ b/netlib/http/user_agents.py
@@ -0,0 +1,52 @@
+from __future__ import (absolute_import, print_function, division)
+
+"""
+ A small collection of useful user-agent header strings. These should be
+ kept reasonably current to reflect common usage.
+"""
+
+# pylint: line-too-long
+
+# A collection of (name, shortcut, string) tuples.
+
+UASTRINGS = [
+ ("android",
+ "a",
+ "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02"), # noqa
+ ("blackberry",
+ "l",
+ "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+"), # noqa
+ ("bingbot",
+ "b",
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"), # noqa
+ ("chrome",
+ "c",
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"), # noqa
+ ("firefox",
+ "f",
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1"), # noqa
+ ("googlebot",
+ "g",
+ "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"), # noqa
+ ("ie9",
+ "i",
+ "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)"), # noqa
+ ("ipad",
+ "p",
+ "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B176 Safari/7534.48.3"), # noqa
+ ("iphone",
+ "h",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5"), # noqa
+ ("safari",
+ "s",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10"), # noqa
+]
+
+
def get_by_shortcut(s):
    """
    Retrieve a user agent entry by shortcut.

    Returns the matching (name, shortcut, string) tuple, or None when no
    entry carries the given shortcut.
    """
    return next((entry for entry in UASTRINGS if entry[1] == s), None)