Diffstat (limited to 'netlib/http')
-rw-r--r--  netlib/http/__init__.py            |   14
-rw-r--r--  netlib/http/authentication.py      |  167
-rw-r--r--  netlib/http/cookies.py             |  193
-rw-r--r--  netlib/http/headers.py             |  204
-rw-r--r--  netlib/http/http1/__init__.py      |   25
-rw-r--r--  netlib/http/http1/assemble.py      |  104
-rw-r--r--  netlib/http/http1/read.py          |  362
-rw-r--r--  netlib/http/http2/__init__.py      |    6
-rw-r--r--  netlib/http/http2/connections.py   |  426
-rw-r--r--  netlib/http/message.py             |  222
-rw-r--r--  netlib/http/request.py             |  353
-rw-r--r--  netlib/http/response.py            |  116
-rw-r--r--  netlib/http/status_codes.py        |  106
-rw-r--r--  netlib/http/user_agents.py         |   52
14 files changed, 0 insertions, 2350 deletions
diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py
deleted file mode 100644
index fd632cd5..00000000
--- a/netlib/http/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from __future__ import absolute_import, print_function, division
-from .request import Request
-from .response import Response
-from .headers import Headers
-from .message import decoded, CONTENT_MISSING
-from . import http1, http2
-
-__all__ = [
- "Request",
- "Response",
- "Headers",
- "decoded", "CONTENT_MISSING",
- "http1", "http2",
-]
diff --git a/netlib/http/authentication.py b/netlib/http/authentication.py
deleted file mode 100644
index d769abe5..00000000
--- a/netlib/http/authentication.py
+++ /dev/null
@@ -1,167 +0,0 @@
-from __future__ import (absolute_import, print_function, division)
-from argparse import Action, ArgumentTypeError
-import binascii
-
-
-def parse_http_basic_auth(s):
- words = s.split()
- if len(words) != 2:
- return None
- scheme = words[0]
- try:
- user = binascii.a2b_base64(words[1]).decode("utf8", "replace")
- except binascii.Error:
- return None
- parts = user.split(':')
- if len(parts) != 2:
- return None
- return scheme, parts[0], parts[1]
-
-
-def assemble_http_basic_auth(scheme, username, password):
- v = binascii.b2a_base64((username + ":" + password).encode("utf8")).decode("ascii")
- return scheme + " " + v
-
-
-class NullProxyAuth(object):
-
- """
-    No proxy auth at all (returns empty challenge headers)
- """
-
- def __init__(self, password_manager):
- self.password_manager = password_manager
-
- def clean(self, headers_):
- """
- Clean up authentication headers, so they're not passed upstream.
- """
-
- def authenticate(self, headers_):
- """
- Tests that the user is allowed to use the proxy
- """
- return True
-
- def auth_challenge_headers(self):
- """
-        Returns a dictionary containing the headers required to challenge the user
- """
- return {}
-
-
-class BasicProxyAuth(NullProxyAuth):
- CHALLENGE_HEADER = 'Proxy-Authenticate'
- AUTH_HEADER = 'Proxy-Authorization'
-
- def __init__(self, password_manager, realm):
- NullProxyAuth.__init__(self, password_manager)
- self.realm = realm
-
- def clean(self, headers):
- del headers[self.AUTH_HEADER]
-
- def authenticate(self, headers):
- auth_value = headers.get(self.AUTH_HEADER)
- if not auth_value:
- return False
- parts = parse_http_basic_auth(auth_value)
- if not parts:
- return False
- scheme, username, password = parts
- if scheme.lower() != 'basic':
- return False
- if not self.password_manager.test(username, password):
- return False
- self.username = username
- return True
-
- def auth_challenge_headers(self):
- return {self.CHALLENGE_HEADER: 'Basic realm="%s"' % self.realm}
-
-
-class PassMan(object):
-
- def test(self, username_, password_token_):
- return False
-
-
-class PassManNonAnon(PassMan):
-
- """
- Ensure the user specifies a username, accept any password.
- """
-
- def test(self, username, password_token_):
- if username:
- return True
- return False
-
-
-class PassManHtpasswd(PassMan):
-
- """
- Read usernames and passwords from an htpasswd file
- """
-
- def __init__(self, path):
- """
- Raises ValueError if htpasswd file is invalid.
- """
- import passlib.apache
- self.htpasswd = passlib.apache.HtpasswdFile(path)
-
- def test(self, username, password_token):
- return bool(self.htpasswd.check_password(username, password_token))
-
-
-class PassManSingleUser(PassMan):
-
- def __init__(self, username, password):
- self.username, self.password = username, password
-
- def test(self, username, password_token):
- return self.username == username and self.password == password_token
-
-
-class AuthAction(Action):
-
- """
-    Helper class to allow seamless integration into argparse. Example usage:
- parser.add_argument(
- "--nonanonymous",
- action=NonanonymousAuthAction, nargs=0,
-        help="Allow access to any user as long as credentials are specified."
- )
- """
-
- def __call__(self, parser, namespace, values, option_string=None):
- passman = self.getPasswordManager(values)
- authenticator = BasicProxyAuth(passman, "mitmproxy")
- setattr(namespace, self.dest, authenticator)
-
- def getPasswordManager(self, s): # pragma: nocover
- raise NotImplementedError()
-
-
-class SingleuserAuthAction(AuthAction):
-
- def getPasswordManager(self, s):
- if len(s.split(':')) != 2:
- raise ArgumentTypeError(
- "Invalid single-user specification. Please use the format username:password"
- )
- username, password = s.split(':')
- return PassManSingleUser(username, password)
-
-
-class NonanonymousAuthAction(AuthAction):
-
- def getPasswordManager(self, s):
- return PassManNonAnon()
-
-
-class HtpasswdAuthAction(AuthAction):
-
- def getPasswordManager(self, s):
- return PassManHtpasswd(s)
diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py
deleted file mode 100644
index 18544b5e..00000000
--- a/netlib/http/cookies.py
+++ /dev/null
@@ -1,193 +0,0 @@
-import re
-
-from .. import odict
-
-"""
-A flexible module for cookie parsing and manipulation.
-
-This module differs from usual standards-compliant cookie modules in a number
-of ways. We try to be as permissive as possible, and to retain even malformed
-information. Duplicate cookies are preserved in parsing, and can be set in
-formatting. We do attempt to escape and quote values where needed, but will not
-reject data that violate the specs.
-
-Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
-not parse the comma-separated variant of Set-Cookie that allows multiple
-cookies to be set in a single header. Technically this should be feasible, but
-it turns out that violations of RFC6265 that make the parsing problem
-indeterminate are much more common than genuine occurrences of the multi-cookie
-variants. Serialization follows RFC6265.
-
- http://tools.ietf.org/html/rfc6265
- http://tools.ietf.org/html/rfc2109
- http://tools.ietf.org/html/rfc2965
-"""
-
-# TODO: Disallow LHS-only Cookie values
-
-
-def _read_until(s, start, term):
- """
- Read until one of the characters in term is reached.
- """
- if start == len(s):
- return "", start + 1
- for i in range(start, len(s)):
- if s[i] in term:
- return s[start:i], i
- return s[start:i + 1], i + 1
-
-
-def _read_token(s, start):
- """
- Read a token - the LHS of a token/value pair in a cookie.
- """
- return _read_until(s, start, ";=")
-
-
-def _read_quoted_string(s, start):
- """
- start: offset to the first quote of the string to be read
-
- A sort of loose super-set of the various quoted string specifications.
-
- RFC6265 disallows backslashes or double quotes within quoted strings.
- Prior RFCs use backslashes to escape. This leaves us free to apply
- backslash escaping by default and be compatible with everything.
- """
- escaping = False
- ret = []
- # Skip the first quote
- i = start # initialize in case the loop doesn't run.
- for i in range(start + 1, len(s)):
- if escaping:
- ret.append(s[i])
- escaping = False
- elif s[i] == '"':
- break
- elif s[i] == "\\":
- escaping = True
- else:
- ret.append(s[i])
- return "".join(ret), i + 1
-
-
-def _read_value(s, start, delims):
- """
- Reads a value - the RHS of a token/value pair in a cookie.
-
-    delims: Characters that terminate an unquoted value. Quoted values
-    are read up to the closing double quote instead.
- """
- if start >= len(s):
- return "", start
- elif s[start] == '"':
- return _read_quoted_string(s, start)
- else:
- return _read_until(s, start, delims)
-
-
-def _read_pairs(s, off=0):
- """
- Read pairs of lhs=rhs values.
-
- off: start offset
-    Returns a (vals, off) tuple of [lhs, rhs] pairs and the final offset.
- """
- vals = []
- while True:
- lhs, off = _read_token(s, off)
- lhs = lhs.lstrip()
- if lhs:
- rhs = None
- if off < len(s):
- if s[off] == "=":
- rhs, off = _read_value(s, off + 1, ";")
- vals.append([lhs, rhs])
- off += 1
- if not off < len(s):
- break
- return vals, off
-
-
-def _has_special(s):
- for i in s:
- if i in '",;\\':
- return True
- o = ord(i)
- if o < 0x21 or o > 0x7e:
- return True
- return False
-
-
-ESCAPE = re.compile(r"([\"\\])")
-
-
-def _format_pairs(lst, specials=(), sep="; "):
- """
- specials: A lower-cased list of keys that will not be quoted.
- """
- vals = []
- for k, v in lst:
- if v is None:
- vals.append(k)
- else:
- if k.lower() not in specials and _has_special(v):
- v = ESCAPE.sub(r"\\\1", v)
- v = '"%s"' % v
- vals.append("%s=%s" % (k, v))
- return sep.join(vals)
-
-
-def _format_set_cookie_pairs(lst):
- return _format_pairs(
- lst,
- specials=("expires", "path")
- )
-
-
-def _parse_set_cookie_pairs(s):
- """
- For Set-Cookie, we support multiple cookies as described in RFC2109.
- This function therefore returns a list of lists.
- """
- pairs, off_ = _read_pairs(s)
- return pairs
-
-
-def parse_set_cookie_header(line):
- """
- Parse a Set-Cookie header value
-
- Returns a (name, value, attrs) tuple, or None, where attrs is an
- ODictCaseless set of attributes. No attempt is made to parse attribute
- values - they are treated purely as strings.
- """
- pairs = _parse_set_cookie_pairs(line)
- if pairs:
- return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:])
-
-
-def format_set_cookie_header(name, value, attrs):
- """
- Formats a Set-Cookie header value.
- """
- pairs = [[name, value]]
- pairs.extend(attrs.lst)
- return _format_set_cookie_pairs(pairs)
-
-
-def parse_cookie_header(line):
- """
- Parse a Cookie header value.
- Returns a (possibly empty) ODict object.
- """
- pairs, off_ = _read_pairs(line)
- return odict.ODict(pairs)
-
-
-def format_cookie_header(od):
- """
- Formats a Cookie header value.
- """
- return _format_pairs(od.lst)
diff --git a/netlib/http/headers.py b/netlib/http/headers.py
deleted file mode 100644
index 78404796..00000000
--- a/netlib/http/headers.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""
-
-Unicode Handling
-----------------
-See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
-"""
-from __future__ import absolute_import, print_function, division
-import copy
-try:
- from collections.abc import MutableMapping
-except ImportError: # pragma: nocover
- from collections import MutableMapping # Workaround for Python < 3.3
-
-
-import six
-
-from netlib.utils import always_byte_args, always_bytes, Serializable
-
-if six.PY2: # pragma: nocover
- _native = lambda x: x
- _always_bytes = lambda x: x
- _always_byte_args = lambda x: x
-else:
- # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
- _native = lambda x: x.decode("utf-8", "surrogateescape")
- _always_bytes = lambda x: always_bytes(x, "utf-8", "surrogateescape")
- _always_byte_args = always_byte_args("utf-8", "surrogateescape")
-
-
-class Headers(MutableMapping, Serializable):
- """
- Header class which allows both convenient access to individual headers as well as
- direct access to the underlying raw data. Provides a full dictionary interface.
-
- Example:
-
- .. code-block:: python
-
- # Create headers with keyword arguments
- >>> h = Headers(host="example.com", content_type="application/xml")
-
- # Headers mostly behave like a normal dict.
- >>> h["Host"]
- "example.com"
-
- # HTTP Headers are case insensitive
- >>> h["host"]
- "example.com"
-
-        # Headers can also be created from a list of raw (header_name, header_value) byte tuples
- >>> h = Headers([
- [b"Host",b"example.com"],
- [b"Accept",b"text/html"],
- [b"accept",b"application/xml"]
- ])
-
- # Multiple headers are folded into a single header as per RFC7230
- >>> h["Accept"]
- "text/html, application/xml"
-
- # Setting a header removes all existing headers with the same name.
- >>> h["Accept"] = "application/text"
- >>> h["Accept"]
- "application/text"
-
- # bytes(h) returns a HTTP1 header block.
- >>> print(bytes(h))
- Host: example.com
- Accept: application/text
-
- # For full control, the raw header fields can be accessed
- >>> h.fields
-
- Caveats:
- For use with the "Set-Cookie" header, see :py:meth:`get_all`.
- """
-
- @_always_byte_args
- def __init__(self, fields=None, **headers):
- """
- Args:
- fields: (optional) list of ``(name, value)`` header byte tuples,
- e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes.
- **headers: Additional headers to set. Will overwrite existing values from `fields`.
- For convenience, underscores in header names will be transformed to dashes -
- this behaviour does not extend to other methods.
-                If ``**headers`` contains multiple keys that have equal ``.lower()`` representations,
- the behavior is undefined.
- """
- self.fields = fields or []
-
- for name, value in self.fields:
- if not isinstance(name, bytes) or not isinstance(value, bytes):
- raise ValueError("Headers passed as fields must be bytes.")
-
- # content_type -> content-type
- headers = {
- _always_bytes(name).replace(b"_", b"-"): value
- for name, value in six.iteritems(headers)
- }
- self.update(headers)
-
- def __bytes__(self):
- if self.fields:
- return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"
- else:
- return b""
-
- if six.PY2: # pragma: nocover
- __str__ = __bytes__
-
- @_always_byte_args
- def __getitem__(self, name):
- values = self.get_all(name)
- if not values:
- raise KeyError(name)
- return ", ".join(values)
-
- @_always_byte_args
- def __setitem__(self, name, value):
- idx = self._index(name)
-
-        # To please the human eye, we insert at the position where the first existing header occurred.
- if idx is not None:
- del self[name]
- self.fields.insert(idx, [name, value])
- else:
- self.fields.append([name, value])
-
- @_always_byte_args
- def __delitem__(self, name):
- if name not in self:
- raise KeyError(name)
- name = name.lower()
- self.fields = [
- field for field in self.fields
- if name != field[0].lower()
- ]
-
- def __iter__(self):
- seen = set()
- for name, _ in self.fields:
- name_lower = name.lower()
- if name_lower not in seen:
- seen.add(name_lower)
- yield _native(name)
-
- def __len__(self):
- return len(set(name.lower() for name, _ in self.fields))
-
- # __hash__ = object.__hash__
-
- def _index(self, name):
- name = name.lower()
- for i, field in enumerate(self.fields):
- if field[0].lower() == name:
- return i
- return None
-
- def __eq__(self, other):
- if isinstance(other, Headers):
- return self.fields == other.fields
- return False
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- @_always_byte_args
- def get_all(self, name):
- """
- Like :py:meth:`get`, but does not fold multiple headers into a single one.
- This is useful for Set-Cookie headers, which do not support folding.
-
- See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
- """
- name_lower = name.lower()
- values = [_native(value) for n, value in self.fields if n.lower() == name_lower]
- return values
-
- @_always_byte_args
- def set_all(self, name, values):
- """
- Explicitly set multiple headers for the given key.
- See: :py:meth:`get_all`
- """
- values = map(_always_bytes, values) # _always_byte_args does not fix lists
- if name in self:
- del self[name]
- self.fields.extend(
- [name, value] for value in values
- )
-
- def copy(self):
- return Headers(copy.copy(self.fields))
-
- def get_state(self):
- return tuple(tuple(field) for field in self.fields)
-
- def set_state(self, state):
- self.fields = [list(field) for field in state]
-
- @classmethod
- def from_state(cls, state):
-    return cls([list(field) for field in state])
\ No newline at end of file
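
Beyond the docstring examples, get_all/set_all were the intended escape hatch for headers that must not be folded, such as Set-Cookie. A minimal sketch against the pre-removal module (header values are made up):

    from netlib.http import Headers

    h = Headers([
        [b"Set-Cookie", b"a=1"],
        [b"Set-Cookie", b"b=2"],
    ])
    print(h["Set-Cookie"])           # "a=1, b=2" -- folded, which breaks cookie semantics
    print(h.get_all("Set-Cookie"))   # ['a=1', 'b=2'] -- each field preserved
    h.set_all("Set-Cookie", ["c=3"])
    print(h.fields)                  # [[b'Set-Cookie', b'c=3']]
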
diff --git a/netlib/http/http1/__init__.py b/netlib/http/http1/__init__.py
deleted file mode 100644
index 2aa7e26a..00000000
--- a/netlib/http/http1/__init__.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import absolute_import, print_function, division
-from .read import (
- read_request, read_request_head,
- read_response, read_response_head,
- read_body,
- connection_close,
- expected_http_body_size,
-)
-from .assemble import (
- assemble_request, assemble_request_head,
- assemble_response, assemble_response_head,
- assemble_body,
-)
-
-
-__all__ = [
- "read_request", "read_request_head",
- "read_response", "read_response_head",
- "read_body",
- "connection_close",
- "expected_http_body_size",
- "assemble_request", "assemble_request_head",
- "assemble_response", "assemble_response_head",
- "assemble_body",
-]
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
deleted file mode 100644
index 785ee8d3..00000000
--- a/netlib/http/http1/assemble.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-from ... import utils
-import itertools
-from ...exceptions import HttpException
-from .. import CONTENT_MISSING
-
-
-def assemble_request(request):
- if request.content == CONTENT_MISSING:
- raise HttpException("Cannot assemble flow with CONTENT_MISSING")
- head = assemble_request_head(request)
- body = b"".join(assemble_body(request.data.headers, [request.data.content]))
- return head + body
-
-
-def assemble_request_head(request):
- first_line = _assemble_request_line(request.data)
- headers = _assemble_request_headers(request.data)
- return b"%s\r\n%s\r\n" % (first_line, headers)
-
-
-def assemble_response(response):
- if response.content == CONTENT_MISSING:
- raise HttpException("Cannot assemble flow with CONTENT_MISSING")
- head = assemble_response_head(response)
- body = b"".join(assemble_body(response.data.headers, [response.data.content]))
- return head + body
-
-
-def assemble_response_head(response):
- first_line = _assemble_response_line(response.data)
- headers = _assemble_response_headers(response.data)
- return b"%s\r\n%s\r\n" % (first_line, headers)
-
-
-def assemble_body(headers, body_chunks):
- if "chunked" in headers.get("transfer-encoding", "").lower():
- for chunk in body_chunks:
- if chunk:
- yield b"%x\r\n%s\r\n" % (len(chunk), chunk)
- yield b"0\r\n\r\n"
- else:
- for chunk in body_chunks:
- yield chunk
-
-
-def _assemble_request_line(request_data):
- """
- Args:
- request_data (netlib.http.request.RequestData)
- """
- form = request_data.first_line_format
- if form == "relative":
- return b"%s %s %s" % (
- request_data.method,
- request_data.path,
- request_data.http_version
- )
- elif form == "authority":
- return b"%s %s:%d %s" % (
- request_data.method,
- request_data.host,
- request_data.port,
- request_data.http_version
- )
- elif form == "absolute":
- return b"%s %s://%s:%d%s %s" % (
- request_data.method,
- request_data.scheme,
- request_data.host,
- request_data.port,
- request_data.path,
- request_data.http_version
- )
- else:
- raise RuntimeError("Invalid request form")
-
-
-def _assemble_request_headers(request_data):
- """
- Args:
- request_data (netlib.http.request.RequestData)
- """
- headers = request_data.headers.copy()
- if "host" not in headers and request_data.scheme and request_data.host and request_data.port:
- headers["host"] = utils.hostport(
- request_data.scheme,
- request_data.host,
- request_data.port
- )
- return bytes(headers)
-
-
-def _assemble_response_line(response_data):
- return b"%s %d %s" % (
- response_data.http_version,
- response_data.status_code,
- response_data.reason,
- )
-
-
-def _assemble_response_headers(response):
- return bytes(response.headers)
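
assemble_body applies chunked framing only when the headers ask for it; otherwise chunks are passed through untouched. A quick sketch of both code paths against the pre-removal http1 package (payloads are made up):

    from netlib.http import Headers
    from netlib.http.http1 import assemble_body

    chunked = Headers(transfer_encoding="chunked")
    print(b"".join(assemble_body(chunked, [b"foo", b"longer chunk"])))
    # b'3\r\nfoo\r\nc\r\nlonger chunk\r\n0\r\n\r\n'

    plain = Headers(content_length="3")
    print(b"".join(assemble_body(plain, [b"foo"])))
    # b'foo'
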
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
deleted file mode 100644
index 6e3a1b93..00000000
--- a/netlib/http/http1/read.py
+++ /dev/null
@@ -1,362 +0,0 @@
-from __future__ import absolute_import, print_function, division
-import time
-import sys
-import re
-
-from ... import utils
-from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
-from .. import Request, Response, Headers
-
-
-def read_request(rfile, body_size_limit=None):
- request = read_request_head(rfile)
- expected_body_size = expected_http_body_size(request)
- request.data.content = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit))
- request.timestamp_end = time.time()
- return request
-
-
-def read_request_head(rfile):
- """
- Parse an HTTP request head (request line + headers) from an input stream
-
- Args:
- rfile: The input stream
-
- Returns:
- The HTTP request object (without body)
-
- Raises:
- HttpReadDisconnect: No bytes can be read from rfile.
- HttpSyntaxException: The input is malformed HTTP.
-        HttpException: Any other error occurred.
- """
- timestamp_start = time.time()
- if hasattr(rfile, "reset_timestamps"):
- rfile.reset_timestamps()
-
- form, method, scheme, host, port, path, http_version = _read_request_line(rfile)
- headers = _read_headers(rfile)
-
- if hasattr(rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = rfile.first_byte_timestamp
-
- return Request(
- form, method, scheme, host, port, path, http_version, headers, None, timestamp_start
- )
-
-
-def read_response(rfile, request, body_size_limit=None):
- response = read_response_head(rfile)
- expected_body_size = expected_http_body_size(request, response)
- response.data.content = b"".join(read_body(rfile, expected_body_size, body_size_limit))
- response.timestamp_end = time.time()
- return response
-
-
-def read_response_head(rfile):
- """
- Parse an HTTP response head (response line + headers) from an input stream
-
- Args:
- rfile: The input stream
-
- Returns:
-        The HTTP response object (without body)
-
- Raises:
- HttpReadDisconnect: No bytes can be read from rfile.
- HttpSyntaxException: The input is malformed HTTP.
-        HttpException: Any other error occurred.
- """
-
- timestamp_start = time.time()
- if hasattr(rfile, "reset_timestamps"):
- rfile.reset_timestamps()
-
- http_version, status_code, message = _read_response_line(rfile)
- headers = _read_headers(rfile)
-
- if hasattr(rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = rfile.first_byte_timestamp
-
- return Response(http_version, status_code, message, headers, None, timestamp_start)
-
-
-def read_body(rfile, expected_size, limit=None, max_chunk_size=4096):
- """
- Read an HTTP message body
-
- Args:
- rfile: The input stream
-        expected_size: The expected body size (see :py:meth:`expected_http_body_size`)
- limit: Maximum body size
-        max_chunk_size: Maximum chunk size that gets yielded
-
- Returns:
- A generator that yields byte chunks of the content.
-
- Raises:
- HttpException, if an error occurs
-
- Caveats:
- max_chunk_size is not considered if the transfer encoding is chunked.
- """
- if not limit or limit < 0:
- limit = sys.maxsize
- if not max_chunk_size:
- max_chunk_size = limit
-
- if expected_size is None:
- for x in _read_chunked(rfile, limit):
- yield x
- elif expected_size >= 0:
- if limit is not None and expected_size > limit:
- raise HttpException(
- "HTTP Body too large. "
- "Limit is {}, content length was advertised as {}".format(limit, expected_size)
- )
- bytes_left = expected_size
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- content = rfile.read(chunk_size)
- if len(content) < chunk_size:
- raise HttpException("Unexpected EOF")
- yield content
- bytes_left -= chunk_size
- else:
- bytes_left = limit
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- content = rfile.read(chunk_size)
- if not content:
- return
- yield content
- bytes_left -= chunk_size
- not_done = rfile.read(1)
- if not_done:
- raise HttpException("HTTP body too large. Limit is {}.".format(limit))
-
-
-def connection_close(http_version, headers):
- """
- Checks the message to see if the client connection should be closed
- according to RFC 2616 Section 8.1.
- """
- # At first, check if we have an explicit Connection header.
- if "connection" in headers:
- tokens = utils.get_header_tokens(headers, "connection")
- if "close" in tokens:
- return True
- elif "keep-alive" in tokens:
- return False
-
- # If we don't have a Connection header, HTTP 1.1 connections are assumed to
- # be persistent
- return http_version != "HTTP/1.1" and http_version != b"HTTP/1.1" # FIXME: Remove one case.
-
-
-def expected_http_body_size(request, response=None):
- """
- Returns:
- The expected body length:
- - a positive integer, if the size is known in advance
-        - None, if the size is unknown in advance (chunked encoding)
- - -1, if all data should be read until end of stream.
-
- Raises:
- HttpSyntaxException, if the content length header is invalid
- """
- # Determine response size according to
- # http://tools.ietf.org/html/rfc7230#section-3.3
- if not response:
- headers = request.headers
- response_code = None
- is_request = True
- else:
- headers = response.headers
- response_code = response.status_code
- is_request = False
-
- if is_request:
- if headers.get("expect", "").lower() == "100-continue":
- return 0
- else:
- if request.method.upper() == "HEAD":
- return 0
- if 100 <= response_code <= 199:
- return 0
- if response_code == 200 and request.method.upper() == "CONNECT":
- return 0
- if response_code in (204, 304):
- return 0
-
- if "chunked" in headers.get("transfer-encoding", "").lower():
- return None
- if "content-length" in headers:
- try:
- size = int(headers["content-length"])
- if size < 0:
- raise ValueError()
- return size
- except ValueError:
- raise HttpSyntaxException("Unparseable Content Length")
- if is_request:
- return 0
- return -1
-
-
-def _get_first_line(rfile):
- try:
- line = rfile.readline()
- if line == b"\r\n" or line == b"\n":
- # Possible leftover from previous message
- line = rfile.readline()
- except TcpDisconnect:
- raise HttpReadDisconnect("Remote disconnected")
- if not line:
- raise HttpReadDisconnect("Remote disconnected")
- return line.strip()
-
-
-def _read_request_line(rfile):
- try:
- line = _get_first_line(rfile)
- except HttpReadDisconnect:
- # We want to provide a better error message.
- raise HttpReadDisconnect("Client disconnected")
-
- try:
- method, path, http_version = line.split(b" ")
-
- if path == b"*" or path.startswith(b"/"):
- form = "relative"
- scheme, host, port = None, None, None
- elif method == b"CONNECT":
- form = "authority"
- host, port = _parse_authority_form(path)
- scheme, path = None, None
- else:
- form = "absolute"
- scheme, host, port, path = utils.parse_url(path)
-
- _check_http_version(http_version)
- except ValueError:
- raise HttpSyntaxException("Bad HTTP request line: {}".format(line))
-
- return form, method, scheme, host, port, path, http_version
-
-
-def _parse_authority_form(hostport):
- """
- Returns (host, port) if hostport is a valid authority-form host specification.
- http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
-
- Raises:
- ValueError, if the input is malformed
- """
- try:
- host, port = hostport.split(b":")
- port = int(port)
- if not utils.is_valid_host(host) or not utils.is_valid_port(port):
- raise ValueError()
- except ValueError:
- raise HttpSyntaxException("Invalid host specification: {}".format(hostport))
-
- return host, port
-
-
-def _read_response_line(rfile):
- try:
- line = _get_first_line(rfile)
- except HttpReadDisconnect:
- # We want to provide a better error message.
- raise HttpReadDisconnect("Server disconnected")
-
- try:
-
- parts = line.split(b" ", 2)
- if len(parts) == 2: # handle missing message gracefully
- parts.append(b"")
-
- http_version, status_code, message = parts
- status_code = int(status_code)
- _check_http_version(http_version)
-
- except ValueError:
- raise HttpSyntaxException("Bad HTTP response line: {}".format(line))
-
- return http_version, status_code, message
-
-
-def _check_http_version(http_version):
- if not re.match(br"^HTTP/\d\.\d$", http_version):
- raise HttpSyntaxException("Unknown HTTP version: {}".format(http_version))
-
-
-def _read_headers(rfile):
- """
- Read a set of headers.
- Stop once a blank line is reached.
-
- Returns:
- A headers object
-
- Raises:
- HttpSyntaxException
- """
- ret = []
- while True:
- line = rfile.readline()
- if not line or line == b"\r\n" or line == b"\n":
- break
- if line[0] in b" \t":
- if not ret:
- raise HttpSyntaxException("Invalid headers")
- # continued header
- ret[-1][1] = ret[-1][1] + b'\r\n ' + line.strip()
- else:
- try:
- name, value = line.split(b":", 1)
- value = value.strip()
- if not name:
- raise ValueError()
- ret.append([name, value])
- except ValueError:
- raise HttpSyntaxException("Invalid headers")
- return Headers(ret)
-
-
-def _read_chunked(rfile, limit=sys.maxsize):
- """
-    Read an HTTP body with chunked transfer encoding.
-
- Args:
- rfile: the input file
- limit: A positive integer
- """
- total = 0
- while True:
- line = rfile.readline(128)
- if line == b"":
- raise HttpException("Connection closed prematurely")
- if line != b"\r\n" and line != b"\n":
- try:
- length = int(line, 16)
- except ValueError:
- raise HttpSyntaxException("Invalid chunked encoding length: {}".format(line))
- total += length
- if total > limit:
- raise HttpException(
- "HTTP Body too large. Limit is {}, "
- "chunked content longer than {}".format(limit, total)
- )
- chunk = rfile.read(length)
- suffix = rfile.readline(5)
- if suffix != b"\r\n":
- raise HttpSyntaxException("Malformed chunked body")
- if length == 0:
- return
- yield chunk
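
The read side works against any file-like object with readline/read. A minimal sketch parsing a request from an in-memory stream, using the pre-removal http1 package (the payload is made up):

    import io
    from netlib.http.http1 import connection_close, expected_http_body_size, read_request

    raw = (
        b"POST /submit HTTP/1.1\r\n"
        b"Host: example.com\r\n"
        b"Content-Length: 3\r\n"
        b"\r\n"
        b"abc"
    )
    req = read_request(io.BytesIO(raw))
    print(req.method, req.path, req.content)                  # POST /submit b'abc'
    print(expected_http_body_size(req))                       # 3
    print(connection_close(req.http_version, req.headers))    # False: HTTP/1.1 defaults to keep-alive
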
diff --git a/netlib/http/http2/__init__.py b/netlib/http/http2/__init__.py
deleted file mode 100644
index 7043d36f..00000000
--- a/netlib/http/http2/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from __future__ import absolute_import, print_function, division
-from .connections import HTTP2Protocol
-
-__all__ = [
- "HTTP2Protocol"
-]
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
deleted file mode 100644
index 52fa7193..00000000
--- a/netlib/http/http2/connections.py
+++ /dev/null
@@ -1,426 +0,0 @@
-from __future__ import (absolute_import, print_function, division)
-import itertools
-import time
-
-from hpack.hpack import Encoder, Decoder
-from ... import utils
-from .. import Headers, Response, Request
-
-from hyperframe import frame
-
-
-class TCPHandler(object):
-
- def __init__(self, rfile, wfile=None):
- self.rfile = rfile
- self.wfile = wfile
-
-
-class HTTP2Protocol(object):
-
- ERROR_CODES = utils.BiDi(
- NO_ERROR=0x0,
- PROTOCOL_ERROR=0x1,
- INTERNAL_ERROR=0x2,
- FLOW_CONTROL_ERROR=0x3,
- SETTINGS_TIMEOUT=0x4,
- STREAM_CLOSED=0x5,
- FRAME_SIZE_ERROR=0x6,
- REFUSED_STREAM=0x7,
- CANCEL=0x8,
- COMPRESSION_ERROR=0x9,
- CONNECT_ERROR=0xa,
- ENHANCE_YOUR_CALM=0xb,
- INADEQUATE_SECURITY=0xc,
- HTTP_1_1_REQUIRED=0xd
- )
-
- CLIENT_CONNECTION_PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n'
-
- HTTP2_DEFAULT_SETTINGS = {
- frame.SettingsFrame.HEADER_TABLE_SIZE: 4096,
- frame.SettingsFrame.ENABLE_PUSH: 1,
- frame.SettingsFrame.MAX_CONCURRENT_STREAMS: None,
- frame.SettingsFrame.INITIAL_WINDOW_SIZE: 2 ** 16 - 1,
- frame.SettingsFrame.MAX_FRAME_SIZE: 2 ** 14,
- frame.SettingsFrame.MAX_HEADER_LIST_SIZE: None,
- }
-
- def __init__(
- self,
- tcp_handler=None,
- rfile=None,
- wfile=None,
- is_server=False,
- dump_frames=False,
- encoder=None,
- decoder=None,
- unhandled_frame_cb=None,
- ):
- self.tcp_handler = tcp_handler or TCPHandler(rfile, wfile)
- self.is_server = is_server
- self.dump_frames = dump_frames
- self.encoder = encoder or Encoder()
- self.decoder = decoder or Decoder()
- self.unhandled_frame_cb = unhandled_frame_cb
-
- self.http2_settings = self.HTTP2_DEFAULT_SETTINGS.copy()
- self.current_stream_id = None
- self.connection_preface_performed = False
-
- def read_request(
- self,
- __rfile,
- include_body=True,
- body_size_limit=None,
- allow_empty=False,
- ):
- if body_size_limit is not None:
- raise NotImplementedError()
-
- self.perform_connection_preface()
-
- timestamp_start = time.time()
- if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
- self.tcp_handler.rfile.reset_timestamps()
-
- stream_id, headers, body = self._receive_transmission(
- include_body=include_body,
- )
-
- if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = self.tcp_handler.rfile.first_byte_timestamp
-
- timestamp_end = time.time()
-
- authority = headers.get(':authority', b'')
- method = headers.get(':method', 'GET')
- scheme = headers.get(':scheme', 'https')
- path = headers.get(':path', '/')
- host = None
- port = None
-
- if path == '*' or path.startswith("/"):
- form_in = "relative"
- elif method == 'CONNECT':
- form_in = "authority"
- if ":" in authority:
- host, port = authority.split(":", 1)
- else:
- host = authority
- else:
- form_in = "absolute"
- # FIXME: verify if path or :host contains what we need
- scheme, host, port, _ = utils.parse_url(path)
- scheme = scheme.decode('ascii')
- host = host.decode('ascii')
-
- if host is None:
- host = 'localhost'
- if port is None:
- port = 80 if scheme == 'http' else 443
- port = int(port)
-
- request = Request(
- form_in,
- method.encode('ascii'),
- scheme.encode('ascii'),
- host.encode('ascii'),
- port,
- path.encode('ascii'),
- b"HTTP/2.0",
- headers,
- body,
- timestamp_start,
- timestamp_end,
- )
- request.stream_id = stream_id
-
- return request
-
- def read_response(
- self,
- __rfile,
- request_method=b'',
- body_size_limit=None,
- include_body=True,
- stream_id=None,
- ):
- if body_size_limit is not None:
- raise NotImplementedError()
-
- self.perform_connection_preface()
-
- timestamp_start = time.time()
- if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
- self.tcp_handler.rfile.reset_timestamps()
-
- stream_id, headers, body = self._receive_transmission(
- stream_id=stream_id,
- include_body=include_body,
- )
-
- if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = self.tcp_handler.rfile.first_byte_timestamp
-
- if include_body:
- timestamp_end = time.time()
- else:
- timestamp_end = None
-
- response = Response(
- b"HTTP/2.0",
- int(headers.get(':status', 502)),
- b'',
- headers,
- body,
- timestamp_start=timestamp_start,
- timestamp_end=timestamp_end,
- )
- response.stream_id = stream_id
-
- return response
-
- def assemble(self, message):
- if isinstance(message, Request):
- return self.assemble_request(message)
- elif isinstance(message, Response):
- return self.assemble_response(message)
- else:
- raise ValueError("HTTP message not supported.")
-
- def assemble_request(self, request):
- assert isinstance(request, Request)
-
- authority = self.tcp_handler.sni if self.tcp_handler.sni else self.tcp_handler.address.host
- if self.tcp_handler.address.port != 443:
- authority += ":%d" % self.tcp_handler.address.port
-
- headers = request.headers.copy()
-
- if ':authority' not in headers:
- headers.fields.insert(0, (b':authority', authority.encode('ascii')))
- if ':scheme' not in headers:
- headers.fields.insert(0, (b':scheme', request.scheme.encode('ascii')))
- if ':path' not in headers:
- headers.fields.insert(0, (b':path', request.path.encode('ascii')))
- if ':method' not in headers:
- headers.fields.insert(0, (b':method', request.method.encode('ascii')))
-
- if hasattr(request, 'stream_id'):
- stream_id = request.stream_id
- else:
- stream_id = self._next_stream_id()
-
- return list(itertools.chain(
- self._create_headers(headers, stream_id, end_stream=(request.body is None or len(request.body) == 0)),
- self._create_body(request.body, stream_id)))
-
- def assemble_response(self, response):
- assert isinstance(response, Response)
-
- headers = response.headers.copy()
-
- if ':status' not in headers:
- headers.fields.insert(0, (b':status', str(response.status_code).encode('ascii')))
-
- if hasattr(response, 'stream_id'):
- stream_id = response.stream_id
- else:
- stream_id = self._next_stream_id()
-
- return list(itertools.chain(
- self._create_headers(headers, stream_id, end_stream=(response.body is None or len(response.body) == 0)),
- self._create_body(response.body, stream_id),
- ))
-
- def perform_connection_preface(self, force=False):
- if force or not self.connection_preface_performed:
- if self.is_server:
- self.perform_server_connection_preface(force)
- else:
- self.perform_client_connection_preface(force)
-
- def perform_server_connection_preface(self, force=False):
- if force or not self.connection_preface_performed:
- self.connection_preface_performed = True
-
- magic_length = len(self.CLIENT_CONNECTION_PREFACE)
- magic = self.tcp_handler.rfile.safe_read(magic_length)
- assert magic == self.CLIENT_CONNECTION_PREFACE
-
- frm = frame.SettingsFrame(settings={
- frame.SettingsFrame.ENABLE_PUSH: 0,
- frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 1,
- })
- self.send_frame(frm, hide=True)
- self._receive_settings(hide=True)
-
- def perform_client_connection_preface(self, force=False):
- if force or not self.connection_preface_performed:
- self.connection_preface_performed = True
-
- self.tcp_handler.wfile.write(self.CLIENT_CONNECTION_PREFACE)
-
- self.send_frame(frame.SettingsFrame(), hide=True)
- self._receive_settings(hide=True) # server announces own settings
- self._receive_settings(hide=True) # server acks my settings
-
- def send_frame(self, frm, hide=False):
- raw_bytes = frm.serialize()
- self.tcp_handler.wfile.write(raw_bytes)
- self.tcp_handler.wfile.flush()
- if not hide and self.dump_frames: # pragma no cover
- print(frm.human_readable(">>"))
-
- def read_frame(self, hide=False):
- while True:
- frm = utils.http2_read_frame(self.tcp_handler.rfile)
- if not hide and self.dump_frames: # pragma no cover
- print(frm.human_readable("<<"))
-
- if isinstance(frm, frame.PingFrame):
- raw_bytes = frame.PingFrame(flags=['ACK'], payload=frm.payload).serialize()
- self.tcp_handler.wfile.write(raw_bytes)
- self.tcp_handler.wfile.flush()
- continue
- if isinstance(frm, frame.SettingsFrame) and 'ACK' not in frm.flags:
- self._apply_settings(frm.settings, hide)
- if isinstance(frm, frame.DataFrame) and frm.flow_controlled_length > 0:
- self._update_flow_control_window(frm.stream_id, frm.flow_controlled_length)
- return frm
-
- def check_alpn(self):
- alp = self.tcp_handler.get_alpn_proto_negotiated()
- if alp != b'h2':
- raise NotImplementedError(
- "HTTP2Protocol can not handle unknown ALP: %s" % alp)
- return True
-
- def _handle_unexpected_frame(self, frm):
- if isinstance(frm, frame.SettingsFrame):
- return
- if self.unhandled_frame_cb:
- self.unhandled_frame_cb(frm)
-
- def _receive_settings(self, hide=False):
- while True:
- frm = self.read_frame(hide)
- if isinstance(frm, frame.SettingsFrame):
- break
- else:
- self._handle_unexpected_frame(frm)
-
- def _next_stream_id(self):
- if self.current_stream_id is None:
- if self.is_server:
- # servers must use even stream ids
- self.current_stream_id = 2
- else:
- # clients must use odd stream ids
- self.current_stream_id = 1
- else:
- self.current_stream_id += 2
- return self.current_stream_id
-
- def _apply_settings(self, settings, hide=False):
- for setting, value in settings.items():
- old_value = self.http2_settings[setting]
- if not old_value:
- old_value = '-'
- self.http2_settings[setting] = value
-
- frm = frame.SettingsFrame(flags=['ACK'])
- self.send_frame(frm, hide)
-
- def _update_flow_control_window(self, stream_id, increment):
- frm = frame.WindowUpdateFrame(stream_id=0, window_increment=increment)
- self.send_frame(frm)
- frm = frame.WindowUpdateFrame(stream_id=stream_id, window_increment=increment)
- self.send_frame(frm)
-
- def _create_headers(self, headers, stream_id, end_stream=True):
- def frame_cls(chunks):
- for i in chunks:
- if i == 0:
- yield frame.HeadersFrame, i
- else:
- yield frame.ContinuationFrame, i
-
- header_block_fragment = self.encoder.encode(headers.fields)
-
- chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE]
- chunks = range(0, len(header_block_fragment), chunk_size)
- frms = [frm_cls(
- flags=[],
- stream_id=stream_id,
- data=header_block_fragment[i:i+chunk_size]) for frm_cls, i in frame_cls(chunks)]
-
- frms[-1].flags.add('END_HEADERS')
- if end_stream:
- frms[0].flags.add('END_STREAM')
-
- if self.dump_frames: # pragma no cover
- for frm in frms:
- print(frm.human_readable(">>"))
-
- return [frm.serialize() for frm in frms]
-
- def _create_body(self, body, stream_id):
- if body is None or len(body) == 0:
- return b''
-
- chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE]
- chunks = range(0, len(body), chunk_size)
- frms = [frame.DataFrame(
- flags=[],
- stream_id=stream_id,
- data=body[i:i+chunk_size]) for i in chunks]
- frms[-1].flags.add('END_STREAM')
-
- if self.dump_frames: # pragma no cover
- for frm in frms:
- print(frm.human_readable(">>"))
-
- return [frm.serialize() for frm in frms]
-
- def _receive_transmission(self, stream_id=None, include_body=True):
- if not include_body:
- raise NotImplementedError()
-
- body_expected = True
-
- header_blocks = b''
- body = b''
-
- while True:
- frm = self.read_frame()
- if (
- (isinstance(frm, frame.HeadersFrame) or isinstance(frm, frame.ContinuationFrame)) and
- (stream_id is None or frm.stream_id == stream_id)
- ):
- stream_id = frm.stream_id
- header_blocks += frm.data
- if 'END_STREAM' in frm.flags:
- body_expected = False
- if 'END_HEADERS' in frm.flags:
- break
- else:
- self._handle_unexpected_frame(frm)
-
- while body_expected:
- frm = self.read_frame()
- if isinstance(frm, frame.DataFrame) and frm.stream_id == stream_id:
- body += frm.data
- if 'END_STREAM' in frm.flags:
- break
- else:
- self._handle_unexpected_frame(frm)
-
- headers = Headers(
- [[k.encode('ascii'), v.encode('ascii')] for k, v in self.decoder.decode(header_blocks)]
- )
-
- return stream_id, headers, body
diff --git a/netlib/http/message.py b/netlib/http/message.py
deleted file mode 100644
index e3d8ce37..00000000
--- a/netlib/http/message.py
+++ /dev/null
@@ -1,222 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-import warnings
-
-import six
-
-from .headers import Headers
-from .. import encoding, utils
-
-CONTENT_MISSING = 0
-
-if six.PY2: # pragma: nocover
- _native = lambda x: x
- _always_bytes = lambda x: x
-else:
- # While the HTTP head _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
- _native = lambda x: x.decode("utf-8", "surrogateescape")
- _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape")
-
-
-class MessageData(utils.Serializable):
- def __eq__(self, other):
- if isinstance(other, MessageData):
- return self.__dict__ == other.__dict__
- return False
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def set_state(self, state):
- for k, v in state.items():
- if k == "headers":
- v = Headers.from_state(v)
- setattr(self, k, v)
-
- def get_state(self):
- state = vars(self).copy()
- state["headers"] = state["headers"].get_state()
- return state
-
- @classmethod
- def from_state(cls, state):
- state["headers"] = Headers.from_state(state["headers"])
- return cls(**state)
-
-
-class Message(utils.Serializable):
- def __init__(self, data):
- self.data = data
-
- def __eq__(self, other):
- if isinstance(other, Message):
- return self.data == other.data
- return False
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def get_state(self):
- return self.data.get_state()
-
- def set_state(self, state):
- self.data.set_state(state)
-
- @classmethod
- def from_state(cls, state):
- return cls(**state)
-
- @property
- def headers(self):
- """
- Message headers object
-
- Returns:
- netlib.http.Headers
- """
- return self.data.headers
-
- @headers.setter
- def headers(self, h):
- self.data.headers = h
-
- @property
- def content(self):
- """
- The raw (encoded) HTTP message body
-
- See also: :py:attr:`text`
- """
- return self.data.content
-
- @content.setter
- def content(self, content):
- self.data.content = content
- if isinstance(content, bytes):
- self.headers["content-length"] = str(len(content))
-
- @property
- def http_version(self):
- """
- Version string, e.g. "HTTP/1.1"
- """
- return _native(self.data.http_version)
-
- @http_version.setter
- def http_version(self, http_version):
- self.data.http_version = _always_bytes(http_version)
-
- @property
- def timestamp_start(self):
- """
- First byte timestamp
- """
- return self.data.timestamp_start
-
- @timestamp_start.setter
- def timestamp_start(self, timestamp_start):
- self.data.timestamp_start = timestamp_start
-
- @property
- def timestamp_end(self):
- """
- Last byte timestamp
- """
- return self.data.timestamp_end
-
- @timestamp_end.setter
- def timestamp_end(self, timestamp_end):
- self.data.timestamp_end = timestamp_end
-
- @property
- def text(self):
- """
- The decoded HTTP message body.
- Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive.
-
- .. note::
- This is not implemented yet.
-
- See also: :py:attr:`content`, :py:class:`decoded`
- """
- # This attribute should be called text, because that's what requests does.
- raise NotImplementedError()
-
- @text.setter
- def text(self, text):
- raise NotImplementedError()
-
- def decode(self):
- """
- Decodes body based on the current Content-Encoding header, then
- removes the header. If there is no Content-Encoding header, no
- action is taken.
-
- Returns:
- True, if decoding succeeded.
- False, otherwise.
- """
- ce = self.headers.get("content-encoding")
- data = encoding.decode(ce, self.content)
- if data is None:
- return False
- self.content = data
- self.headers.pop("content-encoding", None)
- return True
-
- def encode(self, e):
- """
- Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
-
- Returns:
-            True, if encoding succeeded.
- False, otherwise.
- """
- data = encoding.encode(e, self.content)
- if data is None:
- return False
- self.content = data
- self.headers["content-encoding"] = e
- return True
-
- # Legacy
-
- @property
- def body(self): # pragma: nocover
- warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
- return self.content
-
- @body.setter
- def body(self, body): # pragma: nocover
- warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
- self.content = body
-
-
-class decoded(object):
- """
- A context manager that decodes a request or response, and then
- re-encodes it with the same encoding after execution of the block.
-
- Example:
-
- .. code-block:: python
-
- with decoded(request):
- request.content = request.content.replace("foo", "bar")
- """
-
- def __init__(self, message):
- self.message = message
- ce = message.headers.get("content-encoding")
- if ce in encoding.ENCODINGS:
- self.ce = ce
- else:
- self.ce = None
-
- def __enter__(self):
- if self.ce:
- self.message.decode()
-
- def __exit__(self, type, value, tb):
- if self.ce:
- self.message.encode(self.ce)
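
encode/decode operate on the raw content and keep Content-Encoding and Content-Length in sync. A short sketch on a Response, using the pre-removal API (values are illustrative):

    from netlib.http import Headers, Response

    resp = Response(b"HTTP/1.1", 200, b"OK", Headers(content_type="text/plain"), b"hello")
    resp.encode("gzip")
    print(resp.headers["content-encoding"])                # gzip
    resp.decode()
    print(resp.content, resp.headers["content-length"])    # b'hello' 5
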
diff --git a/netlib/http/request.py b/netlib/http/request.py
deleted file mode 100644
index 0e0f88ce..00000000
--- a/netlib/http/request.py
+++ /dev/null
@@ -1,353 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-import warnings
-
-import six
-from six.moves import urllib
-
-from netlib import utils
-from netlib.http import cookies
-from netlib.odict import ODict
-from .. import encoding
-from .headers import Headers
-from .message import Message, _native, _always_bytes, MessageData
-
-
-class RequestData(MessageData):
- def __init__(self, first_line_format, method, scheme, host, port, path, http_version, headers=None, content=None,
- timestamp_start=None, timestamp_end=None):
- if not isinstance(headers, Headers):
- headers = Headers(headers)
-
- self.first_line_format = first_line_format
- self.method = method
- self.scheme = scheme
- self.host = host
- self.port = port
- self.path = path
- self.http_version = http_version
- self.headers = headers
- self.content = content
- self.timestamp_start = timestamp_start
- self.timestamp_end = timestamp_end
-
-
-class Request(Message):
- """
- An HTTP request.
- """
- def __init__(self, *args, **kwargs):
- data = RequestData(*args, **kwargs)
- super(Request, self).__init__(data)
-
- def __repr__(self):
- if self.host and self.port:
- hostport = "{}:{}".format(self.host, self.port)
- else:
- hostport = ""
- path = self.path or ""
- return "Request({} {}{})".format(
- self.method, hostport, path
- )
-
- @property
- def first_line_format(self):
- """
- HTTP request form as defined in `RFC7230 <https://tools.ietf.org/html/rfc7230#section-5.3>`_.
-
- origin-form and asterisk-form are subsumed as "relative".
- """
- return self.data.first_line_format
-
- @first_line_format.setter
- def first_line_format(self, first_line_format):
- self.data.first_line_format = first_line_format
-
- @property
- def method(self):
- """
- HTTP request method, e.g. "GET".
- """
- return _native(self.data.method).upper()
-
- @method.setter
- def method(self, method):
- self.data.method = _always_bytes(method)
-
- @property
- def scheme(self):
- """
- HTTP request scheme, which should be "http" or "https".
- """
- return _native(self.data.scheme)
-
- @scheme.setter
- def scheme(self, scheme):
- self.data.scheme = _always_bytes(scheme)
-
- @property
- def host(self):
- """
- Target host. This may be parsed from the raw request
- (e.g. from a ``GET http://example.com/ HTTP/1.1`` request line)
- or inferred from the proxy mode (e.g. an IP in transparent mode).
-
- Setting the host attribute also updates the host header, if present.
- """
-
- if six.PY2: # pragma: nocover
- return self.data.host
-
- if not self.data.host:
- return self.data.host
- try:
- return self.data.host.decode("idna")
- except UnicodeError:
- return self.data.host.decode("utf8", "surrogateescape")
-
- @host.setter
- def host(self, host):
- if isinstance(host, six.text_type):
- try:
- # There's no non-strict mode for IDNA encoding.
- # We don't want this operation to fail though, so we try
- # utf8 as a last resort.
- host = host.encode("idna", "strict")
- except UnicodeError:
- host = host.encode("utf8", "surrogateescape")
-
- self.data.host = host
-
- # Update host header
- if "host" in self.headers:
- if host:
- self.headers["host"] = host
- else:
- self.headers.pop("host")
-
- @property
- def port(self):
- """
- Target port
- """
- return self.data.port
-
- @port.setter
- def port(self, port):
- self.data.port = port
-
- @property
- def path(self):
- """
- HTTP request path, e.g. "/index.html".
- Guaranteed to start with a slash.
- """
- return _native(self.data.path)
-
- @path.setter
- def path(self, path):
- self.data.path = _always_bytes(path)
-
- @property
- def url(self):
- """
- The URL string, constructed from the request's URL components
- """
- return utils.unparse_url(self.scheme, self.host, self.port, self.path)
-
- @url.setter
- def url(self, url):
- self.scheme, self.host, self.port, self.path = utils.parse_url(url)
-
- @property
- def pretty_host(self):
- """
-        Similar to :py:attr:`host`, but using the Host header as an additional preferred data source.
- This is useful in transparent mode where :py:attr:`host` is only an IP address,
- but may not reflect the actual destination as the Host header could be spoofed.
- """
- return self.headers.get("host", self.host)
-
- @property
- def pretty_url(self):
- """
- Like :py:attr:`url`, but using :py:attr:`pretty_host` instead of :py:attr:`host`.
- """
- if self.first_line_format == "authority":
- return "%s:%d" % (self.pretty_host, self.port)
- return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
-
- @property
- def query(self):
- """
- The request query string as an :py:class:`ODict` object.
- None, if there is no query.
- """
- _, _, _, _, query, _ = urllib.parse.urlparse(self.url)
- if query:
- return ODict(utils.urldecode(query))
- return None
-
- @query.setter
- def query(self, odict):
- query = utils.urlencode(odict.lst)
- scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
- self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])
-
- @property
- def cookies(self):
- """
- The request cookies.
- An empty :py:class:`ODict` object if the cookie monster ate them all.
- """
- ret = ODict()
- for i in self.headers.get_all("Cookie"):
- ret.extend(cookies.parse_cookie_header(i))
- return ret
-
- @cookies.setter
- def cookies(self, odict):
- self.headers["cookie"] = cookies.format_cookie_header(odict)
-
- @property
- def path_components(self):
- """
- The URL's path components as a list of strings.
- Components are unquoted.
- """
- _, _, path, _, _, _ = urllib.parse.urlparse(self.url)
- return [urllib.parse.unquote(i) for i in path.split("/") if i]
-
- @path_components.setter
- def path_components(self, components):
- components = map(lambda x: urllib.parse.quote(x, safe=""), components)
- path = "/" + "/".join(components)
- scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
- self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])
-
- def anticache(self):
- """
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
- """
- delheaders = [
- "if-modified-since",
- "if-none-match",
- ]
- for i in delheaders:
- self.headers.pop(i, None)
-
- def anticomp(self):
- """
-        Modifies this request so that the response is not compressed:
-        the Accept-Encoding header is set to "identity".
- """
- self.headers["accept-encoding"] = "identity"
-
- def constrain_encoding(self):
- """
- Limits the permissible Accept-Encoding values, based on what we can
- decode appropriately.
- """
- accept_encoding = self.headers.get("accept-encoding")
- if accept_encoding:
- self.headers["accept-encoding"] = (
- ', '.join(
- e
- for e in encoding.ENCODINGS
- if e in accept_encoding
- )
- )
-
- @property
- def urlencoded_form(self):
- """
- The URL-encoded form data as an :py:class:`ODict` object.
- None if there is no data or the content-type indicates non-form data.
- """
- is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
- if self.content and is_valid_content_type:
- return ODict(utils.urldecode(self.content))
- return None
-
- @urlencoded_form.setter
- def urlencoded_form(self, odict):
- """
- Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
- This will overwrite the existing content if there is one.
- """
- self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = utils.urlencode(odict.lst)
-
- @property
- def multipart_form(self):
- """
- The multipart form data as an :py:class:`ODict` object.
- None if there is no data or the content-type indicates non-form data.
- """
- is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
- if self.content and is_valid_content_type:
-            return ODict(utils.multipartdecode(self.headers, self.content))
- return None
-
- @multipart_form.setter
- def multipart_form(self, value):
- raise NotImplementedError()
-
- # Legacy
-
- def get_cookies(self): # pragma: nocover
- warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning)
- return self.cookies
-
- def set_cookies(self, odict): # pragma: nocover
- warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning)
- self.cookies = odict
-
- def get_query(self): # pragma: nocover
- warnings.warn(".get_query is deprecated, use .query instead.", DeprecationWarning)
- return self.query or ODict([])
-
- def set_query(self, odict): # pragma: nocover
- warnings.warn(".set_query is deprecated, use .query instead.", DeprecationWarning)
- self.query = odict
-
- def get_path_components(self): # pragma: nocover
- warnings.warn(".get_path_components is deprecated, use .path_components instead.", DeprecationWarning)
- return self.path_components
-
- def set_path_components(self, lst): # pragma: nocover
- warnings.warn(".set_path_components is deprecated, use .path_components instead.", DeprecationWarning)
- self.path_components = lst
-
- def get_form_urlencoded(self): # pragma: nocover
- warnings.warn(".get_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning)
- return self.urlencoded_form or ODict([])
-
- def set_form_urlencoded(self, odict): # pragma: nocover
- warnings.warn(".set_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning)
- self.urlencoded_form = odict
-
- def get_form_multipart(self): # pragma: nocover
- warnings.warn(".get_form_multipart is deprecated, use .multipart_form instead.", DeprecationWarning)
- return self.multipart_form or ODict([])
-
- @property
- def form_in(self): # pragma: nocover
- warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning)
- return self.first_line_format
-
- @form_in.setter
- def form_in(self, form_in): # pragma: nocover
- warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning)
- self.first_line_format = form_in
-
- @property
- def form_out(self): # pragma: nocover
- warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning)
- return self.first_line_format
-
- @form_out.setter
- def form_out(self, form_out): # pragma: nocover
- warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning)
- self.first_line_format = form_out
\ No newline at end of file
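As a rough usage sketch of the request helpers deleted above (assuming an existing netlib Request instance named `req`; the variable name and form values are illustrative, not part of the removed module):

    from netlib.odict import ODict

    # `req` is assumed to be a netlib.http.Request obtained elsewhere.
    req.anticache()           # drop If-Modified-Since / If-None-Match
    req.anticomp()            # force Accept-Encoding: identity
    req.constrain_encoding()  # keep only encodings netlib can decode

    # The setter shown above writes the body and sets the
    # application/x-www-form-urlencoded content-type header.
    req.urlencoded_form = ODict([("key", "value")])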
diff --git a/netlib/http/response.py b/netlib/http/response.py
deleted file mode 100644
index 8f4d6215..00000000
--- a/netlib/http/response.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-import warnings
-
-from . import cookies
-from .headers import Headers
-from .message import Message, _native, _always_bytes, MessageData
-from .. import utils
-from ..odict import ODict
-
-
-class ResponseData(MessageData):
- def __init__(self, http_version, status_code, reason=None, headers=None, content=None,
- timestamp_start=None, timestamp_end=None):
- if not isinstance(headers, Headers):
- headers = Headers(headers)
-
- self.http_version = http_version
- self.status_code = status_code
- self.reason = reason
- self.headers = headers
- self.content = content
- self.timestamp_start = timestamp_start
- self.timestamp_end = timestamp_end
-
-
-class Response(Message):
- """
- An HTTP response.
- """
- def __init__(self, *args, **kwargs):
- data = ResponseData(*args, **kwargs)
- super(Response, self).__init__(data)
-
- def __repr__(self):
- if self.content:
- details = "{}, {}".format(
- self.headers.get("content-type", "unknown content type"),
- utils.pretty_size(len(self.content))
- )
- else:
- details = "no content"
- return "Response({status_code} {reason}, {details})".format(
- status_code=self.status_code,
- reason=self.reason,
- details=details
- )
-
- @property
- def status_code(self):
- """
- HTTP Status Code, e.g. ``200``.
- """
- return self.data.status_code
-
- @status_code.setter
- def status_code(self, status_code):
- self.data.status_code = status_code
-
- @property
- def reason(self):
- """
- HTTP Reason Phrase, e.g. "Not Found".
- This is always :py:obj:`None` for HTTP2 responses, because HTTP2 does not transmit a reason phrase.
- """
- return _native(self.data.reason)
-
- @reason.setter
- def reason(self, reason):
- self.data.reason = _always_bytes(reason)
-
- @property
- def cookies(self):
- """
- Get the contents of all Set-Cookie headers.
-
- A possibly empty :py:class:`ODict`, where keys are cookie name strings,
- and values are [value, attr] lists. Value is a string, and attr is
- an ODictCaseless containing cookie attributes. Within attrs, unary
- attributes (e.g. HttpOnly) are indicated by a None value.
- """
- ret = []
- for header in self.headers.get_all("set-cookie"):
- v = cookies.parse_set_cookie_header(header)
- if v:
- name, value, attrs = v
- ret.append([name, [value, attrs]])
- return ODict(ret)
-
- @cookies.setter
- def cookies(self, odict):
- values = []
- for i in odict.lst:
- header = cookies.format_set_cookie_header(i[0], i[1][0], i[1][1])
- values.append(header)
- self.headers.set_all("set-cookie", values)
-
- # Legacy
-
- def get_cookies(self): # pragma: nocover
- warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning)
- return self.cookies
-
- def set_cookies(self, odict): # pragma: nocover
- warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning)
- self.cookies = odict
-
- @property
- def msg(self): # pragma: nocover
- warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning)
- return self.reason
-
- @msg.setter
- def msg(self, reason): # pragma: nocover
- warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning)
- self.reason = reason
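A hedged sketch of the cookies round trip on the Response class removed above (assuming an existing Response instance `resp`; the variable name is illustrative):

    # `resp` is assumed to be a netlib.http.Response obtained elsewhere.
    jar = resp.cookies                 # ODict of [name, [value, attrs]] entries
    names = [name for name, _ in jar.lst]

    # Assigning the property back re-emits one Set-Cookie header per entry
    # via cookies.format_set_cookie_header, as in the setter above.
    resp.cookies = jar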
diff --git a/netlib/http/status_codes.py b/netlib/http/status_codes.py
deleted file mode 100644
index 8a4dc1f5..00000000
--- a/netlib/http/status_codes.py
+++ /dev/null
@@ -1,106 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-CONTINUE = 100
-SWITCHING = 101
-OK = 200
-CREATED = 201
-ACCEPTED = 202
-NON_AUTHORITATIVE_INFORMATION = 203
-NO_CONTENT = 204
-RESET_CONTENT = 205
-PARTIAL_CONTENT = 206
-MULTI_STATUS = 207
-
-MULTIPLE_CHOICE = 300
-MOVED_PERMANENTLY = 301
-FOUND = 302
-SEE_OTHER = 303
-NOT_MODIFIED = 304
-USE_PROXY = 305
-TEMPORARY_REDIRECT = 307
-
-BAD_REQUEST = 400
-UNAUTHORIZED = 401
-PAYMENT_REQUIRED = 402
-FORBIDDEN = 403
-NOT_FOUND = 404
-NOT_ALLOWED = 405
-NOT_ACCEPTABLE = 406
-PROXY_AUTH_REQUIRED = 407
-REQUEST_TIMEOUT = 408
-CONFLICT = 409
-GONE = 410
-LENGTH_REQUIRED = 411
-PRECONDITION_FAILED = 412
-REQUEST_ENTITY_TOO_LARGE = 413
-REQUEST_URI_TOO_LONG = 414
-UNSUPPORTED_MEDIA_TYPE = 415
-REQUESTED_RANGE_NOT_SATISFIABLE = 416
-EXPECTATION_FAILED = 417
-IM_A_TEAPOT = 418
-
-INTERNAL_SERVER_ERROR = 500
-NOT_IMPLEMENTED = 501
-BAD_GATEWAY = 502
-SERVICE_UNAVAILABLE = 503
-GATEWAY_TIMEOUT = 504
-HTTP_VERSION_NOT_SUPPORTED = 505
-INSUFFICIENT_STORAGE_SPACE = 507
-NOT_EXTENDED = 510
-
-RESPONSES = {
- # 100
- CONTINUE: "Continue",
- SWITCHING: "Switching Protocols",
-
- # 200
- OK: "OK",
- CREATED: "Created",
- ACCEPTED: "Accepted",
- NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
- NO_CONTENT: "No Content",
- RESET_CONTENT: "Reset Content.",
- PARTIAL_CONTENT: "Partial Content",
- MULTI_STATUS: "Multi-Status",
-
- # 300
- MULTIPLE_CHOICE: "Multiple Choices",
- MOVED_PERMANENTLY: "Moved Permanently",
- FOUND: "Found",
- SEE_OTHER: "See Other",
- NOT_MODIFIED: "Not Modified",
- USE_PROXY: "Use Proxy",
- # 306 is reserved and no longer used (formerly Switch Proxy)
- TEMPORARY_REDIRECT: "Temporary Redirect",
-
- # 400
- BAD_REQUEST: "Bad Request",
- UNAUTHORIZED: "Unauthorized",
- PAYMENT_REQUIRED: "Payment Required",
- FORBIDDEN: "Forbidden",
- NOT_FOUND: "Not Found",
- NOT_ALLOWED: "Method Not Allowed",
- NOT_ACCEPTABLE: "Not Acceptable",
- PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
- REQUEST_TIMEOUT: "Request Time-out",
- CONFLICT: "Conflict",
- GONE: "Gone",
- LENGTH_REQUIRED: "Length Required",
- PRECONDITION_FAILED: "Precondition Failed",
- REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
- REQUEST_URI_TOO_LONG: "Request-URI Too Long",
- UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
- REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
- EXPECTATION_FAILED: "Expectation Failed",
- IM_A_TEAPOT: "I'm a teapot",
-
- # 500
- INTERNAL_SERVER_ERROR: "Internal Server Error",
- NOT_IMPLEMENTED: "Not Implemented",
- BAD_GATEWAY: "Bad Gateway",
- SERVICE_UNAVAILABLE: "Service Unavailable",
- GATEWAY_TIMEOUT: "Gateway Time-out",
- HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
- INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
- NOT_EXTENDED: "Not Extended"
-}
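For reference, a minimal sketch of looking up a reason phrase in the table removed above (the fallback string is illustrative):

    from netlib.http import status_codes

    reason = status_codes.RESPONSES.get(status_codes.NOT_FOUND, "Unknown")
    # reason == "Not Found"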
diff --git a/netlib/http/user_agents.py b/netlib/http/user_agents.py
deleted file mode 100644
index e8681908..00000000
--- a/netlib/http/user_agents.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from __future__ import (absolute_import, print_function, division)
-
-"""
- A small collection of useful user-agent header strings. These should be
- kept reasonably current to reflect common usage.
-"""
-
- # pylint: disable=line-too-long
-
-# A collection of (name, shortcut, string) tuples.
-
-UASTRINGS = [
- ("android",
- "a",
- "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02"), # noqa
- ("blackberry",
- "l",
- "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+"), # noqa
- ("bingbot",
- "b",
- "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"), # noqa
- ("chrome",
- "c",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"), # noqa
- ("firefox",
- "f",
- "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1"), # noqa
- ("googlebot",
- "g",
- "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"), # noqa
- ("ie9",
- "i",
- "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)"), # noqa
- ("ipad",
- "p",
- "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B176 Safari/7534.48.3"), # noqa
- ("iphone",
- "h",
- "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5"), # noqa
- ("safari",
- "s",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10"), # noqa
-]
-
-
-def get_by_shortcut(s):
- """
- Retrieve a (name, shortcut, string) user agent tuple by its shortcut, or None if there is no match.
- """
- for i in UASTRINGS:
- if s == i[1]:
- return i
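And a short, hedged sketch of the shortcut lookup removed above (the unpacking assumes a match is found):

    from netlib.http import user_agents

    entry = user_agents.get_by_shortcut("c")
    if entry is not None:
        name, shortcut, ua_string = entry   # e.g. ("chrome", "c", "Mozilla/5.0 ...")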