diff options
Diffstat (limited to 'netlib/http')
| -rw-r--r-- | netlib/http/__init__.py | 14 | ||||
| -rw-r--r-- | netlib/http/authentication.py | 167 | ||||
| -rw-r--r-- | netlib/http/cookies.py | 193 | ||||
| -rw-r--r-- | netlib/http/headers.py | 204 | ||||
| -rw-r--r-- | netlib/http/http1/__init__.py | 25 | ||||
| -rw-r--r-- | netlib/http/http1/assemble.py | 104 | ||||
| -rw-r--r-- | netlib/http/http1/read.py | 362 | ||||
| -rw-r--r-- | netlib/http/http2/__init__.py | 6 | ||||
| -rw-r--r-- | netlib/http/http2/connections.py | 426 | ||||
| -rw-r--r-- | netlib/http/message.py | 222 | ||||
| -rw-r--r-- | netlib/http/request.py | 353 | ||||
| -rw-r--r-- | netlib/http/response.py | 116 | ||||
| -rw-r--r-- | netlib/http/status_codes.py | 106 | ||||
| -rw-r--r-- | netlib/http/user_agents.py | 52 |
14 files changed, 0 insertions, 2350 deletions
diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py deleted file mode 100644 index fd632cd5..00000000 --- a/netlib/http/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import absolute_import, print_function, division -from .request import Request -from .response import Response -from .headers import Headers -from .message import decoded, CONTENT_MISSING -from . import http1, http2 - -__all__ = [ - "Request", - "Response", - "Headers", - "decoded", "CONTENT_MISSING", - "http1", "http2", -] diff --git a/netlib/http/authentication.py b/netlib/http/authentication.py deleted file mode 100644 index d769abe5..00000000 --- a/netlib/http/authentication.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import (absolute_import, print_function, division) -from argparse import Action, ArgumentTypeError -import binascii - - -def parse_http_basic_auth(s): - words = s.split() - if len(words) != 2: - return None - scheme = words[0] - try: - user = binascii.a2b_base64(words[1]).decode("utf8", "replace") - except binascii.Error: - return None - parts = user.split(':') - if len(parts) != 2: - return None - return scheme, parts[0], parts[1] - - -def assemble_http_basic_auth(scheme, username, password): - v = binascii.b2a_base64((username + ":" + password).encode("utf8")).decode("ascii") - return scheme + " " + v - - -class NullProxyAuth(object): - - """ - No proxy auth at all (returns empty challange headers) - """ - - def __init__(self, password_manager): - self.password_manager = password_manager - - def clean(self, headers_): - """ - Clean up authentication headers, so they're not passed upstream. - """ - - def authenticate(self, headers_): - """ - Tests that the user is allowed to use the proxy - """ - return True - - def auth_challenge_headers(self): - """ - Returns a dictionary containing the headers require to challenge the user - """ - return {} - - -class BasicProxyAuth(NullProxyAuth): - CHALLENGE_HEADER = 'Proxy-Authenticate' - AUTH_HEADER = 'Proxy-Authorization' - - def __init__(self, password_manager, realm): - NullProxyAuth.__init__(self, password_manager) - self.realm = realm - - def clean(self, headers): - del headers[self.AUTH_HEADER] - - def authenticate(self, headers): - auth_value = headers.get(self.AUTH_HEADER) - if not auth_value: - return False - parts = parse_http_basic_auth(auth_value) - if not parts: - return False - scheme, username, password = parts - if scheme.lower() != 'basic': - return False - if not self.password_manager.test(username, password): - return False - self.username = username - return True - - def auth_challenge_headers(self): - return {self.CHALLENGE_HEADER: 'Basic realm="%s"' % self.realm} - - -class PassMan(object): - - def test(self, username_, password_token_): - return False - - -class PassManNonAnon(PassMan): - - """ - Ensure the user specifies a username, accept any password. - """ - - def test(self, username, password_token_): - if username: - return True - return False - - -class PassManHtpasswd(PassMan): - - """ - Read usernames and passwords from an htpasswd file - """ - - def __init__(self, path): - """ - Raises ValueError if htpasswd file is invalid. - """ - import passlib.apache - self.htpasswd = passlib.apache.HtpasswdFile(path) - - def test(self, username, password_token): - return bool(self.htpasswd.check_password(username, password_token)) - - -class PassManSingleUser(PassMan): - - def __init__(self, username, password): - self.username, self.password = username, password - - def test(self, username, password_token): - return self.username == username and self.password == password_token - - -class AuthAction(Action): - - """ - Helper class to allow seamless integration int argparse. Example usage: - parser.add_argument( - "--nonanonymous", - action=NonanonymousAuthAction, nargs=0, - help="Allow access to any user long as a credentials are specified." - ) - """ - - def __call__(self, parser, namespace, values, option_string=None): - passman = self.getPasswordManager(values) - authenticator = BasicProxyAuth(passman, "mitmproxy") - setattr(namespace, self.dest, authenticator) - - def getPasswordManager(self, s): # pragma: nocover - raise NotImplementedError() - - -class SingleuserAuthAction(AuthAction): - - def getPasswordManager(self, s): - if len(s.split(':')) != 2: - raise ArgumentTypeError( - "Invalid single-user specification. Please use the format username:password" - ) - username, password = s.split(':') - return PassManSingleUser(username, password) - - -class NonanonymousAuthAction(AuthAction): - - def getPasswordManager(self, s): - return PassManNonAnon() - - -class HtpasswdAuthAction(AuthAction): - - def getPasswordManager(self, s): - return PassManHtpasswd(s) diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py deleted file mode 100644 index 18544b5e..00000000 --- a/netlib/http/cookies.py +++ /dev/null @@ -1,193 +0,0 @@ -import re - -from .. import odict - -""" -A flexible module for cookie parsing and manipulation. - -This module differs from usual standards-compliant cookie modules in a number -of ways. We try to be as permissive as possible, and to retain even mal-formed -information. Duplicate cookies are preserved in parsing, and can be set in -formatting. We do attempt to escape and quote values where needed, but will not -reject data that violate the specs. - -Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do -not parse the comma-separated variant of Set-Cookie that allows multiple -cookies to be set in a single header. Technically this should be feasible, but -it turns out that violations of RFC6265 that makes the parsing problem -indeterminate are much more common than genuine occurences of the multi-cookie -variants. Serialization follows RFC6265. - - http://tools.ietf.org/html/rfc6265 - http://tools.ietf.org/html/rfc2109 - http://tools.ietf.org/html/rfc2965 -""" - -# TODO: Disallow LHS-only Cookie values - - -def _read_until(s, start, term): - """ - Read until one of the characters in term is reached. - """ - if start == len(s): - return "", start + 1 - for i in range(start, len(s)): - if s[i] in term: - return s[start:i], i - return s[start:i + 1], i + 1 - - -def _read_token(s, start): - """ - Read a token - the LHS of a token/value pair in a cookie. - """ - return _read_until(s, start, ";=") - - -def _read_quoted_string(s, start): - """ - start: offset to the first quote of the string to be read - - A sort of loose super-set of the various quoted string specifications. - - RFC6265 disallows backslashes or double quotes within quoted strings. - Prior RFCs use backslashes to escape. This leaves us free to apply - backslash escaping by default and be compatible with everything. - """ - escaping = False - ret = [] - # Skip the first quote - i = start # initialize in case the loop doesn't run. - for i in range(start + 1, len(s)): - if escaping: - ret.append(s[i]) - escaping = False - elif s[i] == '"': - break - elif s[i] == "\\": - escaping = True - else: - ret.append(s[i]) - return "".join(ret), i + 1 - - -def _read_value(s, start, delims): - """ - Reads a value - the RHS of a token/value pair in a cookie. - - special: If the value is special, commas are premitted. Else comma - terminates. This helps us support old and new style values. - """ - if start >= len(s): - return "", start - elif s[start] == '"': - return _read_quoted_string(s, start) - else: - return _read_until(s, start, delims) - - -def _read_pairs(s, off=0): - """ - Read pairs of lhs=rhs values. - - off: start offset - specials: a lower-cased list of keys that may contain commas - """ - vals = [] - while True: - lhs, off = _read_token(s, off) - lhs = lhs.lstrip() - if lhs: - rhs = None - if off < len(s): - if s[off] == "=": - rhs, off = _read_value(s, off + 1, ";") - vals.append([lhs, rhs]) - off += 1 - if not off < len(s): - break - return vals, off - - -def _has_special(s): - for i in s: - if i in '",;\\': - return True - o = ord(i) - if o < 0x21 or o > 0x7e: - return True - return False - - -ESCAPE = re.compile(r"([\"\\])") - - -def _format_pairs(lst, specials=(), sep="; "): - """ - specials: A lower-cased list of keys that will not be quoted. - """ - vals = [] - for k, v in lst: - if v is None: - vals.append(k) - else: - if k.lower() not in specials and _has_special(v): - v = ESCAPE.sub(r"\\\1", v) - v = '"%s"' % v - vals.append("%s=%s" % (k, v)) - return sep.join(vals) - - -def _format_set_cookie_pairs(lst): - return _format_pairs( - lst, - specials=("expires", "path") - ) - - -def _parse_set_cookie_pairs(s): - """ - For Set-Cookie, we support multiple cookies as described in RFC2109. - This function therefore returns a list of lists. - """ - pairs, off_ = _read_pairs(s) - return pairs - - -def parse_set_cookie_header(line): - """ - Parse a Set-Cookie header value - - Returns a (name, value, attrs) tuple, or None, where attrs is an - ODictCaseless set of attributes. No attempt is made to parse attribute - values - they are treated purely as strings. - """ - pairs = _parse_set_cookie_pairs(line) - if pairs: - return pairs[0][0], pairs[0][1], odict.ODictCaseless(pairs[1:]) - - -def format_set_cookie_header(name, value, attrs): - """ - Formats a Set-Cookie header value. - """ - pairs = [[name, value]] - pairs.extend(attrs.lst) - return _format_set_cookie_pairs(pairs) - - -def parse_cookie_header(line): - """ - Parse a Cookie header value. - Returns a (possibly empty) ODict object. - """ - pairs, off_ = _read_pairs(line) - return odict.ODict(pairs) - - -def format_cookie_header(od): - """ - Formats a Cookie header value. - """ - return _format_pairs(od.lst) diff --git a/netlib/http/headers.py b/netlib/http/headers.py deleted file mode 100644 index 78404796..00000000 --- a/netlib/http/headers.py +++ /dev/null @@ -1,204 +0,0 @@ -""" - -Unicode Handling ----------------- -See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/ -""" -from __future__ import absolute_import, print_function, division -import copy -try: - from collections.abc import MutableMapping -except ImportError: # pragma: nocover - from collections import MutableMapping # Workaround for Python < 3.3 - - -import six - -from netlib.utils import always_byte_args, always_bytes, Serializable - -if six.PY2: # pragma: nocover - _native = lambda x: x - _always_bytes = lambda x: x - _always_byte_args = lambda x: x -else: - # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. - _native = lambda x: x.decode("utf-8", "surrogateescape") - _always_bytes = lambda x: always_bytes(x, "utf-8", "surrogateescape") - _always_byte_args = always_byte_args("utf-8", "surrogateescape") - - -class Headers(MutableMapping, Serializable): - """ - Header class which allows both convenient access to individual headers as well as - direct access to the underlying raw data. Provides a full dictionary interface. - - Example: - - .. code-block:: python - - # Create headers with keyword arguments - >>> h = Headers(host="example.com", content_type="application/xml") - - # Headers mostly behave like a normal dict. - >>> h["Host"] - "example.com" - - # HTTP Headers are case insensitive - >>> h["host"] - "example.com" - - # Headers can also be creatd from a list of raw (header_name, header_value) byte tuples - >>> h = Headers([ - [b"Host",b"example.com"], - [b"Accept",b"text/html"], - [b"accept",b"application/xml"] - ]) - - # Multiple headers are folded into a single header as per RFC7230 - >>> h["Accept"] - "text/html, application/xml" - - # Setting a header removes all existing headers with the same name. - >>> h["Accept"] = "application/text" - >>> h["Accept"] - "application/text" - - # bytes(h) returns a HTTP1 header block. - >>> print(bytes(h)) - Host: example.com - Accept: application/text - - # For full control, the raw header fields can be accessed - >>> h.fields - - Caveats: - For use with the "Set-Cookie" header, see :py:meth:`get_all`. - """ - - @_always_byte_args - def __init__(self, fields=None, **headers): - """ - Args: - fields: (optional) list of ``(name, value)`` header byte tuples, - e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes. - **headers: Additional headers to set. Will overwrite existing values from `fields`. - For convenience, underscores in header names will be transformed to dashes - - this behaviour does not extend to other methods. - If ``**headers`` contains multiple keys that have equal ``.lower()`` s, - the behavior is undefined. - """ - self.fields = fields or [] - - for name, value in self.fields: - if not isinstance(name, bytes) or not isinstance(value, bytes): - raise ValueError("Headers passed as fields must be bytes.") - - # content_type -> content-type - headers = { - _always_bytes(name).replace(b"_", b"-"): value - for name, value in six.iteritems(headers) - } - self.update(headers) - - def __bytes__(self): - if self.fields: - return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n" - else: - return b"" - - if six.PY2: # pragma: nocover - __str__ = __bytes__ - - @_always_byte_args - def __getitem__(self, name): - values = self.get_all(name) - if not values: - raise KeyError(name) - return ", ".join(values) - - @_always_byte_args - def __setitem__(self, name, value): - idx = self._index(name) - - # To please the human eye, we insert at the same position the first existing header occured. - if idx is not None: - del self[name] - self.fields.insert(idx, [name, value]) - else: - self.fields.append([name, value]) - - @_always_byte_args - def __delitem__(self, name): - if name not in self: - raise KeyError(name) - name = name.lower() - self.fields = [ - field for field in self.fields - if name != field[0].lower() - ] - - def __iter__(self): - seen = set() - for name, _ in self.fields: - name_lower = name.lower() - if name_lower not in seen: - seen.add(name_lower) - yield _native(name) - - def __len__(self): - return len(set(name.lower() for name, _ in self.fields)) - - # __hash__ = object.__hash__ - - def _index(self, name): - name = name.lower() - for i, field in enumerate(self.fields): - if field[0].lower() == name: - return i - return None - - def __eq__(self, other): - if isinstance(other, Headers): - return self.fields == other.fields - return False - - def __ne__(self, other): - return not self.__eq__(other) - - @_always_byte_args - def get_all(self, name): - """ - Like :py:meth:`get`, but does not fold multiple headers into a single one. - This is useful for Set-Cookie headers, which do not support folding. - - See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 - """ - name_lower = name.lower() - values = [_native(value) for n, value in self.fields if n.lower() == name_lower] - return values - - @_always_byte_args - def set_all(self, name, values): - """ - Explicitly set multiple headers for the given key. - See: :py:meth:`get_all` - """ - values = map(_always_bytes, values) # _always_byte_args does not fix lists - if name in self: - del self[name] - self.fields.extend( - [name, value] for value in values - ) - - def copy(self): - return Headers(copy.copy(self.fields)) - - def get_state(self): - return tuple(tuple(field) for field in self.fields) - - def set_state(self, state): - self.fields = [list(field) for field in state] - - @classmethod - def from_state(cls, state): - return cls([list(field) for field in state])
\ No newline at end of file diff --git a/netlib/http/http1/__init__.py b/netlib/http/http1/__init__.py deleted file mode 100644 index 2aa7e26a..00000000 --- a/netlib/http/http1/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import absolute_import, print_function, division -from .read import ( - read_request, read_request_head, - read_response, read_response_head, - read_body, - connection_close, - expected_http_body_size, -) -from .assemble import ( - assemble_request, assemble_request_head, - assemble_response, assemble_response_head, - assemble_body, -) - - -__all__ = [ - "read_request", "read_request_head", - "read_response", "read_response_head", - "read_body", - "connection_close", - "expected_http_body_size", - "assemble_request", "assemble_request_head", - "assemble_response", "assemble_response_head", - "assemble_body", -] diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py deleted file mode 100644 index 785ee8d3..00000000 --- a/netlib/http/http1/assemble.py +++ /dev/null @@ -1,104 +0,0 @@ -from __future__ import absolute_import, print_function, division - -from ... import utils -import itertools -from ...exceptions import HttpException -from .. import CONTENT_MISSING - - -def assemble_request(request): - if request.content == CONTENT_MISSING: - raise HttpException("Cannot assemble flow with CONTENT_MISSING") - head = assemble_request_head(request) - body = b"".join(assemble_body(request.data.headers, [request.data.content])) - return head + body - - -def assemble_request_head(request): - first_line = _assemble_request_line(request.data) - headers = _assemble_request_headers(request.data) - return b"%s\r\n%s\r\n" % (first_line, headers) - - -def assemble_response(response): - if response.content == CONTENT_MISSING: - raise HttpException("Cannot assemble flow with CONTENT_MISSING") - head = assemble_response_head(response) - body = b"".join(assemble_body(response.data.headers, [response.data.content])) - return head + body - - -def assemble_response_head(response): - first_line = _assemble_response_line(response.data) - headers = _assemble_response_headers(response.data) - return b"%s\r\n%s\r\n" % (first_line, headers) - - -def assemble_body(headers, body_chunks): - if "chunked" in headers.get("transfer-encoding", "").lower(): - for chunk in body_chunks: - if chunk: - yield b"%x\r\n%s\r\n" % (len(chunk), chunk) - yield b"0\r\n\r\n" - else: - for chunk in body_chunks: - yield chunk - - -def _assemble_request_line(request_data): - """ - Args: - request_data (netlib.http.request.RequestData) - """ - form = request_data.first_line_format - if form == "relative": - return b"%s %s %s" % ( - request_data.method, - request_data.path, - request_data.http_version - ) - elif form == "authority": - return b"%s %s:%d %s" % ( - request_data.method, - request_data.host, - request_data.port, - request_data.http_version - ) - elif form == "absolute": - return b"%s %s://%s:%d%s %s" % ( - request_data.method, - request_data.scheme, - request_data.host, - request_data.port, - request_data.path, - request_data.http_version - ) - else: - raise RuntimeError("Invalid request form") - - -def _assemble_request_headers(request_data): - """ - Args: - request_data (netlib.http.request.RequestData) - """ - headers = request_data.headers.copy() - if "host" not in headers and request_data.scheme and request_data.host and request_data.port: - headers["host"] = utils.hostport( - request_data.scheme, - request_data.host, - request_data.port - ) - return bytes(headers) - - -def _assemble_response_line(response_data): - return b"%s %d %s" % ( - response_data.http_version, - response_data.status_code, - response_data.reason, - ) - - -def _assemble_response_headers(response): - return bytes(response.headers) diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py deleted file mode 100644 index 6e3a1b93..00000000 --- a/netlib/http/http1/read.py +++ /dev/null @@ -1,362 +0,0 @@ -from __future__ import absolute_import, print_function, division -import time -import sys -import re - -from ... import utils -from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect -from .. import Request, Response, Headers - - -def read_request(rfile, body_size_limit=None): - request = read_request_head(rfile) - expected_body_size = expected_http_body_size(request) - request.data.content = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit)) - request.timestamp_end = time.time() - return request - - -def read_request_head(rfile): - """ - Parse an HTTP request head (request line + headers) from an input stream - - Args: - rfile: The input stream - - Returns: - The HTTP request object (without body) - - Raises: - HttpReadDisconnect: No bytes can be read from rfile. - HttpSyntaxException: The input is malformed HTTP. - HttpException: Any other error occured. - """ - timestamp_start = time.time() - if hasattr(rfile, "reset_timestamps"): - rfile.reset_timestamps() - - form, method, scheme, host, port, path, http_version = _read_request_line(rfile) - headers = _read_headers(rfile) - - if hasattr(rfile, "first_byte_timestamp"): - # more accurate timestamp_start - timestamp_start = rfile.first_byte_timestamp - - return Request( - form, method, scheme, host, port, path, http_version, headers, None, timestamp_start - ) - - -def read_response(rfile, request, body_size_limit=None): - response = read_response_head(rfile) - expected_body_size = expected_http_body_size(request, response) - response.data.content = b"".join(read_body(rfile, expected_body_size, body_size_limit)) - response.timestamp_end = time.time() - return response - - -def read_response_head(rfile): - """ - Parse an HTTP response head (response line + headers) from an input stream - - Args: - rfile: The input stream - - Returns: - The HTTP request object (without body) - - Raises: - HttpReadDisconnect: No bytes can be read from rfile. - HttpSyntaxException: The input is malformed HTTP. - HttpException: Any other error occured. - """ - - timestamp_start = time.time() - if hasattr(rfile, "reset_timestamps"): - rfile.reset_timestamps() - - http_version, status_code, message = _read_response_line(rfile) - headers = _read_headers(rfile) - - if hasattr(rfile, "first_byte_timestamp"): - # more accurate timestamp_start - timestamp_start = rfile.first_byte_timestamp - - return Response(http_version, status_code, message, headers, None, timestamp_start) - - -def read_body(rfile, expected_size, limit=None, max_chunk_size=4096): - """ - Read an HTTP message body - - Args: - rfile: The input stream - expected_size: The expected body size (see :py:meth:`expected_body_size`) - limit: Maximum body size - max_chunk_size: Maximium chunk size that gets yielded - - Returns: - A generator that yields byte chunks of the content. - - Raises: - HttpException, if an error occurs - - Caveats: - max_chunk_size is not considered if the transfer encoding is chunked. - """ - if not limit or limit < 0: - limit = sys.maxsize - if not max_chunk_size: - max_chunk_size = limit - - if expected_size is None: - for x in _read_chunked(rfile, limit): - yield x - elif expected_size >= 0: - if limit is not None and expected_size > limit: - raise HttpException( - "HTTP Body too large. " - "Limit is {}, content length was advertised as {}".format(limit, expected_size) - ) - bytes_left = expected_size - while bytes_left: - chunk_size = min(bytes_left, max_chunk_size) - content = rfile.read(chunk_size) - if len(content) < chunk_size: - raise HttpException("Unexpected EOF") - yield content - bytes_left -= chunk_size - else: - bytes_left = limit - while bytes_left: - chunk_size = min(bytes_left, max_chunk_size) - content = rfile.read(chunk_size) - if not content: - return - yield content - bytes_left -= chunk_size - not_done = rfile.read(1) - if not_done: - raise HttpException("HTTP body too large. Limit is {}.".format(limit)) - - -def connection_close(http_version, headers): - """ - Checks the message to see if the client connection should be closed - according to RFC 2616 Section 8.1. - """ - # At first, check if we have an explicit Connection header. - if "connection" in headers: - tokens = utils.get_header_tokens(headers, "connection") - if "close" in tokens: - return True - elif "keep-alive" in tokens: - return False - - # If we don't have a Connection header, HTTP 1.1 connections are assumed to - # be persistent - return http_version != "HTTP/1.1" and http_version != b"HTTP/1.1" # FIXME: Remove one case. - - -def expected_http_body_size(request, response=None): - """ - Returns: - The expected body length: - - a positive integer, if the size is known in advance - - None, if the size in unknown in advance (chunked encoding) - - -1, if all data should be read until end of stream. - - Raises: - HttpSyntaxException, if the content length header is invalid - """ - # Determine response size according to - # http://tools.ietf.org/html/rfc7230#section-3.3 - if not response: - headers = request.headers - response_code = None - is_request = True - else: - headers = response.headers - response_code = response.status_code - is_request = False - - if is_request: - if headers.get("expect", "").lower() == "100-continue": - return 0 - else: - if request.method.upper() == "HEAD": - return 0 - if 100 <= response_code <= 199: - return 0 - if response_code == 200 and request.method.upper() == "CONNECT": - return 0 - if response_code in (204, 304): - return 0 - - if "chunked" in headers.get("transfer-encoding", "").lower(): - return None - if "content-length" in headers: - try: - size = int(headers["content-length"]) - if size < 0: - raise ValueError() - return size - except ValueError: - raise HttpSyntaxException("Unparseable Content Length") - if is_request: - return 0 - return -1 - - -def _get_first_line(rfile): - try: - line = rfile.readline() - if line == b"\r\n" or line == b"\n": - # Possible leftover from previous message - line = rfile.readline() - except TcpDisconnect: - raise HttpReadDisconnect("Remote disconnected") - if not line: - raise HttpReadDisconnect("Remote disconnected") - return line.strip() - - -def _read_request_line(rfile): - try: - line = _get_first_line(rfile) - except HttpReadDisconnect: - # We want to provide a better error message. - raise HttpReadDisconnect("Client disconnected") - - try: - method, path, http_version = line.split(b" ") - - if path == b"*" or path.startswith(b"/"): - form = "relative" - scheme, host, port = None, None, None - elif method == b"CONNECT": - form = "authority" - host, port = _parse_authority_form(path) - scheme, path = None, None - else: - form = "absolute" - scheme, host, port, path = utils.parse_url(path) - - _check_http_version(http_version) - except ValueError: - raise HttpSyntaxException("Bad HTTP request line: {}".format(line)) - - return form, method, scheme, host, port, path, http_version - - -def _parse_authority_form(hostport): - """ - Returns (host, port) if hostport is a valid authority-form host specification. - http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1 - - Raises: - ValueError, if the input is malformed - """ - try: - host, port = hostport.split(b":") - port = int(port) - if not utils.is_valid_host(host) or not utils.is_valid_port(port): - raise ValueError() - except ValueError: - raise HttpSyntaxException("Invalid host specification: {}".format(hostport)) - - return host, port - - -def _read_response_line(rfile): - try: - line = _get_first_line(rfile) - except HttpReadDisconnect: - # We want to provide a better error message. - raise HttpReadDisconnect("Server disconnected") - - try: - - parts = line.split(b" ", 2) - if len(parts) == 2: # handle missing message gracefully - parts.append(b"") - - http_version, status_code, message = parts - status_code = int(status_code) - _check_http_version(http_version) - - except ValueError: - raise HttpSyntaxException("Bad HTTP response line: {}".format(line)) - - return http_version, status_code, message - - -def _check_http_version(http_version): - if not re.match(br"^HTTP/\d\.\d$", http_version): - raise HttpSyntaxException("Unknown HTTP version: {}".format(http_version)) - - -def _read_headers(rfile): - """ - Read a set of headers. - Stop once a blank line is reached. - - Returns: - A headers object - - Raises: - HttpSyntaxException - """ - ret = [] - while True: - line = rfile.readline() - if not line or line == b"\r\n" or line == b"\n": - break - if line[0] in b" \t": - if not ret: - raise HttpSyntaxException("Invalid headers") - # continued header - ret[-1][1] = ret[-1][1] + b'\r\n ' + line.strip() - else: - try: - name, value = line.split(b":", 1) - value = value.strip() - if not name: - raise ValueError() - ret.append([name, value]) - except ValueError: - raise HttpSyntaxException("Invalid headers") - return Headers(ret) - - -def _read_chunked(rfile, limit=sys.maxsize): - """ - Read a HTTP body with chunked transfer encoding. - - Args: - rfile: the input file - limit: A positive integer - """ - total = 0 - while True: - line = rfile.readline(128) - if line == b"": - raise HttpException("Connection closed prematurely") - if line != b"\r\n" and line != b"\n": - try: - length = int(line, 16) - except ValueError: - raise HttpSyntaxException("Invalid chunked encoding length: {}".format(line)) - total += length - if total > limit: - raise HttpException( - "HTTP Body too large. Limit is {}, " - "chunked content longer than {}".format(limit, total) - ) - chunk = rfile.read(length) - suffix = rfile.readline(5) - if suffix != b"\r\n": - raise HttpSyntaxException("Malformed chunked body") - if length == 0: - return - yield chunk diff --git a/netlib/http/http2/__init__.py b/netlib/http/http2/__init__.py deleted file mode 100644 index 7043d36f..00000000 --- a/netlib/http/http2/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from __future__ import absolute_import, print_function, division -from .connections import HTTP2Protocol - -__all__ = [ - "HTTP2Protocol" -] diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py deleted file mode 100644 index 52fa7193..00000000 --- a/netlib/http/http2/connections.py +++ /dev/null @@ -1,426 +0,0 @@ -from __future__ import (absolute_import, print_function, division) -import itertools -import time - -from hpack.hpack import Encoder, Decoder -from ... import utils -from .. import Headers, Response, Request - -from hyperframe import frame - - -class TCPHandler(object): - - def __init__(self, rfile, wfile=None): - self.rfile = rfile - self.wfile = wfile - - -class HTTP2Protocol(object): - - ERROR_CODES = utils.BiDi( - NO_ERROR=0x0, - PROTOCOL_ERROR=0x1, - INTERNAL_ERROR=0x2, - FLOW_CONTROL_ERROR=0x3, - SETTINGS_TIMEOUT=0x4, - STREAM_CLOSED=0x5, - FRAME_SIZE_ERROR=0x6, - REFUSED_STREAM=0x7, - CANCEL=0x8, - COMPRESSION_ERROR=0x9, - CONNECT_ERROR=0xa, - ENHANCE_YOUR_CALM=0xb, - INADEQUATE_SECURITY=0xc, - HTTP_1_1_REQUIRED=0xd - ) - - CLIENT_CONNECTION_PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' - - HTTP2_DEFAULT_SETTINGS = { - frame.SettingsFrame.HEADER_TABLE_SIZE: 4096, - frame.SettingsFrame.ENABLE_PUSH: 1, - frame.SettingsFrame.MAX_CONCURRENT_STREAMS: None, - frame.SettingsFrame.INITIAL_WINDOW_SIZE: 2 ** 16 - 1, - frame.SettingsFrame.MAX_FRAME_SIZE: 2 ** 14, - frame.SettingsFrame.MAX_HEADER_LIST_SIZE: None, - } - - def __init__( - self, - tcp_handler=None, - rfile=None, - wfile=None, - is_server=False, - dump_frames=False, - encoder=None, - decoder=None, - unhandled_frame_cb=None, - ): - self.tcp_handler = tcp_handler or TCPHandler(rfile, wfile) - self.is_server = is_server - self.dump_frames = dump_frames - self.encoder = encoder or Encoder() - self.decoder = decoder or Decoder() - self.unhandled_frame_cb = unhandled_frame_cb - - self.http2_settings = self.HTTP2_DEFAULT_SETTINGS.copy() - self.current_stream_id = None - self.connection_preface_performed = False - - def read_request( - self, - __rfile, - include_body=True, - body_size_limit=None, - allow_empty=False, - ): - if body_size_limit is not None: - raise NotImplementedError() - - self.perform_connection_preface() - - timestamp_start = time.time() - if hasattr(self.tcp_handler.rfile, "reset_timestamps"): - self.tcp_handler.rfile.reset_timestamps() - - stream_id, headers, body = self._receive_transmission( - include_body=include_body, - ) - - if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"): - # more accurate timestamp_start - timestamp_start = self.tcp_handler.rfile.first_byte_timestamp - - timestamp_end = time.time() - - authority = headers.get(':authority', b'') - method = headers.get(':method', 'GET') - scheme = headers.get(':scheme', 'https') - path = headers.get(':path', '/') - host = None - port = None - - if path == '*' or path.startswith("/"): - form_in = "relative" - elif method == 'CONNECT': - form_in = "authority" - if ":" in authority: - host, port = authority.split(":", 1) - else: - host = authority - else: - form_in = "absolute" - # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = utils.parse_url(path) - scheme = scheme.decode('ascii') - host = host.decode('ascii') - - if host is None: - host = 'localhost' - if port is None: - port = 80 if scheme == 'http' else 443 - port = int(port) - - request = Request( - form_in, - method.encode('ascii'), - scheme.encode('ascii'), - host.encode('ascii'), - port, - path.encode('ascii'), - b"HTTP/2.0", - headers, - body, - timestamp_start, - timestamp_end, - ) - request.stream_id = stream_id - - return request - - def read_response( - self, - __rfile, - request_method=b'', - body_size_limit=None, - include_body=True, - stream_id=None, - ): - if body_size_limit is not None: - raise NotImplementedError() - - self.perform_connection_preface() - - timestamp_start = time.time() - if hasattr(self.tcp_handler.rfile, "reset_timestamps"): - self.tcp_handler.rfile.reset_timestamps() - - stream_id, headers, body = self._receive_transmission( - stream_id=stream_id, - include_body=include_body, - ) - - if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"): - # more accurate timestamp_start - timestamp_start = self.tcp_handler.rfile.first_byte_timestamp - - if include_body: - timestamp_end = time.time() - else: - timestamp_end = None - - response = Response( - b"HTTP/2.0", - int(headers.get(':status', 502)), - b'', - headers, - body, - timestamp_start=timestamp_start, - timestamp_end=timestamp_end, - ) - response.stream_id = stream_id - - return response - - def assemble(self, message): - if isinstance(message, Request): - return self.assemble_request(message) - elif isinstance(message, Response): - return self.assemble_response(message) - else: - raise ValueError("HTTP message not supported.") - - def assemble_request(self, request): - assert isinstance(request, Request) - - authority = self.tcp_handler.sni if self.tcp_handler.sni else self.tcp_handler.address.host - if self.tcp_handler.address.port != 443: - authority += ":%d" % self.tcp_handler.address.port - - headers = request.headers.copy() - - if ':authority' not in headers: - headers.fields.insert(0, (b':authority', authority.encode('ascii'))) - if ':scheme' not in headers: - headers.fields.insert(0, (b':scheme', request.scheme.encode('ascii'))) - if ':path' not in headers: - headers.fields.insert(0, (b':path', request.path.encode('ascii'))) - if ':method' not in headers: - headers.fields.insert(0, (b':method', request.method.encode('ascii'))) - - if hasattr(request, 'stream_id'): - stream_id = request.stream_id - else: - stream_id = self._next_stream_id() - - return list(itertools.chain( - self._create_headers(headers, stream_id, end_stream=(request.body is None or len(request.body) == 0)), - self._create_body(request.body, stream_id))) - - def assemble_response(self, response): - assert isinstance(response, Response) - - headers = response.headers.copy() - - if ':status' not in headers: - headers.fields.insert(0, (b':status', str(response.status_code).encode('ascii'))) - - if hasattr(response, 'stream_id'): - stream_id = response.stream_id - else: - stream_id = self._next_stream_id() - - return list(itertools.chain( - self._create_headers(headers, stream_id, end_stream=(response.body is None or len(response.body) == 0)), - self._create_body(response.body, stream_id), - )) - - def perform_connection_preface(self, force=False): - if force or not self.connection_preface_performed: - if self.is_server: - self.perform_server_connection_preface(force) - else: - self.perform_client_connection_preface(force) - - def perform_server_connection_preface(self, force=False): - if force or not self.connection_preface_performed: - self.connection_preface_performed = True - - magic_length = len(self.CLIENT_CONNECTION_PREFACE) - magic = self.tcp_handler.rfile.safe_read(magic_length) - assert magic == self.CLIENT_CONNECTION_PREFACE - - frm = frame.SettingsFrame(settings={ - frame.SettingsFrame.ENABLE_PUSH: 0, - frame.SettingsFrame.MAX_CONCURRENT_STREAMS: 1, - }) - self.send_frame(frm, hide=True) - self._receive_settings(hide=True) - - def perform_client_connection_preface(self, force=False): - if force or not self.connection_preface_performed: - self.connection_preface_performed = True - - self.tcp_handler.wfile.write(self.CLIENT_CONNECTION_PREFACE) - - self.send_frame(frame.SettingsFrame(), hide=True) - self._receive_settings(hide=True) # server announces own settings - self._receive_settings(hide=True) # server acks my settings - - def send_frame(self, frm, hide=False): - raw_bytes = frm.serialize() - self.tcp_handler.wfile.write(raw_bytes) - self.tcp_handler.wfile.flush() - if not hide and self.dump_frames: # pragma no cover - print(frm.human_readable(">>")) - - def read_frame(self, hide=False): - while True: - frm = utils.http2_read_frame(self.tcp_handler.rfile) - if not hide and self.dump_frames: # pragma no cover - print(frm.human_readable("<<")) - - if isinstance(frm, frame.PingFrame): - raw_bytes = frame.PingFrame(flags=['ACK'], payload=frm.payload).serialize() - self.tcp_handler.wfile.write(raw_bytes) - self.tcp_handler.wfile.flush() - continue - if isinstance(frm, frame.SettingsFrame) and 'ACK' not in frm.flags: - self._apply_settings(frm.settings, hide) - if isinstance(frm, frame.DataFrame) and frm.flow_controlled_length > 0: - self._update_flow_control_window(frm.stream_id, frm.flow_controlled_length) - return frm - - def check_alpn(self): - alp = self.tcp_handler.get_alpn_proto_negotiated() - if alp != b'h2': - raise NotImplementedError( - "HTTP2Protocol can not handle unknown ALP: %s" % alp) - return True - - def _handle_unexpected_frame(self, frm): - if isinstance(frm, frame.SettingsFrame): - return - if self.unhandled_frame_cb: - self.unhandled_frame_cb(frm) - - def _receive_settings(self, hide=False): - while True: - frm = self.read_frame(hide) - if isinstance(frm, frame.SettingsFrame): - break - else: - self._handle_unexpected_frame(frm) - - def _next_stream_id(self): - if self.current_stream_id is None: - if self.is_server: - # servers must use even stream ids - self.current_stream_id = 2 - else: - # clients must use odd stream ids - self.current_stream_id = 1 - else: - self.current_stream_id += 2 - return self.current_stream_id - - def _apply_settings(self, settings, hide=False): - for setting, value in settings.items(): - old_value = self.http2_settings[setting] - if not old_value: - old_value = '-' - self.http2_settings[setting] = value - - frm = frame.SettingsFrame(flags=['ACK']) - self.send_frame(frm, hide) - - def _update_flow_control_window(self, stream_id, increment): - frm = frame.WindowUpdateFrame(stream_id=0, window_increment=increment) - self.send_frame(frm) - frm = frame.WindowUpdateFrame(stream_id=stream_id, window_increment=increment) - self.send_frame(frm) - - def _create_headers(self, headers, stream_id, end_stream=True): - def frame_cls(chunks): - for i in chunks: - if i == 0: - yield frame.HeadersFrame, i - else: - yield frame.ContinuationFrame, i - - header_block_fragment = self.encoder.encode(headers.fields) - - chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] - chunks = range(0, len(header_block_fragment), chunk_size) - frms = [frm_cls( - flags=[], - stream_id=stream_id, - data=header_block_fragment[i:i+chunk_size]) for frm_cls, i in frame_cls(chunks)] - - frms[-1].flags.add('END_HEADERS') - if end_stream: - frms[0].flags.add('END_STREAM') - - if self.dump_frames: # pragma no cover - for frm in frms: - print(frm.human_readable(">>")) - - return [frm.serialize() for frm in frms] - - def _create_body(self, body, stream_id): - if body is None or len(body) == 0: - return b'' - - chunk_size = self.http2_settings[frame.SettingsFrame.MAX_FRAME_SIZE] - chunks = range(0, len(body), chunk_size) - frms = [frame.DataFrame( - flags=[], - stream_id=stream_id, - data=body[i:i+chunk_size]) for i in chunks] - frms[-1].flags.add('END_STREAM') - - if self.dump_frames: # pragma no cover - for frm in frms: - print(frm.human_readable(">>")) - - return [frm.serialize() for frm in frms] - - def _receive_transmission(self, stream_id=None, include_body=True): - if not include_body: - raise NotImplementedError() - - body_expected = True - - header_blocks = b'' - body = b'' - - while True: - frm = self.read_frame() - if ( - (isinstance(frm, frame.HeadersFrame) or isinstance(frm, frame.ContinuationFrame)) and - (stream_id is None or frm.stream_id == stream_id) - ): - stream_id = frm.stream_id - header_blocks += frm.data - if 'END_STREAM' in frm.flags: - body_expected = False - if 'END_HEADERS' in frm.flags: - break - else: - self._handle_unexpected_frame(frm) - - while body_expected: - frm = self.read_frame() - if isinstance(frm, frame.DataFrame) and frm.stream_id == stream_id: - body += frm.data - if 'END_STREAM' in frm.flags: - break - else: - self._handle_unexpected_frame(frm) - - headers = Headers( - [[k.encode('ascii'), v.encode('ascii')] for k, v in self.decoder.decode(header_blocks)] - ) - - return stream_id, headers, body diff --git a/netlib/http/message.py b/netlib/http/message.py deleted file mode 100644 index e3d8ce37..00000000 --- a/netlib/http/message.py +++ /dev/null @@ -1,222 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import warnings - -import six - -from .headers import Headers -from .. import encoding, utils - -CONTENT_MISSING = 0 - -if six.PY2: # pragma: nocover - _native = lambda x: x - _always_bytes = lambda x: x -else: - # While the HTTP head _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. - _native = lambda x: x.decode("utf-8", "surrogateescape") - _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape") - - -class MessageData(utils.Serializable): - def __eq__(self, other): - if isinstance(other, MessageData): - return self.__dict__ == other.__dict__ - return False - - def __ne__(self, other): - return not self.__eq__(other) - - def set_state(self, state): - for k, v in state.items(): - if k == "headers": - v = Headers.from_state(v) - setattr(self, k, v) - - def get_state(self): - state = vars(self).copy() - state["headers"] = state["headers"].get_state() - return state - - @classmethod - def from_state(cls, state): - state["headers"] = Headers.from_state(state["headers"]) - return cls(**state) - - -class Message(utils.Serializable): - def __init__(self, data): - self.data = data - - def __eq__(self, other): - if isinstance(other, Message): - return self.data == other.data - return False - - def __ne__(self, other): - return not self.__eq__(other) - - def get_state(self): - return self.data.get_state() - - def set_state(self, state): - self.data.set_state(state) - - @classmethod - def from_state(cls, state): - return cls(**state) - - @property - def headers(self): - """ - Message headers object - - Returns: - netlib.http.Headers - """ - return self.data.headers - - @headers.setter - def headers(self, h): - self.data.headers = h - - @property - def content(self): - """ - The raw (encoded) HTTP message body - - See also: :py:attr:`text` - """ - return self.data.content - - @content.setter - def content(self, content): - self.data.content = content - if isinstance(content, bytes): - self.headers["content-length"] = str(len(content)) - - @property - def http_version(self): - """ - Version string, e.g. "HTTP/1.1" - """ - return _native(self.data.http_version) - - @http_version.setter - def http_version(self, http_version): - self.data.http_version = _always_bytes(http_version) - - @property - def timestamp_start(self): - """ - First byte timestamp - """ - return self.data.timestamp_start - - @timestamp_start.setter - def timestamp_start(self, timestamp_start): - self.data.timestamp_start = timestamp_start - - @property - def timestamp_end(self): - """ - Last byte timestamp - """ - return self.data.timestamp_end - - @timestamp_end.setter - def timestamp_end(self, timestamp_end): - self.data.timestamp_end = timestamp_end - - @property - def text(self): - """ - The decoded HTTP message body. - Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive. - - .. note:: - This is not implemented yet. - - See also: :py:attr:`content`, :py:class:`decoded` - """ - # This attribute should be called text, because that's what requests does. - raise NotImplementedError() - - @text.setter - def text(self, text): - raise NotImplementedError() - - def decode(self): - """ - Decodes body based on the current Content-Encoding header, then - removes the header. If there is no Content-Encoding header, no - action is taken. - - Returns: - True, if decoding succeeded. - False, otherwise. - """ - ce = self.headers.get("content-encoding") - data = encoding.decode(ce, self.content) - if data is None: - return False - self.content = data - self.headers.pop("content-encoding", None) - return True - - def encode(self, e): - """ - Encodes body with the encoding e, where e is "gzip", "deflate" or "identity". - - Returns: - True, if decoding succeeded. - False, otherwise. - """ - data = encoding.encode(e, self.content) - if data is None: - return False - self.content = data - self.headers["content-encoding"] = e - return True - - # Legacy - - @property - def body(self): # pragma: nocover - warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) - return self.content - - @body.setter - def body(self, body): # pragma: nocover - warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) - self.content = body - - -class decoded(object): - """ - A context manager that decodes a request or response, and then - re-encodes it with the same encoding after execution of the block. - - Example: - - .. code-block:: python - - with decoded(request): - request.content = request.content.replace("foo", "bar") - """ - - def __init__(self, message): - self.message = message - ce = message.headers.get("content-encoding") - if ce in encoding.ENCODINGS: - self.ce = ce - else: - self.ce = None - - def __enter__(self): - if self.ce: - self.message.decode() - - def __exit__(self, type, value, tb): - if self.ce: - self.message.encode(self.ce) diff --git a/netlib/http/request.py b/netlib/http/request.py deleted file mode 100644 index 0e0f88ce..00000000 --- a/netlib/http/request.py +++ /dev/null @@ -1,353 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import warnings - -import six -from six.moves import urllib - -from netlib import utils -from netlib.http import cookies -from netlib.odict import ODict -from .. import encoding -from .headers import Headers -from .message import Message, _native, _always_bytes, MessageData - - -class RequestData(MessageData): - def __init__(self, first_line_format, method, scheme, host, port, path, http_version, headers=None, content=None, - timestamp_start=None, timestamp_end=None): - if not isinstance(headers, Headers): - headers = Headers(headers) - - self.first_line_format = first_line_format - self.method = method - self.scheme = scheme - self.host = host - self.port = port - self.path = path - self.http_version = http_version - self.headers = headers - self.content = content - self.timestamp_start = timestamp_start - self.timestamp_end = timestamp_end - - -class Request(Message): - """ - An HTTP request. - """ - def __init__(self, *args, **kwargs): - data = RequestData(*args, **kwargs) - super(Request, self).__init__(data) - - def __repr__(self): - if self.host and self.port: - hostport = "{}:{}".format(self.host, self.port) - else: - hostport = "" - path = self.path or "" - return "Request({} {}{})".format( - self.method, hostport, path - ) - - @property - def first_line_format(self): - """ - HTTP request form as defined in `RFC7230 <https://tools.ietf.org/html/rfc7230#section-5.3>`_. - - origin-form and asterisk-form are subsumed as "relative". - """ - return self.data.first_line_format - - @first_line_format.setter - def first_line_format(self, first_line_format): - self.data.first_line_format = first_line_format - - @property - def method(self): - """ - HTTP request method, e.g. "GET". - """ - return _native(self.data.method).upper() - - @method.setter - def method(self, method): - self.data.method = _always_bytes(method) - - @property - def scheme(self): - """ - HTTP request scheme, which should be "http" or "https". - """ - return _native(self.data.scheme) - - @scheme.setter - def scheme(self, scheme): - self.data.scheme = _always_bytes(scheme) - - @property - def host(self): - """ - Target host. This may be parsed from the raw request - (e.g. from a ``GET http://example.com/ HTTP/1.1`` request line) - or inferred from the proxy mode (e.g. an IP in transparent mode). - - Setting the host attribute also updates the host header, if present. - """ - - if six.PY2: # pragma: nocover - return self.data.host - - if not self.data.host: - return self.data.host - try: - return self.data.host.decode("idna") - except UnicodeError: - return self.data.host.decode("utf8", "surrogateescape") - - @host.setter - def host(self, host): - if isinstance(host, six.text_type): - try: - # There's no non-strict mode for IDNA encoding. - # We don't want this operation to fail though, so we try - # utf8 as a last resort. - host = host.encode("idna", "strict") - except UnicodeError: - host = host.encode("utf8", "surrogateescape") - - self.data.host = host - - # Update host header - if "host" in self.headers: - if host: - self.headers["host"] = host - else: - self.headers.pop("host") - - @property - def port(self): - """ - Target port - """ - return self.data.port - - @port.setter - def port(self, port): - self.data.port = port - - @property - def path(self): - """ - HTTP request path, e.g. "/index.html". - Guaranteed to start with a slash. - """ - return _native(self.data.path) - - @path.setter - def path(self, path): - self.data.path = _always_bytes(path) - - @property - def url(self): - """ - The URL string, constructed from the request's URL components - """ - return utils.unparse_url(self.scheme, self.host, self.port, self.path) - - @url.setter - def url(self, url): - self.scheme, self.host, self.port, self.path = utils.parse_url(url) - - @property - def pretty_host(self): - """ - Similar to :py:attr:`host`, but using the Host headers as an additional preferred data source. - This is useful in transparent mode where :py:attr:`host` is only an IP address, - but may not reflect the actual destination as the Host header could be spoofed. - """ - return self.headers.get("host", self.host) - - @property - def pretty_url(self): - """ - Like :py:attr:`url`, but using :py:attr:`pretty_host` instead of :py:attr:`host`. - """ - if self.first_line_format == "authority": - return "%s:%d" % (self.pretty_host, self.port) - return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) - - @property - def query(self): - """ - The request query string as an :py:class:`ODict` object. - None, if there is no query. - """ - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - if query: - return ODict(utils.urldecode(query)) - return None - - @query.setter - def query(self, odict): - query = utils.urlencode(odict.lst) - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) - - @property - def cookies(self): - """ - The request cookies. - An empty :py:class:`ODict` object if the cookie monster ate them all. - """ - ret = ODict() - for i in self.headers.get_all("Cookie"): - ret.extend(cookies.parse_cookie_header(i)) - return ret - - @cookies.setter - def cookies(self, odict): - self.headers["cookie"] = cookies.format_cookie_header(odict) - - @property - def path_components(self): - """ - The URL's path components as a list of strings. - Components are unquoted. - """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) - return [urllib.parse.unquote(i) for i in path.split("/") if i] - - @path_components.setter - def path_components(self, components): - components = map(lambda x: urllib.parse.quote(x, safe=""), components) - path = "/" + "/".join(components) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) - - def anticache(self): - """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. - """ - delheaders = [ - "if-modified-since", - "if-none-match", - ] - for i in delheaders: - self.headers.pop(i, None) - - def anticomp(self): - """ - Modifies this request to remove headers that will compress the - resource's data. - """ - self.headers["accept-encoding"] = "identity" - - def constrain_encoding(self): - """ - Limits the permissible Accept-Encoding values, based on what we can - decode appropriately. - """ - accept_encoding = self.headers.get("accept-encoding") - if accept_encoding: - self.headers["accept-encoding"] = ( - ', '.join( - e - for e in encoding.ENCODINGS - if e in accept_encoding - ) - ) - - @property - def urlencoded_form(self): - """ - The URL-encoded form data as an :py:class:`ODict` object. - None if there is no data or the content-type indicates non-form data. - """ - is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() - if self.content and is_valid_content_type: - return ODict(utils.urldecode(self.content)) - return None - - @urlencoded_form.setter - def urlencoded_form(self, odict): - """ - Sets the body to the URL-encoded form data, and adds the appropriate content-type header. - This will overwrite the existing content if there is one. - """ - self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = utils.urlencode(odict.lst) - - @property - def multipart_form(self): - """ - The multipart form data as an :py:class:`ODict` object. - None if there is no data or the content-type indicates non-form data. - """ - is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower() - if self.content and is_valid_content_type: - return ODict(utils.multipartdecode(self.headers,self.content)) - return None - - @multipart_form.setter - def multipart_form(self, value): - raise NotImplementedError() - - # Legacy - - def get_cookies(self): # pragma: nocover - warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) - return self.cookies - - def set_cookies(self, odict): # pragma: nocover - warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) - self.cookies = odict - - def get_query(self): # pragma: nocover - warnings.warn(".get_query is deprecated, use .query instead.", DeprecationWarning) - return self.query or ODict([]) - - def set_query(self, odict): # pragma: nocover - warnings.warn(".set_query is deprecated, use .query instead.", DeprecationWarning) - self.query = odict - - def get_path_components(self): # pragma: nocover - warnings.warn(".get_path_components is deprecated, use .path_components instead.", DeprecationWarning) - return self.path_components - - def set_path_components(self, lst): # pragma: nocover - warnings.warn(".set_path_components is deprecated, use .path_components instead.", DeprecationWarning) - self.path_components = lst - - def get_form_urlencoded(self): # pragma: nocover - warnings.warn(".get_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) - return self.urlencoded_form or ODict([]) - - def set_form_urlencoded(self, odict): # pragma: nocover - warnings.warn(".set_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) - self.urlencoded_form = odict - - def get_form_multipart(self): # pragma: nocover - warnings.warn(".get_form_multipart is deprecated, use .multipart_form instead.", DeprecationWarning) - return self.multipart_form or ODict([]) - - @property - def form_in(self): # pragma: nocover - warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) - return self.first_line_format - - @form_in.setter - def form_in(self, form_in): # pragma: nocover - warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) - self.first_line_format = form_in - - @property - def form_out(self): # pragma: nocover - warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) - return self.first_line_format - - @form_out.setter - def form_out(self, form_out): # pragma: nocover - warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) - self.first_line_format = form_out
\ No newline at end of file diff --git a/netlib/http/response.py b/netlib/http/response.py deleted file mode 100644 index 8f4d6215..00000000 --- a/netlib/http/response.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import warnings - -from . import cookies -from .headers import Headers -from .message import Message, _native, _always_bytes, MessageData -from .. import utils -from ..odict import ODict - - -class ResponseData(MessageData): - def __init__(self, http_version, status_code, reason=None, headers=None, content=None, - timestamp_start=None, timestamp_end=None): - if not isinstance(headers, Headers): - headers = Headers(headers) - - self.http_version = http_version - self.status_code = status_code - self.reason = reason - self.headers = headers - self.content = content - self.timestamp_start = timestamp_start - self.timestamp_end = timestamp_end - - -class Response(Message): - """ - An HTTP response. - """ - def __init__(self, *args, **kwargs): - data = ResponseData(*args, **kwargs) - super(Response, self).__init__(data) - - def __repr__(self): - if self.content: - details = "{}, {}".format( - self.headers.get("content-type", "unknown content type"), - utils.pretty_size(len(self.content)) - ) - else: - details = "no content" - return "Response({status_code} {reason}, {details})".format( - status_code=self.status_code, - reason=self.reason, - details=details - ) - - @property - def status_code(self): - """ - HTTP Status Code, e.g. ``200``. - """ - return self.data.status_code - - @status_code.setter - def status_code(self, status_code): - self.data.status_code = status_code - - @property - def reason(self): - """ - HTTP Reason Phrase, e.g. "Not Found". - This is always :py:obj:`None` for HTTP2 requests, because HTTP2 responses do not contain a reason phrase. - """ - return _native(self.data.reason) - - @reason.setter - def reason(self, reason): - self.data.reason = _always_bytes(reason) - - @property - def cookies(self): - """ - Get the contents of all Set-Cookie headers. - - A possibly empty :py:class:`ODict`, where keys are cookie name strings, - and values are [value, attr] lists. Value is a string, and attr is - an ODictCaseless containing cookie attributes. Within attrs, unary - attributes (e.g. HTTPOnly) are indicated by a Null value. - """ - ret = [] - for header in self.headers.get_all("set-cookie"): - v = cookies.parse_set_cookie_header(header) - if v: - name, value, attrs = v - ret.append([name, [value, attrs]]) - return ODict(ret) - - @cookies.setter - def cookies(self, odict): - values = [] - for i in odict.lst: - header = cookies.format_set_cookie_header(i[0], i[1][0], i[1][1]) - values.append(header) - self.headers.set_all("set-cookie", values) - - # Legacy - - def get_cookies(self): # pragma: nocover - warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) - return self.cookies - - def set_cookies(self, odict): # pragma: nocover - warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) - self.cookies = odict - - @property - def msg(self): # pragma: nocover - warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) - return self.reason - - @msg.setter - def msg(self, reason): # pragma: nocover - warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) - self.reason = reason diff --git a/netlib/http/status_codes.py b/netlib/http/status_codes.py deleted file mode 100644 index 8a4dc1f5..00000000 --- a/netlib/http/status_codes.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import absolute_import, print_function, division - -CONTINUE = 100 -SWITCHING = 101 -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 - -MULTIPLE_CHOICE = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTH_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -IM_A_TEAPOT = 418 - -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE_SPACE = 507 -NOT_EXTENDED = 510 - -RESPONSES = { - # 100 - CONTINUE: "Continue", - SWITCHING: "Switching Protocols", - - # 200 - OK: "OK", - CREATED: "Created", - ACCEPTED: "Accepted", - NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information", - NO_CONTENT: "No Content", - RESET_CONTENT: "Reset Content.", - PARTIAL_CONTENT: "Partial Content", - MULTI_STATUS: "Multi-Status", - - # 300 - MULTIPLE_CHOICE: "Multiple Choices", - MOVED_PERMANENTLY: "Moved Permanently", - FOUND: "Found", - SEE_OTHER: "See Other", - NOT_MODIFIED: "Not Modified", - USE_PROXY: "Use Proxy", - # 306 not defined?? - TEMPORARY_REDIRECT: "Temporary Redirect", - - # 400 - BAD_REQUEST: "Bad Request", - UNAUTHORIZED: "Unauthorized", - PAYMENT_REQUIRED: "Payment Required", - FORBIDDEN: "Forbidden", - NOT_FOUND: "Not Found", - NOT_ALLOWED: "Method Not Allowed", - NOT_ACCEPTABLE: "Not Acceptable", - PROXY_AUTH_REQUIRED: "Proxy Authentication Required", - REQUEST_TIMEOUT: "Request Time-out", - CONFLICT: "Conflict", - GONE: "Gone", - LENGTH_REQUIRED: "Length Required", - PRECONDITION_FAILED: "Precondition Failed", - REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large", - REQUEST_URI_TOO_LONG: "Request-URI Too Long", - UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type", - REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable", - EXPECTATION_FAILED: "Expectation Failed", - IM_A_TEAPOT: "I'm a teapot", - - # 500 - INTERNAL_SERVER_ERROR: "Internal Server Error", - NOT_IMPLEMENTED: "Not Implemented", - BAD_GATEWAY: "Bad Gateway", - SERVICE_UNAVAILABLE: "Service Unavailable", - GATEWAY_TIMEOUT: "Gateway Time-out", - HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported", - INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space", - NOT_EXTENDED: "Not Extended" -} diff --git a/netlib/http/user_agents.py b/netlib/http/user_agents.py deleted file mode 100644 index e8681908..00000000 --- a/netlib/http/user_agents.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import (absolute_import, print_function, division) - -""" - A small collection of useful user-agent header strings. These should be - kept reasonably current to reflect common usage. -""" - -# pylint: line-too-long - -# A collection of (name, shortcut, string) tuples. - -UASTRINGS = [ - ("android", - "a", - "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02"), # noqa - ("blackberry", - "l", - "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+"), # noqa - ("bingbot", - "b", - "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"), # noqa - ("chrome", - "c", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"), # noqa - ("firefox", - "f", - "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1"), # noqa - ("googlebot", - "g", - "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"), # noqa - ("ie9", - "i", - "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)"), # noqa - ("ipad", - "p", - "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B176 Safari/7534.48.3"), # noqa - ("iphone", - "h", - "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5"), # noqa - ("safari", - "s", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10"), # noqa -] - - -def get_by_shortcut(s): - """ - Retrieve a user agent entry by shortcut. - """ - for i in UASTRINGS: - if s == i[1]: - return i |
