From 106f7046d3862cb0e3cbb4f38335af0330b4e7e3 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 26 Sep 2015 00:39:04 +0200 Subject: refactor request model --- netlib/http/__init__.py | 5 +- netlib/http/headers.py | 2 +- netlib/http/http1/assemble.py | 65 ++++---- netlib/http/http1/read.py | 8 +- netlib/http/message.py | 146 ++++++++++++++++++ netlib/http/models.py | 233 ---------------------------- netlib/http/request.py | 351 ++++++++++++++++++++++++++++++++++++++++++ netlib/http/response.py | 3 + netlib/tutils.py | 4 +- netlib/utils.py | 15 +- 10 files changed, 557 insertions(+), 275 deletions(-) create mode 100644 netlib/http/message.py create mode 100644 netlib/http/request.py create mode 100644 netlib/http/response.py (limited to 'netlib') diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py index 0ccf6b32..e8c7ba20 100644 --- a/netlib/http/__init__.py +++ b/netlib/http/__init__.py @@ -1,12 +1,15 @@ from __future__ import absolute_import, print_function, division from .headers import Headers -from .models import Request, Response +from .message import decoded +from .request import Request +from .models import Response from .models import ALPN_PROTO_HTTP1, ALPN_PROTO_H2 from .models import HDR_FORM_MULTIPART, HDR_FORM_URLENCODED, CONTENT_MISSING from . import http1, http2 __all__ = [ "Headers", + "decoded", "Request", "Response", "ALPN_PROTO_HTTP1", "ALPN_PROTO_H2", "HDR_FORM_MULTIPART", "HDR_FORM_URLENCODED", "CONTENT_MISSING", diff --git a/netlib/http/headers.py b/netlib/http/headers.py index 613beb4f..47ea923b 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -27,7 +27,7 @@ else: _always_byte_args = always_byte_args("utf-8", "surrogateescape") -class Headers(MutableMapping, object): +class Headers(MutableMapping): """ Header class which allows both convenient access to individual headers as well as direct access to the underlying raw data. Provides a full dictionary interface. diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py index 88aeac05..864f6017 100644 --- a/netlib/http/http1/assemble.py +++ b/netlib/http/http1/assemble.py @@ -7,24 +7,24 @@ from .. import CONTENT_MISSING def assemble_request(request): - if request.body == CONTENT_MISSING: + if request.content == CONTENT_MISSING: raise HttpException("Cannot assemble flow with CONTENT_MISSING") head = assemble_request_head(request) - body = b"".join(assemble_body(request.headers, [request.body])) + body = b"".join(assemble_body(request.headers, [request.data.content])) return head + body def assemble_request_head(request): - first_line = _assemble_request_line(request) - headers = _assemble_request_headers(request) + first_line = _assemble_request_line(request.data) + headers = _assemble_request_headers(request.data) return b"%s\r\n%s\r\n" % (first_line, headers) def assemble_response(response): - if response.body == CONTENT_MISSING: + if response.content == CONTENT_MISSING: raise HttpException("Cannot assemble flow with CONTENT_MISSING") head = assemble_response_head(response) - body = b"".join(assemble_body(response.headers, [response.body])) + body = b"".join(assemble_body(response.headers, [response.content])) return head + body @@ -45,42 +45,49 @@ def assemble_body(headers, body_chunks): yield chunk -def _assemble_request_line(request, form=None): - if form is None: - form = request.form_out +def _assemble_request_line(request_data): + """ + Args: + request_data (netlib.http.request.RequestData) + """ + form = request_data.first_line_format if form == "relative": return b"%s %s %s" % ( - request.method, - request.path, - request.http_version + request_data.method, + request_data.path, + request_data.http_version ) elif form == "authority": return b"%s %s:%d %s" % ( - request.method, - request.host, - request.port, - request.http_version + request_data.method, + request_data.host, + request_data.port, + request_data.http_version ) elif form == "absolute": return b"%s %s://%s:%d%s %s" % ( - request.method, - request.scheme, - request.host, - request.port, - request.path, - request.http_version + request_data.method, + request_data.scheme, + request_data.host, + request_data.port, + request_data.path, + request_data.http_version ) - else: # pragma: nocover + else: raise RuntimeError("Invalid request form") -def _assemble_request_headers(request): - headers = request.headers.copy() - if "host" not in headers and request.scheme and request.host and request.port: +def _assemble_request_headers(request_data): + """ + Args: + request_data (netlib.http.request.RequestData) + """ + headers = request_data.headers.copy() + if "host" not in headers and request_data.scheme and request_data.host and request_data.port: headers["host"] = utils.hostport( - request.scheme, - request.host, - request.port + request_data.scheme, + request_data.host, + request_data.port ) return bytes(headers) diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index 4c898348..76721e06 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -11,7 +11,7 @@ from .. import Request, Response, Headers def read_request(rfile, body_size_limit=None): request = read_request_head(rfile) expected_body_size = expected_http_body_size(request) - request._body = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit)) + request.data.content = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit)) request.timestamp_end = time.time() return request @@ -155,7 +155,7 @@ def connection_close(http_version, headers): # If we don't have a Connection header, HTTP 1.1 connections are assumed to # be persistent - return http_version != b"HTTP/1.1" + return http_version != "HTTP/1.1" and http_version != b"HTTP/1.1" # FIXME: Remove one case. def expected_http_body_size(request, response=None): @@ -184,11 +184,11 @@ def expected_http_body_size(request, response=None): if headers.get("expect", "").lower() == "100-continue": return 0 else: - if request.method.upper() == b"HEAD": + if request.method.upper() == "HEAD": return 0 if 100 <= response_code <= 199: return 0 - if response_code == 200 and request.method.upper() == b"CONNECT": + if response_code == 200 and request.method.upper() == "CONNECT": return 0 if response_code in (204, 304): return 0 diff --git a/netlib/http/message.py b/netlib/http/message.py new file mode 100644 index 00000000..20497bd5 --- /dev/null +++ b/netlib/http/message.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import, print_function, division + +import warnings + +import six + +from .. import encoding, utils + +if six.PY2: + _native = lambda x: x + _always_bytes = lambda x: x +else: + # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. + _native = lambda x: x.decode("utf-8", "surrogateescape") + _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape") + + +class Message(object): + def __init__(self, data): + self.data = data + + def __eq__(self, other): + if isinstance(other, Message): + return self.data == other.data + return False + + def __ne__(self, other): + return not self.__eq__(other) + + @property + def http_version(self): + """ + Version string, e.g. "HTTP/1.1" + """ + return _native(self.data.http_version) + + @http_version.setter + def http_version(self, http_version): + self.data.http_version = _always_bytes(http_version) + + @property + def headers(self): + """ + Message headers object + + Returns: + netlib.http.Headers + """ + return self.data.headers + + @headers.setter + def headers(self, h): + self.data.headers = h + + @property + def timestamp_start(self): + """ + First byte timestamp + """ + return self.data.timestamp_start + + @timestamp_start.setter + def timestamp_start(self, timestamp_start): + self.data.timestamp_start = timestamp_start + + @property + def timestamp_end(self): + """ + Last byte timestamp + """ + return self.data.timestamp_end + + @timestamp_end.setter + def timestamp_end(self, timestamp_end): + self.data.timestamp_end = timestamp_end + + @property + def content(self): + """ + The raw (encoded) HTTP message body + + See also: :py:attr:`text` + """ + return self.data.content + + @content.setter + def content(self, content): + self.data.content = content + if isinstance(content, bytes): + self.headers["content-length"] = str(len(content)) + + @property + def text(self): + """ + The decoded HTTP message body. + Decoded contents are not cached, so this method is relatively expensive to call. + + See also: :py:attr:`content`, :py:class:`decoded` + """ + # This attribute should be called text, because that's what requests does. + raise NotImplementedError() + + @text.setter + def text(self, text): + raise NotImplementedError() + + @property + def body(self): + warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) + return self.content + + @body.setter + def body(self, body): + warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) + self.content = body + + +class decoded(object): + """ + A context manager that decodes a request or response, and then + re-encodes it with the same encoding after execution of the block. + + Example: + + .. code-block:: python + + with decoded(request): + request.content = request.content.replace("foo", "bar") + """ + + def __init__(self, message): + self.message = message + ce = message.headers.get("content-encoding") + if ce in encoding.ENCODINGS: + self.ce = ce + else: + self.ce = None + + def __enter__(self): + if self.ce: + if not self.message.decode(): + self.ce = None + + def __exit__(self, type, value, tb): + if self.ce: + self.message.encode(self.ce) \ No newline at end of file diff --git a/netlib/http/models.py b/netlib/http/models.py index 55664533..40f6e98c 100644 --- a/netlib/http/models.py +++ b/netlib/http/models.py @@ -47,239 +47,6 @@ class Message(object): return False -class Request(Message): - def __init__( - self, - form_in, - method, - scheme, - host, - port, - path, - http_version, - headers=None, - body=None, - timestamp_start=None, - timestamp_end=None, - form_out=None - ): - super(Request, self).__init__(http_version, headers, body, timestamp_start, timestamp_end) - - self.form_in = form_in - self.method = method - self.scheme = scheme - self.host = host - self.port = port - self.path = path - self.form_out = form_out or form_in - - def __repr__(self): - if self.host and self.port: - hostport = "{}:{}".format(native(self.host,"idna"), self.port) - else: - hostport = "" - path = self.path or "" - return "HTTPRequest({} {}{})".format( - self.method, hostport, path - ) - - def anticache(self): - """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. - """ - delheaders = [ - "if-modified-since", - "if-none-match", - ] - for i in delheaders: - self.headers.pop(i, None) - - def anticomp(self): - """ - Modifies this request to remove headers that will compress the - resource's data. - """ - self.headers["accept-encoding"] = "identity" - - def constrain_encoding(self): - """ - Limits the permissible Accept-Encoding values, based on what we can - decode appropriately. - """ - accept_encoding = self.headers.get("accept-encoding") - if accept_encoding: - self.headers["accept-encoding"] = ( - ', '.join( - e - for e in encoding.ENCODINGS - if e in accept_encoding - ) - ) - - def update_host_header(self): - """ - Update the host header to reflect the current target. - """ - self.headers["host"] = self.host - - def get_form(self): - """ - Retrieves the URL-encoded or multipart form data, returning an ODict object. - Returns an empty ODict if there is no data or the content-type - indicates non-form data. - """ - if self.body: - if HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower(): - return self.get_form_urlencoded() - elif HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower(): - return self.get_form_multipart() - return ODict([]) - - def get_form_urlencoded(self): - """ - Retrieves the URL-encoded form data, returning an ODict object. - Returns an empty ODict if there is no data or the content-type - indicates non-form data. - """ - if self.body and HDR_FORM_URLENCODED in self.headers.get("content-type", "").lower(): - return ODict(utils.urldecode(self.body)) - return ODict([]) - - def get_form_multipart(self): - if self.body and HDR_FORM_MULTIPART in self.headers.get("content-type", "").lower(): - return ODict( - utils.multipartdecode( - self.headers, - self.body)) - return ODict([]) - - def set_form_urlencoded(self, odict): - """ - Sets the body to the URL-encoded form data, and adds the - appropriate content-type header. Note that this will destory the - existing body if there is one. - """ - # FIXME: If there's an existing content-type header indicating a - # url-encoded form, leave it alone. - self.headers["content-type"] = HDR_FORM_URLENCODED - self.body = utils.urlencode(odict.lst) - - def get_path_components(self): - """ - Returns the path components of the URL as a list of strings. - - Components are unquoted. - """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) - return [urllib.parse.unquote(native(i,"ascii")) for i in path.split(b"/") if i] - - def set_path_components(self, lst): - """ - Takes a list of strings, and sets the path component of the URL. - - Components are quoted. - """ - lst = [urllib.parse.quote(i, safe="") for i in lst] - path = always_bytes("/" + "/".join(lst)) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - self.url = urllib.parse.urlunparse( - [scheme, netloc, path, params, query, fragment] - ) - - def get_query(self): - """ - Gets the request query string. Returns an ODict object. - """ - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - if query: - return ODict(utils.urldecode(query)) - return ODict([]) - - def set_query(self, odict): - """ - Takes an ODict object, and sets the request query string. - """ - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - query = utils.urlencode(odict.lst) - self.url = urllib.parse.urlunparse( - [scheme, netloc, path, params, query, fragment] - ) - - def pretty_host(self, hostheader): - """ - Heuristic to get the host of the request. - - Note that pretty_host() does not always return the TCP destination - of the request, e.g. if an upstream proxy is in place - - If hostheader is set to True, the Host: header will be used as - additional (and preferred) data source. This is handy in - transparent mode, where only the IO of the destination is known, - but not the resolved name. This is disabled by default, as an - attacker may spoof the host header to confuse an analyst. - """ - if hostheader and "host" in self.headers: - try: - return self.headers["host"] - except ValueError: - pass - if self.host: - return self.host.decode("idna") - - def pretty_url(self, hostheader): - if self.form_out == "authority": # upstream proxy mode - return b"%s:%d" % (always_bytes(self.pretty_host(hostheader)), self.port) - return utils.unparse_url(self.scheme, - self.pretty_host(hostheader), - self.port, - self.path) - - def get_cookies(self): - """ - Returns a possibly empty netlib.odict.ODict object. - """ - ret = ODict() - for i in self.headers.get_all("Cookie"): - ret.extend(cookies.parse_cookie_header(i)) - return ret - - def set_cookies(self, odict): - """ - Takes an netlib.odict.ODict object. Over-writes any existing Cookie - headers. - """ - v = cookies.format_cookie_header(odict) - self.headers["cookie"] = v - - @property - def url(self): - """ - Returns a URL string, constructed from the Request's URL components. - """ - return utils.unparse_url( - self.scheme, - self.host, - self.port, - self.path - ) - - @url.setter - def url(self, url): - """ - Parses a URL specification, and updates the Request's information - accordingly. - - Raises: - ValueError if the URL was invalid - """ - # TODO: Should handle incoming unicode here. - parts = utils.parse_url(url) - if not parts: - raise ValueError("Invalid URL: %s" % url) - self.scheme, self.host, self.port, self.path = parts - - class Response(Message): def __init__( self, diff --git a/netlib/http/request.py b/netlib/http/request.py new file mode 100644 index 00000000..6830ca40 --- /dev/null +++ b/netlib/http/request.py @@ -0,0 +1,351 @@ +from __future__ import absolute_import, print_function, division + +import warnings + +import six +from six.moves import urllib + +from netlib import utils +from netlib.http import cookies +from netlib.odict import ODict +from .. import encoding +from .headers import Headers +from .message import Message, _native, _always_bytes + + +class RequestData(object): + def __init__(self, first_line_format, method, scheme, host, port, path, http_version, headers=None, content=None, + timestamp_start=None, timestamp_end=None): + if not headers: + headers = Headers() + assert isinstance(headers, Headers) + + self.first_line_format = first_line_format + self.method = method + self.scheme = scheme + self.host = host + self.port = port + self.path = path + self.http_version = http_version + self.headers = headers + self.content = content + self.timestamp_start = timestamp_start + self.timestamp_end = timestamp_end + + def __eq__(self, other): + if isinstance(other, RequestData): + return self.__dict__ == other.__dict__ + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +class Request(Message): + """ + An HTTP request. + """ + def __init__(self, *args, **kwargs): + data = RequestData(*args, **kwargs) + super(Request, self).__init__(data) + + def __repr__(self): + if self.host and self.port: + hostport = "{}:{}".format(self.host, self.port) + else: + hostport = "" + path = self.path or "" + return "HTTPRequest({} {}{})".format( + self.method, hostport, path + ) + + @property + def first_line_format(self): + """ + HTTP request form as defined in `RFC7230 `_. + + origin-form and asterisk-form are subsumed as "relative". + """ + return self.data.first_line_format + + @first_line_format.setter + def first_line_format(self, first_line_format): + self.data.first_line_format = first_line_format + + @property + def method(self): + """ + HTTP request method, e.g. "GET". + """ + return _native(self.data.method) + + @method.setter + def method(self, method): + self.data.method = _always_bytes(method) + + @property + def scheme(self): + """ + HTTP request scheme, which should be "http" or "https". + """ + return _native(self.data.scheme) + + @scheme.setter + def scheme(self, scheme): + self.data.scheme = _always_bytes(scheme) + + @property + def host(self): + """ + Target host for the request. This may be directly taken in the request (e.g. "GET http://example.com/ HTTP/1.1") + or inferred from the proxy mode (e.g. an IP in transparent mode). + """ + + if six.PY2: + return self.data.host + + if not self.data.host: + return self.data.host + try: + return self.data.host.decode("idna") + except UnicodeError: + return self.data.host.decode("utf8", "surrogateescape") + + @host.setter + def host(self, host): + if isinstance(host, six.text_type): + try: + # There's no non-strict mode for IDNA encoding. + # We don't want this operation to fail though, so we try + # utf8 as a last resort. + host = host.encode("idna", "strict") + except UnicodeError: + host = host.encode("utf8", "surrogateescape") + + self.data.host = host + + # Update host header + if "host" in self.headers: + if host: + self.headers["host"] = host + else: + self.headers.pop("host") + + @property + def port(self): + """ + Target port + """ + return self.data.port + + @port.setter + def port(self, port): + self.data.port = port + + @property + def path(self): + """ + HTTP request path, e.g. "/index.html". + Guaranteed to start with a slash. + """ + return _native(self.data.path) + + @path.setter + def path(self, path): + self.data.path = _always_bytes(path) + + def anticache(self): + """ + Modifies this request to remove headers that might produce a cached + response. That is, we remove ETags and If-Modified-Since headers. + """ + delheaders = [ + "if-modified-since", + "if-none-match", + ] + for i in delheaders: + self.headers.pop(i, None) + + def anticomp(self): + """ + Modifies this request to remove headers that will compress the + resource's data. + """ + self.headers["accept-encoding"] = "identity" + + def constrain_encoding(self): + """ + Limits the permissible Accept-Encoding values, based on what we can + decode appropriately. + """ + accept_encoding = self.headers.get("accept-encoding") + if accept_encoding: + self.headers["accept-encoding"] = ( + ', '.join( + e + for e in encoding.ENCODINGS + if e in accept_encoding + ) + ) + + @property + def urlencoded_form(self): + """ + The URL-encoded form data as an ODict object. + None if there is no data or the content-type indicates non-form data. + """ + is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() + if self.content and is_valid_content_type: + return ODict(utils.urldecode(self.content)) + return None + + @urlencoded_form.setter + def urlencoded_form(self, odict): + """ + Sets the body to the URL-encoded form data, and adds the appropriate content-type header. + This will overwrite the existing content if there is one. + """ + self.headers["content-type"] = "application/x-www-form-urlencoded" + self.content = utils.urlencode(odict.lst) + + @property + def multipart_form(self): + """ + The multipart form data as an ODict object. + None if there is no data or the content-type indicates non-form data. + """ + is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower() + if self.content and is_valid_content_type: + return ODict(utils.multipartdecode(self.headers,self.content)) + return None + + @multipart_form.setter + def multipart_form(self): + raise NotImplementedError() + + @property + def path_components(self): + """ + The URL's path components as a list of strings. + Components are unquoted. + """ + _, _, path, _, _, _ = urllib.parse.urlparse(self.url) + return [urllib.parse.unquote(i) for i in path.split("/") if i] + + @path_components.setter + def path_components(self, components): + components = map(lambda x: urllib.parse.quote(x, safe=""), components) + path = "/" + "/".join(components) + scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) + self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) + + @property + def query(self): + """ + The request query string as an ODict object. + None, if there is no query. + """ + _, _, _, _, query, _ = urllib.parse.urlparse(self.url) + if query: + return ODict(utils.urldecode(query)) + return None + + @query.setter + def query(self, odict): + query = utils.urlencode(odict.lst) + scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) + self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) + + @property + def cookies(self): + """ + The request cookies. + An empty ODict object if the cookie monster ate them all. + """ + ret = ODict() + for i in self.headers.get_all("Cookie"): + ret.extend(cookies.parse_cookie_header(i)) + return ret + + @cookies.setter + def cookies(self, odict): + self.headers["cookie"] = cookies.format_cookie_header(odict) + + @property + def url(self): + """ + The URL string, constructed from the request's URL components + """ + return utils.unparse_url(self.scheme, self.host, self.port, self.path) + + @url.setter + def url(self, url): + self.scheme, self.host, self.port, self.path = utils.parse_url(url) + + @property + def pretty_host(self): + return self.headers.get("host", self.host) + + @property + def pretty_url(self): + if self.first_line_format == "authority": + return "%s:%d" % (self.pretty_host, self.port) + return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + + # Legacy + + def get_cookies(self): + warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) + return self.cookies + + def set_cookies(self, odict): + warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) + self.cookies = odict + + def get_query(self): + warnings.warn(".get_query is deprecated, use .query instead.", DeprecationWarning) + return self.query or ODict([]) + + def set_query(self, odict): + warnings.warn(".set_query is deprecated, use .query instead.", DeprecationWarning) + self.query = odict + + def get_path_components(self): + warnings.warn(".get_path_components is deprecated, use .path_components instead.", DeprecationWarning) + return self.path_components + + def set_path_components(self, lst): + warnings.warn(".set_path_components is deprecated, use .path_components instead.", DeprecationWarning) + self.path_components = lst + + def get_form_urlencoded(self): + warnings.warn(".get_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) + return self.urlencoded_form or ODict([]) + + def set_form_urlencoded(self, odict): + warnings.warn(".set_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) + self.urlencoded_form = odict + + def get_form_multipart(self): + warnings.warn(".get_form_multipart is deprecated, use .multipart_form instead.", DeprecationWarning) + return self.multipart_form or ODict([]) + + @property + def form_in(self): + warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) + return self.first_line_format + + @form_in.setter + def form_in(self, form_in): + warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) + self.first_line_format = form_in + + @property + def form_out(self): + warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) + return self.first_line_format + + @form_out.setter + def form_out(self, form_out): + warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) + self.first_line_format = form_out \ No newline at end of file diff --git a/netlib/http/response.py b/netlib/http/response.py new file mode 100644 index 00000000..02fac3df --- /dev/null +++ b/netlib/http/response.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import, print_function, division + +# TODO \ No newline at end of file diff --git a/netlib/tutils.py b/netlib/tutils.py index 1665a792..ff63c33c 100644 --- a/netlib/tutils.py +++ b/netlib/tutils.py @@ -98,7 +98,7 @@ def treq(**kwargs): netlib.http.Request """ default = dict( - form_in="relative", + first_line_format="relative", method=b"GET", scheme=b"http", host=b"address", @@ -106,7 +106,7 @@ def treq(**kwargs): path=b"/path", http_version=b"HTTP/1.1", headers=Headers(header="qvalue"), - body=b"content" + content=b"content" ) default.update(kwargs) return Request(**default) diff --git a/netlib/utils.py b/netlib/utils.py index 6f6d1ea0..3ec60890 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -273,22 +273,27 @@ def get_header_tokens(headers, key): return [token.strip() for token in tokens] -@always_byte_args() def hostport(scheme, host, port): """ Returns the host component, with a port specifcation if needed. """ - if (port, scheme) in [(80, b"http"), (443, b"https")]: + if (port, scheme) in [(80, "http"), (443, "https"), (80, b"http"), (443, b"https")]: return host else: - return b"%s:%d" % (host, port) + if isinstance(host, six.binary_type): + return b"%s:%d" % (host, port) + else: + return "%s:%d" % (host, port) def unparse_url(scheme, host, port, path=""): """ - Returns a URL string, constructed from the specified compnents. + Returns a URL string, constructed from the specified components. + + Args: + All args must be str. """ - return b"%s://%s%s" % (scheme, hostport(scheme, host, port), path) + return "%s://%s%s" % (scheme, hostport(scheme, host, port), path) def urlencode(s): -- cgit v1.2.3 From 49ea8fc0ebcfe4861f099200044a553f092faec7 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 26 Sep 2015 17:39:50 +0200 Subject: refactor response model --- netlib/http/__init__.py | 15 ++-- netlib/http/headers.py | 26 +++---- netlib/http/http1/assemble.py | 16 ++-- netlib/http/http1/read.py | 2 +- netlib/http/http2/connections.py | 4 +- netlib/http/http2/frame.py | 3 - netlib/http/message.py | 64 +++++++++------- netlib/http/models.py | 112 ---------------------------- netlib/http/request.py | 155 +++++++++++++++++++++------------------ netlib/http/response.py | 124 ++++++++++++++++++++++++++++++- netlib/tutils.py | 6 +- netlib/wsgi.py | 6 +- 12 files changed, 277 insertions(+), 256 deletions(-) delete mode 100644 netlib/http/models.py (limited to 'netlib') diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py index e8c7ba20..fd632cd5 100644 --- a/netlib/http/__init__.py +++ b/netlib/http/__init__.py @@ -1,17 +1,14 @@ from __future__ import absolute_import, print_function, division -from .headers import Headers -from .message import decoded from .request import Request -from .models import Response -from .models import ALPN_PROTO_HTTP1, ALPN_PROTO_H2 -from .models import HDR_FORM_MULTIPART, HDR_FORM_URLENCODED, CONTENT_MISSING +from .response import Response +from .headers import Headers +from .message import decoded, CONTENT_MISSING from . import http1, http2 __all__ = [ + "Request", + "Response", "Headers", - "decoded", - "Request", "Response", - "ALPN_PROTO_HTTP1", "ALPN_PROTO_H2", - "HDR_FORM_MULTIPART", "HDR_FORM_URLENCODED", "CONTENT_MISSING", + "decoded", "CONTENT_MISSING", "http1", "http2", ] diff --git a/netlib/http/headers.py b/netlib/http/headers.py index 47ea923b..c79c3344 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -36,12 +36,8 @@ class Headers(MutableMapping): .. code-block:: python - # Create header from a list of (header_name, header_value) tuples - >>> h = Headers([ - ["Host","example.com"], - ["Accept","text/html"], - ["accept","application/xml"] - ]) + # Create headers with keyword arguments + >>> h = Headers(host="example.com", content_type="application/xml") # Headers mostly behave like a normal dict. >>> h["Host"] @@ -51,6 +47,13 @@ class Headers(MutableMapping): >>> h["host"] "example.com" + # Headers can also be creatd from a list of raw (header_name, header_value) byte tuples + >>> h = Headers([ + [b"Host",b"example.com"], + [b"Accept",b"text/html"], + [b"accept",b"application/xml"] + ]) + # Multiple headers are folded into a single header as per RFC7230 >>> h["Accept"] "text/html, application/xml" @@ -60,17 +63,14 @@ class Headers(MutableMapping): >>> h["Accept"] "application/text" - # str(h) returns a HTTP1 header block. - >>> print(h) + # bytes(h) returns a HTTP1 header block. + >>> print(bytes(h)) Host: example.com Accept: application/text # For full control, the raw header fields can be accessed >>> h.fields - # Headers can also be crated from keyword arguments - >>> h = Headers(host="example.com", content_type="application/xml") - Caveats: For use with the "Set-Cookie" header, see :py:meth:`get_all`. """ @@ -79,8 +79,8 @@ class Headers(MutableMapping): def __init__(self, fields=None, **headers): """ Args: - fields: (optional) list of ``(name, value)`` header tuples, - e.g. ``[("Host","example.com")]``. All names and values must be bytes. + fields: (optional) list of ``(name, value)`` header byte tuples, + e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes. **headers: Additional headers to set. Will overwrite existing values from `fields`. For convenience, underscores in header names will be transformed to dashes - this behaviour does not extend to other methods. diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py index 864f6017..785ee8d3 100644 --- a/netlib/http/http1/assemble.py +++ b/netlib/http/http1/assemble.py @@ -10,7 +10,7 @@ def assemble_request(request): if request.content == CONTENT_MISSING: raise HttpException("Cannot assemble flow with CONTENT_MISSING") head = assemble_request_head(request) - body = b"".join(assemble_body(request.headers, [request.data.content])) + body = b"".join(assemble_body(request.data.headers, [request.data.content])) return head + body @@ -24,13 +24,13 @@ def assemble_response(response): if response.content == CONTENT_MISSING: raise HttpException("Cannot assemble flow with CONTENT_MISSING") head = assemble_response_head(response) - body = b"".join(assemble_body(response.headers, [response.content])) + body = b"".join(assemble_body(response.data.headers, [response.data.content])) return head + body def assemble_response_head(response): - first_line = _assemble_response_line(response) - headers = _assemble_response_headers(response) + first_line = _assemble_response_line(response.data) + headers = _assemble_response_headers(response.data) return b"%s\r\n%s\r\n" % (first_line, headers) @@ -92,11 +92,11 @@ def _assemble_request_headers(request_data): return bytes(headers) -def _assemble_response_line(response): +def _assemble_response_line(response_data): return b"%s %d %s" % ( - response.http_version, - response.status_code, - response.msg, + response_data.http_version, + response_data.status_code, + response_data.reason, ) diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index 76721e06..0d5e7f4b 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -50,7 +50,7 @@ def read_request_head(rfile): def read_response(rfile, request, body_size_limit=None): response = read_response_head(rfile) expected_body_size = expected_http_body_size(request, response) - response._body = b"".join(read_body(rfile, expected_body_size, body_size_limit)) + response.data.content = b"".join(read_body(rfile, expected_body_size, body_size_limit)) response.timestamp_end = time.time() return response diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 5220d5d2..c493abe6 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -4,7 +4,7 @@ import time from hpack.hpack import Encoder, Decoder from ... import utils -from .. import Headers, Response, Request, ALPN_PROTO_H2 +from .. import Headers, Response, Request from . import frame @@ -283,7 +283,7 @@ class HTTP2Protocol(object): def check_alpn(self): alp = self.tcp_handler.get_alpn_proto_negotiated() - if alp != ALPN_PROTO_H2: + if alp != b'h2': raise NotImplementedError( "HTTP2Protocol can not handle unknown ALP: %s" % alp) return True diff --git a/netlib/http/http2/frame.py b/netlib/http/http2/frame.py index cb2cde99..188629d4 100644 --- a/netlib/http/http2/frame.py +++ b/netlib/http/http2/frame.py @@ -25,9 +25,6 @@ ERROR_CODES = BiDi( CLIENT_CONNECTION_PREFACE = b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" -ALPN_PROTO_H2 = b'h2' - - class Frame(object): """ diff --git a/netlib/http/message.py b/netlib/http/message.py index 20497bd5..ee138746 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -6,11 +6,14 @@ import six from .. import encoding, utils + +CONTENT_MISSING = 0 + if six.PY2: _native = lambda x: x _always_bytes = lambda x: x else: - # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. + # While the HTTP head _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. _native = lambda x: x.decode("utf-8", "surrogateescape") _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape") @@ -27,17 +30,6 @@ class Message(object): def __ne__(self, other): return not self.__eq__(other) - @property - def http_version(self): - """ - Version string, e.g. "HTTP/1.1" - """ - return _native(self.data.http_version) - - @http_version.setter - def http_version(self, http_version): - self.data.http_version = _always_bytes(http_version) - @property def headers(self): """ @@ -52,6 +44,32 @@ class Message(object): def headers(self, h): self.data.headers = h + @property + def content(self): + """ + The raw (encoded) HTTP message body + + See also: :py:attr:`text` + """ + return self.data.content + + @content.setter + def content(self, content): + self.data.content = content + if isinstance(content, bytes): + self.headers["content-length"] = str(len(content)) + + @property + def http_version(self): + """ + Version string, e.g. "HTTP/1.1" + """ + return _native(self.data.http_version) + + @http_version.setter + def http_version(self, http_version): + self.data.http_version = _always_bytes(http_version) + @property def timestamp_start(self): """ @@ -74,26 +92,14 @@ class Message(object): def timestamp_end(self, timestamp_end): self.data.timestamp_end = timestamp_end - @property - def content(self): - """ - The raw (encoded) HTTP message body - - See also: :py:attr:`text` - """ - return self.data.content - - @content.setter - def content(self, content): - self.data.content = content - if isinstance(content, bytes): - self.headers["content-length"] = str(len(content)) - @property def text(self): """ The decoded HTTP message body. - Decoded contents are not cached, so this method is relatively expensive to call. + Decoded contents are not cached, so accessing this attribute repeatedly is relatively expensive. + + .. note:: + This is not implemented yet. See also: :py:attr:`content`, :py:class:`decoded` """ @@ -104,6 +110,8 @@ class Message(object): def text(self, text): raise NotImplementedError() + # Legacy + @property def body(self): warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) diff --git a/netlib/http/models.py b/netlib/http/models.py deleted file mode 100644 index 40f6e98c..00000000 --- a/netlib/http/models.py +++ /dev/null @@ -1,112 +0,0 @@ - - -from ..odict import ODict -from .. import utils, encoding -from ..utils import always_bytes, native -from . import cookies -from .headers import Headers - -from six.moves import urllib - -# TODO: Move somewhere else? -ALPN_PROTO_HTTP1 = b'http/1.1' -ALPN_PROTO_H2 = b'h2' -HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = "multipart/form-data" - -CONTENT_MISSING = 0 - - -class Message(object): - def __init__(self, http_version, headers, body, timestamp_start, timestamp_end): - self.http_version = http_version - if not headers: - headers = Headers() - assert isinstance(headers, Headers) - self.headers = headers - - self._body = body - self.timestamp_start = timestamp_start - self.timestamp_end = timestamp_end - - @property - def body(self): - return self._body - - @body.setter - def body(self, body): - self._body = body - if isinstance(body, bytes): - self.headers["content-length"] = str(len(body)).encode() - - content = body - - def __eq__(self, other): - if isinstance(other, Message): - return self.__dict__ == other.__dict__ - return False - - -class Response(Message): - def __init__( - self, - http_version, - status_code, - msg=None, - headers=None, - body=None, - timestamp_start=None, - timestamp_end=None, - ): - super(Response, self).__init__(http_version, headers, body, timestamp_start, timestamp_end) - self.status_code = status_code - self.msg = msg - - def __repr__(self): - # return "Response(%s - %s)" % (self.status_code, self.msg) - - if self.body: - size = utils.pretty_size(len(self.body)) - else: - size = "content missing" - # TODO: Remove "(unknown content type, content missing)" edge-case - return "".format( - status_code=self.status_code, - msg=self.msg, - contenttype=self.headers.get("content-type", "unknown content type"), - size=size) - - def get_cookies(self): - """ - Get the contents of all Set-Cookie headers. - - Returns a possibly empty ODict, where keys are cookie name strings, - and values are [value, attr] lists. Value is a string, and attr is - an ODictCaseless containing cookie attributes. Within attrs, unary - attributes (e.g. HTTPOnly) are indicated by a Null value. - """ - ret = [] - for header in self.headers.get_all("set-cookie"): - v = cookies.parse_set_cookie_header(header) - if v: - name, value, attrs = v - ret.append([name, [value, attrs]]) - return ODict(ret) - - def set_cookies(self, odict): - """ - Set the Set-Cookie headers on this response, over-writing existing - headers. - - Accepts an ODict of the same format as that returned by get_cookies. - """ - values = [] - for i in odict.lst: - values.append( - cookies.format_set_cookie_header( - i[0], - i[1][0], - i[1][1] - ) - ) - self.headers.set_all("set-cookie", values) diff --git a/netlib/http/request.py b/netlib/http/request.py index 6830ca40..f8a3b5b9 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -55,7 +55,7 @@ class Request(Message): else: hostport = "" path = self.path or "" - return "HTTPRequest({} {}{})".format( + return "Request({} {}{})".format( self.method, hostport, path ) @@ -97,7 +97,8 @@ class Request(Message): @property def host(self): """ - Target host for the request. This may be directly taken in the request (e.g. "GET http://example.com/ HTTP/1.1") + Target host. This may be parsed from the raw request + (e.g. from a ``GET http://example.com/ HTTP/1.1`` request line) or inferred from the proxy mode (e.g. an IP in transparent mode). """ @@ -154,6 +155,83 @@ class Request(Message): def path(self, path): self.data.path = _always_bytes(path) + @property + def url(self): + """ + The URL string, constructed from the request's URL components + """ + return utils.unparse_url(self.scheme, self.host, self.port, self.path) + + @url.setter + def url(self, url): + self.scheme, self.host, self.port, self.path = utils.parse_url(url) + + @property + def pretty_host(self): + """ + Similar to :py:attr:`host`, but using the Host headers as an additional preferred data source. + This is useful in transparent mode where :py:attr:`host` is only an IP address, + but may not reflect the actual destination as the Host header could be spoofed. + """ + return self.headers.get("host", self.host) + + @property + def pretty_url(self): + """ + Like :py:attr:`url`, but using :py:attr:`pretty_host` instead of :py:attr:`host`. + """ + if self.first_line_format == "authority": + return "%s:%d" % (self.pretty_host, self.port) + return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + + @property + def query(self): + """ + The request query string as an :py:class:`ODict` object. + None, if there is no query. + """ + _, _, _, _, query, _ = urllib.parse.urlparse(self.url) + if query: + return ODict(utils.urldecode(query)) + return None + + @query.setter + def query(self, odict): + query = utils.urlencode(odict.lst) + scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) + self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) + + @property + def cookies(self): + """ + The request cookies. + An empty :py:class:`ODict` object if the cookie monster ate them all. + """ + ret = ODict() + for i in self.headers.get_all("Cookie"): + ret.extend(cookies.parse_cookie_header(i)) + return ret + + @cookies.setter + def cookies(self, odict): + self.headers["cookie"] = cookies.format_cookie_header(odict) + + @property + def path_components(self): + """ + The URL's path components as a list of strings. + Components are unquoted. + """ + _, _, path, _, _, _ = urllib.parse.urlparse(self.url) + return [urllib.parse.unquote(i) for i in path.split("/") if i] + + @path_components.setter + def path_components(self, components): + components = map(lambda x: urllib.parse.quote(x, safe=""), components) + path = "/" + "/".join(components) + scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) + self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) + def anticache(self): """ Modifies this request to remove headers that might produce a cached @@ -191,7 +269,7 @@ class Request(Message): @property def urlencoded_form(self): """ - The URL-encoded form data as an ODict object. + The URL-encoded form data as an :py:class:`ODict` object. None if there is no data or the content-type indicates non-form data. """ is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() @@ -211,7 +289,7 @@ class Request(Message): @property def multipart_form(self): """ - The multipart form data as an ODict object. + The multipart form data as an :py:class:`ODict` object. None if there is no data or the content-type indicates non-form data. """ is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower() @@ -223,75 +301,6 @@ class Request(Message): def multipart_form(self): raise NotImplementedError() - @property - def path_components(self): - """ - The URL's path components as a list of strings. - Components are unquoted. - """ - _, _, path, _, _, _ = urllib.parse.urlparse(self.url) - return [urllib.parse.unquote(i) for i in path.split("/") if i] - - @path_components.setter - def path_components(self, components): - components = map(lambda x: urllib.parse.quote(x, safe=""), components) - path = "/" + "/".join(components) - scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) - - @property - def query(self): - """ - The request query string as an ODict object. - None, if there is no query. - """ - _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - if query: - return ODict(utils.urldecode(query)) - return None - - @query.setter - def query(self, odict): - query = utils.urlencode(odict.lst) - scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - self.url = urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]) - - @property - def cookies(self): - """ - The request cookies. - An empty ODict object if the cookie monster ate them all. - """ - ret = ODict() - for i in self.headers.get_all("Cookie"): - ret.extend(cookies.parse_cookie_header(i)) - return ret - - @cookies.setter - def cookies(self, odict): - self.headers["cookie"] = cookies.format_cookie_header(odict) - - @property - def url(self): - """ - The URL string, constructed from the request's URL components - """ - return utils.unparse_url(self.scheme, self.host, self.port, self.path) - - @url.setter - def url(self, url): - self.scheme, self.host, self.port, self.path = utils.parse_url(url) - - @property - def pretty_host(self): - return self.headers.get("host", self.host) - - @property - def pretty_url(self): - if self.first_line_format == "authority": - return "%s:%d" % (self.pretty_host, self.port) - return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) - # Legacy def get_cookies(self): diff --git a/netlib/http/response.py b/netlib/http/response.py index 02fac3df..7d64243d 100644 --- a/netlib/http/response.py +++ b/netlib/http/response.py @@ -1,3 +1,125 @@ from __future__ import absolute_import, print_function, division -# TODO \ No newline at end of file +import warnings + +from . import cookies +from .headers import Headers +from .message import Message, _native, _always_bytes +from .. import utils +from ..odict import ODict + + +class ResponseData(object): + def __init__(self, http_version, status_code, reason=None, headers=None, content=None, + timestamp_start=None, timestamp_end=None): + if not headers: + headers = Headers() + assert isinstance(headers, Headers) + + self.http_version = http_version + self.status_code = status_code + self.reason = reason + self.headers = headers + self.content = content + self.timestamp_start = timestamp_start + self.timestamp_end = timestamp_end + + def __eq__(self, other): + if isinstance(other, ResponseData): + return self.__dict__ == other.__dict__ + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +class Response(Message): + """ + An HTTP response. + """ + def __init__(self, *args, **kwargs): + data = ResponseData(*args, **kwargs) + super(Response, self).__init__(data) + + def __repr__(self): + if self.content: + details = "{}, {}".format( + self.headers.get("content-type", "unknown content type"), + utils.pretty_size(len(self.content)) + ) + else: + details = "content missing" + return "Response({status_code} {reason}, {details})".format( + status_code=self.status_code, + reason=self.reason, + details=details + ) + + @property + def status_code(self): + """ + HTTP Status Code, e.g. ``200``. + """ + return self.data.status_code + + @status_code.setter + def status_code(self, status_code): + self.data.status_code = status_code + + @property + def reason(self): + """ + HTTP Reason Phrase, e.g. "Not Found". + This is always :py:obj:`None` for HTTP2 requests, because HTTP2 responses do not contain a reason phrase. + """ + return _native(self.data.reason) + + @reason.setter + def reason(self, reason): + self.data.reason = _always_bytes(reason) + + @property + def cookies(self): + """ + Get the contents of all Set-Cookie headers. + + A possibly empty :py:class:`ODict`, where keys are cookie name strings, + and values are [value, attr] lists. Value is a string, and attr is + an ODictCaseless containing cookie attributes. Within attrs, unary + attributes (e.g. HTTPOnly) are indicated by a Null value. + """ + ret = [] + for header in self.headers.get_all("set-cookie"): + v = cookies.parse_set_cookie_header(header) + if v: + name, value, attrs = v + ret.append([name, [value, attrs]]) + return ODict(ret) + + @cookies.setter + def cookies(self, odict): + values = [] + for i in odict.lst: + header = cookies.format_set_cookie_header(i[0], i[1][0], i[1][1]) + values.append(header) + self.headers.set_all("set-cookie", values) + + # Legacy + + def get_cookies(self): + warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) + return self.cookies + + def set_cookies(self, odict): + warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) + self.cookies = odict + + @property + def msg(self): + warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) + return self.reason + + @msg.setter + def msg(self, reason): + warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) + self.reason = reason diff --git a/netlib/tutils.py b/netlib/tutils.py index ff63c33c..e16f1a76 100644 --- a/netlib/tutils.py +++ b/netlib/tutils.py @@ -120,9 +120,9 @@ def tresp(**kwargs): default = dict( http_version=b"HTTP/1.1", status_code=200, - msg=b"OK", - headers=Headers(header_response=b"svalue"), - body=b"message", + reason=b"OK", + headers=Headers(header_response="svalue"), + content=b"message", timestamp_start=time.time(), timestamp_end=time.time(), ) diff --git a/netlib/wsgi.py b/netlib/wsgi.py index 4fcd5178..df248a19 100644 --- a/netlib/wsgi.py +++ b/netlib/wsgi.py @@ -25,9 +25,9 @@ class Flow(object): class Request(object): - def __init__(self, scheme, method, path, http_version, headers, body): + def __init__(self, scheme, method, path, http_version, headers, content): self.scheme, self.method, self.path = scheme, method, path - self.headers, self.body = headers, body + self.headers, self.content = headers, content self.http_version = http_version @@ -64,7 +64,7 @@ class WSGIAdaptor(object): environ = { 'wsgi.version': (1, 0), 'wsgi.url_scheme': native(flow.request.scheme, "latin-1"), - 'wsgi.input': BytesIO(flow.request.body or b""), + 'wsgi.input': BytesIO(flow.request.content or b""), 'wsgi.errors': errsoc, 'wsgi.multithread': True, 'wsgi.multiprocess': False, -- cgit v1.2.3 From 466888b01a361e46fb3d4e66afa2c6a0fd168c8e Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 26 Sep 2015 20:07:11 +0200 Subject: improve request tests, coverage++ --- netlib/encoding.py | 4 ++++ netlib/http/headers.py | 8 ++++---- netlib/http/message.py | 42 +++++++++++++++++++++++++++++++++++++----- netlib/http/request.py | 28 ++++++++++++++-------------- netlib/http/response.py | 8 ++++---- netlib/http/status_codes.py | 4 +++- 6 files changed, 66 insertions(+), 28 deletions(-) (limited to 'netlib') diff --git a/netlib/encoding.py b/netlib/encoding.py index 4c11273b..14479e00 100644 --- a/netlib/encoding.py +++ b/netlib/encoding.py @@ -12,6 +12,8 @@ ENCODINGS = {"identity", "gzip", "deflate"} def decode(e, content): + if not isinstance(content, bytes): + return None encoding_map = { "identity": identity, "gzip": decode_gzip, @@ -23,6 +25,8 @@ def decode(e, content): def encode(e, content): + if not isinstance(content, bytes): + return None encoding_map = { "identity": identity, "gzip": encode_gzip, diff --git a/netlib/http/headers.py b/netlib/http/headers.py index c79c3344..f64e6200 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -8,15 +8,15 @@ from __future__ import absolute_import, print_function, division import copy try: from collections.abc import MutableMapping -except ImportError: # Workaround for Python < 3.3 - from collections import MutableMapping +except ImportError: # pragma: nocover + from collections import MutableMapping # Workaround for Python < 3.3 import six from netlib.utils import always_byte_args, always_bytes -if six.PY2: +if six.PY2: # pragma: nocover _native = lambda x: x _always_bytes = lambda x: x _always_byte_args = lambda x: x @@ -106,7 +106,7 @@ class Headers(MutableMapping): else: return b"" - if six.PY2: + if six.PY2: # pragma: nocover __str__ = __bytes__ @_always_byte_args diff --git a/netlib/http/message.py b/netlib/http/message.py index ee138746..7cb18f52 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -9,7 +9,7 @@ from .. import encoding, utils CONTENT_MISSING = 0 -if six.PY2: +if six.PY2: # pragma: nocover _native = lambda x: x _always_bytes = lambda x: x else: @@ -110,15 +110,48 @@ class Message(object): def text(self, text): raise NotImplementedError() + def decode(self): + """ + Decodes body based on the current Content-Encoding header, then + removes the header. If there is no Content-Encoding header, no + action is taken. + + Returns: + True, if decoding succeeded. + False, otherwise. + """ + ce = self.headers.get("content-encoding") + data = encoding.decode(ce, self.content) + if data is None: + return False + self.content = data + self.headers.pop("content-encoding", None) + return True + + def encode(self, e): + """ + Encodes body with the encoding e, where e is "gzip", "deflate" or "identity". + + Returns: + True, if decoding succeeded. + False, otherwise. + """ + data = encoding.encode(e, self.content) + if data is None: + return False + self.content = data + self.headers["content-encoding"] = e + return True + # Legacy @property - def body(self): + def body(self): # pragma: nocover warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) return self.content @body.setter - def body(self, body): + def body(self, body): # pragma: nocover warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning) self.content = body @@ -146,8 +179,7 @@ class decoded(object): def __enter__(self): if self.ce: - if not self.message.decode(): - self.ce = None + self.message.decode() def __exit__(self, type, value, tb): if self.ce: diff --git a/netlib/http/request.py b/netlib/http/request.py index f8a3b5b9..325c0080 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -102,7 +102,7 @@ class Request(Message): or inferred from the proxy mode (e.g. an IP in transparent mode). """ - if six.PY2: + if six.PY2: # pragma: nocover return self.data.host if not self.data.host: @@ -303,58 +303,58 @@ class Request(Message): # Legacy - def get_cookies(self): + def get_cookies(self): # pragma: nocover warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) return self.cookies - def set_cookies(self, odict): + def set_cookies(self, odict): # pragma: nocover warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) self.cookies = odict - def get_query(self): + def get_query(self): # pragma: nocover warnings.warn(".get_query is deprecated, use .query instead.", DeprecationWarning) return self.query or ODict([]) - def set_query(self, odict): + def set_query(self, odict): # pragma: nocover warnings.warn(".set_query is deprecated, use .query instead.", DeprecationWarning) self.query = odict - def get_path_components(self): + def get_path_components(self): # pragma: nocover warnings.warn(".get_path_components is deprecated, use .path_components instead.", DeprecationWarning) return self.path_components - def set_path_components(self, lst): + def set_path_components(self, lst): # pragma: nocover warnings.warn(".set_path_components is deprecated, use .path_components instead.", DeprecationWarning) self.path_components = lst - def get_form_urlencoded(self): + def get_form_urlencoded(self): # pragma: nocover warnings.warn(".get_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) return self.urlencoded_form or ODict([]) - def set_form_urlencoded(self, odict): + def set_form_urlencoded(self, odict): # pragma: nocover warnings.warn(".set_form_urlencoded is deprecated, use .urlencoded_form instead.", DeprecationWarning) self.urlencoded_form = odict - def get_form_multipart(self): + def get_form_multipart(self): # pragma: nocover warnings.warn(".get_form_multipart is deprecated, use .multipart_form instead.", DeprecationWarning) return self.multipart_form or ODict([]) @property - def form_in(self): + def form_in(self): # pragma: nocover warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) return self.first_line_format @form_in.setter - def form_in(self, form_in): + def form_in(self, form_in): # pragma: nocover warnings.warn(".form_in is deprecated, use .first_line_format instead.", DeprecationWarning) self.first_line_format = form_in @property - def form_out(self): + def form_out(self): # pragma: nocover warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) return self.first_line_format @form_out.setter - def form_out(self, form_out): + def form_out(self, form_out): # pragma: nocover warnings.warn(".form_out is deprecated, use .first_line_format instead.", DeprecationWarning) self.first_line_format = form_out \ No newline at end of file diff --git a/netlib/http/response.py b/netlib/http/response.py index 7d64243d..db31d2b9 100644 --- a/netlib/http/response.py +++ b/netlib/http/response.py @@ -106,20 +106,20 @@ class Response(Message): # Legacy - def get_cookies(self): + def get_cookies(self): # pragma: nocover warnings.warn(".get_cookies is deprecated, use .cookies instead.", DeprecationWarning) return self.cookies - def set_cookies(self, odict): + def set_cookies(self, odict): # pragma: nocover warnings.warn(".set_cookies is deprecated, use .cookies instead.", DeprecationWarning) self.cookies = odict @property - def msg(self): + def msg(self): # pragma: nocover warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) return self.reason @msg.setter - def msg(self, reason): + def msg(self, reason): # pragma: nocover warnings.warn(".msg is deprecated, use .reason instead.", DeprecationWarning) self.reason = reason diff --git a/netlib/http/status_codes.py b/netlib/http/status_codes.py index dc09f465..8a4dc1f5 100644 --- a/netlib/http/status_codes.py +++ b/netlib/http/status_codes.py @@ -1,4 +1,4 @@ -from __future__ import (absolute_import, print_function, division) +from __future__ import absolute_import, print_function, division CONTINUE = 100 SWITCHING = 101 @@ -37,6 +37,7 @@ REQUEST_URI_TOO_LONG = 414 UNSUPPORTED_MEDIA_TYPE = 415 REQUESTED_RANGE_NOT_SATISFIABLE = 416 EXPECTATION_FAILED = 417 +IM_A_TEAPOT = 418 INTERNAL_SERVER_ERROR = 500 NOT_IMPLEMENTED = 501 @@ -91,6 +92,7 @@ RESPONSES = { UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type", REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable", EXPECTATION_FAILED: "Expectation Failed", + IM_A_TEAPOT: "I'm a teapot", # 500 INTERNAL_SERVER_ERROR: "Internal Server Error", -- cgit v1.2.3 From 23d13e4c1282bc46c54222479c3b83032dad3335 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 27 Sep 2015 00:49:41 +0200 Subject: test response model, push coverage to 100% branch cov --- netlib/http/cookies.py | 1 + netlib/http/message.py | 10 ++++++++++ netlib/http/request.py | 12 ++---------- netlib/http/response.py | 14 +++----------- 4 files changed, 16 insertions(+), 21 deletions(-) (limited to 'netlib') diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 78b03a83..18544b5e 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -58,6 +58,7 @@ def _read_quoted_string(s, start): escaping = False ret = [] # Skip the first quote + i = start # initialize in case the loop doesn't run. for i in range(start + 1, len(s)): if escaping: ret.append(s[i]) diff --git a/netlib/http/message.py b/netlib/http/message.py index 7cb18f52..e4e799ca 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -18,6 +18,16 @@ else: _always_bytes = lambda x: utils.always_bytes(x, "utf-8", "surrogateescape") +class MessageData(object): + def __eq__(self, other): + if isinstance(other, MessageData): + return self.__dict__ == other.__dict__ + return False + + def __ne__(self, other): + return not self.__eq__(other) + + class Message(object): def __init__(self, data): self.data = data diff --git a/netlib/http/request.py b/netlib/http/request.py index 325c0080..095b5945 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -10,10 +10,10 @@ from netlib.http import cookies from netlib.odict import ODict from .. import encoding from .headers import Headers -from .message import Message, _native, _always_bytes +from .message import Message, _native, _always_bytes, MessageData -class RequestData(object): +class RequestData(MessageData): def __init__(self, first_line_format, method, scheme, host, port, path, http_version, headers=None, content=None, timestamp_start=None, timestamp_end=None): if not headers: @@ -32,14 +32,6 @@ class RequestData(object): self.timestamp_start = timestamp_start self.timestamp_end = timestamp_end - def __eq__(self, other): - if isinstance(other, RequestData): - return self.__dict__ == other.__dict__ - return False - - def __ne__(self, other): - return not self.__eq__(other) - class Request(Message): """ diff --git a/netlib/http/response.py b/netlib/http/response.py index db31d2b9..66e5ded6 100644 --- a/netlib/http/response.py +++ b/netlib/http/response.py @@ -4,12 +4,12 @@ import warnings from . import cookies from .headers import Headers -from .message import Message, _native, _always_bytes +from .message import Message, _native, _always_bytes, MessageData from .. import utils from ..odict import ODict -class ResponseData(object): +class ResponseData(MessageData): def __init__(self, http_version, status_code, reason=None, headers=None, content=None, timestamp_start=None, timestamp_end=None): if not headers: @@ -24,14 +24,6 @@ class ResponseData(object): self.timestamp_start = timestamp_start self.timestamp_end = timestamp_end - def __eq__(self, other): - if isinstance(other, ResponseData): - return self.__dict__ == other.__dict__ - return False - - def __ne__(self, other): - return not self.__eq__(other) - class Response(Message): """ @@ -48,7 +40,7 @@ class Response(Message): utils.pretty_size(len(self.content)) ) else: - details = "content missing" + details = "no content" return "Response({status_code} {reason}, {details})".format( status_code=self.status_code, reason=self.reason, -- cgit v1.2.3