diff options
Diffstat (limited to 'netlib')
-rw-r--r-- | netlib/http/http1/read.py | 3 | ||||
-rw-r--r-- | netlib/http/http2/connections.py | 4 | ||||
-rw-r--r-- | netlib/http/request.py | 19 | ||||
-rw-r--r-- | netlib/http/url.py | 95 | ||||
-rw-r--r-- | netlib/utils.py | 95 |
5 files changed, 109 insertions, 107 deletions
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index d30976bd..f776d0b5 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -6,6 +6,7 @@ import re from ... import utils from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect from .. import Request, Response, Headers +from .. import url def read_request(rfile, body_size_limit=None): @@ -240,7 +241,7 @@ def _read_request_line(rfile): scheme, path = None, None else: form = "absolute" - scheme, host, port, path = utils.parse_url(path) + scheme, host, port, path = url.parse_url(path) _check_http_version(http_version) except ValueError: diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 03f1804b..4c15ee07 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -6,7 +6,7 @@ import hyperframe.frame from hpack.hpack import Encoder, Decoder from ... import utils -from .. import Headers, Response, Request +from .. import Headers, Response, Request, url from . import frame @@ -118,7 +118,7 @@ class HTTP2Protocol(object): else: first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = utils.parse_url(path) + scheme, host, port, _ = url.parse_url(path) scheme = scheme.decode('ascii') host = host.decode('ascii') diff --git a/netlib/http/request.py b/netlib/http/request.py index 80a9ae65..170066f7 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -6,6 +6,7 @@ import six from six.moves import urllib from netlib import utils +import netlib.http.url from . import cookies from .. import encoding from ..multidict import MultiDictView @@ -179,11 +180,11 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.host, self.port) - return utils.unparse_url(self.scheme, self.host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path) @url.setter def url(self, url): - self.scheme, self.host, self.port, self.path = utils.parse_url(url) + self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url) def _parse_host_header(self): """Extract the host and port from Host header""" @@ -219,7 +220,7 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.pretty_host, self.port) - return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path) @property def query(self): @@ -234,12 +235,12 @@ class Request(Message): def _get_query(self): _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - return tuple(utils.urldecode(query)) + return tuple(netlib.http.url.urldecode(query)) def _set_query(self, value): - query = utils.urlencode(value) + query = netlib.http.url.urlencode(value) scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) @query.setter @@ -287,7 +288,7 @@ class Request(Message): components = map(lambda x: urllib.parse.quote(x, safe=""), components) path = "/" + "/".join(components) scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) def anticache(self): @@ -339,7 +340,7 @@ class Request(Message): def _get_urlencoded_form(self): is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return tuple(utils.urldecode(self.content)) + return tuple(netlib.http.url.urldecode(self.content)) return () def _set_urlencoded_form(self, value): @@ -348,7 +349,7 @@ class Request(Message): This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = utils.urlencode(value) + self.content = netlib.http.url.urlencode(value) @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py new file mode 100644 index 00000000..3961998b --- /dev/null +++ b/netlib/http/url.py @@ -0,0 +1,95 @@ +import six +from six.moves import urllib + +from .. import utils + +# PY2 workaround +def decode_parse_result(result, enc): + if hasattr(result, "decode"): + return result.decode(enc) + else: + return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) + + +# PY2 workaround +def encode_parse_result(result, enc): + if hasattr(result, "encode"): + return result.encode(enc) + else: + return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) + + +def parse_url(url): + """ + URL-parsing function that checks that + - port is an integer 0-65535 + - host is a valid IDNA-encoded hostname with no null-bytes + - path is valid ASCII + + Args: + A URL (as bytes or as unicode) + + Returns: + A (scheme, host, port, path) tuple + + Raises: + ValueError, if the URL is not properly formatted. + """ + parsed = urllib.parse.urlparse(url) + + if not parsed.hostname: + raise ValueError("No hostname given") + + if isinstance(url, six.binary_type): + host = parsed.hostname + + # this should not raise a ValueError, + # but we try to be very forgiving here and accept just everything. + # decode_parse_result(parsed, "ascii") + else: + host = parsed.hostname.encode("idna") + parsed = encode_parse_result(parsed, "ascii") + + port = parsed.port + if not port: + port = 443 if parsed.scheme == b"https" else 80 + + full_path = urllib.parse.urlunparse( + (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) + ) + if not full_path.startswith(b"/"): + full_path = b"/" + full_path + + if not utils.is_valid_host(host): + raise ValueError("Invalid Host") + if not utils.is_valid_port(port): + raise ValueError("Invalid Port") + + return parsed.scheme, host, port, full_path + + +def unparse_url(scheme, host, port, path=""): + """ + Returns a URL string, constructed from the specified components. + + Args: + All args must be str. + """ + if path == "*": + path = "" + return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path) + + +def urlencode(s): + """ + Takes a list of (key, value) tuples and returns a urlencoded string. + """ + s = [tuple(i) for i in s] + return urllib.parse.urlencode(s, False) + + +def urldecode(s): + """ + Takes a urlencoded string and returns a list of (key, value) tuples. + """ + return urllib.parse.parse_qsl(s, keep_blank_values=True) diff --git a/netlib/utils.py b/netlib/utils.py index 770ad6a6..cd8aa55a 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -8,9 +8,6 @@ import inspect import six -from six.moves import urllib - - def always_bytes(unicode_or_bytes, *encode_args): if isinstance(unicode_or_bytes, six.text_type): return unicode_or_bytes.encode(*encode_args) @@ -188,71 +185,6 @@ def is_valid_port(port): return 0 <= port <= 65535 -# PY2 workaround -def decode_parse_result(result, enc): - if hasattr(result, "decode"): - return result.decode(enc) - else: - return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) - - -# PY2 workaround -def encode_parse_result(result, enc): - if hasattr(result, "encode"): - return result.encode(enc) - else: - return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) - - -def parse_url(url): - """ - URL-parsing function that checks that - - port is an integer 0-65535 - - host is a valid IDNA-encoded hostname with no null-bytes - - path is valid ASCII - - Args: - A URL (as bytes or as unicode) - - Returns: - A (scheme, host, port, path) tuple - - Raises: - ValueError, if the URL is not properly formatted. - """ - parsed = urllib.parse.urlparse(url) - - if not parsed.hostname: - raise ValueError("No hostname given") - - if isinstance(url, six.binary_type): - host = parsed.hostname - - # this should not raise a ValueError, - # but we try to be very forgiving here and accept just everything. - # decode_parse_result(parsed, "ascii") - else: - host = parsed.hostname.encode("idna") - parsed = encode_parse_result(parsed, "ascii") - - port = parsed.port - if not port: - port = 443 if parsed.scheme == b"https" else 80 - - full_path = urllib.parse.urlunparse( - (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) - ) - if not full_path.startswith(b"/"): - full_path = b"/" + full_path - - if not is_valid_host(host): - raise ValueError("Invalid Host") - if not is_valid_port(port): - raise ValueError("Invalid Port") - - return parsed.scheme, host, port, full_path - - def get_header_tokens(headers, key): """ Retrieve all tokens for a header key. A number of different headers @@ -278,33 +210,6 @@ def hostport(scheme, host, port): return "%s:%d" % (host, port) -def unparse_url(scheme, host, port, path=""): - """ - Returns a URL string, constructed from the specified components. - - Args: - All args must be str. - """ - if path == "*": - path = "" - return "%s://%s%s" % (scheme, hostport(scheme, host, port), path) - - -def urlencode(s): - """ - Takes a list of (key, value) tuples and returns a urlencoded string. - """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) - - -def urldecode(s): - """ - Takes a urlencoded string and returns a list of (key, value) tuples. - """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) - - def parse_content_type(c): """ A simple parser for content-type values. Returns a (type, subtype, |