aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http/http1/read.py3
-rw-r--r--netlib/http/http2/connections.py4
-rw-r--r--netlib/http/request.py19
-rw-r--r--netlib/http/url.py95
-rw-r--r--netlib/utils.py95
5 files changed, 109 insertions, 107 deletions
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index d30976bd..f776d0b5 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -6,6 +6,7 @@ import re
from ... import utils
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
from .. import Request, Response, Headers
+from .. import url
def read_request(rfile, body_size_limit=None):
@@ -240,7 +241,7 @@ def _read_request_line(rfile):
scheme, path = None, None
else:
form = "absolute"
- scheme, host, port, path = utils.parse_url(path)
+ scheme, host, port, path = url.parse_url(path)
_check_http_version(http_version)
except ValueError:
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
index 03f1804b..4c15ee07 100644
--- a/netlib/http/http2/connections.py
+++ b/netlib/http/http2/connections.py
@@ -6,7 +6,7 @@ import hyperframe.frame
from hpack.hpack import Encoder, Decoder
from ... import utils
-from .. import Headers, Response, Request
+from .. import Headers, Response, Request, url
from . import frame
@@ -118,7 +118,7 @@ class HTTP2Protocol(object):
else:
first_line_format = "absolute"
# FIXME: verify if path or :host contains what we need
- scheme, host, port, _ = utils.parse_url(path)
+ scheme, host, port, _ = url.parse_url(path)
scheme = scheme.decode('ascii')
host = host.decode('ascii')
diff --git a/netlib/http/request.py b/netlib/http/request.py
index 80a9ae65..170066f7 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -6,6 +6,7 @@ import six
from six.moves import urllib
from netlib import utils
+import netlib.http.url
from . import cookies
from .. import encoding
from ..multidict import MultiDictView
@@ -179,11 +180,11 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.host, self.port)
- return utils.unparse_url(self.scheme, self.host, self.port, self.path)
+ return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path)
@url.setter
def url(self, url):
- self.scheme, self.host, self.port, self.path = utils.parse_url(url)
+ self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url)
def _parse_host_header(self):
"""Extract the host and port from Host header"""
@@ -219,7 +220,7 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.pretty_host, self.port)
- return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
+ return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
@property
def query(self):
@@ -234,12 +235,12 @@ class Request(Message):
def _get_query(self):
_, _, _, _, query, _ = urllib.parse.urlparse(self.url)
- return tuple(utils.urldecode(query))
+ return tuple(netlib.http.url.urldecode(query))
def _set_query(self, value):
- query = utils.urlencode(value)
+ query = netlib.http.url.urlencode(value)
scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = utils.parse_url(
+ _, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
@query.setter
@@ -287,7 +288,7 @@ class Request(Message):
components = map(lambda x: urllib.parse.quote(x, safe=""), components)
path = "/" + "/".join(components)
scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = utils.parse_url(
+ _, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
def anticache(self):
@@ -339,7 +340,7 @@ class Request(Message):
def _get_urlencoded_form(self):
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return tuple(utils.urldecode(self.content))
+ return tuple(netlib.http.url.urldecode(self.content))
return ()
def _set_urlencoded_form(self, value):
@@ -348,7 +349,7 @@ class Request(Message):
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = utils.urlencode(value)
+ self.content = netlib.http.url.urlencode(value)
@urlencoded_form.setter
def urlencoded_form(self, value):
diff --git a/netlib/http/url.py b/netlib/http/url.py
new file mode 100644
index 00000000..3961998b
--- /dev/null
+++ b/netlib/http/url.py
@@ -0,0 +1,95 @@
+import six
+from six.moves import urllib
+
+from .. import utils
+
+# PY2 workaround
+def decode_parse_result(result, enc):
+ if hasattr(result, "decode"):
+ return result.decode(enc)
+ else:
+ return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
+
+
+# PY2 workaround
+def encode_parse_result(result, enc):
+ if hasattr(result, "encode"):
+ return result.encode(enc)
+ else:
+ return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
+
+
+def parse_url(url):
+ """
+ URL-parsing function that checks that
+ - port is an integer 0-65535
+ - host is a valid IDNA-encoded hostname with no null-bytes
+ - path is valid ASCII
+
+ Args:
+ A URL (as bytes or as unicode)
+
+ Returns:
+ A (scheme, host, port, path) tuple
+
+ Raises:
+ ValueError, if the URL is not properly formatted.
+ """
+ parsed = urllib.parse.urlparse(url)
+
+ if not parsed.hostname:
+ raise ValueError("No hostname given")
+
+ if isinstance(url, six.binary_type):
+ host = parsed.hostname
+
+ # this should not raise a ValueError,
+ # but we try to be very forgiving here and accept just everything.
+ # decode_parse_result(parsed, "ascii")
+ else:
+ host = parsed.hostname.encode("idna")
+ parsed = encode_parse_result(parsed, "ascii")
+
+ port = parsed.port
+ if not port:
+ port = 443 if parsed.scheme == b"https" else 80
+
+ full_path = urllib.parse.urlunparse(
+ (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
+ )
+ if not full_path.startswith(b"/"):
+ full_path = b"/" + full_path
+
+ if not utils.is_valid_host(host):
+ raise ValueError("Invalid Host")
+ if not utils.is_valid_port(port):
+ raise ValueError("Invalid Port")
+
+ return parsed.scheme, host, port, full_path
+
+
+def unparse_url(scheme, host, port, path=""):
+ """
+ Returns a URL string, constructed from the specified components.
+
+ Args:
+ All args must be str.
+ """
+ if path == "*":
+ path = ""
+ return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path)
+
+
+def urlencode(s):
+ """
+ Takes a list of (key, value) tuples and returns a urlencoded string.
+ """
+ s = [tuple(i) for i in s]
+ return urllib.parse.urlencode(s, False)
+
+
+def urldecode(s):
+ """
+ Takes a urlencoded string and returns a list of (key, value) tuples.
+ """
+ return urllib.parse.parse_qsl(s, keep_blank_values=True)
diff --git a/netlib/utils.py b/netlib/utils.py
index 770ad6a6..cd8aa55a 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -8,9 +8,6 @@ import inspect
import six
-from six.moves import urllib
-
-
def always_bytes(unicode_or_bytes, *encode_args):
if isinstance(unicode_or_bytes, six.text_type):
return unicode_or_bytes.encode(*encode_args)
@@ -188,71 +185,6 @@ def is_valid_port(port):
return 0 <= port <= 65535
-# PY2 workaround
-def decode_parse_result(result, enc):
- if hasattr(result, "decode"):
- return result.decode(enc)
- else:
- return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
-
-
-# PY2 workaround
-def encode_parse_result(result, enc):
- if hasattr(result, "encode"):
- return result.encode(enc)
- else:
- return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
-
-
-def parse_url(url):
- """
- URL-parsing function that checks that
- - port is an integer 0-65535
- - host is a valid IDNA-encoded hostname with no null-bytes
- - path is valid ASCII
-
- Args:
- A URL (as bytes or as unicode)
-
- Returns:
- A (scheme, host, port, path) tuple
-
- Raises:
- ValueError, if the URL is not properly formatted.
- """
- parsed = urllib.parse.urlparse(url)
-
- if not parsed.hostname:
- raise ValueError("No hostname given")
-
- if isinstance(url, six.binary_type):
- host = parsed.hostname
-
- # this should not raise a ValueError,
- # but we try to be very forgiving here and accept just everything.
- # decode_parse_result(parsed, "ascii")
- else:
- host = parsed.hostname.encode("idna")
- parsed = encode_parse_result(parsed, "ascii")
-
- port = parsed.port
- if not port:
- port = 443 if parsed.scheme == b"https" else 80
-
- full_path = urllib.parse.urlunparse(
- (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
- )
- if not full_path.startswith(b"/"):
- full_path = b"/" + full_path
-
- if not is_valid_host(host):
- raise ValueError("Invalid Host")
- if not is_valid_port(port):
- raise ValueError("Invalid Port")
-
- return parsed.scheme, host, port, full_path
-
-
def get_header_tokens(headers, key):
"""
Retrieve all tokens for a header key. A number of different headers
@@ -278,33 +210,6 @@ def hostport(scheme, host, port):
return "%s:%d" % (host, port)
-def unparse_url(scheme, host, port, path=""):
- """
- Returns a URL string, constructed from the specified components.
-
- Args:
- All args must be str.
- """
- if path == "*":
- path = ""
- return "%s://%s%s" % (scheme, hostport(scheme, host, port), path)
-
-
-def urlencode(s):
- """
- Takes a list of (key, value) tuples and returns a urlencoded string.
- """
- s = [tuple(i) for i in s]
- return urllib.parse.urlencode(s, False)
-
-
-def urldecode(s):
- """
- Takes a urlencoded string and returns a list of (key, value) tuples.
- """
- return urllib.parse.parse_qsl(s, keep_blank_values=True)
-
-
def parse_content_type(c):
"""
A simple parser for content-type values. Returns a (type, subtype,