aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--netlib/http/request.py26
-rw-r--r--netlib/http/url.py42
-rw-r--r--netlib/strutils.py3
-rw-r--r--test/netlib/http/test_url.py44
4 files changed, 94 insertions, 21 deletions
diff --git a/netlib/http/request.py b/netlib/http/request.py
index ecaa9b79..061217a3 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -253,14 +253,13 @@ class Request(message.Message):
)
def _get_query(self):
- _, _, _, _, query, _ = urllib.parse.urlparse(self.url)
+ query = urllib.parse.urlparse(self.url).query
return tuple(netlib.http.url.decode(query))
- def _set_query(self, value):
- query = netlib.http.url.encode(value)
- scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = netlib.http.url.parse(
- urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+ def _set_query(self, query_data):
+ query = netlib.http.url.encode(query_data)
+ _, _, path, params, _, fragment = urllib.parse.urlparse(self.url)
+ self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
@query.setter
def query(self, value):
@@ -296,19 +295,18 @@ class Request(message.Message):
The URL's path components as a tuple of strings.
Components are unquoted.
"""
- _, _, path, _, _, _ = urllib.parse.urlparse(self.url)
+ path = urllib.parse.urlparse(self.url).path
# This needs to be a tuple so that it's immutable.
# Otherwise, this would fail silently:
# request.path_components.append("foo")
- return tuple(urllib.parse.unquote(i) for i in path.split("/") if i)
+ return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i)
@path_components.setter
def path_components(self, components):
- components = map(lambda x: urllib.parse.quote(x, safe=""), components)
+ components = map(lambda x: netlib.http.url.quote(x, safe=""), components)
path = "/" + "/".join(components)
- scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = netlib.http.url.parse(
- urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
+ _, _, _, params, query, fragment = urllib.parse.urlparse(self.url)
+ self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
def anticache(self):
"""
@@ -365,13 +363,13 @@ class Request(message.Message):
pass
return ()
- def _set_urlencoded_form(self, value):
+ def _set_urlencoded_form(self, form_data):
"""
Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = netlib.http.url.encode(value).encode()
+ self.content = netlib.http.url.encode(form_data).encode()
@urlencoded_form.setter
def urlencoded_form(self, value):
diff --git a/netlib/http/url.py b/netlib/http/url.py
index 1c8c007a..076854b9 100644
--- a/netlib/http/url.py
+++ b/netlib/http/url.py
@@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""):
def encode(s):
- # type: (six.text_type, bytes) -> str
+ # type: Sequence[Tuple[str,str]] -> str
"""
Takes a list of (key, value) tuples and returns a urlencoded string.
"""
- s = [tuple(i) for i in s]
- return urllib.parse.urlencode(s, False)
+ if six.PY2:
+ return urllib.parse.urlencode(s, False)
+ else:
+ return urllib.parse.urlencode(s, False, errors="surrogateescape")
def decode(s):
"""
- Takes a urlencoded string and returns a list of (key, value) tuples.
+ Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples.
+ """
+ if six.PY2:
+ return urllib.parse.parse_qsl(s, keep_blank_values=True)
+ else:
+ return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape')
+
+
+def quote(b, safe="/"):
+ """
+ Returns:
+ An ascii-encodable str.
+ """
+ # type: (str) -> str
+ if six.PY2:
+ return urllib.parse.quote(b, safe=safe)
+ else:
+ return urllib.parse.quote(b, safe=safe, errors="surrogateescape")
+
+
+def unquote(s):
"""
- return urllib.parse.parse_qsl(s, keep_blank_values=True)
+ Args:
+ s: A surrogate-escaped str
+ Returns:
+ A surrogate-escaped str
+ """
+ # type: (str) -> str
+
+ if six.PY2:
+ return urllib.parse.unquote(s)
+ else:
+ return urllib.parse.unquote(s, errors="surrogateescape")
def hostport(scheme, host, port):
diff --git a/netlib/strutils.py b/netlib/strutils.py
index 96c8b10f..8f27ebb7 100644
--- a/netlib/strutils.py
+++ b/netlib/strutils.py
@@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False):
def escaped_str_to_bytes(data):
"""
Take an escaped string and return the unescaped bytes equivalent.
+
+ Raises:
+ ValueError, if the escape sequence is invalid.
"""
if not isinstance(data, six.string_types):
if six.PY2:
diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py
index 26b37230..768e5130 100644
--- a/test/netlib/http/test_url.py
+++ b/test/netlib/http/test_url.py
@@ -1,3 +1,4 @@
+import six
from netlib import tutils
from netlib.http import url
@@ -57,10 +58,49 @@ def test_unparse():
assert url.unparse("https", "foo.com", 443, "") == "https://foo.com"
-def test_urlencode():
+if six.PY2:
+ surrogates = bytes(bytearray(range(256)))
+else:
+ surrogates = bytes(range(256)).decode("utf8", "surrogateescape")
+
+surrogates_quoted = (
+ '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F'
+ '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F'
+ '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-./'
+ '0123456789%3A%3B%3C%3D%3E%3F'
+ '%40ABCDEFGHIJKLMNO'
+ 'PQRSTUVWXYZ%5B%5C%5D%5E_'
+ '%60abcdefghijklmno'
+ 'pqrstuvwxyz%7B%7C%7D%7E%7F'
+ '%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F'
+ '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F'
+ '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF'
+ '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF'
+ '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF'
+ '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF'
+ '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF'
+ '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF'
+)
+
+
+def test_encode():
assert url.encode([('foo', 'bar')])
+ assert url.encode([('foo', surrogates)])
-def test_urldecode():
+def test_decode():
s = "one=two&three=four"
assert len(url.decode(s)) == 2
+ assert url.decode(surrogates)
+
+
+def test_quote():
+ assert url.quote("foo") == "foo"
+ assert url.quote("foo bar") == "foo%20bar"
+ assert url.quote(surrogates) == surrogates_quoted
+
+
+def test_unquote():
+ assert url.unquote("foo") == "foo"
+ assert url.unquote("foo%20bar") == "foo bar"
+ assert url.unquote(surrogates_quoted) == surrogates