aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http/url.py
diff options
context:
space:
mode:
authorAldo Cortesi <aldo@nullcube.com>2016-06-01 09:58:01 +1200
committerAldo Cortesi <aldo@nullcube.com>2016-06-01 09:58:01 +1200
commita061e4587772f4a87eb43d84f2ed358f7cc98fbd (patch)
tree1aba05c9d0d6f654fe946897dcb8b5d9127a3de2 /netlib/http/url.py
parent06703542037d1c84b0dcb60c6d1c500a0d189e93 (diff)
parenta7abf8b731658b4e7ed8705f7a94a6a62f08d51d (diff)
downloadmitmproxy-a061e4587772f4a87eb43d84f2ed358f7cc98fbd.tar.gz
mitmproxy-a061e4587772f4a87eb43d84f2ed358f7cc98fbd.tar.bz2
mitmproxy-a061e4587772f4a87eb43d84f2ed358f7cc98fbd.zip
Merge branch 'master' of github.com:cortesi/mitmproxy
Diffstat (limited to 'netlib/http/url.py')
-rw-r--r--netlib/http/url.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/netlib/http/url.py b/netlib/http/url.py
new file mode 100644
index 00000000..8ce28578
--- /dev/null
+++ b/netlib/http/url.py
@@ -0,0 +1,96 @@
+import six
+from six.moves import urllib
+
+from .. import utils
+
+
+# PY2 workaround
def decode_parse_result(result, enc):
    """
        Decode a URL parse result from bytes to text.

        Args:
            result: An object with a ``decode`` method, or an iterable of
                components that each have one.
            enc: The codec name handed to every ``decode`` call.

        Returns:
            ``result.decode(enc)`` when ``result`` has a ``decode`` attribute,
            otherwise a ``urllib.parse.ParseResult`` built from the
            individually decoded components.
    """
    decoder = getattr(result, "decode", None)
    if decoder is None:
        # No decode() on the container itself: decode component by component
        # and reassemble the pieces into a ParseResult.
        pieces = [component.decode(enc) for component in result]
        return urllib.parse.ParseResult(*pieces)
    return decoder(enc)
+
+
+# PY2 workaround
def encode_parse_result(result, enc):
    """
        Encode a URL parse result from text to bytes.

        Args:
            result: An object with an ``encode`` method, or an iterable of
                components that each have one.
            enc: The codec name handed to every ``encode`` call.

        Returns:
            ``result.encode(enc)`` when ``result`` has an ``encode`` attribute,
            otherwise a ``urllib.parse.ParseResult`` built from the
            individually encoded components.
    """
    encoder = getattr(result, "encode", None)
    if encoder is None:
        # No encode() on the container itself: encode component by component
        # and reassemble the pieces into a ParseResult.
        pieces = [component.encode(enc) for component in result]
        return urllib.parse.ParseResult(*pieces)
    return encoder(enc)
+
+
def parse(url):
    """
        Split a URL into the pieces needed to issue a request and validate
        the host and port.

        Args:
            url: The URL, either as bytes or as unicode text.

        Returns:
            A (scheme, host, port, path) tuple. The path element also carries
            the params, query and fragment of the URL and always starts with
            a slash.

        Raises:
            ValueError, if the URL has no hostname, or if the host or port is
            rejected by utils.is_valid_host / utils.is_valid_port.
    """
    pieces = urllib.parse.urlparse(url)

    hostname = pieces.hostname
    if not hostname:
        raise ValueError("No hostname given")

    if not isinstance(url, six.binary_type):
        # Text input: the hostname is IDNA-encoded, every other component
        # must be plain ASCII.
        hostname = hostname.encode("idna")
        pieces = encode_parse_result(pieces, "ascii")
    # Byte input is deliberately taken as-is, without any decoding check.

    # NOTE(review): any falsy port (absent, or an explicit 0) is replaced by
    # the default for the scheme.
    port = pieces.port
    if not port:
        if pieces.scheme == b"https":
            port = 443
        else:
            port = 80

    # Rebuild everything after the authority part into a single string.
    remainder = (
        b"", b"", pieces.path, pieces.params, pieces.query, pieces.fragment
    )
    full_path = urllib.parse.urlunparse(remainder)
    if full_path[:1] != b"/":
        full_path = b"/" + full_path

    if not utils.is_valid_host(hostname):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return pieces.scheme, hostname, port, full_path
+
+
def unparse(scheme, host, port, path=""):
    """
        Assemble a URL string from its components.

        Args:
            scheme: URL scheme.
            host: Host name.
            port: Port number.
            path: Request path; the special value "*" is rendered as an
                empty path.

            All string args must be str.

        Returns:
            The string "<scheme>://<hostport><path>", where the hostport part
            is produced by utils.hostport(scheme, host, port).
    """
    suffix = "" if path == "*" else path
    authority = utils.hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, suffix)
+
+
def encode(s):
    """
        Urlencode a sequence of (key, value) pairs.

        Args:
            s: An iterable of two-element sequences.

        Returns:
            The urlencoded string.
    """
    # Each pair is turned into a real tuple before it is given to urlencode.
    pairs = list(map(tuple, s))
    return urllib.parse.urlencode(pairs, doseq=False)
+
+
def decode(s):
    """
        Parse a urlencoded string into its (key, value) pairs.

        Args:
            s: The urlencoded string.

        Returns:
            A list of (key, value) tuples. Keys with an empty value are kept.
    """
    pairs = urllib.parse.parse_qsl(s, keep_blank_values=True)
    return pairs