diff options
author | Aldo Cortesi <aldo@corte.si> | 2016-05-31 21:03:42 +1200 |
---|---|---|
committer | Aldo Cortesi <aldo@corte.si> | 2016-05-31 21:03:42 +1200 |
commit | a7abf8b731658b4e7ed8705f7a94a6a62f08d51d (patch) | |
tree | cb86e2e483530f5e1e8b0c5d60839de21fcf7390 /netlib/http/url.py | |
parent | 2f526393d29b6a03e43d1f6240175b4dfb13dc7d (diff) | |
parent | 4da125b6a098cc0fd8b1fd2878584beb5df75c6c (diff) | |
download | mitmproxy-a7abf8b731658b4e7ed8705f7a94a6a62f08d51d.tar.gz mitmproxy-a7abf8b731658b4e7ed8705f7a94a6a62f08d51d.tar.bz2 mitmproxy-a7abf8b731658b4e7ed8705f7a94a6a62f08d51d.zip |
Merge pull request #1179 from cortesi/reorg
Start reorganising */utils.py
Diffstat (limited to 'netlib/http/url.py')
-rw-r--r-- | netlib/http/url.py | 96 |
1 files changed, 96 insertions, 0 deletions
# netlib/http/url.py
import six
from six.moves import urllib

from .. import utils


def decode_parse_result(result, enc):
    """
    PY2 workaround: decode all components of a URL parse result.

    Args:
        result: A parse result as produced by urllib.parse.urlparse.
        enc: Name of the encoding to decode with.

    Returns:
        result.decode(enc) when the result object offers a decode method,
        otherwise a new ParseResult built from the individually decoded
        components.
    """
    if not hasattr(result, "decode"):
        decoded = [part.decode(enc) for part in result]
        return urllib.parse.ParseResult(*decoded)
    return result.decode(enc)


def encode_parse_result(result, enc):
    """
    PY2 workaround: encode all components of a URL parse result.

    Args:
        result: A parse result as produced by urllib.parse.urlparse.
        enc: Name of the encoding to encode with.

    Returns:
        result.encode(enc) when the result object offers an encode method,
        otherwise a new ParseResult built from the individually encoded
        components.
    """
    if not hasattr(result, "encode"):
        encoded = [part.encode(enc) for part in result]
        return urllib.parse.ParseResult(*encoded)
    return result.encode(enc)


def parse(url):
    """
    Split a URL into the pieces needed to address a server, checking that

        - port is an integer 0-65535
        - host is a valid IDNA-encoded hostname with no null-bytes
        - path is valid ASCII

    Host and port are validated with utils.is_valid_host and
    utils.is_valid_port. If the URL carries no (or a falsy) port, 443 is
    used for the b"https" scheme and 80 for everything else. The returned
    path is the URL's path, params, query and fragment joined back
    together, prefixed with "/" if it does not already start with one.

    Args:
        A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    parts = urllib.parse.urlparse(url)

    hostname = parts.hostname
    if not hostname:
        raise ValueError("No hostname given")

    if not isinstance(url, six.binary_type):
        # Text input: the host is IDNA-encoded, every other component has
        # to be representable as ASCII.
        host = hostname.encode("idna")
        parts = encode_parse_result(parts, "ascii")
    else:
        # Bytes input is taken as-is. A strict check would be
        # decode_parse_result(parts, "ascii"), but we try to be very
        # forgiving here and accept just everything.
        host = hostname

    # A missing port falls back to the scheme's default.
    port = parts.port or (443 if parts.scheme == b"https" else 80)

    # Re-assemble everything after the authority into a single path.
    tail = (b"", b"", parts.path, parts.params, parts.query, parts.fragment)
    full_path = urllib.parse.urlunparse(tail)
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not utils.is_valid_host(host):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return parts.scheme, host, port, full_path


def unparse(scheme, host, port, path=""):
    """
    Build a URL string of the form "<scheme>://<hostport><path>", where
    the host/port part is produced by utils.hostport.

    A path of "*" is treated as an empty path.

    Args:
        All args must be str.
    """
    suffix = "" if path == "*" else path
    authority = utils.hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, suffix)


def encode(s):
    """
    Urlencode a list of (key, value) pairs into a single string.

    Each pair is converted to a tuple before being handed to
    urllib.parse.urlencode.
    """
    pairs = list(map(tuple, s))
    return urllib.parse.urlencode(pairs, False)


def decode(s):
    """
    Split a urlencoded string into a list of (key, value) tuples.

    Keys with blank values are kept.
    """
    return urllib.parse.parse_qsl(s, keep_blank_values=True)