aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mitmproxy/net/http/url.py21
-rw-r--r--test/mitmproxy/net/http/test_url.py16
2 files changed, 31 insertions, 6 deletions
diff --git a/mitmproxy/net/http/url.py b/mitmproxy/net/http/url.py
index f938cb12..d8e14aeb 100644
--- a/mitmproxy/net/http/url.py
+++ b/mitmproxy/net/http/url.py
@@ -21,16 +21,25 @@ def parse(url):
Raises:
ValueError, if the URL is not properly formatted.
"""
- parsed = urllib.parse.urlparse(url)
+ # Size of Ascii character after encoding is 1 byte which is same as its size
+ # But non-Ascii character's size after encoding will be more than its size
+ def ascii_check(l):
+ if len(l) == len(str(l).encode()):
+ return True
+ return False
+
+ if isinstance(url, bytes):
+ url = url.decode()
+ if not ascii_check(url):
+ url = urllib.parse.urlsplit(url)
+ url = list(url)
+ url[3] = urllib.parse.quote(url[3])
+ url = urllib.parse.urlunsplit(url)
+ parsed = urllib.parse.urlparse(url)
if not parsed.hostname:
raise ValueError("No hostname given")
- if isinstance(url, bytes):
- host = parsed.hostname
-
- # this should not raise a ValueError,
- # but we try to be very forgiving here and accept just everything.
else:
host = parsed.hostname.encode("idna")
if isinstance(parsed, urllib.parse.ParseResult):
diff --git a/test/mitmproxy/net/http/test_url.py b/test/mitmproxy/net/http/test_url.py
index ecf8e896..48277859 100644
--- a/test/mitmproxy/net/http/test_url.py
+++ b/test/mitmproxy/net/http/test_url.py
@@ -49,6 +49,17 @@ def test_parse():
url.parse('http://lo[calhost')
+def test_ascii_check():
+
+ test_url = "https://xyz.tax-edu.net?flag=selectCourse&lc_id=42825&lc_name=茅莽莽猫氓猫氓".encode()
+ scheme, host, port, full_path = url.parse(test_url)
+ assert scheme == b'https'
+ assert host == b'xyz.tax-edu.net'
+ assert port == 443
+ assert full_path == b'/?flag%3DselectCourse%26lc_id%3D42825%26lc_name%3D%E8%8C%85%E8%8E%BD%E8%8E' \
+ b'%BD%E7%8C%AB%E6%B0%93%E7%8C%AB%E6%B0%93'
+
+
@pytest.mark.skipif(sys.version_info < (3, 6), reason='requires Python 3.6 or higher')
def test_parse_port_range():
# Port out of range
@@ -61,6 +72,7 @@ def test_unparse():
assert url.unparse("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
assert url.unparse("https", "foo.com", 80, "") == "https://foo.com:80"
assert url.unparse("https", "foo.com", 443, "") == "https://foo.com"
+ assert url.unparse("https", "foo.com", 443, "*") == "https://foo.com"
# We ignore the byte 126: '~' because of an incompatibility in Python 3.6 and 3.7
@@ -131,3 +143,7 @@ def test_unquote():
assert url.unquote("foo") == "foo"
assert url.unquote("foo%20bar") == "foo bar"
assert url.unquote(surrogates_quoted) == surrogates
+
+
+def test_hostport():
+ assert url.hostport(b"https", b"foo.com", 8080) == b"foo.com:8080"