about | summary | refs | log | tree | commit | diff | stats
path: root/mitmproxy/net
diff options
context:
space:
mode:
author: Thomas Kriechbaumer <Kriechi@users.noreply.github.com> 2019-09-28 11:46:58 +0200
committer: GitHub <noreply@github.com> 2019-09-28 11:46:58 +0200
commit: 7d60dde76cbcc115b33c3d763de5995ef5621565 (patch)
tree: 9ec9510e3b5d77daca7fafeb35a954b75b63bfdb /mitmproxy/net
parent: 76bd3ef82dd6b8e3d00b9b4dfe56c96a3c22dd47 (diff)
parent: ba054b15f367d59139ec78fe03dc1c7d8fb099b5 (diff)
download: mitmproxy-7d60dde76cbcc115b33c3d763de5995ef5621565.tar.gz
mitmproxy-7d60dde76cbcc115b33c3d763de5995ef5621565.tar.bz2
mitmproxy-7d60dde76cbcc115b33c3d763de5995ef5621565.zip
Merge pull request #3464 from rjt-gupta/url-fix
Non ascii characters in url
Diffstat (limited to 'mitmproxy/net')
-rw-r--r-- mitmproxy/net/http/url.py 21
1 files changed, 15 insertions, 6 deletions
diff --git a/mitmproxy/net/http/url.py b/mitmproxy/net/http/url.py
index f938cb12..d8e14aeb 100644
--- a/mitmproxy/net/http/url.py
+++ b/mitmproxy/net/http/url.py
@@ -21,16 +21,25 @@ def parse(url):
Raises:
ValueError, if the URL is not properly formatted.
"""
- parsed = urllib.parse.urlparse(url)
+ # Size of Ascii character after encoding is 1 byte which is same as its size
+ # But non-Ascii character's size after encoding will be more than its size
+ def ascii_check(l):
+ if len(l) == len(str(l).encode()):
+ return True
+ return False
+
+ if isinstance(url, bytes):
+ url = url.decode()
+ if not ascii_check(url):
+ url = urllib.parse.urlsplit(url)
+ url = list(url)
+ url[3] = urllib.parse.quote(url[3])
+ url = urllib.parse.urlunsplit(url)
+ parsed = urllib.parse.urlparse(url)
if not parsed.hostname:
raise ValueError("No hostname given")
- if isinstance(url, bytes):
- host = parsed.hostname
-
- # this should not raise a ValueError,
- # but we try to be very forgiving here and accept just everything.
else:
host = parsed.hostname.encode("idna")
if isinstance(parsed, urllib.parse.ParseResult):