about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- mitmproxy/cmdline.py 4
-rw-r--r-- mitmproxy/console/flowlist.py 4
-rw-r--r-- mitmproxy/contentviews.py 5
-rw-r--r-- mitmproxy/protocol/http2.py 4
-rw-r--r-- netlib/http/http1/read.py 3
-rw-r--r-- netlib/http/http2/connections.py 4
-rw-r--r-- netlib/http/request.py 19
-rw-r--r-- netlib/http/url.py 95
-rw-r--r-- netlib/utils.py 95
-rw-r--r-- test/mitmproxy/test_contentview.py 6
-rw-r--r-- test/netlib/http/test_url.py 65
-rw-r--r-- test/netlib/test_utils.py 64
12 files changed, 186 insertions, 182 deletions
diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py
index 8476f6f3..d8bbe448 100644
--- a/mitmproxy/cmdline.py
+++ b/mitmproxy/cmdline.py
@@ -6,7 +6,7 @@ import base64
import configargparse
from netlib.tcp import Address, sslversion_choices
-import netlib.utils
+import netlib.http.url
from . import filt, utils, version
from .proxy import config
@@ -105,7 +105,7 @@ def parse_setheader(s):
def parse_server_spec(url):
try:
- p = netlib.utils.parse_url(url)
+ p = netlib.http.url.parse_url(url)
if p[0] not in ("http", "https"):
raise ValueError()
except ValueError:
diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py
index 78b30231..66d92fe1 100644
--- a/mitmproxy/console/flowlist.py
+++ b/mitmproxy/console/flowlist.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import
import urwid
-import netlib.utils
+import netlib.http.url
from . import common, signals
@@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox):
)
def new_request(self, url, method):
- parts = netlib.utils.parse_url(str(url))
+ parts = netlib.http.url.parse_url(str(url))
if not parts:
signals.status_message.send(message="Invalid Url")
return
diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py
index 1b0f389f..5c562f95 100644
--- a/mitmproxy/contentviews.py
+++ b/mitmproxy/contentviews.py
@@ -27,7 +27,8 @@ import html2text
import six
from netlib.odict import ODict
from netlib import encoding
-from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type
+from netlib.http import url
+from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type
from . import utils
from .exceptions import ContentViewException
from .contrib import jsbeautifier
@@ -257,7 +258,7 @@ class ViewURLEncoded(View):
content_types = ["application/x-www-form-urlencoded"]
def __call__(self, data, **metadata):
- d = urldecode(data)
+ d = url.urldecode(data)
return "URLEncoded form", format_dict(ODict(d))
diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py
index 24460ec9..f4a6cf9d 100644
--- a/mitmproxy/protocol/http2.py
+++ b/mitmproxy/protocol/http2.py
@@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame
from netlib.tcp import ssl_read_select
from netlib.exceptions import HttpException
from netlib.http import Headers
-from netlib.utils import parse_url
from netlib.http.http2 import frame
+import netlib.http.url
from .base import Layer
from .http import _HttpTransmissionLayer, HttpLayer
@@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread):
else: # pragma: no cover
first_line_format = "absolute"
# FIXME: verify if path or :host contains what we need
- scheme, host, port, _ = parse_url(path)
+ scheme, host, port, _ = netlib.http.url.parse_url(path)
if authority:
host, _, port = authority.partition(':')
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index d30976bd..f776d0b5 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -6,6 +6,7 @@ import re
from ... import utils
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
from .. import Request, Response, Headers
+from .. import url
def read_request(rfile, body_size_limit=None):
@@ -240,7 +241,7 @@ def _read_request_line(rfile):
scheme, path = None, None
else:
form = "absolute"
- scheme, host, port, path = utils.parse_url(path)
+ scheme, host, port, path = url.parse_url(path)
_check_http_version(http_version)
except ValueError:
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
index 03f1804b..4c15ee07 100644
--- a/netlib/http/http2/connections.py
+++ b/netlib/http/http2/connections.py
@@ -6,7 +6,7 @@ import hyperframe.frame
from hpack.hpack import Encoder, Decoder
from ... import utils
-from .. import Headers, Response, Request
+from .. import Headers, Response, Request, url
from . import frame
@@ -118,7 +118,7 @@ class HTTP2Protocol(object):
else:
first_line_format = "absolute"
# FIXME: verify if path or :host contains what we need
- scheme, host, port, _ = utils.parse_url(path)
+ scheme, host, port, _ = url.parse_url(path)
scheme = scheme.decode('ascii')
host = host.decode('ascii')
diff --git a/netlib/http/request.py b/netlib/http/request.py
index 80a9ae65..170066f7 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -6,6 +6,7 @@ import six
from six.moves import urllib
from netlib import utils
+import netlib.http.url
from . import cookies
from .. import encoding
from ..multidict import MultiDictView
@@ -179,11 +180,11 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.host, self.port)
- return utils.unparse_url(self.scheme, self.host, self.port, self.path)
+ return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path)
@url.setter
def url(self, url):
- self.scheme, self.host, self.port, self.path = utils.parse_url(url)
+ self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url)
def _parse_host_header(self):
"""Extract the host and port from Host header"""
@@ -219,7 +220,7 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.pretty_host, self.port)
- return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
+ return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
@property
def query(self):
@@ -234,12 +235,12 @@ class Request(Message):
def _get_query(self):
_, _, _, _, query, _ = urllib.parse.urlparse(self.url)
- return tuple(utils.urldecode(query))
+ return tuple(netlib.http.url.urldecode(query))
def _set_query(self, value):
- query = utils.urlencode(value)
+ query = netlib.http.url.urlencode(value)
scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = utils.parse_url(
+ _, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
@query.setter
@@ -287,7 +288,7 @@ class Request(Message):
components = map(lambda x: urllib.parse.quote(x, safe=""), components)
path = "/" + "/".join(components)
scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
- _, _, _, self.path = utils.parse_url(
+ _, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
def anticache(self):
@@ -339,7 +340,7 @@ class Request(Message):
def _get_urlencoded_form(self):
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
- return tuple(utils.urldecode(self.content))
+ return tuple(netlib.http.url.urldecode(self.content))
return ()
def _set_urlencoded_form(self, value):
@@ -348,7 +349,7 @@ class Request(Message):
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
- self.content = utils.urlencode(value)
+ self.content = netlib.http.url.urlencode(value)
@urlencoded_form.setter
def urlencoded_form(self, value):
diff --git a/netlib/http/url.py b/netlib/http/url.py
new file mode 100644
index 00000000..3961998b
--- /dev/null
+++ b/netlib/http/url.py
@@ -0,0 +1,95 @@
+import six
+from six.moves import urllib
+
+from .. import utils
+
+# PY2 workaround
+def decode_parse_result(result, enc):
+ if hasattr(result, "decode"):
+ return result.decode(enc)
+ else:
+ return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
+
+
+# PY2 workaround
+def encode_parse_result(result, enc):
+ if hasattr(result, "encode"):
+ return result.encode(enc)
+ else:
+ return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
+
+
+def parse_url(url):
+ """
+ URL-parsing function that checks that
+ - port is an integer 0-65535
+ - host is a valid IDNA-encoded hostname with no null-bytes
+ - path is valid ASCII
+
+ Args:
+ A URL (as bytes or as unicode)
+
+ Returns:
+ A (scheme, host, port, path) tuple
+
+ Raises:
+ ValueError, if the URL is not properly formatted.
+ """
+ parsed = urllib.parse.urlparse(url)
+
+ if not parsed.hostname:
+ raise ValueError("No hostname given")
+
+ if isinstance(url, six.binary_type):
+ host = parsed.hostname
+
+ # this should not raise a ValueError,
+ # but we try to be very forgiving here and accept just everything.
+ # decode_parse_result(parsed, "ascii")
+ else:
+ host = parsed.hostname.encode("idna")
+ parsed = encode_parse_result(parsed, "ascii")
+
+ port = parsed.port
+ if not port:
+ port = 443 if parsed.scheme == b"https" else 80
+
+ full_path = urllib.parse.urlunparse(
+ (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
+ )
+ if not full_path.startswith(b"/"):
+ full_path = b"/" + full_path
+
+ if not utils.is_valid_host(host):
+ raise ValueError("Invalid Host")
+ if not utils.is_valid_port(port):
+ raise ValueError("Invalid Port")
+
+ return parsed.scheme, host, port, full_path
+
+
+def unparse_url(scheme, host, port, path=""):
+ """
+ Returns a URL string, constructed from the specified components.
+
+ Args:
+ All args must be str.
+ """
+ if path == "*":
+ path = ""
+ return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path)
+
+
+def urlencode(s):
+ """
+ Takes a list of (key, value) tuples and returns a urlencoded string.
+ """
+ s = [tuple(i) for i in s]
+ return urllib.parse.urlencode(s, False)
+
+
+def urldecode(s):
+ """
+ Takes a urlencoded string and returns a list of (key, value) tuples.
+ """
+ return urllib.parse.parse_qsl(s, keep_blank_values=True)
diff --git a/netlib/utils.py b/netlib/utils.py
index 770ad6a6..cd8aa55a 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -8,9 +8,6 @@ import inspect
import six
-from six.moves import urllib
-
-
def always_bytes(unicode_or_bytes, *encode_args):
if isinstance(unicode_or_bytes, six.text_type):
return unicode_or_bytes.encode(*encode_args)
@@ -188,71 +185,6 @@ def is_valid_port(port):
return 0 <= port <= 65535
-# PY2 workaround
-def decode_parse_result(result, enc):
- if hasattr(result, "decode"):
- return result.decode(enc)
- else:
- return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
-
-
-# PY2 workaround
-def encode_parse_result(result, enc):
- if hasattr(result, "encode"):
- return result.encode(enc)
- else:
- return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
-
-
-def parse_url(url):
- """
- URL-parsing function that checks that
- - port is an integer 0-65535
- - host is a valid IDNA-encoded hostname with no null-bytes
- - path is valid ASCII
-
- Args:
- A URL (as bytes or as unicode)
-
- Returns:
- A (scheme, host, port, path) tuple
-
- Raises:
- ValueError, if the URL is not properly formatted.
- """
- parsed = urllib.parse.urlparse(url)
-
- if not parsed.hostname:
- raise ValueError("No hostname given")
-
- if isinstance(url, six.binary_type):
- host = parsed.hostname
-
- # this should not raise a ValueError,
- # but we try to be very forgiving here and accept just everything.
- # decode_parse_result(parsed, "ascii")
- else:
- host = parsed.hostname.encode("idna")
- parsed = encode_parse_result(parsed, "ascii")
-
- port = parsed.port
- if not port:
- port = 443 if parsed.scheme == b"https" else 80
-
- full_path = urllib.parse.urlunparse(
- (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
- )
- if not full_path.startswith(b"/"):
- full_path = b"/" + full_path
-
- if not is_valid_host(host):
- raise ValueError("Invalid Host")
- if not is_valid_port(port):
- raise ValueError("Invalid Port")
-
- return parsed.scheme, host, port, full_path
-
-
def get_header_tokens(headers, key):
"""
Retrieve all tokens for a header key. A number of different headers
@@ -278,33 +210,6 @@ def hostport(scheme, host, port):
return "%s:%d" % (host, port)
-def unparse_url(scheme, host, port, path=""):
- """
- Returns a URL string, constructed from the specified components.
-
- Args:
- All args must be str.
- """
- if path == "*":
- path = ""
- return "%s://%s%s" % (scheme, hostport(scheme, host, port), path)
-
-
-def urlencode(s):
- """
- Takes a list of (key, value) tuples and returns a urlencoded string.
- """
- s = [tuple(i) for i in s]
- return urllib.parse.urlencode(s, False)
-
-
-def urldecode(s):
- """
- Takes a urlencoded string and returns a list of (key, value) tuples.
- """
- return urllib.parse.parse_qsl(s, keep_blank_values=True)
-
-
def parse_content_type(c):
"""
A simple parser for content-type values. Returns a (type, subtype,
diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py
index c00afa5f..57e5ae99 100644
--- a/test/mitmproxy/test_contentview.py
+++ b/test/mitmproxy/test_contentview.py
@@ -1,8 +1,8 @@
from mitmproxy.exceptions import ContentViewException
from netlib.http import Headers
from netlib.odict import ODict
-import netlib.utils
from netlib import encoding
+from netlib.http import url
import mitmproxy.contentviews as cv
from . import tutils
@@ -60,10 +60,10 @@ class TestContentView:
assert f[0] == "Query"
def test_view_urlencoded(self):
- d = netlib.utils.urlencode([("one", "two"), ("three", "four")])
+ d = url.urlencode([("one", "two"), ("three", "four")])
v = cv.ViewURLEncoded()
assert v(d)
- d = netlib.utils.urlencode([("adsfa", "")])
+ d = url.urlencode([("adsfa", "")])
v = cv.ViewURLEncoded()
assert v(d)
diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py
new file mode 100644
index 00000000..d777a949
--- /dev/null
+++ b/test/netlib/http/test_url.py
@@ -0,0 +1,65 @@
+from netlib import tutils
+from netlib.http import url
+
+def test_parse_url():
+ with tutils.raises(ValueError):
+ url.parse_url("")
+
+ s, h, po, pa = url.parse_url(b"http://foo.com:8888/test")
+ assert s == b"http"
+ assert h == b"foo.com"
+ assert po == 8888
+ assert pa == b"/test"
+
+ s, h, po, pa = url.parse_url("http://foo/bar")
+ assert s == b"http"
+ assert h == b"foo"
+ assert po == 80
+ assert pa == b"/bar"
+
+ s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar")
+ assert s == b"http"
+ assert h == b"foo"
+ assert po == 80
+ assert pa == b"/bar"
+
+ s, h, po, pa = url.parse_url(b"http://foo")
+ assert pa == b"/"
+
+ s, h, po, pa = url.parse_url(b"https://foo")
+ assert po == 443
+
+ with tutils.raises(ValueError):
+ url.parse_url(b"https://foo:bar")
+
+ # Invalid IDNA
+ with tutils.raises(ValueError):
+ url.parse_url("http://\xfafoo")
+ # Invalid PATH
+ with tutils.raises(ValueError):
+ url.parse_url("http:/\xc6/localhost:56121")
+ # Null byte in host
+ with tutils.raises(ValueError):
+ url.parse_url("http://foo\0")
+ # Port out of range
+ _, _, port, _ = url.parse_url("http://foo:999999")
+ assert port == 80
+ # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
+ with tutils.raises(ValueError):
+ url.parse_url('http://lo[calhost')
+
+
+def test_unparse_url():
+ assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
+ assert url.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
+ assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
+ assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
+
+
+def test_urlencode():
+ assert url.urlencode([('foo', 'bar')])
+
+
+def test_urldecode():
+ s = "one=two&three=four"
+ assert len(url.urldecode(s)) == 2
diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py
index cd629d77..f9315667 100644
--- a/test/netlib/test_utils.py
+++ b/test/netlib/test_utils.py
@@ -38,70 +38,6 @@ def test_pretty_size():
assert utils.pretty_size(1024 * 1024) == "1MB"
-def test_parse_url():
- with tutils.raises(ValueError):
- utils.parse_url("")
-
- s, h, po, pa = utils.parse_url(b"http://foo.com:8888/test")
- assert s == b"http"
- assert h == b"foo.com"
- assert po == 8888
- assert pa == b"/test"
-
- s, h, po, pa = utils.parse_url("http://foo/bar")
- assert s == b"http"
- assert h == b"foo"
- assert po == 80
- assert pa == b"/bar"
-
- s, h, po, pa = utils.parse_url(b"http://user:pass@foo/bar")
- assert s == b"http"
- assert h == b"foo"
- assert po == 80
- assert pa == b"/bar"
-
- s, h, po, pa = utils.parse_url(b"http://foo")
- assert pa == b"/"
-
- s, h, po, pa = utils.parse_url(b"https://foo")
- assert po == 443
-
- with tutils.raises(ValueError):
- utils.parse_url(b"https://foo:bar")
-
- # Invalid IDNA
- with tutils.raises(ValueError):
- utils.parse_url("http://\xfafoo")
- # Invalid PATH
- with tutils.raises(ValueError):
- utils.parse_url("http:/\xc6/localhost:56121")
- # Null byte in host
- with tutils.raises(ValueError):
- utils.parse_url("http://foo\0")
- # Port out of range
- _, _, port, _ = utils.parse_url("http://foo:999999")
- assert port == 80
- # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
- with tutils.raises(ValueError):
- utils.parse_url('http://lo[calhost')
-
-
-def test_unparse_url():
- assert utils.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
- assert utils.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
- assert utils.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
- assert utils.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
-
-
-def test_urlencode():
- assert utils.urlencode([('foo', 'bar')])
-
-
-def test_urldecode():
- s = "one=two&three=four"
- assert len(utils.urldecode(s)) == 2
-
-
def test_get_header_tokens():
headers = Headers()
assert utils.get_header_tokens(headers, "foo") == []