12 files changed, 186 insertions, 182 deletions
diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py
index 8476f6f3..d8bbe448 100644
--- a/mitmproxy/cmdline.py
+++ b/mitmproxy/cmdline.py
@@ -6,7 +6,7 @@ import base64
 import configargparse
 
 from netlib.tcp import Address, sslversion_choices
-import netlib.utils
+import netlib.http.url
 from . import filt, utils, version
 from .proxy import config
 
@@ -105,7 +105,7 @@ def parse_setheader(s):
 
 def parse_server_spec(url):
     try:
-        p = netlib.utils.parse_url(url)
+        p = netlib.http.url.parse_url(url)
         if p[0] not in ("http", "https"):
             raise ValueError()
     except ValueError:
diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py
index 78b30231..66d92fe1 100644
--- a/mitmproxy/console/flowlist.py
+++ b/mitmproxy/console/flowlist.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import
 import urwid
 
-import netlib.utils
+import netlib.http.url
 
 from . import common, signals
 
@@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox):
         )
 
     def new_request(self, url, method):
-        parts = netlib.utils.parse_url(str(url))
+        parts = netlib.http.url.parse_url(str(url))
         if not parts:
             signals.status_message.send(message="Invalid Url")
             return
diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py
index 1b0f389f..5c562f95 100644
--- a/mitmproxy/contentviews.py
+++ b/mitmproxy/contentviews.py
@@ -27,7 +27,8 @@ import html2text
 import six
 from netlib.odict import ODict
 from netlib import encoding
-from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type
+from netlib.http import url
+from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type
 from . import utils
 from .exceptions import ContentViewException
 from .contrib import jsbeautifier
@@ -257,7 +258,7 @@ class ViewURLEncoded(View):
     content_types = ["application/x-www-form-urlencoded"]
 
     def __call__(self, data, **metadata):
-        d = urldecode(data)
+        d = url.urldecode(data)
         return "URLEncoded form", format_dict(ODict(d))
 
 
diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py
index 24460ec9..f4a6cf9d 100644
--- a/mitmproxy/protocol/http2.py
+++ b/mitmproxy/protocol/http2.py
@@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame
 from netlib.tcp import ssl_read_select
 from netlib.exceptions import HttpException
 from netlib.http import Headers
-from netlib.utils import parse_url
 from netlib.http.http2 import frame
+import netlib.http.url
 
 from .base import Layer
 from .http import _HttpTransmissionLayer, HttpLayer
@@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread):
         else:  # pragma: no cover
             first_line_format = "absolute"
             # FIXME: verify if path or :host contains what we need
-            scheme, host, port, _ = parse_url(path)
+            scheme, host, port, _ = netlib.http.url.parse_url(path)
 
         if authority:
             host, _, port = authority.partition(':')
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index d30976bd..f776d0b5 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -6,6 +6,7 @@ import re
 from ... import utils
 from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
 from .. import Request, Response, Headers
+from .. import url
 
 
 def read_request(rfile, body_size_limit=None):
@@ -240,7 +241,7 @@ def _read_request_line(rfile):
             scheme, path = None, None
         else:
             form = "absolute"
-            scheme, host, port, path = utils.parse_url(path)
+            scheme, host, port, path = url.parse_url(path)
 
         _check_http_version(http_version)
     except ValueError:
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
index 03f1804b..4c15ee07 100644
--- a/netlib/http/http2/connections.py
+++ b/netlib/http/http2/connections.py
@@ -6,7 +6,7 @@ import hyperframe.frame
 
 from hpack.hpack import Encoder, Decoder
 from ... import utils
-from .. import Headers, Response, Request
+from .. import Headers, Response, Request, url
 from . import frame
 
 
@@ -118,7 +118,7 @@ class HTTP2Protocol(object):
         else:
             first_line_format = "absolute"
             # FIXME: verify if path or :host contains what we need
-            scheme, host, port, _ = utils.parse_url(path)
+            scheme, host, port, _ = url.parse_url(path)
             scheme = scheme.decode('ascii')
             host = host.decode('ascii')
 
diff --git a/netlib/http/request.py b/netlib/http/request.py
index 80a9ae65..170066f7 100644
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@@ -6,6 +6,7 @@ import six
 from six.moves import urllib
 
 from netlib import utils
+import netlib.http.url
 from . import cookies
 from .. import encoding
 from ..multidict import MultiDictView
@@ -179,11 +180,11 @@ class Request(Message):
         """
         if self.first_line_format == "authority":
             return "%s:%d" % (self.host, self.port)
-        return utils.unparse_url(self.scheme, self.host, self.port, self.path)
+        return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path)
 
     @url.setter
     def url(self, url):
-        self.scheme, self.host, self.port, self.path = utils.parse_url(url)
+        self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url)
 
     def _parse_host_header(self):
         """Extract the host and port from Host header"""
@@ -219,7 +220,7 @@ class Request(Message):
         """
         if self.first_line_format == "authority":
             return "%s:%d" % (self.pretty_host, self.port)
-        return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
+        return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
 
     @property
     def query(self):
@@ -234,12 +235,12 @@ class Request(Message):
 
     def _get_query(self):
         _, _, _, _, query, _ = urllib.parse.urlparse(self.url)
-        return tuple(utils.urldecode(query))
+        return tuple(netlib.http.url.urldecode(query))
 
     def _set_query(self, value):
-        query = utils.urlencode(value)
+        query = netlib.http.url.urlencode(value)
         scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
-        _, _, _, self.path = utils.parse_url(
+        _, _, _, self.path = netlib.http.url.parse_url(
             urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
 
     @query.setter
@@ -287,7 +288,7 @@ class Request(Message):
         components = map(lambda x: urllib.parse.quote(x, safe=""), components)
         path = "/" + "/".join(components)
         scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
-        _, _, _, self.path = utils.parse_url(
+        _, _, _, self.path = netlib.http.url.parse_url(
             urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
 
     def anticache(self):
@@ -339,7 +340,7 @@ class Request(Message):
     def _get_urlencoded_form(self):
         is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
         if is_valid_content_type:
-            return tuple(utils.urldecode(self.content))
+            return tuple(netlib.http.url.urldecode(self.content))
         return ()
 
     def _set_urlencoded_form(self, value):
@@ -348,7 +349,7 @@ class Request(Message):
         This will overwrite the existing content if there is one.
         """
         self.headers["content-type"] = "application/x-www-form-urlencoded"
-        self.content = utils.urlencode(value)
+        self.content = netlib.http.url.urlencode(value)
 
     @urlencoded_form.setter
     def urlencoded_form(self, value):
diff --git a/netlib/http/url.py b/netlib/http/url.py
new file mode 100644
index 00000000..3961998b
--- /dev/null
+++ b/netlib/http/url.py
@@ -0,0 +1,95 @@
+import six
+from six.moves import urllib
+
+from .. import utils
+
+# PY2 workaround: decode field by field if the parse result has no .decode()
+def decode_parse_result(result, enc):
+    if hasattr(result, "decode"):
+        return result.decode(enc)
+    else:
+        return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
+
+
+# PY2 workaround: encode field by field if the parse result has no .encode()
+def encode_parse_result(result, enc):
+    if hasattr(result, "encode"):
+        return result.encode(enc)
+    else:
+        return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
+
+
+def parse_url(url):
+    """
+        URL-parsing function that checks that
+            - port is an integer 0-65535
+            - host is a valid IDNA-encoded hostname with no null-bytes
+            - path is valid ASCII
+
+        Args:
+            url: A URL (as bytes or as unicode)
+
+        Returns:
+            A (scheme, host, port, path) tuple; port is an int, the others are bytes
+
+        Raises:
+            ValueError, if the URL is not properly formatted.
+    """
+    parsed = urllib.parse.urlparse(url)
+
+    if not parsed.hostname:
+        raise ValueError("No hostname given")
+
+    if isinstance(url, six.binary_type):
+        host = parsed.hostname
+
+        # this should not raise a ValueError,
+        # but we try to be very forgiving here and accept just everything.
+        # decode_parse_result(parsed, "ascii")
+    else:
+        host = parsed.hostname.encode("idna")
+        parsed = encode_parse_result(parsed, "ascii")
+
+    port = parsed.port
+    if not port:
+        port = 443 if parsed.scheme == b"https" else 80
+
+    full_path = urllib.parse.urlunparse(
+        (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
+    )
+    if not full_path.startswith(b"/"):
+        full_path = b"/" + full_path
+
+    if not utils.is_valid_host(host):
+        raise ValueError("Invalid Host")
+    if not utils.is_valid_port(port):
+        raise ValueError("Invalid Port")
+
+    return parsed.scheme, host, port, full_path
+
+
+def unparse_url(scheme, host, port, path=""):
+    """
+    Returns a URL string, constructed from the specified components.
+
+    Args:
+        scheme, host and path must be str; port must be an int.
+    """
+    if path == "*":
+        path = ""
+    return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path)
+
+
+def urlencode(s):
+    """
+        Takes a list of (key, value) tuples and returns a urlencoded string.
+    """
+    s = [tuple(i) for i in s]
+    return urllib.parse.urlencode(s, False)
+
+
+def urldecode(s):
+    """
+        Takes a urlencoded string and returns a list of (key, value) tuples.
+    """
+    return urllib.parse.parse_qsl(s, keep_blank_values=True)
diff --git a/netlib/utils.py b/netlib/utils.py
index 770ad6a6..cd8aa55a 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -8,9 +8,6 @@ import inspect
 
 import six
 
-from six.moves import urllib
-
-
 def always_bytes(unicode_or_bytes, *encode_args):
     if isinstance(unicode_or_bytes, six.text_type):
         return unicode_or_bytes.encode(*encode_args)
@@ -188,71 +185,6 @@ def is_valid_port(port):
     return 0 <= port <= 65535
 
 
-# PY2 workaround
-def decode_parse_result(result, enc):
-    if hasattr(result, "decode"):
-        return result.decode(enc)
-    else:
-        return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
-
-
-# PY2 workaround
-def encode_parse_result(result, enc):
-    if hasattr(result, "encode"):
-        return result.encode(enc)
-    else:
-        return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
-
-
-def parse_url(url):
-    """
-        URL-parsing function that checks that
-            - port is an integer 0-65535
-            - host is a valid IDNA-encoded hostname with no null-bytes
-            - path is valid ASCII
-
-        Args:
-            A URL (as bytes or as unicode)
-
-        Returns:
-            A (scheme, host, port, path) tuple
-
-        Raises:
-            ValueError, if the URL is not properly formatted.
-    """
-    parsed = urllib.parse.urlparse(url)
-
-    if not parsed.hostname:
-        raise ValueError("No hostname given")
-
-    if isinstance(url, six.binary_type):
-        host = parsed.hostname
-
-        # this should not raise a ValueError,
-        # but we try to be very forgiving here and accept just everything.
-        # decode_parse_result(parsed, "ascii")
-    else:
-        host = parsed.hostname.encode("idna")
-        parsed = encode_parse_result(parsed, "ascii")
-
-    port = parsed.port
-    if not port:
-        port = 443 if parsed.scheme == b"https" else 80
-
-    full_path = urllib.parse.urlunparse(
-        (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
-    )
-    if not full_path.startswith(b"/"):
-        full_path = b"/" + full_path
-
-    if not is_valid_host(host):
-        raise ValueError("Invalid Host")
-    if not is_valid_port(port):
-        raise ValueError("Invalid Port")
-
-    return parsed.scheme, host, port, full_path
-
-
 def get_header_tokens(headers, key):
     """
         Retrieve all tokens for a header key. A number of different headers
@@ -278,33 +210,6 @@ def hostport(scheme, host, port):
             return "%s:%d" % (host, port)
 
 
-def unparse_url(scheme, host, port, path=""):
-    """
-    Returns a URL string, constructed from the specified components.
-
-    Args:
-        All args must be str.
-    """
-    if path == "*":
-        path = ""
-    return "%s://%s%s" % (scheme, hostport(scheme, host, port), path)
-
-
-def urlencode(s):
-    """
-        Takes a list of (key, value) tuples and returns a urlencoded string.
-    """
-    s = [tuple(i) for i in s]
-    return urllib.parse.urlencode(s, False)
-
-
-def urldecode(s):
-    """
-        Takes a urlencoded string and returns a list of (key, value) tuples.
-    """
-    return urllib.parse.parse_qsl(s, keep_blank_values=True)
-
-
 def parse_content_type(c):
     """
         A simple parser for content-type values. Returns a (type, subtype,
diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py
index c00afa5f..57e5ae99 100644
--- a/test/mitmproxy/test_contentview.py
+++ b/test/mitmproxy/test_contentview.py
@@ -1,8 +1,8 @@
 from mitmproxy.exceptions import ContentViewException
 from netlib.http import Headers
 from netlib.odict import ODict
-import netlib.utils
 from netlib import encoding
+from netlib.http import url
 
 import mitmproxy.contentviews as cv
 from . import tutils
@@ -60,10 +60,10 @@ class TestContentView:
         assert f[0] == "Query"
 
     def test_view_urlencoded(self):
-        d = netlib.utils.urlencode([("one", "two"), ("three", "four")])
+        d = url.urlencode([("one", "two"), ("three", "four")])
         v = cv.ViewURLEncoded()
         assert v(d)
-        d = netlib.utils.urlencode([("adsfa", "")])
+        d = url.urlencode([("adsfa", "")])
         v = cv.ViewURLEncoded()
         assert v(d)
 
diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py
new file mode 100644
index 00000000..d777a949
--- /dev/null
+++ b/test/netlib/http/test_url.py
@@ -0,0 +1,65 @@
+from netlib import tutils
+from netlib.http import url
+
+def test_parse_url():
+    with tutils.raises(ValueError):
+        url.parse_url("")
+
+    s, h, po, pa = url.parse_url(b"http://foo.com:8888/test")
+    assert s == b"http"
+    assert h == b"foo.com"
+    assert po == 8888
+    assert pa == b"/test"
+
+    s, h, po, pa = url.parse_url("http://foo/bar")
+    assert s == b"http"
+    assert h == b"foo"
+    assert po == 80
+    assert pa == b"/bar"
+
+    s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar")
+    assert s == b"http"
+    assert h == b"foo"
+    assert po == 80
+    assert pa == b"/bar"
+
+    s, h, po, pa = url.parse_url(b"http://foo")
+    assert pa == b"/"
+
+    s, h, po, pa = url.parse_url(b"https://foo")
+    assert po == 443
+
+    with tutils.raises(ValueError):
+        url.parse_url(b"https://foo:bar")
+
+    # Invalid IDNA
+    with tutils.raises(ValueError):
+        url.parse_url("http://\xfafoo")
+    # Invalid PATH
+    with tutils.raises(ValueError):
+        url.parse_url("http:/\xc6/localhost:56121")
+    # Null byte in host
+    with tutils.raises(ValueError):
+        url.parse_url("http://foo\0")
+    # Port out of range: expected to fall back to the scheme's default port
+    _, _, port, _ = url.parse_url("http://foo:999999")
+    assert port == 80
+    # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
+    with tutils.raises(ValueError):
+        url.parse_url('http://lo[calhost')
+
+
+def test_unparse_url():
+    assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
+    assert url.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
+    assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
+    assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
+
+
+def test_urlencode():
+    assert url.urlencode([('foo', 'bar')])
+
+
+def test_urldecode():
+    s = "one=two&three=four"
+    assert len(url.urldecode(s)) == 2
diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py
index cd629d77..f9315667 100644
--- a/test/netlib/test_utils.py
+++ b/test/netlib/test_utils.py
@@ -38,70 +38,6 @@ def test_pretty_size():
     assert utils.pretty_size(1024 * 1024) == "1MB"
 
 
-def test_parse_url():
-    with tutils.raises(ValueError):
-        utils.parse_url("")
-
-    s, h, po, pa = utils.parse_url(b"http://foo.com:8888/test")
-    assert s == b"http"
-    assert h == b"foo.com"
-    assert po == 8888
-    assert pa == b"/test"
-
-    s, h, po, pa = utils.parse_url("http://foo/bar")
-    assert s == b"http"
-    assert h == b"foo"
-    assert po == 80
-    assert pa == b"/bar"
-
-    s, h, po, pa = utils.parse_url(b"http://user:pass@foo/bar")
-    assert s == b"http"
-    assert h == b"foo"
-    assert po == 80
-    assert pa == b"/bar"
-
-    s, h, po, pa = utils.parse_url(b"http://foo")
-    assert pa == b"/"
-
-    s, h, po, pa = utils.parse_url(b"https://foo")
-    assert po == 443
-
-    with tutils.raises(ValueError):
-        utils.parse_url(b"https://foo:bar")
-
-    # Invalid IDNA
-    with tutils.raises(ValueError):
-        utils.parse_url("http://\xfafoo")
-    # Invalid PATH
-    with tutils.raises(ValueError):
-        utils.parse_url("http:/\xc6/localhost:56121")
-    # Null byte in host
-    with tutils.raises(ValueError):
-        utils.parse_url("http://foo\0")
-    # Port out of range
-    _, _, port, _ = utils.parse_url("http://foo:999999")
-    assert port == 80
-    # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
-    with tutils.raises(ValueError):
-        utils.parse_url('http://lo[calhost')
-
-
-def test_unparse_url():
-    assert utils.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
-    assert utils.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
-    assert utils.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
-    assert utils.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
-
-
-def test_urlencode():
-    assert utils.urlencode([('foo', 'bar')])
-
-
-def test_urldecode():
-    s = "one=two&three=four"
-    assert len(utils.urldecode(s)) == 2
-
-
 def test_get_header_tokens():
     headers = Headers()
     assert utils.get_header_tokens(headers, "foo") == []