diff options
author | Aldo Cortesi <aldo@nullcube.com> | 2016-05-31 18:42:56 +1200 |
---|---|---|
committer | Aldo Cortesi <aldo@nullcube.com> | 2016-05-31 18:42:56 +1200 |
commit | 4e6c9c4e935458d23add259dc63c5e0a85fba9c8 (patch) | |
tree | 54421713fc65e0e8e8af3f6428b56d6ef096ff40 | |
parent | 08fbe6f1118455bc44d05db30b83bdf81feda2a0 (diff) | |
download | mitmproxy-4e6c9c4e935458d23add259dc63c5e0a85fba9c8.tar.gz mitmproxy-4e6c9c4e935458d23add259dc63c5e0a85fba9c8.tar.bz2 mitmproxy-4e6c9c4e935458d23add259dc63c5e0a85fba9c8.zip |
Extract url functions from netlib.utils and move to netlib.http.url
-rw-r--r-- | mitmproxy/cmdline.py | 4 | ||||
-rw-r--r-- | mitmproxy/console/flowlist.py | 4 | ||||
-rw-r--r-- | mitmproxy/contentviews.py | 5 | ||||
-rw-r--r-- | mitmproxy/protocol/http2.py | 4 | ||||
-rw-r--r-- | netlib/http/http1/read.py | 3 | ||||
-rw-r--r-- | netlib/http/http2/connections.py | 4 | ||||
-rw-r--r-- | netlib/http/request.py | 19 | ||||
-rw-r--r-- | netlib/http/url.py | 95 | ||||
-rw-r--r-- | netlib/utils.py | 95 | ||||
-rw-r--r-- | test/mitmproxy/test_contentview.py | 6 | ||||
-rw-r--r-- | test/netlib/http/test_url.py | 65 | ||||
-rw-r--r-- | test/netlib/test_utils.py | 64 |
12 files changed, 186 insertions, 182 deletions
diff --git a/mitmproxy/cmdline.py b/mitmproxy/cmdline.py index 8476f6f3..d8bbe448 100644 --- a/mitmproxy/cmdline.py +++ b/mitmproxy/cmdline.py @@ -6,7 +6,7 @@ import base64 import configargparse from netlib.tcp import Address, sslversion_choices -import netlib.utils +import netlib.http.url from . import filt, utils, version from .proxy import config @@ -105,7 +105,7 @@ def parse_setheader(s): def parse_server_spec(url): try: - p = netlib.utils.parse_url(url) + p = netlib.http.url.parse_url(url) if p[0] not in ("http", "https"): raise ValueError() except ValueError: diff --git a/mitmproxy/console/flowlist.py b/mitmproxy/console/flowlist.py index 78b30231..66d92fe1 100644 --- a/mitmproxy/console/flowlist.py +++ b/mitmproxy/console/flowlist.py @@ -1,7 +1,7 @@ from __future__ import absolute_import import urwid -import netlib.utils +import netlib.http.url from . import common, signals @@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox): ) def new_request(self, url, method): - parts = netlib.utils.parse_url(str(url)) + parts = netlib.http.url.parse_url(str(url)) if not parts: signals.status_message.send(message="Invalid Url") return diff --git a/mitmproxy/contentviews.py b/mitmproxy/contentviews.py index 1b0f389f..5c562f95 100644 --- a/mitmproxy/contentviews.py +++ b/mitmproxy/contentviews.py @@ -27,7 +27,8 @@ import html2text import six from netlib.odict import ODict from netlib import encoding -from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type +from netlib.http import url +from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type from . import utils from .exceptions import ContentViewException from .contrib import jsbeautifier @@ -257,7 +258,7 @@ class ViewURLEncoded(View): content_types = ["application/x-www-form-urlencoded"] def __call__(self, data, **metadata): - d = urldecode(data) + d = url.urldecode(data) return "URLEncoded form", format_dict(ODict(d)) diff --git a/mitmproxy/protocol/http2.py b/mitmproxy/protocol/http2.py index 24460ec9..f4a6cf9d 100644 --- a/mitmproxy/protocol/http2.py +++ b/mitmproxy/protocol/http2.py @@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame from netlib.tcp import ssl_read_select from netlib.exceptions import HttpException from netlib.http import Headers -from netlib.utils import parse_url from netlib.http.http2 import frame +import netlib.http.url from .base import Layer from .http import _HttpTransmissionLayer, HttpLayer @@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread): else: # pragma: no cover first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = parse_url(path) + scheme, host, port, _ = netlib.http.url.parse_url(path) if authority: host, _, port = authority.partition(':') diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py index d30976bd..f776d0b5 100644 --- a/netlib/http/http1/read.py +++ b/netlib/http/http1/read.py @@ -6,6 +6,7 @@ import re from ... import utils from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect from .. import Request, Response, Headers +from .. import url def read_request(rfile, body_size_limit=None): @@ -240,7 +241,7 @@ def _read_request_line(rfile): scheme, path = None, None else: form = "absolute" - scheme, host, port, path = utils.parse_url(path) + scheme, host, port, path = url.parse_url(path) _check_http_version(http_version) except ValueError: diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py index 03f1804b..4c15ee07 100644 --- a/netlib/http/http2/connections.py +++ b/netlib/http/http2/connections.py @@ -6,7 +6,7 @@ import hyperframe.frame from hpack.hpack import Encoder, Decoder from ... import utils -from .. import Headers, Response, Request +from .. import Headers, Response, Request, url from . import frame @@ -118,7 +118,7 @@ class HTTP2Protocol(object): else: first_line_format = "absolute" # FIXME: verify if path or :host contains what we need - scheme, host, port, _ = utils.parse_url(path) + scheme, host, port, _ = url.parse_url(path) scheme = scheme.decode('ascii') host = host.decode('ascii') diff --git a/netlib/http/request.py b/netlib/http/request.py index 80a9ae65..170066f7 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -6,6 +6,7 @@ import six from six.moves import urllib from netlib import utils +import netlib.http.url from . import cookies from .. import encoding from ..multidict import MultiDictView @@ -179,11 +180,11 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.host, self.port) - return utils.unparse_url(self.scheme, self.host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path) @url.setter def url(self, url): - self.scheme, self.host, self.port, self.path = utils.parse_url(url) + self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url) def _parse_host_header(self): """Extract the host and port from Host header""" @@ -219,7 +220,7 @@ class Request(Message): """ if self.first_line_format == "authority": return "%s:%d" % (self.pretty_host, self.port) - return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path) + return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path) @property def query(self): @@ -234,12 +235,12 @@ class Request(Message): def _get_query(self): _, _, _, _, query, _ = urllib.parse.urlparse(self.url) - return tuple(utils.urldecode(query)) + return tuple(netlib.http.url.urldecode(query)) def _set_query(self, value): - query = utils.urlencode(value) + query = netlib.http.url.urlencode(value) scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) @query.setter @@ -287,7 +288,7 @@ class Request(Message): components = map(lambda x: urllib.parse.quote(x, safe=""), components) path = "/" + "/".join(components) scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url) - _, _, _, self.path = utils.parse_url( + _, _, _, self.path = netlib.http.url.parse_url( urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment])) def anticache(self): @@ -339,7 +340,7 @@ class Request(Message): def _get_urlencoded_form(self): is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower() if is_valid_content_type: - return tuple(utils.urldecode(self.content)) + return tuple(netlib.http.url.urldecode(self.content)) return () def _set_urlencoded_form(self, value): @@ -348,7 +349,7 @@ class Request(Message): This will overwrite the existing content if there is one. """ self.headers["content-type"] = "application/x-www-form-urlencoded" - self.content = utils.urlencode(value) + self.content = netlib.http.url.urlencode(value) @urlencoded_form.setter def urlencoded_form(self, value): diff --git a/netlib/http/url.py b/netlib/http/url.py new file mode 100644 index 00000000..3961998b --- /dev/null +++ b/netlib/http/url.py @@ -0,0 +1,95 @@ +import six +from six.moves import urllib + +from .. import utils + +# PY2 workaround +def decode_parse_result(result, enc): + if hasattr(result, "decode"): + return result.decode(enc) + else: + return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) + + +# PY2 workaround +def encode_parse_result(result, enc): + if hasattr(result, "encode"): + return result.encode(enc) + else: + return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) + + +def parse_url(url): + """ + URL-parsing function that checks that + - port is an integer 0-65535 + - host is a valid IDNA-encoded hostname with no null-bytes + - path is valid ASCII + + Args: + A URL (as bytes or as unicode) + + Returns: + A (scheme, host, port, path) tuple + + Raises: + ValueError, if the URL is not properly formatted. + """ + parsed = urllib.parse.urlparse(url) + + if not parsed.hostname: + raise ValueError("No hostname given") + + if isinstance(url, six.binary_type): + host = parsed.hostname + + # this should not raise a ValueError, + # but we try to be very forgiving here and accept just everything. + # decode_parse_result(parsed, "ascii") + else: + host = parsed.hostname.encode("idna") + parsed = encode_parse_result(parsed, "ascii") + + port = parsed.port + if not port: + port = 443 if parsed.scheme == b"https" else 80 + + full_path = urllib.parse.urlunparse( + (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) + ) + if not full_path.startswith(b"/"): + full_path = b"/" + full_path + + if not utils.is_valid_host(host): + raise ValueError("Invalid Host") + if not utils.is_valid_port(port): + raise ValueError("Invalid Port") + + return parsed.scheme, host, port, full_path + + +def unparse_url(scheme, host, port, path=""): + """ + Returns a URL string, constructed from the specified components. + + Args: + All args must be str. + """ + if path == "*": + path = "" + return "%s://%s%s" % (scheme, utils.hostport(scheme, host, port), path) + + +def urlencode(s): + """ + Takes a list of (key, value) tuples and returns a urlencoded string. + """ + s = [tuple(i) for i in s] + return urllib.parse.urlencode(s, False) + + +def urldecode(s): + """ + Takes a urlencoded string and returns a list of (key, value) tuples. + """ + return urllib.parse.parse_qsl(s, keep_blank_values=True) diff --git a/netlib/utils.py b/netlib/utils.py index 770ad6a6..cd8aa55a 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -8,9 +8,6 @@ import inspect import six -from six.moves import urllib - - def always_bytes(unicode_or_bytes, *encode_args): if isinstance(unicode_or_bytes, six.text_type): return unicode_or_bytes.encode(*encode_args) @@ -188,71 +185,6 @@ def is_valid_port(port): return 0 <= port <= 65535 -# PY2 workaround -def decode_parse_result(result, enc): - if hasattr(result, "decode"): - return result.decode(enc) - else: - return urllib.parse.ParseResult(*[x.decode(enc) for x in result]) - - -# PY2 workaround -def encode_parse_result(result, enc): - if hasattr(result, "encode"): - return result.encode(enc) - else: - return urllib.parse.ParseResult(*[x.encode(enc) for x in result]) - - -def parse_url(url): - """ - URL-parsing function that checks that - - port is an integer 0-65535 - - host is a valid IDNA-encoded hostname with no null-bytes - - path is valid ASCII - - Args: - A URL (as bytes or as unicode) - - Returns: - A (scheme, host, port, path) tuple - - Raises: - ValueError, if the URL is not properly formatted. - """ - parsed = urllib.parse.urlparse(url) - - if not parsed.hostname: - raise ValueError("No hostname given") - - if isinstance(url, six.binary_type): - host = parsed.hostname - - # this should not raise a ValueError, - # but we try to be very forgiving here and accept just everything. - # decode_parse_result(parsed, "ascii") - else: - host = parsed.hostname.encode("idna") - parsed = encode_parse_result(parsed, "ascii") - - port = parsed.port - if not port: - port = 443 if parsed.scheme == b"https" else 80 - - full_path = urllib.parse.urlunparse( - (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment) - ) - if not full_path.startswith(b"/"): - full_path = b"/" + full_path - - if not is_valid_host(host): - raise ValueError("Invalid Host") - if not is_valid_port(port): - raise ValueError("Invalid Port") - - return parsed.scheme, host, port, full_path - - def get_header_tokens(headers, key): """ Retrieve all tokens for a header key. A number of different headers @@ -278,33 +210,6 @@ def hostport(scheme, host, port): return "%s:%d" % (host, port) -def unparse_url(scheme, host, port, path=""): - """ - Returns a URL string, constructed from the specified components. - - Args: - All args must be str. - """ - if path == "*": - path = "" - return "%s://%s%s" % (scheme, hostport(scheme, host, port), path) - - -def urlencode(s): - """ - Takes a list of (key, value) tuples and returns a urlencoded string. - """ - s = [tuple(i) for i in s] - return urllib.parse.urlencode(s, False) - - -def urldecode(s): - """ - Takes a urlencoded string and returns a list of (key, value) tuples. - """ - return urllib.parse.parse_qsl(s, keep_blank_values=True) - - def parse_content_type(c): """ A simple parser for content-type values. Returns a (type, subtype, diff --git a/test/mitmproxy/test_contentview.py b/test/mitmproxy/test_contentview.py index c00afa5f..57e5ae99 100644 --- a/test/mitmproxy/test_contentview.py +++ b/test/mitmproxy/test_contentview.py @@ -1,8 +1,8 @@ from mitmproxy.exceptions import ContentViewException from netlib.http import Headers from netlib.odict import ODict -import netlib.utils from netlib import encoding +from netlib.http import url import mitmproxy.contentviews as cv from . import tutils @@ -60,10 +60,10 @@ class TestContentView: assert f[0] == "Query" def test_view_urlencoded(self): - d = netlib.utils.urlencode([("one", "two"), ("three", "four")]) + d = url.urlencode([("one", "two"), ("three", "four")]) v = cv.ViewURLEncoded() assert v(d) - d = netlib.utils.urlencode([("adsfa", "")]) + d = url.urlencode([("adsfa", "")]) v = cv.ViewURLEncoded() assert v(d) diff --git a/test/netlib/http/test_url.py b/test/netlib/http/test_url.py new file mode 100644 index 00000000..d777a949 --- /dev/null +++ b/test/netlib/http/test_url.py @@ -0,0 +1,65 @@ +from netlib import tutils +from netlib.http import url + +def test_parse_url(): + with tutils.raises(ValueError): + url.parse_url("") + + s, h, po, pa = url.parse_url(b"http://foo.com:8888/test") + assert s == b"http" + assert h == b"foo.com" + assert po == 8888 + assert pa == b"/test" + + s, h, po, pa = url.parse_url("http://foo/bar") + assert s == b"http" + assert h == b"foo" + assert po == 80 + assert pa == b"/bar" + + s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar") + assert s == b"http" + assert h == b"foo" + assert po == 80 + assert pa == b"/bar" + + s, h, po, pa = url.parse_url(b"http://foo") + assert pa == b"/" + + s, h, po, pa = url.parse_url(b"https://foo") + assert po == 443 + + with tutils.raises(ValueError): + url.parse_url(b"https://foo:bar") + + # Invalid IDNA + with tutils.raises(ValueError): + url.parse_url("http://\xfafoo") + # Invalid PATH + with tutils.raises(ValueError): + url.parse_url("http:/\xc6/localhost:56121") + # Null byte in host + with tutils.raises(ValueError): + url.parse_url("http://foo\0") + # Port out of range + _, _, port, _ = url.parse_url("http://foo:999999") + assert port == 80 + # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt + with tutils.raises(ValueError): + url.parse_url('http://lo[calhost') + + +def test_unparse_url(): + assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99" + assert url.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar" + assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80" + assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com" + + +def test_urlencode(): + assert url.urlencode([('foo', 'bar')]) + + +def test_urldecode(): + s = "one=two&three=four" + assert len(url.urldecode(s)) == 2 diff --git a/test/netlib/test_utils.py b/test/netlib/test_utils.py index cd629d77..f9315667 100644 --- a/test/netlib/test_utils.py +++ b/test/netlib/test_utils.py @@ -38,70 +38,6 @@ def test_pretty_size(): assert utils.pretty_size(1024 * 1024) == "1MB" -def test_parse_url(): - with tutils.raises(ValueError): - utils.parse_url("") - - s, h, po, pa = utils.parse_url(b"http://foo.com:8888/test") - assert s == b"http" - assert h == b"foo.com" - assert po == 8888 - assert pa == b"/test" - - s, h, po, pa = utils.parse_url("http://foo/bar") - assert s == b"http" - assert h == b"foo" - assert po == 80 - assert pa == b"/bar" - - s, h, po, pa = utils.parse_url(b"http://user:pass@foo/bar") - assert s == b"http" - assert h == b"foo" - assert po == 80 - assert pa == b"/bar" - - s, h, po, pa = utils.parse_url(b"http://foo") - assert pa == b"/" - - s, h, po, pa = utils.parse_url(b"https://foo") - assert po == 443 - - with tutils.raises(ValueError): - utils.parse_url(b"https://foo:bar") - - # Invalid IDNA - with tutils.raises(ValueError): - utils.parse_url("http://\xfafoo") - # Invalid PATH - with tutils.raises(ValueError): - utils.parse_url("http:/\xc6/localhost:56121") - # Null byte in host - with tutils.raises(ValueError): - utils.parse_url("http://foo\0") - # Port out of range - _, _, port, _ = utils.parse_url("http://foo:999999") - assert port == 80 - # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt - with tutils.raises(ValueError): - utils.parse_url('http://lo[calhost') - - -def test_unparse_url(): - assert utils.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99" - assert utils.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar" - assert utils.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80" - assert utils.unparse_url("https", "foo.com", 443, "") == "https://foo.com" - - -def test_urlencode(): - assert utils.urlencode([('foo', 'bar')]) - - -def test_urldecode(): - s = "one=two&three=four" - assert len(utils.urldecode(s)) == 2 - - def test_get_header_tokens(): headers = Headers() assert utils.get_header_tokens(headers, "foo") == [] |