aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mitmproxy/net/check.py26
-rw-r--r--test/mitmproxy/net/test_check.py64
2 files changed, 86 insertions, 4 deletions
diff --git a/mitmproxy/net/check.py b/mitmproxy/net/check.py
index a19ad6fe..90600195 100644
--- a/mitmproxy/net/check.py
+++ b/mitmproxy/net/check.py
@@ -1,8 +1,22 @@
import ipaddress
import re
-# Allow underscore in host name
-_label_valid = re.compile(br"(?!-)[A-Z\d\-_]{1,63}(?<!-)$", re.IGNORECASE)
+"""
+The rules for host names are different from DNS Names (aka "Label").
+DNS Names allow for hyphens and underscores (RFC-2782).
+Hostnames DO allow for hyphens, but not underscores. (RFC-952, RFC-1123)
+The main issue is that some DNS labels can actually be
+resolved to a valid IP address even though the label
+isn't a valid hostname (e.g. api-.example.com, @.example.com).
+
+Since the value we're checking could be an IP address, a host name, a DNS label, or an FQDN,
+and since DNS names and host names are sometimes misconfigured in violation of the RFCs,
+we'll go with the least restrictive rules while still providing a sanity check.
+"""
+
+# dotted-name regex: 4 to 255 chars in total, TLD 2 to 63 chars, every other label 1 to 63 chars
+_label_valid = re.compile(br"^(?=.{4,255}$)([A-Z0-9_-]([A-Z0-9_-]{0,61}[A-Z0-9_-])?\.){1,126}[A-Z0-9][A-Z0-9-]{0,61}[A-Z0-9]$", re.IGNORECASE)
+_host_valid = re.compile(br"[A-Z0-9\-_]{1,63}$", re.IGNORECASE)
def is_valid_host(host: bytes) -> bool:
@@ -16,10 +30,14 @@ def is_valid_host(host: bytes) -> bool:
# RFC1035: 255 bytes or less.
if len(host) > 255:
return False
+ # Trim trailing period
if host and host[-1:] == b".":
host = host[:-1]
- # DNS hostname
- if all(_label_valid.match(x) for x in host.split(b".")):
+ # DNS label
+ if b"." in host and _label_valid.match(host):
+ return True
+ # hostname
+ if b"." not in host and _host_valid.match(host):
return True
# IPv4/IPv6 address
try:
diff --git a/test/mitmproxy/net/test_check.py b/test/mitmproxy/net/test_check.py
index 0ffd6b2e..e9258d7c 100644
--- a/test/mitmproxy/net/test_check.py
+++ b/test/mitmproxy/net/test_check.py
@@ -12,3 +12,67 @@ def test_is_valid_host():
# Allow underscore
assert check.is_valid_host(b"one_two")
assert check.is_valid_host(b"::1")
+
+ # IPv6 Validations
+ assert check.is_valid_host(b'2001:0db8:85a3:0000:0000:8a2e:0370:7334')
+ assert check.is_valid_host(b'2001:db8:85a3:0:0:8a2e:370:7334')
+ assert check.is_valid_host(b'2001:db8:85a3::8a2e:370:7334')
+ assert not check.is_valid_host(b'2001:db8::85a3::7334')
+ assert check.is_valid_host(b'2001-db8-85a3-8d3-1319-8a2e-370-7348.ipv6-literal.net')
+
+ # TLD must be between 2 and 63 chars
+ assert not check.is_valid_host(b'example.t')
+ assert check.is_valid_host(b'example.tl')
+ assert check.is_valid_host(b'example.tld')
+ assert check.is_valid_host(b'example.' + b"x" * 63)
+ assert not check.is_valid_host(b'example.' + b"x" * 64)
+
+ # misc characters test
+ assert not check.is_valid_host(b'ex@mple')
+ assert not check.is_valid_host(b'ex@mple.com')
+ assert not check.is_valid_host(b'example..com')
+ assert not check.is_valid_host(b'.example.com')
+ assert not check.is_valid_host(b'@.example.com')
+ assert not check.is_valid_host(b'!.example.com')
+
+ # Every label must be between 1 and 63 chars
+ #assert not check.is_valid_host('.tld')
+ assert check.is_valid_host(b'x' * 1 + b'.tld')
+ assert check.is_valid_host(b'x' * 30 + b'.tld')
+ assert not check.is_valid_host(b'x' * 64 + b'.tld')
+ assert check.is_valid_host(b'x' * 1 + b'.example.tld')
+ assert check.is_valid_host(b'x' * 30 + b'.example.tld')
+ assert not check.is_valid_host(b'x' * 64 + b'.example.tld')
+
+ # Misc Underscore Test Cases
+ assert check.is_valid_host(b'_example')
+ assert check.is_valid_host(b'_example_')
+ assert check.is_valid_host(b'example_')
+ assert check.is_valid_host(b'_a.example.tld')
+ assert check.is_valid_host(b'a_.example.tld')
+ assert check.is_valid_host(b'_a_.example.tld')
+ assert not check.is_valid_host(b'a._example')
+ assert not check.is_valid_host(b'a._example_')
+ assert not check.is_valid_host(b'a.example_')
+
+ # Misc Dash/Hyphen/Minus Test Cases
+ assert check.is_valid_host(b'-example')
+ assert check.is_valid_host(b'-example_')
+ assert check.is_valid_host(b'example-')
+ assert check.is_valid_host(b'-a.example.tld')
+ assert check.is_valid_host(b'a-.example.tld')
+ assert check.is_valid_host(b'-a-.example.tld')
+ assert not check.is_valid_host(b'a.-example')
+ assert not check.is_valid_host(b'a.-example-')
+ assert not check.is_valid_host(b'a.example-')
+
+ # Misc Combo Test Cases
+ assert check.is_valid_host(b'api-.example.com')
+ assert check.is_valid_host(b'__a.example-site.com')
+ assert check.is_valid_host(b'_-a.example-site.com')
+ assert check.is_valid_host(b'_a_.example-site.com')
+ assert check.is_valid_host(b'-a-.example-site.com')
+ assert check.is_valid_host(b'api-.a.example.com')
+ assert check.is_valid_host(b'api-._a.example.com')
+ assert check.is_valid_host(b'api-.a_.example.com')
+ assert check.is_valid_host(b'api-.ab.example.com')
\ No newline at end of file