diff options
author | Marcus R. Matos <marcus@matosconsulting.com> | 2020-04-03 19:56:54 -0500 |
---|---|---|
committer | Marcus R. Matos <marcus@matosconsulting.com> | 2020-04-03 19:56:54 -0500 |
commit | 1039d09ed618afadf5d24a741d85ec34be29edd7 (patch) | |
tree | 7ae8272aae7f18e6893c4896b385b75cda10fe44 | |
parent | 6acabbb4f5977427dd08adb0d32b06c0007315d2 (diff) | |
download | mitmproxy-1039d09ed618afadf5d24a741d85ec34be29edd7.tar.gz mitmproxy-1039d09ed618afadf5d24a741d85ec34be29edd7.tar.bz2 mitmproxy-1039d09ed618afadf5d24a741d85ec34be29edd7.zip |
#3885 handle hyphens in domain name, enhance validation checks
-rw-r--r-- | mitmproxy/net/check.py | 26 |
-rw-r--r-- | test/mitmproxy/net/test_check.py | 64 |
2 files changed, 86 insertions, 4 deletions
```diff
diff --git a/mitmproxy/net/check.py b/mitmproxy/net/check.py
index a19ad6fe..90600195 100644
--- a/mitmproxy/net/check.py
+++ b/mitmproxy/net/check.py
@@ -1,8 +1,22 @@
 import ipaddress
 import re
 
-# Allow underscore in host name
-_label_valid = re.compile(br"(?!-)[A-Z\d\-_]{1,63}(?<!-)$", re.IGNORECASE)
+"""
+The rules for host names are different from DNS Names (aka "Label").
+DNS Names allow for hyphens and underscores (RFC-2872).
+Hostnames DO allow for hyphens, but not underscores. (RFC-952, RFC-1123)
+The main issue is the existence of DNS labels that are actually
+capable of being resolved to a valid IP, even if the label
+isn't a valid hostname (e.g. api-.example.com, @.example.com)
+
+Since the value we're checking could be an IP, a host name, a DNS label, or a FQDN,
+and there are cases where DNS or Hostnames are misconfigured despite RFC
+we'll go with the least restrictive rules while still providing a sanity check.
+"""
+
+# label regex: in total between 4 and 255 chars, tld 2 to 63 chars, each label 1 to 63 chars
+_label_valid = re.compile(br"^(?=.{4,255}$)([A-Z0-9_-]([A-Z0-9_-]{0,61}[A-Z0-9_-])?\.){1,126}[A-Z0-9][A-Z0-9-]{0,61}[A-Z0-9]$", re.IGNORECASE)
+_host_valid = re.compile(br"[A-Z0-9\-_]{1,63}$", re.IGNORECASE)
 
 
 def is_valid_host(host: bytes) -> bool:
@@ -16,10 +30,14 @@ def is_valid_host(host: bytes) -> bool:
     # RFC1035: 255 bytes or less.
     if len(host) > 255:
         return False
+    # Trim trailing period
     if host and host[-1:] == b".":
         host = host[:-1]
-    # DNS hostname
-    if all(_label_valid.match(x) for x in host.split(b".")):
+    # DNS label
+    if b"." in host and _label_valid.match(host):
+        return True
+    # hostname
+    if b"." not in host and _host_valid.match(host):
         return True
     # IPv4/IPv6 address
     try:
diff --git a/test/mitmproxy/net/test_check.py b/test/mitmproxy/net/test_check.py
index 0ffd6b2e..e9258d7c 100644
--- a/test/mitmproxy/net/test_check.py
+++ b/test/mitmproxy/net/test_check.py
@@ -12,3 +12,67 @@ def test_is_valid_host():
     # Allow underscore
     assert check.is_valid_host(b"one_two")
     assert check.is_valid_host(b"::1")
+
+    # IPv6 Validations
+    assert check.is_valid_host(b'2001:0db8:85a3:0000:0000:8a2e:0370:7334')
+    assert check.is_valid_host(b'2001:db8:85a3:0:0:8a2e:370:7334')
+    assert check.is_valid_host(b'2001:db8:85a3::8a2e:370:7334')
+    assert not check.is_valid_host(b'2001:db8::85a3::7334')
+    assert check.is_valid_host(b'2001-db8-85a3-8d3-1319-8a2e-370-7348.ipv6-literal.net')
+
+    # TLD must be between 2 and 63 chars
+    assert not check.is_valid_host(b'example.t')
+    assert check.is_valid_host(b'example.tl')
+    assert check.is_valid_host(b'example.tld')
+    assert check.is_valid_host(b'example.' + b"x" * 63)
+    assert not check.is_valid_host(b'example.' + b"x" * 64)
+
+    # misc characters test
+    assert not check.is_valid_host(b'ex@mple')
+    assert not check.is_valid_host(b'ex@mple.com')
+    assert not check.is_valid_host(b'example..com')
+    assert not check.is_valid_host(b'.example.com')
+    assert not check.is_valid_host(b'@.example.com')
+    assert not check.is_valid_host(b'!.example.com')
+
+    # Every label must be between 1 and 63 chars
+    #assert not check.is_valid_host('.tld')
+    assert check.is_valid_host(b'x' * 1 + b'.tld')
+    assert check.is_valid_host(b'x' * 30 + b'.tld')
+    assert not check.is_valid_host(b'x' * 64 + b'.tld')
+    assert check.is_valid_host(b'x' * 1 + b'.example.tld')
+    assert check.is_valid_host(b'x' * 30 + b'.example.tld')
+    assert not check.is_valid_host(b'x' * 64 + b'.example.tld')
+
+    # Misc Underscore Test Cases
+    assert check.is_valid_host(b'_example')
+    assert check.is_valid_host(b'_example_')
+    assert check.is_valid_host(b'example_')
+    assert check.is_valid_host(b'_a.example.tld')
+    assert check.is_valid_host(b'a_.example.tld')
+    assert check.is_valid_host(b'_a_.example.tld')
+    assert not check.is_valid_host(b'a._example')
+    assert not check.is_valid_host(b'a._example_')
+    assert not check.is_valid_host(b'a.example_')
+
+    # Misc Dash/Hyphen/Minus Test Cases
+    assert check.is_valid_host(b'-example')
+    assert check.is_valid_host(b'-example_')
+    assert check.is_valid_host(b'example-')
+    assert check.is_valid_host(b'-a.example.tld')
+    assert check.is_valid_host(b'a-.example.tld')
+    assert check.is_valid_host(b'-a-.example.tld')
+    assert not check.is_valid_host(b'a.-example')
+    assert not check.is_valid_host(b'a.-example-')
+    assert not check.is_valid_host(b'a.example-')
+
+    # Misc Combo Test Cases
+    assert check.is_valid_host(b'api-.example.com')
+    assert check.is_valid_host(b'__a.example-site.com')
+    assert check.is_valid_host(b'_-a.example-site.com')
+    assert check.is_valid_host(b'_a_.example-site.com')
+    assert check.is_valid_host(b'-a-.example-site.com')
+    assert check.is_valid_host(b'api-.a.example.com')
+    assert check.is_valid_host(b'api-._a.example.com')
+    assert check.is_valid_host(b'api-.a_.example.com')
+    assert check.is_valid_host(b'api-.ab.example.com')
\ No newline at end of file
```