aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thomas Kriechbaumer <Kriechi@users.noreply.github.com> 2019-09-28 11:44:15 +0200
committer GitHub <noreply@github.com> 2019-09-28 11:44:15 +0200
commit 76bd3ef82dd6b8e3d00b9b4dfe56c96a3c22dd47 (patch)
tree 29628f3119da9ff5831cc23e113339bc56fd48f6
parent 26e55b0a7f8ebbb5543615d3bdb91c76a9d5b9d9 (diff)
parent 6e153b2c017be294a23e78469367346d0f9250e2 (diff)
download mitmproxy-76bd3ef82dd6b8e3d00b9b4dfe56c96a3c22dd47.tar.gz
mitmproxy-76bd3ef82dd6b8e3d00b9b4dfe56c96a3c22dd47.tar.bz2
mitmproxy-76bd3ef82dd6b8e3d00b9b4dfe56c96a3c22dd47.zip
Merge pull request #3486 from rjt-gupta/unicode-filter
filter unicode fix
-rw-r--r-- mitmproxy/flowfilter.py 23
-rw-r--r-- test/mitmproxy/test_flowfilter.py 23
2 files changed, 46 insertions, 0 deletions
diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py
index 7f8df96f..0d8f1062 100644
--- a/mitmproxy/flowfilter.py
+++ b/mitmproxy/flowfilter.py
@@ -475,7 +475,30 @@ def _make():
parts.append(f)
simplerex = "".join(c for c in pp.printables if c not in "()~'\"")
+ alphdevanagari = pp.pyparsing_unicode.Devanagari.alphas
+ alphcyrillic = pp.pyparsing_unicode.Cyrillic.alphas
+ alphgreek = pp.pyparsing_unicode.Greek.alphas
+ alphchinese = pp.pyparsing_unicode.Chinese.alphas
+ alpharabic = pp.pyparsing_unicode.Arabic.alphas
+ alphhebrew = pp.pyparsing_unicode.Hebrew.alphas
+ alphjapanese = pp.pyparsing_unicode.Japanese.alphas
+ alphkorean = pp.pyparsing_unicode.Korean.alphas
+ alphlatin1 = pp.pyparsing_unicode.Latin1.alphas
+ alphlatinA = pp.pyparsing_unicode.LatinA.alphas
+ alphlatinB = pp.pyparsing_unicode.LatinB.alphas
+
rex = pp.Word(simplerex) |\
+ pp.Word(alphcyrillic) |\
+ pp.Word(alphgreek) |\
+ pp.Word(alphchinese) |\
+ pp.Word(alpharabic) |\
+ pp.Word(alphdevanagari) |\
+ pp.Word(alphhebrew) |\
+ pp.Word(alphjapanese) |\
+ pp.Word(alphkorean) |\
+ pp.Word(alphlatin1) |\
+ pp.Word(alphlatinA) |\
+ pp.Word(alphlatinB) |\
pp.QuotedString("\"", escChar='\\') |\
pp.QuotedString("'", escChar='\\')
for klass in filter_rex:
diff --git a/test/mitmproxy/test_flowfilter.py b/test/mitmproxy/test_flowfilter.py
index 4eb37d81..d53cec7d 100644
--- a/test/mitmproxy/test_flowfilter.py
+++ b/test/mitmproxy/test_flowfilter.py
@@ -28,6 +28,9 @@ class TestParsing:
self._dump(p)
assert len(p.lst) == 2
+ def test_non_ascii(self):
+ assert flowfilter.parse("~s шгн")
+
def test_naked_url(self):
a = flowfilter.parse("foobar ~h rex")
assert a.lst[0].expr == "foobar"
@@ -173,10 +176,30 @@ class TestMatchingHTTPFlow:
assert not self.q("~bq message", q)
assert not self.q("~bq message", s)
+ s.response.text = 'яч' # Cyrillic
+ assert self.q("~bs яч", s)
+ s.response.text = '测试' # Chinese
+ assert self.q('~bs 测试', s)
+ s.response.text = 'ॐ' # Hindi
+ assert self.q('~bs ॐ', s)
+ s.response.text = 'لله' # Arabic
+ assert self.q('~bs لله', s)
+ s.response.text = 'θεός' # Greek
+ assert self.q('~bs θεός', s)
+ s.response.text = 'לוהים' # Hebrew
+ assert self.q('~bs לוהים', s)
+ s.response.text = '神' # Japanese
+ assert self.q('~bs 神', s)
+ s.response.text = '하나님' # Korean
+ assert self.q('~bs 하나님', s)
+ s.response.text = 'Äÿ' # Latin
+ assert self.q('~bs Äÿ', s)
+
assert not self.q("~bs nomatch", s)
assert not self.q("~bs content", q)
assert not self.q("~bs content", s)
assert not self.q("~bs message", q)
+ s.response.text = 'message'
assert self.q("~bs message", s)
def test_body(self):