aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2019-11-15 15:57:28 +0100
committerGitHub <noreply@github.com>2019-11-15 15:57:28 +0100
commitbe2865cd7972ed0bdb5e902ae4284f86d773bf31 (patch)
tree39b553c5baed919ac66b090109f0756d84125407
parentcd660a035f74e00bd94a9b67c240ee44654ce1ce (diff)
parentbe46008b5ef0d71b65b8f4dab7528b4bcc7205e6 (diff)
downloadmitmproxy-be2865cd7972ed0bdb5e902ae4284f86d773bf31.tar.gz
mitmproxy-be2865cd7972ed0bdb5e902ae4284f86d773bf31.tar.bz2
mitmproxy-be2865cd7972ed0bdb5e902ae4284f86d773bf31.zip
Merge pull request #3699 from mhils/flowfilter2019
Improve flowfilter
-rw-r--r--mitmproxy/flowfilter.py112
-rw-r--r--setup.cfg2
2 files changed, 49 insertions, 65 deletions
diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py
index 3f5afb48..b222d2a8 100644
--- a/mitmproxy/flowfilter.py
+++ b/mitmproxy/flowfilter.py
@@ -32,19 +32,17 @@
rex Equivalent to ~u rex
"""
+import functools
import re
import sys
-import functools
+from typing import Callable, ClassVar, Optional, Sequence, Type
+
+import pyparsing as pp
+from mitmproxy import flow
from mitmproxy import http
-from mitmproxy import websocket
from mitmproxy import tcp
-from mitmproxy import flow
-
-from mitmproxy.utils import strutils
-
-import pyparsing as pp
-from typing import Callable, Sequence, Type, Optional, ClassVar
+from mitmproxy import websocket
def only(*types):
@@ -54,7 +52,9 @@ def only(*types):
if isinstance(flow, types):
return fn(self, flow)
return False
+
return filter_types
+
return decorator
@@ -146,10 +146,10 @@ class _Rex(_Action):
def __init__(self, expr):
self.expr = expr
if self.is_binary:
- expr = strutils.escaped_str_to_bytes(expr)
+ expr = expr.encode()
try:
self.re = re.compile(expr, self.flags)
- except:
+ except Exception:
raise ValueError("Cannot compile expression.")
@@ -336,6 +336,7 @@ class FUrl(_Rex):
code = "u"
help = "URL"
is_binary = False
+
# FUrl is special, because it can be "naked".
@classmethod
@@ -469,68 +470,51 @@ def _make():
# Order is important - multi-char expressions need to come before narrow
# ones.
parts = []
- for klass in filter_unary:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd()
- f.setParseAction(klass.make)
+ for cls in filter_unary:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd()
+ f.setParseAction(cls.make)
parts.append(f)
- simplerex = "".join(c for c in pp.printables if c not in "()~'\"")
- alphdevanagari = pp.pyparsing_unicode.Devanagari.alphas
- alphcyrillic = pp.pyparsing_unicode.Cyrillic.alphas
- alphgreek = pp.pyparsing_unicode.Greek.alphas
- alphchinese = pp.pyparsing_unicode.Chinese.alphas
- alpharabic = pp.pyparsing_unicode.Arabic.alphas
- alphhebrew = pp.pyparsing_unicode.Hebrew.alphas
- alphjapanese = pp.pyparsing_unicode.Japanese.alphas
- alphkorean = pp.pyparsing_unicode.Korean.alphas
- alphlatin1 = pp.pyparsing_unicode.Latin1.alphas
- alphlatinA = pp.pyparsing_unicode.LatinA.alphas
- alphlatinB = pp.pyparsing_unicode.LatinB.alphas
-
- rex = pp.Word(simplerex) |\
- pp.Word(alphcyrillic) |\
- pp.Word(alphgreek) |\
- pp.Word(alphchinese) |\
- pp.Word(alpharabic) |\
- pp.Word(alphdevanagari) |\
- pp.Word(alphhebrew) |\
- pp.Word(alphjapanese) |\
- pp.Word(alphkorean) |\
- pp.Word(alphlatin1) |\
- pp.Word(alphlatinA) |\
- pp.Word(alphlatinB) |\
- pp.QuotedString("\"", escChar='\\') |\
- pp.QuotedString("'", escChar='\\')
- for klass in filter_rex:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy()
- f.setParseAction(klass.make)
+ # This is a bit of a hack to simulate Word(pyparsing_unicode.printables),
+ # which has a horrible performance with len(pyparsing.pyparsing_unicode.printables) == 1114060
+ unicode_words = pp.CharsNotIn("()~'\"" + pp.ParserElement.DEFAULT_WHITE_CHARS)
+ unicode_words.skipWhitespace = True
+ regex = (
+ unicode_words
+ | pp.QuotedString('"', escChar='\\')
+ | pp.QuotedString("'", escChar='\\')
+ )
+ for cls in filter_rex:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + regex.copy()
+ f.setParseAction(cls.make)
parts.append(f)
- for klass in filter_int:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums)
- f.setParseAction(klass.make)
+ for cls in filter_int:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + pp.Word(pp.nums)
+ f.setParseAction(cls.make)
parts.append(f)
# A naked rex is a URL rex:
- f = rex.copy()
+ f = regex.copy()
f.setParseAction(FUrl.make)
parts.append(f)
atom = pp.MatchFirst(parts)
- expr = pp.operatorPrecedence(atom,
- [(pp.Literal("!").suppress(),
- 1,
- pp.opAssoc.RIGHT,
- lambda x: FNot(*x)),
- (pp.Literal("&").suppress(),
- 2,
- pp.opAssoc.LEFT,
- lambda x: FAnd(*x)),
- (pp.Literal("|").suppress(),
- 2,
- pp.opAssoc.LEFT,
- lambda x: FOr(*x)),
- ])
+ expr = pp.infixNotation(
+ atom,
+ [(pp.Literal("!").suppress(),
+ 1,
+ pp.opAssoc.RIGHT,
+ lambda x: FNot(*x)),
+ (pp.Literal("&").suppress(),
+ 2,
+ pp.opAssoc.LEFT,
+ lambda x: FAnd(*x)),
+ (pp.Literal("|").suppress(),
+ 2,
+ pp.opAssoc.LEFT,
+ lambda x: FOr(*x)),
+ ])
expr = pp.OneOrMore(expr)
return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
@@ -570,15 +554,15 @@ def match(flt, flow):
help = []
for a in filter_unary:
help.append(
- ("~%s" % a.code, a.help)
+ (f"~{a.code}", a.help)
)
for b in filter_rex:
help.append(
- ("~%s regex" % b.code, b.help)
+ (f"~{b.code} regex", b.help)
)
for c in filter_int:
help.append(
- ("~%s int" % c.code, c.help)
+ (f"~{c.code} int", c.help)
)
help.sort()
help.extend(
diff --git a/setup.cfg b/setup.cfg
index 0a34b805..e7643b08 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,7 @@
[flake8]
max-line-length = 140
max-complexity = 25
-ignore = E251,E252,C901,W292,W503,W504,W605,E722,E741
+ignore = E251,E252,C901,W292,W503,W504,W605,E722,E741,E126
exclude = mitmproxy/contrib/*,test/mitmproxy/data/*,release/build/*,mitmproxy/io/proto/*
addons = file,open,basestring,xrange,unicode,long,cmp