aboutsummaryrefslogtreecommitdiffstats
path: root/mitmproxy/flowfilter.py
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2019-11-15 02:27:33 +0100
committerMaximilian Hils <git@maximilianhils.com>2019-11-15 02:28:57 +0100
commit2239c49e18c62331a21aa75fa5e74e6a7ea88334 (patch)
tree0f5330a4e8fcc3d6cb839bcef0a80331bd6b7a71 /mitmproxy/flowfilter.py
parentcd660a035f74e00bd94a9b67c240ee44654ce1ce (diff)
downloadmitmproxy-2239c49e18c62331a21aa75fa5e74e6a7ea88334.tar.gz
mitmproxy-2239c49e18c62331a21aa75fa5e74e6a7ea88334.tar.bz2
mitmproxy-2239c49e18c62331a21aa75fa5e74e6a7ea88334.zip
improve flowfilter
Diffstat (limited to 'mitmproxy/flowfilter.py')
-rw-r--r--mitmproxy/flowfilter.py112
1 files changed, 48 insertions, 64 deletions
diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py
index 3f5afb48..b222d2a8 100644
--- a/mitmproxy/flowfilter.py
+++ b/mitmproxy/flowfilter.py
@@ -32,19 +32,17 @@
rex Equivalent to ~u rex
"""
+import functools
import re
import sys
-import functools
+from typing import Callable, ClassVar, Optional, Sequence, Type
+
+import pyparsing as pp
+from mitmproxy import flow
from mitmproxy import http
-from mitmproxy import websocket
from mitmproxy import tcp
-from mitmproxy import flow
-
-from mitmproxy.utils import strutils
-
-import pyparsing as pp
-from typing import Callable, Sequence, Type, Optional, ClassVar
+from mitmproxy import websocket
def only(*types):
@@ -54,7 +52,9 @@ def only(*types):
if isinstance(flow, types):
return fn(self, flow)
return False
+
return filter_types
+
return decorator
@@ -146,10 +146,10 @@ class _Rex(_Action):
def __init__(self, expr):
self.expr = expr
if self.is_binary:
- expr = strutils.escaped_str_to_bytes(expr)
+ expr = expr.encode()
try:
self.re = re.compile(expr, self.flags)
- except:
+ except Exception:
raise ValueError("Cannot compile expression.")
@@ -336,6 +336,7 @@ class FUrl(_Rex):
code = "u"
help = "URL"
is_binary = False
+
# FUrl is special, because it can be "naked".
@classmethod
@@ -469,68 +470,51 @@ def _make():
# Order is important - multi-char expressions need to come before narrow
# ones.
parts = []
- for klass in filter_unary:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd()
- f.setParseAction(klass.make)
+ for cls in filter_unary:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd()
+ f.setParseAction(cls.make)
parts.append(f)
- simplerex = "".join(c for c in pp.printables if c not in "()~'\"")
- alphdevanagari = pp.pyparsing_unicode.Devanagari.alphas
- alphcyrillic = pp.pyparsing_unicode.Cyrillic.alphas
- alphgreek = pp.pyparsing_unicode.Greek.alphas
- alphchinese = pp.pyparsing_unicode.Chinese.alphas
- alpharabic = pp.pyparsing_unicode.Arabic.alphas
- alphhebrew = pp.pyparsing_unicode.Hebrew.alphas
- alphjapanese = pp.pyparsing_unicode.Japanese.alphas
- alphkorean = pp.pyparsing_unicode.Korean.alphas
- alphlatin1 = pp.pyparsing_unicode.Latin1.alphas
- alphlatinA = pp.pyparsing_unicode.LatinA.alphas
- alphlatinB = pp.pyparsing_unicode.LatinB.alphas
-
- rex = pp.Word(simplerex) |\
- pp.Word(alphcyrillic) |\
- pp.Word(alphgreek) |\
- pp.Word(alphchinese) |\
- pp.Word(alpharabic) |\
- pp.Word(alphdevanagari) |\
- pp.Word(alphhebrew) |\
- pp.Word(alphjapanese) |\
- pp.Word(alphkorean) |\
- pp.Word(alphlatin1) |\
- pp.Word(alphlatinA) |\
- pp.Word(alphlatinB) |\
- pp.QuotedString("\"", escChar='\\') |\
- pp.QuotedString("'", escChar='\\')
- for klass in filter_rex:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy()
- f.setParseAction(klass.make)
+ # This is a bit of a hack to simulate Word(pyparsing_unicode.printables),
+ # which has a horrible performance with len(pyparsing.pyparsing_unicode.printables) == 1114060
+ unicode_words = pp.CharsNotIn("()~'\"" + pp.ParserElement.DEFAULT_WHITE_CHARS)
+ unicode_words.skipWhitespace = True
+ regex = (
+ unicode_words
+ | pp.QuotedString('"', escChar='\\')
+ | pp.QuotedString("'", escChar='\\')
+ )
+ for cls in filter_rex:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + regex.copy()
+ f.setParseAction(cls.make)
parts.append(f)
- for klass in filter_int:
- f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums)
- f.setParseAction(klass.make)
+ for cls in filter_int:
+ f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + pp.Word(pp.nums)
+ f.setParseAction(cls.make)
parts.append(f)
# A naked rex is a URL rex:
- f = rex.copy()
+ f = regex.copy()
f.setParseAction(FUrl.make)
parts.append(f)
atom = pp.MatchFirst(parts)
- expr = pp.operatorPrecedence(atom,
- [(pp.Literal("!").suppress(),
- 1,
- pp.opAssoc.RIGHT,
- lambda x: FNot(*x)),
- (pp.Literal("&").suppress(),
- 2,
- pp.opAssoc.LEFT,
- lambda x: FAnd(*x)),
- (pp.Literal("|").suppress(),
- 2,
- pp.opAssoc.LEFT,
- lambda x: FOr(*x)),
- ])
+ expr = pp.infixNotation(
+ atom,
+ [(pp.Literal("!").suppress(),
+ 1,
+ pp.opAssoc.RIGHT,
+ lambda x: FNot(*x)),
+ (pp.Literal("&").suppress(),
+ 2,
+ pp.opAssoc.LEFT,
+ lambda x: FAnd(*x)),
+ (pp.Literal("|").suppress(),
+ 2,
+ pp.opAssoc.LEFT,
+ lambda x: FOr(*x)),
+ ])
expr = pp.OneOrMore(expr)
return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
@@ -570,15 +554,15 @@ def match(flt, flow):
help = []
for a in filter_unary:
help.append(
- ("~%s" % a.code, a.help)
+ (f"~{a.code}", a.help)
)
for b in filter_rex:
help.append(
- ("~%s regex" % b.code, b.help)
+ (f"~{b.code} regex", b.help)
)
for c in filter_int:
help.append(
- ("~%s int" % c.code, c.help)
+ (f"~{c.code} int", c.help)
)
help.sort()
help.extend(