diff options
Diffstat (limited to 'mitmproxy/filt.py')
| -rw-r--r-- | mitmproxy/filt.py | 165 |
1 files changed, 120 insertions, 45 deletions
diff --git a/mitmproxy/filt.py b/mitmproxy/filt.py index d98e3749..8b647b22 100644 --- a/mitmproxy/filt.py +++ b/mitmproxy/filt.py @@ -35,10 +35,26 @@ from __future__ import absolute_import, print_function, division import re import sys +import functools + +from mitmproxy.models.http import HTTPFlow +from mitmproxy.models.tcp import TCPFlow +from netlib import strutils import pyparsing as pp +def only(*types): + def decorator(fn): + @functools.wraps(fn) + def filter_types(self, flow): + if isinstance(flow, types): + return fn(self, flow) + return False + return filter_types + return decorator + + class _Token(object): def dump(self, indent=0, fp=sys.stdout): @@ -64,10 +80,29 @@ class FErr(_Action): return True if f.error else False +class FHTTP(_Action): + code = "http" + help = "Match HTTP flows" + + @only(HTTPFlow) + def __call__(self, f): + return True + + +class FTCP(_Action): + code = "tcp" + help = "Match TCP flows" + + @only(TCPFlow) + def __call__(self, f): + return True + + class FReq(_Action): code = "q" help = "Match request with no response" + @only(HTTPFlow) def __call__(self, f): if not f.response: return True @@ -77,40 +112,47 @@ class FResp(_Action): code = "s" help = "Match response" + @only(HTTPFlow) def __call__(self, f): - return True if f.response else False + return bool(f.response) class _Rex(_Action): flags = 0 + is_binary = True def __init__(self, expr): self.expr = expr + if self.is_binary: + expr = strutils.escaped_str_to_bytes(expr) try: - self.re = re.compile(self.expr, self.flags) + self.re = re.compile(expr, self.flags) except: raise ValueError("Cannot compile expression.") -def _check_content_type(expr, o): - val = o.headers.get("content-type") - if val and re.search(expr, val): - return True - return False +def _check_content_type(rex, message): + return any( + name.lower() == b"content-type" and + rex.search(value) + for name, value in message.headers.fields + ) class FAsset(_Action): code = "a" help = "Match asset in response: CSS, Javascript, Flash, images." ASSET_TYPES = [ - "text/javascript", - "application/x-javascript", - "application/javascript", - "text/css", - "image/.*", - "application/x-shockwave-flash" + b"text/javascript", + b"application/x-javascript", + b"application/javascript", + b"text/css", + b"image/.*", + b"application/x-shockwave-flash" ] + ASSET_TYPES = [re.compile(x) for x in ASSET_TYPES] + @only(HTTPFlow) def __call__(self, f): if f.response: for i in self.ASSET_TYPES: @@ -123,29 +165,32 @@ class FContentType(_Rex): code = "t" help = "Content-type header" + @only(HTTPFlow) def __call__(self, f): - if _check_content_type(self.expr, f.request): + if _check_content_type(self.re, f.request): return True - elif f.response and _check_content_type(self.expr, f.response): + elif f.response and _check_content_type(self.re, f.response): return True return False -class FRequestContentType(_Rex): +class FContentTypeRequest(_Rex): code = "tq" help = "Request Content-Type header" + @only(HTTPFlow) def __call__(self, f): - return _check_content_type(self.expr, f.request) + return _check_content_type(self.re, f.request) -class FResponseContentType(_Rex): +class FContentTypeResponse(_Rex): code = "ts" help = "Response Content-Type header" + @only(HTTPFlow) def __call__(self, f): if f.response: - return _check_content_type(self.expr, f.response) + return _check_content_type(self.re, f.response) return False @@ -154,6 +199,7 @@ class FHead(_Rex): help = "Header" flags = re.MULTILINE + @only(HTTPFlow) def __call__(self, f): if f.request and self.re.search(bytes(f.request.headers)): return True @@ -167,6 +213,7 @@ class FHeadRequest(_Rex): help = "Request header" flags = re.MULTILINE + @only(HTTPFlow) def __call__(self, f): if f.request and self.re.search(bytes(f.request.headers)): return True @@ -177,6 +224,7 @@ class FHeadResponse(_Rex): help = "Response header" flags = re.MULTILINE + @only(HTTPFlow) def __call__(self, f): if f.response and self.re.search(bytes(f.response.headers)): return True @@ -186,13 +234,19 @@ class FBod(_Rex): code = "b" help = "Body" + @only(HTTPFlow, TCPFlow) def __call__(self, f): - if f.request and f.request.content: - if self.re.search(f.request.get_decoded_content()): - return True - if f.response and f.response.content: - if self.re.search(f.response.get_decoded_content()): - return True + if isinstance(f, HTTPFlow): + if f.request and f.request.raw_content: + if self.re.search(f.request.get_content(strict=False)): + return True + if f.response and f.response.raw_content: + if self.re.search(f.response.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if self.re.search(msg.content): + return True return False @@ -200,20 +254,32 @@ class FBodRequest(_Rex): code = "bq" help = "Request body" + @only(HTTPFlow, TCPFlow) def __call__(self, f): - if f.request and f.request.content: - if self.re.search(f.request.get_decoded_content()): - return True + if isinstance(f, HTTPFlow): + if f.request and f.request.raw_content: + if self.re.search(f.request.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if msg.from_client and self.re.search(msg.content): + return True class FBodResponse(_Rex): code = "bs" help = "Response body" + @only(HTTPFlow, TCPFlow) def __call__(self, f): - if f.response and f.response.content: - if self.re.search(f.response.get_decoded_content()): - return True + if isinstance(f, HTTPFlow): + if f.response and f.response.raw_content: + if self.re.search(f.response.get_content(strict=False)): + return True + elif isinstance(f, TCPFlow): + for msg in f.messages: + if not msg.from_client and self.re.search(msg.content): + return True class FMethod(_Rex): @@ -221,8 +287,9 @@ class FMethod(_Rex): help = "Method" flags = re.IGNORECASE + @only(HTTPFlow) def __call__(self, f): - return bool(self.re.search(f.request.method)) + return bool(self.re.search(f.request.data.method)) class FDomain(_Rex): @@ -230,13 +297,15 @@ class FDomain(_Rex): help = "Domain" flags = re.IGNORECASE + @only(HTTPFlow) def __call__(self, f): - return bool(self.re.search(f.request.host)) + return bool(self.re.search(f.request.data.host)) class FUrl(_Rex): code = "u" help = "URL" + is_binary = False # FUrl is special, because it can be "naked". @classmethod @@ -245,6 +314,7 @@ class FUrl(_Rex): toks = toks[1:] return klass(*toks) + @only(HTTPFlow) def __call__(self, f): return self.re.search(f.request.url) @@ -252,6 +322,7 @@ class FUrl(_Rex): class FSrc(_Rex): code = "src" help = "Match source address" + is_binary = False def __call__(self, f): return f.client_conn.address and self.re.search(repr(f.client_conn.address)) @@ -260,6 +331,7 @@ class FSrc(_Rex): class FDst(_Rex): code = "dst" help = "Match destination address" + is_binary = False def __call__(self, f): return f.server_conn.address and self.re.search(repr(f.server_conn.address)) @@ -275,6 +347,7 @@ class FCode(_Int): code = "c" help = "HTTP response code" + @only(HTTPFlow) def __call__(self, f): if f.response and f.response.status_code == self.num: return True @@ -322,26 +395,28 @@ class FNot(_Token): filt_unary = [ + FAsset, + FErr, + FHTTP, FReq, FResp, - FAsset, - FErr + FTCP, ] filt_rex = [ - FHeadRequest, - FHeadResponse, - FHead, + FBod, FBodRequest, FBodResponse, - FBod, - FMethod, - FDomain, - FUrl, - FRequestContentType, - FResponseContentType, FContentType, - FSrc, + FContentTypeRequest, + FContentTypeResponse, + FDomain, FDst, + FHead, + FHeadRequest, + FHeadResponse, + FMethod, + FSrc, + FUrl, ] filt_int = [ FCode |
