aboutsummaryrefslogtreecommitdiffstats
path: root/examples/complex
diff options
context:
space:
mode:
authorNikhil Soni <krsoninikhil@gmail.com>2017-03-03 12:58:44 +0530
committerGitHub <noreply@github.com>2017-03-03 12:58:44 +0530
commit0081d9b82807b178bd6d00ca250d38aeeeed2d33 (patch)
tree8d244a7b9ade13f0e1836f8eb9fbc534a1cc662d /examples/complex
parent3da8532bed3305b01e3f3ab556f9dbc652177c6b (diff)
parentbae4cdf8d5cc434938c74a041f762075513dd8e4 (diff)
downloadmitmproxy-0081d9b82807b178bd6d00ca250d38aeeeed2d33.tar.gz
mitmproxy-0081d9b82807b178bd6d00ca250d38aeeeed2d33.tar.bz2
mitmproxy-0081d9b82807b178bd6d00ca250d38aeeeed2d33.zip
Merge branch 'master' into on-issues
Diffstat (limited to 'examples/complex')
-rw-r--r--examples/complex/har_dump.py10
-rwxr-xr-xexamples/complex/xss_scanner.py407
2 files changed, 410 insertions, 7 deletions
diff --git a/examples/complex/har_dump.py b/examples/complex/har_dump.py
index f7c1e658..51983b54 100644
--- a/examples/complex/har_dump.py
+++ b/examples/complex/har_dump.py
@@ -10,7 +10,7 @@ import zlib
import os
from datetime import datetime
-import pytz
+from datetime import timezone
import mitmproxy
@@ -89,7 +89,7 @@ def response(flow):
# Timings set to -1 will be ignored as per spec.
full_time = sum(v for v in timings.values() if v > -1)
- started_date_time = format_datetime(datetime.utcfromtimestamp(flow.request.timestamp_start))
+ started_date_time = datetime.fromtimestamp(flow.request.timestamp_start, timezone.utc).isoformat()
# Response body size and encoding
response_body_size = len(flow.response.raw_content)
@@ -173,10 +173,6 @@ def done():
mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump))
-def format_datetime(dt):
- return dt.replace(tzinfo=pytz.timezone("UTC")).isoformat()
-
-
def format_cookies(cookie_list):
rv = []
@@ -198,7 +194,7 @@ def format_cookies(cookie_list):
# Expiration time needs to be formatted
expire_ts = cookies.get_expiration_ts(attrs)
if expire_ts is not None:
- cookie_har["expires"] = format_datetime(datetime.fromtimestamp(expire_ts))
+ cookie_har["expires"] = datetime.fromtimestamp(expire_ts, timezone.utc).isoformat()
rv.append(cookie_har)
diff --git a/examples/complex/xss_scanner.py b/examples/complex/xss_scanner.py
new file mode 100755
index 00000000..a0572d5d
--- /dev/null
+++ b/examples/complex/xss_scanner.py
@@ -0,0 +1,407 @@
+"""
+
+ __ __ _____ _____ _____
+ \ \ / // ____/ ____| / ____|
+ \ V /| (___| (___ | (___ ___ __ _ _ __ _ __ ___ _ __
+ > < \___ \\___ \ \___ \ / __/ _` | '_ \| '_ \ / _ \ '__|
+ / . \ ____) |___) | ____) | (_| (_| | | | | | | | __/ |
+ /_/ \_\_____/_____/ |_____/ \___\__,_|_| |_|_| |_|\___|_|
+
+
+This script automatically scans all visited webpages for XSS and SQLi vulnerabilities.
+
+Usage: mitmproxy -s xss_scanner.py
+
+This script scans for vulnerabilities by injecting a fuzzing payload (see PAYLOAD below) into 4 different places
+and examining the HTML to look for XSS and SQLi injection vulnerabilities. The XSS scanning functionality works by
+looking to see whether it is possible to inject HTML based on where the payload appears in the page and what
+characters are escaped. In addition, it also looks for any script tags that load javascript from unclaimed domains.
+The SQLi scanning functionality works by using regular expressions to look for errors from a number of different
+common databases. Since it is only looking for errors, it will not find blind SQLi vulnerabilities.
+
+The 4 places it injects the payload into are:
+1. URLs (e.g. https://example.com/ -> https://example.com/PAYLOAD/)
+2. Queries (e.g. https://example.com/index.html?a=b -> https://example.com/index.html?a=PAYLOAD)
+3. Referers (e.g. The referer changes from https://example.com to PAYLOAD)
+4. User Agents (e.g. The UA changes from Chrome to PAYLOAD)
+
+Reports from this script show up in the event log (viewable by pressing e) and formatted like:
+
+===== XSS Found ====
+XSS URL: http://daviddworken.com/vulnerableUA.php
+Injection Point: User Agent
+Suggested Exploit: <script>alert(0)</script>
+Line: 1029zxcs'd"ao<ac>so[sb]po(pc)se;sl/bsl\eq=3847asd
+
+"""
+
+from mitmproxy import ctx
+from socket import gaierror, gethostbyname
+from urllib.parse import urlparse
+import requests
+import re
+from html.parser import HTMLParser
+from mitmproxy import http
+from typing import Dict, Union, Tuple, Optional, List, NamedTuple
+
+# The actual payload is put between a front wall (FRONT_WALL) and a back wall
+# (BACK_WALL) to make it easy to locate the payload with regular expressions.
+FRONT_WALL = b"1029zxc"
+BACK_WALL = b"3847asd"
+# Each special character is wrapped in letter markers (e.g. "ao<ac"); get_XSS_data
+# searches a reflected payload for these marker-wrapped sequences to decide which
+# characters can be injected.
+PAYLOAD = b"""s'd"ao<ac>so[sb]po(pc)se;sl/bsl\\eq="""
+FULL_PAYLOAD = FRONT_WALL + PAYLOAD + BACK_WALL
+
+# An XSSData is a named tuple with the following fields:
+# - url -> str
+# - injection_point -> str
+# - exploit -> str
+# - line -> str
+XSSData = NamedTuple('XSSData', [('url', str),
+                                 ('injection_point', str),
+                                 ('exploit', str),
+                                 ('line', str)])
+
+# A SQLiData is a named tuple with the following fields:
+# - url -> str
+# - injection_point -> str
+# - regex -> str
+# - dbms -> str
+SQLiData = NamedTuple('SQLiData', [('url', str),
+                                   ('injection_point', str),
+                                   ('regex', str),
+                                   ('dbms', str)])
+
+
+# Result of one injection test: (XSS finding or None, SQLi finding or None).
+VulnData = Tuple[Optional[XSSData], Optional[SQLiData]]
+# Mapping from cookie name to cookie value.
+Cookies = Dict[str, str]
+
+
+def get_cookies(flow: http.HTTPFlow) -> Cookies:
+    """Build a dict going from cookie names to cookie values.
+
+    The pairs are taken from ``flow.request.cookies.fields``; when a name
+    appears more than once, the last value wins.
+    """
+    cookie_jar = {}
+    for cookie_name, cookie_value in flow.request.cookies.fields:
+        cookie_jar[cookie_name] = cookie_value
+    return cookie_jar
+
+
+def find_unclaimed_URLs(body: Union[str, bytes], requestUrl: bytes) -> None:
+    """Look for script tags that load from unresolvable hosts and log them.
+
+    Every ``<script src=...>`` in *body* is collected; for each source that
+    names a host, the host is resolved with ``gethostbyname``. A resolution
+    failure is reported through ``ctx.log.error`` together with *requestUrl*.
+
+    Args:
+        body: The HTML to inspect; bytes are decoded as UTF-8 first.
+        requestUrl: URL of the page, used only in the log message.
+    """
+    class ScriptURLExtractor(HTMLParser):
+        """Collect the ``src`` value of every ``<script>`` start tag."""
+
+        def __init__(self):
+            super().__init__()
+            # Instance attribute rather than a shared class-level list.
+            self.script_URLs = []
+
+        def handle_starttag(self, tag, attrs):
+            if tag != "script":
+                return
+            for name, value in attrs:
+                # A bare ``src`` attribute (no value) is reported as None and
+                # an empty one as ""; neither names anything to resolve.
+                if name == "src" and value:
+                    self.script_URLs.append(value)
+
+    extractor = ScriptURLExtractor()
+    # HTMLParser.feed() only accepts str, so decode a bytes body first.
+    extractor.feed(body.decode('utf-8') if isinstance(body, bytes) else body)
+    for url in extractor.script_URLs:
+        # ``hostname`` drops any port or user-info part that ``netloc`` would
+        # keep and that gethostbyname() cannot resolve.
+        domain = urlparse(url).hostname
+        if not domain:
+            # Relative URLs are served by the page's own host; nothing to check.
+            continue
+        try:
+            gethostbyname(domain)
+        except gaierror:
+            ctx.log.error("XSS found in %s due to unclaimed URL \"%s\" in script tag." % (requestUrl, url))
+
+
+def test_end_of_URL_injection(original_body: str, request_URL: str, cookies: Cookies) -> VulnData:
+    """Request the URL with the payload appended to its path and analyse the reply.
+
+    Returns the (XSS, SQLi) findings for the lower-cased response body; either
+    element is None when nothing was detected.
+    """
+    url_parts = urlparse(request_URL)
+    injected_path = url_parts.path
+    # A non-empty path must end in a slash before the payload is appended.
+    if injected_path and not injected_path.endswith("/"):
+        injected_path += "/"
+    # The path is a str while the payload is bytes, hence the decode.
+    injected_path += FULL_PAYLOAD.decode('utf-8')
+    url = url_parts._replace(path=injected_path).geturl()
+    response_text = requests.get(url, cookies=cookies).text
+    body = response_text.lower()
+    return (
+        get_XSS_data(body, url, "End of URL"),
+        get_SQLi_data(body, original_body, url, "End of URL"),
+    )
+
+
+def test_referer_injection(original_body: str, request_URL: str, cookies: Cookies) -> VulnData:
+    """Request the URL with the payload as the referer header and analyse the reply.
+
+    Returns the (XSS, SQLi) findings for the lower-cased response body; either
+    element is None when nothing was detected.
+    """
+    injected_headers = {'referer': FULL_PAYLOAD}
+    response_text = requests.get(request_URL, headers=injected_headers, cookies=cookies).text
+    body = response_text.lower()
+    return (
+        get_XSS_data(body, request_URL, "Referer"),
+        get_SQLi_data(body, original_body, request_URL, "Referer"),
+    )
+
+
+def test_user_agent_injection(original_body: str, request_URL: str, cookies: Cookies) -> VulnData:
+    """Request the URL with the payload as the User-Agent header and analyse the reply.
+
+    Returns the (XSS, SQLi) findings for the lower-cased response body; either
+    element is None when nothing was detected.
+    """
+    injected_headers = {'User-Agent': FULL_PAYLOAD}
+    response_text = requests.get(request_URL, headers=injected_headers, cookies=cookies).text
+    body = response_text.lower()
+    return (
+        get_XSS_data(body, request_URL, "User Agent"),
+        get_SQLi_data(body, original_body, request_URL, "User Agent"),
+    )
+
+
+def test_query_injection(original_body: str, request_URL: str, cookies: Cookies) -> VulnData:
+    """Request the URL with every query parameter set to the payload and analyse the reply.
+
+    Each ``&``-separated query item keeps its name (the text before the first
+    ``=``) and gets the payload as its value.
+
+    Returns the (XSS, SQLi) findings for the lower-cased response body; either
+    element is None when nothing was detected.
+    """
+    parsed_URL = urlparse(request_URL)
+    payload = FULL_PAYLOAD.decode('utf-8')  # the query is a str while the payload is bytes
+    # queries is a list of parameters where each parameter is set to the payload
+    queries = [query.partition("=")[0] + "=" + payload for query in parsed_URL.query.split("&")]
+    new_URL = parsed_URL._replace(query="&".join(queries)).geturl()
+    body = requests.get(new_URL, cookies=cookies).text.lower()
+    xss_info = get_XSS_data(body, new_URL, "Query")
+    sqli_info = get_SQLi_data(body, original_body, new_URL, "Query")
+    return xss_info, sqli_info
+
+
+def log_XSS_data(xss_info: Optional[XSSData]) -> None:
+    """Write the given XSS finding to mitmproxy's log at error level.
+
+    Nothing is logged when *xss_info* is None (or otherwise falsy).
+    """
+    if xss_info:
+        ctx.log.error("===== XSS Found ====")
+        report_lines = (
+            ("XSS URL: %s", xss_info.url),
+            ("Injection Point: %s", xss_info.injection_point),
+            ("Suggested Exploit: %s", xss_info.exploit),
+            ("Line: %s", xss_info.line),
+        )
+        for template, value in report_lines:
+            ctx.log.error(template % value)
+
+
+def log_SQLi_data(sqli_info: Optional[SQLiData]) -> None:
+    """Write the given SQLi finding to mitmproxy's log at error level.
+
+    Nothing is logged when *sqli_info* is None (or otherwise falsy).
+
+    The SQLiData fields are declared as str, so they are logged as they are;
+    calling ``.decode`` on them unconditionally would raise AttributeError.
+    Bytes values are still decoded as UTF-8 for backward compatibility.
+    """
+    if not sqli_info:
+        return
+
+    def as_text(value):
+        """Return value as str, decoding it only when it is bytes."""
+        return value.decode('utf-8') if isinstance(value, bytes) else value
+
+    ctx.log.error("===== SQLi Found =====")
+    ctx.log.error("SQLi URL: %s" % as_text(sqli_info.url))
+    ctx.log.error("Injection Point: %s" % as_text(sqli_info.injection_point))
+    ctx.log.error("Regex used: %s" % as_text(sqli_info.regex))
+    ctx.log.error("Suspected DBMS: %s" % as_text(sqli_info.dbms))
+
+
+def get_SQLi_data(new_body: str, original_body: str, request_URL: str, injection_point: str) -> Optional[SQLiData]:
+    """Return a SQLiData if the injected response shows a new database error, otherwise None.
+
+    A finding is reported for the first error pattern that matches *new_body*
+    but does not match *original_body*, so errors already present on the
+    unmodified page are ignored.
+
+    Matching is case-insensitive: the callers in this file lower-case the
+    response body before passing it in, so the mixed-case patterns below could
+    never match with a case-sensitive search.
+
+    Args:
+        new_body: Response body obtained with the payload injected.
+        original_body: Response body of the unmodified request.
+        request_URL: URL that was requested; stored in the result.
+        injection_point: Name of the injection point; stored in the result.
+    """
+    # Regexes taken from Damn Small SQLi Scanner: https://github.com/stamparm/DSSS/blob/master/dsss.py#L17
+    DBMS_ERRORS = {
+        "MySQL": (r"SQL syntax.*MySQL", r"Warning.*mysql_.*", r"valid MySQL result", r"MySqlClient\."),
+        "PostgreSQL": (r"PostgreSQL.*ERROR", r"Warning.*\Wpg_.*", r"valid PostgreSQL result", r"Npgsql\."),
+        "Microsoft SQL Server": (r"Driver.* SQL[\-\_\ ]*Server", r"OLE DB.* SQL Server", r"(\W|\A)SQL Server.*Driver",
+                                 r"Warning.*mssql_.*", r"(\W|\A)SQL Server.*[0-9a-fA-F]{8}",
+                                 r"(?s)Exception.*\WSystem\.Data\.SqlClient\.", r"(?s)Exception.*\WRoadhouse\.Cms\."),
+        "Microsoft Access": (r"Microsoft Access Driver", r"JET Database Engine", r"Access Database Engine"),
+        "Oracle": (r"\bORA-[0-9][0-9][0-9][0-9]", r"Oracle error", r"Oracle.*Driver", r"Warning.*\Woci_.*", r"Warning.*\Wora_.*"),
+        "IBM DB2": (r"CLI Driver.*DB2", r"DB2 SQL error", r"\bdb2_\w+\("),
+        "SQLite": (r"SQLite/JDBCDriver", r"SQLite.Exception", r"System.Data.SQLite.SQLiteException", r"Warning.*sqlite_.*",
+                   r"Warning.*SQLite3::", r"\[SQLITE_ERROR\]"),
+        "Sybase": (r"(?i)Warning.*sybase.*", r"Sybase message", r"Sybase.*Server message.*"),
+    }
+    for dbms, regexes in DBMS_ERRORS.items():
+        for regex in regexes:
+            in_new_body = re.search(regex, new_body, re.IGNORECASE)
+            if in_new_body and not re.search(regex, original_body, re.IGNORECASE):
+                return SQLiData(request_URL,
+                                injection_point,
+                                regex,
+                                dbms)
+    return None
+
+
+# A qc is either ' or "
+def inside_quote(qc: str, substring: bytes, text_index: int, body: bytes) -> bool:
+    """Whether the text_index-th (0-based) occurrence of substring in body
+    lies inside quotes made of the quote character qc.
+
+    Both byte strings are decoded as UTF-8 and body is scanned left to right.
+    Each qc that is not directly preceded by a backslash flips the quote state.
+    False is returned when body has fewer than text_index + 1 occurrences.
+    """
+    needle = substring.decode('utf-8')
+    haystack = body.decode('utf-8')
+    occurrences_seen = 0
+    quoted = False
+    for position, character in enumerate(haystack):
+        preceded_by_backslash = position > 0 and haystack[position - 1] == "\\"
+        if character == qc and not preceded_by_backslash:
+            quoted = not quoted
+        # Does an occurrence of the needle start at this position?
+        if haystack.startswith(needle, position):
+            if occurrences_seen == text_index:
+                return quoted
+            occurrences_seen += 1
+    return False
+
+
+def paths_to_text(html: str, str: str) -> List[str]:
+    """Return the tag path of every text node in html that contains str.
+
+    A path is the chain of currently open tags, e.g. "/html/body/p". Paths are
+    returned in document order (the order in which the parser reports the text).
+
+    An end tag closes the innermost open tag with the same name. An end tag
+    with no matching open tag is ignored rather than corrupting the path.
+
+    Args:
+        html: The HTML document to walk.
+        str: The text to look for. NOTE: this name shadows the builtin inside
+            the function; it is kept so existing callers keep working.
+    """
+    needle = str
+
+    class PathHTMLParser(HTMLParser):
+        """Track the stack of open tags and record it for matching text."""
+
+        def __init__(self):
+            super().__init__()
+            self.open_tags = []
+            self.paths = []
+
+        def handle_starttag(self, tag, attrs):
+            self.open_tags.append(tag)
+
+        def handle_endtag(self, tag):
+            # Remove the last (innermost) occurrence of the tag, if any.
+            for position in range(len(self.open_tags) - 1, -1, -1):
+                if self.open_tags[position] == tag:
+                    del self.open_tags[position]
+                    break
+
+        def handle_data(self, data):
+            if needle in data:
+                self.paths.append("".join("/" + tag for tag in self.open_tags))
+
+    parser = PathHTMLParser()
+    parser.feed(html)
+    return parser.paths
+
+
+def get_XSS_data(body: str, request_URL: str, injection_point: str) -> Optional[XSSData]:
+    """Return an XSSData for the first exploitable reflection of the payload in body, or None.
+
+    Every wall-delimited reflection of the payload found in *body* is examined
+    in order. For each one, its context (script, tag attribute or plain HTML,
+    quoted or not) and the special characters that came back unescaped decide
+    which exploit, if any, is suggested.
+
+    Args:
+        body: The response body, as str or bytes.
+        request_URL: URL that was requested; stored in the result.
+        injection_point: Name of the injection point; stored in the result.
+    """
+    def in_script(text, index, body) -> bool:
+        """ Whether the index-th occurrence of text in body is inside a script tag """
+        paths = paths_to_text(body.decode('utf-8'), text.decode("utf-8"))
+        try:
+            return "script" in paths[index]
+        except IndexError:
+            return False
+
+    def in_HTML(text: bytes, index: int, body: bytes) -> bool:
+        """ Whether the index-th occurrence of text in body is inside the HTML
+            but not inside a script tag or part of a HTML attribute """
+        # a < would be interpreted by the HTML parser as the start of a tag,
+        # so only search for the stuff before it
+        text = text.split(b"<")[0]
+        paths = paths_to_text(body.decode('utf-8'), text.decode("utf-8"))
+        try:
+            return "script" not in paths[index]
+        except IndexError:
+            return False
+
+    def inject_javascript_handler(html: str) -> bool:
+        """ Whether you can inject a Javascript:alert(0) as a link """
+        class injectJSHandlerHTMLParser(HTMLParser):
+            injectJSHandler = False
+
+            def handle_starttag(self, tag, attrs):
+                for name, value in attrs:
+                    # value is None for an attribute written without a value (e.g. <a href>)
+                    if name == "href" and value is not None and value.startswith(FRONT_WALL.decode('utf-8')):
+                        self.injectJSHandler = True
+
+        parser = injectJSHandlerHTMLParser()
+        parser.feed(html)
+        return parser.injectJSHandler
+
+    # Only convert the body to bytes if needed
+    if isinstance(body, str):
+        body = bytes(body, 'utf-8')
+    # Regex for between 24 and 72 (aka 24*3) characters encapsulated by the walls
+    regex = re.compile(b"""%s.{24,72}?%s""" % (FRONT_WALL, BACK_WALL))
+    matches = regex.findall(body)
+    for index, match in enumerate(matches):
+        # Where the string is injected into the HTML.
+        # The results get their own names: rebinding in_script / in_HTML here
+        # would replace the helper functions with booleans and break the next
+        # iteration of the loop.
+        is_in_script = in_script(match, index, body)
+        is_in_HTML = in_HTML(match, index, body)
+        in_tag = not is_in_script and not is_in_HTML
+        in_single_quotes = inside_quote("'", match, index, body)
+        in_double_quotes = inside_quote('"', match, index, body)
+        # Whether you can inject:
+        inject_open_angle = b"ao<ac" in match  # open angle brackets
+        inject_close_angle = b"ac>so" in match  # close angle brackets
+        inject_single_quotes = b"s'd" in match  # single quotes
+        inject_double_quotes = b'd"ao' in match  # double quotes
+        inject_slash = b"sl/bsl" in match  # forward slashes
+        inject_semi = b"se;sl" in match  # semicolons
+        inject_equals = b"eq=" in match  # equals sign
+        # Everything needed to write a complete <script>...</script> element
+        inject_markup = inject_open_angle and inject_close_angle and inject_slash
+
+        exploit = None
+        if is_in_script and inject_markup:  # e.g. <script>PAYLOAD</script>
+            exploit = '</script><script>alert(0)</script><script>'
+        elif is_in_script and in_single_quotes and inject_single_quotes and inject_semi:  # e.g. <script>t='PAYLOAD';</script>
+            exploit = "';alert(0);g='"
+        elif is_in_script and in_double_quotes and inject_double_quotes and inject_semi:  # e.g. <script>t="PAYLOAD";</script>
+            exploit = '";alert(0);g="'
+        elif in_tag and in_single_quotes and inject_single_quotes and inject_markup:
+            # e.g. <a href='PAYLOAD'>Test</a>
+            exploit = "'><script>alert(0)</script>"
+        elif in_tag and in_double_quotes and inject_double_quotes and inject_markup:
+            # e.g. <a href="PAYLOAD">Test</a>
+            exploit = '"><script>alert(0)</script>'
+        elif in_tag and not in_double_quotes and not in_single_quotes and inject_markup:
+            # e.g. <a href=PAYLOAD>Test</a>
+            exploit = '><script>alert(0)</script>'
+        elif inject_javascript_handler(body.decode('utf-8')):  # e.g. <html><a href=PAYLOAD>Test</a>
+            exploit = 'Javascript:alert(0)'
+        elif in_tag and in_double_quotes and inject_double_quotes and inject_equals:  # e.g. <a href="PAYLOAD">Test</a>
+            exploit = '" onmouseover="alert(0)" t="'
+        elif in_tag and in_single_quotes and inject_single_quotes and inject_equals:  # e.g. <a href='PAYLOAD'>Test</a>
+            exploit = "' onmouseover='alert(0)' t='"
+        elif in_tag and not in_single_quotes and not in_double_quotes and inject_equals:  # e.g. <a href=PAYLOAD>Test</a>
+            exploit = " onmouseover=alert(0) t="
+        elif is_in_HTML and not is_in_script and inject_markup:  # e.g. <html>PAYLOAD</html>
+            exploit = '<script>alert(0)</script>'
+
+        if exploit is not None:
+            return XSSData(request_URL,
+                           injection_point,
+                           exploit,
+                           match.decode('utf-8'))
+        # This reflection is not exploitable; keep looking at the remaining ones.
+    return None
+
+
+# response is mitmproxy's entry point
+def response(flow: http.HTTPFlow) -> None:
+    """Scan the page behind *flow* and log every XSS / SQLi finding.
+
+    Script tags are checked for unclaimed domains first. Then the end-of-URL,
+    referer and user-agent injection tests run in that order, followed by the
+    query injection test when the URL contains a "?".
+    """
+    cookiesDict = get_cookies(flow)
+    request_url = flow.request.url
+    # Example: http://xss.guru/unclaimedScriptTag.html
+    find_unclaimed_URLs(flow.response.content, request_url)
+    original_body = flow.response.content.decode('utf-8')
+    injection_tests = [
+        test_end_of_URL_injection,
+        test_referer_injection,  # Example: https://daviddworken.com/vulnerableReferer.php
+        test_user_agent_injection,  # Example: https://daviddworken.com/vulnerableUA.php
+    ]
+    for injection_test in injection_tests:
+        xss_info, sqli_info = injection_test(original_body, request_url, cookiesDict)
+        log_XSS_data(xss_info)
+        log_SQLi_data(sqli_info)
+    if "?" in request_url:
+        # Example: https://daviddworken.com/vulnerable.php?name=
+        xss_info, sqli_info = test_query_injection(original_body, request_url, cookiesDict)
+        log_XSS_data(xss_info)
+        log_SQLi_data(sqli_info)