From 57868ef5fab5c9f101126ec7981db2d769b22a4f Mon Sep 17 00:00:00 2001 From: jbrule Date: Fri, 12 Oct 2018 14:56:58 -0500 Subject: Added link_expander.py example to simple examples (#3348) --- examples/simple/README.md | 1 + examples/simple/link_expander.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 examples/simple/link_expander.py (limited to 'examples') diff --git a/examples/simple/README.md b/examples/simple/README.md index 2fafdd5a..66a05b30 100644 --- a/examples/simple/README.md +++ b/examples/simple/README.md @@ -8,6 +8,7 @@ | filter_flows.py | This script demonstrates how to use mitmproxy's filter pattern in scripts. | | io_read_dumpfile.py | Read a dumpfile generated by mitmproxy. | | io_write_dumpfile.py | Only write selected flows into a mitmproxy dumpfile. | +| link_expander.py | Discover relative links in HTML traffic and replace them with absolute paths | | log_events.py | Use mitmproxy's logging API. | | modify_body_inject_iframe.py | Inject configurable iframe into pages. | | modify_form.py | Modify HTTP form submissions. | diff --git a/examples/simple/link_expander.py b/examples/simple/link_expander.py new file mode 100644 index 00000000..0edf7c98 --- /dev/null +++ b/examples/simple/link_expander.py @@ -0,0 +1,28 @@ +# This script determines if the response is an HTML webpage and if so seeks out +# relative links (e.g. <a href="./about.html">) and expands them to absolute links +# In practice this can be used to front an indexing spider that may not have the capability to expand relative page links. 
# Usage: mitmdump -s link_expander.py or mitmproxy -s link_expander.py

import re
from urllib.parse import urljoin

# Matches the quoted href of an <a ...> tag when its target is relative, i.e.
# does NOT start with http(s)://, ftp(s)://, //, #, javascript: or mailto:.
#   delimiter - the opening quote character; the same character must close
#               the attribute value.
#   link      - the relative target between the quotes.
_REL_LINK_RE = re.compile(
    r"<a\s+(?:[^>]*?\s+)?href=(?P<delimiter>[\"'])"
    r"(?P<link>(?!https?://|ftps?://|//|#|javascript:|mailto:).*?)(?P=delimiter)",
    flags=re.IGNORECASE,
)


def _expand_links(page_url, page_text):
    """Return page_text with each relative <a href> resolved against page_url."""

    def absolutize(match):
        # Rewrite only the span of the "link" group inside the matched tag so
        # the rest of the tag (other attributes, quote style) stays untouched.
        tag = match.group(0)
        begin = match.start("link") - match.start()
        finish = match.end("link") - match.start()
        return tag[:begin] + urljoin(page_url, match.group("link")) + tag[finish:]

    return _REL_LINK_RE.sub(absolutize, page_text)


def response(flow):
    """Expand relative anchor links in HTML responses to absolute URLs.

    Responses whose Content-Type header does not mention ``text/html`` (or
    that have no such header, or no text body) are left untouched.

    Args:
        flow: the flow being processed; this function reads
            ``flow.request.url``, ``flow.response.headers`` and
            ``flow.response.text`` and may assign ``flow.response.text``.
    """
    content_type = flow.response.headers.get("Content-Type", "")
    if "text/html" not in content_type.lower():
        return

    page_text = flow.response.text
    if not page_text:
        return

    expanded = _expand_links(flow.request.url, page_text)
    # Only write the body back when a link was actually rewritten.
    if expanded != page_text:
        flow.response.text = expanded