aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/protocol/http.py
diff options
context:
space:
mode:
Diffstat (limited to 'libmproxy/protocol/http.py')
-rw-r--r--libmproxy/protocol/http.py234
1 files changed, 162 insertions, 72 deletions
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py
index 35c5611f..adb743a2 100644
--- a/libmproxy/protocol/http.py
+++ b/libmproxy/protocol/http.py
@@ -1,5 +1,9 @@
from __future__ import absolute_import
-import Cookie, urllib, urlparse, time, copy
+import Cookie
+import urllib
+import urlparse
+import time
+import copy
from email.utils import parsedate_tz, formatdate, mktime_tz
import threading
from netlib import http, tcp, http_status
@@ -27,8 +31,17 @@ def get_line(fp):
def send_connect_request(conn, host, port, update_state=True):
- upstream_request = HTTPRequest("authority", "CONNECT", None, host, port, None,
- (1, 1), ODictCaseless(), "")
+ upstream_request = HTTPRequest(
+ "authority",
+ "CONNECT",
+ None,
+ host,
+ port,
+ None,
+ (1, 1),
+ ODictCaseless(),
+ ""
+ )
conn.send(upstream_request.assemble())
resp = HTTPResponse.from_stream(conn.rfile, upstream_request.method)
if resp.code != 200:
@@ -72,10 +85,11 @@ class decoded(object):
self.o.encode(self.ce)
-class HTTPMessage(stateobject.SimpleStateObject):
+class HTTPMessage(stateobject.StateObject):
"""
Base class for HTTPRequest and HTTPResponse
"""
+
def __init__(self, httpversion, headers, content, timestamp_start=None,
timestamp_end=None):
self.httpversion = httpversion
@@ -83,8 +97,8 @@ class HTTPMessage(stateobject.SimpleStateObject):
"""@type: ODictCaseless"""
self.content = content
- self.timestamp_start = timestamp_start if timestamp_start is not None else utils.timestamp()
- self.timestamp_end = timestamp_end if timestamp_end is not None else utils.timestamp()
+ self.timestamp_start = timestamp_start
+ self.timestamp_end = timestamp_end
_stateobject_attributes = dict(
httpversion=tuple,
@@ -93,10 +107,18 @@ class HTTPMessage(stateobject.SimpleStateObject):
timestamp_start=float,
timestamp_end=float
)
+ _stateobject_long_attributes = {"content"}
+
+ def get_state(self, short=False):
+ ret = super(HTTPMessage, self).get_state(short)
+ if short:
+ ret["contentLength"] = len(self.content)
+ return ret
def get_decoded_content(self):
"""
- Returns the decoded content based on the current Content-Encoding header.
+ Returns the decoded content based on the current Content-Encoding
+ header.
Doesn't change the message iteself or its headers.
"""
ce = self.headers.get_first("content-encoding")
@@ -156,7 +178,9 @@ class HTTPMessage(stateobject.SimpleStateObject):
Returns the number of replacements made.
"""
with decoded(self):
- self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
+ self.content, c = utils.safe_subn(
+ pattern, repl, self.content, *args, **kwargs
+ )
c += self.headers.replace(pattern, repl, *args, **kwargs)
return c
@@ -195,10 +219,11 @@ class HTTPRequest(HTTPMessage):
scheme: URL scheme (http/https)
- host: Target hostname of the request. This is not neccessarily the directy upstream server (which could be
- another proxy), but it's always the target server we want to reach at the end. This attribute is either
- inferred from the request itself (absolute-form, authority-form) or from the connection metadata (e.g. the
- host in reverse proxy mode).
+ host: Target hostname of the request. This is not necessarily the
+ direct upstream server (which could be another proxy), but it's always
+ the target server we want to reach at the end. This attribute is either
+ inferred from the request itself (absolute-form, authority-form) or from
+ the connection metadata (e.g. the host in reverse proxy mode).
port: Destination port
@@ -258,13 +283,15 @@ class HTTPRequest(HTTPMessage):
)
@classmethod
- def _from_state(cls, state):
+ def from_state(cls, state):
f = cls(None, None, None, None, None, None, None, None, None, None, None)
- f._load_state(state)
+ f.load_state(state)
return f
def __repr__(self):
- return "<HTTPRequest: {0}>".format(self._assemble_first_line(self.form_in)[:-9])
+ return "<HTTPRequest: {0}>".format(
+ self._assemble_first_line(self.form_in)[:-9]
+ )
@classmethod
def from_stream(cls, rfile, include_body=True, body_size_limit=None):
@@ -453,7 +480,9 @@ class HTTPRequest(HTTPMessage):
lst = [urllib.quote(i, safe="") for i in lst]
path = "/" + "/".join(lst)
scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url)
- self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment])
+ self.url = urlparse.urlunparse(
+ [scheme, netloc, path, params, query, fragment]
+ )
def get_query(self):
"""
@@ -470,18 +499,23 @@ class HTTPRequest(HTTPMessage):
"""
scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url)
query = utils.urlencode(odict.lst)
- self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment])
+ self.url = urlparse.urlunparse(
+ [scheme, netloc, path, params, query, fragment]
+ )
def pretty_host(self, hostheader):
"""
Heuristic to get the host of the request.
- Note that pretty_host() does not always return the TCP destination of the request,
- e.g. if an upstream proxy is in place
+ Note that pretty_host() does not always return the TCP destination
+ of the request, e.g. if an upstream proxy is in place
+
+ If hostheader is set to True, the Host: header will be used as
+ additional (and preferred) data source. This is handy in transparent
+ mode, where only the ip of the destination is known, but not the
+ resolved name. This is disabled by default, as an attacker may spoof
+ the host header to confuse an analyst.
- If hostheader is set to True, the Host: header will be used as additional (and preferred) data source.
- This is handy in transparent mode, where only the ip of the destination is known, but not the
- resolved name. This is disabled by default, as an attacker may spoof the host header to confuse an analyst.
"""
host = None
if hostheader:
@@ -504,10 +538,12 @@ class HTTPRequest(HTTPMessage):
"""
Returns a URL string, constructed from the Request's URL components.
"""
- return utils.unparse_url(self.scheme,
- self.host,
- self.port,
- self.path).encode('ascii')
+ return utils.unparse_url(
+ self.scheme,
+ self.host,
+ self.port,
+ self.path
+ ).encode('ascii')
@url.setter
def url(self, url):
@@ -535,14 +571,16 @@ class HTTPRequest(HTTPMessage):
def replace(self, pattern, repl, *args, **kwargs):
"""
- Replaces a regular expression pattern with repl in the headers, the request path
- and the body of the request. Encoded content will be decoded before
- replacement, and re-encoded afterwards.
+ Replaces a regular expression pattern with repl in the headers, the
+ request path and the body of the request. Encoded content will be
+ decoded before replacement, and re-encoded afterwards.
Returns the number of replacements made.
"""
c = HTTPMessage.replace(self, pattern, repl, *args, **kwargs)
- self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
+ self.path, pc = utils.safe_subn(
+ pattern, repl, self.path, *args, **kwargs
+ )
c += pc
return c
@@ -573,8 +611,14 @@ class HTTPResponse(HTTPMessage):
def __init__(self, httpversion, code, msg, headers, content, timestamp_start=None,
timestamp_end=None):
assert isinstance(headers, ODictCaseless) or headers is None
- HTTPMessage.__init__(self, httpversion, headers, content, timestamp_start,
- timestamp_end)
+ HTTPMessage.__init__(
+ self,
+ httpversion,
+ headers,
+ content,
+ timestamp_start,
+ timestamp_end
+ )
self.code = code
self.msg = msg
@@ -590,9 +634,9 @@ class HTTPResponse(HTTPMessage):
)
@classmethod
- def _from_state(cls, state):
+ def from_state(cls, state):
f = cls(None, None, None, None, None)
- f._load_state(state)
+ f.load_state(state)
return f
def __repr__(self):
@@ -624,9 +668,20 @@ class HTTPResponse(HTTPMessage):
if hasattr(rfile, "first_byte_timestamp"): # more accurate timestamp_start
timestamp_start = rfile.first_byte_timestamp
- timestamp_end = utils.timestamp()
- return HTTPResponse(httpversion, code, msg, headers, content, timestamp_start,
- timestamp_end)
+ if include_body:
+ timestamp_end = utils.timestamp()
+ else:
+ timestamp_end = None
+
+ return HTTPResponse(
+ httpversion,
+ code,
+ msg,
+ headers,
+ content,
+ timestamp_start,
+ timestamp_end
+ )
def _assemble_first_line(self):
return 'HTTP/%s.%s %s %s' % \
@@ -771,9 +826,9 @@ class HTTPFlow(Flow):
)
@classmethod
- def _from_state(cls, state):
+ def from_state(cls, state):
f = cls(None, None)
- f._load_state(state)
+ f.load_state(state)
return f
def __repr__(self):
@@ -836,9 +891,9 @@ class HTTPFlow(Flow):
def replace(self, pattern, repl, *args, **kwargs):
"""
- Replaces a regular expression pattern with repl in both request and response of the
- flow. Encoded content will be decoded before replacement, and
- re-encoded afterwards.
+ Replaces a regular expression pattern with repl in both request and
+ response of the flow. Encoded content will be decoded before
+ replacement, and re-encoded afterwards.
Returns the number of replacements made.
"""
@@ -863,6 +918,7 @@ class HTTPHandler(ProtocolHandler):
HTTPHandler implements mitmproxys understanding of the HTTP protocol.
"""
+
def __init__(self, c):
super(HTTPHandler, self).__init__(c)
self.expected_form_in = c.config.mode.http_form_in
@@ -901,26 +957,39 @@ class HTTPHandler(ProtocolHandler):
else:
raise
- # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True
+ # call the appropriate script hook - this is an opportunity for an
+ # inline script to set flow.stream = True
self.c.channel.ask("responseheaders", flow)
- # now get the rest of the request body, if body still needs to be read but not streaming this response
+ # now get the rest of the request body, if body still needs to be read
+ # but not streaming this response
if flow.response.stream:
flow.response.content = CONTENT_MISSING
else:
- flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers,
- self.c.config.body_size_limit,
- flow.request.method, flow.response.code, False)
+ flow.response.content = http.read_http_body(
+ self.c.server_conn.rfile, flow.response.headers,
+ self.c.config.body_size_limit,
+ flow.request.method, flow.response.code, False
+ )
+ flow.response.timestamp_end = utils.timestamp()
def handle_flow(self):
flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.live)
try:
try:
- req = HTTPRequest.from_stream(self.c.client_conn.rfile,
- body_size_limit=self.c.config.body_size_limit)
- except tcp.NetLibDisconnect: # don't throw an error for disconnects that happen before/between requests.
+ req = HTTPRequest.from_stream(
+ self.c.client_conn.rfile,
+ body_size_limit=self.c.config.body_size_limit
+ )
+ except tcp.NetLibDisconnect:
+ # don't throw an error for disconnects that happen
+ # before/between requests.
return False
- self.c.log("request", "debug", [req._assemble_first_line(req.form_in)])
+ self.c.log(
+ "request",
+ "debug",
+ [req._assemble_first_line(req.form_in)]
+ )
ret = self.process_request(flow, req)
if ret is not None:
return ret
@@ -958,14 +1027,21 @@ class HTTPHandler(ProtocolHandler):
# We sent a CONNECT request to an upstream proxy.
if flow.request.form_in == "authority" and flow.response.code == 200:
- # TODO: Possibly add headers (memory consumption/usefulness tradeoff)
- # Make sure to add state info before the actual processing of the CONNECT request happens.
- # During an SSL upgrade, we may receive an SNI indication from the client,
- # which resets the upstream connection. If this is the case, we must
- # already re-issue the CONNECT request at this point.
- self.c.server_conn.state.append(("http", {"state": "connect",
- "host": flow.request.host,
- "port": flow.request.port}))
+ # TODO: Possibly add headers (memory consumption/usefulness
+ # tradeoff). Make sure to add state info before the actual
+ # processing of the CONNECT request happens. During an SSL
+ # upgrade, we may receive an SNI indication from the client,
+ # which resets the upstream connection. If this is the case, we
+ # must already re-issue the CONNECT request at this point.
+ self.c.server_conn.state.append(
+ (
+ "http", {
+ "state": "connect",
+ "host": flow.request.host,
+ "port": flow.request.port
+ }
+ )
+ )
if not self.process_connect_request((flow.request.host, flow.request.port)):
return False
@@ -977,7 +1053,6 @@ class HTTPHandler(ProtocolHandler):
except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError), e:
self.handle_error(e, flow)
finally:
- flow.timestamp_end = utils.timestamp()
flow.live = None # Connection is not live anymore.
return False
@@ -1004,7 +1079,7 @@ class HTTPHandler(ProtocolHandler):
if message:
self.c.log(message, level="info")
if message_debug:
- self.c.log(message, level="debug")
+ self.c.log(message_debug, level="debug")
if flow:
# TODO: no flows without request or with both request and response at the moment.
@@ -1028,7 +1103,7 @@ class HTTPHandler(ProtocolHandler):
html_content = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
(code, response, message)
self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
- self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
+ self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.config.server_version)
self.c.client_conn.wfile.write("Content-type: text/html\r\n")
self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
if headers:
@@ -1079,7 +1154,7 @@ class HTTPHandler(ProtocolHandler):
self.c.client_conn.send(
'HTTP/1.1 200 Connection established\r\n' +
'Content-Length: 0\r\n' +
- ('Proxy-agent: %s\r\n' % self.c.server_version) +
+ ('Proxy-agent: %s\r\n' % self.c.config.server_version) +
'\r\n'
)
return self.process_connect_request(self.c.server_conn.address)
@@ -1095,7 +1170,8 @@ class HTTPHandler(ProtocolHandler):
if request.form_in == "absolute":
if request.scheme != "http":
raise http.HttpError(400, "Invalid request scheme: %s" % request.scheme)
- if request.form_out == "relative":
+ if self.c.config.mode == "regular":
+ # Update info so that an inline script sees the correct value at flow.server_conn
self.c.set_server_address((request.host, request.port))
flow.server_conn = self.c.server_conn
@@ -1147,7 +1223,7 @@ class HTTPHandler(ProtocolHandler):
self.c.client_conn.send(flow.response.assemble())
else:
# streaming:
- # First send the body and then transfer the response incrementally:
+ # First send the headers and then transfer the response incrementally:
h = flow.response._assemble_head(preserve_transfer_encoding=True)
self.c.client_conn.send(h)
for chunk in http.read_http_body_chunked(self.c.server_conn.rfile,
@@ -1157,7 +1233,7 @@ class HTTPHandler(ProtocolHandler):
for part in chunk:
self.c.client_conn.wfile.write(part)
self.c.client_conn.wfile.flush()
- flow.response.timestamp_end = utils.timestamp()
+ flow.response.timestamp_end = utils.timestamp()
def check_close_connection(self, flow):
"""
@@ -1184,20 +1260,33 @@ class HTTPHandler(ProtocolHandler):
Returns False, if the connection should be closed immediately.
"""
address = tcp.Address.wrap(address)
- if self.c.check_ignore_address(address):
+ if self.c.config.check_ignore(address):
self.c.log("Ignore host: %s:%s" % address(), "info")
- TCPHandler(self.c).handle_messages()
+ TCPHandler(self.c, log=False).handle_messages()
return False
else:
self.expected_form_in = "relative"
self.expected_form_out = "relative"
self.skip_authentication = True
- if address.port in self.c.config.ssl_ports:
+ # In practice, nobody issues a CONNECT request to send unencrypted HTTP requests afterwards.
+ # If we don't delegate to TCP mode, we should always negotiate an SSL connection.
+ should_establish_ssl = (
+ address.port in self.c.config.ssl_ports
+ or
+ not self.c.config.check_tcp(address)
+ )
+
+ if should_establish_ssl:
self.c.log("Received CONNECT request to SSL port. Upgrading to SSL...", "debug")
self.c.establish_ssl(server=True, client=True)
self.c.log("Upgrade to SSL completed.", "debug")
+ if self.c.config.check_tcp(address):
+ self.c.log("Generic TCP mode for host: %s:%s" % address(), "info")
+ TCPHandler(self.c).handle_messages()
+ return False
+
return True
def authenticate(self, request):
@@ -1228,7 +1317,7 @@ class RequestReplayThread(threading.Thread):
server.connect()
if r.scheme == "https":
send_connect_request(server, r.host, r.port)
- server.establish_ssl(self.config.clientcerts, sni=r.host)
+ server.establish_ssl(self.config.clientcerts, sni=self.flow.server_conn.sni)
r.form_out = "relative"
else:
r.form_out = "absolute"
@@ -1237,10 +1326,11 @@ class RequestReplayThread(threading.Thread):
server = ServerConnection(server_address)
server.connect()
if r.scheme == "https":
- server.establish_ssl(self.config.clientcerts, sni=r.host)
+ server.establish_ssl(self.config.clientcerts, sni=self.flow.server_conn.sni)
r.form_out = "relative"
server.send(r.assemble())
+ self.flow.server_conn = server
self.flow.response = HTTPResponse.from_stream(server.rfile, r.method,
body_size_limit=self.config.body_size_limit)
self.channel.ask("response", self.flow)
@@ -1248,4 +1338,4 @@ class RequestReplayThread(threading.Thread):
self.flow.error = Error(repr(v))
self.channel.ask("error", self.flow)
finally:
- r.form_out = form_out_backup \ No newline at end of file
+ r.form_out = form_out_backup