aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/protocol
diff options
context:
space:
mode:
Diffstat (limited to 'libmproxy/protocol')
-rw-r--r-- libmproxy/protocol/http.py 234
-rw-r--r-- libmproxy/protocol/primitives.py 53
2 files changed, 158 insertions, 129 deletions
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py
index 9699c78a..253192dd 100644
--- a/libmproxy/protocol/http.py
+++ b/libmproxy/protocol/http.py
@@ -26,7 +26,7 @@ def get_line(fp):
return line
-def send_connect_request(conn, host, port):
+def send_connect_request(conn, host, port, update_state=True):
upstream_request = HTTPRequest("authority", "CONNECT", None, host, port, None,
(1, 1), ODictCaseless(), "")
conn.send(upstream_request._assemble())
@@ -36,6 +36,12 @@ def send_connect_request(conn, host, port):
"Cannot establish SSL " +
"connection with upstream proxy: \r\n" +
str(resp._assemble()))
+ if update_state:
+ conn.state.append(("http", {
+ "state": "connect",
+ "host": host,
+ "port": port}
+ ))
return resp
@@ -405,7 +411,14 @@ class HTTPRequest(HTTPMessage):
e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
)]
- def get_form_urlencoded(self):
+ def update_host_header(self):
+ """
+ Update the host header to reflect the current target.
+ """
+ self.headers["Host"] = [self.host]
+
+ @property
+ def form_urlencoded(self):
"""
Retrieves the URL-encoded form data, returning an ODict object.
Returns an empty ODict if there is no data or the content-type
@@ -415,7 +428,8 @@ class HTTPRequest(HTTPMessage):
return ODict(utils.urldecode(self.content))
return ODict([])
- def set_form_urlencoded(self, odict):
+ @form_urlencoded.setter
+ def form_urlencoded(self, odict):
"""
Sets the body to the URL-encoded form data, and adds the
appropriate content-type header. Note that this will destroy the
@@ -426,16 +440,18 @@ class HTTPRequest(HTTPMessage):
self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
self.content = utils.urlencode(odict.lst)
- def get_path_components(self, f):
+ @property
+ def path_components(self):
"""
Returns the path components of the URL as a list of strings.
Components are unquoted.
"""
- _, _, path, _, _, _ = urlparse.urlparse(self.get_url(False, f))
+ _, _, path, _, _, _ = urlparse.urlparse(self.url)
return [urllib.unquote(i) for i in path.split("/") if i]
- def set_path_components(self, lst, f):
+ @path_components.setter
+ def path_components(self, lst):
"""
Takes a list of strings, and sets the path component of the URL.
@@ -443,32 +459,34 @@ class HTTPRequest(HTTPMessage):
"""
lst = [urllib.quote(i, safe="") for i in lst]
path = "/" + "/".join(lst)
- scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url(False, f))
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f)
+ scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url)
+ self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment])
- def get_query(self, f):
+ @property
+ def query(self):
"""
Gets the request query string. Returns an ODict object.
"""
- _, _, _, _, query, _ = urlparse.urlparse(self.get_url(False, f))
+ _, _, _, _, query, _ = urlparse.urlparse(self.url)
if query:
return ODict(utils.urldecode(query))
return ODict([])
- def set_query(self, odict, f):
+ @query.setter
+ def query(self, odict):
"""
Takes an ODict object, and sets the request query string.
"""
- scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url(False, f))
+ scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url)
query = utils.urlencode(odict.lst)
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]), f)
+ self.url = urlparse.urlunparse([scheme, netloc, path, params, query, fragment])
- def get_host(self, hostheader, flow):
+ def pretty_host(self, hostheader):
"""
Heuristic to get the host of the request.
- Note that get_host() does not always return the TCP destination of the request,
- e.g. on a transparently intercepted request to an unrelated HTTP proxy.
+ Note that pretty_host() does not always return the TCP destination of the request,
+ e.g. if an upstream proxy is in place.
If hostheader is set to True, the Host: header will be used as additional (and preferred) data source.
This is handy in transparent mode, where only the ip of the destination is known, but not the
@@ -478,54 +496,27 @@ class HTTPRequest(HTTPMessage):
if hostheader:
host = self.headers.get_first("host")
if not host:
- if self.host:
- host = self.host
- else:
- for s in flow.server_conn.state:
- if s[0] == "http" and s[1]["state"] == "connect":
- host = s[1]["host"]
- break
- if not host:
- host = flow.server_conn.address.host
+ host = self.host
host = host.encode("idna")
return host
- def get_scheme(self, flow):
- """
- Returns the request port, either from the request itself or from the flow's server connection
- """
- if self.scheme:
- return self.scheme
- if self.form_out == "authority": # On SSLed connections, the original CONNECT request is still unencrypted.
- return "http"
- return "https" if flow.server_conn.ssl_established else "http"
-
- def get_port(self, flow):
- """
- Returns the request port, either from the request itself or from the flow's server connection
- """
- if self.port:
- return self.port
- for s in flow.server_conn.state:
- if s[0] == "http" and s[1].get("state") == "connect":
- return s[1]["port"]
- return flow.server_conn.address.port
+ def pretty_url(self, hostheader):
+ if self.form_out == "authority": # upstream proxy mode
+ return "%s:%s" % (self.pretty_host(hostheader), self.port)
+ return utils.unparse_url(self.scheme,
+ self.pretty_host(hostheader),
+ self.port,
+ self.path).encode('ascii')
- def get_url(self, hostheader, flow):
+ @property
+ def url(self):
"""
Returns a URL string, constructed from the Request's URL components.
-
- If hostheader is True, we use the value specified in the request
- Host header to construct the URL.
"""
- if self.form_out == "authority": # upstream proxy mode
- return "%s:%s" % (self.get_host(hostheader, flow), self.get_port(flow))
- return utils.unparse_url(self.get_scheme(flow),
- self.get_host(hostheader, flow),
- self.get_port(flow),
- self.path).encode('ascii')
+ return self.pretty_url(False)
- def set_url(self, url, flow):
+ @url.setter
+ def url(self, url):
"""
Parses a URL specification, and updates the Request's information
accordingly.
@@ -534,32 +525,11 @@ class HTTPRequest(HTTPMessage):
"""
parts = http.parse_url(url)
if not parts:
- return False
- scheme, host, port, path = parts
- is_ssl = (True if scheme == "https" else False)
-
- self.path = path
+ raise ValueError("Invalid URL: %s" % url)
+ self.scheme, self.host, self.port, self.path = parts
- if host != self.get_host(False, flow) or port != self.get_port(flow):
- if flow.live:
- flow.live.change_server((host, port), ssl=is_ssl)
- else:
- # There's not live server connection, we're just changing the attributes here.
- flow.server_conn = ServerConnection((host, port),
- proxy.AddressPriority.MANUALLY_CHANGED)
- flow.server_conn.ssl_established = is_ssl
-
- # If this is an absolute request, replace the attributes on the request object as well.
- if self.host:
- self.host = host
- if self.port:
- self.port = port
- if self.scheme:
- self.scheme = scheme
-
- return True
-
- def get_cookies(self):
+ @property
+ def cookies(self):
cookie_headers = self.headers.get("cookie")
if not cookie_headers:
return None
@@ -756,7 +726,8 @@ class HTTPResponse(HTTPMessage):
if c:
self.headers["set-cookie"] = c
- def get_cookies(self):
+ @property
+ def cookies(self):
cookie_headers = self.headers.get("set-cookie")
if not cookie_headers:
return None
@@ -816,7 +787,7 @@ class HTTPFlow(Flow):
s = "<HTTPFlow"
for a in ("request", "response", "error", "client_conn", "server_conn"):
if getattr(self, a, False):
- s += "\r\n %s = {flow.%s}" % (a,a)
+ s += "\r\n %s = {flow.%s}" % (a, a)
s += ">"
return s.format(flow=self)
@@ -951,8 +922,7 @@ class HTTPHandler(ProtocolHandler):
# sent through to the Master.
flow.request = req
request_reply = self.c.channel.ask("request", flow)
- self.determine_server_address(flow, flow.request) # The inline script may have changed request.host
- flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow
+ self.process_server_address(flow) # The inline script may have changed request.host
if request_reply is None or request_reply == KILL:
return False
@@ -1049,7 +1019,7 @@ class HTTPHandler(ProtocolHandler):
def handle_server_reconnect(self, state):
if state["state"] == "connect":
- send_connect_request(self.c.server_conn, state["host"], state["port"])
+ send_connect_request(self.c.server_conn, state["host"], state["port"], update_state=False)
else: # pragma: nocover
raise RuntimeError("Unknown State: %s" % state["state"])
@@ -1115,14 +1085,30 @@ class HTTPHandler(ProtocolHandler):
if not self.skip_authentication:
self.authenticate(request)
+ # Determine .scheme, .host and .port attributes
+ # For absolute-form requests, they are directly given in the request.
+ # For authority-form requests, we only need to determine the request scheme.
+ # For relative-form requests, we need to determine host and port as well.
+ if not request.scheme:
+ request.scheme = "https" if flow.server_conn and flow.server_conn.ssl_established else "http"
+ if not request.host:
+ # Host/Port Complication: In upstream mode, use the server we CONNECTed to,
+ # not the upstream proxy.
+ if flow.server_conn:
+ for s in flow.server_conn.state:
+ if s[0] == "http" and s[1]["state"] == "connect":
+ request.host, request.port = s[1]["host"], s[1]["port"]
+ if not request.host and flow.server_conn:
+ request.host, request.port = flow.server_conn.address.host, flow.server_conn.address.port
+
+ # Now we can process the request.
if request.form_in == "authority":
if self.c.client_conn.ssl_established:
raise http.HttpError(400, "Must not CONNECT on already encrypted connection")
if self.expected_form_in == "absolute":
- if not self.c.config.get_upstream_server:
- self.c.set_server_address((request.host, request.port),
- proxy.AddressPriority.FROM_PROTOCOL)
+ if not self.c.config.get_upstream_server: # Regular mode
+ self.c.set_server_address((request.host, request.port))
flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow
self.c.establish_server_connection()
self.c.client_conn.send(
@@ -1141,24 +1127,63 @@ class HTTPHandler(ProtocolHandler):
self.ssl_upgrade()
self.skip_authentication = True
return True
- else:
+ else: # upstream proxy mode
return None
+ else:
+ pass # CONNECT should never occur if we don't expect absolute-form requests
+
elif request.form_in == self.expected_form_in:
+
+ request.form_out = self.expected_form_out
+
if request.form_in == "absolute":
if request.scheme != "http":
raise http.HttpError(400, "Invalid request scheme: %s" % request.scheme)
- self.determine_server_address(flow, request)
- request.form_out = self.expected_form_out
+ if request.form_out == "relative":
+ self.c.set_server_address((request.host, request.port))
+ flow.server_conn = self.c.server_conn
+
+
return None
raise http.HttpError(400, "Invalid HTTP request form (expected: %s, got: %s)" %
(self.expected_form_in, request.form_in))
- def determine_server_address(self, flow, request):
- if request.form_in == "absolute":
- self.c.set_server_address((request.host, request.port),
- proxy.AddressPriority.FROM_PROTOCOL)
- flow.server_conn = self.c.server_conn # Update server_conn attribute on the flow
+ def process_server_address(self, flow):
+ # Depending on the proxy mode, server handling is entirely different.
+ # We provide a mostly unified API to the user, which needs to be untangled here.
+ # ( See also: https://github.com/mitmproxy/mitmproxy/issues/337 )
+ address = netlib.tcp.Address((flow.request.host, flow.request.port))
+
+ ssl = (flow.request.scheme == "https")
+
+ if self.c.config.http_form_in == self.c.config.http_form_out == "absolute": # Upstream Proxy mode
+
+ # The connection to the upstream proxy may have a state we may need to take into account.
+ connected_to = None
+ for s in flow.server_conn.state:
+ if s[0] == "http" and s[1]["state"] == "connect":
+ connected_to = tcp.Address((s[1]["host"], s[1]["port"]))
+
+ # We need to reconnect if the current flow requires a (possibly impossible) change to the
+ # connection state, e.g. the host has changed but we already CONNECTed somewhere else.
+ needs_server_change = (
+ ssl != self.c.server_conn.ssl_established
+ or
+ (connected_to and address != connected_to) # HTTP proxying is "stateless", CONNECT isn't.
+ )
+
+ if needs_server_change:
+ # force create new connection to the proxy server to reset state
+ self.live.change_server(self.c.server_conn.address, force=True)
+ if ssl:
+ send_connect_request(self.c.server_conn, address.host, address.port)
+ self.c.establish_ssl(server=True)
+ else:
+ # If we're not in upstream mode, we just want to update the host and possibly establish TLS.
+ self.live.change_server(address, ssl=ssl) # this is a no op if the addresses match.
+
+ flow.server_conn = self.c.server_conn
def authenticate(self, request):
if self.c.config.authenticator:
@@ -1184,7 +1209,9 @@ class RequestReplayThread(threading.Thread):
r.form_out = self.config.http_form_out
server_address, server_ssl = False, False
- if self.config.get_upstream_server:
+ # If the flow is live, r.host is already the correct upstream server unless modified by a script.
+ # If modified by a script, we probably want to keep the modified destination.
+ if self.config.get_upstream_server and not self.flow.live:
try:
# this will fail in transparent mode
upstream_info = self.config.get_upstream_server(self.flow.client_conn)
@@ -1193,17 +1220,16 @@ class RequestReplayThread(threading.Thread):
except proxy.ProxyError:
pass
if not server_address:
- server_address = (r.get_host(False, self.flow), r.get_port(self.flow))
+ server_address = (r.host, r.port)
- server = ServerConnection(server_address, None)
+ server = ServerConnection(server_address)
server.connect()
- if server_ssl or r.get_scheme(self.flow) == "https":
+ if server_ssl or r.scheme == "https":
if self.config.http_form_out == "absolute": # form_out == absolute -> forward mode -> send CONNECT
- send_connect_request(server, r.get_host(), r.get_port())
+ send_connect_request(server, r.host, r.port)
r.form_out = "relative"
- server.establish_ssl(self.config.clientcerts,
- self.flow.server_conn.sni)
+ server.establish_ssl(self.config.clientcerts, sni=r.host)
server.send(r._assemble())
self.flow.response = HTTPResponse.from_stream(server.rfile, r.method,
body_size_limit=self.config.body_size_limit)
diff --git a/libmproxy/protocol/primitives.py b/libmproxy/protocol/primitives.py
index a84b4061..416e6880 100644
--- a/libmproxy/protocol/primitives.py
+++ b/libmproxy/protocol/primitives.py
@@ -2,7 +2,6 @@ from __future__ import absolute_import
import copy
import netlib.tcp
from .. import stateobject, utils, version
-from ..proxy.primitives import AddressPriority
from ..proxy.connection import ClientConnection, ServerConnection
@@ -59,7 +58,7 @@ class Flow(stateobject.SimpleStateObject):
"""@type: ClientConnection"""
self.server_conn = server_conn
"""@type: ServerConnection"""
- self.live = live # Used by flow.request.set_url to change the server address
+ self.live = live
"""@type: LiveConnection"""
self.error = None
@@ -153,44 +152,48 @@ class LiveConnection(object):
without having to expose the ConnectionHandler.
"""
def __init__(self, c):
- self._c = c
+ self.c = c
+ self._backup_server_conn = None
"""@type: libmproxy.proxy.server.ConnectionHandler"""
- def change_server(self, address, ssl, persistent_change=False):
+ def change_server(self, address, ssl=False, force=False, persistent_change=False):
address = netlib.tcp.Address.wrap(address)
- if address != self._c.server_conn.address:
+ if force or address != self.c.server_conn.address or ssl != self.c.server_conn.ssl_established:
- self._c.log("Change server connection: %s:%s -> %s:%s" % (
- self._c.server_conn.address.host,
- self._c.server_conn.address.port,
+ self.c.log("Change server connection: %s:%s -> %s:%s [persistent: %s]" % (
+ self.c.server_conn.address.host,
+ self.c.server_conn.address.port,
address.host,
- address.port
+ address.port,
+ persistent_change
), "debug")
- if not hasattr(self, "_backup_server_conn"):
- self._backup_server_conn = self._c.server_conn
- self._c.server_conn = None
+ if self._backup_server_conn:
+ self._backup_server_conn = self.c.server_conn
+ self.c.server_conn = None
else: # This is at least the second temporary change. We can kill the current connection.
- self._c.del_server_connection()
+ self.c.del_server_connection()
- self._c.set_server_address(address, AddressPriority.MANUALLY_CHANGED)
- self._c.establish_server_connection(ask=False)
+ self.c.set_server_address(address)
+ self.c.establish_server_connection(ask=False)
if ssl:
- self._c.establish_ssl(server=True)
- if hasattr(self, "_backup_server_conn") and persistent_change:
- del self._backup_server_conn
+ self.c.establish_ssl(server=True)
+ if persistent_change:
+ self._backup_server_conn = None
def restore_server(self):
- if not hasattr(self, "_backup_server_conn"):
+ # TODO: Similar to _backup_server_conn, introduce _cache_server_conn, which keeps the changed connection open
+ # This may be beneficial if a user is rewriting all requests from http to https or similar.
+ if not self._backup_server_conn:
return
- self._c.log("Restore original server connection: %s:%s -> %s:%s" % (
- self._c.server_conn.address.host,
- self._c.server_conn.address.port,
+ self.c.log("Restore original server connection: %s:%s -> %s:%s" % (
+ self.c.server_conn.address.host,
+ self.c.server_conn.address.port,
self._backup_server_conn.address.host,
self._backup_server_conn.address.port
), "debug")
- self._c.del_server_connection()
- self._c.server_conn = self._backup_server_conn
- del self._backup_server_conn
\ No newline at end of file
+ self.c.del_server_connection()
+ self.c.server_conn = self._backup_server_conn
+ self._backup_server_conn = None
\ No newline at end of file