aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2014-01-30 05:21:53 +0100
committerMaximilian Hils <git@maximilianhils.com>2014-01-30 05:21:53 +0100
commite00bbccfd6b0e2e4710db96bd133748eb594b10e (patch)
tree79109137cbfbf5d5cb5677c3a57af34f6dd5b684 /libmproxy
parent40bf42f14a7ec386024a8925502fa3c6e6f0657e (diff)
downloadmitmproxy-e00bbccfd6b0e2e4710db96bd133748eb594b10e.tar.gz
mitmproxy-e00bbccfd6b0e2e4710db96bd133748eb594b10e.tar.bz2
mitmproxy-e00bbccfd6b0e2e4710db96bd133748eb594b10e.zip
remove old classes
Diffstat (limited to 'libmproxy')
-rw-r--r--libmproxy/flow.py480
-rw-r--r--libmproxy/protocol/http.py118
-rw-r--r--libmproxy/protocolold.py391
-rw-r--r--libmproxy/proxy.py8
4 files changed, 114 insertions, 883 deletions
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index 0f6204cf..4baee3ee 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -163,13 +163,12 @@ class StateObject:
class SimpleStateObject(StateObject):
"""
- A StateObject with opionated conventions that tries to keep everything DRY.y
+ A StateObject with opionated conventions that tries to keep everything DRY.
"""
_stateobject_attributes = None
"""
- A dict where the keys represent the attributes to be serialized.
- The values represent the attribute class or type.
+ An attribute-name -> class-or-type dict containing all attributes that should be serialized
If the attribute is a class, this class must be a subclass of StateObject.
"""
@@ -204,316 +203,6 @@ class SimpleStateObject(StateObject):
return f
-class Response(object):
- """
- An HTTP response.
-
- Exposes the following attributes:
-
- request: Request object.
-
- code: HTTP response code
-
- msg: HTTP response message
-
- headers: ODict object
-
- content: Content of the request, None, or CONTENT_MISSING if there
- is content associated, but not present. CONTENT_MISSING evaluates
- to False to make checking for the presence of content natural.
-
- timestamp_start: Seconds since the epoch signifying response transmission started
-
- timestamp_end: Seconds since the epoch signifying response transmission ended
- """
- def __init__(self, request, httpversion, code, msg, headers, content, cert, timestamp_start=None, timestamp_end=None):
- assert isinstance(headers, ODictCaseless)
- self.request = request
- self.httpversion, self.code, self.msg = httpversion, code, msg
- self.headers, self.content = headers, content
- self.cert = cert
- self.timestamp_start = timestamp_start or utils.timestamp()
- self.timestamp_end = timestamp_end or utils.timestamp()
- self.replay = False
-
- def _refresh_cookie(self, c, delta):
- """
- Takes a cookie string c and a time delta in seconds, and returns
- a refreshed cookie string.
- """
- c = Cookie.SimpleCookie(str(c))
- for i in c.values():
- if "expires" in i:
- d = parsedate_tz(i["expires"])
- if d:
- d = mktime_tz(d) + delta
- i["expires"] = formatdate(d)
- else:
- # This can happen when the expires tag is invalid.
- # reddit.com sends a an expires tag like this: "Thu, 31 Dec
- # 2037 23:59:59 GMT", which is valid RFC 1123, but not
- # strictly correct according tot he cookie spec. Browsers
- # appear to parse this tolerantly - maybe we should too.
- # For now, we just ignore this.
- del i["expires"]
- return c.output(header="").strip()
-
- def refresh(self, now=None):
- """
- This fairly complex and heuristic function refreshes a server
- response for replay.
-
- - It adjusts date, expires and last-modified headers.
- - It adjusts cookie expiration.
- """
- if not now:
- now = time.time()
- delta = now - self.timestamp_start
- refresh_headers = [
- "date",
- "expires",
- "last-modified",
- ]
- for i in refresh_headers:
- if i in self.headers:
- d = parsedate_tz(self.headers[i][0])
- if d:
- new = mktime_tz(d) + delta
- self.headers[i] = [formatdate(new)]
- c = []
- for i in self.headers["set-cookie"]:
- c.append(self._refresh_cookie(i, delta))
- if c:
- self.headers["set-cookie"] = c
-
- def _set_replay(self):
- self.replay = True
-
- def is_replay(self):
- """
- Is this response a replay?
- """
- return self.replay
-
- def _load_state(self, state):
- self.code = state["code"]
- self.msg = state["msg"]
- self.headers = ODictCaseless._from_state(state["headers"])
- self.content = state["content"]
- self.timestamp_start = state["timestamp_start"]
- self.timestamp_end = state["timestamp_end"]
- self.cert = certutils.SSLCert.from_pem(state["cert"]) if state["cert"] else None
-
- def _get_state(self):
- return dict(
- httpversion = self.httpversion,
- code = self.code,
- msg = self.msg,
- headers = self.headers._get_state(),
- timestamp_start = self.timestamp_start,
- timestamp_end = self.timestamp_end,
- cert = self.cert.to_pem() if self.cert else None,
- content = self.content,
- )
-
- @classmethod
- def _from_state(klass, request, state):
- return klass(
- request,
- state["httpversion"],
- state["code"],
- str(state["msg"]),
- ODictCaseless._from_state(state["headers"]),
- state["content"],
- certutils.SSLCert.from_pem(state["cert"]) if state["cert"] else None,
- state["timestamp_start"],
- state["timestamp_end"],
- )
-
- def copy(self):
- c = copy.copy(self)
- c.headers = self.headers.copy()
- return c
-
- def _assemble_head(self):
- FMT = '%s\r\n%s\r\n'
- headers = self.headers.copy()
- utils.del_all(
- headers,
- ['proxy-connection', 'transfer-encoding']
- )
- if self.content:
- headers["Content-Length"] = [str(len(self.content))]
- elif 'Transfer-Encoding' in self.headers:
- headers["Content-Length"] = ["0"]
- proto = "HTTP/%s.%s %s %s"%(self.httpversion[0], self.httpversion[1], self.code, str(self.msg))
- data = (proto, str(headers))
- return FMT%data
-
- def _assemble(self):
- """
- Assembles the response for transmission to the client. We make some
- modifications to make sure interception works properly.
-
- Returns None if the request cannot be assembled.
- """
- if self.content == CONTENT_MISSING:
- return None
- head = self._assemble_head()
- if self.content:
- return head + self.content
- else:
- return head
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the response. Encoded content will be decoded
- before replacement, and re-encoded afterwards.
-
- Returns the number of replacements made.
- """
- with decoded(self):
- self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
- c += self.headers.replace(pattern, repl, *args, **kwargs)
- return c
-
- def get_header_size(self):
- FMT = '%s\r\n%s\r\n'
- proto = "HTTP/%s.%s %s %s"%(self.httpversion[0], self.httpversion[1], self.code, str(self.msg))
- assembled_header = FMT % (proto, str(self.headers))
- return len(assembled_header)
-
- def get_cookies(self):
- cookie_headers = self.headers.get("set-cookie")
- if not cookie_headers:
- return None
-
- cookies = []
- for header in cookie_headers:
- pairs = [pair.partition("=") for pair in header.split(';')]
- cookie_name = pairs[0][0] # the key of the first key/value pairs
- cookie_value = pairs[0][2] # the value of the first key/value pairs
- cookie_parameters = {key.strip().lower():value.strip() for key,sep,value in pairs[1:]}
- cookies.append((cookie_name, (cookie_value, cookie_parameters)))
- return dict(cookies)
-
-
-class ClientDisconnect:
- """
- A client disconnection event.
-
- Exposes the following attributes:
-
- client_conn: ClientConnect object.
- """
- def __init__(self, client_conn):
- self.client_conn = client_conn
-
-
-class ClientConnect(StateObject):
- """
- A single client connection. Each connection can result in multiple HTTP
- Requests.
-
- Exposes the following attributes:
-
- address: (address, port) tuple, or None if the connection is replayed.
- requestcount: Number of requests created by this client connection.
- close: Is the client connection closed?
- error: Error string or None.
- """
- def __init__(self, address):
- """
- address is an (address, port) tuple, or None if this connection has
- been replayed from within mitmproxy.
- """
- self.address = address
- self.close = False
- self.error = None
-
- def __str__(self):
- if self.address:
- return "%s:%d"%(self.address[0],self.address[1])
-
- def _load_state(self, state):
- self.close = True
- self.error = state["error"]
- self.requestcount = state["requestcount"]
-
- def _get_state(self):
- return dict(
- address = list(self.address),
- requestcount = -1, # FIXME self.requestcount,
- error = self.error,
- )
-
- @classmethod
- def _from_state(klass, state):
- if state:
- k = klass(state["address"])
- k._load_state(state)
- return k
- else:
- return None
-
- def copy(self):
- return copy.copy(self)
-
-
-class Error(StateObject):
- """
- An Error.
-
- This is distinct from an HTTP error response (say, a code 500), which
- is represented by a normal Response object. This class is responsible
- for indicating errors that fall outside of normal HTTP communications,
- like interrupted connections, timeouts, protocol errors.
-
- Exposes the following attributes:
-
- request: Request object
- msg: Message describing the error
- timestamp: Seconds since the epoch
- """
- def __init__(self, request, msg, timestamp=None):
- self.request, self.msg = request, msg
- self.timestamp = timestamp or utils.timestamp()
-
- def _load_state(self, state):
- self.msg = state["msg"]
- self.timestamp = state["timestamp"]
-
- def copy(self):
- c = copy.copy(self)
- return c
-
- def _get_state(self):
- return dict(
- msg = self.msg,
- timestamp = self.timestamp,
- )
-
- @classmethod
- def _from_state(klass, request, state):
- return klass(
- request,
- state["msg"],
- state["timestamp"],
- )
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the request. Returns the number of replacements
- made.
-
- FIXME: Is replace useful on an Error object??
- """
- self.msg, c = utils.safe_subn(pattern, repl, self.msg, *args, **kwargs)
- return c
-
-
class ClientPlaybackState:
def __init__(self, flows, exit):
self.flows, self.exit = flows, exit
@@ -673,167 +362,6 @@ class StickyAuthState:
f.request.headers["authorization"] = self.hosts[f.request.host]
-class Flow:
- """
- A Flow is a collection of objects representing a single HTTP
- transaction. The main attributes are:
-
- request: Request object
- response: Response object
- error: Error object
-
- Note that it's possible for a Flow to have both a response and an error
- object. This might happen, for instance, when a response was received
- from the server, but there was an error sending it back to the client.
-
- The following additional attributes are exposed:
-
- intercepting: Is this flow currently being intercepted?
- """
- def __init__(self, request):
- self.request = request
- self.response, self.error = None, None
- self.intercepting = False
- self._backup = None
-
- def copy(self):
- rc = self.request.copy()
- f = Flow(rc)
- if self.response:
- f.response = self.response.copy()
- f.response.request = rc
- if self.error:
- f.error = self.error.copy()
- f.error.request = rc
- return f
-
- @classmethod
- def _from_state(klass, state):
- f = klass(None)
- f._load_state(state)
- return f
-
- def _get_state(self):
- d = dict(
- request = self.request._get_state() if self.request else None,
- response = self.response._get_state() if self.response else None,
- error = self.error._get_state() if self.error else None,
- version = version.IVERSION
- )
- return d
-
- def _load_state(self, state):
- if self.request:
- self.request._load_state(state["request"])
- else:
- self.request = Request._from_state(state["request"])
-
- if state["response"]:
- if self.response:
- self.response._load_state(state["response"])
- else:
- self.response = Response._from_state(self.request, state["response"])
- else:
- self.response = None
-
- if state["error"]:
- if self.error:
- self.error._load_state(state["error"])
- else:
- self.error = Error._from_state(self.request, state["error"])
- else:
- self.error = None
-
- def modified(self):
- """
- Has this Flow been modified?
- """
- # FIXME: Save a serialization in backup, compare current with
- # backup to detect if flow has _really_ been modified.
- if self._backup:
- return True
- else:
- return False
-
- def backup(self, force=False):
- """
- Save a backup of this Flow, which can be reverted to using a
- call to .revert().
- """
- if not self._backup:
- self._backup = self._get_state()
-
- def revert(self):
- """
- Revert to the last backed up state.
- """
- if self._backup:
- self._load_state(self._backup)
- self._backup = None
-
- def match(self, f):
- """
- Match this flow against a compiled filter expression. Returns True
- if matched, False if not.
-
- If f is a string, it will be compiled as a filter expression. If
- the expression is invalid, ValueError is raised.
- """
- if isinstance(f, basestring):
- f = filt.parse(f)
- if not f:
- raise ValueError("Invalid filter expression.")
- if f:
- return f(self)
- return True
-
- def kill(self, master):
- """
- Kill this request.
- """
- self.error = Error(self.request, "Connection killed")
- self.error.reply = controller.DummyReply()
- if self.request and not self.request.reply.acked:
- self.request.reply(proxy.KILL)
- elif self.response and not self.response.reply.acked:
- self.response.reply(proxy.KILL)
- master.handle_error(self.error)
- self.intercepting = False
-
- def intercept(self):
- """
- Intercept this Flow. Processing will stop until accept_intercept is
- called.
- """
- self.intercepting = True
-
- def accept_intercept(self):
- """
- Continue with the flow - called after an intercept().
- """
- if self.request:
- if not self.request.reply.acked:
- self.request.reply()
- elif self.response and not self.response.reply.acked:
- self.response.reply()
- self.intercepting = False
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in all parts of the
- flow. Encoded content will be decoded before replacement, and
- re-encoded afterwards.
-
- Returns the number of replacements made.
- """
- c = self.request.replace(pattern, repl, *args, **kwargs)
- if self.response:
- c += self.response.replace(pattern, repl, *args, **kwargs)
- if self.error:
- c += self.error.replace(pattern, repl, *args, **kwargs)
- return c
-
-
class State(object):
def __init__(self):
self._flow_map = {}
@@ -866,7 +394,7 @@ class State(object):
"""
Add a request to the state. Returns the matching flow.
"""
- f = Flow(req)
+ f = req.flow
self._flow_list.append(f)
self._flow_map[req] = f
assert len(self._flow_list) == len(self._flow_map)
@@ -891,7 +419,7 @@ class State(object):
Add an error response to the state. Returns the matching flow, or
None if there isn't one.
"""
- f = self._flow_map.get(err.request)
+ f = self._flow_map.get(err.flow)
if not f:
return None
f.error = err
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py
index c161d297..51e73010 100644
--- a/libmproxy/protocol/http.py
+++ b/libmproxy/protocol/http.py
@@ -8,7 +8,7 @@ from ..flow import SimpleStateObject
from netlib import http, tcp, http_status
from netlib.odict import ODict, ODictCaseless
import netlib.utils
-from .. import encoding, utils, version
+from .. import encoding, utils, version, filt, controller
from ..proxy import ProxyError, ServerConnection, ClientConnection
from . import ProtocolHandler, ConnectionTypeChange, KILL
import libmproxy.flow
@@ -109,8 +109,15 @@ class Error(SimpleStateObject):
class Flow(SimpleStateObject, BackreferenceMixin):
+ def __init__(self, conntype, client_conn, server_conn, error):
+ self.conntype = conntype
+ self.client_conn = client_conn
+ self.server_conn = server_conn
+ self.error = error
+
_backrefattr = ("error",)
_backrefname = "flow"
+
_stateobject_attributes = dict(
error=Error,
client_conn=ClientConnection,
@@ -118,12 +125,6 @@ class Flow(SimpleStateObject, BackreferenceMixin):
conntype=str
)
- def __init__(self, conntype, client_conn, server_conn, error):
- self.conntype = conntype
- self.client_conn = client_conn
- self.server_conn = server_conn
- self.error = error
-
def _get_state(self):
d = super(Flow, self)._get_state()
d.update(version=version.IVERSION)
@@ -141,6 +142,30 @@ class Flow(SimpleStateObject, BackreferenceMixin):
f.error = self.error.copy()
return f
+ def modified(self):
+ """
+ Has this Flow been modified?
+ """
+ if self._backup:
+ return self._backup != self._get_state()
+ else:
+ return False
+
+ def backup(self, force=False):
+ """
+ Save a backup of this Flow, which can be reverted to using a
+ call to .revert().
+ """
+ if not self._backup:
+ self._backup = self._get_state()
+
+ def revert(self):
+ """
+ Revert to the last backed up state.
+ """
+ if self._backup:
+ self._load_state(self._backup)
+ self._backup = None
class HTTPMessage(SimpleStateObject):
def __init__(self):
@@ -301,8 +326,6 @@ class HTTPRequest(HTTPMessage):
self.timestamp_end = timestamp_end
self.form_out = form_out or form_in
- ## (Attributes below don't get serialized)
-
# Have this request's cookies been modified by sticky cookies or auth?
self.stickycookie = False
self.stickyauth = False
@@ -613,8 +636,6 @@ class HTTPResponse(HTTPMessage):
self.timestamp_start = timestamp_start
self.timestamp_end = timestamp_end
- ## (Attributes below don't get serialized)
-
# Is this request replayed?
self.is_replay = False
@@ -770,17 +791,22 @@ class HTTPFlow(Flow):
intercepting: Is this flow currently being intercepted?
"""
+ def __init__(self, client_conn, server_conn, error, request, response):
+ Flow.__init__(self, "http", client_conn, server_conn, error)
+ self.request = request
+ self.response = response
+
+ self.intercepting = False # FIXME: Should that rather be an attribute of Flow?
+ self._backup = None
+
_backrefattr = Flow._backrefattr + ("request", "response")
+
_stateobject_attributes = Flow._stateobject_attributes.copy()
_stateobject_attributes.update(
request=HTTPRequest,
response=HTTPResponse
)
- def __init__(self, client_conn, server_conn, error, request, response):
- Flow.__init__(self, "http", client_conn, server_conn, error)
- self.request, self.response = request, response
-
@classmethod
def _from_state(cls, state):
f = cls(None, None, None, None, None)
@@ -795,6 +821,66 @@ class HTTPFlow(Flow):
f.response = self.request.copy()
return f
+ def match(self, f):
+ """
+ Match this flow against a compiled filter expression. Returns True
+ if matched, False if not.
+
+ If f is a string, it will be compiled as a filter expression. If
+ the expression is invalid, ValueError is raised.
+ """
+ if isinstance(f, basestring):
+ f = filt.parse(f)
+ if not f:
+ raise ValueError("Invalid filter expression.")
+ if f:
+ return f(self)
+ return True
+
+ def kill(self, master):
+ """
+ Kill this request.
+ """
+ self.error = Error("Connection killed")
+ self.error.reply = controller.DummyReply()
+ if self.request and not self.request.reply.acked:
+ self.request.reply(KILL)
+ elif self.response and not self.response.reply.acked:
+ self.response.reply(KILL)
+ master.handle_error(self)
+ self.intercepting = False
+
+ def intercept(self):
+ """
+ Intercept this Flow. Processing will stop until accept_intercept is
+ called.
+ """
+ self.intercepting = True
+
+ def accept_intercept(self):
+ """
+ Continue with the flow - called after an intercept().
+ """
+ assert self.intercepting
+ if self.request:
+ if not self.request.reply.acked:
+ self.request.reply()
+ elif self.response and not self.response.reply.acked:
+ self.response.reply()
+ self.intercepting = False
+
+ def replace(self, pattern, repl, *args, **kwargs):
+ """
+ Replaces a regular expression pattern with repl in both request and response of the
+ flow. Encoded content will be decoded before replacement, and
+ re-encoded afterwards.
+
+ Returns the number of replacements made.
+ """
+ c = self.request.replace(pattern, repl, *args, **kwargs)
+ if self.response:
+ c += self.response.replace(pattern, repl, *args, **kwargs)
+ return c
class HttpAuthenticationError(Exception):
def __init__(self, auth_headers=None):
@@ -847,7 +933,7 @@ class HTTPHandler(ProtocolHandler):
if request_reply is None or request_reply == KILL:
return False
- if isinstance(request_reply, HTTPResponse) or (LEGACY and isinstance(request_reply, libmproxy.flow.Response)):
+ if isinstance(request_reply, HTTPResponse):
flow.response = request_reply
else:
self.process_request(flow.request)
diff --git a/libmproxy/protocolold.py b/libmproxy/protocolold.py
deleted file mode 100644
index 9f98e37e..00000000
--- a/libmproxy/protocolold.py
+++ /dev/null
@@ -1,391 +0,0 @@
-import libmproxy.utils, libmproxy.flow
-from netlib import http, http_status, tcp
-import netlib.utils
-from netlib.odict import ODictCaseless
-import select
-from proxy import ProxyError, KILL
-
-LEGACY = True
-
-
-"""
-Minimalistic cleanroom reimplemementation of a couple of flow.* classes. Most functionality is missing,
-but they demonstrate what needs to be added/changed to/within the existing classes.
-"""
-
-
-class Flow(object):
- def __init__(self, conntype, client_conn, server_conn, error):
- self.conntype = conntype
- self.client_conn, self.server_conn = client_conn, server_conn
- self.error = error
-
-
-class HTTPFlow(Flow):
- def __init__(self, client_conn, server_conn, error, request, response):
- Flow.__init__(self, "http", client_conn, server_conn, error)
- self.request, self.response = request, response
-
-
-class HttpAuthenticationError(Exception):
- def __init__(self, auth_headers=None):
- self.auth_headers = auth_headers
-
- def __str__(self):
- return "HttpAuthenticationError"
-
-
-class HTTPMessage(object):
- def _assemble_headers(self):
- headers = self.headers.copy()
- libmproxy.utils.del_all(headers,
- ["proxy-connection",
- "transfer-encoding"])
- if self.content:
- headers["Content-Length"] = [str(len(self.content))]
- elif 'Transfer-Encoding' in self.headers: # content-length for e.g. chuncked transfer-encoding with no content
- headers["Content-Length"] = ["0"]
-
- return str(headers)
-
-
-class HTTPResponse(HTTPMessage):
- def __init__(self, httpversion, code, msg, headers, content, timestamp_start, timestamp_end):
- self.httpversion = httpversion
- self.code = code
- self.msg = msg
- self.headers = headers
- self.content = content
- self.timestamp_start = timestamp_start
- self.timestamp_end = timestamp_end
-
- assert isinstance(headers, ODictCaseless)
-
- #FIXME: Compatibility Fix
- @property
- def request(self):
- return False
-
- def _assemble_response_line(self):
- return 'HTTP/%s.%s %s %s' % (self.httpversion[0], self.httpversion[1], self.code, self.msg)
-
- def _assemble(self):
- response_line = self._assemble_response_line()
- return '%s\r\n%s\r\n%s' % (response_line, self._assemble_headers(), self.content)
-
- @classmethod
- def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
- """
- Parse an HTTP response from a file stream
- """
- if not include_content:
- raise NotImplementedError
-
- httpversion, code, msg, headers, content = http.read_response(
- rfile,
- request_method,
- body_size_limit)
- timestamp_start = rfile.first_byte_timestamp
- timestamp_end = libmproxy.utils.timestamp()
- return HTTPResponse(httpversion, code, msg, headers, content, timestamp_start, timestamp_end)
-
-
-class HTTPRequest(HTTPMessage):
- def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers, content,
- timestamp_start, timestamp_end, form_out=None):
- self.form_in = form_in
- self.method = method
- self.scheme = scheme
- self.host = host
- self.port = port
- self.path = path
- self.httpversion = httpversion
- self.headers = headers
- self.content = content
- self.timestamp_start = timestamp_start
- self.timestamp_end = timestamp_end
-
- self.form_out = form_out or self.form_in
- assert isinstance(headers, ODictCaseless)
-
- #FIXME: Compatibility Fixes
- def is_live(self):
- return True
- @property
- def wfile(self):
- import mock
- return mock.Mock(side_effect=tcp.NetLibDisconnect)
-
- def _assemble_request_line(self, form=None):
- form = form or self.form_out
- request_line = None
- if form == "asterisk" or form == "origin":
- request_line = '%s %s HTTP/%s.%s' % (self.method, self.path, self.httpversion[0], self.httpversion[1])
- elif form == "authority":
- request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
- self.httpversion[0], self.httpversion[1])
- elif form == "absolute":
- request_line = '%s %s://%s:%s%s HTTP/%s.%s' % \
- (self.method, self.scheme, self.host, self.port, self.path,
- self.httpversion[0], self.httpversion[1])
- else:
- raise http.HttpError(400, "Invalid request form")
- return request_line
-
- def _assemble(self):
- request_line = self._assemble_request_line()
- return '%s\r\n%s\r\n%s' % (request_line, self._assemble_headers(), self.content)
-
- @classmethod
- def from_stream(cls, rfile, include_content=True, body_size_limit=None):
- """
- Parse an HTTP request from a file stream
- """
- httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end \
- = None, None, None, None, None, None, None, None, None, None
-
- request_line = HTTPHandler.get_line(rfile)
- timestamp_start = rfile.first_byte_timestamp
-
- request_line_parts = http.parse_init(request_line)
- if not request_line_parts:
- raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
- method, path, httpversion = request_line_parts
-
- if path == '*':
- form_in = "asterisk"
- elif path.startswith("/"):
- form_in = "origin"
- if not netlib.utils.isascii(path):
- raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
- elif method.upper() == 'CONNECT':
- form_in = "authority"
- r = http.parse_init_connect(request_line)
- if not r:
- raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
- host, port, _ = r
- path = None
- else:
- form_in = "absolute"
- r = http.parse_init_proxy(request_line)
- if not r:
- raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
- _, scheme, host, port, path, _ = r
-
- headers = http.read_headers(rfile)
- if headers is None:
- raise http.HttpError(400, "Invalid headers")
-
- if include_content:
- content = http.read_http_body(rfile, headers, body_size_limit, True)
- timestamp_end = libmproxy.utils.timestamp()
-
- return HTTPRequest(form_in, method, scheme, host, port, path, httpversion, headers, content,
- timestamp_start, timestamp_end)
-
-
-class HTTPHandler(ProtocolHandler):
- def handle_messages(self):
- while self.handle_flow():
- pass
- self.c.close = True
-
- def get_response_from_server(self, request):
- request_raw = request._assemble()
-
- for i in range(2):
- try:
- self.c.server_conn.wfile.write(request_raw)
- self.c.server_conn.wfile.flush()
- return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
- body_size_limit=self.c.config.body_size_limit)
- except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), v:
- self.c.log("error in server communication: %s" % str(v))
- if i < 1:
- # In any case, we try to reconnect at least once.
- # This is necessary because it might be possible that we already initiated an upstream connection
- # after clientconnect that has already been expired, e.g consider the following event log:
- # > clientconnect (transparent mode destination known)
- # > serverconnect
- # > read n% of large request
- # > server detects timeout, disconnects
- # > read (100-n)% of large request
- # > send large request upstream
- self.c.server_reconnect()
- else:
- raise v
-
- def handle_flow(self):
- flow = HTTPFlow(self.c.client_conn, self.c.server_conn, None, None, None)
- try:
- flow.request = HTTPRequest.from_stream(self.c.client_conn.rfile,
- body_size_limit=self.c.config.body_size_limit)
- self.c.log("request", [flow.request._assemble_request_line(flow.request.form_in)])
-
- request_reply = self.c.channel.ask("request" if LEGACY else "httprequest",
- flow.request if LEGACY else flow)
- if request_reply is None or request_reply == KILL:
- return False
-
- if isinstance(request_reply, HTTPResponse) or (LEGACY and isinstance(request_reply, libmproxy.flow.Response)):
- flow.response = request_reply
- else:
- self.process_request(flow.request)
- flow.response = self.get_response_from_server(flow.request)
-
- self.c.log("response", [flow.response._assemble_response_line() if not LEGACY else flow.response._assemble().splitlines()[0]])
- response_reply = self.c.channel.ask("response" if LEGACY else "httpresponse",
- flow.response if LEGACY else flow)
- if response_reply is None or response_reply == KILL:
- return False
-
- raw = flow.response._assemble()
- self.c.client_conn.wfile.write(raw)
- self.c.client_conn.wfile.flush()
- flow.timestamp_end = libmproxy.utils.timestamp()
-
- if (http.connection_close(flow.request.httpversion, flow.request.headers) or
- http.connection_close(flow.response.httpversion, flow.response.headers)):
- return False
-
- if flow.request.form_in == "authority":
- self.ssl_upgrade(flow.request)
- return True
- except (HttpAuthenticationError, http.HttpError, ProxyError, tcp.NetLibError), e:
- self.handle_error(e, flow)
- return False
-
- def handle_error(self, error, flow=None):
- code, message, headers = None, None, None
- if isinstance(error, HttpAuthenticationError):
- code, message, headers = 407, "Proxy Authentication Required", error.auth_headers
- elif isinstance(error, (http.HttpError, ProxyError)):
- code, message = error.code, error.msg
- elif isinstance(error, tcp.NetLibError):
- code = 502
- message = error.message or error.__class__
-
- if code:
- err = "%s: %s" % (code, message)
- else:
- err = message
-
- self.c.log("error: %s" %err)
-
- if flow:
- flow.error = libmproxy.flow.Error(False, err)
- self.c.channel.ask("error" if LEGACY else "httperror",
- flow.error if LEGACY else flow)
- else:
- pass # FIXME: Is there any use case for persisting errors that occur outside of flows?
-
- if code:
- try:
- self.send_error(code, message, headers)
- except:
- pass
-
- def send_error(self, code, message, headers):
- response = http_status.RESPONSES.get(code, "Unknown")
- html_content = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
- (code, response, message)
- self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
- self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
- self.c.client_conn.wfile.write("Content-type: text/html\r\n")
- self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
- if headers:
- for key, value in headers.items():
- self.c.client_conn.wfile.write("%s: %s\r\n" % (key, value))
- self.c.client_conn.wfile.write("Connection: close\r\n")
- self.c.client_conn.wfile.write("\r\n")
- self.c.client_conn.wfile.write(html_content)
- self.c.client_conn.wfile.flush()
-
- def ssl_upgrade(self, upstream_request=None):
- """
- Upgrade the connection to SSL after an authority (CONNECT) request has been made.
- If the authority request has been forwarded upstream (because we have another proxy server there),
- money-patch the ConnectionHandler.server_reconnect function to resend the request on reconnect.
-
- This isn't particular beautiful code, but it isolates this rare edge-case from the
- protocol-agnostic ConnectionHandler
- """
- self.c.mode = "transparent"
- self.c.determine_conntype()
- self.c.establish_ssl(server=True, client=True)
-
- if upstream_request:
- self.c.log("Hook reconnect function")
- original_reconnect_func = self.c.server_reconnect
-
- def reconnect_http_proxy():
- self.c.log("Hooked reconnect function")
- self.c.log("Hook: Run original redirect")
- original_reconnect_func(no_ssl=True)
- self.c.log("Hook: Write CONNECT request to upstream proxy", [upstream_request._assemble_request_line()])
- self.c.server_conn.wfile.write(upstream_request._assemble())
- self.c.server_conn.wfile.flush()
- self.c.log("Hook: Read answer to CONNECT request from proxy")
- resp = HTTPResponse.from_stream(self.c.server_conn.rfile, upstream_request.method)
- if resp.code != 200:
- raise ProxyError(resp.code,
- "Cannot reestablish SSL connection with upstream proxy: \r\n" + str(resp.headers))
- self.c.log("Hook: Establish SSL with upstream proxy")
- self.c.establish_ssl(server=True)
-
- self.c.server_reconnect = reconnect_http_proxy
-
- raise ConnectionTypeChange
-
- def process_request(self, request):
- if self.c.mode == "regular":
- self.authenticate(request)
- if request.form_in == "authority" and self.c.client_conn.ssl_established:
- raise http.HttpError(502, "Must not CONNECT on already encrypted connection")
-
- # If we have a CONNECT request, we might need to intercept
- if request.form_in == "authority":
- directly_addressed_at_mitmproxy = (self.c.mode == "regular") and not self.c.config.forward_proxy
- if directly_addressed_at_mitmproxy:
- self.c.establish_server_connection((request.host, request.port))
- self.c.client_conn.wfile.write(
- 'HTTP/1.1 200 Connection established\r\n' +
- ('Proxy-agent: %s\r\n' % self.c.server_version) +
- '\r\n'
- )
- self.c.client_conn.wfile.flush()
- self.ssl_upgrade() # raises ConnectionTypeChange exception
-
- if self.c.mode == "regular":
- if request.form_in == "authority":
- pass
- elif request.form_in == "absolute":
- if request.scheme != "http":
- raise http.HttpError(400, "Invalid Request")
- if not self.c.config.forward_proxy:
- request.form_out = "origin"
- if ((not self.c.server_conn) or
- (self.c.server_conn.address != (request.host, request.port))):
- self.c.establish_server_connection((request.host, request.port))
- else:
- raise http.HttpError(400, "Invalid Request")
-
- def authenticate(self, request):
- if self.c.config.authenticator:
- if self.c.config.authenticator.authenticate(request.headers):
- self.c.config.authenticator.clean(request.headers)
- else:
- raise HttpAuthenticationError(self.c.config.authenticator.auth_challenge_headers())
- return request.headers
-
- @staticmethod
- def get_line(fp):
- """
- Get a line, possibly preceded by a blank.
- """
- line = fp.readline()
- if line == "\r\n" or line == "\n": # Possible leftover from previous message
- line = fp.readline()
- if line == "":
- raise tcp.NetLibDisconnect
- return line \ No newline at end of file
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
index e43c811b..468a5708 100644
--- a/libmproxy/proxy.py
+++ b/libmproxy/proxy.py
@@ -49,6 +49,10 @@ class ClientConnection(tcp.BaseHandler, flow.SimpleStateObject):
# FIXME: Add missing attributes
)
+ @classmethod
+ def _from_state(cls, state):
+ raise NotImplementedError # FIXME
+
def convert_to_ssl(self, *args, **kwargs):
tcp.BaseHandler.convert_to_ssl(self, *args, **kwargs)
self.timestamp_ssl_setup = utils.timestamp()
@@ -77,6 +81,10 @@ class ServerConnection(tcp.TCPClient, flow.SimpleStateObject):
# FIXME: Add missing attributes
)
+ @classmethod
+ def _from_state(cls, state):
+ raise NotImplementedError # FIXME
+
def connect(self):
self.timestamp_start = utils.timestamp()
tcp.TCPClient.connect(self)