aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2014-01-30 05:00:13 +0100
committerMaximilian Hils <git@maximilianhils.com>2014-01-30 05:00:13 +0100
commit40bf42f14a7ec386024a8925502fa3c6e6f0657e (patch)
treedb4d76b903a170ba0315a1fc270b14bf9034f9ee /libmproxy
parent607f7778110d5c2720e60ffcf5f4b0c94e8fcc5f (diff)
downloadmitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.tar.gz
mitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.tar.bz2
mitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.zip
merge flow classes. current status: basic mitmdump working
Diffstat (limited to 'libmproxy')
-rw-r--r--libmproxy/console/common.py2
-rw-r--r--libmproxy/flow.py477
-rw-r--r--libmproxy/protocol/__init__.py46
-rw-r--r--libmproxy/protocol/http.py999
-rw-r--r--libmproxy/protocolold.py (renamed from libmproxy/protocol.py)39
-rw-r--r--libmproxy/proxy.py29
6 files changed, 1112 insertions, 480 deletions
diff --git a/libmproxy/console/common.py b/libmproxy/console/common.py
index 951d2c2a..a94f7ae4 100644
--- a/libmproxy/console/common.py
+++ b/libmproxy/console/common.py
@@ -189,7 +189,7 @@ def format_flow(f, focus, extended=False, hostheader=False, padding=2):
contentdesc = "[no content]"
delta = f.response.timestamp_end - f.response.timestamp_start
- size = len(f.response.content) + f.response.get_header_size()
+ size = f.response.size()
rate = utils.pretty_size(size / ( delta if delta > 0 else 1 ) )
d.update(dict(
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index acebb71d..0f6204cf 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -143,459 +143,68 @@ class SetHeaders:
f.request.headers.add(header, value)
-class decoded(object):
- """
-
- A context manager that decodes a request, response or error, and then
- re-encodes it with the same encoding after execution of the block.
-
- Example:
-
- with decoded(request):
- request.content = request.content.replace("foo", "bar")
- """
- def __init__(self, o):
- self.o = o
- ce = o.headers.get_first("content-encoding")
- if ce in encoding.ENCODINGS:
- self.ce = ce
- else:
- self.ce = None
-
- def __enter__(self):
- if self.ce:
- self.o.decode()
+class StateObject:
+ def _get_state(self):
+ raise NotImplementedError
- def __exit__(self, type, value, tb):
- if self.ce:
- self.o.encode(self.ce)
+ def _load_state(self, state):
+ raise NotImplementedError
+ @classmethod
+ def _from_state(cls, state):
+ raise NotImplementedError
-class StateObject:
def __eq__(self, other):
try:
return self._get_state() == other._get_state()
- except AttributeError:
+ except AttributeError: # we may compare with something that's not a StateObject
return False
-class HTTPMsg(StateObject):
- def get_decoded_content(self):
- """
- Returns the decoded content based on the current Content-Encoding header.
- Doesn't change the message iteself or its headers.
- """
- ce = self.headers.get_first("content-encoding")
- if not self.content or ce not in encoding.ENCODINGS:
- return self.content
- return encoding.decode(ce, self.content)
-
- def decode(self):
- """
- Decodes content based on the current Content-Encoding header, then
- removes the header. If there is no Content-Encoding header, no
- action is taken.
-
- Returns True if decoding succeeded, False otherwise.
- """
- ce = self.headers.get_first("content-encoding")
- if not self.content or ce not in encoding.ENCODINGS:
- return False
- data = encoding.decode(
- ce,
- self.content
- )
- if data is None:
- return False
- self.content = data
- del self.headers["content-encoding"]
- return True
-
- def encode(self, e):
- """
- Encodes content with the encoding e, where e is "gzip", "deflate"
- or "identity".
- """
- # FIXME: Error if there's an existing encoding header?
- self.content = encoding.encode(e, self.content)
- self.headers["content-encoding"] = [e]
-
- def size(self, **kwargs):
- """
- Size in bytes of a fully rendered message, including headers and
- HTTP lead-in.
- """
- hl = len(self._assemble_head(**kwargs))
- if self.content:
- return hl + len(self.content)
- else:
- return hl
-
- def get_content_type(self):
- return self.headers.get_first("content-type")
-
- def get_transmitted_size(self):
- # FIXME: this is inprecise in case chunking is used
- # (we should count the chunking headers)
- if not self.content:
- return 0
- return len(self.content)
-
-
-class Request(HTTPMsg):
+class SimpleStateObject(StateObject):
"""
- An HTTP request.
-
- Exposes the following attributes:
-
- client_conn: ClientConnect object, or None if this is a replay.
-
- headers: ODictCaseless object
-
- content: Content of the request, None, or CONTENT_MISSING if there
- is content associated, but not present. CONTENT_MISSING evaluates
- to False to make checking for the presence of content natural.
-
- scheme: URL scheme (http/https)
-
- host: Host portion of the URL
-
- port: Destination port
-
- path: Path portion of the URL
-
- timestamp_start: Seconds since the epoch signifying request transmission started
-
- method: HTTP method
-
- timestamp_end: Seconds since the epoch signifying request transmission ended
-
- tcp_setup_timestamp: Seconds since the epoch signifying remote TCP connection setup completion time
- (or None, if request didn't results TCP setup)
-
- ssl_setup_timestamp: Seconds since the epoch signifying remote SSL encryption setup completion time
- (or None, if request didn't results SSL setup)
-
+ A StateObject with opinionated conventions that tries to keep everything DRY.
"""
- def __init__(
- self, client_conn, httpversion, host, port,
- scheme, method, path, headers, content, timestamp_start=None,
- timestamp_end=None, tcp_setup_timestamp=None,
- ssl_setup_timestamp=None, ip=None):
- assert isinstance(headers, ODictCaseless)
- self.client_conn = client_conn
- self.httpversion = httpversion
- self.host, self.port, self.scheme = host, port, scheme
- self.method, self.path, self.headers, self.content = method, path, headers, content
- self.timestamp_start = timestamp_start or utils.timestamp()
- self.timestamp_end = max(timestamp_end or utils.timestamp(), timestamp_start)
- self.close = False
- self.tcp_setup_timestamp = tcp_setup_timestamp
- self.ssl_setup_timestamp = ssl_setup_timestamp
- self.ip = ip
-
- # Have this request's cookies been modified by sticky cookies or auth?
- self.stickycookie = False
- self.stickyauth = False
-
- # Live attributes - not serialized
- self.wfile, self.rfile = None, None
-
- def set_live(self, rfile, wfile):
- self.wfile, self.rfile = wfile, rfile
- def is_live(self):
- return bool(self.wfile)
-
- def anticache(self):
- """
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
- """
- delheaders = [
- "if-modified-since",
- "if-none-match",
- ]
- for i in delheaders:
- del self.headers[i]
-
- def anticomp(self):
- """
- Modifies this request to remove headers that will compress the
- resource's data.
- """
- self.headers["accept-encoding"] = ["identity"]
-
- def constrain_encoding(self):
- """
- Limits the permissible Accept-Encoding values, based on what we can
- decode appropriately.
- """
- if self.headers["accept-encoding"]:
- self.headers["accept-encoding"] = [', '.join(
- e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
- )]
-
- def _set_replay(self):
- self.client_conn = None
+ _stateobject_attributes = None
+ """
+ A dict where the keys represent the attributes to be serialized.
+ The values represent the attribute class or type.
+ If the attribute is a class, this class must be a subclass of StateObject.
+ """
- def is_replay(self):
- """
- Is this request a replay?
- """
- if self.client_conn:
- return False
- else:
- return True
+ def _get_state(self):
+ return {attr: (getattr(self, attr)._get_state()
+ if (type(cls) == 'classobj')
+ else getattr(self, attr))
+ for attr, cls in self._stateobject_attributes.iteritems()}
def _load_state(self, state):
- if state["client_conn"]:
- if self.client_conn:
- self.client_conn._load_state(state["client_conn"])
+ for attr, cls in self._stateobject_attributes.iteritems():
+ self._load_state_attr(attr, cls, state)
+
+ def _load_state_attr(self, attribute, cls, state):
+ if state[attribute] is not None:
+ if type(cls) == 'classobj':
+ assert issubclass(cls, StateObject)
+ curr = getattr(self, attribute)
+ if curr:
+ curr._load_state(state[attribute])
+ else:
+ setattr(self, attribute, cls._from_state(state[attribute]))
else:
- self.client_conn = ClientConnect._from_state(state["client_conn"])
+ setattr(self, attribute, cls(state[attribute]))
else:
- self.client_conn = None
- self.host = state["host"]
- self.port = state["port"]
- self.scheme = state["scheme"]
- self.method = state["method"]
- self.path = state["path"]
- self.headers = ODictCaseless._from_state(state["headers"])
- self.content = state["content"]
- self.timestamp_start = state["timestamp_start"]
- self.timestamp_end = state["timestamp_end"]
- self.tcp_setup_timestamp = state["tcp_setup_timestamp"]
- self.ssl_setup_timestamp = state["ssl_setup_timestamp"]
- self.ip = state["ip"]
-
- def _get_state(self):
- return dict(
- client_conn = self.client_conn._get_state() if self.client_conn else None,
- httpversion = self.httpversion,
- host = self.host,
- port = self.port,
- scheme = self.scheme,
- method = self.method,
- path = self.path,
- headers = self.headers._get_state(),
- content = self.content,
- timestamp_start = self.timestamp_start,
- timestamp_end = self.timestamp_end,
- tcp_setup_timestamp = self.tcp_setup_timestamp,
- ssl_setup_timestamp = self.ssl_setup_timestamp,
- ip = self.ip
- )
+ setattr(self, attribute, None)
@classmethod
- def _from_state(klass, state):
- return klass(
- ClientConnect._from_state(state["client_conn"]),
- tuple(state["httpversion"]),
- str(state["host"]),
- state["port"],
- str(state["scheme"]),
- str(state["method"]),
- str(state["path"]),
- ODictCaseless._from_state(state["headers"]),
- state["content"],
- state["timestamp_start"],
- state["timestamp_end"],
- state["tcp_setup_timestamp"],
- state["ssl_setup_timestamp"],
- state["ip"]
- )
-
- def __hash__(self):
- return id(self)
-
- def copy(self):
- c = copy.copy(self)
- c.headers = self.headers.copy()
- return c
-
- def get_form_urlencoded(self):
- """
- Retrieves the URL-encoded form data, returning an ODict object.
- Returns an empty ODict if there is no data or the content-type
- indicates non-form data.
- """
- if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
- return ODict(utils.urldecode(self.content))
- return ODict([])
-
- def set_form_urlencoded(self, odict):
- """
- Sets the body to the URL-encoded form data, and adds the
- appropriate content-type header. Note that this will destory the
- existing body if there is one.
- """
- # FIXME: If there's an existing content-type header indicating a
- # url-encoded form, leave it alone.
- self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
- self.content = utils.urlencode(odict.lst)
-
- def get_path_components(self):
- """
- Returns the path components of the URL as a list of strings.
-
- Components are unquoted.
- """
- _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
- return [urllib.unquote(i) for i in path.split("/") if i]
-
- def set_path_components(self, lst):
- """
- Takes a list of strings, and sets the path component of the URL.
-
- Components are quoted.
- """
- lst = [urllib.quote(i, safe="") for i in lst]
- path = "/" + "/".join(lst)
- scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
-
- def get_query(self):
- """
- Gets the request query string. Returns an ODict object.
- """
- _, _, _, _, query, _ = urlparse.urlparse(self.get_url())
- if query:
- return ODict(utils.urldecode(query))
- return ODict([])
-
- def set_query(self, odict):
- """
- Takes an ODict object, and sets the request query string.
- """
- scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
- query = utils.urlencode(odict.lst)
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
-
- def get_url(self, hostheader=False):
- """
- Returns a URL string, constructed from the Request's URL compnents.
-
- If hostheader is True, we use the value specified in the request
- Host header to construct the URL.
- """
- if hostheader:
- host = self.headers.get_first("host") or self.host
- else:
- host = self.host
- host = host.encode("idna")
- return utils.unparse_url(self.scheme, host, self.port, self.path).encode('ascii')
-
- def set_url(self, url):
- """
- Parses a URL specification, and updates the Request's information
- accordingly.
-
- Returns False if the URL was invalid, True if the request succeeded.
- """
- parts = http.parse_url(url)
- if not parts:
- return False
- self.scheme, self.host, self.port, self.path = parts
- return True
-
- def get_cookies(self):
- cookie_headers = self.headers.get("cookie")
- if not cookie_headers:
- return None
-
- cookies = []
- for header in cookie_headers:
- pairs = [pair.partition("=") for pair in header.split(';')]
- cookies.extend((pair[0],(pair[2],{})) for pair in pairs)
- return dict(cookies)
-
- def get_header_size(self):
- FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
- assembled_header = FMT % (
- self.method,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(self.headers)
- )
- return len(assembled_header)
-
- def _assemble_head(self, proxy=False):
- FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
- FMT_PROXY = '%s %s://%s:%s%s HTTP/%s.%s\r\n%s\r\n'
-
- headers = self.headers.copy()
- utils.del_all(
- headers,
- [
- 'proxy-connection',
- 'keep-alive',
- 'connection',
- 'transfer-encoding'
- ]
- )
- if not 'host' in headers:
- headers["host"] = [utils.hostport(self.scheme, self.host, self.port)]
- content = self.content
- if content:
- headers["Content-Length"] = [str(len(content))]
- else:
- content = ""
- if self.close:
- headers["connection"] = ["close"]
- if not proxy:
- return FMT % (
- self.method,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(headers)
- )
- else:
- return FMT_PROXY % (
- self.method,
- self.scheme,
- self.host,
- self.port,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(headers)
- )
-
- def _assemble(self, _proxy = False):
- """
- Assembles the request for transmission to the server. We make some
- modifications to make sure interception works properly.
-
- Returns None if the request cannot be assembled.
- """
- if self.content == CONTENT_MISSING:
- return None
- head = self._assemble_head(_proxy)
- if self.content:
- return head + self.content
- else:
- return head
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the request. Encoded content will be decoded before
- replacement, and re-encoded afterwards.
-
- Returns the number of replacements made.
- """
- with decoded(self):
- self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
- self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
- c += pc
- c += self.headers.replace(pattern, repl, *args, **kwargs)
- return c
+ def _from_state(cls, state):
+ f = cls()
+ f._load_state(state)
+ return f
-class Response(HTTPMsg):
+class Response(object):
"""
An HTTP response.
@@ -1269,7 +878,7 @@ class State(object):
"""
Add a response to the state. Returns the matching flow.
"""
- f = self._flow_map.get(resp.request)
+ f = self._flow_map.get(resp.flow)
if not f:
return False
f.response = resp
@@ -1596,7 +1205,7 @@ class FlowMaster(controller.Master):
return f
def handle_request(self, r):
- if r.is_live():
+ if False and r.is_live(): # FIXME
app = self.apps.get(r)
if app:
# FIXME: for the tcp proxy, use flow.client_conn.wfile
diff --git a/libmproxy/protocol/__init__.py b/libmproxy/protocol/__init__.py
new file mode 100644
index 00000000..5419c5ef
--- /dev/null
+++ b/libmproxy/protocol/__init__.py
@@ -0,0 +1,46 @@
+KILL = 0 # const for killed requests
+
+
+class ConnectionTypeChange(Exception):
+ """
+ Gets raised if the connection type has been changed (e.g. after HTTP/1.1 101 Switching Protocols).
+ It's up to the raising ProtocolHandler to specify the new conntype before raising the exception.
+ """
+ pass
+
+
+class ProtocolHandler(object):
+ def __init__(self, c):
+ self.c = c
+
+ def handle_messages(self):
+ """
+ This method gets called if a client connection has been made. Depending on the proxy settings,
+ a server connection might already exist as well.
+ """
+ raise NotImplementedError
+
+ def handle_error(self, error):
+ """
+ This method gets called should there be an uncaught exception during the connection.
+ This might happen outside of handle_messages, e.g. if the initial SSL handshake fails in transparent mode.
+ """
+ raise NotImplementedError
+
+
+from .http import HTTPHandler
+
+
+def _handler(conntype, connection_handler):
+ if conntype == "http":
+ return HTTPHandler(connection_handler)
+
+ raise NotImplementedError
+
+
+def handle_messages(conntype, connection_handler):
+ return _handler(conntype, connection_handler).handle_messages()
+
+
+def handle_error(conntype, connection_handler, error):
+ return _handler(conntype, connection_handler).handle_error(error) \ No newline at end of file
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py
new file mode 100644
index 00000000..c161d297
--- /dev/null
+++ b/libmproxy/protocol/http.py
@@ -0,0 +1,999 @@
+import Cookie
+from email.utils import parsedate_tz, formatdate, mktime_tz
+import urllib
+import urlparse
+import time
+import copy
+from ..flow import SimpleStateObject
+from netlib import http, tcp, http_status
+from netlib.odict import ODict, ODictCaseless
+import netlib.utils
+from .. import encoding, utils, version
+from ..proxy import ProxyError, ServerConnection, ClientConnection
+from . import ProtocolHandler, ConnectionTypeChange, KILL
+import libmproxy.flow
+
+HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
+CONTENT_MISSING = 0
+
+LEGACY = True
+
+
+def get_line(fp):
+ """
+ Get a line, possibly preceded by a blank.
+ """
+ line = fp.readline()
+ if line == "\r\n" or line == "\n": # Possible leftover from previous message
+ line = fp.readline()
+ if line == "":
+ raise tcp.NetLibDisconnect
+ return line
+
+
+class decoded(object):
+ """
+ A context manager that decodes a request or response, and then
+ re-encodes it with the same encoding after execution of the block.
+
+ Example:
+ with decoded(request):
+ request.content = request.content.replace("foo", "bar")
+ """
+
+ def __init__(self, o):
+ self.o = o
+ ce = o.headers.get_first("content-encoding")
+ if ce in encoding.ENCODINGS:
+ self.ce = ce
+ else:
+ self.ce = None
+
+ def __enter__(self):
+ if self.ce:
+ self.o.decode()
+
+ def __exit__(self, type, value, tb):
+ if self.ce:
+ self.o.encode(self.ce)
+
+
+
+class BackreferenceMixin(object):
+ """
+ If an attribute from the _backrefattr tuple is set,
+ this mixin sets a reference back on the attribute object.
+ Example:
+ e = Error()
+ f = Flow()
+ f.error = e
+ assert f is e.flow
+ """
+ _backrefattr = tuple()
+
+ def __setattr__(self, key, value):
+ super(BackreferenceMixin, self).__setattr__(key, value)
+ if key in self._backrefattr and value is not None:
+ # check if there is already a different object set as backref
+ assert (getattr(value, self._backrefname, self) or self) is self
+ setattr(value, self._backrefname, self)
+
+
+class Error(SimpleStateObject):
+ """
+ An Error.
+
+ This is distinct from an HTTP error response (say, a code 500), which
+ is represented by a normal Response object. This class is responsible
+ for indicating errors that fall outside of normal HTTP communications,
+ like interrupted connections, timeouts, protocol errors.
+
+ Exposes the following attributes:
+
+ flow: Flow object
+ msg: Message describing the error
+ timestamp: Seconds since the epoch
+ """
+ def __init__(self, msg, timestamp=None):
+ self.msg = msg
+ self.timestamp = timestamp or utils.timestamp()
+
+ _stateobject_attributes = dict(
+ msg=str,
+ timestamp=float
+ )
+
+ def copy(self):
+ c = copy.copy(self)
+ return c
+
+
+class Flow(SimpleStateObject, BackreferenceMixin):
+ _backrefattr = ("error",)
+ _backrefname = "flow"
+ _stateobject_attributes = dict(
+ error=Error,
+ client_conn=ClientConnection,
+ server_conn=ServerConnection,
+ conntype=str
+ )
+
+ def __init__(self, conntype, client_conn, server_conn, error):
+ self.conntype = conntype
+ self.client_conn = client_conn
+ self.server_conn = server_conn
+ self.error = error
+
+ def _get_state(self):
+ d = super(Flow, self)._get_state()
+ d.update(version=version.IVERSION)
+ return d
+
+ @classmethod
+ def _from_state(cls, state):
+ f = cls(None, None, None, None)
+ f._load_state(state)
+ return f
+
+ def copy(self):
+ f = copy.copy(self)
+ if self.error:
+ f.error = self.error.copy()
+ return f
+
+
+class HTTPMessage(SimpleStateObject):
+ def __init__(self):
+ self.flow = None # Will usually set by backref mixin
+
+ def get_decoded_content(self):
+ """
+ Returns the decoded content based on the current Content-Encoding header.
+ Doesn't change the message itself or its headers.
+ """
+ ce = self.headers.get_first("content-encoding")
+ if not self.content or ce not in encoding.ENCODINGS:
+ return self.content
+ return encoding.decode(ce, self.content)
+
+ def decode(self):
+ """
+ Decodes content based on the current Content-Encoding header, then
+ removes the header. If there is no Content-Encoding header, no
+ action is taken.
+
+ Returns True if decoding succeeded, False otherwise.
+ """
+ ce = self.headers.get_first("content-encoding")
+ if not self.content or ce not in encoding.ENCODINGS:
+ return False
+ data = encoding.decode(ce, self.content)
+ if data is None:
+ return False
+ self.content = data
+ del self.headers["content-encoding"]
+ return True
+
+ def encode(self, e):
+ """
+ Encodes content with the encoding e, where e is "gzip", "deflate"
+ or "identity".
+ """
+ # FIXME: Error if there's an existing encoding header?
+ self.content = encoding.encode(e, self.content)
+ self.headers["content-encoding"] = [e]
+
+ def size(self, **kwargs):
+ """
+ Size in bytes of a fully rendered message, including headers and
+ HTTP lead-in.
+ """
+ hl = len(self._assemble_head(**kwargs))
+ if self.content:
+ return hl + len(self.content)
+ else:
+ return hl
+
+ def copy(self):
+ c = copy.copy(self)
+ c.headers = self.headers.copy()
+ return c
+
+ def replace(self, pattern, repl, *args, **kwargs):
+ """
+ Replaces a regular expression pattern with repl in both the headers
+ and the body of the message. Encoded content will be decoded
+ before replacement, and re-encoded afterwards.
+
+ Returns the number of replacements made.
+ """
+ with decoded(self):
+ self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
+ c += self.headers.replace(pattern, repl, *args, **kwargs)
+ return c
+
+ @classmethod
+ def from_stream(cls, rfile, include_content=True, body_size_limit=None):
+ """
+ Parse an HTTP message from a file stream
+ """
+ raise NotImplementedError
+
+ def _assemble_first_line(self):
+ """
+ Returns the assembled request/response line
+ """
+ raise NotImplementedError
+
+ def _assemble_headers(self):
+ """
+ Returns the assembled headers
+ """
+ raise NotImplementedError
+
+ def _assemble_head(self):
+ """
+ Returns the assembled request/response line plus headers
+ """
+ raise NotImplementedError
+
+ def _assemble(self):
+ """
+ Returns the assembled request/response
+ """
+ raise NotImplementedError
+
+
+class HTTPRequest(HTTPMessage):
+ """
+ An HTTP request.
+
+ Exposes the following attributes:
+
+ flow: Flow object the request belongs to
+
+ headers: ODictCaseless object
+
+ content: Content of the request, None, or CONTENT_MISSING if there
+ is content associated, but not present. CONTENT_MISSING evaluates
+ to False to make checking for the presence of content natural.
+
+ form_in: The request form which mitmproxy has received. The following values are possible:
+ - origin (GET /index.html)
+ - absolute (GET http://example.com:80/index.html)
+ - authority-form (CONNECT example.com:443)
+ - asterisk-form (OPTIONS *)
+ Details: http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-25#section-5.3
+
+ form_out: The request form which mitmproxy has sent out to the destination
+
+ method: HTTP method
+
+ scheme: URL scheme (http/https) (absolute-form only)
+
+ host: Host portion of the URL (absolute-form and authority-form only)
+
+ port: Destination port (absolute-form and authority-form only)
+
+ path: Path portion of the URL (not present in authority-form)
+
+ httpversion: HTTP version tuple
+
+ timestamp_start: Timestamp indicating when request transmission started
+
+ timestamp_end: Timestamp indicating when request transmission ended
+ """
+ def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers, content,
+ timestamp_start, timestamp_end, form_out=None):
+ assert isinstance(headers, ODictCaseless) or not headers
+ HTTPMessage.__init__(self)
+
+ self.form_in = form_in
+ self.method = method
+ self.scheme = scheme
+ self.host = host
+ self.port = port
+ self.path = path
+ self.httpversion = httpversion
+ self.headers = headers
+ self.content = content
+ self.timestamp_start = timestamp_start
+ self.timestamp_end = timestamp_end
+ self.form_out = form_out or form_in
+
+ ## (Attributes below don't get serialized)
+
+ # Have this request's cookies been modified by sticky cookies or auth?
+ self.stickycookie = False
+ self.stickyauth = False
+ # Is this request replayed?
+ self.is_replay = False
+
+ _stateobject_attributes = dict(
+ form_in=str,
+ method=str,
+ scheme=str,
+ host=str,
+ port=int,
+ path=str,
+ httpversion=tuple,
+ headers=ODictCaseless,
+ content=str,
+ timestamp_start=float,
+ timestamp_end=float,
+ form_out=str
+ )
+
+ @classmethod
+ def _from_state(cls, state):
+ f = cls(None, None, None, None, None, None, None, None, None, None, None)
+ f._load_state(state)
+ return f
+
+ @classmethod
+ def from_stream(cls, rfile, include_content=True, body_size_limit=None):
+ """
+ Parse an HTTP request from a file stream
+ """
+ httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end \
+ = None, None, None, None, None, None, None, None, None, None
+
+ rfile.reset_timestamps()
+ request_line = get_line(rfile)
+ timestamp_start = rfile.first_byte_timestamp
+
+ request_line_parts = http.parse_init(request_line)
+ if not request_line_parts:
+ raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
+ method, path, httpversion = request_line_parts
+
+ if path == '*':
+ form_in = "asterisk"
+ elif path.startswith("/"):
+ form_in = "origin"
+ if not netlib.utils.isascii(path):
+ raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
+ elif method.upper() == 'CONNECT':
+ form_in = "authority"
+ r = http.parse_init_connect(request_line)
+ if not r:
+ raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
+ host, port, _ = r
+ path = None
+ else:
+ form_in = "absolute"
+ r = http.parse_init_proxy(request_line)
+ if not r:
+ raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
+ _, scheme, host, port, path, _ = r
+
+ headers = http.read_headers(rfile)
+ if headers is None:
+ raise http.HttpError(400, "Invalid headers")
+
+ if include_content:
+ content = http.read_http_body(rfile, headers, body_size_limit, True)
+ timestamp_end = utils.timestamp()
+
+ return HTTPRequest(form_in, method, scheme, host, port, path, httpversion, headers, content,
+ timestamp_start, timestamp_end)
+
+ def _assemble_first_line(self, form=None):
+ form = form or self.form_out
+
+ if form == "asterisk" or \
+ form == "origin":
+ request_line = '%s %s HTTP/%s.%s' % (self.method, self.path, self.httpversion[0], self.httpversion[1])
+ elif form == "authority":
+ request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
+ self.httpversion[0], self.httpversion[1])
+ elif form == "absolute":
+ request_line = '%s %s://%s:%s%s HTTP/%s.%s' % \
+ (self.method, self.scheme, self.host, self.port, self.path,
+ self.httpversion[0], self.httpversion[1])
+ else:
+ raise http.HttpError(400, "Invalid request form")
+ return request_line
+
+ def _assemble_headers(self):
+ headers = self.headers.copy()
+ utils.del_all(
+ headers,
+ [
+ 'Proxy-Connection',
+ 'Keep-Alive',
+ 'Connection',
+ 'Transfer-Encoding'
+ ]
+ )
+ if not 'host' in headers:
+ headers["Host"] = [utils.hostport(self.scheme, self.host, self.port)]
+
+ if self.content:
+ headers["Content-Length"] = [str(len(self.content))]
+ elif 'Transfer-Encoding' in self.headers: # content-length for e.g. chuncked transfer-encoding with no content
+ headers["Content-Length"] = ["0"]
+
+ return str(headers)
+
+ def _assemble_head(self, form=None):
+ return "%s\r\n%s\r\n" % (self._assemble_first_line(form), self._assemble_headers())
+
+ def _assemble(self, form=None):
+ """
+ Assembles the request for transmission to the server. We make some
+ modifications to make sure interception works properly.
+
+ Raises an Exception if the request cannot be assembled.
+ """
+ if self.content == CONTENT_MISSING:
+ raise Exception("CONTENT_MISSING") # FIXME correct exception class
+ head = self._assemble_head(form)
+ if self.content:
+ return head + self.content
+ else:
+ return head
+
+ def __hash__(self):
+ return id(self)
+
+ def anticache(self):
+ """
+ Modifies this request to remove headers that might produce a cached
+ response. That is, we remove ETags and If-Modified-Since headers.
+ """
+ delheaders = [
+ "if-modified-since",
+ "if-none-match",
+ ]
+ for i in delheaders:
+ del self.headers[i]
+
+ def anticomp(self):
+ """
+ Modifies this request to remove headers that will compress the
+ resource's data.
+ """
+ self.headers["accept-encoding"] = ["identity"]
+
+ def constrain_encoding(self):
+ """
+ Limits the permissible Accept-Encoding values, based on what we can
+ decode appropriately.
+ """
+ if self.headers["accept-encoding"]:
+ self.headers["accept-encoding"] = [', '.join(
+ e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
+ )]
+
+
+ def get_form_urlencoded(self):
+ """
+ Retrieves the URL-encoded form data, returning an ODict object.
+ Returns an empty ODict if there is no data or the content-type
+ indicates non-form data.
+ """
+ if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
+ return ODict(utils.urldecode(self.content))
+ return ODict([])
+
+ def set_form_urlencoded(self, odict):
+ """
+ Sets the body to the URL-encoded form data, and adds the
+ appropriate content-type header. Note that this will destroy the
+ existing body if there is one.
+ """
+ # FIXME: If there's an existing content-type header indicating a
+ # url-encoded form, leave it alone.
+ self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
+ self.content = utils.urlencode(odict.lst)
+
+ def get_path_components(self):
+ """
+ Returns the path components of the URL as a list of strings.
+
+ Components are unquoted.
+ """
+ _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
+ return [urllib.unquote(i) for i in path.split("/") if i]
+
+ def set_path_components(self, lst):
+ """
+ Takes a list of strings, and sets the path component of the URL.
+
+ Components are quoted.
+ """
+ lst = [urllib.quote(i, safe="") for i in lst]
+ path = "/" + "/".join(lst)
+ scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
+ self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
+
+    def get_query(self):
+        """
+        Returns the query string of the request URL as an ODict object.
+        The ODict is empty if the URL has no query string.
+        """
+        # Index 4 of the urlparse result is the query string.
+        query = urlparse.urlparse(self.get_url())[4]
+        if not query:
+            return ODict([])
+        return ODict(utils.urldecode(query))
+
+    def set_query(self, odict):
+        """
+        Replaces the query string of the request URL with the URL-encoded
+        contents of the given ODict object. All other URL parts are kept.
+        """
+        url_parts = list(urlparse.urlparse(self.get_url()))
+        # Index 4 of the urlparse result is the query string.
+        url_parts[4] = utils.urlencode(odict.lst)
+        self.set_url(urlparse.urlunparse(url_parts))
+
+    def get_url(self, hostheader=False):
+        """
+        Returns a URL string, constructed from the Request's URL components
+        (scheme, host, port and path).
+
+        If hostheader is True, the value of the request's Host header is
+        used as host; self.host is the fallback when that header is missing
+        or empty.
+        """
+        host = self.headers.get_first("host") if hostheader else None
+        if not host:
+            host = self.host
+        idna_host = host.encode("idna")
+        url = utils.unparse_url(self.scheme, idna_host, self.port, self.path)
+        return url.encode('ascii')
+
+    def set_url(self, url):
+        """
+        Parses a URL specification with http.parse_url and, on success,
+        overwrites the Request's scheme, host, port and path.
+
+        Returns True if the URL could be parsed, False otherwise (in which
+        case the request is left untouched).
+        """
+        parsed = http.parse_url(url)
+        if parsed:
+            self.scheme, self.host, self.port, self.path = parsed
+            return True
+        return False
+
+    def get_cookies(self):
+        """
+        Parses the Cookie request headers.
+
+        Returns None if the request has no Cookie header. Otherwise returns
+        a dict that maps each cookie name to a (value, {}) tuple; the empty
+        dict keeps the shape identical to HTTPResponse.get_cookies, where
+        it holds the cookie attributes.
+        """
+        cookie_headers = self.headers.get("cookie")
+        if not cookie_headers:
+            return None
+
+        cookies = []
+        for header in cookie_headers:
+            for pair in header.split(';'):
+                name, _, value = pair.partition("=")
+                # Pairs are separated by "; ", so every name after the
+                # first one starts with a space that is not part of it.
+                name = name.strip()
+                if not name:
+                    # Empty segment, e.g. produced by a trailing ";".
+                    continue
+                cookies.append((name, (value, {})))
+        return dict(cookies)
+
+    def replace(self, pattern, repl, *args, **kwargs):
+        """
+        Applies a regular expression substitution (pattern -> repl) to the
+        request: HTTPMessage.replace handles the shared message parts, and
+        the request path is substituted here.
+
+        Extra positional and keyword arguments are passed through to both
+        substitutions.
+
+        Returns the total number of replacements made.
+        """
+        message_count = HTTPMessage.replace(self, pattern, repl, *args, **kwargs)
+        new_path, path_count = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
+        self.path = new_path
+        return message_count + path_count
+
+
+class HTTPResponse(HTTPMessage):
+    """
+    An HTTP response.
+
+    Exposes the following attributes:
+
+        flow: Flow object the response belongs to
+
+        code: HTTP response code
+
+        msg: HTTP response message
+
+        headers: ODict object
+
+        content: Content of the response, None, or CONTENT_MISSING if there
+        is content associated, but not present. CONTENT_MISSING evaluates
+        to False to make checking for the presence of content natural.
+
+        httpversion: HTTP version tuple
+
+        timestamp_start: Timestamp indicating when response transmission started
+
+        timestamp_end: Timestamp indicating when response transmission ended
+    """
+    def __init__(self, httpversion, code, msg, headers, content, timestamp_start, timestamp_end):
+        # headers may only be None for the empty placeholder instance that
+        # _from_state() creates right before _load_state() fills it in.
+        assert isinstance(headers, ODictCaseless) or headers is None
+        HTTPMessage.__init__(self)
+
+        self.httpversion = httpversion
+        self.code = code
+        self.msg = msg
+        self.headers = headers
+        self.content = content
+        self.timestamp_start = timestamp_start
+        self.timestamp_end = timestamp_end
+
+        ## (Attributes below don't get serialized)
+
+        # Is this response replayed?
+        self.is_replay = False
+
+    _stateobject_attributes = dict(
+        httpversion=tuple,
+        code=int,
+        msg=str,
+        headers=ODictCaseless,
+        content=str,
+        timestamp_start=float,
+        timestamp_end=float
+    )
+
+    @classmethod
+    def _from_state(cls, state):
+        """
+        Creates a response from a serialized state: builds an empty
+        placeholder instance and loads the state into it.
+        """
+        # One None per constructor parameter (there are seven).
+        f = cls(None, None, None, None, None, None, None)
+        f._load_state(state)
+        return f
+
+    @classmethod
+    def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
+        """
+        Parse an HTTP response from a file stream.
+
+        Raises NotImplementedError if include_content is False.
+        """
+        if not include_content:
+            raise NotImplementedError
+
+        rfile.reset_timestamps()
+        httpversion, code, msg, headers, content = http.read_response(
+            rfile,
+            request_method,
+            body_size_limit)
+        timestamp_start = rfile.first_byte_timestamp
+        timestamp_end = utils.timestamp()
+        return HTTPResponse(httpversion, code, msg, headers, content, timestamp_start, timestamp_end)
+
+    def _assemble_first_line(self):
+        """
+        Returns the status line, e.g. "HTTP/1.1 200 OK" (without line break).
+        """
+        return 'HTTP/%s.%s %s %s' % (self.httpversion[0], self.httpversion[1], self.code, self.msg)
+
+    def _assemble_headers(self):
+        """
+        Returns the headers to send as a string. Works on a copy:
+        Proxy-Connection and Transfer-Encoding are removed and
+        Content-Length is set to match the content we hold.
+        """
+        headers = self.headers.copy()
+        utils.del_all(
+            headers,
+            [
+                'Proxy-Connection',
+                'Transfer-Encoding'
+            ]
+        )
+        if self.content:
+            headers["Content-Length"] = [str(len(self.content))]
+        elif 'Transfer-Encoding' in self.headers:  # add content-length for chunked transfer-encoding with no content
+            headers["Content-Length"] = ["0"]
+
+        return str(headers)
+
+    def _assemble_head(self):
+        """
+        Returns status line and headers, each followed by a CRLF.
+        """
+        return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers())
+
+    def _assemble(self):
+        """
+        Assembles the response for transmission to the client. We make some
+        modifications to make sure interception works properly.
+
+        Raises an Exception if the response cannot be assembled.
+        """
+        if self.content == CONTENT_MISSING:
+            raise Exception("CONTENT_MISSING")  # FIXME correct exception class
+        head = self._assemble_head()
+        if self.content:
+            return head + self.content
+        else:
+            return head
+
+    def _refresh_cookie(self, c, delta):
+        """
+        Takes a cookie string c and a time delta in seconds, and returns
+        a refreshed cookie string.
+        """
+        c = Cookie.SimpleCookie(str(c))
+        for i in c.values():
+            if "expires" in i:
+                d = parsedate_tz(i["expires"])
+                if d:
+                    d = mktime_tz(d) + delta
+                    i["expires"] = formatdate(d)
+                else:
+                    # This can happen when the expires tag is invalid.
+                    # reddit.com sends an expires tag like this: "Thu, 31 Dec
+                    # 2037 23:59:59 GMT", which is valid RFC 1123, but not
+                    # strictly correct according to the cookie spec. Browsers
+                    # appear to parse this tolerantly - maybe we should too.
+                    # For now, we just ignore this.
+                    del i["expires"]
+        return c.output(header="").strip()
+
+    def refresh(self, now=None):
+        """
+        This fairly complex and heuristic function refreshes a server
+        response for replay.
+
+            - It adjusts date, expires and last-modified headers.
+            - It adjusts cookie expiration.
+
+        now is the reference timestamp in seconds; it defaults to the
+        current time.
+        """
+        # Compare against None so that an explicit timestamp of 0 is kept.
+        if now is None:
+            now = time.time()
+        delta = now - self.timestamp_start
+        refresh_headers = [
+            "date",
+            "expires",
+            "last-modified",
+        ]
+        for i in refresh_headers:
+            if i in self.headers:
+                d = parsedate_tz(self.headers[i][0])
+                if d:
+                    new = mktime_tz(d) + delta
+                    self.headers[i] = [formatdate(new)]
+        c = []
+        for i in self.headers["set-cookie"]:
+            c.append(self._refresh_cookie(i, delta))
+        if c:
+            self.headers["set-cookie"] = c
+
+    def get_cookies(self):
+        """
+        Parses the Set-Cookie response headers.
+
+        Returns None if there is no Set-Cookie header. Otherwise returns a
+        dict that maps each cookie name to a (value, attributes) tuple,
+        where attributes is a dict with lower-cased keys.
+        """
+        cookie_headers = self.headers.get("set-cookie")
+        if not cookie_headers:
+            return None
+
+        cookies = []
+        for header in cookie_headers:
+            pairs = [pair.partition("=") for pair in header.split(';')]
+            cookie_name = pairs[0][0]  # the key of the first key/value pairs
+            cookie_value = pairs[0][2]  # the value of the first key/value pairs
+            cookie_parameters = {key.strip().lower(): value.strip() for key, sep, value in pairs[1:]}
+            cookies.append((cookie_name, (cookie_value, cookie_parameters)))
+        return dict(cookies)
+
+
+class HTTPFlow(Flow):
+    """
+    A Flow is a collection of objects representing a single HTTP
+    transaction. The main attributes are:
+
+        request: HTTPRequest object
+        response: HTTPResponse object
+        error: Error object
+
+    Note that it's possible for a Flow to have both a response and an error
+    object. This might happen, for instance, when a response was received
+    from the server, but there was an error sending it back to the client.
+
+    The following additional attributes are exposed:
+
+        intercepting: Is this flow currently being intercepted?
+    """
+    _backrefattr = Flow._backrefattr + ("request", "response")
+    _stateobject_attributes = Flow._stateobject_attributes.copy()
+    _stateobject_attributes.update(
+        request=HTTPRequest,
+        response=HTTPResponse
+    )
+
+    def __init__(self, client_conn, server_conn, error, request, response):
+        Flow.__init__(self, "http", client_conn, server_conn, error)
+        self.request, self.response = request, response
+
+    @classmethod
+    def _from_state(cls, state):
+        """
+        Creates a flow from a serialized state: builds an empty placeholder
+        instance and loads the state into it.
+        """
+        f = cls(None, None, None, None, None)
+        f._load_state(state)
+        return f
+
+    def copy(self):
+        """
+        Returns a copy of this flow in which request and response (if
+        present) are copies of their own as well.
+        """
+        f = super(HTTPFlow, self).copy()
+        if self.request:
+            f.request = self.request.copy()
+        if self.response:
+            # The copied flow must carry a copy of the response here, not a
+            # second copy of the request.
+            f.response = self.response.copy()
+        return f
+
+
+class HttpAuthenticationError(Exception):
+    """
+    Signals that proxy authentication failed.
+
+        auth_headers: the headers handed in by the raiser (None by default)
+    """
+    def __init__(self, auth_headers=None):
+        self.auth_headers = auth_headers
+
+    def __str__(self):
+        # Fixed text; auth_headers is deliberately not part of it.
+        return "HttpAuthenticationError"
+
+
+class HTTPHandler(ProtocolHandler):
+    def handle_messages(self):
+        """
+        Handles flows one after another until handle_flow reports that no
+        further flow should be processed, then marks the connection handler
+        for closing.
+        """
+        keep_going = True
+        while keep_going:
+            keep_going = self.handle_flow()
+        self.c.close = True
+
+    def get_response_from_server(self, request):
+        """
+        Sends the assembled request over the server connection and returns
+        the HTTPResponse read from it.
+
+        If the connection turns out to be closed, we reconnect once and try
+        again; a second failure is re-raised.
+        """
+        raw_request = request._assemble()
+
+        for attempt in (0, 1):
+            try:
+                self.c.server_conn.wfile.write(raw_request)
+                self.c.server_conn.wfile.flush()
+                return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
+                                                body_size_limit=self.c.config.body_size_limit)
+            except (tcp.NetLibDisconnect, http.HttpErrorConnClosed), err:
+                self.c.log("error in server communication: %s" % str(err))
+                if attempt >= 1:
+                    raise err
+                # In any case, we try to reconnect at least once.
+                # This is necessary because it might be possible that we already initiated an upstream connection
+                # after clientconnect that has already been expired, e.g consider the following event log:
+                # > clientconnect (transparent mode destination known)
+                # > serverconnect
+                # > read n% of large request
+                # > server detects timeout, disconnects
+                # > read (100-n)% of large request
+                # > send large request upstream
+                self.c.server_reconnect()
+
+    def handle_flow(self):
+        """
+        Handles a single request/response exchange: reads the request from
+        the client, asks the master about it, obtains the response (from
+        the master's reply or from the server), asks the master again and
+        sends the response to the client.
+
+        Returns True if another flow should be handled on this connection
+        and False otherwise (flow killed, connection to be closed, error).
+        """
+        flow = HTTPFlow(self.c.client_conn, self.c.server_conn, None, None, None)
+        try:
+            flow.request = HTTPRequest.from_stream(self.c.client_conn.rfile,
+                                                   body_size_limit=self.c.config.body_size_limit)
+            self.c.log("request", [flow.request._assemble_first_line(flow.request.form_in)])
+
+            request_reply = self.c.channel.ask("request" if LEGACY else "httprequest",
+                                               flow.request if LEGACY else flow)
+            if request_reply is None or request_reply == KILL:
+                return False
+
+            if isinstance(request_reply, HTTPResponse) or (LEGACY and isinstance(request_reply, libmproxy.flow.Response)):
+                # The master answered the request itself.
+                flow.response = request_reply
+            else:
+                self.process_request(flow.request)
+                flow.response = self.get_response_from_server(flow.request)
+
+            if LEGACY:
+                response_line = flow.response._assemble().splitlines()[0]
+            else:
+                # HTTPResponse names its status line helper
+                # _assemble_first_line.
+                response_line = flow.response._assemble_first_line()
+            self.c.log("response", [response_line])
+            response_reply = self.c.channel.ask("response" if LEGACY else "httpresponse",
+                                                flow.response if LEGACY else flow)
+            if response_reply is None or response_reply == KILL:
+                return False
+
+            raw = flow.response._assemble()
+            self.c.client_conn.wfile.write(raw)
+            self.c.client_conn.wfile.flush()
+            flow.timestamp_end = utils.timestamp()
+
+            if (http.connection_close(flow.request.httpversion, flow.request.headers) or
+                    http.connection_close(flow.response.httpversion, flow.response.headers)):
+                return False
+
+            if flow.request.form_in == "authority":
+                self.ssl_upgrade(flow.request)
+            return True
+        except (HttpAuthenticationError, http.HttpError, ProxyError, tcp.NetLibError), e:
+            self.handle_error(e, flow)
+        return False
+
+    def handle_error(self, error, flow=None):
+        """
+        Logs an error, attaches it to the flow (if there is one) and
+        notifies the master, then tries to send a matching error response
+        to the client.
+
+        HttpAuthenticationError is mapped to 407, http.HttpError and
+        ProxyError carry their own code and message, tcp.NetLibError is
+        mapped to 502. For any other error no response is sent.
+        """
+        code, message, headers = None, None, None
+        if isinstance(error, HttpAuthenticationError):
+            code, message, headers = 407, "Proxy Authentication Required", error.auth_headers
+        elif isinstance(error, (http.HttpError, ProxyError)):
+            code, message = error.code, error.msg
+        elif isinstance(error, tcp.NetLibError):
+            code = 502
+            message = error.message or error.__class__
+
+        if code:
+            err = "%s: %s" % (code, message)
+        else:
+            err = message
+
+        self.c.log("error: %s" % err)
+
+        if flow:
+            flow.error = Error(err)
+            self.c.channel.ask("error" if LEGACY else "httperror",
+                               flow.error if LEGACY else flow)
+        else:
+            pass  # FIXME: Is there any use case for persisting errors that occur outside of flows?
+
+        if code:
+            try:
+                self.send_error(code, message, headers)
+            except Exception:
+                # Best effort only: the client connection may already be
+                # unusable. Catching Exception rather than everything lets
+                # KeyboardInterrupt and SystemExit propagate.
+                pass
+
+    def send_error(self, code, message, headers):
+        """
+        Writes a small HTML error response to the client connection.
+
+            code: HTTP status code; the reason phrase is looked up in
+            http_status.RESPONSES ("Unknown" if the code is not listed)
+            message: text that is placed into the HTML body
+            headers: optional mapping of extra response headers, or None
+        """
+        reason = http_status.RESPONSES.get(code, "Unknown")
+        # NOTE(review): message is inserted into the markup as is - confirm
+        # whether it can contain client-controlled text that needs escaping.
+        body = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
+               (code, reason, message)
+        out = self.c.client_conn.wfile
+        out.write("HTTP/1.1 %s %s\r\n" % (code, reason))
+        out.write("Server: %s\r\n" % self.c.server_version)
+        out.write("Content-type: text/html\r\n")
+        out.write("Content-Length: %d\r\n" % len(body))
+        if headers:
+            for name, value in headers.items():
+                out.write("%s: %s\r\n" % (name, value))
+        out.write("Connection: close\r\n")
+        out.write("\r\n")
+        out.write(body)
+        out.flush()
+
+ def ssl_upgrade(self, upstream_request=None):
+ """
+ Upgrade the connection to SSL after an authority (CONNECT) request has been made.
+ If the authority request has been forwarded upstream (because we have another proxy server there),
+ monkey-patch the ConnectionHandler.server_reconnect function to resend the request on reconnect.
+
+ This isn't particularly beautiful code, but it isolates this rare edge-case from the
+ protocol-agnostic ConnectionHandler
+
+ upstream_request: the CONNECT request that was forwarded upstream, if any.
+
+ Always raises ConnectionTypeChange when it is done.
+ """
+ self.c.mode = "transparent"
+ self.c.determine_conntype()
+ self.c.establish_ssl(server=True, client=True)
+
+ if upstream_request:
+ self.c.log("Hook reconnect function")
+ # Keep the original function; the replacement below calls it first.
+ original_reconnect_func = self.c.server_reconnect
+
+ def reconnect_http_proxy():
+ # Replacement for server_reconnect: reconnect without SSL, replay
+ # the CONNECT request to the upstream proxy, expect a 200 reply
+ # and only then establish SSL with the server side again.
+ self.c.log("Hooked reconnect function")
+ self.c.log("Hook: Run original redirect")
+ original_reconnect_func(no_ssl=True)
+ self.c.log("Hook: Write CONNECT request to upstream proxy", [upstream_request._assemble_first_line()])
+ self.c.server_conn.wfile.write(upstream_request._assemble())
+ self.c.server_conn.wfile.flush()
+ self.c.log("Hook: Read answer to CONNECT request from proxy")
+ resp = HTTPResponse.from_stream(self.c.server_conn.rfile, upstream_request.method)
+ if resp.code != 200:
+ raise ProxyError(resp.code,
+ "Cannot reestablish SSL connection with upstream proxy: \r\n" + str(resp.headers))
+ self.c.log("Hook: Establish SSL with upstream proxy")
+ self.c.establish_ssl(server=True)
+
+ self.c.server_reconnect = reconnect_http_proxy
+
+ raise ConnectionTypeChange
+
+ def process_request(self, request):
+ """
+ Prepares a client request before it is forwarded: authenticates it in
+ regular mode, takes care of CONNECT (authority-form) requests and sets
+ up the server connection for absolute-form requests.
+
+ Raises http.HttpError for requests we refuse to serve. authenticate()
+ may raise HttpAuthenticationError, and ssl_upgrade() raises
+ ConnectionTypeChange.
+ """
+ if self.c.mode == "regular":
+ self.authenticate(request)
+ if request.form_in == "authority" and self.c.client_conn.ssl_established:
+ raise http.HttpError(502, "Must not CONNECT on already encrypted connection")
+
+ # If we have a CONNECT request, we might need to intercept
+ if request.form_in == "authority":
+ # True if we run in regular mode and no forward proxy is configured.
+ directly_addressed_at_mitmproxy = (self.c.mode == "regular") and not self.c.config.forward_proxy
+ if directly_addressed_at_mitmproxy:
+ self.c.establish_server_connection((request.host, request.port))
+ # Answer the CONNECT request ourselves before switching to SSL.
+ self.c.client_conn.wfile.write(
+ 'HTTP/1.1 200 Connection established\r\n' +
+ ('Proxy-agent: %s\r\n' % self.c.server_version) +
+ '\r\n'
+ )
+ self.c.client_conn.wfile.flush()
+ self.ssl_upgrade() # raises ConnectionTypeChange exception
+
+ if self.c.mode == "regular":
+ if request.form_in == "authority":
+ pass
+ elif request.form_in == "absolute":
+ # Only plain http is accepted for absolute-form requests.
+ if request.scheme != "http":
+ raise http.HttpError(400, "Invalid Request")
+ if not self.c.config.forward_proxy:
+ request.form_out = "origin"
+ # (Re)connect if there is no server connection yet or if it
+ # points to a different address than the one requested.
+ if ((not self.c.server_conn) or
+ (self.c.server_conn.address != (request.host, request.port))):
+ self.c.establish_server_connection((request.host, request.port))
+ else:
+ raise http.HttpError(400, "Invalid Request")
+
+    def authenticate(self, request):
+        """
+        Checks the request headers against the configured authenticator,
+        if there is one.
+
+        On success the authenticator cleans the request headers. On failure
+        HttpAuthenticationError is raised, carrying the authenticator's
+        challenge headers.
+
+        Returns the request headers.
+        """
+        authenticator = self.c.config.authenticator
+        if authenticator:
+            if not authenticator.authenticate(request.headers):
+                raise HttpAuthenticationError(authenticator.auth_challenge_headers())
+            authenticator.clean(request.headers)
+        return request.headers
\ No newline at end of file
diff --git a/libmproxy/protocol.py b/libmproxy/protocolold.py
index 9d3f805c..9f98e37e 100644
--- a/libmproxy/protocol.py
+++ b/libmproxy/protocolold.py
@@ -7,44 +7,6 @@ from proxy import ProxyError, KILL
LEGACY = True
-def _handle(msg, conntype, connection_handler, *args, **kwargs):
- handler = None
- if conntype == "http":
- handler = HTTPHandler(connection_handler)
- else:
- raise NotImplementedError
-
- f = getattr(handler, "handle_" + msg)
- return f(*args, **kwargs)
-
-
-def handle_messages(conntype, connection_handler):
- _handle("messages", conntype, connection_handler)
-
-
-def handle_error(conntype, connection_handler, error):
- _handle("error", conntype, connection_handler, error)
-
-
-class ConnectionTypeChange(Exception):
- pass
-
-
-class ProtocolHandler(object):
- def __init__(self, c):
- self.c = c
- def handle_messages(self):
- """
- This method gets called if the connection has been established.
- """
- raise NotImplementedError
- def handle_error(self, error):
- """
- This method gets called should there be an uncaught exception during the connection.
- This might happen outside of handle_messages, e.g. if the initial SSL handshake fails in transparent mode.
- """
- raise NotImplementedError
-
"""
Minimalistic cleanroom reimplemementation of a couple of flow.* classes. Most functionality is missing,
@@ -202,6 +164,7 @@ class HTTPRequest(HTTPMessage):
if not r:
raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
host, port, _ = r
+ path = None
else:
form_in = "absolute"
r = http.parse_init_proxy(request_line)
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
index a7ee9a7b..e43c811b 100644
--- a/libmproxy/proxy.py
+++ b/libmproxy/proxy.py
@@ -5,7 +5,6 @@ import utils, flow, version, platform, controller
TRANSPARENT_SSL_PORTS = [443, 8443]
-KILL = 0
class ProxyError(Exception):
@@ -15,10 +14,6 @@ class ProxyError(Exception):
def __str__(self):
return "ProxyError(%s, %s)" % (self.code, self.msg)
-
-import protocol
-
-
class Log:
def __init__(self, msg):
self.msg = msg
@@ -39,7 +34,7 @@ class ProxyConfig:
self.certstore = certutils.CertStore()
-class ClientConnection(tcp.BaseHandler):
+class ClientConnection(tcp.BaseHandler, flow.SimpleStateObject):
def __init__(self, client_connection, address, server):
tcp.BaseHandler.__init__(self, client_connection, address, server)
@@ -47,6 +42,13 @@ class ClientConnection(tcp.BaseHandler):
self.timestamp_end = None
self.timestamp_ssl_setup = None
+ _stateobject_attributes = dict(
+ timestamp_start=float,
+ timestamp_end=float,
+ timestamp_ssl_setup=float,
+ # FIXME: Add missing attributes
+ )
+
def convert_to_ssl(self, *args, **kwargs):
tcp.BaseHandler.convert_to_ssl(self, *args, **kwargs)
self.timestamp_ssl_setup = utils.timestamp()
@@ -56,7 +58,7 @@ class ClientConnection(tcp.BaseHandler):
self.timestamp_end = utils.timestamp()
-class ServerConnection(tcp.TCPClient):
+class ServerConnection(tcp.TCPClient, flow.SimpleStateObject):
def __init__(self, address):
tcp.TCPClient.__init__(self, address)
@@ -66,6 +68,15 @@ class ServerConnection(tcp.TCPClient):
self.timestamp_tcp_setup = None
self.timestamp_ssl_setup = None
+ _stateobject_attributes = dict(
+ peername=tuple,
+ timestamp_start=float,
+ timestamp_end=float,
+ timestamp_tcp_setup=float,
+ timestamp_ssl_setup=float,
+ # FIXME: Add missing attributes
+ )
+
def connect(self):
self.timestamp_start = utils.timestamp()
tcp.TCPClient.connect(self)
@@ -119,6 +130,8 @@ class RequestReplayThread(threading.Thread):
"""
+import protocol
+
class ConnectionHandler:
def __init__(self, config, client_connection, client_address, server, channel, server_version):
self.config = config
@@ -180,6 +193,8 @@ class ConnectionHandler:
protocol.handle_error(self.conntype, self, e)
except Exception, e:
self.log(e.__class__)
+ import traceback
+ self.log(traceback.format_exc())
self.log(str(e))
self.del_server_connection()