aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/flow.py
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2014-01-30 05:00:13 +0100
committerMaximilian Hils <git@maximilianhils.com>2014-01-30 05:00:13 +0100
commit40bf42f14a7ec386024a8925502fa3c6e6f0657e (patch)
treedb4d76b903a170ba0315a1fc270b14bf9034f9ee /libmproxy/flow.py
parent607f7778110d5c2720e60ffcf5f4b0c94e8fcc5f (diff)
downloadmitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.tar.gz
mitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.tar.bz2
mitmproxy-40bf42f14a7ec386024a8925502fa3c6e6f0657e.zip
merge flow classes. current status: basic mitmdump working
Diffstat (limited to 'libmproxy/flow.py')
-rw-r--r--libmproxy/flow.py477
1 files changed, 43 insertions, 434 deletions
diff --git a/libmproxy/flow.py b/libmproxy/flow.py
index acebb71d..0f6204cf 100644
--- a/libmproxy/flow.py
+++ b/libmproxy/flow.py
@@ -143,459 +143,68 @@ class SetHeaders:
f.request.headers.add(header, value)
-class decoded(object):
- """
-
- A context manager that decodes a request, response or error, and then
- re-encodes it with the same encoding after execution of the block.
-
- Example:
-
- with decoded(request):
- request.content = request.content.replace("foo", "bar")
- """
- def __init__(self, o):
- self.o = o
- ce = o.headers.get_first("content-encoding")
- if ce in encoding.ENCODINGS:
- self.ce = ce
- else:
- self.ce = None
-
- def __enter__(self):
- if self.ce:
- self.o.decode()
+class StateObject:
+ def _get_state(self):
+ raise NotImplementedError
- def __exit__(self, type, value, tb):
- if self.ce:
- self.o.encode(self.ce)
+ def _load_state(self, state):
+ raise NotImplementedError
+ @classmethod
+ def _from_state(cls, state):
+ raise NotImplementedError
-class StateObject:
def __eq__(self, other):
try:
return self._get_state() == other._get_state()
- except AttributeError:
+ except AttributeError: # we may compare with something that's not a StateObject
return False
-class HTTPMsg(StateObject):
- def get_decoded_content(self):
- """
- Returns the decoded content based on the current Content-Encoding header.
- Doesn't change the message iteself or its headers.
- """
- ce = self.headers.get_first("content-encoding")
- if not self.content or ce not in encoding.ENCODINGS:
- return self.content
- return encoding.decode(ce, self.content)
-
- def decode(self):
- """
- Decodes content based on the current Content-Encoding header, then
- removes the header. If there is no Content-Encoding header, no
- action is taken.
-
- Returns True if decoding succeeded, False otherwise.
- """
- ce = self.headers.get_first("content-encoding")
- if not self.content or ce not in encoding.ENCODINGS:
- return False
- data = encoding.decode(
- ce,
- self.content
- )
- if data is None:
- return False
- self.content = data
- del self.headers["content-encoding"]
- return True
-
- def encode(self, e):
- """
- Encodes content with the encoding e, where e is "gzip", "deflate"
- or "identity".
- """
- # FIXME: Error if there's an existing encoding header?
- self.content = encoding.encode(e, self.content)
- self.headers["content-encoding"] = [e]
-
- def size(self, **kwargs):
- """
- Size in bytes of a fully rendered message, including headers and
- HTTP lead-in.
- """
- hl = len(self._assemble_head(**kwargs))
- if self.content:
- return hl + len(self.content)
- else:
- return hl
-
- def get_content_type(self):
- return self.headers.get_first("content-type")
-
- def get_transmitted_size(self):
- # FIXME: this is inprecise in case chunking is used
- # (we should count the chunking headers)
- if not self.content:
- return 0
- return len(self.content)
-
-
-class Request(HTTPMsg):
+class SimpleStateObject(StateObject):
"""
- An HTTP request.
-
- Exposes the following attributes:
-
- client_conn: ClientConnect object, or None if this is a replay.
-
- headers: ODictCaseless object
-
- content: Content of the request, None, or CONTENT_MISSING if there
- is content associated, but not present. CONTENT_MISSING evaluates
- to False to make checking for the presence of content natural.
-
- scheme: URL scheme (http/https)
-
- host: Host portion of the URL
-
- port: Destination port
-
- path: Path portion of the URL
-
- timestamp_start: Seconds since the epoch signifying request transmission started
-
- method: HTTP method
-
- timestamp_end: Seconds since the epoch signifying request transmission ended
-
- tcp_setup_timestamp: Seconds since the epoch signifying remote TCP connection setup completion time
- (or None, if request didn't results TCP setup)
-
- ssl_setup_timestamp: Seconds since the epoch signifying remote SSL encryption setup completion time
- (or None, if request didn't results SSL setup)
-
+ A StateObject with opionated conventions that tries to keep everything DRY.y
"""
- def __init__(
- self, client_conn, httpversion, host, port,
- scheme, method, path, headers, content, timestamp_start=None,
- timestamp_end=None, tcp_setup_timestamp=None,
- ssl_setup_timestamp=None, ip=None):
- assert isinstance(headers, ODictCaseless)
- self.client_conn = client_conn
- self.httpversion = httpversion
- self.host, self.port, self.scheme = host, port, scheme
- self.method, self.path, self.headers, self.content = method, path, headers, content
- self.timestamp_start = timestamp_start or utils.timestamp()
- self.timestamp_end = max(timestamp_end or utils.timestamp(), timestamp_start)
- self.close = False
- self.tcp_setup_timestamp = tcp_setup_timestamp
- self.ssl_setup_timestamp = ssl_setup_timestamp
- self.ip = ip
-
- # Have this request's cookies been modified by sticky cookies or auth?
- self.stickycookie = False
- self.stickyauth = False
-
- # Live attributes - not serialized
- self.wfile, self.rfile = None, None
-
- def set_live(self, rfile, wfile):
- self.wfile, self.rfile = wfile, rfile
- def is_live(self):
- return bool(self.wfile)
-
- def anticache(self):
- """
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
- """
- delheaders = [
- "if-modified-since",
- "if-none-match",
- ]
- for i in delheaders:
- del self.headers[i]
-
- def anticomp(self):
- """
- Modifies this request to remove headers that will compress the
- resource's data.
- """
- self.headers["accept-encoding"] = ["identity"]
-
- def constrain_encoding(self):
- """
- Limits the permissible Accept-Encoding values, based on what we can
- decode appropriately.
- """
- if self.headers["accept-encoding"]:
- self.headers["accept-encoding"] = [', '.join(
- e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
- )]
-
- def _set_replay(self):
- self.client_conn = None
+ _stateobject_attributes = None
+ """
+ A dict where the keys represent the attributes to be serialized.
+ The values represent the attribute class or type.
+ If the attribute is a class, this class must be a subclass of StateObject.
+ """
- def is_replay(self):
- """
- Is this request a replay?
- """
- if self.client_conn:
- return False
- else:
- return True
+ def _get_state(self):
+ return {attr: (getattr(self, attr)._get_state()
+ if (type(cls) == 'classobj')
+ else getattr(self, attr))
+ for attr, cls in self._stateobject_attributes.iteritems()}
def _load_state(self, state):
- if state["client_conn"]:
- if self.client_conn:
- self.client_conn._load_state(state["client_conn"])
+ for attr, cls in self._stateobject_attributes.iteritems():
+ self._load_state_attr(attr, cls, state)
+
+ def _load_state_attr(self, attribute, cls, state):
+ if state[attribute] is not None:
+ if type(cls) == 'classobj':
+ assert issubclass(cls, StateObject)
+ curr = getattr(self, attribute)
+ if curr:
+ curr._load_state(state[attribute])
+ else:
+ setattr(self, attribute, cls._from_state(state[attribute]))
else:
- self.client_conn = ClientConnect._from_state(state["client_conn"])
+ setattr(self, attribute, cls(state[attribute]))
else:
- self.client_conn = None
- self.host = state["host"]
- self.port = state["port"]
- self.scheme = state["scheme"]
- self.method = state["method"]
- self.path = state["path"]
- self.headers = ODictCaseless._from_state(state["headers"])
- self.content = state["content"]
- self.timestamp_start = state["timestamp_start"]
- self.timestamp_end = state["timestamp_end"]
- self.tcp_setup_timestamp = state["tcp_setup_timestamp"]
- self.ssl_setup_timestamp = state["ssl_setup_timestamp"]
- self.ip = state["ip"]
-
- def _get_state(self):
- return dict(
- client_conn = self.client_conn._get_state() if self.client_conn else None,
- httpversion = self.httpversion,
- host = self.host,
- port = self.port,
- scheme = self.scheme,
- method = self.method,
- path = self.path,
- headers = self.headers._get_state(),
- content = self.content,
- timestamp_start = self.timestamp_start,
- timestamp_end = self.timestamp_end,
- tcp_setup_timestamp = self.tcp_setup_timestamp,
- ssl_setup_timestamp = self.ssl_setup_timestamp,
- ip = self.ip
- )
+ setattr(self, attribute, None)
@classmethod
- def _from_state(klass, state):
- return klass(
- ClientConnect._from_state(state["client_conn"]),
- tuple(state["httpversion"]),
- str(state["host"]),
- state["port"],
- str(state["scheme"]),
- str(state["method"]),
- str(state["path"]),
- ODictCaseless._from_state(state["headers"]),
- state["content"],
- state["timestamp_start"],
- state["timestamp_end"],
- state["tcp_setup_timestamp"],
- state["ssl_setup_timestamp"],
- state["ip"]
- )
-
- def __hash__(self):
- return id(self)
-
- def copy(self):
- c = copy.copy(self)
- c.headers = self.headers.copy()
- return c
-
- def get_form_urlencoded(self):
- """
- Retrieves the URL-encoded form data, returning an ODict object.
- Returns an empty ODict if there is no data or the content-type
- indicates non-form data.
- """
- if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
- return ODict(utils.urldecode(self.content))
- return ODict([])
-
- def set_form_urlencoded(self, odict):
- """
- Sets the body to the URL-encoded form data, and adds the
- appropriate content-type header. Note that this will destory the
- existing body if there is one.
- """
- # FIXME: If there's an existing content-type header indicating a
- # url-encoded form, leave it alone.
- self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
- self.content = utils.urlencode(odict.lst)
-
- def get_path_components(self):
- """
- Returns the path components of the URL as a list of strings.
-
- Components are unquoted.
- """
- _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
- return [urllib.unquote(i) for i in path.split("/") if i]
-
- def set_path_components(self, lst):
- """
- Takes a list of strings, and sets the path component of the URL.
-
- Components are quoted.
- """
- lst = [urllib.quote(i, safe="") for i in lst]
- path = "/" + "/".join(lst)
- scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
-
- def get_query(self):
- """
- Gets the request query string. Returns an ODict object.
- """
- _, _, _, _, query, _ = urlparse.urlparse(self.get_url())
- if query:
- return ODict(utils.urldecode(query))
- return ODict([])
-
- def set_query(self, odict):
- """
- Takes an ODict object, and sets the request query string.
- """
- scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
- query = utils.urlencode(odict.lst)
- self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
-
- def get_url(self, hostheader=False):
- """
- Returns a URL string, constructed from the Request's URL compnents.
-
- If hostheader is True, we use the value specified in the request
- Host header to construct the URL.
- """
- if hostheader:
- host = self.headers.get_first("host") or self.host
- else:
- host = self.host
- host = host.encode("idna")
- return utils.unparse_url(self.scheme, host, self.port, self.path).encode('ascii')
-
- def set_url(self, url):
- """
- Parses a URL specification, and updates the Request's information
- accordingly.
-
- Returns False if the URL was invalid, True if the request succeeded.
- """
- parts = http.parse_url(url)
- if not parts:
- return False
- self.scheme, self.host, self.port, self.path = parts
- return True
-
- def get_cookies(self):
- cookie_headers = self.headers.get("cookie")
- if not cookie_headers:
- return None
-
- cookies = []
- for header in cookie_headers:
- pairs = [pair.partition("=") for pair in header.split(';')]
- cookies.extend((pair[0],(pair[2],{})) for pair in pairs)
- return dict(cookies)
-
- def get_header_size(self):
- FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
- assembled_header = FMT % (
- self.method,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(self.headers)
- )
- return len(assembled_header)
-
- def _assemble_head(self, proxy=False):
- FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
- FMT_PROXY = '%s %s://%s:%s%s HTTP/%s.%s\r\n%s\r\n'
-
- headers = self.headers.copy()
- utils.del_all(
- headers,
- [
- 'proxy-connection',
- 'keep-alive',
- 'connection',
- 'transfer-encoding'
- ]
- )
- if not 'host' in headers:
- headers["host"] = [utils.hostport(self.scheme, self.host, self.port)]
- content = self.content
- if content:
- headers["Content-Length"] = [str(len(content))]
- else:
- content = ""
- if self.close:
- headers["connection"] = ["close"]
- if not proxy:
- return FMT % (
- self.method,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(headers)
- )
- else:
- return FMT_PROXY % (
- self.method,
- self.scheme,
- self.host,
- self.port,
- self.path,
- self.httpversion[0],
- self.httpversion[1],
- str(headers)
- )
-
- def _assemble(self, _proxy = False):
- """
- Assembles the request for transmission to the server. We make some
- modifications to make sure interception works properly.
-
- Returns None if the request cannot be assembled.
- """
- if self.content == CONTENT_MISSING:
- return None
- head = self._assemble_head(_proxy)
- if self.content:
- return head + self.content
- else:
- return head
-
- def replace(self, pattern, repl, *args, **kwargs):
- """
- Replaces a regular expression pattern with repl in both the headers
- and the body of the request. Encoded content will be decoded before
- replacement, and re-encoded afterwards.
-
- Returns the number of replacements made.
- """
- with decoded(self):
- self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
- self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
- c += pc
- c += self.headers.replace(pattern, repl, *args, **kwargs)
- return c
+ def _from_state(cls, state):
+ f = cls()
+ f._load_state(state)
+ return f
-class Response(HTTPMsg):
+class Response(object):
"""
An HTTP response.
@@ -1269,7 +878,7 @@ class State(object):
"""
Add a response to the state. Returns the matching flow.
"""
- f = self._flow_map.get(resp.request)
+ f = self._flow_map.get(resp.flow)
if not f:
return False
f.response = resp
@@ -1596,7 +1205,7 @@ class FlowMaster(controller.Master):
return f
def handle_request(self, r):
- if r.is_live():
+ if False and r.is_live(): # FIXME
app = self.apps.get(r)
if app:
# FIXME: for the tcp proxy, use flow.client_conn.wfile