aboutsummaryrefslogtreecommitdiffstats
path: root/libmproxy/protocol
diff options
context:
space:
mode:
author: Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-08-01 10:40:19 +0200
committer: Thomas Kriechbaumer <thomas@kriechbaumer.name> 2015-08-01 14:37:32 +0200
commit: db02553e2afee044faac898d12bd8d1adadbcd21 (patch)
tree: 1e5910599b7b47c527528f03f896efd2fbf7c907 /libmproxy/protocol
parent: 8a051511706e2e62c32e0f70e05ecab11d444b6f (diff)
download: mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.tar.gz
mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.tar.bz2
mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.zip
move code from mitmproxy to netlib
Diffstat (limited to 'libmproxy/protocol')
-rw-r--r-- libmproxy/protocol/http.py 37
-rw-r--r-- libmproxy/protocol/http_wrappers.py 295
2 files changed, 48 insertions, 284 deletions
diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py
index 35fd7d28..961e50a6 100644
--- a/libmproxy/protocol/http.py
+++ b/libmproxy/protocol/http.py
@@ -19,9 +19,6 @@ from .. import encoding, utils, controller, stateobject, proxy
from .http_wrappers import decoded, HTTPRequest, HTTPResponse
-HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
-HDR_FORM_MULTIPART = "multipart/form-data"
-
class KillSignal(Exception):
pass
@@ -39,7 +36,10 @@ def send_connect_request(conn, host, port, update_state=True):
odict.ODictCaseless(),
""
)
+
+ # we currently only support HTTP/1 CONNECT requests
protocol = http1.HTTP1Protocol(conn)
+
conn.send(protocol.assemble(upstream_request))
resp = HTTPResponse.from_protocol(protocol, upstream_request.method)
if resp.status_code != 200:
@@ -144,18 +144,6 @@ class HTTPFlow(Flow):
return c
-class HttpAuthenticationError(Exception):
- def __init__(self, auth_headers=None):
- super(HttpAuthenticationError, self).__init__(
- "Proxy Authentication Required"
- )
- self.headers = auth_headers
- self.code = 407
-
- def __repr__(self):
- return "Proxy Authentication Required"
-
-
class HTTPHandler(ProtocolHandler):
"""
HTTPHandler implements mitmproxys understanding of the HTTP protocol.
@@ -179,7 +167,7 @@ class HTTPHandler(ProtocolHandler):
try:
if not self.c.server_conn.protocol:
# instantiate new protocol if connection does not have one yet
- self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn)
+ self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn) # TODO: select correct protocol
self.c.server_conn.protocol.perform_connection_preface()
self.c.server_conn.send(self.c.server_conn.protocol.assemble(flow.request))
@@ -225,6 +213,7 @@ class HTTPHandler(ProtocolHandler):
flow.response.content = CONTENT_MISSING
else:
if isinstance(flow.server_conn.protocol, http1.HTTP1Protocol):
+ # streaming is only supported with HTTP/1 at the moment
flow.response.content = flow.server_conn.protocol.read_http_body(
flow.response.headers,
self.c.config.body_size_limit,
@@ -241,6 +230,7 @@ class HTTPHandler(ProtocolHandler):
try:
if not flow.client_conn.protocol:
# instantiate new protocol if connection does not have one yet
+ # the first request might be a CONNECT - which is currently only supported with HTTP/1
flow.client_conn.protocol = http1.HTTP1Protocol(self.c.client_conn)
req = HTTPRequest.from_protocol(
@@ -258,8 +248,8 @@ class HTTPHandler(ProtocolHandler):
)
ret = self.process_request(flow, req)
if ret:
- # CONNECT successful - upgrade to HTTP/2
# instantiate new protocol if connection does not have one yet
+ # TODO: select correct protocol
flow.client_conn.protocol = http2.HTTP2Protocol(self.c.client_conn, is_server=True)
if ret is not None:
return ret
@@ -329,7 +319,7 @@ class HTTPHandler(ProtocolHandler):
return True # Next flow please.
except (
- HttpAuthenticationError,
+ http.HttpAuthenticationError,
http.HttpError,
proxy.ProxyError,
tcp.NetLibError,
@@ -389,6 +379,7 @@ class HTTPHandler(ProtocolHandler):
pass
def send_error(self, code, message, headers):
+ # TODO: implement this again
raise NotImplementedError("todo - adapt for HTTP/2 - make use of make_error_reponse from pathod")
# response = http.status_codes.RESPONSES.get(code, "Unknown")
# html_content = """
@@ -457,6 +448,9 @@ class HTTPHandler(ProtocolHandler):
self.c.set_server_address((request.host, request.port))
# Update server_conn attribute on the flow
flow.server_conn = self.c.server_conn
+
+ # since we currently only support HTTP/1 CONNECT requests
+ # the response must be HTTP/1 as well
self.c.client_conn.send(
('HTTP/%s.%s 200 ' % (request.httpversion[0], request.httpversion[1])) +
'Connection established\r\n' +
@@ -495,7 +489,7 @@ class HTTPHandler(ProtocolHandler):
400,
"Invalid request: No host information"
)
- p = http.parse_url("http://" + h)
+ p = netlib.utils.parse_url("http://" + h)
request.scheme = p[0]
request.host = p[1]
request.port = p[2]
@@ -602,6 +596,9 @@ class HTTPHandler(ProtocolHandler):
Checks if the connection should be closed depending on the HTTP
semantics. Returns True, if so.
"""
+
+ # TODO: add logic for HTTP/2
+
close_connection = (
http1.HTTP1Protocol.connection_close(
flow.request.httpversion,
@@ -684,7 +681,7 @@ class HTTPHandler(ProtocolHandler):
if self.c.config.authenticator.authenticate(request.headers):
self.c.config.authenticator.clean(request.headers)
else:
- raise HttpAuthenticationError(
+ raise http.HttpAuthenticationError(
self.c.config.authenticator.auth_challenge_headers())
return request.headers
diff --git a/libmproxy/protocol/http_wrappers.py b/libmproxy/protocol/http_wrappers.py
index 18a355dc..758ebfe0 100644
--- a/libmproxy/protocol/http_wrappers.py
+++ b/libmproxy/protocol/http_wrappers.py
@@ -8,18 +8,13 @@ import urlparse
from email.utils import parsedate_tz, formatdate, mktime_tz
import netlib
-from netlib import http, tcp, odict, utils
+from netlib import http, tcp, odict, utils, encoding
from netlib.http import cookies, semantics, http1
from .tcp import TCPHandler
from .primitives import KILL, ProtocolHandler, Flow, Error
from ..proxy.connection import ServerConnection
-from .. import encoding, utils, controller, stateobject, proxy
-
-
-HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
-HDR_FORM_MULTIPART = "multipart/form-data"
-CONTENT_MISSING = 0
+from .. import utils, controller, stateobject, proxy
class decoded(object):
@@ -249,12 +244,6 @@ class HTTPRequest(MessageMixin, semantics.Request):
f.load_state(state)
return f
- def __repr__(self):
- return "<HTTPRequest: {0}>".format(
- # just for visualisation purposes we use HTTP/1 protocol here
- http.http1.HTTP1Protocol._assemble_request_first_line(self)[:-9]
- )
-
@classmethod
def from_protocol(
self,
@@ -281,211 +270,26 @@ class HTTPRequest(MessageMixin, semantics.Request):
req.timestamp_end,
)
+ @classmethod
+ def wrap(self, request):
+ return HTTPRequest(
+ form_in=request.form_in,
+ method=request.method,
+ scheme=request.scheme,
+ host=request.host,
+ port=request.port,
+ path=request.path,
+ httpversion=request.httpversion,
+ headers=request.headers,
+ body=request.body,
+ timestamp_start=request.timestamp_start,
+ timestamp_end=request.timestamp_end,
+ form_out=(request.form_out if hasattr(request, 'form_out') else None),
+ )
def __hash__(self):
return id(self)
- def anticache(self):
- """
- Modifies this request to remove headers that might produce a cached
- response. That is, we remove ETags and If-Modified-Since headers.
- """
- delheaders = [
- "if-modified-since",
- "if-none-match",
- ]
- for i in delheaders:
- del self.headers[i]
-
- def anticomp(self):
- """
- Modifies this request to remove headers that will compress the
- resource's data.
- """
- self.headers["accept-encoding"] = ["identity"]
-
- def constrain_encoding(self):
- """
- Limits the permissible Accept-Encoding values, based on what we can
- decode appropriately.
- """
- if self.headers["accept-encoding"]:
- self.headers["accept-encoding"] = [
- ', '.join(
- e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])]
-
- def update_host_header(self):
- """
- Update the host header to reflect the current target.
- """
- self.headers["Host"] = [self.host]
-
- def get_form(self):
- """
- Retrieves the URL-encoded or multipart form data, returning an ODict object.
- Returns an empty ODict if there is no data or the content-type
- indicates non-form data.
- """
- if self.body:
- if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
- return self.get_form_urlencoded()
- elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True):
- return self.get_form_multipart()
- return odict.ODict([])
-
- def get_form_urlencoded(self):
- """
- Retrieves the URL-encoded form data, returning an ODict object.
- Returns an empty ODict if there is no data or the content-type
- indicates non-form data.
- """
- if self.body and self.headers.in_any(
- "content-type",
- HDR_FORM_URLENCODED,
- True):
- return odict.ODict(utils.urldecode(self.body))
- return odict.ODict([])
-
- def get_form_multipart(self):
- if self.body and self.headers.in_any(
- "content-type",
- HDR_FORM_MULTIPART,
- True):
- return odict.ODict(
- utils.multipartdecode(
- self.headers,
- self.body))
- return odict.ODict([])
-
- def set_form_urlencoded(self, odict):
- """
- Sets the body to the URL-encoded form data, and adds the
- appropriate content-type header. Note that this will destory the
- existing body if there is one.
- """
- # FIXME: If there's an existing content-type header indicating a
- # url-encoded form, leave it alone.
- self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
- self.body = utils.urlencode(odict.lst)
-
- def get_path_components(self):
- """
- Returns the path components of the URL as a list of strings.
-
- Components are unquoted.
- """
- _, _, path, _, _, _ = urlparse.urlparse(self.url)
- return [urllib.unquote(i) for i in path.split("/") if i]
-
- def set_path_components(self, lst):
- """
- Takes a list of strings, and sets the path component of the URL.
-
- Components are quoted.
- """
- lst = [urllib.quote(i, safe="") for i in lst]
- path = "/" + "/".join(lst)
- scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url)
- self.url = urlparse.urlunparse(
- [scheme, netloc, path, params, query, fragment]
- )
-
- def get_query(self):
- """
- Gets the request query string. Returns an ODict object.
- """
- _, _, _, _, query, _ = urlparse.urlparse(self.url)
- if query:
- return odict.ODict(utils.urldecode(query))
- return odict.ODict([])
-
- def set_query(self, odict):
- """
- Takes an ODict object, and sets the request query string.
- """
- scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url)
- query = utils.urlencode(odict.lst)
- self.url = urlparse.urlunparse(
- [scheme, netloc, path, params, query, fragment]
- )
-
- def pretty_host(self, hostheader):
- """
- Heuristic to get the host of the request.
-
- Note that pretty_host() does not always return the TCP destination
- of the request, e.g. if an upstream proxy is in place
-
- If hostheader is set to True, the Host: header will be used as
- additional (and preferred) data source. This is handy in
- transparent mode, where only the IO of the destination is known,
- but not the resolved name. This is disabled by default, as an
- attacker may spoof the host header to confuse an analyst.
- """
- host = None
- if hostheader:
- host = self.headers.get_first("host")
- if not host:
- host = self.host
- if host:
- try:
- return host.encode("idna")
- except ValueError:
- return host
- else:
- return None
-
- def pretty_url(self, hostheader):
- if self.form_out == "authority": # upstream proxy mode
- return "%s:%s" % (self.pretty_host(hostheader), self.port)
- return utils.unparse_url(self.scheme,
- self.pretty_host(hostheader),
- self.port,
- self.path).encode('ascii')
-
- @property
- def url(self):
- """
- Returns a URL string, constructed from the Request's URL components.
- """
- return utils.unparse_url(
- self.scheme,
- self.host,
- self.port,
- self.path
- ).encode('ascii')
-
- @url.setter
- def url(self, url):
- """
- Parses a URL specification, and updates the Request's information
- accordingly.
-
- Returns False if the URL was invalid, True if the request succeeded.
- """
- parts = http.parse_url(url)
- if not parts:
- raise ValueError("Invalid URL: %s" % url)
- self.scheme, self.host, self.port, self.path = parts
-
- def get_cookies(self):
- """
-
- Returns a possibly empty netlib.odict.ODict object.
- """
- ret = odict.ODict()
- for i in self.headers["cookie"]:
- ret.extend(cookies.parse_cookie_header(i))
- return ret
-
- def set_cookies(self, odict):
- """
- Takes an netlib.odict.ODict object. Over-writes any existing Cookie
- headers.
- """
- v = cookies.format_cookie_header(odict)
- self.headers["Cookie"] = [v]
-
def replace(self, pattern, repl, *args, **kwargs):
"""
Replaces a regular expression pattern with repl in the headers, the
@@ -552,7 +356,7 @@ class HTTPResponse(MessageMixin, semantics.Response):
_stateobject_attributes = MessageMixin._stateobject_attributes.copy()
_stateobject_attributes.update(
- code=int,
+ status_code=int,
msg=str
)
@@ -567,20 +371,6 @@ class HTTPResponse(MessageMixin, semantics.Response):
f.load_state(state)
return f
- def __repr__(self):
- if self.body:
- size = netlib.utils.pretty_size(len(self.body))
- else:
- size = "content missing"
- return "<HTTPResponse: {status_code} {msg} ({contenttype}, {size})>".format(
- status_code=self.status_code,
- msg=self.msg,
- contenttype=self.headers.get_first(
- "content-type", "unknown content type"
- ),
- size=size
- )
-
@classmethod
def from_protocol(
self,
@@ -605,6 +395,18 @@ class HTTPResponse(MessageMixin, semantics.Response):
resp.timestamp_end,
)
+ @classmethod
+ def wrap(self, response):
+ return HTTPResponse(
+ httpversion=response.httpversion,
+ status_code=response.status_code,
+ msg=response.msg,
+ headers=response.headers,
+ body=response.body,
+ timestamp_start=response.timestamp_start,
+ timestamp_end=response.timestamp_end,
+ )
+
def _refresh_cookie(self, c, delta):
"""
Takes a cookie string c and a time delta in seconds, and returns
@@ -654,38 +456,3 @@ class HTTPResponse(MessageMixin, semantics.Response):
c.append(self._refresh_cookie(i, delta))
if c:
self.headers["set-cookie"] = c
-
- def get_cookies(self):
- """
- Get the contents of all Set-Cookie headers.
-
- Returns a possibly empty ODict, where keys are cookie name strings,
- and values are [value, attr] lists. Value is a string, and attr is
- an ODictCaseless containing cookie attributes. Within attrs, unary
- attributes (e.g. HTTPOnly) are indicated by a Null value.
- """
- ret = []
- for header in self.headers["set-cookie"]:
- v = http.cookies.parse_set_cookie_header(header)
- if v:
- name, value, attrs = v
- ret.append([name, [value, attrs]])
- return odict.ODict(ret)
-
- def set_cookies(self, odict):
- """
- Set the Set-Cookie headers on this response, over-writing existing
- headers.
-
- Accepts an ODict of the same format as that returned by get_cookies.
- """
- values = []
- for i in odict.lst:
- values.append(
- http.cookies.format_set_cookie_header(
- i[0],
- i[1][0],
- i[1][1]
- )
- )
- self.headers["Set-Cookie"] = values