From db02553e2afee044faac898d12bd8d1adadbcd21 Mon Sep 17 00:00:00 2001 From: Thomas Kriechbaumer Date: Sat, 1 Aug 2015 10:40:19 +0200 Subject: move code from mitmproxy to netlib --- libmproxy/protocol/http.py | 37 +++-- libmproxy/protocol/http_wrappers.py | 295 ++++-------------------------------- 2 files changed, 48 insertions(+), 284 deletions(-) (limited to 'libmproxy/protocol') diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 35fd7d28..961e50a6 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -19,9 +19,6 @@ from .. import encoding, utils, controller, stateobject, proxy from .http_wrappers import decoded, HTTPRequest, HTTPResponse -HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = "multipart/form-data" - class KillSignal(Exception): pass @@ -39,7 +36,10 @@ def send_connect_request(conn, host, port, update_state=True): odict.ODictCaseless(), "" ) + + # we currently only support HTTP/1 CONNECT requests protocol = http1.HTTP1Protocol(conn) + conn.send(protocol.assemble(upstream_request)) resp = HTTPResponse.from_protocol(protocol, upstream_request.method) if resp.status_code != 200: @@ -144,18 +144,6 @@ class HTTPFlow(Flow): return c -class HttpAuthenticationError(Exception): - def __init__(self, auth_headers=None): - super(HttpAuthenticationError, self).__init__( - "Proxy Authentication Required" - ) - self.headers = auth_headers - self.code = 407 - - def __repr__(self): - return "Proxy Authentication Required" - - class HTTPHandler(ProtocolHandler): """ HTTPHandler implements mitmproxys understanding of the HTTP protocol. @@ -179,7 +167,7 @@ class HTTPHandler(ProtocolHandler): try: if not self.c.server_conn.protocol: # instantiate new protocol if connection does not have one yet - self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn) + self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn) # TODO: select correct protocol self.c.server_conn.protocol.perform_connection_preface() self.c.server_conn.send(self.c.server_conn.protocol.assemble(flow.request)) @@ -225,6 +213,7 @@ class HTTPHandler(ProtocolHandler): flow.response.content = CONTENT_MISSING else: if isinstance(flow.server_conn.protocol, http1.HTTP1Protocol): + # streaming is only supported with HTTP/1 at the moment flow.response.content = flow.server_conn.protocol.read_http_body( flow.response.headers, self.c.config.body_size_limit, @@ -241,6 +230,7 @@ class HTTPHandler(ProtocolHandler): try: if not flow.client_conn.protocol: # instantiate new protocol if connection does not have one yet + # the first request might be a CONNECT - which is currently only supported with HTTP/1 flow.client_conn.protocol = http1.HTTP1Protocol(self.c.client_conn) req = HTTPRequest.from_protocol( @@ -258,8 +248,8 @@ class HTTPHandler(ProtocolHandler): ) ret = self.process_request(flow, req) if ret: - # CONNECT successful - upgrade to HTTP/2 # instantiate new protocol if connection does not have one yet + # TODO: select correct protocol flow.client_conn.protocol = http2.HTTP2Protocol(self.c.client_conn, is_server=True) if ret is not None: return ret @@ -329,7 +319,7 @@ class HTTPHandler(ProtocolHandler): return True # Next flow please. except ( - HttpAuthenticationError, + http.HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError, @@ -389,6 +379,7 @@ class HTTPHandler(ProtocolHandler): pass def send_error(self, code, message, headers): + # TODO: implement this again raise NotImplementedError("todo - adapt for HTTP/2 - make use of make_error_reponse from pathod") # response = http.status_codes.RESPONSES.get(code, "Unknown") # html_content = """ @@ -457,6 +448,9 @@ class HTTPHandler(ProtocolHandler): self.c.set_server_address((request.host, request.port)) # Update server_conn attribute on the flow flow.server_conn = self.c.server_conn + + # since we currently only support HTTP/1 CONNECT requests + # the response must be HTTP/1 as well self.c.client_conn.send( ('HTTP/%s.%s 200 ' % (request.httpversion[0], request.httpversion[1])) + 'Connection established\r\n' + @@ -495,7 +489,7 @@ class HTTPHandler(ProtocolHandler): 400, "Invalid request: No host information" ) - p = http.parse_url("http://" + h) + p = netlib.utils.parse_url("http://" + h) request.scheme = p[0] request.host = p[1] request.port = p[2] @@ -602,6 +596,9 @@ class HTTPHandler(ProtocolHandler): Checks if the connection should be closed depending on the HTTP semantics. Returns True, if so. """ + + # TODO: add logic for HTTP/2 + close_connection = ( http1.HTTP1Protocol.connection_close( flow.request.httpversion, @@ -684,7 +681,7 @@ class HTTPHandler(ProtocolHandler): if self.c.config.authenticator.authenticate(request.headers): self.c.config.authenticator.clean(request.headers) else: - raise HttpAuthenticationError( + raise http.HttpAuthenticationError( self.c.config.authenticator.auth_challenge_headers()) return request.headers diff --git a/libmproxy/protocol/http_wrappers.py b/libmproxy/protocol/http_wrappers.py index 18a355dc..758ebfe0 100644 --- a/libmproxy/protocol/http_wrappers.py +++ b/libmproxy/protocol/http_wrappers.py @@ -8,18 +8,13 @@ import urlparse from email.utils import parsedate_tz, formatdate, mktime_tz import netlib -from netlib import http, tcp, odict, utils +from netlib import http, tcp, odict, utils, encoding from netlib.http import cookies, semantics, http1 from .tcp import TCPHandler from .primitives import KILL, ProtocolHandler, Flow, Error from ..proxy.connection import ServerConnection -from .. import encoding, utils, controller, stateobject, proxy - - -HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = "multipart/form-data" -CONTENT_MISSING = 0 +from .. import utils, controller, stateobject, proxy class decoded(object): @@ -249,12 +244,6 @@ class HTTPRequest(MessageMixin, semantics.Request): f.load_state(state) return f - def __repr__(self): - return "".format( - # just for visualisation purposes we use HTTP/1 protocol here - http.http1.HTTP1Protocol._assemble_request_first_line(self)[:-9] - ) - @classmethod def from_protocol( self, @@ -281,211 +270,26 @@ class HTTPRequest(MessageMixin, semantics.Request): req.timestamp_end, ) + @classmethod + def wrap(self, request): + return HTTPRequest( + form_in=request.form_in, + method=request.method, + scheme=request.scheme, + host=request.host, + port=request.port, + path=request.path, + httpversion=request.httpversion, + headers=request.headers, + body=request.body, + timestamp_start=request.timestamp_start, + timestamp_end=request.timestamp_end, + form_out=(request.form_out if hasattr(request, 'form_out') else None), + ) def __hash__(self): return id(self) - def anticache(self): - """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. - """ - delheaders = [ - "if-modified-since", - "if-none-match", - ] - for i in delheaders: - del self.headers[i] - - def anticomp(self): - """ - Modifies this request to remove headers that will compress the - resource's data. - """ - self.headers["accept-encoding"] = ["identity"] - - def constrain_encoding(self): - """ - Limits the permissible Accept-Encoding values, based on what we can - decode appropriately. - """ - if self.headers["accept-encoding"]: - self.headers["accept-encoding"] = [ - ', '.join( - e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])] - - def update_host_header(self): - """ - Update the host header to reflect the current target. - """ - self.headers["Host"] = [self.host] - - def get_form(self): - """ - Retrieves the URL-encoded or multipart form data, returning an ODict object. - Returns an empty ODict if there is no data or the content-type - indicates non-form data. - """ - if self.body: - if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return self.get_form_urlencoded() - elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): - return self.get_form_multipart() - return odict.ODict([]) - - def get_form_urlencoded(self): - """ - Retrieves the URL-encoded form data, returning an ODict object. - Returns an empty ODict if there is no data or the content-type - indicates non-form data. - """ - if self.body and self.headers.in_any( - "content-type", - HDR_FORM_URLENCODED, - True): - return odict.ODict(utils.urldecode(self.body)) - return odict.ODict([]) - - def get_form_multipart(self): - if self.body and self.headers.in_any( - "content-type", - HDR_FORM_MULTIPART, - True): - return odict.ODict( - utils.multipartdecode( - self.headers, - self.body)) - return odict.ODict([]) - - def set_form_urlencoded(self, odict): - """ - Sets the body to the URL-encoded form data, and adds the - appropriate content-type header. Note that this will destory the - existing body if there is one. - """ - # FIXME: If there's an existing content-type header indicating a - # url-encoded form, leave it alone. - self.headers["Content-Type"] = [HDR_FORM_URLENCODED] - self.body = utils.urlencode(odict.lst) - - def get_path_components(self): - """ - Returns the path components of the URL as a list of strings. - - Components are unquoted. - """ - _, _, path, _, _, _ = urlparse.urlparse(self.url) - return [urllib.unquote(i) for i in path.split("/") if i] - - def set_path_components(self, lst): - """ - Takes a list of strings, and sets the path component of the URL. - - Components are quoted. - """ - lst = [urllib.quote(i, safe="") for i in lst] - path = "/" + "/".join(lst) - scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) - self.url = urlparse.urlunparse( - [scheme, netloc, path, params, query, fragment] - ) - - def get_query(self): - """ - Gets the request query string. Returns an ODict object. - """ - _, _, _, _, query, _ = urlparse.urlparse(self.url) - if query: - return odict.ODict(utils.urldecode(query)) - return odict.ODict([]) - - def set_query(self, odict): - """ - Takes an ODict object, and sets the request query string. - """ - scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url) - query = utils.urlencode(odict.lst) - self.url = urlparse.urlunparse( - [scheme, netloc, path, params, query, fragment] - ) - - def pretty_host(self, hostheader): - """ - Heuristic to get the host of the request. - - Note that pretty_host() does not always return the TCP destination - of the request, e.g. if an upstream proxy is in place - - If hostheader is set to True, the Host: header will be used as - additional (and preferred) data source. This is handy in - transparent mode, where only the IO of the destination is known, - but not the resolved name. This is disabled by default, as an - attacker may spoof the host header to confuse an analyst. - """ - host = None - if hostheader: - host = self.headers.get_first("host") - if not host: - host = self.host - if host: - try: - return host.encode("idna") - except ValueError: - return host - else: - return None - - def pretty_url(self, hostheader): - if self.form_out == "authority": # upstream proxy mode - return "%s:%s" % (self.pretty_host(hostheader), self.port) - return utils.unparse_url(self.scheme, - self.pretty_host(hostheader), - self.port, - self.path).encode('ascii') - - @property - def url(self): - """ - Returns a URL string, constructed from the Request's URL components. - """ - return utils.unparse_url( - self.scheme, - self.host, - self.port, - self.path - ).encode('ascii') - - @url.setter - def url(self, url): - """ - Parses a URL specification, and updates the Request's information - accordingly. - - Returns False if the URL was invalid, True if the request succeeded. - """ - parts = http.parse_url(url) - if not parts: - raise ValueError("Invalid URL: %s" % url) - self.scheme, self.host, self.port, self.path = parts - - def get_cookies(self): - """ - - Returns a possibly empty netlib.odict.ODict object. - """ - ret = odict.ODict() - for i in self.headers["cookie"]: - ret.extend(cookies.parse_cookie_header(i)) - return ret - - def set_cookies(self, odict): - """ - Takes an netlib.odict.ODict object. Over-writes any existing Cookie - headers. - """ - v = cookies.format_cookie_header(odict) - self.headers["Cookie"] = [v] - def replace(self, pattern, repl, *args, **kwargs): """ Replaces a regular expression pattern with repl in the headers, the @@ -552,7 +356,7 @@ class HTTPResponse(MessageMixin, semantics.Response): _stateobject_attributes = MessageMixin._stateobject_attributes.copy() _stateobject_attributes.update( - code=int, + status_code=int, msg=str ) @@ -567,20 +371,6 @@ class HTTPResponse(MessageMixin, semantics.Response): f.load_state(state) return f - def __repr__(self): - if self.body: - size = netlib.utils.pretty_size(len(self.body)) - else: - size = "content missing" - return "".format( - status_code=self.status_code, - msg=self.msg, - contenttype=self.headers.get_first( - "content-type", "unknown content type" - ), - size=size - ) - @classmethod def from_protocol( self, @@ -605,6 +395,18 @@ class HTTPResponse(MessageMixin, semantics.Response): resp.timestamp_end, ) + @classmethod + def wrap(self, response): + return HTTPResponse( + httpversion=response.httpversion, + status_code=response.status_code, + msg=response.msg, + headers=response.headers, + body=response.body, + timestamp_start=response.timestamp_start, + timestamp_end=response.timestamp_end, + ) + def _refresh_cookie(self, c, delta): """ Takes a cookie string c and a time delta in seconds, and returns @@ -654,38 +456,3 @@ class HTTPResponse(MessageMixin, semantics.Response): c.append(self._refresh_cookie(i, delta)) if c: self.headers["set-cookie"] = c - - def get_cookies(self): - """ - Get the contents of all Set-Cookie headers. - - Returns a possibly empty ODict, where keys are cookie name strings, - and values are [value, attr] lists. Value is a string, and attr is - an ODictCaseless containing cookie attributes. Within attrs, unary - attributes (e.g. HTTPOnly) are indicated by a Null value. - """ - ret = [] - for header in self.headers["set-cookie"]: - v = http.cookies.parse_set_cookie_header(header) - if v: - name, value, attrs = v - ret.append([name, [value, attrs]]) - return odict.ODict(ret) - - def set_cookies(self, odict): - """ - Set the Set-Cookie headers on this response, over-writing existing - headers. - - Accepts an ODict of the same format as that returned by get_cookies. - """ - values = [] - for i in odict.lst: - values.append( - http.cookies.format_set_cookie_header( - i[0], - i[1][0], - i[1][1] - ) - ) - self.headers["Set-Cookie"] = values -- cgit v1.2.3