diff options
| author | Thomas Kriechbaumer <thomas@kriechbaumer.name> | 2015-08-01 10:40:19 +0200 | 
|---|---|---|
| committer | Thomas Kriechbaumer <thomas@kriechbaumer.name> | 2015-08-01 14:37:32 +0200 | 
| commit | db02553e2afee044faac898d12bd8d1adadbcd21 (patch) | |
| tree | 1e5910599b7b47c527528f03f896efd2fbf7c907 /libmproxy | |
| parent | 8a051511706e2e62c32e0f70e05ecab11d444b6f (diff) | |
| download | mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.tar.gz mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.tar.bz2 mitmproxy-db02553e2afee044faac898d12bd8d1adadbcd21.zip | |
move code from mitmproxy to netlib
Diffstat (limited to 'libmproxy')
| -rw-r--r-- | libmproxy/cmdline.py | 5 | ||||
| -rw-r--r-- | libmproxy/console/contentview.py | 2 | ||||
| -rw-r--r-- | libmproxy/console/flowlist.py | 7 | ||||
| -rw-r--r-- | libmproxy/encoding.py | 82 | ||||
| -rw-r--r-- | libmproxy/protocol/http.py | 37 | ||||
| -rw-r--r-- | libmproxy/protocol/http_wrappers.py | 295 | ||||
| -rw-r--r-- | libmproxy/utils.py | 22 | 
7 files changed, 58 insertions, 392 deletions
| diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py index 08639f6d..d033fb76 100644 --- a/libmproxy/cmdline.py +++ b/libmproxy/cmdline.py @@ -2,7 +2,10 @@ from __future__ import absolute_import  import os  import re  import configargparse +  from netlib import http +import netlib.utils +  from . import filt, utils, version  from .proxy import config @@ -100,7 +103,7 @@ def parse_setheader(s):  def parse_server_spec(url): -    p = http.parse_url(url) +    p = netlib.utils.parse_url(url)      if not p or not p[1] or p[0] not in ("http", "https"):          raise configargparse.ArgumentTypeError(              "Invalid server specification: %s" % url diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py index e4ffcd47..8f18ea7a 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/console/contentview.py @@ -225,7 +225,7 @@ class ViewURLEncoded:      content_types = ["application/x-www-form-urlencoded"]      def __call__(self, hdrs, content, limit): -        lines = utils.urldecode(content) +        lines = netlib.utils.urldecode(content)          if lines:              body = common.format_keyvals(                  [(k + ":", v) for (k, v) in lines], diff --git a/libmproxy/console/flowlist.py b/libmproxy/console/flowlist.py index 46cd0de1..2b77f4a3 100644 --- a/libmproxy/console/flowlist.py +++ b/libmproxy/console/flowlist.py @@ -1,6 +1,9 @@  from __future__ import absolute_import  import urwid +  from netlib import http +import netlib.utils +  from . import common, signals @@ -219,7 +222,7 @@ class ConnectionItem(urwid.WidgetWrap):          elif key == "U":              for f in self.state.flows:                  self.state.set_flow_marked(f, False) -            signals.flowlist_change.send(self)        +            signals.flowlist_change.send(self)          elif key == "V":              if not self.flow.modified():                  signals.status_message.send(message="Flow not modified.") @@ -321,7 +324,7 @@ class FlowListBox(urwid.ListBox):          )      def new_request(self, url, method): -        parts = http.parse_url(str(url)) +        parts = netlib.utils.parse_url(str(url))          if not parts:              signals.status_message.send(message="Invalid Url")              return diff --git a/libmproxy/encoding.py b/libmproxy/encoding.py deleted file mode 100644 index f107eb5f..00000000 --- a/libmproxy/encoding.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -    Utility functions for decoding response bodies. -""" -from __future__ import absolute_import -import cStringIO -import gzip -import zlib - -__ALL__ = ["ENCODINGS"] - -ENCODINGS = set(["identity", "gzip", "deflate"]) - - -def decode(e, content): -    encoding_map = { -        "identity": identity, -        "gzip": decode_gzip, -        "deflate": decode_deflate, -    } -    if e not in encoding_map: -        return None -    return encoding_map[e](content) - - -def encode(e, content): -    encoding_map = { -        "identity": identity, -        "gzip": encode_gzip, -        "deflate": encode_deflate, -    } -    if e not in encoding_map: -        return None -    return encoding_map[e](content) - - -def identity(content): -    """ -        Returns content unchanged. Identity is the default value of -        Accept-Encoding headers. -    """ -    return content - - -def decode_gzip(content): -    gfile = gzip.GzipFile(fileobj=cStringIO.StringIO(content)) -    try: -        return gfile.read() -    except (IOError, EOFError): -        return None - - -def encode_gzip(content): -    s = cStringIO.StringIO() -    gf = gzip.GzipFile(fileobj=s, mode='wb') -    gf.write(content) -    gf.close() -    return s.getvalue() - - -def decode_deflate(content): -    """ -        Returns decompressed data for DEFLATE. Some servers may respond with -        compressed data without a zlib header or checksum. An undocumented -        feature of zlib permits the lenient decompression of data missing both -        values. - -        http://bugs.python.org/issue5784 -    """ -    try: -        try: -            return zlib.decompress(content) -        except zlib.error: -            return zlib.decompress(content, -15) -    except zlib.error: -        return None - - -def encode_deflate(content): -    """ -        Returns compressed content, always including zlib header and checksum. -    """ -    return zlib.compress(content) diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 35fd7d28..961e50a6 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -19,9 +19,6 @@ from .. import encoding, utils, controller, stateobject, proxy  from .http_wrappers import decoded, HTTPRequest, HTTPResponse -HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = "multipart/form-data" -  class KillSignal(Exception):      pass @@ -39,7 +36,10 @@ def send_connect_request(conn, host, port, update_state=True):          odict.ODictCaseless(),          ""      ) + +    # we currently only support HTTP/1 CONNECT requests      protocol = http1.HTTP1Protocol(conn) +      conn.send(protocol.assemble(upstream_request))      resp = HTTPResponse.from_protocol(protocol, upstream_request.method)      if resp.status_code != 200: @@ -144,18 +144,6 @@ class HTTPFlow(Flow):          return c -class HttpAuthenticationError(Exception): -    def __init__(self, auth_headers=None): -        super(HttpAuthenticationError, self).__init__( -            "Proxy Authentication Required" -        ) -        self.headers = auth_headers -        self.code = 407 - -    def __repr__(self): -        return "Proxy Authentication Required" - -  class HTTPHandler(ProtocolHandler):      """      HTTPHandler implements mitmproxys understanding of the HTTP protocol. @@ -179,7 +167,7 @@ class HTTPHandler(ProtocolHandler):              try:                  if not self.c.server_conn.protocol:                      # instantiate new protocol if connection does not have one yet -                    self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn) +                    self.c.server_conn.protocol = http2.HTTP2Protocol(self.c.server_conn)  # TODO: select correct protocol                      self.c.server_conn.protocol.perform_connection_preface()                  self.c.server_conn.send(self.c.server_conn.protocol.assemble(flow.request)) @@ -225,6 +213,7 @@ class HTTPHandler(ProtocolHandler):                  flow.response.content = CONTENT_MISSING              else:                  if isinstance(flow.server_conn.protocol, http1.HTTP1Protocol): +                    # streaming is only supported with HTTP/1 at the moment                      flow.response.content = flow.server_conn.protocol.read_http_body(                          flow.response.headers,                          self.c.config.body_size_limit, @@ -241,6 +230,7 @@ class HTTPHandler(ProtocolHandler):              try:                  if not flow.client_conn.protocol:                      # instantiate new protocol if connection does not have one yet +                    # the first request might be a CONNECT - which is currently only supported with HTTP/1                      flow.client_conn.protocol = http1.HTTP1Protocol(self.c.client_conn)                  req = HTTPRequest.from_protocol( @@ -258,8 +248,8 @@ class HTTPHandler(ProtocolHandler):              )              ret = self.process_request(flow, req)              if ret: -                # CONNECT successful - upgrade to HTTP/2                  # instantiate new protocol if connection does not have one yet +                # TODO: select correct protocol                  flow.client_conn.protocol = http2.HTTP2Protocol(self.c.client_conn, is_server=True)              if ret is not None:                  return ret @@ -329,7 +319,7 @@ class HTTPHandler(ProtocolHandler):              return True  # Next flow please.          except ( -                HttpAuthenticationError, +                http.HttpAuthenticationError,                  http.HttpError,                  proxy.ProxyError,                  tcp.NetLibError, @@ -389,6 +379,7 @@ class HTTPHandler(ProtocolHandler):              pass      def send_error(self, code, message, headers): +        # TODO: implement this again          raise NotImplementedError("todo - adapt for HTTP/2 - make use of make_error_reponse from pathod")          # response = http.status_codes.RESPONSES.get(code, "Unknown")          # html_content = """ @@ -457,6 +448,9 @@ class HTTPHandler(ProtocolHandler):                  self.c.set_server_address((request.host, request.port))                  # Update server_conn attribute on the flow                  flow.server_conn = self.c.server_conn + +                # since we currently only support HTTP/1 CONNECT requests +                # the response must be HTTP/1 as well                  self.c.client_conn.send(                      ('HTTP/%s.%s 200 ' % (request.httpversion[0], request.httpversion[1])) +                      'Connection established\r\n' + @@ -495,7 +489,7 @@ class HTTPHandler(ProtocolHandler):                              400,                              "Invalid request: No host information"                          ) -                    p = http.parse_url("http://" + h) +                    p = netlib.utils.parse_url("http://" + h)                      request.scheme = p[0]                      request.host = p[1]                      request.port = p[2] @@ -602,6 +596,9 @@ class HTTPHandler(ProtocolHandler):              Checks if the connection should be closed depending on the HTTP              semantics. Returns True, if so.          """ + +        # TODO: add logic for HTTP/2 +          close_connection = (              http1.HTTP1Protocol.connection_close(                  flow.request.httpversion, @@ -684,7 +681,7 @@ class HTTPHandler(ProtocolHandler):              if self.c.config.authenticator.authenticate(request.headers):                  self.c.config.authenticator.clean(request.headers)              else: -                raise HttpAuthenticationError( +                raise http.HttpAuthenticationError(                      self.c.config.authenticator.auth_challenge_headers())          return request.headers diff --git a/libmproxy/protocol/http_wrappers.py b/libmproxy/protocol/http_wrappers.py index 18a355dc..758ebfe0 100644 --- a/libmproxy/protocol/http_wrappers.py +++ b/libmproxy/protocol/http_wrappers.py @@ -8,18 +8,13 @@ import urlparse  from email.utils import parsedate_tz, formatdate, mktime_tz  import netlib -from netlib import http, tcp, odict, utils +from netlib import http, tcp, odict, utils, encoding  from netlib.http import cookies, semantics, http1  from .tcp import TCPHandler  from .primitives import KILL, ProtocolHandler, Flow, Error  from ..proxy.connection import ServerConnection -from .. import encoding, utils, controller, stateobject, proxy - - -HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" -HDR_FORM_MULTIPART = "multipart/form-data" -CONTENT_MISSING = 0 +from .. import utils, controller, stateobject, proxy  class decoded(object): @@ -249,12 +244,6 @@ class HTTPRequest(MessageMixin, semantics.Request):          f.load_state(state)          return f -    def __repr__(self): -        return "<HTTPRequest: {0}>".format( -            # just for visualisation purposes we use HTTP/1 protocol here -            http.http1.HTTP1Protocol._assemble_request_first_line(self)[:-9] -        ) -      @classmethod      def from_protocol(              self, @@ -281,211 +270,26 @@ class HTTPRequest(MessageMixin, semantics.Request):              req.timestamp_end,          ) +    @classmethod +    def wrap(self, request): +        return HTTPRequest( +            form_in=request.form_in, +            method=request.method, +            scheme=request.scheme, +            host=request.host, +            port=request.port, +            path=request.path, +            httpversion=request.httpversion, +            headers=request.headers, +            body=request.body, +            timestamp_start=request.timestamp_start, +            timestamp_end=request.timestamp_end, +            form_out=(request.form_out if hasattr(request, 'form_out') else None), +        )      def __hash__(self):          return id(self) -    def anticache(self): -        """ -            Modifies this request to remove headers that might produce a cached -            response. That is, we remove ETags and If-Modified-Since headers. -        """ -        delheaders = [ -            "if-modified-since", -            "if-none-match", -        ] -        for i in delheaders: -            del self.headers[i] - -    def anticomp(self): -        """ -            Modifies this request to remove headers that will compress the -            resource's data. -        """ -        self.headers["accept-encoding"] = ["identity"] - -    def constrain_encoding(self): -        """ -            Limits the permissible Accept-Encoding values, based on what we can -            decode appropriately. -        """ -        if self.headers["accept-encoding"]: -            self.headers["accept-encoding"] = [ -                ', '.join( -                    e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0])] - -    def update_host_header(self): -        """ -            Update the host header to reflect the current target. -        """ -        self.headers["Host"] = [self.host] - -    def get_form(self): -        """ -            Retrieves the URL-encoded or multipart form data, returning an ODict object. -            Returns an empty ODict if there is no data or the content-type -            indicates non-form data. -        """ -        if self.body: -            if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): -                return self.get_form_urlencoded() -            elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): -                return self.get_form_multipart() -        return odict.ODict([]) - -    def get_form_urlencoded(self): -        """ -            Retrieves the URL-encoded form data, returning an ODict object. -            Returns an empty ODict if there is no data or the content-type -            indicates non-form data. -        """ -        if self.body and self.headers.in_any( -                "content-type", -                HDR_FORM_URLENCODED, -                True): -            return odict.ODict(utils.urldecode(self.body)) -        return odict.ODict([]) - -    def get_form_multipart(self): -        if self.body and self.headers.in_any( -                "content-type", -                HDR_FORM_MULTIPART, -                True): -            return odict.ODict( -                utils.multipartdecode( -                    self.headers, -                    self.body)) -        return odict.ODict([]) - -    def set_form_urlencoded(self, odict): -        """ -            Sets the body to the URL-encoded form data, and adds the -            appropriate content-type header. Note that this will destory the -            existing body if there is one. -        """ -        # FIXME: If there's an existing content-type header indicating a -        # url-encoded form, leave it alone. -        self.headers["Content-Type"] = [HDR_FORM_URLENCODED] -        self.body = utils.urlencode(odict.lst) - -    def get_path_components(self): -        """ -            Returns the path components of the URL as a list of strings. - -            Components are unquoted. -        """ -        _, _, path, _, _, _ = urlparse.urlparse(self.url) -        return [urllib.unquote(i) for i in path.split("/") if i] - -    def set_path_components(self, lst): -        """ -            Takes a list of strings, and sets the path component of the URL. - -            Components are quoted. -        """ -        lst = [urllib.quote(i, safe="") for i in lst] -        path = "/" + "/".join(lst) -        scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.url) -        self.url = urlparse.urlunparse( -            [scheme, netloc, path, params, query, fragment] -        ) - -    def get_query(self): -        """ -            Gets the request query string. Returns an ODict object. -        """ -        _, _, _, _, query, _ = urlparse.urlparse(self.url) -        if query: -            return odict.ODict(utils.urldecode(query)) -        return odict.ODict([]) - -    def set_query(self, odict): -        """ -            Takes an ODict object, and sets the request query string. -        """ -        scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.url) -        query = utils.urlencode(odict.lst) -        self.url = urlparse.urlunparse( -            [scheme, netloc, path, params, query, fragment] -        ) - -    def pretty_host(self, hostheader): -        """ -            Heuristic to get the host of the request. - -            Note that pretty_host() does not always return the TCP destination -            of the request, e.g. if an upstream proxy is in place - -            If hostheader is set to True, the Host: header will be used as -            additional (and preferred) data source. This is handy in -            transparent mode, where only the IO of the destination is known, -            but not the resolved name. This is disabled by default, as an -            attacker may spoof the host header to confuse an analyst. -        """ -        host = None -        if hostheader: -            host = self.headers.get_first("host") -        if not host: -            host = self.host -        if host: -            try: -                return host.encode("idna") -            except ValueError: -                return host -        else: -            return None - -    def pretty_url(self, hostheader): -        if self.form_out == "authority":  # upstream proxy mode -            return "%s:%s" % (self.pretty_host(hostheader), self.port) -        return utils.unparse_url(self.scheme, -                                 self.pretty_host(hostheader), -                                 self.port, -                                 self.path).encode('ascii') - -    @property -    def url(self): -        """ -            Returns a URL string, constructed from the Request's URL components. -        """ -        return utils.unparse_url( -            self.scheme, -            self.host, -            self.port, -            self.path -        ).encode('ascii') - -    @url.setter -    def url(self, url): -        """ -            Parses a URL specification, and updates the Request's information -            accordingly. - -            Returns False if the URL was invalid, True if the request succeeded. -        """ -        parts = http.parse_url(url) -        if not parts: -            raise ValueError("Invalid URL: %s" % url) -        self.scheme, self.host, self.port, self.path = parts - -    def get_cookies(self): -        """ - -            Returns a possibly empty netlib.odict.ODict object. -        """ -        ret = odict.ODict() -        for i in self.headers["cookie"]: -            ret.extend(cookies.parse_cookie_header(i)) -        return ret - -    def set_cookies(self, odict): -        """ -            Takes an netlib.odict.ODict object. Over-writes any existing Cookie -            headers. -        """ -        v = cookies.format_cookie_header(odict) -        self.headers["Cookie"] = [v] -      def replace(self, pattern, repl, *args, **kwargs):          """              Replaces a regular expression pattern with repl in the headers, the @@ -552,7 +356,7 @@ class HTTPResponse(MessageMixin, semantics.Response):      _stateobject_attributes = MessageMixin._stateobject_attributes.copy()      _stateobject_attributes.update( -        code=int, +        status_code=int,          msg=str      ) @@ -567,20 +371,6 @@ class HTTPResponse(MessageMixin, semantics.Response):          f.load_state(state)          return f -    def __repr__(self): -        if self.body: -            size = netlib.utils.pretty_size(len(self.body)) -        else: -            size = "content missing" -        return "<HTTPResponse: {status_code} {msg} ({contenttype}, {size})>".format( -            status_code=self.status_code, -            msg=self.msg, -            contenttype=self.headers.get_first( -                "content-type", "unknown content type" -            ), -            size=size -        ) -      @classmethod      def from_protocol(              self, @@ -605,6 +395,18 @@ class HTTPResponse(MessageMixin, semantics.Response):              resp.timestamp_end,          ) +    @classmethod +    def wrap(self, response): +        return HTTPResponse( +            httpversion=response.httpversion, +            status_code=response.status_code, +            msg=response.msg, +            headers=response.headers, +            body=response.body, +            timestamp_start=response.timestamp_start, +            timestamp_end=response.timestamp_end, +        ) +      def _refresh_cookie(self, c, delta):          """              Takes a cookie string c and a time delta in seconds, and returns @@ -654,38 +456,3 @@ class HTTPResponse(MessageMixin, semantics.Response):              c.append(self._refresh_cookie(i, delta))          if c:              self.headers["set-cookie"] = c - -    def get_cookies(self): -        """ -            Get the contents of all Set-Cookie headers. - -            Returns a possibly empty ODict, where keys are cookie name strings, -            and values are [value, attr] lists. Value is a string, and attr is -            an ODictCaseless containing cookie attributes. Within attrs, unary -            attributes (e.g. HTTPOnly) are indicated by a Null value. -        """ -        ret = [] -        for header in self.headers["set-cookie"]: -            v = http.cookies.parse_set_cookie_header(header) -            if v: -                name, value, attrs = v -                ret.append([name, [value, attrs]]) -        return odict.ODict(ret) - -    def set_cookies(self, odict): -        """ -            Set the Set-Cookie headers on this response, over-writing existing -            headers. - -            Accepts an ODict of the same format as that returned by get_cookies. -        """ -        values = [] -        for i in odict.lst: -            values.append( -                http.cookies.format_set_cookie_header( -                    i[0], -                    i[1][0], -                    i[1][1] -                ) -            ) -        self.headers["Set-Cookie"] = values diff --git a/libmproxy/utils.py b/libmproxy/utils.py index 78f74767..22ab4344 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -61,21 +61,6 @@ def pretty_json(s):      return json.dumps(p, sort_keys=True, indent=4).split("\n") -def urldecode(s): -    """ -        Takes a urlencoded string and returns a list of (key, value) tuples. -    """ -    return cgi.parse_qsl(s, keep_blank_values=True) - - -def urlencode(s): -    """ -        Takes a list of (key, value) tuples and returns a urlencoded string. -    """ -    s = [tuple(i) for i in s] -    return urllib.urlencode(s, False) - -  def multipartdecode(hdrs, content):      """          Takes a multipart boundary encoded string and returns list of (key, value) tuples. @@ -197,13 +182,6 @@ def parse_content_type(c): -def unparse_url(scheme, host, port, path=""): -    """ -        Returns a URL string, constructed from the specified compnents. -    """ -    return "%s://%s%s" % (scheme, netlib.utils.hostport(scheme, host, port), path) - -  def clean_hanging_newline(t):      """          Many editors will silently add a newline to the final line of a | 
