aboutsummaryrefslogtreecommitdiffstats
path: root/netlib
diff options
context:
space:
mode:
authorMaximilian Hils <git@maximilianhils.com>2015-09-16 00:04:23 +0200
committerMaximilian Hils <git@maximilianhils.com>2015-09-16 00:04:23 +0200
commita077d8877d210562f703c23e9625e8467c81222d (patch)
tree47608f9f99d149634f6c5dcb755bdf534a096d45 /netlib
parent11e7f476bd4bbcd6d072fa3659f628ae3a19705d (diff)
downloadmitmproxy-a077d8877d210562f703c23e9625e8467c81222d.tar.gz
mitmproxy-a077d8877d210562f703c23e9625e8467c81222d.tar.bz2
mitmproxy-a077d8877d210562f703c23e9625e8467c81222d.zip
finish netlib.http.http1 refactor
Diffstat (limited to 'netlib')
-rw-r--r--netlib/http/__init__.py6
-rw-r--r--netlib/http/http1/__init__.py4
-rw-r--r--netlib/http/http1/assemble.py8
-rw-r--r--netlib/http/http1/read.py152
-rw-r--r--netlib/http/http2/connections.py4
-rw-r--r--netlib/http/http2/frame.py (renamed from netlib/http/http2/frames.py)39
-rw-r--r--netlib/http/models.py2
-rw-r--r--netlib/tutils.py74
-rw-r--r--netlib/utils.py6
9 files changed, 155 insertions, 140 deletions
diff --git a/netlib/http/__init__.py b/netlib/http/__init__.py
index 0b1a0bc5..9303de09 100644
--- a/netlib/http/__init__.py
+++ b/netlib/http/__init__.py
@@ -1,7 +1,9 @@
-from .models import Request, Response, Headers, CONTENT_MISSING
+from .models import Request, Response, Headers
+from .models import HDR_FORM_MULTIPART, HDR_FORM_URLENCODED, CONTENT_MISSING
from . import http1, http2
__all__ = [
- "Request", "Response", "Headers", "CONTENT_MISSING"
+ "Request", "Response", "Headers",
+ "HDR_FORM_MULTIPART", "HDR_FORM_URLENCODED", "CONTENT_MISSING",
"http1", "http2"
]
diff --git a/netlib/http/http1/__init__.py b/netlib/http/http1/__init__.py
index 4d223f97..a72c2e05 100644
--- a/netlib/http/http1/__init__.py
+++ b/netlib/http/http1/__init__.py
@@ -1,7 +1,7 @@
from .read import (
read_request, read_request_head,
read_response, read_response_head,
- read_message_body, read_message_body_chunked,
+ read_body,
connection_close,
expected_http_body_size,
)
@@ -14,7 +14,7 @@ from .assemble import (
__all__ = [
"read_request", "read_request_head",
"read_response", "read_response_head",
- "read_message_body", "read_message_body_chunked",
+ "read_body",
"connection_close",
"expected_http_body_size",
"assemble_request", "assemble_request_head",
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
index a3269eed..47c7e95a 100644
--- a/netlib/http/http1/assemble.py
+++ b/netlib/http/http1/assemble.py
@@ -31,8 +31,6 @@ def assemble_response_head(response):
return b"%s\r\n%s\r\n" % (first_line, headers)
-
-
def _assemble_request_line(request, form=None):
if form is None:
form = request.form_out
@@ -50,7 +48,7 @@ def _assemble_request_line(request, form=None):
request.httpversion
)
elif form == "absolute":
- return b"%s %s://%s:%s%s %s" % (
+ return b"%s %s://%s:%d%s %s" % (
request.method,
request.scheme,
request.host,
@@ -78,11 +76,11 @@ def _assemble_request_headers(request):
if request.body or request.body == b"":
headers[b"Content-Length"] = str(len(request.body)).encode("ascii")
- return str(headers)
+ return bytes(headers)
def _assemble_response_line(response):
- return b"%s %s %s" % (
+ return b"%s %d %s" % (
response.httpversion,
response.status_code,
response.msg,
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
index 573bc739..4c423c4c 100644
--- a/netlib/http/http1/read.py
+++ b/netlib/http/http1/read.py
@@ -7,12 +7,13 @@ from ... import utils
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException
from .. import Request, Response, Headers
-ALPN_PROTO_HTTP1 = 'http/1.1'
+ALPN_PROTO_HTTP1 = b'http/1.1'
def read_request(rfile, body_size_limit=None):
request = read_request_head(rfile)
- request.body = read_message_body(rfile, request, limit=body_size_limit)
+ expected_body_size = expected_http_body_size(request)
+ request.body = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit))
request.timestamp_end = time.time()
return request
@@ -23,15 +24,14 @@ def read_request_head(rfile):
Args:
rfile: The input stream
- body_size_limit (bool): Maximum body size
Returns:
- The HTTP request object
+ The HTTP request object (without body)
Raises:
- HttpReadDisconnect: If no bytes can be read from rfile.
- HttpSyntaxException: If the input is invalid.
- HttpException: A different error occured.
+ HttpReadDisconnect: No bytes can be read from rfile.
+ HttpSyntaxException: The input is malformed HTTP.
+ HttpException: Any other error occurred.
"""
timestamp_start = time.time()
if hasattr(rfile, "reset_timestamps"):
@@ -51,12 +51,28 @@ def read_request_head(rfile):
def read_response(rfile, request, body_size_limit=None):
response = read_response_head(rfile)
- response.body = read_message_body(rfile, request, response, body_size_limit)
+ expected_body_size = expected_http_body_size(request, response)
+ response.body = b"".join(read_body(rfile, expected_body_size, body_size_limit))
response.timestamp_end = time.time()
return response
def read_response_head(rfile):
+ """
+ Parse an HTTP response head (response line + headers) from an input stream
+
+ Args:
+ rfile: The input stream
+
+ Returns:
+ The HTTP response object (without body)
+
+ Raises:
+ HttpReadDisconnect: No bytes can be read from rfile.
+ HttpSyntaxException: The input is malformed HTTP.
+ HttpException: Any other error occurred.
+ """
+
timestamp_start = time.time()
if hasattr(rfile, "reset_timestamps"):
rfile.reset_timestamps()
@@ -68,50 +84,33 @@ def read_response_head(rfile):
# more accurate timestamp_start
timestamp_start = rfile.first_byte_timestamp
- return Response(
- http_version,
- status_code,
- message,
- headers,
- None,
- timestamp_start
- )
-
-
-def read_message_body(*args, **kwargs):
- chunks = read_message_body_chunked(*args, **kwargs)
- return b"".join(chunks)
+ return Response(http_version, status_code, message, headers, None, timestamp_start)
-def read_message_body_chunked(rfile, request, response=None, limit=None, max_chunk_size=None):
+def read_body(rfile, expected_size, limit=None, max_chunk_size=4096):
"""
- Read an HTTP message body:
+ Read an HTTP message body
Args:
- If a request body should be read, only request should be passed.
- If a response body should be read, both request and response should be passed.
+ rfile: The input stream
+ expected_size: The expected body size (see :py:func:`expected_http_body_size`)
+ limit: Maximum body size
+ max_chunk_size: Maximum chunk size that gets yielded
+
+ Returns:
+ A generator that yields byte chunks of the content.
Raises:
- HttpException
- """
- if not response:
- headers = request.headers
- response_code = None
- is_request = True
- else:
- headers = response.headers
- response_code = response.status_code
- is_request = False
+ HttpException, if an error occurs
+ Caveats:
+ max_chunk_size is not considered if the transfer encoding is chunked.
+ """
if not limit or limit < 0:
limit = sys.maxsize
if not max_chunk_size:
max_chunk_size = limit
- expected_size = expected_http_body_size(
- headers, is_request, request.method, response_code
- )
-
if expected_size is None:
for x in _read_chunked(rfile, limit):
yield x
@@ -125,6 +124,8 @@ def read_message_body_chunked(rfile, request, response=None, limit=None, max_chu
while bytes_left:
chunk_size = min(bytes_left, max_chunk_size)
content = rfile.read(chunk_size)
+ if len(content) < chunk_size:
+ raise HttpException("Unexpected EOF")
yield content
bytes_left -= chunk_size
else:
@@ -148,10 +149,10 @@ def connection_close(http_version, headers):
"""
# At first, check if we have an explicit Connection header.
if b"connection" in headers:
- toks = utils.get_header_tokens(headers, "connection")
- if b"close" in toks:
+ tokens = utils.get_header_tokens(headers, "connection")
+ if b"close" in tokens:
return True
- elif b"keep-alive" in toks:
+ elif b"keep-alive" in tokens:
return False
# If we don't have a Connection header, HTTP 1.1 connections are assumed to
@@ -159,37 +160,41 @@ def connection_close(http_version, headers):
return http_version != (1, 1)
-def expected_http_body_size(
- headers,
- is_request,
- request_method,
- response_code,
-):
+def expected_http_body_size(request, response=False):
"""
- Returns the expected body length:
- - a positive integer, if the size is known in advance
- - None, if the size in unknown in advance (chunked encoding)
- - -1, if all data should be read until end of stream.
+ Returns:
+ The expected body length:
+ - a positive integer, if the size is known in advance
+ - None, if the size in unknown in advance (chunked encoding)
+ - -1, if all data should be read until end of stream.
Raises:
HttpSyntaxException, if the content length header is invalid
"""
# Determine response size according to
# http://tools.ietf.org/html/rfc7230#section-3.3
- if request_method:
- request_method = request_method.upper()
+ if not response:
+ headers = request.headers
+ response_code = None
+ is_request = True
+ else:
+ headers = response.headers
+ response_code = response.status_code
+ is_request = False
- is_empty_response = (not is_request and (
- request_method == b"HEAD" or
- 100 <= response_code <= 199 or
- (response_code == 200 and request_method == b"CONNECT") or
- response_code in (204, 304)
- ))
+ if is_request:
+ if headers.get(b"expect", b"").lower() == b"100-continue":
+ return 0
+ else:
+ if request.method.upper() == b"HEAD":
+ return 0
+ if 100 <= response_code <= 199:
+ return 0
+ if response_code == 200 and request.method.upper() == b"CONNECT":
+ return 0
+ if response_code in (204, 304):
+ return 0
- if is_empty_response:
- return 0
- if is_request and headers.get(b"expect", b"").lower() == b"100-continue":
- return 0
if b"chunked" in headers.get(b"transfer-encoding", b"").lower():
return None
if b"content-length" in headers:
@@ -212,18 +217,22 @@ def _get_first_line(rfile):
line = rfile.readline()
if not line:
raise HttpReadDisconnect()
- return line
+ line = line.strip()
+ try:
+ line.decode("ascii")
+ except ValueError:
+ raise HttpSyntaxException("Non-ascii characters in first line: {}".format(line))
+ return line.strip()
def _read_request_line(rfile):
line = _get_first_line(rfile)
try:
- method, path, http_version = line.strip().split(b" ")
+ method, path, http_version = line.split(b" ")
if path == b"*" or path.startswith(b"/"):
form = "relative"
- path.decode("ascii") # should not raise a ValueError
scheme, host, port = None, None, None
elif method == b"CONNECT":
form = "authority"
@@ -233,6 +242,7 @@ def _read_request_line(rfile):
form = "absolute"
scheme, host, port, path = utils.parse_url(path)
+ _check_http_version(http_version)
except ValueError:
raise HttpSyntaxException("Bad HTTP request line: {}".format(line))
@@ -253,7 +263,7 @@ def _parse_authority_form(hostport):
if not utils.is_valid_host(host) or not utils.is_valid_port(port):
raise ValueError()
except ValueError:
- raise ValueError("Invalid host specification: {}".format(hostport))
+ raise HttpSyntaxException("Invalid host specification: {}".format(hostport))
return host, port
@@ -263,7 +273,7 @@ def _read_response_line(rfile):
try:
- parts = line.strip().split(b" ")
+ parts = line.split(b" ", 2)
if len(parts) == 2: # handle missing message gracefully
parts.append(b"")
@@ -278,7 +288,7 @@ def _read_response_line(rfile):
def _check_http_version(http_version):
- if not re.match(rb"^HTTP/\d\.\d$", http_version):
+ if not re.match(br"^HTTP/\d\.\d$", http_version):
raise HttpSyntaxException("Unknown HTTP version: {}".format(http_version))
@@ -313,7 +323,7 @@ def _read_headers(rfile):
return Headers(ret)
-def _read_chunked(rfile, limit):
+def _read_chunked(rfile, limit=sys.maxsize):
"""
Read a HTTP body with chunked transfer encoding.
diff --git a/netlib/http/http2/connections.py b/netlib/http/http2/connections.py
index b6d376d3..036bf68f 100644
--- a/netlib/http/http2/connections.py
+++ b/netlib/http/http2/connections.py
@@ -4,7 +4,7 @@ import time
from hpack.hpack import Encoder, Decoder
from netlib import http, utils
-from netlib.http import semantics
+from netlib.http import models as semantics
from . import frame
@@ -15,7 +15,7 @@ class TCPHandler(object):
self.wfile = wfile
-class HTTP2Protocol(semantics.ProtocolMixin):
+class HTTP2Protocol(object):
ERROR_CODES = utils.BiDi(
NO_ERROR=0x0,
diff --git a/netlib/http/http2/frames.py b/netlib/http/http2/frame.py
index b36b3adf..cb2cde99 100644
--- a/netlib/http/http2/frames.py
+++ b/netlib/http/http2/frame.py
@@ -1,12 +1,31 @@
-import sys
+from __future__ import absolute_import, print_function, division
import struct
from hpack.hpack import Encoder, Decoder
-from .. import utils
+from ...utils import BiDi
+from ...exceptions import HttpSyntaxException
-class FrameSizeError(Exception):
- pass
+ERROR_CODES = BiDi(
+ NO_ERROR=0x0,
+ PROTOCOL_ERROR=0x1,
+ INTERNAL_ERROR=0x2,
+ FLOW_CONTROL_ERROR=0x3,
+ SETTINGS_TIMEOUT=0x4,
+ STREAM_CLOSED=0x5,
+ FRAME_SIZE_ERROR=0x6,
+ REFUSED_STREAM=0x7,
+ CANCEL=0x8,
+ COMPRESSION_ERROR=0x9,
+ CONNECT_ERROR=0xa,
+ ENHANCE_YOUR_CALM=0xb,
+ INADEQUATE_SECURITY=0xc,
+ HTTP_1_1_REQUIRED=0xd
+)
+
+CLIENT_CONNECTION_PREFACE = b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
+
+ALPN_PROTO_H2 = b'h2'
class Frame(object):
@@ -30,7 +49,9 @@ class Frame(object):
length=0,
flags=FLAG_NO_FLAGS,
stream_id=0x0):
- valid_flags = reduce(lambda x, y: x | y, self.VALID_FLAGS, 0x0)
+ valid_flags = 0
+ for flag in self.VALID_FLAGS:
+ valid_flags |= flag
if flags | valid_flags != valid_flags:
raise ValueError('invalid flags detected.')
@@ -61,7 +82,7 @@ class Frame(object):
SettingsFrame.SETTINGS.SETTINGS_MAX_FRAME_SIZE]
if length > max_frame_size:
- raise FrameSizeError(
+ raise HttpSyntaxException(
"Frame size exceeded: %d, but only %d allowed." % (
length, max_frame_size))
@@ -80,7 +101,7 @@ class Frame(object):
stream_id = fields[4]
if raw_header[:4] == b'HTTP': # pragma no cover
- print >> sys.stderr, "WARNING: This looks like an HTTP/1 connection!"
+ raise HttpSyntaxException("Expected HTTP2 Frame, got HTTP/1 connection")
cls._check_frame_size(length, state)
@@ -339,7 +360,7 @@ class SettingsFrame(Frame):
TYPE = 0x4
VALID_FLAGS = [Frame.FLAG_ACK]
- SETTINGS = utils.BiDi(
+ SETTINGS = BiDi(
SETTINGS_HEADER_TABLE_SIZE=0x1,
SETTINGS_ENABLE_PUSH=0x2,
SETTINGS_MAX_CONCURRENT_STREAMS=0x3,
@@ -366,7 +387,7 @@ class SettingsFrame(Frame):
def from_bytes(cls, state, length, flags, stream_id, payload):
f = cls(state=state, length=length, flags=flags, stream_id=stream_id)
- for i in xrange(0, len(payload), 6):
+ for i in range(0, len(payload), 6):
identifier, value = struct.unpack("!HL", payload[i:i + 6])
f.settings[identifier] = value
diff --git a/netlib/http/models.py b/netlib/http/models.py
index bd5863b1..572d66c9 100644
--- a/netlib/http/models.py
+++ b/netlib/http/models.py
@@ -474,7 +474,6 @@ class Response(object):
msg=None,
headers=None,
body=None,
- sslinfo=None,
timestamp_start=None,
timestamp_end=None,
):
@@ -487,7 +486,6 @@ class Response(object):
self.msg = msg
self.headers = headers
self.body = body
- self.sslinfo = sslinfo
self.timestamp_start = timestamp_start
self.timestamp_end = timestamp_end
diff --git a/netlib/tutils.py b/netlib/tutils.py
index 65c4a313..758f8410 100644
--- a/netlib/tutils.py
+++ b/netlib/tutils.py
@@ -7,13 +7,15 @@ from contextlib import contextmanager
import six
import sys
-from netlib import tcp, utils, http
+from . import utils
+from .http import Request, Response, Headers
def treader(bytes):
"""
Construct a tcp.Read object from bytes.
"""
+ from . import tcp # TODO: move to top once cryptography is on Python 3.5
fp = BytesIO(bytes)
return tcp.Reader(fp)
@@ -91,55 +93,39 @@ class RaisesContext(object):
test_data = utils.Data(__name__)
-def treq(content="content", scheme="http", host="address", port=22):
+def treq(**kwargs):
"""
- @return: libmproxy.protocol.http.HTTPRequest
+ Returns:
+ netlib.http.Request
"""
- headers = http.Headers()
- headers["header"] = "qvalue"
- req = http.Request(
- "relative",
- "GET",
- scheme,
- host,
- port,
- "/path",
- (1, 1),
- headers,
- content,
- None,
- None,
+ default = dict(
+ form_in="relative",
+ method=b"GET",
+ scheme=b"http",
+ host=b"address",
+ port=22,
+ path=b"/path",
+ httpversion=b"HTTP/1.1",
+ headers=Headers(header=b"qvalue"),
+ body=b"content"
)
- return req
+ default.update(kwargs)
+ return Request(**default)
-def treq_absolute(content="content"):
+def tresp(**kwargs):
"""
- @return: libmproxy.protocol.http.HTTPRequest
+ Returns:
+ netlib.http.Response
"""
- r = treq(content)
- r.form_in = r.form_out = "absolute"
- r.host = "address"
- r.port = 22
- r.scheme = "http"
- return r
-
-
-def tresp(content="message"):
- """
- @return: libmproxy.protocol.http.HTTPResponse
- """
-
- headers = http.Headers()
- headers["header_response"] = "svalue"
-
- resp = http.semantics.Response(
- (1, 1),
- 200,
- "OK",
- headers,
- content,
+ default = dict(
+ httpversion=b"HTTP/1.1",
+ status_code=200,
+ msg=b"OK",
+ headers=Headers(header_response=b"svalue"),
+ body=b"message",
timestamp_start=time.time(),
- timestamp_end=time.time(),
+ timestamp_end=time.time()
)
- return resp
+ default.update(kwargs)
+ return Response(**default)
diff --git a/netlib/utils.py b/netlib/utils.py
index fb579cac..a86b8019 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -40,9 +40,9 @@ def clean_bin(s, keep_spacing=True):
)
else:
if keep_spacing:
- keep = b"\n\r\t"
+ keep = (9, 10, 13) # \t, \n, \r
else:
- keep = b""
+ keep = ()
return b"".join(
six.int2byte(ch) if (31 < ch < 127 or ch in keep) else b"."
for ch in six.iterbytes(s)
@@ -251,7 +251,7 @@ def hostport(scheme, host, port):
if (port, scheme) in [(80, "http"), (443, "https")]:
return host
else:
- return b"%s:%s" % (host, port)
+ return b"%s:%d" % (host, port)
def unparse_url(scheme, host, port, path=""):