aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http/http1
diff options
context:
space:
mode:
authorAldo Cortesi <aldo@nullcube.com>2016-10-20 11:56:38 +1300
committerAldo Cortesi <aldo@nullcube.com>2016-10-20 11:56:38 +1300
commit8430f857b504a3e7406dc36e54dc32783569d0dd (patch)
treed3116cd540faf01f272a0892fc6a9b83b4f6de8a /netlib/http/http1
parent853e03a5e753354fad3a3fa5384ef3a09384ef43 (diff)
downloadmitmproxy-8430f857b504a3e7406dc36e54dc32783569d0dd.tar.gz
mitmproxy-8430f857b504a3e7406dc36e54dc32783569d0dd.tar.bz2
mitmproxy-8430f857b504a3e7406dc36e54dc32783569d0dd.zip
The final piece: netlib -> mitproxy.net
Diffstat (limited to 'netlib/http/http1')
-rw-r--r--netlib/http/http1/__init__.py24
-rw-r--r--netlib/http/http1/assemble.py100
-rw-r--r--netlib/http/http1/read.py377
3 files changed, 0 insertions, 501 deletions
diff --git a/netlib/http/http1/__init__.py b/netlib/http/http1/__init__.py
deleted file mode 100644
index e4bf01c5..00000000
--- a/netlib/http/http1/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from .read import (
- read_request, read_request_head,
- read_response, read_response_head,
- read_body,
- connection_close,
- expected_http_body_size,
-)
-from .assemble import (
- assemble_request, assemble_request_head,
- assemble_response, assemble_response_head,
- assemble_body,
-)
-
-
-__all__ = [
- "read_request", "read_request_head",
- "read_response", "read_response_head",
- "read_body",
- "connection_close",
- "expected_http_body_size",
- "assemble_request", "assemble_request_head",
- "assemble_response", "assemble_response_head",
- "assemble_body",
-]
diff --git a/netlib/http/http1/assemble.py b/netlib/http/http1/assemble.py
deleted file mode 100644
index e0a91ad8..00000000
--- a/netlib/http/http1/assemble.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import netlib.http.url
-from mitmproxy import exceptions
-
-
-def assemble_request(request):
- if request.data.content is None:
- raise exceptions.HttpException("Cannot assemble flow with missing content")
- head = assemble_request_head(request)
- body = b"".join(assemble_body(request.data.headers, [request.data.content]))
- return head + body
-
-
-def assemble_request_head(request):
- first_line = _assemble_request_line(request.data)
- headers = _assemble_request_headers(request.data)
- return b"%s\r\n%s\r\n" % (first_line, headers)
-
-
-def assemble_response(response):
- if response.data.content is None:
- raise exceptions.HttpException("Cannot assemble flow with missing content")
- head = assemble_response_head(response)
- body = b"".join(assemble_body(response.data.headers, [response.data.content]))
- return head + body
-
-
-def assemble_response_head(response):
- first_line = _assemble_response_line(response.data)
- headers = _assemble_response_headers(response.data)
- return b"%s\r\n%s\r\n" % (first_line, headers)
-
-
-def assemble_body(headers, body_chunks):
- if "chunked" in headers.get("transfer-encoding", "").lower():
- for chunk in body_chunks:
- if chunk:
- yield b"%x\r\n%s\r\n" % (len(chunk), chunk)
- yield b"0\r\n\r\n"
- else:
- for chunk in body_chunks:
- yield chunk
-
-
-def _assemble_request_line(request_data):
- """
- Args:
- request_data (netlib.http.request.RequestData)
- """
- form = request_data.first_line_format
- if form == "relative":
- return b"%s %s %s" % (
- request_data.method,
- request_data.path,
- request_data.http_version
- )
- elif form == "authority":
- return b"%s %s:%d %s" % (
- request_data.method,
- request_data.host,
- request_data.port,
- request_data.http_version
- )
- elif form == "absolute":
- return b"%s %s://%s:%d%s %s" % (
- request_data.method,
- request_data.scheme,
- request_data.host,
- request_data.port,
- request_data.path,
- request_data.http_version
- )
- else:
- raise RuntimeError("Invalid request form")
-
-
-def _assemble_request_headers(request_data):
- """
- Args:
- request_data (netlib.http.request.RequestData)
- """
- headers = request_data.headers.copy()
- if "host" not in headers and request_data.scheme and request_data.host and request_data.port:
- headers["host"] = netlib.http.url.hostport(
- request_data.scheme,
- request_data.host,
- request_data.port
- )
- return bytes(headers)
-
-
-def _assemble_response_line(response_data):
- return b"%s %d %s" % (
- response_data.http_version,
- response_data.status_code,
- response_data.reason,
- )
-
-
-def _assemble_response_headers(response):
- return bytes(response.headers)
diff --git a/netlib/http/http1/read.py b/netlib/http/http1/read.py
deleted file mode 100644
index e6b22863..00000000
--- a/netlib/http/http1/read.py
+++ /dev/null
@@ -1,377 +0,0 @@
-import time
-import sys
-import re
-
-from netlib.http import request
-from netlib.http import response
-from netlib.http import headers
-from netlib.http import url
-from netlib import check
-from mitmproxy import exceptions
-
-
-def get_header_tokens(headers, key):
- """
- Retrieve all tokens for a header key. A number of different headers
- follow a pattern where each header line can containe comma-separated
- tokens, and headers can be set multiple times.
- """
- if key not in headers:
- return []
- tokens = headers[key].split(",")
- return [token.strip() for token in tokens]
-
-
-def read_request(rfile, body_size_limit=None):
- request = read_request_head(rfile)
- expected_body_size = expected_http_body_size(request)
- request.data.content = b"".join(read_body(rfile, expected_body_size, limit=body_size_limit))
- request.timestamp_end = time.time()
- return request
-
-
-def read_request_head(rfile):
- """
- Parse an HTTP request head (request line + headers) from an input stream
-
- Args:
- rfile: The input stream
-
- Returns:
- The HTTP request object (without body)
-
- Raises:
- exceptions.HttpReadDisconnect: No bytes can be read from rfile.
- exceptions.HttpSyntaxException: The input is malformed HTTP.
- exceptions.HttpException: Any other error occured.
- """
- timestamp_start = time.time()
- if hasattr(rfile, "reset_timestamps"):
- rfile.reset_timestamps()
-
- form, method, scheme, host, port, path, http_version = _read_request_line(rfile)
- headers = _read_headers(rfile)
-
- if hasattr(rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = rfile.first_byte_timestamp
-
- return request.Request(
- form, method, scheme, host, port, path, http_version, headers, None, timestamp_start
- )
-
-
-def read_response(rfile, request, body_size_limit=None):
- response = read_response_head(rfile)
- expected_body_size = expected_http_body_size(request, response)
- response.data.content = b"".join(read_body(rfile, expected_body_size, body_size_limit))
- response.timestamp_end = time.time()
- return response
-
-
-def read_response_head(rfile):
- """
- Parse an HTTP response head (response line + headers) from an input stream
-
- Args:
- rfile: The input stream
-
- Returns:
- The HTTP request object (without body)
-
- Raises:
- exceptions.HttpReadDisconnect: No bytes can be read from rfile.
- exceptions.HttpSyntaxException: The input is malformed HTTP.
- exceptions.HttpException: Any other error occured.
- """
-
- timestamp_start = time.time()
- if hasattr(rfile, "reset_timestamps"):
- rfile.reset_timestamps()
-
- http_version, status_code, message = _read_response_line(rfile)
- headers = _read_headers(rfile)
-
- if hasattr(rfile, "first_byte_timestamp"):
- # more accurate timestamp_start
- timestamp_start = rfile.first_byte_timestamp
-
- return response.Response(http_version, status_code, message, headers, None, timestamp_start)
-
-
-def read_body(rfile, expected_size, limit=None, max_chunk_size=4096):
- """
- Read an HTTP message body
-
- Args:
- rfile: The input stream
- expected_size: The expected body size (see :py:meth:`expected_body_size`)
- limit: Maximum body size
- max_chunk_size: Maximium chunk size that gets yielded
-
- Returns:
- A generator that yields byte chunks of the content.
-
- Raises:
- exceptions.HttpException, if an error occurs
-
- Caveats:
- max_chunk_size is not considered if the transfer encoding is chunked.
- """
- if not limit or limit < 0:
- limit = sys.maxsize
- if not max_chunk_size:
- max_chunk_size = limit
-
- if expected_size is None:
- for x in _read_chunked(rfile, limit):
- yield x
- elif expected_size >= 0:
- if limit is not None and expected_size > limit:
- raise exceptions.HttpException(
- "HTTP Body too large. "
- "Limit is {}, content length was advertised as {}".format(limit, expected_size)
- )
- bytes_left = expected_size
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- content = rfile.read(chunk_size)
- if len(content) < chunk_size:
- raise exceptions.HttpException("Unexpected EOF")
- yield content
- bytes_left -= chunk_size
- else:
- bytes_left = limit
- while bytes_left:
- chunk_size = min(bytes_left, max_chunk_size)
- content = rfile.read(chunk_size)
- if not content:
- return
- yield content
- bytes_left -= chunk_size
- not_done = rfile.read(1)
- if not_done:
- raise exceptions.HttpException("HTTP body too large. Limit is {}.".format(limit))
-
-
-def connection_close(http_version, headers):
- """
- Checks the message to see if the client connection should be closed
- according to RFC 2616 Section 8.1.
- """
- # At first, check if we have an explicit Connection header.
- if "connection" in headers:
- tokens = get_header_tokens(headers, "connection")
- if "close" in tokens:
- return True
- elif "keep-alive" in tokens:
- return False
-
- # If we don't have a Connection header, HTTP 1.1 connections are assumed to
- # be persistent
- return http_version != "HTTP/1.1" and http_version != b"HTTP/1.1" # FIXME: Remove one case.
-
-
-def expected_http_body_size(request, response=None):
- """
- Returns:
- The expected body length:
- - a positive integer, if the size is known in advance
- - None, if the size in unknown in advance (chunked encoding)
- - -1, if all data should be read until end of stream.
-
- Raises:
- exceptions.HttpSyntaxException, if the content length header is invalid
- """
- # Determine response size according to
- # http://tools.ietf.org/html/rfc7230#section-3.3
- if not response:
- headers = request.headers
- response_code = None
- is_request = True
- else:
- headers = response.headers
- response_code = response.status_code
- is_request = False
-
- if is_request:
- if headers.get("expect", "").lower() == "100-continue":
- return 0
- else:
- if request.method.upper() == "HEAD":
- return 0
- if 100 <= response_code <= 199:
- return 0
- if response_code == 200 and request.method.upper() == "CONNECT":
- return 0
- if response_code in (204, 304):
- return 0
-
- if "chunked" in headers.get("transfer-encoding", "").lower():
- return None
- if "content-length" in headers:
- try:
- size = int(headers["content-length"])
- if size < 0:
- raise ValueError()
- return size
- except ValueError:
- raise exceptions.HttpSyntaxException("Unparseable Content Length")
- if is_request:
- return 0
- return -1
-
-
-def _get_first_line(rfile):
- try:
- line = rfile.readline()
- if line == b"\r\n" or line == b"\n":
- # Possible leftover from previous message
- line = rfile.readline()
- except exceptions.TcpDisconnect:
- raise exceptions.HttpReadDisconnect("Remote disconnected")
- if not line:
- raise exceptions.HttpReadDisconnect("Remote disconnected")
- return line.strip()
-
-
-def _read_request_line(rfile):
- try:
- line = _get_first_line(rfile)
- except exceptions.HttpReadDisconnect:
- # We want to provide a better error message.
- raise exceptions.HttpReadDisconnect("Client disconnected")
-
- try:
- method, path, http_version = line.split()
-
- if path == b"*" or path.startswith(b"/"):
- form = "relative"
- scheme, host, port = None, None, None
- elif method == b"CONNECT":
- form = "authority"
- host, port = _parse_authority_form(path)
- scheme, path = None, None
- else:
- form = "absolute"
- scheme, host, port, path = url.parse(path)
-
- _check_http_version(http_version)
- except ValueError:
- raise exceptions.HttpSyntaxException("Bad HTTP request line: {}".format(line))
-
- return form, method, scheme, host, port, path, http_version
-
-
-def _parse_authority_form(hostport):
- """
- Returns (host, port) if hostport is a valid authority-form host specification.
- http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
-
- Raises:
- ValueError, if the input is malformed
- """
- try:
- host, port = hostport.split(b":")
- port = int(port)
- if not check.is_valid_host(host) or not check.is_valid_port(port):
- raise ValueError()
- except ValueError:
- raise exceptions.HttpSyntaxException("Invalid host specification: {}".format(hostport))
-
- return host, port
-
-
-def _read_response_line(rfile):
- try:
- line = _get_first_line(rfile)
- except exceptions.HttpReadDisconnect:
- # We want to provide a better error message.
- raise exceptions.HttpReadDisconnect("Server disconnected")
-
- try:
- parts = line.split(None, 2)
- if len(parts) == 2: # handle missing message gracefully
- parts.append(b"")
-
- http_version, status_code, message = parts
- status_code = int(status_code)
- _check_http_version(http_version)
-
- except ValueError:
- raise exceptions.HttpSyntaxException("Bad HTTP response line: {}".format(line))
-
- return http_version, status_code, message
-
-
-def _check_http_version(http_version):
- if not re.match(br"^HTTP/\d\.\d$", http_version):
- raise exceptions.HttpSyntaxException("Unknown HTTP version: {}".format(http_version))
-
-
-def _read_headers(rfile):
- """
- Read a set of headers.
- Stop once a blank line is reached.
-
- Returns:
- A headers object
-
- Raises:
- exceptions.HttpSyntaxException
- """
- ret = []
- while True:
- line = rfile.readline()
- if not line or line == b"\r\n" or line == b"\n":
- break
- if line[0] in b" \t":
- if not ret:
- raise exceptions.HttpSyntaxException("Invalid headers")
- # continued header
- ret[-1] = (ret[-1][0], ret[-1][1] + b'\r\n ' + line.strip())
- else:
- try:
- name, value = line.split(b":", 1)
- value = value.strip()
- if not name:
- raise ValueError()
- ret.append((name, value))
- except ValueError:
- raise exceptions.HttpSyntaxException(
- "Invalid header line: %s" % repr(line)
- )
- return headers.Headers(ret)
-
-
-def _read_chunked(rfile, limit=sys.maxsize):
- """
- Read a HTTP body with chunked transfer encoding.
-
- Args:
- rfile: the input file
- limit: A positive integer
- """
- total = 0
- while True:
- line = rfile.readline(128)
- if line == b"":
- raise exceptions.HttpException("Connection closed prematurely")
- if line != b"\r\n" and line != b"\n":
- try:
- length = int(line, 16)
- except ValueError:
- raise exceptions.HttpSyntaxException("Invalid chunked encoding length: {}".format(line))
- total += length
- if total > limit:
- raise exceptions.HttpException(
- "HTTP Body too large. Limit is {}, "
- "chunked content longer than {}".format(limit, total)
- )
- chunk = rfile.read(length)
- suffix = rfile.readline(5)
- if suffix != b"\r\n":
- raise exceptions.HttpSyntaxException("Malformed chunked body")
- if length == 0:
- return
- yield chunk