diff options
-rw-r--r-- | netlib/http.py | 124 | ||||
-rw-r--r-- | netlib/utils.py | 2 | ||||
-rw-r--r-- | test/test_http.py | 72 |
3 files changed, 195 insertions, 3 deletions
diff --git a/netlib/http.py b/netlib/http.py index 26438863..aacdd1d4 100644 --- a/netlib/http.py +++ b/netlib/http.py @@ -1,7 +1,10 @@ from __future__ import (absolute_import, print_function, division) -import string, urlparse, binascii +import collections +import string +import urlparse +import binascii import sys -from . import odict, utils +from . import odict, utils, tcp class HttpError(Exception): @@ -30,6 +33,19 @@ def _is_valid_host(host): return True +def get_line(fp): + """ + Get a line, possibly preceded by a blank. + """ + line = fp.readline() + if line == "\r\n" or line == "\n": + # Possible leftover from previous message + line = fp.readline() + if line == "": + raise tcp.NetLibDisconnect() + return line + + def parse_url(url): """ Returns a (scheme, host, port, path) tuple, or None on error. @@ -436,3 +452,107 @@ def expected_http_body_size(headers, is_request, request_method, response_code): if is_request: return 0 return -1 + + +Request = collections.namedtuple( + "Request", + [ + "form_in", + "method", + "scheme", + "host", + "port", + "path", + "httpversion", + "headers", + "content" + ] +) + + +def read_request(rfile, include_body=True, body_size_limit=None, wfile=None): + """ + Parse an HTTP request from a file stream + + Args: + rfile (file): Input file to read from + include_body (bool): Read response body as well + body_size_limit (bool): Maximum body size + wfile (file): If specified, HTTP Expect headers are handled + automatically, by writing a HTTP 100 CONTINUE response to the stream. + + Returns: + Request: The HTTP request + + Raises: + HttpError: If the input is invalid. + """ + httpversion, host, port, scheme, method, path, headers, content = ( + None, None, None, None, None, None, None, None) + + request_line = get_line(rfile) + + request_line_parts = parse_init(request_line) + if not request_line_parts: + raise HttpError( + 400, + "Bad HTTP request line: %s" % repr(request_line) + ) + method, path, httpversion = request_line_parts + + if path == '*' or path.startswith("/"): + form_in = "relative" + if not utils.isascii(path): + raise HttpError( + 400, + "Bad HTTP request line: %s" % repr(request_line) + ) + elif method.upper() == 'CONNECT': + form_in = "authority" + r = parse_init_connect(request_line) + if not r: + raise HttpError( + 400, + "Bad HTTP request line: %s" % repr(request_line) + ) + host, port, _ = r + path = None + else: + form_in = "absolute" + r = parse_init_proxy(request_line) + if not r: + raise HttpError( + 400, + "Bad HTTP request line: %s" % repr(request_line) + ) + _, scheme, host, port, path, _ = r + + headers = read_headers(rfile) + if headers is None: + raise HttpError(400, "Invalid headers") + + expect_header = headers.get_first("expect") + if expect_header and expect_header.lower() == "100-continue" and httpversion >= (1, 1): + wfile.write( + 'HTTP/1.1 100 Continue\r\n' + '\r\n' + ) + wfile.flush() + del headers['expect'] + + if include_body: + content = read_http_body( + rfile, headers, body_size_limit, method, None, True + ) + + return Request( + form_in, + method, + scheme, + host, + port, + path, + httpversion, + headers, + content + ) diff --git a/netlib/utils.py b/netlib/utils.py index 03a70977..57532453 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -46,4 +46,4 @@ def hexdump(s): parts.append( (o, x, cleanBin(part, True)) ) - return parts
\ No newline at end of file + return parts diff --git a/test/test_http.py b/test/test_http.py index b1c62458..5bd7cab2 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -366,3 +366,75 @@ def test_parse_http_basic_auth(): assert not http.parse_http_basic_auth("foo bar") v = "basic " + binascii.b2a_base64("foo") assert not http.parse_http_basic_auth(v) + + +def test_get_line(): + r = cStringIO.StringIO("\nfoo") + assert http.get_line(r) == "foo" + tutils.raises(tcp.NetLibDisconnect, http.get_line, r) + + +class TestReadRequest(): + + def tst(self, data, **kwargs): + r = cStringIO.StringIO(data) + return http.read_request(r, **kwargs) + + def test_invalid(self): + tutils.raises( + "bad http request", + self.tst, + "xxx" + ) + tutils.raises( + "bad http request line", + self.tst, + "get /\xff HTTP/1.1" + ) + tutils.raises( + "invalid headers", + self.tst, + "get / HTTP/1.1\r\nfoo" + ) + + def test_asterisk_form_in(self): + v = self.tst("OPTIONS * HTTP/1.1") + assert v.form_in == "relative" + assert v.method == "OPTIONS" + + def test_absolute_form_in(self): + tutils.raises( + "Bad HTTP request line", + self.tst, + "GET oops-no-protocol.com HTTP/1.1" + ) + v = self.tst("GET http://address:22/ HTTP/1.1") + assert v.form_in == "absolute" + assert v.port == 22 + assert v.host == "address" + assert v.scheme == "http" + + def test_connect(self): + tutils.raises( + "Bad HTTP request line", + self.tst, + "CONNECT oops-no-port.com HTTP/1.1" + ) + v = self.tst("CONNECT foo.com:443 HTTP/1.1") + assert v.form_in == "authority" + assert v.method == "CONNECT" + assert v.port == 443 + assert v.host == "foo.com" + + def test_expect(self): + w = cStringIO.StringIO() + r = cStringIO.StringIO( + "GET / HTTP/1.1\r\n" + "Content-Length: 3\r\n" + "Expect: 100-continue\r\n\r\n" + "foobar", + ) + v = http.read_request(r, wfile=w) + assert w.getvalue() == "HTTP/1.1 100 Continue\r\n\r\n" + assert v.content == "foo" + assert r.read(3) == "bar" |