From e80b2af4aed325741d8b51458d59351e70743aef Mon Sep 17 00:00:00 2001 From: Shadab Zafar Date: Sat, 30 Jul 2016 20:38:06 +0530 Subject: Add support for comma separated cookies --- netlib/http/cookies.py | 81 +++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/netlib/http/cookies.py b/netlib/http/cookies.py index 1421d8eb..6fec7daf 100644 --- a/netlib/http/cookies.py +++ b/netlib/http/cookies.py @@ -14,12 +14,9 @@ information. Duplicate cookies are preserved in parsing, and can be set in formatting. We do attempt to escape and quote values where needed, but will not reject data that violate the specs. -Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do -not parse the comma-separated variant of Set-Cookie that allows multiple -cookies to be set in a single header. Technically this should be feasible, but -it turns out that violations of RFC6265 that makes the parsing problem -indeterminate are much more common than genuine occurences of the multi-cookie -variants. Serialization follows RFC6265. +Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We +also parse the comma-separated variant of Set-Cookie that allows multiple +cookies to be set in a single header. Serialization follows RFC6265. http://tools.ietf.org/html/rfc6265 http://tools.ietf.org/html/rfc2109 @@ -51,7 +48,7 @@ def _read_token(s, start): """ Read a token - the LHS of a token/value pair in a cookie. """ - return _read_until(s, start, ";=") + return _read_until(s, start, ",;=") def _read_quoted_string(s, start): @@ -84,9 +81,6 @@ def _read_quoted_string(s, start): def _read_value(s, start, delims): """ Reads a value - the RHS of a token/value pair in a cookie. - - special: If the value is special, commas are premitted. Else comma - terminates. This helps us support old and new style values. """ if start >= len(s): return "", start @@ -96,27 +90,49 @@ def _read_value(s, start, delims): return _read_until(s, start, delims) +# TODO: Disallow LHS-only Cookie values def _read_pairs(s, off=0): """ - Read pairs of lhs=rhs values. + Read pairs of lhs=rhs values while handling multiple cookies. off: start offset - specials: a lower-cased list of keys that may contain commas """ - vals = [] + cookies = [] + pairs = [] + while True: lhs, off = _read_token(s, off) lhs = lhs.lstrip() + if lhs: rhs = None - if off < len(s): - if s[off] == "=": - rhs, off = _read_value(s, off + 1, ";") - vals.append([lhs, rhs]) + if off < len(s) and s[off] == "=": + rhs, off = _read_value(s, off + 1, ";,") + + # expires values can contain commas in them so they need to + # be handled separately. + if lhs.lower() == "expires": + # This is a heuristic we use to determine whether we've + # only read a part of the datetime and should read more. + if len(rhs) <= 3: + trail, off = _read_value(s, off + 1, ";,") + rhs = rhs + "," + trail + + pairs.append([lhs, rhs]) + + # comma marks the beginning of a new cookie + if off < len(s) and s[off] == ",": + cookies.append(pairs) + pairs = [] + off += 1 + if not off < len(s): break - return vals, off + + if pairs or not cookies: + cookies.append(pairs) + return cookies, off def _has_special(s): @@ -167,10 +183,11 @@ def _parse_set_cookie_pairs(s): def parse_set_cookie_headers(headers): ret = [] for header in headers: - v = parse_set_cookie_header(header) - if v: - name, value, attrs = v - ret.append((name, SetCookie(value, attrs))) + cookies = parse_set_cookie_header(header) + if cookies: + for cookie in cookies: + name, value, attrs = cookie + ret.append((name, SetCookie(value, attrs))) return ret @@ -193,13 +210,17 @@ def parse_set_cookie_header(line): """ Parse a Set-Cookie header value - Returns a (name, value, attrs) tuple, or None, where attrs is an - CookieAttrs dict of attributes. No attempt is made to parse attribute - values - they are treated purely as strings. + Returns a list of (name, value, attrs) tuple for each cokie, or None. + Where attrs is a CookieAttrs dict of attributes. No attempt is made + to parse attribute values - they are treated purely as strings. """ - pairs = _parse_set_cookie_pairs(line) - if pairs: - return pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:]) + cookies = [ + (pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:])) + for pairs in _parse_set_cookie_pairs(line) if pairs + ] + + if cookies: + return cookies def format_set_cookie_header(name, value, attrs): @@ -216,7 +237,7 @@ def format_set_cookie_header(name, value, attrs): def parse_cookie_headers(cookie_headers): cookie_list = [] for header in cookie_headers: - cookie_list.extend(parse_cookie_header(header)) + cookie_list.extend(parse_cookie_header(header)[0]) return cookie_list @@ -245,7 +266,7 @@ def refresh_set_cookie_header(c, delta): A refreshed Set-Cookie string """ - name, value, attrs = parse_set_cookie_header(c) + name, value, attrs = parse_set_cookie_header(c)[0] if not name or not value: raise ValueError("Invalid Cookie") -- cgit v1.2.3