import collections import re import email.utils from netlib import multidict """ A flexible module for cookie parsing and manipulation. This module differs from usual standards-compliant cookie modules in a number of ways. We try to be as permissive as possible, and to retain even mal-formed information. Duplicate cookies are preserved in parsing, and can be set in formatting. We do attempt to escape and quote values where needed, but will not reject data that violate the specs. Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do not parse the comma-separated variant of Set-Cookie that allows multiple cookies to be set in a single header. Technically this should be feasible, but it turns out that violations of RFC6265 that makes the parsing problem indeterminate are much more common than genuine occurences of the multi-cookie variants. Serialization follows RFC6265. http://tools.ietf.org/html/rfc6265 http://tools.ietf.org/html/rfc2109 http://tools.ietf.org/html/rfc2965 """ # TODO: Disallow LHS-only Cookie values def _read_until(s, start, term): """ Read until one of the characters in term is reached. """ if start == len(s): return "", start + 1 for i in range(start, len(s)): if s[i] in term: return s[start:i], i return s[start:i + 1], i + 1 def _read_token(s, start): """ Read a token - the LHS of a token/value pair in a cookie. """ return _read_until(s, start, ";=") def _read_quoted_string(s, start): """ start: offset to the first quote of the string to be read A sort of loose super-set of the various quoted string specifications. RFC6265 disallows backslashes or double quotes within quoted strings. Prior RFCs use backslashes to escape. This leaves us free to apply backslash escaping by default and be compatible with everything. """ escaping = False ret = [] # Skip the first quote i = start # initialize in case the loop doesn't run. for i in range(start + 1, len(s)): if escaping: ret.append(s[i]) escaping = False elif s[i] == '"': break elif s[i] == "\\": escaping = True else: ret.append(s[i]) return "".join(ret), i + 1 def _read_value(s, start, delims): """ Reads a value - the RHS of a token/value pair in a cookie. special: If the value is special, commas are premitted. Else comma terminates. This helps us support old and new style values. """ if start >= len(s): return "", start elif s[start] == '"': return _read_quoted_string(s, start) else: return _read_until(s, start, delims) def _read_pairs(s, off=0): """ Read pairs of lhs=rhs values. off: start offset specials: a lower-cased list of keys that may contain commas """ vals = [] while True: lhs, off = _read_token(s, off) lhs = lhs.lstrip() if lhs: rhs = None if off < len(s): if s[off] == "=": rhs, off = _read_value(s, off + 1, ";") vals.append([lhs, rhs]) off += 1 if not off < len(s): break return vals, off def _has_special(s): for i in s: if i in '",;\\': return True o = ord(i) if o < 0x21 or o > 0x7e: return True return False ESCAPE = re.compile(r"([\"\\])") def _format_pairs(lst, specials=(), sep="; "): """ specials: A lower-cased list of keys that will not be quoted. """ vals = [] for k, v in lst: if v is None: vals.append(k) else: if k.lower() not in specials and _has_special(v): v = ESCAPE.sub(r"\\\1", v) v = '"%s"' % v vals.append("%s=%s" % (k, v)) return sep.join(vals) def _format_set_cookie_pairs(lst): return _format_pairs( lst, specials=("expires", "path") ) def _parse_set_cookie_pairs(s): """ For Set-Cookie, we support multiple cookies as described in RFC2109. This function therefore returns a list of lists. """ pairs, off_ = _read_pairs(s) return pairs def parse_set_cookie_headers(headers): ret = [] for header in headers: v = parse_set_cookie_header(header) if v: name, value, attrs = v ret.append((name, SetCookie(value, attrs))) return ret class CookieAttrs(multidict.ImmutableMultiDict): @staticmethod def _kconv(key): return key.lower() @staticmethod def _reduce_values(values): # See the StickyCookieTest for a weird cookie that only makes sense # if we take the last part. return values[-1] SetCookie = collections.namedtuple("SetCookie", ["value", "attrs"]) def parse_set_cookie_header(line): """ Parse a Set-Cookie header value Returns a (name, value, attrs) tuple, or None, where attrs is an CookieAttrs dict of attributes. No attempt is made to parse attribute values - they are treated purely as strings. """ pairs = _parse_set_cookie_pairs(line) if pairs: return pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:]) def format_set_cookie_header(name, value, attrs): """ Formats a Set-Cookie header value. """ pairs = [(name, value)] pairs.extend( attrs.fields if hasattr(attrs, "fields") else attrs ) return _format_set_cookie_pairs(pairs) def parse_cookie_headers(cookie_headers): cookie_list = [] for header in cookie_headers: cookie_list.extend(parse_cookie_header(header)) return cookie_list def parse_cookie_header(line): """ Parse a Cookie header value. Returns a list of (lhs, rhs) tuples. """ pairs, off_ = _read_pairs(line) return pairs def format_cookie_header(lst): """ Formats a Cookie header value. """ return _format_pairs(lst) def refresh_set_cookie_header(c, delta): """ Args: c: A Set-Cookie string delta: Time delta in seconds Returns: A refreshed Set-Cookie string """ name, value, attrs = parse_set_cookie_header(c) if not name or not value: raise ValueError("Invalid Cookie") if "expires" in attrs: e = email.utils.parsedate_tz(attrs["expires"]) if e: f = email.utils.mktime_tz(e) + delta attrs = attrs.with_set_all("expires", [email.utils.formatdate(f)]) else: # This can happen when the expires tag is invalid. # reddit.com sends a an expires tag like this: "Thu, 31 Dec # 2037 23:59:59 GMT", which is valid RFC 1123, but not # strictly correct according to the cookie spec. Browsers # appear to parse this tolerantly - maybe we should too. # For now, we just ignore this. attrs = attrs.with_delitem("expires") ret = format_set_cookie_header(name, value, attrs) if not ret: raise ValueError("Invalid Cookie") return ret