diff options
Diffstat (limited to 'mitmproxy/net/http/cookies.py')
| -rw-r--r-- | mitmproxy/net/http/cookies.py | 384 |
1 files changed, 384 insertions, 0 deletions
# Extracted from: diff --git a/mitmproxy/net/http/cookies.py b/mitmproxy/net/http/cookies.py
# (new file mode 100644, index 00000000..9f32fa5e, hunk @@ -0,0 +1,384 @@)
"""
A flexible module for cookie parsing and manipulation.

This module differs from usual standards-compliant cookie modules in a number
of ways. We try to be as permissive as possible, and to retain even mal-formed
information. Duplicate cookies are preserved in parsing, and can be set in
formatting. We do attempt to escape and quote values where needed, but will not
reject data that violate the specs.

Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We
also parse the comma-separated variant of Set-Cookie that allows multiple
cookies to be set in a single header. Serialization follows RFC6265.

    http://tools.ietf.org/html/rfc6265
    http://tools.ietf.org/html/rfc2109
    http://tools.ietf.org/html/rfc2965
"""
import collections
import email.utils
import re
import time

from mitmproxy.types import multidict

# Lower-cased keys that group_cookies() treats as attributes of the preceding
# cookie rather than as the start of a new cookie.
_cookie_params = {
    'expires', 'path', 'comment', 'max-age',
    'secure', 'httponly', 'version',
}

# Characters that get a backslash prefix when a value is emitted quoted.
ESCAPE = re.compile(r"([\"\\])")


class CookieAttrs(multidict.ImmutableMultiDict):
    """
    Attributes of a single Set-Cookie entry.

    NOTE(review): _kconv and _reduce_values look like hooks consumed by the
    multidict base class (key normalisation / multi-value lookup) - confirm
    against mitmproxy.types.multidict.
    """

    @staticmethod
    def _kconv(key):
        # Attribute names are compared case-insensitively.
        return key.lower()

    @staticmethod
    def _reduce_values(values):
        # See the StickyCookieTest for a weird cookie that only makes sense
        # if we take the last part.
        return values[-1]


SetCookie = collections.namedtuple("SetCookie", ["value", "attrs"])


def _read_until(s, start, term):
    """
    Read until one of the characters in term is reached.

    Returns a (text, offset) tuple. offset is the index of the terminating
    character, or len(s) if no terminator was found. If start is at or past
    the end of s, ("", start + 1) is returned.
    """
    if start >= len(s):
        return "", start + 1
    for i in range(start, len(s)):
        if s[i] in term:
            return s[start:i], i
    return s[start:], len(s)


def _read_quoted_string(s, start):
    """
    start: offset to the first quote of the string to be read

    A sort of loose super-set of the various quoted string specifications.

    RFC6265 disallows backslashes or double quotes within quoted strings.
    Prior RFCs use backslashes to escape. This leaves us free to apply
    backslash escaping by default and be compatible with everything.

    Returns a (text, offset) tuple, where text has the escapes removed and
    offset is one past the last character consumed (the closing quote, if
    there was one).
    """
    escaping = False
    ret = []
    # Skip the first quote
    i = start  # initialize in case the loop doesn't run.
    for i in range(start + 1, len(s)):
        if escaping:
            ret.append(s[i])
            escaping = False
        elif s[i] == '"':
            break
        elif s[i] == "\\":
            escaping = True
        else:
            ret.append(s[i])
    return "".join(ret), i + 1


def _read_key(s, start, delims=";="):
    """
    Read a key - the LHS of a token/value pair in a cookie.

    Returns the same (text, offset) tuple as _read_until.
    """
    return _read_until(s, start, delims)


def _read_value(s, start, delims):
    """
    Reads a value - the RHS of a token/value pair in a cookie.

    A value starting with a double quote is read as a quoted string;
    anything else is read up to the next character in delims. Returns a
    (text, offset) tuple.
    """
    if start >= len(s):
        return "", start
    elif s[start] == '"':
        return _read_quoted_string(s, start)
    else:
        return _read_until(s, start, delims)


def _read_cookie_pairs(s, off=0):
    """
    Read pairs of lhs=rhs values from Cookie headers.

    off: start offset

    Returns a (pairs, offset) tuple. Each pair is a [lhs, rhs] list; rhs is
    None for a key that is not followed by "=".
    """
    pairs = []

    while True:
        lhs, off = _read_key(s, off)
        lhs = lhs.lstrip()

        if lhs:
            rhs = None
            if off < len(s) and s[off] == "=":
                rhs, off = _read_value(s, off + 1, ";")

            pairs.append([lhs, rhs])

        # Step over the delimiter we stopped at.
        off += 1

        if not off < len(s):
            break

    return pairs, off


def _read_set_cookie_pairs(s, off=0):
    """
    Read pairs of lhs=rhs values from SetCookie headers while handling multiple cookies.

    off: start offset

    Returns a (cookies, offset) tuple, where cookies is a list with one list
    of [lhs, rhs] pairs per cookie found in the header. rhs is None for a key
    that is not followed by "=".
    """
    cookies = []
    pairs = []

    while True:
        lhs, off = _read_key(s, off, ";=,")
        lhs = lhs.lstrip()

        if lhs:
            rhs = None
            if off < len(s) and s[off] == "=":
                rhs, off = _read_value(s, off + 1, ";,")

                # Special handling of attributes
                if lhs.lower() == "expires":
                    # 'expires' values can contain commas in them so they need
                    # to be handled separately.

                    # '3' is just a heuristic we use to determine whether we've
                    # only read a part of the expires value (the weekday) and
                    # we should read more. We only continue if the value really
                    # was cut short by a comma: a short value such as
                    # "expires=0" that ends at ";" or at the end of the header
                    # must not swallow the attribute that follows it.
                    if len(rhs) <= 3 and off < len(s) and s[off] == ",":
                        trail, off = _read_value(s, off + 1, ";,")
                        rhs = rhs + "," + trail

            pairs.append([lhs, rhs])

        # comma marks the beginning of a new cookie
        if off < len(s) and s[off] == ",":
            cookies.append(pairs)
            pairs = []

        # Step over the delimiter we stopped at.
        off += 1

        if not off < len(s):
            break

    # Always return at least one (possibly empty) cookie.
    if pairs or not cookies:
        cookies.append(pairs)

    return cookies, off


def _has_special(s):
    """
    Return True if s contains a character that requires the value to be
    quoted: a double quote, comma, semicolon, backslash, or anything outside
    the 0x21-0x7e range.
    """
    for i in s:
        if i in '",;\\':
            return True
        o = ord(i)
        if o < 0x21 or o > 0x7e:
            return True
    return False


def _format_pairs(pairs, specials=(), sep="; "):
    """
    Serialize (key, value) pairs into a single string joined by sep.

    A pair whose value is None is emitted as the bare key. Values containing
    special characters are escaped and wrapped in double quotes.

    specials: A lower-cased list of keys that will not be quoted.
    """
    vals = []
    for k, v in pairs:
        if v is None:
            vals.append(k)
        else:
            if k.lower() not in specials and _has_special(v):
                v = ESCAPE.sub(r"\\\1", v)
                v = '"%s"' % v
            vals.append("%s=%s" % (k, v))
    return sep.join(vals)


def _format_set_cookie_pairs(lst):
    """
    Serialize the pairs of one Set-Cookie entry. The values of "expires" and
    "path" are never quoted.
    """
    return _format_pairs(
        lst,
        specials=("expires", "path")
    )


def parse_cookie_header(line):
    """
    Parse a Cookie header value.
    Returns a list of [lhs, rhs] pairs.
    """
    pairs, _ = _read_cookie_pairs(line)
    return pairs


def parse_cookie_headers(cookie_headers):
    """
    Parse several Cookie header values and return all of their pairs as one
    flat list, in order.
    """
    cookie_list = []
    for header in cookie_headers:
        cookie_list.extend(parse_cookie_header(header))
    return cookie_list


def format_cookie_header(lst):
    """
    Formats a Cookie header value.
    """
    return _format_pairs(lst)


def parse_set_cookie_header(line):
    """
    Parse a Set-Cookie header value

    Returns a list of (name, value, attrs) tuples, where attrs is a
    CookieAttrs dict of attributes. No attempt is made to parse attribute
    values - they are treated purely as strings.
    """
    cookie_pairs, _ = _read_set_cookie_pairs(line)
    cookies = [
        (pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:]))
        for pairs in cookie_pairs if pairs
    ]
    return cookies


def parse_set_cookie_headers(headers):
    """
    Parse several Set-Cookie header values.

    Returns a flat list of (name, SetCookie(value, attrs)) tuples.
    """
    rv = []
    for header in headers:
        for name, value, attrs in parse_set_cookie_header(header):
            rv.append((name, SetCookie(value, attrs)))
    return rv


def format_set_cookie_header(set_cookies):
    """
    Formats a Set-Cookie header value.

    set_cookies: an iterable of (name, value, attrs) tuples. attrs is either
    an object with a "fields" attribute or an iterable of (key, value) pairs.
    Multiple cookies are joined with ", ".
    """

    rv = []

    for set_cookie in set_cookies:
        name, value, attrs = set_cookie

        pairs = [(name, value)]
        pairs.extend(
            attrs.fields if hasattr(attrs, "fields") else attrs
        )

        rv.append(_format_set_cookie_pairs(pairs))

    return ", ".join(rv)


def refresh_set_cookie_header(c, delta):
    """
    Args:
        c: A Set-Cookie string
        delta: Time delta in seconds
    Returns:
        A refreshed Set-Cookie string
    Raises:
        ValueError: if no cookie with a name and a value can be read from c.

    Only the first cookie found in c is refreshed and returned.
    """

    cookies = parse_set_cookie_header(c)
    if not cookies:
        # Nothing parseable - report it like any other invalid cookie rather
        # than failing with an IndexError.
        raise ValueError("Invalid Cookie")

    name, value, attrs = cookies[0]
    if not name or not value:
        raise ValueError("Invalid Cookie")

    if "expires" in attrs:
        e = email.utils.parsedate_tz(attrs["expires"])
        if e:
            f = email.utils.mktime_tz(e) + delta
            attrs = attrs.with_set_all("expires", [email.utils.formatdate(f)])
        else:
            # This can happen when the expires tag is invalid.
            # reddit.com sends an expires tag like this: "Thu, 31 Dec
            # 2037 23:59:59 GMT", which is valid RFC 1123, but not
            # strictly correct according to the cookie spec. Browsers
            # appear to parse this tolerantly - maybe we should too.
            # For now, we just ignore this.
            attrs = attrs.with_delitem("expires")

    rv = format_set_cookie_header([(name, value, attrs)])
    if not rv:
        raise ValueError("Invalid Cookie")
    return rv


def get_expiration_ts(cookie_attrs):
    """
    Determines the time when the cookie will be expired.

    Considering both 'expires' and 'max-age' parameters. 'expires' is used if
    it is present and can be parsed; otherwise 'max-age' is used if it is
    present and is an integer.

    Returns: timestamp of when the cookie will expire.
             None, if no expiration time is set.
    """
    if 'expires' in cookie_attrs:
        expires = cookie_attrs["expires"]
        # A valueless "expires" attribute is stored as None.
        e = email.utils.parsedate_tz(expires) if expires else None
        if e:
            return email.utils.mktime_tz(e)

    # Deliberately not an elif: an unparsable 'expires' must not hide a
    # usable 'max-age'.
    if 'max-age' in cookie_attrs:
        try:
            max_age = int(cookie_attrs['max-age'])
        except (TypeError, ValueError):
            # Valueless (None) or non-numeric max-age: treat as not set.
            pass
        else:
            now_ts = time.time()
            return now_ts + max_age

    return None


def is_expired(cookie_attrs):
    """
    Determines whether a cookie has expired.

    Returns: boolean
    """

    exp_ts = get_expiration_ts(cookie_attrs)
    now_ts = time.time()

    # If no expiration information was provided with the cookie
    if exp_ts is None:
        return False
    else:
        return exp_ts <= now_ts


def group_cookies(pairs):
    """
    Converts a list of pairs to a (name, value, attrs) for each cookie.

    A pair whose lower-cased key is in _cookie_params is added to the attrs
    of the current cookie; any other pair starts a new cookie.
    """

    if not pairs:
        return []

    cookie_list = []

    # First pair is always a new cookie
    name, value = pairs[0]
    attrs = []

    for k, v in pairs[1:]:
        if k.lower() in _cookie_params:
            attrs.append((k, v))
        else:
            cookie_list.append((name, value, CookieAttrs(attrs)))
            name, value, attrs = k, v, []

    cookie_list.append((name, value, CookieAttrs(attrs)))
    return cookie_list
