aboutsummaryrefslogtreecommitdiffstats
path: root/mitmproxy/net/http/cookies.py
diff options
context:
space:
mode:
Diffstat (limited to 'mitmproxy/net/http/cookies.py')
-rw-r--r--mitmproxy/net/http/cookies.py384
1 files changed, 384 insertions, 0 deletions
diff --git a/mitmproxy/net/http/cookies.py b/mitmproxy/net/http/cookies.py
new file mode 100644
index 00000000..9f32fa5e
--- /dev/null
+++ b/mitmproxy/net/http/cookies.py
@@ -0,0 +1,384 @@
+import collections
+import email.utils
+import re
+import time
+
+from mitmproxy.types import multidict
+
+"""
+A flexible module for cookie parsing and manipulation.
+
+This module differs from usual standards-compliant cookie modules in a number
+of ways. We try to be as permissive as possible, and to retain even mal-formed
+information. Duplicate cookies are preserved in parsing, and can be set in
+formatting. We do attempt to escape and quote values where needed, but will not
+reject data that violate the specs.
+
+Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We
+also parse the comma-separated variant of Set-Cookie that allows multiple
+cookies to be set in a single header. Serialization follows RFC6265.
+
+ http://tools.ietf.org/html/rfc6265
+ http://tools.ietf.org/html/rfc2109
+ http://tools.ietf.org/html/rfc2965
+"""
+
+_cookie_params = set((
+ 'expires', 'path', 'comment', 'max-age',
+ 'secure', 'httponly', 'version',
+))
+
+ESCAPE = re.compile(r"([\"\\])")
+
+
+class CookieAttrs(multidict.ImmutableMultiDict):
+ @staticmethod
+ def _kconv(key):
+ return key.lower()
+
+ @staticmethod
+ def _reduce_values(values):
+ # See the StickyCookieTest for a weird cookie that only makes sense
+ # if we take the last part.
+ return values[-1]
+
+SetCookie = collections.namedtuple("SetCookie", ["value", "attrs"])
+
+
+def _read_until(s, start, term):
+ """
+ Read until one of the characters in term is reached.
+ """
+ if start == len(s):
+ return "", start + 1
+ for i in range(start, len(s)):
+ if s[i] in term:
+ return s[start:i], i
+ return s[start:i + 1], i + 1
+
+
+def _read_quoted_string(s, start):
+ """
+ start: offset to the first quote of the string to be read
+
+ A sort of loose super-set of the various quoted string specifications.
+
+ RFC6265 disallows backslashes or double quotes within quoted strings.
+ Prior RFCs use backslashes to escape. This leaves us free to apply
+ backslash escaping by default and be compatible with everything.
+ """
+ escaping = False
+ ret = []
+ # Skip the first quote
+ i = start # initialize in case the loop doesn't run.
+ for i in range(start + 1, len(s)):
+ if escaping:
+ ret.append(s[i])
+ escaping = False
+ elif s[i] == '"':
+ break
+ elif s[i] == "\\":
+ escaping = True
+ else:
+ ret.append(s[i])
+ return "".join(ret), i + 1
+
+
+def _read_key(s, start, delims=";="):
+ """
+ Read a key - the LHS of a token/value pair in a cookie.
+ """
+ return _read_until(s, start, delims)
+
+
+def _read_value(s, start, delims):
+ """
+ Reads a value - the RHS of a token/value pair in a cookie.
+ """
+ if start >= len(s):
+ return "", start
+ elif s[start] == '"':
+ return _read_quoted_string(s, start)
+ else:
+ return _read_until(s, start, delims)
+
+
+def _read_cookie_pairs(s, off=0):
+ """
+ Read pairs of lhs=rhs values from Cookie headers.
+
+ off: start offset
+ """
+ pairs = []
+
+ while True:
+ lhs, off = _read_key(s, off)
+ lhs = lhs.lstrip()
+
+ if lhs:
+ rhs = None
+ if off < len(s) and s[off] == "=":
+ rhs, off = _read_value(s, off + 1, ";")
+
+ pairs.append([lhs, rhs])
+
+ off += 1
+
+ if not off < len(s):
+ break
+
+ return pairs, off
+
+
+def _read_set_cookie_pairs(s, off=0):
+ """
+ Read pairs of lhs=rhs values from SetCookie headers while handling multiple cookies.
+
+ off: start offset
+ specials: attributes that are treated specially
+ """
+ cookies = []
+ pairs = []
+
+ while True:
+ lhs, off = _read_key(s, off, ";=,")
+ lhs = lhs.lstrip()
+
+ if lhs:
+ rhs = None
+ if off < len(s) and s[off] == "=":
+ rhs, off = _read_value(s, off + 1, ";,")
+
+ # Special handliing of attributes
+ if lhs.lower() == "expires":
+ # 'expires' values can contain commas in them so they need to
+ # be handled separately.
+
+ # We actually bank on the fact that the expires value WILL
+ # contain a comma. Things will fail, if they don't.
+
+ # '3' is just a heuristic we use to determine whether we've
+ # only read a part of the expires value and we should read more.
+ if len(rhs) <= 3:
+ trail, off = _read_value(s, off + 1, ";,")
+ rhs = rhs + "," + trail
+
+ pairs.append([lhs, rhs])
+
+ # comma marks the beginning of a new cookie
+ if off < len(s) and s[off] == ",":
+ cookies.append(pairs)
+ pairs = []
+
+ off += 1
+
+ if not off < len(s):
+ break
+
+ if pairs or not cookies:
+ cookies.append(pairs)
+
+ return cookies, off
+
+
+def _has_special(s):
+ for i in s:
+ if i in '",;\\':
+ return True
+ o = ord(i)
+ if o < 0x21 or o > 0x7e:
+ return True
+ return False
+
+
+def _format_pairs(pairs, specials=(), sep="; "):
+ """
+ specials: A lower-cased list of keys that will not be quoted.
+ """
+ vals = []
+ for k, v in pairs:
+ if v is None:
+ vals.append(k)
+ else:
+ if k.lower() not in specials and _has_special(v):
+ v = ESCAPE.sub(r"\\\1", v)
+ v = '"%s"' % v
+ vals.append("%s=%s" % (k, v))
+ return sep.join(vals)
+
+
+def _format_set_cookie_pairs(lst):
+ return _format_pairs(
+ lst,
+ specials=("expires", "path")
+ )
+
+
+def parse_cookie_header(line):
+ """
+ Parse a Cookie header value.
+ Returns a list of (lhs, rhs) tuples.
+ """
+ pairs, off_ = _read_cookie_pairs(line)
+ return pairs
+
+
+def parse_cookie_headers(cookie_headers):
+ cookie_list = []
+ for header in cookie_headers:
+ cookie_list.extend(parse_cookie_header(header))
+ return cookie_list
+
+
+def format_cookie_header(lst):
+ """
+ Formats a Cookie header value.
+ """
+ return _format_pairs(lst)
+
+
+def parse_set_cookie_header(line):
+ """
+ Parse a Set-Cookie header value
+
+ Returns a list of (name, value, attrs) tuples, where attrs is a
+ CookieAttrs dict of attributes. No attempt is made to parse attribute
+ values - they are treated purely as strings.
+ """
+ cookie_pairs, off = _read_set_cookie_pairs(line)
+ cookies = [
+ (pairs[0][0], pairs[0][1], CookieAttrs(tuple(x) for x in pairs[1:]))
+ for pairs in cookie_pairs if pairs
+ ]
+ return cookies
+
+
+def parse_set_cookie_headers(headers):
+ rv = []
+ for header in headers:
+ cookies = parse_set_cookie_header(header)
+ if cookies:
+ for name, value, attrs in cookies:
+ rv.append((name, SetCookie(value, attrs)))
+ return rv
+
+
+def format_set_cookie_header(set_cookies):
+ """
+ Formats a Set-Cookie header value.
+ """
+
+ rv = []
+
+ for set_cookie in set_cookies:
+ name, value, attrs = set_cookie
+
+ pairs = [(name, value)]
+ pairs.extend(
+ attrs.fields if hasattr(attrs, "fields") else attrs
+ )
+
+ rv.append(_format_set_cookie_pairs(pairs))
+
+ return ", ".join(rv)
+
+
+def refresh_set_cookie_header(c, delta):
+ """
+ Args:
+ c: A Set-Cookie string
+ delta: Time delta in seconds
+ Returns:
+ A refreshed Set-Cookie string
+ """
+
+ name, value, attrs = parse_set_cookie_header(c)[0]
+ if not name or not value:
+ raise ValueError("Invalid Cookie")
+
+ if "expires" in attrs:
+ e = email.utils.parsedate_tz(attrs["expires"])
+ if e:
+ f = email.utils.mktime_tz(e) + delta
+ attrs = attrs.with_set_all("expires", [email.utils.formatdate(f)])
+ else:
+ # This can happen when the expires tag is invalid.
+ # reddit.com sends a an expires tag like this: "Thu, 31 Dec
+ # 2037 23:59:59 GMT", which is valid RFC 1123, but not
+ # strictly correct according to the cookie spec. Browsers
+ # appear to parse this tolerantly - maybe we should too.
+ # For now, we just ignore this.
+ attrs = attrs.with_delitem("expires")
+
+ rv = format_set_cookie_header([(name, value, attrs)])
+ if not rv:
+ raise ValueError("Invalid Cookie")
+ return rv
+
+
+def get_expiration_ts(cookie_attrs):
+ """
+ Determines the time when the cookie will be expired.
+
+ Considering both 'expires' and 'max-age' parameters.
+
+ Returns: timestamp of when the cookie will expire.
+ None, if no expiration time is set.
+ """
+ if 'expires' in cookie_attrs:
+ e = email.utils.parsedate_tz(cookie_attrs["expires"])
+ if e:
+ return email.utils.mktime_tz(e)
+
+ elif 'max-age' in cookie_attrs:
+ try:
+ max_age = int(cookie_attrs['Max-Age'])
+ except ValueError:
+ pass
+ else:
+ now_ts = time.time()
+ return now_ts + max_age
+
+ return None
+
+
+def is_expired(cookie_attrs):
+ """
+ Determines whether a cookie has expired.
+
+ Returns: boolean
+ """
+
+ exp_ts = get_expiration_ts(cookie_attrs)
+ now_ts = time.time()
+
+ # If no expiration information was provided with the cookie
+ if exp_ts is None:
+ return False
+ else:
+ return exp_ts <= now_ts
+
+
+def group_cookies(pairs):
+ """
+ Converts a list of pairs to a (name, value, attrs) for each cookie.
+ """
+
+ if not pairs:
+ return []
+
+ cookie_list = []
+
+ # First pair is always a new cookie
+ name, value = pairs[0]
+ attrs = []
+
+ for k, v in pairs[1:]:
+ if k.lower() in _cookie_params:
+ attrs.append((k, v))
+ else:
+ cookie_list.append((name, value, CookieAttrs(attrs)))
+ name, value, attrs = k, v, []
+
+ cookie_list.append((name, value, CookieAttrs(attrs)))
+ return cookie_list