aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http/cookies.py
blob: 18544b5e3f57b6ea17638150b49a26124e4a48e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import re

from .. import odict

"""
A flexible module for cookie parsing and manipulation.

This module differs from usual standards-compliant cookie modules in a number
of ways. We try to be as permissive as possible, and to retain even malformed
information. Duplicate cookies are preserved in parsing, and can be set in
formatting. We do attempt to escape and quote values where needed, but will not
reject data that violate the specs.

Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We do
not parse the comma-separated variant of Set-Cookie that allows multiple
cookies to be set in a single header. Technically this should be feasible, but
it turns out that violations of RFC6265 that make the parsing problem
indeterminate are much more common than genuine occurrences of the multi-cookie
variants. Serialization follows RFC6265.

    http://tools.ietf.org/html/rfc6265
    http://tools.ietf.org/html/rfc2109
    http://tools.ietf.org/html/rfc2965
"""

# TODO: Disallow LHS-only Cookie values


def _read_until(s, start, term):
    """
        Read until one of the characters in term is reached.
    """
    if start == len(s):
        return "", start + 1
    for i in range(start, len(s)):
        if s[i] in term:
            return s[start:i], i
    return s[start:i + 1], i + 1


def _read_token(s, start):
    """
        Read the name part (LHS) of a cookie pair, beginning at offset start.

        Reading stops at the first ";" or "=", or at the end of s. Returns
        the (token, offset) tuple produced by _read_until.
    """
    token, end = _read_until(s, start, ";=")
    return token, end


def _read_quoted_string(s, start):
    """
        start: offset to the first quote of the string to be read

        A sort of loose super-set of the various quoted string specifications.

        RFC6265 disallows backslashes or double quotes within quoted strings.
        Prior RFCs use backslashes to escape. This leaves us free to apply
        backslash escaping by default and be compatible with everything.
    """
    escaping = False
    ret = []
    # Skip the first quote
    i = start  # initialize in case the loop doesn't run.
    for i in range(start + 1, len(s)):
        if escaping:
            ret.append(s[i])
            escaping = False
        elif s[i] == '"':
            break
        elif s[i] == "\\":
            escaping = True
        else:
            ret.append(s[i])
    return "".join(ret), i + 1


def _read_value(s, start, delims):
    """
        Reads a value - the RHS of a token/value pair in a cookie.

        special: If the value is special, commas are premitted. Else comma
        terminates. This helps us support old and new style values.
    """
    if start >= len(s):
        return "", start
    elif s[start] == '"':
        return _read_quoted_string(s, start)
    else:
        return _read_until(s, start, delims)


def _read_pairs(s, off=0):
    """
        Read pairs of lhs=rhs values separated by ";".

        s: the string to parse
        off: start offset

        Returns a (pairs, offset) tuple, where pairs is a list of
        [lhs, rhs] lists. rhs is None for an entry that has no "=".
        Entries whose lhs is empty after stripping leading whitespace are
        skipped.
    """
    pairs = []
    length = len(s)
    more = True
    # The body always runs at least once, even for an empty string.
    while more:
        name, off = _read_token(s, off)
        name = name.lstrip()
        if name:
            value = None
            if off < length and s[off] == "=":
                value, off = _read_value(s, off + 1, ";")
            pairs.append([name, value])
        # Step over the separator the reader stopped on.
        off += 1
        more = off < length
    return pairs, off


def _has_special(s):
    for i in s:
        if i in '",;\\':
            return True
        o = ord(i)
        if o < 0x21 or o > 0x7e:
            return True
    return False


# Matches a single double quote or backslash and captures it, so that
# _format_pairs can prefix the character with a backslash when quoting.
ESCAPE = re.compile(r"([\"\\])")


def _format_pairs(lst, specials=(), sep="; "):
    """
        specials: A lower-cased list of keys that will not be quoted.
    """
    vals = []
    for k, v in lst:
        if v is None:
            vals.append(k)
        else:
            if k.lower() not in specials and _has_special(v):
                v = ESCAPE.sub(r"\\\1", v)
                v = '"%s"' % v
            vals.append("%s=%s" % (k, v))
    return sep.join(vals)


def _format_set_cookie_pairs(lst):
    """
        Serialize Set-Cookie pairs with _format_pairs, leaving the values of
        the "expires" and "path" attributes unquoted.
    """
    unquoted = ("expires", "path")
    return _format_pairs(lst, specials=unquoted)


def _parse_set_cookie_pairs(s):
    """
        Parse a Set-Cookie header value into a list of [lhs, rhs] pairs.

        This is _read_pairs with the final offset discarded.
    """
    return _read_pairs(s)[0]


def parse_set_cookie_header(line):
    """
        Parse a Set-Cookie header value

        Returns a (name, value, attrs) tuple, or None if no pairs could be
        read from line. The first pair supplies name and value; the
        remaining pairs become attrs, an ODictCaseless. No attempt is made
        to parse attribute values - they are treated purely as strings.
    """
    pairs = _parse_set_cookie_pairs(line)
    if not pairs:
        return None
    cookie = pairs[0]
    attrs = odict.ODictCaseless(pairs[1:])
    return cookie[0], cookie[1], attrs


def format_set_cookie_header(name, value, attrs):
    """
        Formats a Set-Cookie header value.

        name, value: the cookie itself, emitted first.
        attrs: an object whose .lst holds the attribute pairs that follow.
    """
    pairs = [[name, value]] + list(attrs.lst)
    return _format_set_cookie_pairs(pairs)


def parse_cookie_header(line):
    """
        Parse a Cookie header value.

        Returns a (possibly empty) ODict object built from the pairs read
        from line.
    """
    parsed = _read_pairs(line)
    return odict.ODict(parsed[0])


def format_cookie_header(od):
    """
        Formats a Cookie header value from the pairs held in od.lst.
    """
    pairs = od.lst
    return _format_pairs(pairs)