aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/http/headers.py
blob: 72739f900153011109729a8e39a7169a4edead94 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
"""

Unicode Handling
----------------
See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
"""
from __future__ import absolute_import, print_function, division

import re

try:
    from collections.abc import MutableMapping
except ImportError:  # pragma: no cover
    from collections import MutableMapping  # Workaround for Python < 3.3


import six

from netlib.utils import always_byte_args, always_bytes, Serializable

if six.PY2:  # pragma: no cover
    _native = lambda x: x
    _always_bytes = lambda x: x
    _always_byte_args = lambda x: x
else:
    # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
    _native = lambda x: x.decode("utf-8", "surrogateescape")
    _always_bytes = lambda x: always_bytes(x, "utf-8", "surrogateescape")
    _always_byte_args = always_byte_args("utf-8", "surrogateescape")


class Headers(MutableMapping, Serializable):
    """
    Header class which allows both convenient access to individual headers as well as
    direct access to the underlying raw data. Provides a full dictionary interface.

    Example:

    .. code-block:: python

        # Create headers with keyword arguments
        >>> h = Headers(host="example.com", content_type="application/xml")

        # Headers mostly behave like a normal dict.
        >>> h["Host"]
        "example.com"

        # HTTP Headers are case insensitive
        >>> h["host"]
        "example.com"

        # Headers can also be creatd from a list of raw (header_name, header_value) byte tuples
        >>> h = Headers([
            [b"Host",b"example.com"],
            [b"Accept",b"text/html"],
            [b"accept",b"application/xml"]
        ])

        # Multiple headers are folded into a single header as per RFC7230
        >>> h["Accept"]
        "text/html, application/xml"

        # Setting a header removes all existing headers with the same name.
        >>> h["Accept"] = "application/text"
        >>> h["Accept"]
        "application/text"

        # bytes(h) returns a HTTP1 header block.
        >>> print(bytes(h))
        Host: example.com
        Accept: application/text

        # For full control, the raw header fields can be accessed
        >>> h.fields

    Caveats:
        For use with the "Set-Cookie" header, see :py:meth:`get_all`.
    """

    @_always_byte_args
    def __init__(self, fields=None, **headers):
        """
        Args:
            fields: (optional) list of ``(name, value)`` header byte tuples,
                e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes.
            **headers: Additional headers to set. Will overwrite existing values from `fields`.
                For convenience, underscores in header names will be transformed to dashes -
                this behaviour does not extend to other methods.
                If ``**headers`` contains multiple keys that have equal ``.lower()`` s,
                the behavior is undefined.
        """
        self.fields = fields or []

        for name, value in self.fields:
            if not isinstance(name, bytes) or not isinstance(value, bytes):
                raise ValueError("Headers passed as fields must be bytes.")

        # content_type -> content-type
        headers = {
            _always_bytes(name).replace(b"_", b"-"): value
            for name, value in six.iteritems(headers)
            }
        self.update(headers)

    def __bytes__(self):
        if self.fields:
            return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"
        else:
            return b""

    if six.PY2:  # pragma: no cover
        __str__ = __bytes__

    @_always_byte_args
    def __getitem__(self, name):
        values = self.get_all(name)
        if not values:
            raise KeyError(name)
        return ", ".join(values)

    @_always_byte_args
    def __setitem__(self, name, value):
        idx = self._index(name)

        # To please the human eye, we insert at the same position the first existing header occured.
        if idx is not None:
            del self[name]
            self.fields.insert(idx, [name, value])
        else:
            self.fields.append([name, value])

    @_always_byte_args
    def __delitem__(self, name):
        if name not in self:
            raise KeyError(name)
        name = name.lower()
        self.fields = [
            field for field in self.fields
            if name != field[0].lower()
        ]

    def __iter__(self):
        seen = set()
        for name, _ in self.fields:
            name_lower = name.lower()
            if name_lower not in seen:
                seen.add(name_lower)
                yield _native(name)

    def __len__(self):
        return len(set(name.lower() for name, _ in self.fields))

    # __hash__ = object.__hash__

    def _index(self, name):
        name = name.lower()
        for i, field in enumerate(self.fields):
            if field[0].lower() == name:
                return i
        return None

    def __eq__(self, other):
        if isinstance(other, Headers):
            return self.fields == other.fields
        return False

    def __ne__(self, other):
        return not self.__eq__(other)

    @_always_byte_args
    def get_all(self, name):
        """
        Like :py:meth:`get`, but does not fold multiple headers into a single one.
        This is useful for Set-Cookie headers, which do not support folding.

        See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
        """
        name_lower = name.lower()
        values = [_native(value) for n, value in self.fields if n.lower() == name_lower]
        return values

    @_always_byte_args
    def set_all(self, name, values):
        """
        Explicitly set multiple headers for the given key.
        See: :py:meth:`get_all`
        """
        values = map(_always_bytes, values)  # _always_byte_args does not fix lists
        if name in self:
            del self[name]
        self.fields.extend(
            [name, value] for value in values
        )

    def get_state(self):
        return tuple(tuple(field) for field in self.fields)

    def set_state(self, state):
        self.fields = [list(field) for field in state]

    @classmethod
    def from_state(cls, state):
        return cls([list(field) for field in state])

    @_always_byte_args
    def replace(self, pattern, repl, flags=0):
        """
        Replaces a regular expression pattern with repl in each "name: value"
        header line.

        Returns:
            The number of replacements made.
        """
        pattern = re.compile(pattern, flags)
        replacements = 0

        fields = []
        for name, value in self.fields:
            line, n = pattern.subn(repl, name + b": " + value)
            try:
                name, value = line.split(b": ", 1)
            except ValueError:
                # We get a ValueError if the replacement removed the ": "
                # There's not much we can do about this, so we just keep the header as-is.
                pass
            else:
                replacements += n
            fields.append([name, value])
        self.fields = fields
        return replacements