# path: root/netlib/encoding.py
# blob: a6ae9a963016bd9fbc4d438d02913de63851c985 (plain)
"""
Utility functions for decoding response bodies.
"""
from __future__ import absolute_import

import codecs
import collections
from io import BytesIO

import gzip
import zlib
import brotli

from typing import Union


# decode() and encode() share one single-entry cache that remembers the most
# recent encoded/decoded pair. This pays off in practice: for example
#   flow.request.content = flow.request.content.replace(b"foo", b"bar")
# needs no fresh .encode() run when the content does not contain b"foo".
CachedDecode = collections.namedtuple(
    "CachedDecode",
    ["encoded", "encoding", "errors", "decoded"],
)
_cache = CachedDecode(encoded=None, encoding=None, errors=None, decoded=None)


def decode(encoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
    """
    Decode `encoded` using the named encoding.

    Names present in `custom_decode` are handled by the function registered
    there; every other name is handed to `codecs.decode` together with
    `errors`. Empty input is returned as-is without consulting either.

    Results for "gzip", "deflate" and "br" are remembered in the module's
    single-entry cache, and a bytes input that matches the remembered entry
    (same data, encoding and errors) is answered from it.

    Returns:
        The decoded value

    Raises:
        ValueError, if decoding fails.
        TypeError is propagated unchanged.
    """
    global _cache

    if len(encoded) == 0:
        return encoded

    # Only bytes inputs are ever answered from the cache.
    if isinstance(encoded, bytes):
        hit = (
            _cache.encoded == encoded and
            _cache.encoding == encoding and
            _cache.errors == errors
        )
        if hit:
            return _cache.decoded

    try:
        try:
            result = custom_decode[encoding](encoded)
        except KeyError:
            # Not one of our own encodings: defer to the codec registry.
            result = codecs.decode(encoded, encoding, errors)
        if encoding in ("gzip", "deflate", "br"):
            _cache = CachedDecode(encoded, encoding, errors, result)
        return result
    except TypeError:
        raise
    except Exception as exc:
        message = "{} when decoding {} with {}: {}".format(
            type(exc).__name__,
            repr(encoded)[:10],
            repr(encoding),
            repr(exc),
        )
        raise ValueError(message)


def encode(decoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
    """
    Encode `decoded` using the named encoding.

    Names present in `custom_encode` are handled by the function registered
    there; a str input is first converted to bytes with `str.encode()`
    because those functions operate on bytes. Every other name is handed to
    `codecs.encode` together with `errors`, with the input left untouched so
    that the requested error handler (e.g. "surrogateescape") is honoured.
    Empty input is returned as-is.

    Results for "gzip", "deflate" and "br" are remembered in the module's
    single-entry cache, and a bytes input that matches the remembered entry
    (same data, encoding and errors) is answered from it.

    Returns:
        The encoded value

    Raises:
        ValueError, if encoding fails.
        TypeError is propagated unchanged.
    """
    if len(decoded) == 0:
        return decoded

    global _cache
    cached = (
        isinstance(decoded, bytes) and
        _cache.decoded == decoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.encoded
    try:
        # Resolve the encoder before touching the input: converting a str
        # up front would fail on text that only the codecs fallback (with
        # the caller's `errors` handler) is able to encode.
        try:
            encoder = custom_encode[encoding]
        except KeyError:
            encoder = None

        value = decoded
        if encoder is None:
            encoded = codecs.encode(decoded, encoding, errors)
        else:
            if isinstance(value, str):
                value = value.encode()
            encoded = encoder(value)
        if encoding in ("gzip", "deflate", "br"):
            # Cache the bytes that were actually compressed, so a later
            # decode() served from the cache returns bytes, not the str
            # the caller happened to pass in here.
            _cache = CachedDecode(encoded, encoding, errors, value)
        return encoded
    except TypeError:
        raise
    except Exception as e:
        raise ValueError("{} when encoding {} with {}: {}".format(
            type(e).__name__,
            repr(decoded)[:10],
            repr(encoding),
            repr(e),
        ))


def identity(content):
    """
    Pass `content` through untouched.

    Registered for the "none" and "identity" encodings, i.e. the cases in
    which no transformation is applied to the body.
    """
    return content


def decode_gzip(content):
    """
    Decompress gzip-framed `content` and return the uncompressed bytes.

    The GzipFile wrapper is used as a context manager so that it is closed
    as soon as reading finishes or fails, instead of whenever it happens to
    be garbage-collected.
    """
    with gzip.GzipFile(fileobj=BytesIO(content)) as gfile:
        return gfile.read()


def encode_gzip(content):
    """
    Compress `content` into a gzip stream and return it as bytes.

    The GzipFile is used as a context manager so that it is closed (which
    writes the gzip trailer) even when write() raises. Closing it does not
    close the in-memory buffer, which is read out afterwards.
    """
    buf = BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as gf:
        gf.write(content)
    return buf.getvalue()


def decode_brotli(content):
    """
    Return `content` decompressed with the brotli library.
    """
    decompressed = brotli.decompress(content)
    return decompressed


def encode_brotli(content):
    """
    Return `content` compressed with the brotli library's default settings.
    """
    compressed = brotli.compress(content)
    return compressed


def decode_deflate(content):
    """
    Decompress DEFLATE data, with or without the zlib framing.

    A regular zlib stream (header plus checksum) is tried first. Some
    servers send the bare compressed data without either, so when that
    attempt fails the input is decompressed again with a negative window
    size, which makes zlib accept a stream that lacks both.

    http://bugs.python.org/issue5784
    """
    try:
        inflated = zlib.decompress(content)
    except zlib.error:
        # Negative wbits: raw stream, no zlib header or trailing checksum.
        inflated = zlib.decompress(content, -zlib.MAX_WBITS)
    return inflated


def encode_deflate(content):
    """
    Compress `content` into a complete zlib stream.

    The result always carries the zlib header and the trailing checksum.
    """
    compressed = zlib.compress(content)
    return compressed


# Encodings that decode() resolves itself: it looks the encoding name up here
# first and only falls back to the codecs module when the name is missing.
# "none" and "identity" both map to the pass-through function.
custom_decode = {
    "none": identity,
    "identity": identity,
    "gzip": decode_gzip,
    "deflate": decode_deflate,
    "br": decode_brotli,
}
# Encoding-side counterpart consulted by encode(); it has the same keys as
# custom_decode.
custom_encode = {
    "none": identity,
    "identity": identity,
    "gzip": encode_gzip,
    "deflate": encode_deflate,
    "br": encode_brotli,
}

# Only encode() and decode() are exported by a star-import of this module.
__all__ = ["encode", "decode"]