aboutsummaryrefslogtreecommitdiffstats
path: root/netlib/encoding.py
diff options
context:
space:
mode:
author: Aldo Cortesi <aldo@nullcube.com> 2016-10-20 11:06:57 +1300
committer: Aldo Cortesi <aldo@nullcube.com> 2016-10-20 11:06:57 +1300
commit: 301d52d9d05f2c5f074fe68c73acc1c32e518020 (patch)
tree: f55ac41b1deb5ea0a3baa63930c0ef5193f3886f /netlib/encoding.py
parent: f964d49853a3f0d22e0f6d4cff7cfbc49008e40e (diff)
download: mitmproxy-301d52d9d05f2c5f074fe68c73acc1c32e518020.tar.gz
mitmproxy-301d52d9d05f2c5f074fe68c73acc1c32e518020.tar.bz2
mitmproxy-301d52d9d05f2c5f074fe68c73acc1c32e518020.zip
netlib.encoding -> netlib.http.encoding
Encoding is highly specific to http, and only used within this module.
Diffstat (limited to 'netlib/encoding.py')
-rw-r--r-- netlib/encoding.py 175
1 files changed, 0 insertions, 175 deletions
diff --git a/netlib/encoding.py b/netlib/encoding.py
deleted file mode 100644
index e123a033..00000000
--- a/netlib/encoding.py
+++ /dev/null
@@ -1,175 +0,0 @@
-"""
-Utility functions for decoding response bodies.
-"""
-
-import codecs
-import collections
-from io import BytesIO
-
-import gzip
-import zlib
-import brotli
-
-from typing import Union
-
-
-# We have a shared single-element cache for encoding and decoding.
-# This is quite useful in practice, e.g.
-# flow.request.content = flow.request.content.replace(b"foo", b"bar")
-# does not require an .encode() call if content does not contain b"foo"
-CachedDecode = collections.namedtuple("CachedDecode", "encoded encoding errors decoded")
-_cache = CachedDecode(None, None, None, None)
-
-
-def decode(encoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
- """
- Decode the given input object
-
- Returns:
- The decoded value
-
- Raises:
- ValueError, if decoding fails.
- """
- if len(encoded) == 0:
- return encoded
-
- global _cache
- cached = (
- isinstance(encoded, bytes) and
- _cache.encoded == encoded and
- _cache.encoding == encoding and
- _cache.errors == errors
- )
- if cached:
- return _cache.decoded
- try:
- try:
- decoded = custom_decode[encoding](encoded)
- except KeyError:
- decoded = codecs.decode(encoded, encoding, errors)
- if encoding in ("gzip", "deflate", "br"):
- _cache = CachedDecode(encoded, encoding, errors, decoded)
- return decoded
- except TypeError:
- raise
- except Exception as e:
- raise ValueError("{} when decoding {} with {}: {}".format(
- type(e).__name__,
- repr(encoded)[:10],
- repr(encoding),
- repr(e),
- ))
-
-
-def encode(decoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
- """
- Encode the given input object
-
- Returns:
- The encoded value
-
- Raises:
- ValueError, if encoding fails.
- """
- if len(decoded) == 0:
- return decoded
-
- global _cache
- cached = (
- isinstance(decoded, bytes) and
- _cache.decoded == decoded and
- _cache.encoding == encoding and
- _cache.errors == errors
- )
- if cached:
- return _cache.encoded
- try:
- try:
- value = decoded
- if isinstance(value, str):
- value = decoded.encode()
- encoded = custom_encode[encoding](value)
- except KeyError:
- encoded = codecs.encode(decoded, encoding, errors)
- if encoding in ("gzip", "deflate", "br"):
- _cache = CachedDecode(encoded, encoding, errors, decoded)
- return encoded
- except TypeError:
- raise
- except Exception as e:
- raise ValueError("{} when encoding {} with {}: {}".format(
- type(e).__name__,
- repr(decoded)[:10],
- repr(encoding),
- repr(e),
- ))
-
-
-def identity(content):
- """
- Returns content unchanged. Identity is the default value of
- Accept-Encoding headers.
- """
- return content
-
-
-def decode_gzip(content):
- gfile = gzip.GzipFile(fileobj=BytesIO(content))
- return gfile.read()
-
-
-def encode_gzip(content):
- s = BytesIO()
- gf = gzip.GzipFile(fileobj=s, mode='wb')
- gf.write(content)
- gf.close()
- return s.getvalue()
-
-
-def decode_brotli(content):
- return brotli.decompress(content)
-
-
-def encode_brotli(content):
- return brotli.compress(content)
-
-
-def decode_deflate(content):
- """
- Returns decompressed data for DEFLATE. Some servers may respond with
- compressed data without a zlib header or checksum. An undocumented
- feature of zlib permits the lenient decompression of data missing both
- values.
-
- http://bugs.python.org/issue5784
- """
- try:
- return zlib.decompress(content)
- except zlib.error:
- return zlib.decompress(content, -15)
-
-
-def encode_deflate(content):
- """
- Returns compressed content, always including zlib header and checksum.
- """
- return zlib.compress(content)
-
-
-custom_decode = {
- "none": identity,
- "identity": identity,
- "gzip": decode_gzip,
- "deflate": decode_deflate,
- "br": decode_brotli,
-}
-custom_encode = {
- "none": identity,
- "identity": identity,
- "gzip": encode_gzip,
- "deflate": encode_deflate,
- "br": encode_brotli,
-}
-
-__all__ = ["encode", "decode"]