diff options
author | Maximilian Hils <git@maximilianhils.com> | 2015-09-12 17:03:09 +0200 |
---|---|---|
committer | Maximilian Hils <git@maximilianhils.com> | 2015-09-12 17:03:09 +0200 |
commit | 997fcde8ce94be9d8decddd4bc783106dbb41ab3 (patch) | |
tree | c668d1e49dcc9298bed2a85de136e7f053d72b17 /netlib | |
parent | a38142d5950a899c6e3f854841a45f4785515761 (diff) | |
download | mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.gz mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.bz2 mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.zip |
make clean_bin unicode-aware
Diffstat (limited to 'netlib')
-rw-r--r-- | netlib/utils.py | 39 | ||||
-rw-r--r-- | netlib/websockets/frame.py | 2 |
2 files changed, 26 insertions, 15 deletions
```diff
diff --git a/netlib/utils.py b/netlib/utils.py
index aae187da..d6774419 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -5,6 +5,8 @@ import urllib
 import urlparse
 import string
 import re
+import six
+import unicodedata
 
 
 def isascii(s):
@@ -20,22 +22,31 @@ def bytes_to_int(i):
     return int(i.encode('hex'), 16)
 
 
-def cleanBin(s, fixspacing=False):
+def clean_bin(s, keep_spacing=True):
     """
-    Cleans binary data to make it safe to display. If fixspacing is True,
-    tabs, newlines and so forth will be maintained, if not, they will be
-    replaced with a placeholder.
+    Cleans binary data to make it safe to display.
+
+    Args:
+        keep_spacing: If False, tabs and newlines will also be replaced.
     """
-    parts = []
-    for i in s:
-        o = ord(i)
-        if (o > 31 and o < 127):
-            parts.append(i)
-        elif i in "\n\t" and not fixspacing:
-            parts.append(i)
+    if isinstance(s, six.text_type):
+        if keep_spacing:
+            keep = u" \n\r\t"
+        else:
+            keep = u" "
+        return u"".join(
+            ch if (unicodedata.category(ch)[0] not in "CZ" or ch in keep) else u"."
+            for ch in s
+        )
+    else:
+        if keep_spacing:
+            keep = b"\n\r\t"
         else:
-            parts.append(".")
-    return "".join(parts)
+            keep = b""
+        return b"".join(
+            ch if (31 < ord(ch) < 127 or ch in keep) else b"."
+            for ch in s
+        )
 
 
 def hexdump(s):
@@ -52,7 +63,7 @@ def hexdump(s):
         x += " "
         x += " ".join(" " for i in range(16 - len(part)))
         parts.append(
-            (o, x, cleanBin(part, True))
+            (o, x, clean_bin(part, False))
         )
     return parts
 
diff --git a/netlib/websockets/frame.py b/netlib/websockets/frame.py
index 1c4a03b2..e3ff1405 100644
--- a/netlib/websockets/frame.py
+++ b/netlib/websockets/frame.py
@@ -236,7 +236,7 @@ class Frame(object):
     def human_readable(self):
         ret = self.header.human_readable()
         if self.payload:
-            ret = ret + "\nPayload:\n" + utils.cleanBin(self.payload)
+            ret = ret + "\nPayload:\n" + utils.clean_bin(self.payload)
         return ret
 
     def __repr__(self):
```