about | summary | refs | log | tree | commit | diff | stats
path: root/netlib
diff options
context:
space:
mode:
author Maximilian Hils <git@maximilianhils.com> 2015-09-12 17:03:09 +0200
committer Maximilian Hils <git@maximilianhils.com> 2015-09-12 17:03:09 +0200
commit 997fcde8ce94be9d8decddd4bc783106dbb41ab3 (patch)
tree c668d1e49dcc9298bed2a85de136e7f053d72b17 /netlib
parent a38142d5950a899c6e3f854841a45f4785515761 (diff)
download mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.gz
mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.tar.bz2
mitmproxy-997fcde8ce94be9d8decddd4bc783106dbb41ab3.zip
make clean_bin unicode-aware
Diffstat (limited to 'netlib')
-rw-r--r-- netlib/utils.py 39
-rw-r--r-- netlib/websockets/frame.py 2
2 files changed, 26 insertions, 15 deletions
diff --git a/netlib/utils.py b/netlib/utils.py
index aae187da..d6774419 100644
--- a/netlib/utils.py
+++ b/netlib/utils.py
@@ -5,6 +5,8 @@ import urllib
import urlparse
import string
import re
+import six
+import unicodedata
def isascii(s):
@@ -20,22 +22,31 @@ def bytes_to_int(i):
return int(i.encode('hex'), 16)
-def cleanBin(s, fixspacing=False):
+def clean_bin(s, keep_spacing=True):
"""
- Cleans binary data to make it safe to display. If fixspacing is True,
- tabs, newlines and so forth will be maintained, if not, they will be
- replaced with a placeholder.
+ Cleans binary data to make it safe to display.
+
+ Args:
+ keep_spacing: If False, tabs and newlines will also be replaced.
"""
- parts = []
- for i in s:
- o = ord(i)
- if (o > 31 and o < 127):
- parts.append(i)
- elif i in "\n\t" and not fixspacing:
- parts.append(i)
+ if isinstance(s, six.text_type):
+ if keep_spacing:
+ keep = u" \n\r\t"
+ else:
+ keep = u" "
+ return u"".join(
+ ch if (unicodedata.category(ch)[0] not in "CZ" or ch in keep) else u"."
+ for ch in s
+ )
+ else:
+ if keep_spacing:
+ keep = b"\n\r\t"
else:
- parts.append(".")
- return "".join(parts)
+ keep = b""
+ return b"".join(
+ ch if (31 < ord(ch) < 127 or ch in keep) else b"."
+ for ch in s
+ )
def hexdump(s):
@@ -52,7 +63,7 @@ def hexdump(s):
x += " "
x += " ".join(" " for i in range(16 - len(part)))
parts.append(
- (o, x, cleanBin(part, True))
+ (o, x, clean_bin(part, False))
)
return parts
diff --git a/netlib/websockets/frame.py b/netlib/websockets/frame.py
index 1c4a03b2..e3ff1405 100644
--- a/netlib/websockets/frame.py
+++ b/netlib/websockets/frame.py
@@ -236,7 +236,7 @@ class Frame(object):
def human_readable(self):
ret = self.header.human_readable()
if self.payload:
- ret = ret + "\nPayload:\n" + utils.cleanBin(self.payload)
+ ret = ret + "\nPayload:\n" + utils.clean_bin(self.payload)
return ret
def __repr__(self):