From 806aa0f41c7816b2859a6961939ed19499b73fe7 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 2 Apr 2016 14:38:33 +0200 Subject: improve .replace() and move it into netlib --- netlib/http/headers.py | 31 ++++++++++++++++++++++++++++++- netlib/http/message.py | 19 +++++++++++++++++++ netlib/http/request.py | 17 +++++++++++++++++ netlib/odict.py | 12 +----------- netlib/utils.py | 10 ++++++++++ 5 files changed, 77 insertions(+), 12 deletions(-) (limited to 'netlib') diff --git a/netlib/http/headers.py b/netlib/http/headers.py index bcb828da..72739f90 100644 --- a/netlib/http/headers.py +++ b/netlib/http/headers.py @@ -6,6 +6,8 @@ See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/ """ from __future__ import absolute_import, print_function, division +import re + try: from collections.abc import MutableMapping except ImportError: # pragma: no cover @@ -198,4 +200,31 @@ class Headers(MutableMapping, Serializable): @classmethod def from_state(cls, state): - return cls([list(field) for field in state]) \ No newline at end of file + return cls([list(field) for field in state]) + + @_always_byte_args + def replace(self, pattern, repl, flags=0): + """ + Replaces a regular expression pattern with repl in each "name: value" + header line. + + Returns: + The number of replacements made. + """ + pattern = re.compile(pattern, flags) + replacements = 0 + + fields = [] + for name, value in self.fields: + line, n = pattern.subn(repl, name + b": " + value) + try: + name, value = line.split(b": ", 1) + except ValueError: + # We get a ValueError if the replacement removed the ": " + # There's not much we can do about this, so we just keep the header as-is. 
+ pass + else: + replacements += n + fields.append([name, value]) + self.fields = fields + return replacements diff --git a/netlib/http/message.py b/netlib/http/message.py index b265ac4f..da9681a0 100644 --- a/netlib/http/message.py +++ b/netlib/http/message.py @@ -175,6 +175,25 @@ class Message(utils.Serializable): self.headers["content-encoding"] = e return True + def replace(self, pattern, repl, flags=0): + """ + Replaces a regular expression pattern with repl in both the headers + and the body of the message. Encoded body will be decoded + before replacement, and re-encoded afterwards. + + Returns: + The number of replacements made. + """ + # TODO: Proper distinction between text and bytes. + replacements = 0 + if self.content: + with decoded(self): + self.content, replacements = utils.safe_subn( + pattern, repl, self.content, flags=flags + ) + replacements += self.headers.replace(pattern, repl, flags) + return replacements + # Legacy @property diff --git a/netlib/http/request.py b/netlib/http/request.py index 5bd2547e..07a11969 100644 --- a/netlib/http/request.py +++ b/netlib/http/request.py @@ -54,6 +54,23 @@ class Request(Message): self.method, hostport, path ) + def replace(self, pattern, repl, flags=0): + """ + Replaces a regular expression pattern with repl in the headers, the + request path and the body of the request. Encoded content will be + decoded before replacement, and re-encoded afterwards. + + Returns: + The number of replacements made. + """ + # TODO: Proper distinction between text and bytes. 
+ c = super(Request, self).replace(pattern, repl, flags) + self.path, pc = utils.safe_subn( + pattern, repl, self.path, flags=flags + ) + c += pc + return c + @property def first_line_format(self): """ diff --git a/netlib/odict.py b/netlib/odict.py index 1e6e381a..461192f7 100644 --- a/netlib/odict.py +++ b/netlib/odict.py @@ -1,18 +1,8 @@ from __future__ import (absolute_import, print_function, division) -import re import copy import six -from .utils import Serializable - - -def safe_subn(pattern, repl, target, *args, **kwargs): - """ - There are Unicode conversion problems with re.subn. We try to smooth - that over by casting the pattern and replacement to strings. We really - need a better solution that is aware of the actual content ecoding. - """ - return re.subn(str(pattern), str(repl), target, *args, **kwargs) +from .utils import Serializable, safe_subn class ODict(Serializable): diff --git a/netlib/utils.py b/netlib/utils.py index 09be29d9..dda76808 100644 --- a/netlib/utils.py +++ b/netlib/utils.py @@ -414,8 +414,18 @@ def http2_read_raw_frame(rfile): body = rfile.safe_read(length) return [header, body] + def http2_read_frame(rfile): header, body = http2_read_raw_frame(rfile) frame, length = hyperframe.frame.Frame.parse_frame_header(header) frame.parse_body(memoryview(body)) return frame + + +def safe_subn(pattern, repl, target, *args, **kwargs): + """ + There are Unicode conversion problems with re.subn. We try to smooth + that over by casting the pattern and replacement to strings. We really + need a better solution that is aware of the actual content encoding. + """ + return re.subn(str(pattern), str(repl), target, *args, **kwargs) -- cgit v1.2.3