diff options
author | Aldo Cortesi <aldo@nullcube.com> | 2015-05-02 16:17:00 +1200 |
---|---|---|
committer | Aldo Cortesi <aldo@nullcube.com> | 2015-05-02 16:17:00 +1200 |
commit | 9109b3cc8cca05f34d3ddee062cf7f8bc401af31 (patch) | |
tree | 2d1f2f4ed115a9bc7c26eb87e697b34fe860368f /libpathod/language/base.py | |
parent | 601cdf70c7339a59537cc8402e4f2648f398b28d (diff) | |
download | mitmproxy-9109b3cc8cca05f34d3ddee062cf7f8bc401af31.tar.gz mitmproxy-9109b3cc8cca05f34d3ddee062cf7f8bc401af31.tar.bz2 mitmproxy-9109b3cc8cca05f34d3ddee062cf7f8bc401af31.zip |
Massive refactoring to split up language implementation.
Diffstat (limited to 'libpathod/language/base.py')
-rw-r--r-- | libpathod/language/base.py | 751 |
1 files changed, 751 insertions, 0 deletions
# libpathod/language/base.py
"""
    Base building blocks of the pathod specification language: pyparsing
    value expressions, token/component/action classes and the abstract
    message class.
"""
import operator
import random
import os
import copy
import abc
import contrib.pyparsing as pp
from netlib import http_uastrings

from .. import utils
from . import generators, exceptions

# Maximum number of characters of each logged value kept by _Message.log().
TRUNCATE = 1024


def quote(s):
    """
        Re-quote an already quoted string.

        The first character of s is taken as the quote character, the first
        and last characters are stripped, every occurrence of the quote
        character in the remainder is backslash-escaped, and the result is
        wrapped in the quote character again.
    """
    quotechar = s[0]
    s = s[1:-1]
    s = s.replace(quotechar, "\\" + quotechar)
    return quotechar + s + quotechar


def _path_is_within(path, directory):
    """
        Return True if path is directory itself or lies underneath it.

        Both arguments must already be normalised absolute paths. The
        comparison is made on whole path components: a plain
        path.startswith(directory) would also accept sibling directories
        that merely share a name prefix (e.g. "/srv/static-private" for
        "/srv/static").
    """
    if path == directory:
        return True
    if directory.endswith(os.sep):
        # Only the case for a filesystem root after normalisation.
        prefix = directory
    else:
        prefix = directory + os.sep
    return path.startswith(prefix)


# A run of digits, converted to an int by the parse action.
v_integer = pp.Word(pp.nums)\
    .setName("integer")\
    .setParseAction(lambda toks: int(toks[0]))


# A double- or single-quoted string with backslash escapes; quotes are
# stripped from the result and the string may span lines.
v_literal = pp.MatchFirst(
    [
        pp.QuotedString(
            "\"",
            escChar="\\",
            unquoteResults=True,
            multiline=True
        ),
        pp.QuotedString(
            "'",
            escChar="\\",
            unquoteResults=True,
            multiline=True
        ),
    ]
)

# Either a quoted literal, or an unquoted word made of printable characters
# other than comma, colon, newline, "@" and the two quote characters.
v_naked_literal = pp.MatchFirst(
    [
        v_literal,
        pp.Word("".join(i for i in pp.printables if i not in ",:\n@\'\""))
    ]
)


class _Token(object):
    """
        A specification token. Tokens are immutable.
    """
    __metaclass__ = abc.ABCMeta

    @classmethod
    def expr(klass):  # pragma: no cover
        """
            A parse expression.
        """
        return None

    @abc.abstractmethod
    def spec(self):  # pragma: no cover
        """
            A parseable specification for this token.
        """
        return None

    def resolve(self, settings, msg):
        """
            Resolves this token to ready it for transmission. This means that
            the calculated offsets of actions are fixed.

            The base implementation returns the token unchanged.
        """
        return self

    def __repr__(self):
        return self.spec()


class _ValueLiteral(_Token):
    """
        Common base for literal values. The constructor argument is decoded
        with the "string_escape" codec and stored as self.val.
    """
    def __init__(self, val):
        self.val = val.decode("string_escape")

    def get_generator(self, settings):
        """
            Return a LiteralGenerator over the decoded value.
        """
        return generators.LiteralGenerator(self.val)

    def freeze(self, settings):
        """
            Literals are already fixed, so freezing returns self.
        """
        return self


class ValueLiteral(_ValueLiteral):
    """
        A quoted literal value.
    """
    @classmethod
    def expr(klass):
        # Copy so that the parse action is not attached to the shared
        # module-level v_literal expression.
        e = v_literal.copy()
        return e.setParseAction(klass.parseAction)

    @classmethod
    def parseAction(klass, x):
        """
            Build an instance from the parsed tokens.
        """
        v = klass(*x)
        return v

    def spec(self):
        """
            The escaped value wrapped in single quotes.
        """
        ret = "'%s'" % self.val.encode("string_escape")
        return ret


class ValueNakedLiteral(_ValueLiteral):
    """
        A literal value that may be written without quotes.
    """
    @classmethod
    def expr(klass):
        e = v_naked_literal.copy()
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        """
            The escaped value, without surrounding quotes.
        """
        return self.val.encode("string_escape")


class ValueGenerate(_Token):
    """
        A generated value, written as "@" followed by a size, an optional
        unit and an optional ",datatype" suffix.
    """
    def __init__(self, usize, unit, datatype):
        # A missing unit means plain bytes.
        if not unit:
            unit = "b"
        self.usize, self.unit, self.datatype = usize, unit, datatype

    def bytes(self):
        """
            The size multiplied by the multiplier utils.SIZE_UNITS holds for
            the unit.
        """
        return self.usize * utils.SIZE_UNITS[self.unit]

    def get_generator(self, settings):
        """
            Return a RandomGenerator for the data type and computed size.
        """
        return generators.RandomGenerator(self.datatype, self.bytes())

    def freeze(self, settings):
        """
            Take the complete output of the generator and return it as a
            ValueLiteral, so the value no longer changes.
        """
        g = self.get_generator(settings)
        return ValueLiteral(g[:].encode("string_escape"))

    @classmethod
    def expr(klass):
        e = pp.Literal("@").suppress() + v_integer

        # Optional unit: any key of utils.SIZE_UNITS; None if absent.
        u = reduce(
            operator.or_,
            [pp.Literal(i) for i in utils.SIZE_UNITS.keys()]
        ).leaveWhitespace()
        e = e + pp.Optional(u, default=None)

        # Optional ",datatype": any key of generators.DATATYPES; "bytes" if
        # absent.
        s = pp.Literal(",").suppress()
        s += reduce(
            operator.or_,
            [pp.Literal(i) for i in generators.DATATYPES.keys()]
        )
        e += pp.Optional(s, default="bytes")
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        """
            The specification string; the default unit "b" and the default
            data type "bytes" are omitted.
        """
        s = "@%s" % self.usize
        if self.unit != "b":
            s += self.unit
        if self.datatype != "bytes":
            s += ",%s" % self.datatype
        return s


class ValueFile(_Token):
    """
        A value read from a file, written as "<" followed by a path.
    """
    def __init__(self, path):
        self.path = str(path)

    @classmethod
    def expr(klass):
        e = pp.Literal("<").suppress()
        e = e + v_naked_literal
        return e.setParseAction(lambda x: klass(*x))

    def freeze(self, settings):
        """
            File values are not expanded when frozen; returns self.
        """
        return self

    def get_generator(self, settings):
        """
            Return a FileGenerator for the file this value refers to.

            The path is user-expanded and resolved relative to
            settings.staticdir. Unless settings.unconstrained_file_access is
            set, the resolved path must lie inside the static directory.

            Raises exceptions.FileAccessDenied if file access is disabled
            (no staticdir), if the path escapes the static directory, or if
            the path is not an existing regular file.
        """
        if not settings.staticdir:
            raise exceptions.FileAccessDenied("File access disabled.")
        # Normalise the configured directory so that it is compared in the
        # same form as the candidate path below.
        sd = os.path.normpath(os.path.abspath(settings.staticdir))
        s = os.path.expanduser(self.path)
        s = os.path.normpath(
            os.path.abspath(os.path.join(settings.staticdir, s))
        )
        uf = settings.unconstrained_file_access
        if not uf and not _path_is_within(s, sd):
            raise exceptions.FileAccessDenied(
                "File access outside of configured directory"
            )
        if not os.path.isfile(s):
            raise exceptions.FileAccessDenied("File not readable")
        return generators.FileGenerator(s)

    def spec(self):
        return "<'%s'" % self.path.encode("string_escape")


# Any value that is unambiguous inside a larger specification.
Value = pp.MatchFirst(
    [
        ValueGenerate.expr(),
        ValueFile.expr(),
        ValueLiteral.expr()
    ]
)


# As Value, but additionally accepts unquoted literals.
NakedValue = pp.MatchFirst(
    [
        ValueGenerate.expr(),
        ValueFile.expr(),
        ValueLiteral.expr(),
        ValueNakedLiteral.expr(),
    ]
)


# An action offset: an integer, or the literal "r" or "a" (turned into a
# number by _Action.resolve).
Offset = pp.MatchFirst(
    [
        v_integer,
        pp.Literal("r"),
        pp.Literal("a")
    ]
)


class Raw(_Token):
    """
        The "r" flag token. It carries no value; _Message.raw reports
        whether one is present.
    """
    @classmethod
    def expr(klass):
        e = pp.Literal("r").suppress()
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "r"

    def freeze(self, settings):
        return self


class _Component(_Token):
    """
        A value component of the primary specification of an HTTP message.
    """
    @abc.abstractmethod
    def values(self, settings):  # pragma: no cover
        """
            A sequence of value objects.
        """
        return None

    def string(self, settings=None):
        """
            A string representation of the object: the full slice of every
            item returned by values(), concatenated. An empty dict is passed
            as settings if none are given.
        """
        return "".join(i[:] for i in self.values(settings or {}))


class _Header(_Component):
    """
        Base for header components; holds a key value and a value value.
    """
    def __init__(self, key, value):
        self.key, self.value = key, value

    def values(self, settings):
        """
            The key generator, ": ", the value generator and a trailing CRLF.
        """
        return [
            self.key.get_generator(settings),
            ": ",
            self.value.get_generator(settings),
            "\r\n",
        ]


class Header(_Header):
    """
        An arbitrary header, written as hKEY=VALUE.
    """
    @classmethod
    def expr(klass):
        e = pp.Literal("h").suppress()
        e += Value
        e += pp.Literal("=").suppress()
        e += Value
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "h%s=%s" % (self.key.spec(), self.value.spec())

    def freeze(self, settings):
        return Header(self.key.freeze(settings), self.value.freeze(settings))


class ShortcutContentType(_Header):
    """
        Shortcut for a Content-Type header, written as cVALUE.
    """
    def __init__(self, value):
        _Header.__init__(self, ValueLiteral("Content-Type"), value)

    @classmethod
    def expr(klass):
        e = pp.Literal("c").suppress()
        e = e + Value
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "c%s" % (self.value.spec())

    def freeze(self, settings):
        return ShortcutContentType(self.value.freeze(settings))


class ShortcutLocation(_Header):
    """
        Shortcut for a Location header, written as lVALUE.
    """
    def __init__(self, value):
        _Header.__init__(self, ValueLiteral("Location"), value)

    @classmethod
    def expr(klass):
        e = pp.Literal("l").suppress()
        e = e + Value
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "l%s" % (self.value.spec())

    def freeze(self, settings):
        return ShortcutLocation(self.value.freeze(settings))


class ShortcutUserAgent(_Header):
    """
        Shortcut for a User-Agent header, written as "u" followed by either
        a user agent shortcut from http_uastrings or a value.
    """
    def __init__(self, value):
        # Remember what was passed in so that spec() can reproduce it.
        self.specvalue = value
        if isinstance(value, basestring):
            # A plain string is a shortcut; element [2] of the matching
            # http_uastrings entry is used as the header value.
            value = ValueLiteral(http_uastrings.get_by_shortcut(value)[2])
        _Header.__init__(self, ValueLiteral("User-Agent"), value)

    @classmethod
    def expr(klass):
        e = pp.Literal("u").suppress()
        # Element [1] of each UASTRINGS entry is matched as the shortcut.
        u = reduce(
            operator.or_,
            [pp.Literal(i[1]) for i in http_uastrings.UASTRINGS]
        )
        e += u | Value
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "u%s" % self.specvalue

    def freeze(self, settings):
        return ShortcutUserAgent(self.value.freeze(settings))


class Body(_Component):
    """
        The message body, written as bVALUE.
    """
    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(klass):
        e = pp.Literal("b").suppress()
        e = e + Value
        return e.setParseAction(lambda x: klass(*x))

    def values(self, settings):
        return [
            self.value.get_generator(settings),
        ]

    def spec(self):
        return "b%s" % (self.value.spec())

    def freeze(self, settings):
        return Body(self.value.freeze(settings))


class PathodSpec(_Token):
    """
        An embedded response specification, written as "s" followed by a
        quoted literal. The literal is parsed on construction and the result
        is kept in self.parsed.
    """
    def __init__(self, value):
        """
            Raises exceptions.ParseException if the embedded specification
            does not parse.
        """
        self.value = value
        try:
            # NOTE(review): under Python 2 implicit relative imports this
            # presumably resolves to the sibling language/http.py module, and
            # is imported here rather than at module level presumably to
            # avoid a circular import - confirm.
            import http
            self.parsed = http.Response(
                http.Response.expr().parseString(
                    value.val,
                    parseAll=True
                )
            )
        except pp.ParseException as v:
            raise exceptions.ParseException(v.msg, v.line, v.col)

    @classmethod
    def expr(klass):
        e = pp.Literal("s").suppress()
        e = e + ValueLiteral.expr()
        return e.setParseAction(lambda x: klass(*x))

    def values(self, settings):
        return [
            self.value.get_generator(settings),
        ]

    def spec(self):
        return "s%s" % (self.value.spec())

    def freeze(self, settings):
        """
            Freeze the parsed embedded message and wrap its specification in
            a new PathodSpec.
        """
        f = self.parsed.freeze(settings).spec()
        return PathodSpec(ValueLiteral(f.encode("string_escape")))


class Path(_Component):
    """
        A path component. A plain string passed to the constructor is
        wrapped in a ValueLiteral.
    """
    def __init__(self, value):
        if isinstance(value, basestring):
            value = ValueLiteral(value)
        self.value = value

    @classmethod
    def expr(klass):
        e = Value | NakedValue
        return e.setParseAction(lambda x: klass(*x))

    def values(self, settings):
        return [
            self.value.get_generator(settings),
        ]

    def spec(self):
        return "%s" % (self.value.spec())

    def freeze(self, settings):
        return Path(self.value.freeze(settings))


# NOTE(review): this class statement rebinds the module-level name _Token.
# Classes defined above (e.g. _ValueLiteral, Raw, _Component) derive from the
# abstract _Token at the top of the module, while classes defined below
# (WS, WF, _Action) derive from this one. The name is kept as-is because
# renaming it would change what "_Token" means to importers of this module;
# confirm whether the shadowing is intended.
class _Token(_Component):
    """
        A component that is a fixed keyword, matched case-insensitively.
        Subclasses set the keyword in the class attribute TOK.
    """
    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(klass):
        spec = pp.CaselessLiteral(klass.TOK)
        spec = spec.setParseAction(lambda x: klass(*x))
        return spec

    def values(self, settings):
        # Returns the keyword string itself rather than a list of values.
        return self.TOK

    def spec(self):
        return self.TOK

    def freeze(self, settings):
        return self


class WS(_Token):
    TOK = "ws"


class WF(_Token):
    TOK = "wf"


class Method(_Component):
    """
        A request method: either one of the known method names (matched
        case-insensitively) or an arbitrary value.
    """
    methods = [
        "get",
        "head",
        "post",
        "put",
        "delete",
        "options",
        "trace",
        "connect",
    ]

    def __init__(self, value):
        # If it's a string, we were passed one of the methods, so we upper-case
        # it to be canonical. The user can specify a different case by using a
        # string value literal.
        if isinstance(value, basestring):
            value = ValueLiteral(value.upper())
        self.value = value

    @classmethod
    def expr(klass):
        parts = [pp.CaselessLiteral(i) for i in klass.methods]
        m = pp.MatchFirst(parts)
        spec = m | Value.copy()
        spec = spec.setParseAction(lambda x: klass(*x))
        return spec

    def values(self, settings):
        return [
            self.value.get_generator(settings)
        ]

    def spec(self):
        """
            The value's specification. If, with its first and last characters
            removed and lower-cased, it is one of the known methods, that
            bare lower-case method name is returned instead.
        """
        s = self.value.spec()
        if s[1:-1].lower() in self.methods:
            s = s[1:-1].lower()
        return "%s" % s

    def freeze(self, settings):
        return Method(self.value.freeze(settings))


class Code(_Component):
    """
        A numeric code component, stored as a string.
    """
    def __init__(self, code):
        self.code = str(code)

    @classmethod
    def expr(klass):
        e = v_integer.copy()
        return e.setParseAction(lambda x: klass(*x))

    def values(self, settings):
        return [generators.LiteralGenerator(self.code)]

    def spec(self):
        return "%s" % (self.code)

    def freeze(self, settings):
        return Code(self.code)


class Reason(_Component):
    """
        A reason component, written as mVALUE.
    """
    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(klass):
        e = pp.Literal("m").suppress()
        e = e + Value
        return e.setParseAction(lambda x: klass(*x))

    def values(self, settings):
        return [self.value.get_generator(settings)]

    def spec(self):
        return "m%s" % (self.value.spec())

    def freeze(self, settings):
        return Reason(self.value.freeze(settings))


class _Action(_Token):
    """
        An action that operates on the raw data stream of the message. All
        actions have one thing in common: an offset that specifies where the
        action should take place.
    """
    def __init__(self, offset):
        self.offset = offset

    def resolve(self, settings, msg):
        """
            Resolves offset specifications to a numeric offset. Returns a copy
            of the action object.

            An offset of "r" becomes a random position below the message
            length, "a" becomes the message length plus one; any other offset
            is left untouched.
        """
        c = copy.copy(self)
        l = msg.length(settings)
        if c.offset == "r":
            # NOTE(review): random.randrange raises ValueError when the
            # message length is 0.
            c.offset = random.randrange(l)
        elif c.offset == "a":
            c.offset = l + 1
        return c

    def __cmp__(self, other):
        # Actions order by offset.
        return cmp(self.offset, other.offset)

    def __repr__(self):
        return self.spec()

    @abc.abstractmethod
    def spec(self):  # pragma: no cover
        pass

    @abc.abstractmethod
    def intermediate(self, settings):  # pragma: no cover
        pass


class PauseAt(_Action):
    """
        Pause at an offset, written as pOFFSET,SECONDS where SECONDS is an
        integer or the literal "f".
    """
    def __init__(self, offset, seconds):
        _Action.__init__(self, offset)
        self.seconds = seconds

    @classmethod
    def expr(klass):
        e = pp.Literal("p").suppress()
        e += Offset
        e += pp.Literal(",").suppress()
        e += pp.MatchFirst(
            [
                v_integer,
                pp.Literal("f")
            ]
        )
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "p%s,%s" % (self.offset, self.seconds)

    def intermediate(self, settings):
        return (self.offset, "pause", self.seconds)

    def freeze(self, settings):
        return self


class DisconnectAt(_Action):
    """
        Disconnect at an offset, written as dOFFSET.
    """
    def __init__(self, offset):
        _Action.__init__(self, offset)

    @classmethod
    def expr(klass):
        e = pp.Literal("d").suppress()
        e += Offset
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "d%s" % self.offset

    def intermediate(self, settings):
        return (self.offset, "disconnect")

    def freeze(self, settings):
        return self


class InjectAt(_Action):
    """
        Inject a value at an offset, written as iOFFSET,VALUE.
    """
    def __init__(self, offset, value):
        _Action.__init__(self, offset)
        self.value = value

    @classmethod
    def expr(klass):
        e = pp.Literal("i").suppress()
        e += Offset
        e += pp.Literal(",").suppress()
        e += Value
        return e.setParseAction(lambda x: klass(*x))

    def spec(self):
        return "i%s,%s" % (self.offset, self.value.spec())

    def intermediate(self, settings):
        return (
            self.offset,
            "inject",
            self.value.get_generator(settings)
        )

    def freeze(self, settings):
        return InjectAt(self.offset, self.value.freeze(settings))


class _Message(object):
    """
        Abstract base for messages built from a list of tokens.

        This class calls self.values(), self.spec() and self.resolve() but
        does not define them; subclasses are expected to provide them.
    """
    __metaclass__ = abc.ABCMeta
    # Names of the attributes that log() includes in its result.
    logattrs = []

    def __init__(self, tokens):
        self.tokens = tokens

    def toks(self, klass):
        """
            Fetch all tokens that are instances of klass
        """
        return [i for i in self.tokens if isinstance(i, klass)]

    def tok(self, klass):
        """
            Fetch first token that is an instance of klass, or None if there
            is no such token.
        """
        l = self.toks(klass)
        if l:
            return l[0]

    @property
    def raw(self):
        return bool(self.tok(Raw))

    @property
    def actions(self):
        return self.toks(_Action)

    @property
    def body(self):
        return self.tok(Body)

    @property
    def headers(self):
        return self.toks(_Header)

    def length(self, settings):
        """
            Calculate the length of the base message without any applied
            actions.
        """
        return sum(len(x) for x in self.values(settings))

    def preview_safe(self):
        """
            Return a copy of this message that is safe for previews, i.e.
            one with all PauseAt tokens removed.
        """
        tokens = [i for i in self.tokens if not isinstance(i, PauseAt)]
        return self.__class__(tokens)

    def maximum_length(self, settings):
        """
            Calculate the maximum length of the base message with all applied
            actions.
        """
        l = self.length(settings)
        for i in self.actions:
            # Only injections add data to the message.
            if isinstance(i, InjectAt):
                l += len(i.value.get_generator(settings))
        return l

    @classmethod
    def expr(klass):  # pragma: no cover
        pass

    def log(self, settings):
        """
            A dictionary that should be logged if this message is served.

            It holds one entry per name in logattrs plus a "spec" entry with
            the message specification.
        """
        ret = {}
        for i in self.logattrs:
            v = getattr(self, i)
            # Careful not to log any VALUE specs without sanitizing them first.
            # We truncate at 1k.
            if hasattr(v, "values"):
                v = [x[:TRUNCATE] for x in v.values(settings)]
                v = "".join(v).encode("string_escape")
            elif hasattr(v, "__len__"):
                v = v[:TRUNCATE]
                v = v.encode("string_escape")
            ret[i] = v
        ret["spec"] = self.spec()
        return ret

    def freeze(self, settings):
        """
            Resolve the message, then return a new message of the same class
            built from the frozen form of each resolved token.
        """
        r = self.resolve(settings)
        return self.__class__([i.freeze(settings) for i in r.tokens])

    def __repr__(self):
        return self.spec()


# Optional ":" separator between specification components.
Sep = pp.Optional(pp.Literal(":")).suppress()


def read_file(settings, s):
    """
        Return the contents of the file named by s.

        settings is a mapping read with .get(): "staticdir" is the directory
        paths are resolved against, and "unconstrained_file_access" disables
        the check that the resolved path stays inside that directory. The
        first character of s is dropped before the rest is treated as a path.

        Raises exceptions.FileAccessDenied if file access is disabled (no
        staticdir), if the path escapes the static directory, or if the path
        is not an existing regular file.
    """
    uf = settings.get("unconstrained_file_access")
    sd = settings.get("staticdir")
    if not sd:
        raise exceptions.FileAccessDenied("File access disabled.")
    sd = os.path.normpath(os.path.abspath(sd))
    s = s[1:]
    s = os.path.expanduser(s)
    s = os.path.normpath(os.path.abspath(os.path.join(sd, s)))
    if not uf and not _path_is_within(s, sd):
        raise exceptions.FileAccessDenied(
            "File access outside of configured directory"
        )
    if not os.path.isfile(s):
        raise exceptions.FileAccessDenied("File not readable")
    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    with open(s, "rb") as f:
        return f.read()