diff options
Diffstat (limited to 'pathod/language')
-rw-r--r-- | pathod/language/__init__.py | 113 | ||||
-rw-r--r-- | pathod/language/actions.py | 126 | ||||
-rw-r--r-- | pathod/language/base.py | 576 | ||||
-rw-r--r-- | pathod/language/exceptions.py | 22 | ||||
-rw-r--r-- | pathod/language/generators.py | 86 | ||||
-rw-r--r-- | pathod/language/http.py | 381 | ||||
-rw-r--r-- | pathod/language/http2.py | 299 | ||||
-rw-r--r-- | pathod/language/message.py | 96 | ||||
-rw-r--r-- | pathod/language/websockets.py | 241 | ||||
-rw-r--r-- | pathod/language/writer.py | 67 |
10 files changed, 2007 insertions, 0 deletions
diff --git a/pathod/language/__init__.py b/pathod/language/__init__.py new file mode 100644 index 00000000..32199e08 --- /dev/null +++ b/pathod/language/__init__.py @@ -0,0 +1,113 @@ +import itertools +import time + +import pyparsing as pp + +from . import http, http2, websockets, writer, exceptions + +from exceptions import * +from base import Settings +assert Settings # prevent pyflakes from messing with this + + +def expand(msg): + times = getattr(msg, "times", None) + if times: + for j_ in xrange(int(times.value)): + yield msg.strike_token("times") + else: + yield msg + + +def parse_pathod(s, use_http2=False): + """ + May raise ParseException + """ + try: + s = s.decode("ascii") + except UnicodeError: + raise exceptions.ParseException("Spec must be valid ASCII.", 0, 0) + try: + if use_http2: + expressions = [ + # http2.Frame.expr(), + http2.Response.expr(), + ] + else: + expressions = [ + websockets.WebsocketFrame.expr(), + http.Response.expr(), + ] + reqs = pp.Or(expressions).parseString(s, parseAll=True) + except pp.ParseException as v: + raise exceptions.ParseException(v.msg, v.line, v.col) + return itertools.chain(*[expand(i) for i in reqs]) + + +def parse_pathoc(s, use_http2=False): + try: + s = s.decode("ascii") + except UnicodeError: + raise exceptions.ParseException("Spec must be valid ASCII.", 0, 0) + try: + if use_http2: + expressions = [ + # http2.Frame.expr(), + http2.Request.expr(), + ] + else: + expressions = [ + websockets.WebsocketClientFrame.expr(), + http.Request.expr(), + ] + reqs = pp.OneOrMore(pp.Or(expressions)).parseString(s, parseAll=True) + except pp.ParseException as v: + raise exceptions.ParseException(v.msg, v.line, v.col) + return itertools.chain(*[expand(i) for i in reqs]) + + +def parse_websocket_frame(s): + """ + May raise ParseException + """ + try: + reqs = pp.OneOrMore( + websockets.WebsocketFrame.expr() + ).parseString( + s, + parseAll=True + ) + except pp.ParseException as v: + raise exceptions.ParseException(v.msg, v.line, v.col) + return itertools.chain(*[expand(i) for i in reqs]) + + +def serve(msg, fp, settings): + """ + fp: The file pointer to write to. + + request_host: If this a request, this is the connecting host. If + None, we assume it's a response. Used to decide what standard + modifications to make if raw is not set. + + Calling this function may modify the object. + """ + msg = msg.resolve(settings) + started = time.time() + + vals = msg.values(settings) + vals.reverse() + + actions = sorted(msg.actions[:]) + actions.reverse() + actions = [i.intermediate(settings) for i in actions] + + disconnect = writer.write_values(fp, vals, actions[:]) + duration = time.time() - started + ret = dict( + disconnect=disconnect, + started=started, + duration=duration, + ) + ret.update(msg.log(settings)) + return ret diff --git a/pathod/language/actions.py b/pathod/language/actions.py new file mode 100644 index 00000000..34a9bafb --- /dev/null +++ b/pathod/language/actions.py @@ -0,0 +1,126 @@ +import abc +import copy +import random + +import pyparsing as pp + +from . import base + + +class _Action(base.Token): + + """ + An action that operates on the raw data stream of the message. All + actions have one thing in common: an offset that specifies where the + action should take place. + """ + + def __init__(self, offset): + self.offset = offset + + def resolve(self, settings, msg): + """ + Resolves offset specifications to a numeric offset. Returns a copy + of the action object. + """ + c = copy.copy(self) + l = msg.length(settings) + if c.offset == "r": + c.offset = random.randrange(l) + elif c.offset == "a": + c.offset = l + 1 + return c + + def __cmp__(self, other): + return cmp(self.offset, other.offset) + + def __repr__(self): + return self.spec() + + @abc.abstractmethod + def spec(self): # pragma: no cover + pass + + @abc.abstractmethod + def intermediate(self, settings): # pragma: no cover + pass + + +class PauseAt(_Action): + unique_name = None + + def __init__(self, offset, seconds): + _Action.__init__(self, offset) + self.seconds = seconds + + @classmethod + def expr(cls): + e = pp.Literal("p").suppress() + e += base.TokOffset + e += pp.Literal(",").suppress() + e += pp.MatchFirst( + [ + base.v_integer, + pp.Literal("f") + ] + ) + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + return "p%s,%s" % (self.offset, self.seconds) + + def intermediate(self, settings): + return (self.offset, "pause", self.seconds) + + def freeze(self, settings_): + return self + + +class DisconnectAt(_Action): + + def __init__(self, offset): + _Action.__init__(self, offset) + + @classmethod + def expr(cls): + e = pp.Literal("d").suppress() + e += base.TokOffset + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + return "d%s" % self.offset + + def intermediate(self, settings): + return (self.offset, "disconnect") + + def freeze(self, settings_): + return self + + +class InjectAt(_Action): + unique_name = None + + def __init__(self, offset, value): + _Action.__init__(self, offset) + self.value = value + + @classmethod + def expr(cls): + e = pp.Literal("i").suppress() + e += base.TokOffset + e += pp.Literal(",").suppress() + e += base.TokValue + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + return "i%s,%s" % (self.offset, self.value.spec()) + + def intermediate(self, settings): + return ( + self.offset, + "inject", + self.value.get_generator(settings) + ) + + def freeze(self, settings): + return InjectAt(self.offset, self.value.freeze(settings)) diff --git a/pathod/language/base.py b/pathod/language/base.py new file mode 100644 index 00000000..a4302998 --- /dev/null +++ b/pathod/language/base.py @@ -0,0 +1,576 @@ +import operator +import os +import abc +import pyparsing as pp + +from .. import utils +from . import generators, exceptions + +class Settings(object): + + def __init__( + self, + is_client=False, + staticdir=None, + unconstrained_file_access=False, + request_host=None, + websocket_key=None, + protocol=None, + ): + self.is_client = is_client + self.staticdir = staticdir + self.unconstrained_file_access = unconstrained_file_access + self.request_host = request_host + self.websocket_key = websocket_key # TODO: refactor this into the protocol + self.protocol = protocol + + +Sep = pp.Optional(pp.Literal(":")).suppress() + + +v_integer = pp.Word(pp.nums)\ + .setName("integer")\ + .setParseAction(lambda toks: int(toks[0])) + + +v_literal = pp.MatchFirst( + [ + pp.QuotedString( + "\"", + unquoteResults=True, + multiline=True + ), + pp.QuotedString( + "'", + unquoteResults=True, + multiline=True + ), + ] +) + +v_naked_literal = pp.MatchFirst( + [ + v_literal, + pp.Word("".join(i for i in pp.printables if i not in ",:\n@\'\"")) + ] +) + + +class Token(object): + + """ + A token in the specification language. Tokens are immutable. The token + classes have no meaning in and of themselves, and are combined into + Components and Actions to build the language. + """ + __metaclass__ = abc.ABCMeta + + @classmethod + def expr(cls): # pragma: no cover + """ + A parse expression. + """ + return None + + @abc.abstractmethod + def spec(self): # pragma: no cover + """ + A parseable specification for this token. + """ + return None + + @property + def unique_name(self): + """ + Controls uniqueness constraints for tokens. No two tokens with the + same name will be allowed. If no uniquness should be applied, this + should be None. + """ + return self.__class__.__name__.lower() + + def resolve(self, settings_, msg_): + """ + Resolves this token to ready it for transmission. This means that + the calculated offsets of actions are fixed. + + settings: a language.Settings instance + msg: The containing message + """ + return self + + def __repr__(self): + return self.spec() + + +class _TokValueLiteral(Token): + + def __init__(self, val): + self.val = val.decode("string_escape") + + def get_generator(self, settings_): + return self.val + + def freeze(self, settings_): + return self + + +class TokValueLiteral(_TokValueLiteral): + + """ + A literal with Python-style string escaping + """ + @classmethod + def expr(cls): + e = v_literal.copy() + return e.setParseAction(cls.parseAction) + + @classmethod + def parseAction(cls, x): + v = cls(*x) + return v + + def spec(self): + inner = self.val.encode("string_escape") + inner = inner.replace(r"\'", r"\x27") + return "'" + inner + "'" + + +class TokValueNakedLiteral(_TokValueLiteral): + + @classmethod + def expr(cls): + e = v_naked_literal.copy() + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + return self.val.encode("string_escape") + + +class TokValueGenerate(Token): + + def __init__(self, usize, unit, datatype): + if not unit: + unit = "b" + self.usize, self.unit, self.datatype = usize, unit, datatype + + def bytes(self): + return self.usize * utils.SIZE_UNITS[self.unit] + + def get_generator(self, settings_): + return generators.RandomGenerator(self.datatype, self.bytes()) + + def freeze(self, settings): + g = self.get_generator(settings) + return TokValueLiteral(g[:].encode("string_escape")) + + @classmethod + def expr(cls): + e = pp.Literal("@").suppress() + v_integer + + u = reduce( + operator.or_, + [pp.Literal(i) for i in utils.SIZE_UNITS.keys()] + ).leaveWhitespace() + e = e + pp.Optional(u, default=None) + + s = pp.Literal(",").suppress() + s += reduce( + operator.or_, + [pp.Literal(i) for i in generators.DATATYPES.keys()] + ) + e += pp.Optional(s, default="bytes") + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + s = "@%s" % self.usize + if self.unit != "b": + s += self.unit + if self.datatype != "bytes": + s += ",%s" % self.datatype + return s + + +class TokValueFile(Token): + + def __init__(self, path): + self.path = str(path) + + @classmethod + def expr(cls): + e = pp.Literal("<").suppress() + e = e + v_naked_literal + return e.setParseAction(lambda x: cls(*x)) + + def freeze(self, settings_): + return self + + def get_generator(self, settings): + if not settings.staticdir: + raise exceptions.FileAccessDenied("File access disabled.") + s = os.path.expanduser(self.path) + s = os.path.normpath( + os.path.abspath(os.path.join(settings.staticdir, s)) + ) + uf = settings.unconstrained_file_access + if not uf and not s.startswith(settings.staticdir): + raise exceptions.FileAccessDenied( + "File access outside of configured directory" + ) + if not os.path.isfile(s): + raise exceptions.FileAccessDenied("File not readable") + return generators.FileGenerator(s) + + def spec(self): + return "<'%s'" % self.path.encode("string_escape") + + +TokValue = pp.MatchFirst( + [ + TokValueGenerate.expr(), + TokValueFile.expr(), + TokValueLiteral.expr() + ] +) + + +TokNakedValue = pp.MatchFirst( + [ + TokValueGenerate.expr(), + TokValueFile.expr(), + TokValueLiteral.expr(), + TokValueNakedLiteral.expr(), + ] +) + + +TokOffset = pp.MatchFirst( + [ + v_integer, + pp.Literal("r"), + pp.Literal("a") + ] +) + + +class _Component(Token): + + """ + A value component of the primary specification of an message. + Components produce byte values desribe the bytes of the message. + """ + + def values(self, settings): # pragma: no cover + """ + A sequence of values, which can either be strings or generators. + """ + pass + + def string(self, settings=None): + """ + A string representation of the object. + """ + return "".join(i[:] for i in self.values(settings or {})) + + +class KeyValue(_Component): + + """ + A key/value pair. + cls.preamble: leader + """ + + def __init__(self, key, value): + self.key, self.value = key, value + + @classmethod + def expr(cls): + e = pp.Literal(cls.preamble).suppress() + e += TokValue + e += pp.Literal("=").suppress() + e += TokValue + return e.setParseAction(lambda x: cls(*x)) + + def spec(self): + return "%s%s=%s" % (self.preamble, self.key.spec(), self.value.spec()) + + def freeze(self, settings): + return self.__class__( + self.key.freeze(settings), self.value.freeze(settings) + ) + + +class CaselessLiteral(_Component): + + """ + A caseless token that can take only one value. + """ + + def __init__(self, value): + self.value = value + + @classmethod + def expr(cls): + spec = pp.CaselessLiteral(cls.TOK) + spec = spec.setParseAction(lambda x: cls(*x)) + return spec + + def values(self, settings): + return self.TOK + + def spec(self): + return self.TOK + + def freeze(self, settings_): + return self + + +class OptionsOrValue(_Component): + + """ + Can be any of a specified set of options, or a value specifier. + """ + preamble = "" + options = [] + + def __init__(self, value): + # If it's a string, we were passed one of the options, so we lower-case + # it to be canonical. The user can specify a different case by using a + # string value literal. + self.option_used = False + if isinstance(value, basestring): + for i in self.options: + # Find the exact option value in a case-insensitive way + if i.lower() == value.lower(): + self.option_used = True + value = TokValueLiteral(i) + break + self.value = value + + @classmethod + def expr(cls): + parts = [pp.CaselessLiteral(i) for i in cls.options] + m = pp.MatchFirst(parts) + spec = m | TokValue.copy() + spec = spec.setParseAction(lambda x: cls(*x)) + if cls.preamble: + spec = pp.Literal(cls.preamble).suppress() + spec + return spec + + def values(self, settings): + return [ + self.value.get_generator(settings) + ] + + def spec(self): + s = self.value.spec() + if s[1:-1].lower() in self.options: + s = s[1:-1].lower() + return "%s%s" % (self.preamble, s) + + def freeze(self, settings): + return self.__class__(self.value.freeze(settings)) + + +class Integer(_Component): + bounds = (None, None) + preamble = "" + + def __init__(self, value): + v = int(value) + outofbounds = any([ + self.bounds[0] is not None and v < self.bounds[0], + self.bounds[1] is not None and v > self.bounds[1] + ]) + if outofbounds: + raise exceptions.ParseException( + "Integer value must be between %s and %s." % self.bounds, + 0, 0 + ) + self.value = str(value) + + @classmethod + def expr(cls): + e = v_integer.copy() + if cls.preamble: + e = pp.Literal(cls.preamble).suppress() + e + return e.setParseAction(lambda x: cls(*x)) + + def values(self, settings): + return self.value + + def spec(self): + return "%s%s" % (self.preamble, self.value) + + def freeze(self, settings_): + return self + + +class Value(_Component): + + """ + A value component lead by an optional preamble. + """ + preamble = "" + + def __init__(self, value): + self.value = value + + @classmethod + def expr(cls): + e = (TokValue | TokNakedValue) + if cls.preamble: + e = pp.Literal(cls.preamble).suppress() + e + return e.setParseAction(lambda x: cls(*x)) + + def values(self, settings): + return [self.value.get_generator(settings)] + + def spec(self): + return "%s%s" % (self.preamble, self.value.spec()) + + def freeze(self, settings): + return self.__class__(self.value.freeze(settings)) + + +class FixedLengthValue(Value): + + """ + A value component lead by an optional preamble. + """ + preamble = "" + length = None + + def __init__(self, value): + Value.__init__(self, value) + lenguess = None + try: + lenguess = len(value.get_generator(Settings())) + except exceptions.RenderError: + pass + # This check will fail if we know the length upfront + if lenguess is not None and lenguess != self.length: + raise exceptions.RenderError( + "Invalid value length: '%s' is %s bytes, should be %s." % ( + self.spec(), + lenguess, + self.length + ) + ) + + def values(self, settings): + ret = Value.values(self, settings) + l = sum(len(i) for i in ret) + # This check will fail if we don't know the length upfront - i.e. for + # file inputs + if l != self.length: + raise exceptions.RenderError( + "Invalid value length: '%s' is %s bytes, should be %s." % ( + self.spec(), + l, + self.length + ) + ) + return ret + + +class Boolean(_Component): + + """ + A boolean flag. + name = true + -name = false + """ + name = "" + + def __init__(self, value): + self.value = value + + @classmethod + def expr(cls): + e = pp.Optional(pp.Literal("-"), default=True) + e += pp.Literal(cls.name).suppress() + + def parse(s_, loc_, toks): + val = True + if toks[0] == "-": + val = False + return cls(val) + + return e.setParseAction(parse) + + def spec(self): + return "%s%s" % ("-" if not self.value else "", self.name) + + +class IntField(_Component): + + """ + An integer field, where values can optionally specified by name. + """ + names = {} + max = 16 + preamble = "" + + def __init__(self, value): + self.origvalue = value + self.value = self.names.get(value, value) + if self.value > self.max: + raise exceptions.ParseException( + "Value can't exceed %s" % self.max, 0, 0 + ) + + @classmethod + def expr(cls): + parts = [pp.CaselessLiteral(i) for i in cls.names.keys()] + m = pp.MatchFirst(parts) + spec = m | v_integer.copy() + spec = spec.setParseAction(lambda x: cls(*x)) + if cls.preamble: + spec = pp.Literal(cls.preamble).suppress() + spec + return spec + + def values(self, settings): + return [str(self.value)] + + def spec(self): + return "%s%s" % (self.preamble, self.origvalue) + + +class NestedMessage(Token): + + """ + A nested message, as an escaped string with a preamble. + """ + preamble = "" + nest_type = None + + def __init__(self, value): + Token.__init__(self) + self.value = value + try: + self.parsed = self.nest_type( + self.nest_type.expr().parseString( + value.val, + parseAll=True + ) + ) + except pp.ParseException as v: + raise exceptions.ParseException(v.msg, v.line, v.col) + + @classmethod + def expr(cls): + e = pp.Literal(cls.preamble).suppress() + e = e + TokValueLiteral.expr() + return e.setParseAction(lambda x: cls(*x)) + + def values(self, settings): + return [ + self.value.get_generator(settings), + ] + + def spec(self): + return "%s%s" % (self.preamble, self.value.spec()) + + def freeze(self, settings): + f = self.parsed.freeze(settings).spec() + return self.__class__(TokValueLiteral(f.encode("string_escape"))) diff --git a/pathod/language/exceptions.py b/pathod/language/exceptions.py new file mode 100644 index 00000000..84ad3c02 --- /dev/null +++ b/pathod/language/exceptions.py @@ -0,0 +1,22 @@ + +class RenderError(Exception): + pass + + +class FileAccessDenied(RenderError): + pass + + +class ParseException(Exception): + + def __init__(self, msg, s, col): + Exception.__init__(self) + self.msg = msg + self.s = s + self.col = col + + def marked(self): + return "%s\n%s" % (self.s, " " * (self.col - 1) + "^") + + def __str__(self): + return "%s at char %s" % (self.msg, self.col) diff --git a/pathod/language/generators.py b/pathod/language/generators.py new file mode 100644 index 00000000..a17e7052 --- /dev/null +++ b/pathod/language/generators.py @@ -0,0 +1,86 @@ +import string +import random +import mmap + +DATATYPES = dict( + ascii_letters=string.ascii_letters, + ascii_lowercase=string.ascii_lowercase, + ascii_uppercase=string.ascii_uppercase, + digits=string.digits, + hexdigits=string.hexdigits, + octdigits=string.octdigits, + punctuation=string.punctuation, + whitespace=string.whitespace, + ascii=string.printable, + bytes="".join(chr(i) for i in range(256)) +) + + +class TransformGenerator(object): + + """ + Perform a byte-by-byte transform another generator - that is, for each + input byte, the transformation must produce one output byte. + + gen: A generator to wrap + transform: A function (offset, data) -> transformed + """ + + def __init__(self, gen, transform): + self.gen = gen + self.transform = transform + + def __len__(self): + return len(self.gen) + + def __getitem__(self, x): + d = self.gen.__getitem__(x) + return self.transform(x, d) + + def __getslice__(self, a, b): + d = self.gen.__getslice__(a, b) + return self.transform(a, d) + + def __repr__(self): + return "'transform(%s)'" % self.gen + + +class RandomGenerator(object): + + def __init__(self, dtype, length): + self.dtype = dtype + self.length = length + + def __len__(self): + return self.length + + def __getitem__(self, x): + return random.choice(DATATYPES[self.dtype]) + + def __getslice__(self, a, b): + b = min(b, self.length) + chars = DATATYPES[self.dtype] + return "".join(random.choice(chars) for x in range(a, b)) + + def __repr__(self): + return "%s random from %s" % (self.length, self.dtype) + + +class FileGenerator(object): + + def __init__(self, path): + self.path = path + self.fp = file(path, "rb") + self.map = mmap.mmap(self.fp.fileno(), 0, access=mmap.ACCESS_READ) + + def __len__(self): + return len(self.map) + + def __getitem__(self, x): + return self.map.__getitem__(x) + + def __getslice__(self, a, b): + return self.map.__getslice__(a, b) + + def __repr__(self): + return "<%s" % self.path diff --git a/pathod/language/http.py b/pathod/language/http.py new file mode 100644 index 00000000..a82f12fe --- /dev/null +++ b/pathod/language/http.py @@ -0,0 +1,381 @@ + +import abc + +import pyparsing as pp + +import netlib.websockets +from netlib.http import status_codes, user_agents +from . import base, exceptions, actions, message + +# TODO: use netlib.semantics.protocol assemble method, +# instead of duplicating the HTTP on-the-wire representation here. +# see http2 language for an example + +class WS(base.CaselessLiteral): + TOK = "ws" + + +class Raw(base.CaselessLiteral): + TOK = "r" + + +class Path(base.Value): + pass + + +class StatusCode(base.Integer): + pass + + +class Reason(base.Value): + preamble = "m" + + +class Body(base.Value): + preamble = "b" + + +class Times(base.Integer): + preamble = "x" + + +class Method(base.OptionsOrValue): + options = [ + "GET", + "HEAD", + "POST", + "PUT", + "DELETE", + "OPTIONS", + "TRACE", + "CONNECT", + ] + + +class _HeaderMixin(object): + unique_name = None + + def format_header(self, key, value): + return [key, ": ", value, "\r\n"] + + def values(self, settings): + return self.format_header( + self.key.get_generator(settings), + self.value.get_generator(settings), + ) + + +class Header(_HeaderMixin, base.KeyValue): + preamble = "h" + + +class ShortcutContentType(_HeaderMixin, base.Value): + preamble = "c" + key = base.TokValueLiteral("Content-Type") + + +class ShortcutLocation(_HeaderMixin, base.Value): + preamble = "l" + key = base.TokValueLiteral("Location") + + +class ShortcutUserAgent(_HeaderMixin, base.OptionsOrValue): + preamble = "u" + options = [i[1] for i in user_agents.UASTRINGS] + key = base.TokValueLiteral("User-Agent") + + def values(self, settings): + value = self.value.val + if self.option_used: + value = user_agents.get_by_shortcut(value.lower())[2] + + return self.format_header( + self.key.get_generator(settings), + value + ) + + +def get_header(val, headers): + """ + Header keys may be Values, so we have to "generate" them as we try the + match. + """ + for h in headers: + k = h.key.get_generator({}) + if len(k) == len(val) and k[:].lower() == val.lower(): + return h + return None + + +class _HTTPMessage(message.Message): + version = "HTTP/1.1" + + @property + def actions(self): + return self.toks(actions._Action) + + @property + def raw(self): + return bool(self.tok(Raw)) + + @property + def body(self): + return self.tok(Body) + + @abc.abstractmethod + def preamble(self, settings): # pragma: no cover + pass + + @property + def headers(self): + return self.toks(_HeaderMixin) + + def values(self, settings): + vals = self.preamble(settings) + vals.append("\r\n") + for h in self.headers: + vals.extend(h.values(settings)) + vals.append("\r\n") + if self.body: + vals.extend(self.body.values(settings)) + return vals + + +class Response(_HTTPMessage): + unique_name = None + comps = ( + Header, + ShortcutContentType, + ShortcutLocation, + Raw, + Reason, + Body, + + actions.PauseAt, + actions.DisconnectAt, + actions.InjectAt, + ) + logattrs = ["status_code", "reason", "version", "body"] + + @property + def ws(self): + return self.tok(WS) + + @property + def status_code(self): + return self.tok(StatusCode) + + @property + def reason(self): + return self.tok(Reason) + + def preamble(self, settings): + l = [self.version, " "] + l.extend(self.status_code.values(settings)) + status_code = int(self.status_code.value) + l.append(" ") + if self.reason: + l.extend(self.reason.values(settings)) + else: + l.append( + status_codes.RESPONSES.get( + status_code, + "Unknown code" + ) + ) + return l + + def resolve(self, settings, msg=None): + tokens = self.tokens[:] + if self.ws: + if not settings.websocket_key: + raise exceptions.RenderError( + "No websocket key - have we seen a client handshake?" + ) + if not self.status_code: + tokens.insert( + 1, + StatusCode(101) + ) + headers = netlib.websockets.WebsocketsProtocol.server_handshake_headers( + settings.websocket_key + ) + for i in headers.fields: + if not get_header(i[0], self.headers): + tokens.append( + Header( + base.TokValueLiteral(i[0]), + base.TokValueLiteral(i[1])) + ) + if not self.raw: + if not get_header("Content-Length", self.headers): + if not self.body: + length = 0 + else: + length = sum( + len(i) for i in self.body.values(settings) + ) + tokens.append( + Header( + base.TokValueLiteral("Content-Length"), + base.TokValueLiteral(str(length)), + ) + ) + intermediate = self.__class__(tokens) + return self.__class__( + [i.resolve(settings, intermediate) for i in tokens] + ) + + @classmethod + def expr(cls): + parts = [i.expr() for i in cls.comps] + atom = pp.MatchFirst(parts) + resp = pp.And( + [ + pp.MatchFirst( + [ + WS.expr() + pp.Optional( + base.Sep + StatusCode.expr() + ), + StatusCode.expr(), + ] + ), + pp.ZeroOrMore(base.Sep + atom) + ] + ) + resp = resp.setParseAction(cls) + return resp + + def spec(self): + return ":".join([i.spec() for i in self.tokens]) + + +class NestedResponse(base.NestedMessage): + preamble = "s" + nest_type = Response + + +class Request(_HTTPMessage): + comps = ( + Header, + ShortcutContentType, + ShortcutUserAgent, + Raw, + NestedResponse, + Body, + Times, + + actions.PauseAt, + actions.DisconnectAt, + actions.InjectAt, + ) + logattrs = ["method", "path", "body"] + + @property + def ws(self): + return self.tok(WS) + + @property + def method(self): + return self.tok(Method) + + @property + def path(self): + return self.tok(Path) + + @property + def times(self): + return self.tok(Times) + + @property + def nested_response(self): + return self.tok(NestedResponse) + + def preamble(self, settings): + v = self.method.values(settings) + v.append(" ") + v.extend(self.path.values(settings)) + if self.nested_response: + v.append(self.nested_response.parsed.spec()) + v.append(" ") + v.append(self.version) + return v + + def resolve(self, settings, msg=None): + tokens = self.tokens[:] + if self.ws: + if not self.method: + tokens.insert( + 1, + Method("get") + ) + for i in netlib.websockets.WebsocketsProtocol.client_handshake_headers().fields: + if not get_header(i[0], self.headers): + tokens.append( + Header( + base.TokValueLiteral(i[0]), + base.TokValueLiteral(i[1]) + ) + ) + if not self.raw: + if not get_header("Content-Length", self.headers): + if self.body: + length = sum( + len(i) for i in self.body.values(settings) + ) + tokens.append( + Header( + base.TokValueLiteral("Content-Length"), + base.TokValueLiteral(str(length)), + ) + ) + if settings.request_host: + if not get_header("Host", self.headers): + tokens.append( + Header( + base.TokValueLiteral("Host"), + base.TokValueLiteral(settings.request_host) + ) + ) + intermediate = self.__class__(tokens) + return self.__class__( + [i.resolve(settings, intermediate) for i in tokens] + ) + + @classmethod + def expr(cls): + parts = [i.expr() for i in cls.comps] + atom = pp.MatchFirst(parts) + resp = pp.And( + [ + pp.MatchFirst( + [ + WS.expr() + pp.Optional( + base.Sep + Method.expr() + ), + Method.expr(), + ] + ), + base.Sep, + Path.expr(), + pp.ZeroOrMore(base.Sep + atom) + ] + ) + resp = resp.setParseAction(cls) + return resp + + def spec(self): + return ":".join([i.spec() for i in self.tokens]) + + +def make_error_response(reason, body=None): + tokens = [ + StatusCode("800"), + Header( + base.TokValueLiteral("Content-Type"), + base.TokValueLiteral("text/plain") + ), + Reason(base.TokValueLiteral(reason)), + Body(base.TokValueLiteral("pathod error: " + (body or reason))), + ] + return Response(tokens) diff --git a/pathod/language/http2.py b/pathod/language/http2.py new file mode 100644 index 00000000..d5e3ca31 --- /dev/null +++ b/pathod/language/http2.py @@ -0,0 +1,299 @@ +import pyparsing as pp + +from netlib import http +from netlib.http import user_agents, Headers +from . import base, message + +""" + Normal HTTP requests: + <method>:<path>:<header>:<body> + e.g.: + GET:/ + GET:/:h"foo"="bar" + POST:/:h"foo"="bar":b'content body payload' + + Normal HTTP responses: + <code>:<header>:<body> + e.g.: + 200 + 302:h"foo"="bar" + 404:h"foo"="bar":b'content body payload' + + Individual HTTP/2 frames: + h2f:<payload_length>:<type>:<flags>:<stream_id>:<payload> + e.g.: + h2f:0:PING + h2f:42:HEADERS:END_HEADERS:0x1234567:foo=bar,host=example.com + h2f:42:DATA:END_STREAM,PADDED:0x1234567:'content body payload' +""" + +def get_header(val, headers): + """ + Header keys may be Values, so we have to "generate" them as we try the + match. + """ + for h in headers: + k = h.key.get_generator({}) + if len(k) == len(val) and k[:].lower() == val.lower(): + return h + return None + + +class _HeaderMixin(object): + unique_name = None + + def values(self, settings): + return ( + self.key.get_generator(settings), + self.value.get_generator(settings), + ) + +class _HTTP2Message(message.Message): + @property + def actions(self): + return [] # self.toks(actions._Action) + + @property + def headers(self): + headers = self.toks(_HeaderMixin) + + if not self.raw: + if not get_header("content-length", headers): + if not self.body: + length = 0 + else: + length = len(self.body.string()) + headers.append( + Header( + base.TokValueLiteral("content-length"), + base.TokValueLiteral(str(length)), + ) + ) + return headers + + @property + def raw(self): + return bool(self.tok(Raw)) + + @property + def body(self): + return self.tok(Body) + + def resolve(self, settings): + return self + + +class StatusCode(base.Integer): + pass + + +class Method(base.OptionsOrValue): + options = [ + "GET", + "HEAD", + "POST", + "PUT", + "DELETE", + ] + + +class Path(base.Value): + pass + + +class Header(_HeaderMixin, base.KeyValue): + preamble = "h" + + +class ShortcutContentType(_HeaderMixin, base.Value): + preamble = "c" + key = base.TokValueLiteral("content-type") + + +class ShortcutLocation(_HeaderMixin, base.Value): + preamble = "l" + key = base.TokValueLiteral("location") + + +class ShortcutUserAgent(_HeaderMixin, base.OptionsOrValue): + preamble = "u" + options = [i[1] for i in user_agents.UASTRINGS] + key = base.TokValueLiteral("user-agent") + + def values(self, settings): + value = self.value.val + if self.option_used: + value = user_agents.get_by_shortcut(value.lower())[2] + + return ( + self.key.get_generator(settings), + value + ) + + +class Raw(base.CaselessLiteral): + TOK = "r" + + +class Body(base.Value): + preamble = "b" + + +class Times(base.Integer): + preamble = "x" + + +class Response(_HTTP2Message): + unique_name = None + comps = ( + Header, + Body, + ShortcutContentType, + ShortcutLocation, + Raw, + ) + + def __init__(self, tokens): + super(Response, self).__init__(tokens) + self.rendered_values = None + self.stream_id = 2 + + @property + def status_code(self): + return self.tok(StatusCode) + + @classmethod + def expr(cls): + parts = [i.expr() for i in cls.comps] + atom = pp.MatchFirst(parts) + resp = pp.And( + [ + StatusCode.expr(), + pp.ZeroOrMore(base.Sep + atom) + ] + ) + resp = resp.setParseAction(cls) + return resp + + def values(self, settings): + if self.rendered_values: + return self.rendered_values + else: + headers = Headers([header.values(settings) for header in self.headers]) + + body = self.body + if body: + body = body.string() + + resp = http.Response( + (2, 0), + self.status_code.string(), + '', + headers, + body, + ) + resp.stream_id = self.stream_id + + self.rendered_values = settings.protocol.assemble(resp) + return self.rendered_values + + def spec(self): + return ":".join([i.spec() for i in self.tokens]) + + +class NestedResponse(base.NestedMessage): + preamble = "s" + nest_type = Response + + +class Request(_HTTP2Message): + comps = ( + Header, + ShortcutContentType, + ShortcutUserAgent, + Raw, + NestedResponse, + Body, + Times, + ) + logattrs = ["method", "path"] + + def __init__(self, tokens): + super(Request, self).__init__(tokens) + self.rendered_values = None + self.stream_id = 1 + + @property + def method(self): + return self.tok(Method) + + @property + def path(self): + return self.tok(Path) + + @property + def nested_response(self): + return self.tok(NestedResponse) + + @property + def times(self): + return self.tok(Times) + + @classmethod + def expr(cls): + parts = [i.expr() for i in cls.comps] + atom = pp.MatchFirst(parts) + resp = pp.And( + [ + Method.expr(), + base.Sep, + Path.expr(), + pp.ZeroOrMore(base.Sep + atom) + ] + ) + resp = resp.setParseAction(cls) + return resp + + def values(self, settings): + if self.rendered_values: + return self.rendered_values + else: + path = self.path.string() + if self.nested_response: + path += self.nested_response.parsed.spec() + + headers = Headers([header.values(settings) for header in self.headers]) + + body = self.body + if body: + body = body.string() + + req = http.Request( + '', + self.method.string(), + '', + '', + '', + path, + (2, 0), + headers, + body, + ) + req.stream_id = self.stream_id + + self.rendered_values = settings.protocol.assemble(req) + return self.rendered_values + + def spec(self): + return ":".join([i.spec() for i in self.tokens]) + +def make_error_response(reason, body=None): + tokens = [ + StatusCode("800"), + Body(base.TokValueLiteral("pathod error: " + (body or reason))), + ] + return Response(tokens) + + +# class Frame(message.Message): +# pass diff --git a/pathod/language/message.py b/pathod/language/message.py new file mode 100644 index 00000000..33124856 --- /dev/null +++ b/pathod/language/message.py @@ -0,0 +1,96 @@ +import abc +from . import actions, exceptions + +LOG_TRUNCATE = 1024 + + +class Message(object): + __metaclass__ = abc.ABCMeta + logattrs = [] + + def __init__(self, tokens): + track = set([]) + for i in tokens: + if i.unique_name: + if i.unique_name in track: + raise exceptions.ParseException( + "Message has multiple %s clauses, " + "but should only have one." % i.unique_name, + 0, 0 + ) + else: + track.add(i.unique_name) + self.tokens = tokens + + def strike_token(self, name): + toks = [i for i in self.tokens if i.unique_name != name] + return self.__class__(toks) + + def toks(self, klass): + """ + Fetch all tokens that are instances of klass + """ + return [i for i in self.tokens if isinstance(i, klass)] + + def tok(self, klass): + """ + Fetch first token that is an instance of klass + """ + l = self.toks(klass) + if l: + return l[0] + + def length(self, settings): + """ + Calculate the length of the base message without any applied + actions. + """ + return sum(len(x) for x in self.values(settings)) + + def preview_safe(self): + """ + Return a copy of this message that issafe for previews. + """ + tokens = [i for i in self.tokens if not isinstance(i, actions.PauseAt)] + return self.__class__(tokens) + + def maximum_length(self, settings): + """ + Calculate the maximum length of the base message with all applied + actions. + """ + l = self.length(settings) + for i in self.actions: + if isinstance(i, actions.InjectAt): + l += len(i.value.get_generator(settings)) + return l + + @classmethod + def expr(cls): # pragma: no cover + pass + + def log(self, settings): + """ + A dictionary that should be logged if this message is served. + """ + ret = {} + for i in self.logattrs: + v = getattr(self, i) + # Careful not to log any VALUE specs without sanitizing them first. + # We truncate at 1k. + if hasattr(v, "values"): + v = [x[:LOG_TRUNCATE] for x in v.values(settings)] + v = "".join(v).encode("string_escape") + elif hasattr(v, "__len__"): + v = v[:LOG_TRUNCATE] + v = v.encode("string_escape") + ret[i] = v + ret["spec"] = self.spec() + return ret + + def freeze(self, settings): + r = self.resolve(settings) + return self.__class__([i.freeze(settings) for i in r.tokens]) + + def __repr__(self): + return self.spec() diff --git a/pathod/language/websockets.py b/pathod/language/websockets.py new file mode 100644 index 00000000..ea7c870e --- /dev/null +++ b/pathod/language/websockets.py @@ -0,0 +1,241 @@ +import os +import netlib.websockets +import pyparsing as pp +from . import base, generators, actions, message + +NESTED_LEADER = "pathod!" + + +class WF(base.CaselessLiteral): + TOK = "wf" + + +class OpCode(base.IntField): + names = { + "continue": netlib.websockets.OPCODE.CONTINUE, + "text": netlib.websockets.OPCODE.TEXT, + "binary": netlib.websockets.OPCODE.BINARY, + "close": netlib.websockets.OPCODE.CLOSE, + "ping": netlib.websockets.OPCODE.PING, + "pong": netlib.websockets.OPCODE.PONG, + } + max = 15 + preamble = "c" + + +class Body(base.Value): + preamble = "b" + + +class RawBody(base.Value): + unique_name = "body" + preamble = "r" + + +class Fin(base.Boolean): + name = "fin" + + +class RSV1(base.Boolean): + name = "rsv1" + + +class RSV2(base.Boolean): + name = "rsv2" + + +class RSV3(base.Boolean): + name = "rsv3" + + +class Mask(base.Boolean): + name = "mask" + + +class Key(base.FixedLengthValue): + preamble = "k" + length = 4 + + +class KeyNone(base.CaselessLiteral): + unique_name = "key" + TOK = "knone" + + +class Length(base.Integer): + bounds = (0, 1 << 64) + preamble = "l" + + +class Times(base.Integer): + preamble = "x" + + +COMPONENTS = ( + OpCode, + Length, + # Bit flags + Fin, + RSV1, + RSV2, + RSV3, + Mask, + actions.PauseAt, + actions.DisconnectAt, + actions.InjectAt, + KeyNone, + Key, + Times, + + Body, + RawBody, +) + + +class WebsocketFrame(message.Message): + components = COMPONENTS + logattrs = ["body"] + # Used for nested frames + unique_name = "body" + + @property + def actions(self): + return self.toks(actions._Action) + + @property + def body(self): + return self.tok(Body) + + @property + def rawbody(self): + return self.tok(RawBody) + + @property + def opcode(self): + return self.tok(OpCode) + + @property + def fin(self): + return self.tok(Fin) + + @property + def rsv1(self): + return self.tok(RSV1) + + @property + def rsv2(self): + return self.tok(RSV2) + + @property + def rsv3(self): + return self.tok(RSV3) + + @property + def mask(self): + return self.tok(Mask) + + @property + def key(self): + return self.tok(Key) + + @property + def knone(self): + return self.tok(KeyNone) + + @property + def times(self): + return self.tok(Times) + + @property + def toklength(self): + return self.tok(Length) + + @classmethod + def expr(cls): + parts = [i.expr() for i in cls.components] + atom = pp.MatchFirst(parts) + resp = pp.And( + [ + WF.expr(), + base.Sep, + pp.ZeroOrMore(base.Sep + atom) + ] + ) + resp = resp.setParseAction(cls) + return resp + + @property + def nested_frame(self): + return self.tok(NestedFrame) + + def resolve(self, settings, msg=None): + tokens = self.tokens[:] + if not self.mask and settings.is_client: + tokens.append( + Mask(True) + ) + if not self.knone and self.mask and self.mask.value and not self.key: + tokens.append( + Key(base.TokValueLiteral(os.urandom(4))) + ) + return self.__class__( + [i.resolve(settings, self) for i in tokens] + ) + + def values(self, settings): + if self.body: + bodygen = self.body.value.get_generator(settings) + length = len(self.body.value.get_generator(settings)) + elif self.rawbody: + bodygen = self.rawbody.value.get_generator(settings) + length = len(self.rawbody.value.get_generator(settings)) + elif self.nested_frame: + bodygen = NESTED_LEADER + self.nested_frame.parsed.spec() + length = len(bodygen) + else: + bodygen = None + length = 0 + if self.toklength: + length = int(self.toklength.value) + frameparts = dict( + payload_length=length + ) + if self.mask and self.mask.value: + frameparts["mask"] = True + if self.knone: + frameparts["masking_key"] = None + elif self.key: + key = self.key.values(settings)[0][:] + frameparts["masking_key"] = key + for i in ["opcode", "fin", "rsv1", "rsv2", "rsv3", "mask"]: + v = getattr(self, i, None) + if v is not None: + frameparts[i] = v.value + frame = netlib.websockets.FrameHeader(**frameparts) + vals = [bytes(frame)] + if bodygen: + if frame.masking_key and not self.rawbody: + masker = netlib.websockets.Masker(frame.masking_key) + vals.append( + generators.TransformGenerator( + bodygen, + masker.mask + ) + ) + else: + vals.append(bodygen) + return vals + + def spec(self): + return ":".join([i.spec() for i in self.tokens]) + + +class NestedFrame(base.NestedMessage): + preamble = "f" + nest_type = WebsocketFrame + + +class WebsocketClientFrame(WebsocketFrame): + components = COMPONENTS + ( + NestedFrame, + ) diff --git a/pathod/language/writer.py b/pathod/language/writer.py new file mode 100644 index 00000000..1a27e1ef --- /dev/null +++ b/pathod/language/writer.py @@ -0,0 +1,67 @@ +import time +from netlib.exceptions import TcpDisconnect +import netlib.tcp + +BLOCKSIZE = 1024 +# It's not clear what the upper limit for time.sleep is. It's lower than the +# maximum int or float. 1 year should do. +FOREVER = 60 * 60 * 24 * 365 + + +def send_chunk(fp, val, blocksize, start, end): + """ + (start, end): Inclusive lower bound, exclusive upper bound. + """ + for i in range(start, end, blocksize): + fp.write( + val[i:min(i + blocksize, end)] + ) + return end - start + + +def write_values(fp, vals, actions, sofar=0, blocksize=BLOCKSIZE): + """ + vals: A list of values, which may be strings or Value objects. + + actions: A list of (offset, action, arg) tuples. Action may be "pause" + or "disconnect". + + Both vals and actions are in reverse order, with the first items last. + + Return True if connection should disconnect. + """ + sofar = 0 + try: + while vals: + v = vals.pop() + offset = 0 + while actions and actions[-1][0] < (sofar + len(v)): + a = actions.pop() + offset += send_chunk( + fp, + v, + blocksize, + offset, + a[0] - sofar - offset + ) + if a[1] == "pause": + time.sleep( + FOREVER if a[2] == "f" else a[2] + ) + elif a[1] == "disconnect": + return True + elif a[1] == "inject": + send_chunk(fp, a[2], blocksize, 0, len(a[2])) + send_chunk(fp, v, blocksize, offset, len(v)) + sofar += len(v) + # Remainders + while actions: + a = actions.pop() + if a[1] == "pause": + time.sleep(a[2]) + elif a[1] == "disconnect": + return True + elif a[1] == "inject": + send_chunk(fp, a[2], blocksize, 0, len(a[2])) + except TcpDisconnect: # pragma: no cover + return True |