From e037fe05ff1f0c2893b3f51e06e0261ca4245d63 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 7 Jun 2016 12:55:32 +1200 Subject: Migrate pathod docs to Sphinx All the content of the pathod docs are moved into Sphinx. The interactive format has not translated well to static docs, and there's still a lot of rewriting, format fixing, structuring, etc to be done. --- docs/_static/theme_overrides.css | 11 ++ docs/conf.py | 10 +- docs/index.rst | 12 +- docs/pathod/intro.rst | 307 +++++++++++++++++++++++++++++++++++++++ docs/pathod/language.rst | 257 ++++++++++++++++++++++++++++++++ docs/pathod/library.rst | 14 ++ docs/pathod/test.rst | 35 +++++ 7 files changed, 643 insertions(+), 3 deletions(-) create mode 100644 docs/_static/theme_overrides.css create mode 100644 docs/pathod/intro.rst create mode 100644 docs/pathod/language.rst create mode 100644 docs/pathod/library.rst create mode 100644 docs/pathod/test.rst (limited to 'docs') diff --git a/docs/_static/theme_overrides.css b/docs/_static/theme_overrides.css new file mode 100644 index 00000000..63c7cc78 --- /dev/null +++ b/docs/_static/theme_overrides.css @@ -0,0 +1,11 @@ + +/* override table width restrictions */ +.wy-table-responsive table td, .wy-table-responsive table th { + white-space: normal; +} + +.wy-table-responsive { + margin-bottom: 24px; + max-width: 100%; + overflow: visible; +} diff --git a/docs/conf.py b/docs/conf.py index 01bcce1b..bd51c4c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -153,7 +153,7 @@ html_favicon = "favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] +html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. 
These files are copied @@ -216,4 +216,10 @@ html_favicon = "favicon.ico" #html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'mitmproxydoc' \ No newline at end of file +htmlhelp_basename = 'mitmproxydoc' + +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # overrides for wide tables in RTD theme + ], +} diff --git a/docs/index.rst b/docs/index.rst index d3b6f434..28c0c66f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -64,6 +64,17 @@ tutorials/gamecenter tutorials/transparent-dhcp + +.. toctree:: + :hidden: + :caption: Pathod & Pathoc + + pathod/intro + pathod/language + pathod/library + pathod/test + + .. toctree:: :hidden: :caption: Hacking @@ -80,4 +91,3 @@ * :ref:`genindex` * :ref:`modindex` - diff --git a/docs/pathod/intro.rst b/docs/pathod/intro.rst new file mode 100644 index 00000000..f4c8b974 --- /dev/null +++ b/docs/pathod/intro.rst @@ -0,0 +1,307 @@ +.. _intro: + +Pathology 101 +============= + + +pathod +------ + +Pathod is a pathological HTTP daemon designed to let you craft almost any +conceivable HTTP response, including ones that creatively violate the +standards. HTTP responses are specified using a :ref:`small, terse language +<language>` which pathod shares with its evil twin :ref:`pathoc`. To start +playing with pathod, fire up the daemon: + +>>> pathod + +By default, the service listens on port 9999 of localhost, and the default +crafting anchor point is the path **/p/**. Anything after this URL prefix is +treated as a response specifier. So, hitting the following URL will generate an +HTTP 200 response with 100 bytes of random data: + + http://localhost:9999/p/200:b@100 + +See the :ref:`language documentation <language>` to get (much) fancier. The +pathod daemon also takes a range of configuration options. To view those, use +the command-line help: + +>>> pathod --help + +Mimicking a proxy +^^^^^^^^^^^^^^^^^ + +Pathod automatically responds to both straight HTTP and proxy requests. 
For +proxy requests, the upstream host is ignored, and the path portion of the URL +is used to match anchors. This lets you test software that supports a proxy +configuration by spoofing responses from upstream servers. + +By default, we treat all proxy CONNECT requests as HTTPS traffic, serving the +response using either pathod's built-in certificates, or the cert/key pair +specified by the user. You can over-ride this behaviour if you're testing a +client that makes a non-SSL CONNECT request using the **-C** command-line +option. + +Anchors +^^^^^^^ + +Anchors provide an alternative to specifying the response in the URL. Instead, +you attach a response to a pre-configured anchor point, specified with a regex. +When a URL matching the regex is requested, the specified response is served. + +>>> pathod -a "/foo=200" + +Here, "/foo" is the regex specifying the anchor path, and the part after the "=" +is a response specifier. + + +File Access +^^^^^^^^^^^ + +There are two operators in the :ref:`language <language>` that load contents +from file - the **+** operator to load an entire request specification from +file, and the **<** value specifier. In pathod, both of these operators are +restricted to a directory specified at startup, or disabled if no directory is +specified: + +>>> pathod -d ~/staticdir + + +Internal Error Responses +^^^^^^^^^^^^^^^^^^^^^^^^ + +Pathod uses the non-standard 800 response code to indicate internal errors, to +distinguish them from crafted responses. For example, a request to: + + http://localhost:9999/p/foo + +... will return an 800 response because "foo" is not a valid page specifier. + + + + + +.. _pathoc: + + +pathoc +------ + +Pathoc is a perverse HTTP client designed to let you craft almost any +conceivable HTTP request, including ones that creatively violate the standards. +HTTP requests are specified using a :ref:`small, terse language <language>`, +which pathoc shares with its server-side twin pathod. 
To view pathoc's complete +range of options, use the command-line help: + +>>> pathoc --help + + +Getting Started +^^^^^^^^^^^^^^^ + +The basic pattern for pathoc commands is as follows: + + pathoc hostname request [request ...] + +That is, we specify the hostname to connect to, followed by one or more +requests. Lets start with a simple example:: + + > pathoc google.com get:/ + 07-06-16 12:13:43: >> 'GET':/ + << 302 Found: 261 bytes + +Here, we make a GET request to the path / on port 80 of google.com. Pathoc's +output tells us that the server responded with a 302 redirection. We can tell +pathoc to connect using SSL, in which case the default port is changed to 443 +(you can over-ride the default port with the **-p** command-line option):: + + > pathoc -s www.google.com get:/ + 07-06-16 12:14:56: >> 'GET':/ + << 302 Found: 262 bytes + + +Multiple Requests +^^^^^^^^^^^^^^^^^ + +There are two ways to tell pathoc to issue multiple requests. The first is to specify +them on the command-line, like so:: + + > pathoc google.com get:/ get:/ + 07-06-16 12:21:04: >> 'GET':/ + << 302 Found: 261 bytes + 07-06-16 12:21:04: >> 'GET':/ + << 302 Found: 261 bytes + +In this case, pathoc issues the specified requests over the same TCP connection - +so in the above example only one connection is made to google.com + +The other way to issue multiple requests is to use the **-n** flag:: + + > pathoc -n 2 google.com get:/ + 07-06-16 12:21:04: >> 'GET':/ + << 302 Found: 261 bytes + 07-06-16 12:21:04: >> 'GET':/ + << 302 Found: 261 bytes + +The output is identical, but two separate TCP connections are made to the +upstream server. These two specification styles can be combined:: + + pathoc -n 2 google.com get:/ get:/ + + +Here, two distinct TCP connections are made, with two requests issued over +each. + + + +Basic Fuzzing +^^^^^^^^^^^^^ + +The combination of pathoc's powerful request specification language and a few +of its command-line options makes for quite a powerful basic fuzzer. 
Here's an +example:: + + pathoc -e -I 200 -t 2 -n 1000 localhost get:/:b@10:ir,@1 + +The request specified here is a valid GET with a body consisting of 10 random bytes, +but with 1 random byte inserted in a random place. This could be in the headers, +in the initial request line, or in the body itself. There are a few things +to note here: + +- Corrupting the request in this way will often make the server enter a state where + it's awaiting more input from the client. This is where the + **-t** option comes in, which sets a timeout that causes pathoc to + disconnect after two seconds. +- The **-n** option tells pathoc to repeat the request 1000 times. +- The **-I** option tells pathoc to ignore HTTP 200 response codes. + You can use this to fine-tune what pathoc considers to be an exceptional + condition, and therefore log-worthy. +- The **-e** option tells pathoc to print an explanation of each logged + request, in the form of an expanded pathoc specification with all random + portions and automatic header additions resolved. This lets you precisely + replay a request that triggered an error. + + +Interacting with Proxies +^^^^^^^^^^^^^^^^^^^^^^^^ + +Pathoc has a reasonably sophisticated suite of features for interacting with +proxies. The proxy request syntax very closely mirrors that of straight HTTP, +which means that it is possible to make proxy-style requests using pathoc +without any additional syntax, by simply specifying a full URL instead of a +simple path: + +>>> pathoc -p 8080 localhost "get:'http://google.com'" + +Another common use case is to use an HTTP CONNECT request to probe remote +servers via a proxy. This is done with the **-c** command-line option, which +allows you to specify a remote host and port pair: + +>>> pathoc -c google.com:80 -p 8080 localhost get:/ + +Note that pathoc does **not** negotiate SSL without being explicitly instructed +to do so. 
If you're making a CONNECT request to an SSL-protected resource, you +must also pass the **-s** flag: + +>>> pathoc -sc google.com:443 -p 8080 localhost get:/ + + + +Embedded response specification +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One interesting feature of the Request specification language is that you can +embed a response specification in it, which is then added to the request path. +Here's an example: + +>>> pathoc localhost:9999 "get:/p/:s'401:ir,@1'" + +This crafts a request that connects to the pathod server, and which then crafts +a response that generates a 401, with one random byte embedded at a random +point. The response specification is parsed and expanded by pathoc, so you see +syntax errors immediately. This really becomes handy when combined with the +**-e** flag to show the expanded request:: + + 07-06-16 12:32:01: >> 'GET':/p/:s'401:i35,\x27\\x1b\x27:h\x27Content-Length\x27=\x270\x27:h\x27Content-Length\x27=\x270\x27':h'Host'='localhost' + << 401 Unauthorized: 0 bytes + +Note that the embedded response has been resolved *before* being sent to +the server, so that "ir,@1" (embed a random byte at a random location) has +become "i35,\'\\x1b\'" (embed the byte 0x1b at offset 35). You now have a +pathoc request specification that is precisely reproducible, even with random +components. This feature comes in terribly handy when testing a proxy, since +you can now drive the server response completely from the client, and have a +complete log of reproducible requests to analyze afterwards. + + +Request Examples +---------------- + +.. 
list-table:: + :widths: 50 50 + :header-rows: 0 + + * - get:/ + - Get path / + + * - get:/:b@100 + - 100 random bytes as the body + + * - get:/:h"Etag"="';drop table browsers;" + - Add a header + + * - get:/:u"';drop table browsers;" + - Add a User-Agent header + + * - get:/:b@100:dr + - Drop the connection randomly + + * - get:/:b@100,ascii:ir,@1 + - 100 ASCII bytes as the body, and randomly inject a random byte + + * - ws:/ + - Initiate a websocket handshake. + + +Response Examples +----------------- + +.. list-table:: + :widths: 50 50 + :header-rows: 0 + + + * - 200 + - A basic HTTP 200 response. + + * - 200:r + - A basic HTTP 200 response with no Content-Length header. This will hang. + + * - 200:da + - Server-side disconnect after all content has been sent. + + * - 200:b\@100 + - 100 random bytes as the body. A Content-Length header is added, so the disconnect + is no longer needed. + + * - 200:b\@100:h"Etag"="';drop table servers;" + - Add a header + + * - 200:b\@100:dr + - Drop the connection randomly + + * - 200:b\@100,ascii:ir,@1 + - 100 ASCII bytes as the body, and randomly inject a random byte + + * - 200:b\@1k:c"text/json" + - 1k of random bytes, with a text/json content type + + * - 200:b\@1k:p50,120 + - 1k of random bytes, pause for 120 seconds after 50 bytes + + * - 200:b\@1k:pr,f + - 1k of random bytes, but hang forever at a random location + + * - 200:b\@100:h\@1k,ascii_letters='foo' + - 100 random bytes as the body, randomly generated 1k header name, with the value + 'foo'. diff --git a/docs/pathod/language.rst b/docs/pathod/language.rst new file mode 100644 index 00000000..672e21b6 --- /dev/null +++ b/docs/pathod/language.rst @@ -0,0 +1,257 @@ +.. _language: + +language spec +============= + +************ +HTTP Request +************ + + **method:path:[colon-separated list of features]** + +.. list-table:: + :widths: 20 80 + :header-rows: 0 + + * - method + - A :ref:`VALUE` specifying the HTTP method to + use. 
Standard methods do not need to be enclosed in quotes, while + non-standard methods can be specified as quoted strings. + + The special method **ws** creates a valid websocket upgrade + GET request, and signals to pathoc to switch to websocket receive + mode if the server responds correctly. Apart from that, websocket + requests are just like any other, and all aspects of the request + can be over-ridden. + * - h\:\ :ref:`VALUE`\ =\ :ref:`VALUE`\ + - Set a header. + * - r + - Set the **raw** flag on this request. Pathoc will not calculate a + *Content-Length* header if a body is set. + * - c\ :ref:`VALUE` + - A shortcut for setting the Content-Type header. Equivalent to + ``h"Content-Type"=VALUE`` + * - u\ :ref:`VALUE` + uSHORTCUT + - Set a User-Agent header on this request. You can specify either a + complete :ref:`VALUE`, or a User-Agent shortcut: **android**, + **blackberry**, **bingbot**, **chrome**, **firefox**, **googlebot**, + **ie9**, **ipad**, **iphone**, **safari**. + * - b\ :ref:`VALUE` + - Set the body. The appropriate Content-Length header is added + automatically unless the **r** flag is set. + * - s\ :ref:`VALUE` + - An embedded Response specification, appended to the path of the request. + * - x\ :ref:`INTEGER` + - Repeat this message N times. + * - d\ :ref:`OFFSET` + - Disconnect after OFFSET bytes (HTTP/1 only). + * - i\ :ref:`OFFSET`,\ :ref:`VALUE` + - Inject the specified value at the offset (HTTP/1 only) + * - p\ :ref:`OFFSET`,SECONDS + - Pause for SECONDS seconds after OFFSET bytes. SECONDS can be an integer + or "f" to pause forever (HTTP/1 only) + + +************* +HTTP Response +************* + + **code:[colon-separated list of features]** + +.. list-table:: + :widths: 20 80 + :header-rows: 0 + + * - code + - An integer specifying the HTTP response code. + + The special code **ws** creates a valid websocket upgrade + response (code 101), and moves pathod to websocket mode. 
Apart + from that, websocket responses are just like any other, and all + aspects of the response can be over-ridden. + * - m\ :ref:`VALUE` + - HTTP Reason message. Automatically chosen according to the response + code if not specified. (HTTP/1 only) + * - h\:\ :ref:`VALUE`\ =\ :ref:`VALUE`\ + - Set a header. + * - r + - Set the **raw** flag on this response. Pathod will not calculate a + *Content-Length* header if a body is set. + * - l\ :ref:`VALUE` + - A shortcut for setting the Location header. Equivalent to + ``h"Location"=VALUE`` + * - c\ :ref:`VALUE` + - A shortcut for setting the Content-Type header. Equivalent to + ``h"Content-Type"=VALUE`` + * - b\ :ref:`VALUE` + - Set the body. The appropriate Content-Length header is added + automatically unless the **r** flag is set. + * - d\ :ref:`OFFSET` + - Disconnect after OFFSET bytes (HTTP/1 only). + * - i\ :ref:`OFFSET`,\ :ref:`VALUE` + - Inject the specified value at the offset (HTTP/1 only) + * - p\ :ref:`OFFSET`,SECONDS + - Pause for SECONDS seconds after OFFSET bytes. SECONDS can be an integer + or "f" to pause forever (HTTP/1 only) + +*************** +Websocket Frame +*************** + + **wf:[colon-separated list of features]** + +.. list-table:: + :widths: 20 80 + :header-rows: 0 + + * - b\ :ref:`VALUE` + - Set the frame payload. If a masking key is present, the value is + encoded automatically. + * - c\ :ref:`INTEGER` + - Set the op code. This can either be an integer from 0-15, or be one of + the following opcode names: **text** (the default), **continue**, + **binary**, **close**, **ping**, **pong**. + * - d\ :ref:`OFFSET` + - Disconnect after OFFSET bytes + * - i\ :ref:`OFFSET`,\ :ref:`VALUE` + - Inject the specified value at the offset + * - p\ :ref:`OFFSET`,SECONDS + - Pause for SECONDS seconds after OFFSET bytes. SECONDS can be an integer + or "f" to pause forever + * - x\ :ref:`INTEGER` + - Repeat this message N times. + * - [-]fin + - Set or un-set the **fin** bit. 
+ * - k\ :ref:`VALUE` + - Set the masking key. The resulting value must be exactly 4 bytes long. + The special form **knone** specifies that no key should be set, even if + the mask bit is on. + * - l\ :ref:`INTEGER` + - Set the payload length in the frame header, regardless of the actual + body length. + * - [-]mask + - Set or un-set the mask bit. + * - r\ :ref:`VALUE` + - Set the raw frame payload. This disables masking, even if the key is present. + * - [-]rsv1 + - Set or un-set the **rsv1** bit. + * - [-]rsv2 + - Set or un-set the **rsv2** bit. + * - [-]rsv3 + - Set or un-set the **rsv3** bit. + + + +********** +Data types +********** + +.. _INTEGER: + +INTEGER +^^^^^^^ + +.. _OFFSET: + +OFFSET +^^^^^^ + +Offsets are calculated relative to the base message, before any injections or +other transforms are applied. They have 3 flavors: + +======= ========================== +integer An integer byte offset +**r** A random location +**a** The end of the message +======= ========================== + + +.. _VALUE: + +VALUE +^^^^^ + +Literals +"""""""" + +Literal values are specified as quoted strings:: + + "foo" + +Either single or double quotes are accepted, and quotes can be escaped with +backslashes within the string:: + + 'fo\'o' + +Literal values can contain Python-style backslash escape sequences:: + + 'foo\r\nbar' + + + +Generated +""""""""" + +An @-symbol lead-in specifies that generated data should be used. There are two +components to a generator specification - a size, and a data type. By default +pathod assumes a data type of "bytes". + +Here's a value specifier for generating 100 bytes:: + + @100 + +You can use standard suffixes to indicate larger values. Here, for instance, is +a specifier for generating 100 megabytes:: + + @100m + +Data is generated and served efficiently - if you really want to send a +terabyte of data to a client, pathod can do it. 
The supported suffixes are: + +========== ==================== +b 1024**0 (bytes) +k 1024**1 (kilobytes) +m 1024**2 (megabytes) +g 1024**3 (gigabytes) +t 1024**4 (terabytes) +========== ==================== + +Data types are separated from the size specification by a comma. This specification +generates 100mb of ASCII:: + + @100m,ascii + +Supported data types are: + +================= ============================================== +ascii All ASCII characters +ascii_letters A-Za-z +ascii_lowercase a-z +ascii_uppercase A-Z +bytes All 256 byte values +digits 0-9 +hexdigits 0-f +octdigits 0-7 +punctuation !"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ and space +whitespace \\t \\n \\x0b \\x0c \\r and space +================= ============================================== + + + +Files +""""" + +You can load a value from a specified file path. To do so, you have to specify a +_staticdir_ option to pathod on the command-line, like so: + +>>> pathod -d ~/myassets + +All paths are relative paths under this directory. File loads are indicated by +starting the value specifier with the left angle bracket:: + + >> pydoc pathod.test + +The remainder of this page demonstrates some common interaction patterns using +nose. These examples are +also applicable with only minor modification to most commonly used Python testing +engines. + + +Context Manager +--------------- + +.. literalinclude:: ../../examples/pathod/test_context.py + :caption: examples/pathod/test_context.py + :language: python + + +One instance per test +--------------------- + +.. literalinclude:: ../../examples/pathod/test_setup.py + :caption: examples/pathod/test_setup.py + :language: python -- cgit v1.2.3