aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore8
-rw-r--r--LICENSE674
-rw-r--r--MANIFEST.in5
-rw-r--r--README38
-rw-r--r--doc-src/01-reset-fonts-grids-base.css8
-rw-r--r--doc-src/02-docstyle.css95
-rw-r--r--doc-src/_layout.html16
-rw-r--r--doc-src/admin.html14
-rw-r--r--doc-src/faq.html17
-rw-r--r--doc-src/index.html3
-rw-r--r--doc-src/index.py26
-rw-r--r--doc-src/library.html15
-rw-r--r--doc-src/syntax.css120
-rw-r--r--examples/stickycookies.py35
-rw-r--r--libmproxy/__init__.py0
-rw-r--r--libmproxy/console.py1065
-rw-r--r--libmproxy/controller.py119
-rw-r--r--libmproxy/filt.py316
-rw-r--r--libmproxy/proxy.py374
-rw-r--r--libmproxy/pyparsing.py3707
-rw-r--r--libmproxy/resources/bogus_template11
-rw-r--r--libmproxy/utils.py277
-rwxr-xr-xmitmproxy70
-rw-r--r--setup.py97
-rw-r--r--test/.pry5
-rw-r--r--test/data/serverkey.pem32
-rw-r--r--test/data/testkey.pem32
-rw-r--r--test/handler.py25
-rw-r--r--test/serv.py10
-rw-r--r--test/sslserv.py22
-rw-r--r--test/test_console.py269
-rw-r--r--test/test_filt.py220
-rw-r--r--test/test_proxy.py259
-rw-r--r--test/test_utils.py221
-rwxr-xr-xtest/tserv30
-rw-r--r--todo17
36 files changed, 8252 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..2d49315b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+MANIFEST
+/build
+/dist
+/tmp
+/doc
+*.py[cd]
+*.swp
+*.swo
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..94a9ed02
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..beffe885
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,5 @@
+include LICENSE
+recursive-include doc *
+recursive-include test *
+recursive-include libmproxy/resources *
+recursive-exclude test *.swo *.swp *.pyc
diff --git a/README b/README
new file mode 100644
index 00000000..3096277f
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+mitmproxy is an interactive SSL-capable intercepting HTTP proxy. It lets you
+observe, modify and replay requests and responses on the fly. The underlying
+library that mitmproxy is built on can also be used to do these things
+programmatically.
+
+By default, mitmproxy starts up with a mutt-like interactive curses interface -
+the help page (which you can view by pressing "?") should tell you everything
+you need to know. Note that requests and responses are stored in-memory until
+you delete them, so leaving mitmproxy running indefinitely or requesting very
+large amounts of data through it is a bad idea.
+
+mitmproxy intercepts SSL requests by simply assuming that all CONNECT requests
+are https. The connection from the browser is wrapped in SSL, and we read the
+request by pretending to be the connecting server. We then open an SSL request
+to the destination server, and replay the request.
+
+Releases can be found here: http://corte.si/software
+
+Source is hosted here: http://github.com/cortesi/mitmproxy
+
+
+Requirements
+------------
+
+* The curses interface relies on a current version of the
+[urwid](http://excess.org/urwid/) library.
+* The test suite uses the [pry](http://github.com/cortesi/pry) unit testing
+ library.
+
+You should also make sure that your console environment is set up with the
+following:
+
+* EDITOR environment variable to determine the external editor.
+* PAGER environment variable to determine the external pager.
+* Appropriate entries in your mailcap files to determine external
+ viewers for request and response contents.
+
+
diff --git a/doc-src/01-reset-fonts-grids-base.css b/doc-src/01-reset-fonts-grids-base.css
new file mode 100644
index 00000000..32f5f8bf
--- /dev/null
+++ b/doc-src/01-reset-fonts-grids-base.css
@@ -0,0 +1,8 @@
+/*
+Copyright (c) 2009, Yahoo! Inc. All rights reserved.
+Code licensed under the BSD License:
+http://developer.yahoo.net/yui/license.txt
+version: 2.7.0
+*/
+html{color:#000;background:#FFF;}body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,button,textarea,p,blockquote,th,td{margin:0;padding:0;}table{border-collapse:collapse;border-spacing:0;}fieldset,img{border:0;}address,caption,cite,code,dfn,em,strong,th,var,optgroup{font-style:inherit;font-weight:inherit;}del,ins{text-decoration:none;}li{list-style:none;}caption,th{text-align:left;}h1,h2,h3,h4,h5,h6{font-size:100%;font-weight:normal;}q:before,q:after{content:'';}abbr,acronym{border:0;font-variant:normal;}sup{vertical-align:baseline;}sub{vertical-align:baseline;}legend{color:#000;}input,button,textarea,select,optgroup,option{font-family:inherit;font-size:inherit;font-style:inherit;font-weight:inherit;}input,button,textarea,select{*font-size:100%;}body{font:13px/1.231 arial,helvetica,clean,sans-serif;*font-size:small;*font:x-small;}select,input,button,textarea,button{font:99% arial,helvetica,clean,sans-serif;}table{font-size:inherit;font:100%;}pre,code,kbd,samp,tt{font-family:monospace;*font-size:108%;line-height:100%;}body{text-align:center;}#doc,#doc2,#doc3,#doc4,.yui-t1,.yui-t2,.yui-t3,.yui-t4,.yui-t5,.yui-t6,.yui-t7{margin:auto;text-align:left;width:57.69em;*width:56.25em;}#doc2{width:73.076em;*width:71.25em;}#doc3{margin:auto 10px;width:auto;}#doc4{width:74.923em;*width:73.05em;}.yui-b{position:relative;}.yui-b{_position:static;}#yui-main .yui-b{position:static;}#yui-main,.yui-g .yui-u .yui-g{width:100%;}.yui-t1 #yui-main,.yui-t2 #yui-main,.yui-t3 #yui-main{float:right;margin-left:-25em;}.yui-t4 #yui-main,.yui-t5 #yui-main,.yui-t6 #yui-main{float:left;margin-right:-25em;}.yui-t1 .yui-b{float:left;width:12.30769em;*width:12.00em;}.yui-t1 #yui-main .yui-b{margin-left:13.30769em;*margin-left:13.05em;}.yui-t2 .yui-b{float:left;width:13.8461em;*width:13.50em;}.yui-t2 #yui-main .yui-b{margin-left:14.8461em;*margin-left:14.55em;}.yui-t3 .yui-b{float:left;width:23.0769em;*width:22.50em;}.yui-t3 #yui-main 
.yui-b{margin-left:24.0769em;*margin-left:23.62em;}.yui-t4 .yui-b{float:right;width:13.8456em;*width:13.50em;}.yui-t4 #yui-main .yui-b{margin-right:14.8456em;*margin-right:14.55em;}.yui-t5 .yui-b{float:right;width:18.4615em;*width:18.00em;}.yui-t5 #yui-main .yui-b{margin-right:19.4615em;*margin-right:19.125em;}.yui-t6 .yui-b{float:right;width:23.0769em;*width:22.50em;}.yui-t6 #yui-main .yui-b{margin-right:24.0769em;*margin-right:23.62em;}.yui-t7 #yui-main .yui-b{display:block;margin:0 0 1em 0;}#yui-main .yui-b{float:none;width:auto;}.yui-gb .yui-u,.yui-g .yui-gb .yui-u,.yui-gb .yui-g,.yui-gb .yui-gb,.yui-gb .yui-gc,.yui-gb .yui-gd,.yui-gb .yui-ge,.yui-gb .yui-gf,.yui-gc .yui-u,.yui-gc .yui-g,.yui-gd .yui-u{float:left;}.yui-g .yui-u,.yui-g .yui-g,.yui-g .yui-gb,.yui-g .yui-gc,.yui-g .yui-gd,.yui-g .yui-ge,.yui-g .yui-gf,.yui-gc .yui-u,.yui-gd .yui-g,.yui-g .yui-gc .yui-u,.yui-ge .yui-u,.yui-ge .yui-g,.yui-gf .yui-g,.yui-gf .yui-u{float:right;}.yui-g div.first,.yui-gb div.first,.yui-gc div.first,.yui-gd div.first,.yui-ge div.first,.yui-gf div.first,.yui-g .yui-gc div.first,.yui-g .yui-ge div.first,.yui-gc div.first div.first{float:left;}.yui-g .yui-u,.yui-g .yui-g,.yui-g .yui-gb,.yui-g .yui-gc,.yui-g .yui-gd,.yui-g .yui-ge,.yui-g .yui-gf{width:49.1%;}.yui-gb .yui-u,.yui-g .yui-gb .yui-u,.yui-gb .yui-g,.yui-gb .yui-gb,.yui-gb .yui-gc,.yui-gb .yui-gd,.yui-gb .yui-ge,.yui-gb .yui-gf,.yui-gc .yui-u,.yui-gc .yui-g,.yui-gd .yui-u{width:32%;margin-left:1.99%;}.yui-gb .yui-u{*margin-left:1.9%;*width:31.9%;}.yui-gc div.first,.yui-gd .yui-u{width:66%;}.yui-gd div.first{width:32%;}.yui-ge div.first,.yui-gf .yui-u{width:74.2%;}.yui-ge .yui-u,.yui-gf div.first{width:24%;}.yui-g .yui-gb div.first,.yui-gb div.first,.yui-gc div.first,.yui-gd div.first{margin-left:0;}.yui-g .yui-g .yui-u,.yui-gb .yui-g .yui-u,.yui-gc .yui-g .yui-u,.yui-gd .yui-g .yui-u,.yui-ge .yui-g .yui-u,.yui-gf .yui-g .yui-u{width:49%;*width:48.1%;*margin-left:0;}.yui-g .yui-g .yui-u{width:48.1%;}.yui-g .yui-gb 
div.first,.yui-gb .yui-gb div.first{*margin-right:0;*width:32%;_width:31.7%;}.yui-g .yui-gc div.first,.yui-gd .yui-g{width:66%;}.yui-gb .yui-g div.first{*margin-right:4%;_margin-right:1.3%;}.yui-gb .yui-gc div.first,.yui-gb .yui-gd div.first{*margin-right:0;}.yui-gb .yui-gb .yui-u,.yui-gb .yui-gc .yui-u{*margin-left:1.8%;_margin-left:4%;}.yui-g .yui-gb .yui-u{_margin-left:1.0%;}.yui-gb .yui-gd .yui-u{*width:66%;_width:61.2%;}.yui-gb .yui-gd div.first{*width:31%;_width:29.5%;}.yui-g .yui-gc .yui-u,.yui-gb .yui-gc .yui-u{width:32%;_float:right;margin-right:0;_margin-left:0;}.yui-gb .yui-gc div.first{width:66%;*float:left;*margin-left:0;}.yui-gb .yui-ge .yui-u,.yui-gb .yui-gf .yui-u{margin:0;}.yui-gb .yui-gb .yui-u{_margin-left:.7%;}.yui-gb .yui-g div.first,.yui-gb .yui-gb div.first{*margin-left:0;}.yui-gc .yui-g .yui-u,.yui-gd .yui-g .yui-u{*width:48.1%;*margin-left:0;}.yui-gb .yui-gd div.first{width:32%;}.yui-g .yui-gd div.first{_width:29.9%;}.yui-ge .yui-g{width:24%;}.yui-gf .yui-g{width:74.2%;}.yui-gb .yui-ge div.yui-u,.yui-gb .yui-gf div.yui-u{float:right;}.yui-gb .yui-ge div.first,.yui-gb .yui-gf div.first{float:left;}.yui-gb .yui-ge .yui-u,.yui-gb .yui-gf div.first{*width:24%;_width:20%;}.yui-gb .yui-ge div.first,.yui-gb .yui-gf .yui-u{*width:73.5%;_width:65.5%;}.yui-ge div.first .yui-gd .yui-u{width:65%;}.yui-ge div.first .yui-gd div.first{width:32%;}#hd:after,#bd:after,#ft:after,.yui-g:after,.yui-gb:after,.yui-gc:after,.yui-gd:after,.yui-ge:after,.yui-gf:after{content:".";display:block;height:0;clear:both;visibility:hidden;}#hd,#bd,#ft,.yui-g,.yui-gb,.yui-gc,.yui-gd,.yui-ge,.yui-gf{zoom:1;}
+body{margin:10px;}h1{font-size:138.5%;}h2{font-size:123.1%;}h3{font-size:108%;}h1,h2,h3{margin:1em 0;}h1,h2,h3,h4,h5,h6,strong,dt{font-weight:bold;}optgroup{font-weight:normal;}abbr,acronym{border-bottom:1px dotted #000;cursor:help;}em{font-style:italic;}del{text-decoration:line-through;}blockquote,ul,ol,dl{margin:1em;}ol,ul,dl{margin-left:2em;}ol li{list-style:decimal outside;}ul li{list-style:disc outside;}dl dd{margin-left:1em;}th,td{border:1px solid #000;padding:.5em;}th{font-weight:bold;text-align:center;}caption{margin-bottom:.5em;text-align:center;}sup{vertical-align:super;}sub{vertical-align:sub;}p,fieldset,table,pre{margin-bottom:1em;}button,input[type="checkbox"],input[type="radio"],input[type="reset"],input[type="submit"]{padding:1px;}
diff --git a/doc-src/02-docstyle.css b/doc-src/02-docstyle.css
new file mode 100644
index 00000000..7d33b861
--- /dev/null
+++ b/doc-src/02-docstyle.css
@@ -0,0 +1,95 @@
+body {
+ -x-system-font:none;
+ font-family: Helvetica,Arial,Tahoma,Verdana,Sans-Serif;
+ color: #555555;
+ font-size: 1.3em;
+}
+
+a {
+ color: #3F8ED8;
+}
+
+#hd {
+ margin: 0;
+ border-bottom: 1px solid #999;
+}
+#hd h1 {
+ letter-spacing: 3px;
+ font-size: 2.5em;
+ line-height: 100%;
+ margin: 0.3em 0;
+ font-weight: normal;
+}
+
+#bd {
+ padding: 20px;
+}
+
+#bd h1 {
+ font-size: 1.6em;
+ margin-top: 5px;
+ margin-bottom: 5px;
+}
+
+#bd h2 {
+ font-size: 1.2em;
+ margin-top: 5px;
+ margin-bottom: 5px;
+}
+
+#ft {
+ color: #aaa;
+ border-top: 1px solid #aaa;
+ clear: both;
+ margin: 0 0 2em 0;
+ font-size: 0.8em;
+ letter-spacing: 0.5px;
+}
+
+.pageindex {
+ font-size: 1.5em;
+}
+
+.pageindex ul {
+ list-style-image:none;
+ list-style-position:outside;
+ list-style-type:none;
+ margin: 0px;
+}
+
+.pageindex li {
+ list-style-image:none;
+ list-style-position:outside;
+ list-style-type:none;
+ margin: 0;
+}
+
+.pageindex li.active {
+ padding-left: 4px;
+ border-left: 5px solid #ff0000;
+}
+
+.pageindex li.inactive{
+ border-left: none;
+ margin-left: 9px;
+}
+
+.pageindex li li a {
+ display: block;
+ background-color: transparent;
+ margin: 0;
+ border-top: none;
+ border-bottom: none;
+}
+
+.pageindex ul ul {
+ margin-left: 20px;
+ padding: 0;
+ list-style-type: none;
+}
+
+
+.faq .question {
+ font-size: 1.1em;
+ font-weight: bold;
+}
diff --git a/doc-src/_layout.html b/doc-src/_layout.html
new file mode 100644
index 00000000..2e706ecd
--- /dev/null
+++ b/doc-src/_layout.html
@@ -0,0 +1,16 @@
+<div class="yui-t2" id="doc3">
+ <div style="" id="hd">
+ $!head!$
+ </div>
+ <div id="bd">
+ <div id="yui-main">
+ <div style="" class="yui-b">$!body!$</div>
+ </div>
+ <div style="" class="yui-b">
+ <div>@!sidebar!@</div>
+ </div>
+ </div>
+ <div style="" id="ft">
+ <p>@!copyright!@</p>
+ </div>
+</div>
diff --git a/doc-src/admin.html b/doc-src/admin.html
new file mode 100644
index 00000000..6954010c
--- /dev/null
+++ b/doc-src/admin.html
@@ -0,0 +1,14 @@
+
+<h2>Contact</h2>
+
+<p> Please send any comments, suggestions and bug reports to
+ <a href="mailto:$!docMaintainerEmail!$">$!docMaintainerEmail!$</a>.
+</p>
+
+
+<h2>License</h2>
+
+<pre>
+@!license!@
+</pre>
+
diff --git a/doc-src/faq.html b/doc-src/faq.html
new file mode 100644
index 00000000..8b0a3ff5
--- /dev/null
+++ b/doc-src/faq.html
@@ -0,0 +1,17 @@
+
+<div class="faq">
+
+ <p class="question">On some sites I see a lot of "Connection from.."
+ entries that never complete.</p>
+
+ <p> This is probably because the page requests resources from SSL-protected
+ domains. These requests are intercepted by mitmproxy, but because we're
+ using a bogus certificate, the browser-side of the connection hangs. The
+ browser doesn't prompt you to add a certificate trust exception for remote
+ page components, only for the primary domain being visited. </p>
+
+ <p> To solve this, use something like FireBug to find out which page
+ components are hanging. Visit the relevant domains using your browser, and
+ add a certificate trust exception for each one. </p>
+
+</div>
diff --git a/doc-src/index.html b/doc-src/index.html
new file mode 100644
index 00000000..859ffad0
--- /dev/null
+++ b/doc-src/index.html
@@ -0,0 +1,3 @@
+
+@!index_contents!@
+
diff --git a/doc-src/index.py b/doc-src/index.py
new file mode 100644
index 00000000..2b6dde6a
--- /dev/null
+++ b/doc-src/index.py
@@ -0,0 +1,26 @@
+import countershape
+from countershape import Page, Directory, PythonModule
+import countershape.grok
+
+this.layout = countershape.Layout("_layout.html")  # NOTE: "this" and "ns" are injected by countershape at render time
+this.markup = "markdown"
+ns.docTitle = "mitmproxy"
+ns.docMaintainer = "Aldo Cortesi"
+ns.docMaintainerEmail = "aldo@corte.si"
+ns.copyright = "Aldo Cortesi 2010"
+ns.head = countershape.template.Template(None, "<h1> @!docTitle!@ - @!this.title!@ </h1>")
+ns.sidebar = countershape.widgets.SiblingPageIndex(  # navigation index of sibling pages, minus excluded entries
+    '/index.html',
+    exclude=['countershape']
+    )
+
+ns.license = file("../LICENSE").read()  # Python 2 builtin file(); paths are relative to doc-src
+ns.index_contents = file("../README").read()
+ns.example = file("../examples/stickycookies.py").read()  # inlined into library.html's example block
+
+pages = [
+    Page("index.html", "introduction"),
+    Page("library.html", "library"),
+    Page("faq.html", "faq"),
+    Page("admin.html", "administrivia")
+]
diff --git a/doc-src/library.html b/doc-src/library.html
new file mode 100644
index 00000000..e8533731
--- /dev/null
+++ b/doc-src/library.html
@@ -0,0 +1,15 @@
+
+All of mitmproxy's basic functionality is exposed through the __libmproxy__
+library. The example below shows a simple implementation of the "sticky cookie"
+functionality included in the interactive mitmproxy program. Traffic is
+monitored for __cookie__ and __set-cookie__ headers, and requests are rewritten
+to include a previously seen cookie if they don't already have one. In effect,
+this lets you log in to a site using your browser, and then make subsequent
+requests using a tool like __curl__, which will then seem to be part of the
+authenticated session.
+
+
+<!--(block |pySyntax)-->
+$!example!$
+<!--(end)-->
+
diff --git a/doc-src/syntax.css b/doc-src/syntax.css
new file mode 100644
index 00000000..e371658a
--- /dev/null
+++ b/doc-src/syntax.css
@@ -0,0 +1,120 @@
+.highlight { background: #f8f8f8; }
+.highlight .c { color: #408080; font-style: italic } /* Comment */
+.highlight .err { border: 1px solid #FF0000 } /* Error */
+.highlight .k { color: #008000; font-weight: bold } /* Keyword */
+.highlight .o { color: #666666 } /* Operator */
+.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */
+.highlight .cp { color: #BC7A00 } /* Comment.Preproc */
+.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */
+.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */
+.highlight .gd { color: #A00000 } /* Generic.Deleted */
+.highlight .ge { font-style: italic } /* Generic.Emph */
+.highlight .gr { color: #FF0000 } /* Generic.Error */
+.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.highlight .gi { color: #00A000 } /* Generic.Inserted */
+.highlight .go { color: #808080 } /* Generic.Output */
+.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
+.highlight .gs { font-weight: bold } /* Generic.Strong */
+.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.highlight .gt { color: #0040D0 } /* Generic.Traceback */
+.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
+.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
+.highlight .kp { color: #008000 } /* Keyword.Pseudo */
+.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
+.highlight .kt { color: #B00040 } /* Keyword.Type */
+.highlight .m { color: #666666 } /* Literal.Number */
+.highlight .s { color: #BA2121 } /* Literal.String */
+.highlight .na { color: #7D9029 } /* Name.Attribute */
+.highlight .nb { color: #008000 } /* Name.Builtin */
+.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
+.highlight .no { color: #880000 } /* Name.Constant */
+.highlight .nd { color: #AA22FF } /* Name.Decorator */
+.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */
+.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
+.highlight .nf { color: #0000FF } /* Name.Function */
+.highlight .nl { color: #A0A000 } /* Name.Label */
+.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
+.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
+.highlight .nv { color: #19177C } /* Name.Variable */
+.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
+.highlight .w { color: #bbbbbb } /* Text.Whitespace */
+.highlight .mf { color: #666666 } /* Literal.Number.Float */
+.highlight .mh { color: #666666 } /* Literal.Number.Hex */
+.highlight .mi { color: #666666 } /* Literal.Number.Integer */
+.highlight .mo { color: #666666 } /* Literal.Number.Oct */
+.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
+.highlight .sc { color: #BA2121 } /* Literal.String.Char */
+.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
+.highlight .s2 { color: #BA2121 } /* Literal.String.Double */
+.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
+.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
+.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
+.highlight .sx { color: #008000 } /* Literal.String.Other */
+.highlight .sr { color: #BB6688 } /* Literal.String.Regex */
+.highlight .s1 { color: #BA2121 } /* Literal.String.Single */
+.highlight .ss { color: #19177C } /* Literal.String.Symbol */
+.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
+.highlight .vc { color: #19177C } /* Name.Variable.Class */
+.highlight .vg { color: #19177C } /* Name.Variable.Global */
+.highlight .vi { color: #19177C } /* Name.Variable.Instance */
+.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */
+.grokdoc { background: #f8f8f8; }
+.grokdoc .c { color: #408080; font-style: italic } /* Comment */
+.grokdoc .err { border: 1px solid #FF0000 } /* Error */
+.grokdoc .k { color: #008000; font-weight: bold } /* Keyword */
+.grokdoc .o { color: #666666 } /* Operator */
+.grokdoc .cm { color: #408080; font-style: italic } /* Comment.Multiline */
+.grokdoc .cp { color: #BC7A00 } /* Comment.Preproc */
+.grokdoc .c1 { color: #408080; font-style: italic } /* Comment.Single */
+.grokdoc .cs { color: #408080; font-style: italic } /* Comment.Special */
+.grokdoc .gd { color: #A00000 } /* Generic.Deleted */
+.grokdoc .ge { font-style: italic } /* Generic.Emph */
+.grokdoc .gr { color: #FF0000 } /* Generic.Error */
+.grokdoc .gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.grokdoc .gi { color: #00A000 } /* Generic.Inserted */
+.grokdoc .go { color: #808080 } /* Generic.Output */
+.grokdoc .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
+.grokdoc .gs { font-weight: bold } /* Generic.Strong */
+.grokdoc .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.grokdoc .gt { color: #0040D0 } /* Generic.Traceback */
+.grokdoc .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
+.grokdoc .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
+.grokdoc .kp { color: #008000 } /* Keyword.Pseudo */
+.grokdoc .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
+.grokdoc .kt { color: #B00040 } /* Keyword.Type */
+.grokdoc .m { color: #666666 } /* Literal.Number */
+.grokdoc .s { color: #BA2121 } /* Literal.String */
+.grokdoc .na { color: #7D9029 } /* Name.Attribute */
+.grokdoc .nb { color: #008000 } /* Name.Builtin */
+.grokdoc .nc { color: #0000FF; font-weight: bold } /* Name.Class */
+.grokdoc .no { color: #880000 } /* Name.Constant */
+.grokdoc .nd { color: #AA22FF } /* Name.Decorator */
+.grokdoc .ni { color: #999999; font-weight: bold } /* Name.Entity */
+.grokdoc .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
+.grokdoc .nf { color: #0000FF } /* Name.Function */
+.grokdoc .nl { color: #A0A000 } /* Name.Label */
+.grokdoc .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
+.grokdoc .nt { color: #008000; font-weight: bold } /* Name.Tag */
+.grokdoc .nv { color: #19177C } /* Name.Variable */
+.grokdoc .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
+.grokdoc .w { color: #bbbbbb } /* Text.Whitespace */
+.grokdoc .mf { color: #666666 } /* Literal.Number.Float */
+.grokdoc .mh { color: #666666 } /* Literal.Number.Hex */
+.grokdoc .mi { color: #666666 } /* Literal.Number.Integer */
+.grokdoc .mo { color: #666666 } /* Literal.Number.Oct */
+.grokdoc .sb { color: #BA2121 } /* Literal.String.Backtick */
+.grokdoc .sc { color: #BA2121 } /* Literal.String.Char */
+.grokdoc .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
+.grokdoc .s2 { color: #BA2121 } /* Literal.String.Double */
+.grokdoc .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
+.grokdoc .sh { color: #BA2121 } /* Literal.String.Heredoc */
+.grokdoc .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
+.grokdoc .sx { color: #008000 } /* Literal.String.Other */
+.grokdoc .sr { color: #BB6688 } /* Literal.String.Regex */
+.grokdoc .s1 { color: #BA2121 } /* Literal.String.Single */
+.grokdoc .ss { color: #19177C } /* Literal.String.Symbol */
+.grokdoc .bp { color: #008000 } /* Name.Builtin.Pseudo */
+.grokdoc .vc { color: #19177C } /* Name.Variable.Class */
+.grokdoc .vg { color: #19177C } /* Name.Variable.Global */
+.grokdoc .vi { color: #19177C } /* Name.Variable.Instance */
+.grokdoc .il { color: #666666 } /* Literal.Number.Integer.Long */
diff --git a/examples/stickycookies.py b/examples/stickycookies.py
new file mode 100644
index 00000000..94b35876
--- /dev/null
+++ b/examples/stickycookies.py
@@ -0,0 +1,35 @@
+from libmproxy import controller, proxy
+
+proxy.config = proxy.Config(
+ "~/.mitmproxy/cert.pem"
+)
+
+class StickyMaster(controller.Master):
+ def __init__(self, server):
+ controller.Master.__init__(self, server)
+ self.stickyhosts = {}
+
+ def run(self):
+ try:
+ return controller.Master.run(self)
+ except KeyboardInterrupt:
+ self.shutdown()
+
+ def handle_request(self, msg):
+ hid = (msg.host, msg.port)
+ if msg.headers.has_key("cookie"):
+ self.stickyhosts[hid] = msg.headers["cookie"]
+ elif hid in self.stickyhosts:
+ msg.headers["cookie"] = self.stickyhosts[hid]
+ msg.ack()
+
+ def handle_response(self, msg):
+ hid = (msg.request.host, msg.request.port)
+ if msg.headers.has_key("set-cookie"):
+ self.stickyhosts[hid] = f.response.headers["set-cookie"]
+ msg.ack()
+
+
+server = proxy.ProxyServer(8080)
+m = StickyMaster(server)
+m.run()
diff --git a/libmproxy/__init__.py b/libmproxy/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/libmproxy/__init__.py
diff --git a/libmproxy/console.py b/libmproxy/console.py
new file mode 100644
index 00000000..7a96ec38
--- /dev/null
+++ b/libmproxy/console.py
@@ -0,0 +1,1065 @@
+# Copyright (C) 2010 Aldo Cortesi
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import Queue, mailcap, mimetypes, tempfile, os, subprocess, threading
+import cStringIO
+import urwid.curses_display
+import urwid
+import controller, utils, filt, proxy
+
+
+class Stop(Exception): pass
+
+
+def format_keyvals(lst, key="key", val="text", space=5, indent=0):
+    """
+        Format a list of (key, value) string pairs as urwid text markup:
+        one pair per line, values aligned "space" columns past the longest
+        key, each line indented by "indent" spaces. Returns a list of
+        (attr, text) tuples and plain strings; empty list for empty input.
+    """
+    ret = []
+    if lst:
+        pad = max(len(i[0]) for i in lst) + space
+        for i in lst:
+            ret.extend(
+                [
+                    " "*indent,
+                    (key, i[0]),
+                    " "*(pad-len(i[0])),
+                    (val, i[1]),
+                    "\n"
+                ]
+            )
+    return ret
+
+
+#begin nocover
+
+class ReplayThread(threading.Thread):
+    # Replays a flow's request against the origin server in the background.
+    # The response -- or a proxy.Error on failure -- is sent to the master
+    # queue, so it arrives through the same path as a live response.
+    def __init__(self, flow, masterq):
+        self.flow, self.masterq = flow, masterq
+        threading.Thread.__init__(self)
+
+    def run(self):
+        try:
+            server = proxy.ServerConnection(self.flow.request)
+            response = server.read_response()
+            response.send(self.masterq)
+        except proxy.ProxyError, v:
+            err = proxy.Error(self.flow.connection, v.msg)
+            err.send(self.masterq)
+
+
+class ConnectionItem(urwid.WidgetWrap):
+    # One row of the connection list. Handles the per-row keys: accept
+    # intercept (a), delete (d), replay (r), revert (R), kill (z), and
+    # enter to open the detail view.
+    def __init__(self, master, state, flow):
+        self.master, self.state, self.flow = master, state, flow
+        w = self.get_text()
+        urwid.WidgetWrap.__init__(self, w)
+
+    def intercept(self):
+        # NOTE(review): self.intercepting is set here but never initialized
+        # in __init__ nor read on this class -- looks vestigial; the Flow
+        # object carries the authoritative intercept state.
+        self.intercepting = True
+        self.w = self.get_text()
+
+    def get_text(self, nofocus=False):
+        return urwid.Text(self.flow.get_text(nofocus))
+
+    def selectable(self):
+        return True
+
+    def keypress(self, (maxcol,), key):
+        if key == "a":
+            self.flow.accept_intercept()
+            self.master.sync_list_view()
+        elif key == "d":
+            if not self.state.delete_flow(self.flow):
+                self.master.statusbar.message("Can't delete connection mid-intercept.")
+            self.master.sync_list_view()
+        elif key == "r":
+            r = self.state.replay(self.flow, self.master.masterq)
+            if r:
+                self.master.statusbar.message(r)
+            self.master.sync_list_view()
+        elif key == "R":
+            self.state.revert(self.flow)
+            self.master.sync_list_view()
+        elif key == "z":
+            self.master.kill_connection(self.flow)
+        elif key == "enter":
+            if self.flow.request:
+                self.master.view_connection(self.flow)
+        return key
+
+
+class ConnectionListView(urwid.ListWalker):
+    # urwid ListWalker over State: wraps each Flow in a ConnectionItem on
+    # demand, delegating focus and position bookkeeping to State.
+    def __init__(self, master, state):
+        self.master, self.state = master, state
+
+    def get_focus(self):
+        f, i = self.state.get_focus()
+        f = ConnectionItem(self.master, self.state, f) if f else None
+        return f, i
+
+    def set_focus(self, focus):
+        ret = self.state.set_focus(focus)
+        # _modified() tells urwid the walker's content changed.
+        self._modified()
+        return ret
+
+    def get_next(self, pos):
+        f, i = self.state.get_next(pos)
+        f = ConnectionItem(self.master, self.state, f) if f else None
+        return f, i
+
+    def get_prev(self, pos):
+        f, i = self.state.get_prev(pos)
+        f = ConnectionItem(self.master, self.state, f) if f else None
+        return f, i
+
+
+class ConnectionViewHeader(urwid.WidgetWrap):
+    # Header line above the detail view; re-renders when its own flow is
+    # refreshed (other flows are ignored).
+    def __init__(self, flow):
+        self.flow = flow
+        self.w = urwid.Text(flow.get_text(nofocus=True, padding=0))
+
+    def refresh_connection(self, f):
+        if f == self.flow:
+            self.w = urwid.Text(f.get_text(nofocus=True, padding=0))
+
+
+class ConnectionView(urwid.WidgetWrap):
+    # Detail view for one flow: tabbed request/response display with
+    # hexdump toggle, in-place editing via $EDITOR, external viewing via
+    # mailcap/$PAGER, and replay/revert bindings.
+    REQ = 0
+    RESP = 1
+    tabs = ["Request", "Response"]
+    # (word, shortcut-key) pairs for the one-key method prompt.
+    methods = [
+        ("get", "g"),
+        ("post", "p"),
+        ("put", "u"),
+        ("head", "h"),
+        ("trace", "t"),
+        ("delete", "d"),
+        ("options", "o"),
+    ]
+    def __init__(self, master, state, flow):
+        self.master, self.state, self.flow = master, state, flow
+        self.binary = False
+        self.view_request()
+
+    def _tab(self, content, active):
+        # Render one tab heading; "heading" attr when active, dimmed otherwise.
+        if active:
+            attr = "heading"
+        else:
+            attr = "inactive"
+        p = urwid.Text(content)
+        p = urwid.Padding(p, align="left", width=("relative", 100))
+        p = urwid.AttrWrap(p, attr)
+        return p
+
+    def wrap_body(self, active, body):
+        # Build the tab row (marking unacked intercepts) and frame the body
+        # under it. The Response tab only appears once a response exists.
+        parts = []
+
+        if self.flow.intercepting and not self.flow.request.acked:
+            qt = "Request (intercepted)"
+        else:
+            qt = "Request"
+        if active == self.REQ:
+            parts.append(self._tab(qt, True))
+        else:
+            parts.append(self._tab(qt, False))
+
+        if self.flow.response:
+            if self.flow.intercepting and not self.flow.response.acked:
+                st = "Response (intercepted)"
+            else:
+                st = "Response"
+            if active == self.RESP:
+                parts.append(self._tab(st, True))
+            else:
+                parts.append(self._tab(st, False))
+
+        h = urwid.Columns(parts, dividechars=1)
+        f = urwid.Frame(
+            body,
+            header=h
+        )
+        return f
+
+    def _conn_text(self, conn):
+        # Render headers plus body; body is hexdumped when toggled on or
+        # when the content looks binary.
+        txt = []
+        txt.extend(
+            format_keyvals(
+                [(h+":", v) for (h, v) in sorted(conn.headers.itemPairs())],
+                key = "header",
+                val = "text"
+            )
+        )
+        txt.append("\n\n")
+        if conn.content:
+            if self.binary or utils.isBin(conn.content):
+                for offset, hex, s in utils.hexdump(conn.content):
+                    txt.extend([
+                        ("offset", offset),
+                        " ",
+                        ("text", hex),
+                        "   ",
+                        ("text", s),
+                        "\n"
+                    ])
+            else:
+                for i in conn.content.splitlines():
+                    txt.append(
+                        ("text", i),
+                    )
+                    txt.append(
+                        ("text", "\n"),
+                    )
+        return urwid.ListBox([urwid.Text(txt)])
+
+    def view_request(self):
+        self.viewing = self.REQ
+        body = self._conn_text(self.flow.request)
+        self.w = self.wrap_body(self.REQ, body)
+
+    def view_response(self):
+        if self.flow.response:
+            self.viewing = self.RESP
+            body = self._conn_text(self.flow.response)
+            self.w = self.wrap_body(self.RESP, body)
+
+    def refresh_connection(self, c=None):
+        if c == self.flow:
+            if self.viewing == self.REQ:
+                self.view_request()
+            else:
+                self.view_response()
+
+    def _spawn_editor(self, data):
+        # Round-trip "data" through $EDITOR via a temp file; returns the
+        # edited text. NOTE(review): no fallback if EDITOR is unset.
+        fd, name = tempfile.mkstemp('', "mproxy")
+        os.write(fd, data)
+        os.close(fd)
+        c = os.environ.get("EDITOR")
+        cmd = [c, name]
+        ret = subprocess.call(cmd)
+        # Not sure why, unless we do this we get a visible cursor after
+        # spawning 'less'.
+        self.master.ui._curs_set(1)
+        self.master.ui.clear()
+        data = open(name).read()
+        os.unlink(name)
+        return data
+
+    def edit_method(self, m):
+        # Callback for the one-key method prompt: map shortcut back to verb.
+        for i in self.methods:
+            if i[1] == m:
+                self.flow.request.method = i[0].upper()
+        self.master.refresh_connection(self.flow)
+
+    def edit(self, part):
+        # Edit a part of the currently-viewed message: b=body, h=headers,
+        # u=url (request only), m=method (request only). Returns an error
+        # message string on invalid input.
+        if self.viewing == self.REQ:
+            conn = self.flow.request
+        else:
+            conn = self.flow.response
+        if part == "b":
+            conn.content = self._spawn_editor(conn.content or "")
+        elif part == "h":
+            headertext = self._spawn_editor(repr(conn.headers))
+            headers = utils.Headers()
+            fp = cStringIO.StringIO(headertext)
+            headers.read(fp)
+            conn.headers = headers
+        elif part == "u" and self.viewing == self.REQ:
+            conn = self.flow.request
+            url = self._spawn_editor(conn.url())
+            url = url.strip()
+            if not conn.set_url(url):
+                return "Invalid URL."
+        elif part == "m" and self.viewing == self.REQ:
+            self.master.prompt_onekey("Method ", self.methods, self.edit_method)
+        # NOTE(review): dead assignment -- "key" is a fresh local here and
+        # never read; looks like a leftover from keypress().
+        key = None
+        self.master.refresh_connection(self.flow)
+
+    def keypress(self, size, key):
+        if key == "tab":
+            if self.viewing == self.REQ:
+                self.view_response()
+            else:
+                self.view_request()
+        elif key in ("up", "down", "page up", "page down"):
+            # Why doesn't this just work??
+            self.w.body.keypress(size, key)
+        elif key == "a":
+            self.flow.accept_intercept()
+            self.master.view_connection(self.flow)
+        elif key == "b":
+            self.binary = not self.binary
+            self.master.refresh_connection(self.flow)
+        elif key == "e":
+            if self.viewing == self.REQ:
+                self.master.prompt_onekey(
+                    "Edit request ",
+                    (
+                        ("header", "h"),
+                        ("body", "b"),
+                        ("url", "u"),
+                        ("method", "m")
+                    ),
+                    self.edit
+                )
+            else:
+                self.master.prompt_onekey(
+                    "Edit response ",
+                    (
+                        ("header", "h"),
+                        ("body", "b"),
+                    ),
+                    self.edit
+                )
+            key = None
+        elif key == "r":
+            r = self.state.replay(self.flow, self.master.masterq)
+            if r:
+                self.master.statusbar.message(r)
+            self.master.refresh_connection(self.flow)
+        elif key == "R":
+            self.state.revert(self.flow)
+            self.master.refresh_connection(self.flow)
+        elif key == "v":
+            # Dump content to a temp file and open it with a mailcap match,
+            # falling back to $PAGER/$EDITOR.
+            if self.viewing == self.REQ:
+                conn = self.flow.request
+            else:
+                conn = self.flow.response
+            if conn.content:
+                t = conn.headers.get("content-type", [None])
+                t = t[0]
+                if t:
+                    ext = mimetypes.guess_extension(t) or ""
+                else:
+                    ext = ""
+                fd, name = tempfile.mkstemp(ext, "mproxy")
+                os.write(fd, conn.content)
+                os.close(fd)
+                # NOTE(review): duplicate of the content-type lookup a few
+                # lines up -- harmless, but one of the two could go.
+                t = conn.headers.get("content-type", [None])
+                t = t[0]
+
+                cmd = None
+                shell = False
+
+                if t:
+                    c = mailcap.getcaps()
+                    cmd, _ = mailcap.findmatch(c, t, filename=name)
+                    if cmd:
+                        shell = True
+                if not cmd:
+                    c = os.environ.get("PAGER") or os.environ.get("EDITOR")
+                    cmd = [c, name]
+                ret = subprocess.call(cmd, shell=shell)
+                # Not sure why, unless we do this we get a visible cursor after
+                # spawning 'less'.
+                self.master.ui._curs_set(1)
+                self.master.ui.clear()
+                os.unlink(name)
+        return key
+
+
+class ActionBar(urwid.WidgetWrap):
+    # Bottom line of the status area: shows either a message or an
+    # editable prompt, swapping its wrapped widget accordingly.
+    def __init__(self):
+        self.message("")
+
+    def selectable(self):
+        return True
+
+    def prompt(self, prompt):
+        self.w = urwid.Edit(prompt)
+
+    def message(self, message):
+        self.w = urwid.Text(message)
+
+
+class StatusBar(urwid.WidgetWrap):
+    # Two-row footer: an info row (proxy port, help text, flow count) over
+    # an ActionBar for prompts and messages.
+    def __init__(self, master, text):
+        self.master, self.text = master, text
+        self.ab = ActionBar()
+        self.ib = urwid.AttrWrap(urwid.Text(""), 'foot')
+        self.w = urwid.Pile([self.ib, self.ab])
+        self.redraw()
+
+    def redraw(self):
+        status = urwid.Columns([
+            urwid.Text([('title', "mproxy:%s"%self.master.server.port)]),
+            urwid.Text(
+                [
+                    self.text,
+                    ('text', "%5s"%("[%s]"%len(self.master.state.flow_list)))
+                ],
+                align="right"),
+        ])
+        self.ib.set_w(status)
+
+    def update(self, text):
+        self.text = text
+        self.redraw()
+
+    def selectable(self):
+        return True
+
+    def get_edit_text(self):
+        return self.ab.w.get_edit_text()
+
+    def prompt(self, prompt):
+        self.ab.prompt(prompt)
+
+    def message(self, msg):
+        self.ab.message(msg)
+
+
+#end nocover
+
+# Marker standing in for a BrowserConnection on replayed flows; used only
+# for identity (as a flow_map key and in Flow.is_replay's isinstance check).
+class ReplayConnection:
+    pass
+
+
+class Flow:
+    # One browser connection with its request/response/error, plus undo
+    # support (backup/revert) and interception state.
+    def __init__(self, connection):
+        self.connection = connection
+        self.request, self.response, self.error = None, None, None
+        self.waiting = True
+        self.focus = False
+        self.intercepting = False
+        self._backup = None
+
+    def backup(self):
+        # Snapshot all four parts exactly once: later calls are no-ops, so
+        # the backup always preserves the earliest (pre-edit) state.
+        if not self._backup:
+            self._backup = [
+                self.connection.copy() if self.connection else None,
+                self.request.copy() if self.request else None,
+                self.response.copy() if self.response else None,
+                self.error.copy() if self.error else None,
+            ]
+
+    def revert(self):
+        # Restore copies of the backup (the backup itself is kept intact,
+        # so revert can be applied again after further edits).
+        if self._backup:
+            self.waiting = False
+            restore = [i.copy() if i else None for i in self._backup]
+            self.connection, self.request, self.response, self.error = restore
+
+    def match(self, pattern):
+        # Apply a compiled filter to the most advanced message we have
+        # (response preferred, else request); None pattern never matches.
+        if pattern:
+            if self.response:
+                return pattern(self.response)
+            elif self.request:
+                return pattern(self.request)
+        return False
+
+    def is_replay(self):
+        return isinstance(self.connection, ReplayConnection)
+
+    def get_text(self, nofocus=False, padding=3):
+        # Build urwid markup for the list/header view: method + URL, then a
+        # second line with replay marker, status code, content type/size or
+        # error. "!" flags an unacked intercepted message.
+        if not self.request and not self.response:
+            txt = [
+                ("title", " Connection from %s..."%(self.connection.address)),
+            ]
+        else:
+            txt = [
+                ("ack", "!") if self.intercepting and not self.request.acked else " ",
+                ("method", self.request.method),
+                " ",
+                (
+                    "text" if (self.response or self.error) else "title",
+                    self.request.url(),
+                ),
+            ]
+            if self.response or self.error or self.is_replay():
+                txt.append("\n" + " "*(padding+2))
+                if self.is_replay():
+                    txt.append(("method", "[replay] "))
+                if not (self.response or self.error):
+                    txt.append(("text", "waiting for response..."))
+
+            if self.response:
+                txt.append(
+                    ("ack", "!") if self.intercepting and not self.response.acked else " "
+                )
+                txt.append("-> ")
+                if self.response.code in [200, 304]:
+                    txt.append(("goodcode", str(self.response.code)))
+                else:
+                    txt.append(("error", str(self.response.code)))
+                t = self.response.headers.get("content-type")
+                if t:
+                    t = t[0].split(";")[0]
+                    txt.append(("text", " %s"%t))
+                if self.response.content:
+                    txt.append(", %s"%utils.pretty_size(len(self.response.content)))
+            elif self.error:
+                txt.append(
+                    ("error", self.error.msg)
+                )
+        if self.focus and not nofocus:
+            txt.insert(0, ("focus", ">>" + " "*(padding-2)))
+        else:
+            txt.insert(0, " "*padding)
+        return txt
+
+    def kill(self):
+        # Ack whichever intercepted message is still pending, with the kill
+        # flag set so the proxy drops the connection.
+        if self.intercepting:
+            if not self.request.acked:
+                self.request.kill = True
+                self.request.ack()
+            elif self.response and not self.response.acked:
+                self.response.kill = True
+                self.response.ack()
+            self.intercepting = False
+
+    def intercept(self):
+        self.intercepting = True
+
+    def accept_intercept(self):
+        # Release the pending message (request first, then response) and
+        # leave intercept mode.
+        if not self.request.acked:
+            self.request.ack()
+        elif self.response and not self.response.acked:
+            self.response.ack()
+        self.intercepting = False
+
+
+class State:
+    # UI-independent model: ordered flow list (newest first), a map from
+    # connection object to Flow, the focus index, and the compiled limit /
+    # intercept filter expressions.
+    def __init__(self):
+        self.flow_map = {}
+        self.flow_list = []
+        self.focus = None
+        # These are compiled filt expressions:
+        self.limit = None
+        self.intercept = None
+
+    def add_browserconnect(self, f):
+        # Newest flow goes to the top; shift focus down so the focused flow
+        # stays the same one the user was on.
+        self.flow_list.insert(0, f)
+        self.flow_map[f.connection] = f
+        if self.focus is None:
+            self.set_focus(0)
+        else:
+            self.set_focus(self.focus + 1)
+
+    def add_request(self, req):
+        # Returns the owning Flow, or False if the connection is unknown.
+        f = self.flow_map.get(req.connection)
+        if not f:
+            return False
+        f.request = req
+        return f
+
+    def add_response(self, resp):
+        f = self.flow_map.get(resp.request.connection)
+        if not f:
+            return False
+        f.response = resp
+        f.waiting = False
+        f.backup()
+        return f
+
+    def add_error(self, err):
+        f = self.flow_map.get(err.connection)
+        if not f:
+            return False
+        f.error = err
+        f.waiting = False
+        f.backup()
+        return f
+
+    @property
+    def view(self):
+        # Flows visible under the current limit filter (a fresh list --
+        # callers may not mutate flow_list through it).
+        if self.limit:
+            return [i for i in self.flow_list if i.match(self.limit)]
+        else:
+            return self.flow_list[:]
+
+    def set_limit(self, limit):
+        """
+            Limit is a compiled filter expression, or None.
+        """
+        self.limit = limit
+        # Re-clamp focus against the (possibly smaller) filtered view.
+        self.set_focus(self.focus)
+
+    def get_connection(self, itm):
+        # Normalize a Flow / connection / request / response / error down
+        # to its underlying connection object.
+        if isinstance(itm, (proxy.BrowserConnection, ReplayConnection)):
+            return itm
+        elif hasattr(itm, "connection"):
+            return itm.connection
+        elif hasattr(itm, "request"):
+            return itm.request.connection
+
+    def lookup(self, itm):
+        """
+            Checks for matching connection, using a Flow, Replay Connection,
+            BrowserConnection, Request, Response or Error object. Returns None
+            if not found.
+        """
+        connection = self.get_connection(itm)
+        return self.flow_map.get(connection)
+
+    def get_focus(self):
+        if not self.view:
+            return None, None
+        return self.view[self.focus], self.focus
+
+    def set_focus(self, idx):
+        # Clamp idx to the view and move the focus flag to that flow.
+        if self.view:
+            for i in self.view:
+                i.focus = False
+            if idx >= len(self.view):
+                idx = len(self.view) - 1
+            elif idx < 0:
+                idx = 0
+            self.view[idx].focus = True
+            self.focus = idx
+
+    def get_from_pos(self, pos):
+        if len(self.view) <= pos or pos < 0:
+            return None, None
+        return self.view[pos], pos
+
+    def get_next(self, pos):
+        return self.get_from_pos(pos+1)
+
+    def get_prev(self, pos):
+        return self.get_from_pos(pos-1)
+
+    def delete_flow(self, f):
+        # Refuses to delete a flow that is mid-intercept (caller shows a
+        # message); returns True on success.
+        if not f.intercepting:
+            c = self.get_connection(f)
+            self.view[self.focus].focus = False
+            del self.flow_map[c]
+            self.flow_list.remove(f)
+            self.set_focus(self.focus)
+            return True
+        return False
+
+    def clear(self):
+        # Iterate over a copy: delete_flow mutates flow_list.
+        for i in self.flow_list[:]:
+            self.delete_flow(i)
+
+    def kill_flow(self, f):
+        f.kill()
+        self.delete_flow(f)
+
+    def revert(self, f):
+        """
+            Replaces the matching connection object with a ReplayConnection object.
+        """
+        # Re-key flow_map: revert() may restore a different connection object.
+        conn = self.get_connection(f)
+        del self.flow_map[conn]
+        f.revert()
+        self.flow_map[f.connection] = f
+
+    def replay(self, f, masterq):
+        """
+            Replaces the matching connection object with a ReplayConnection object.
+
+            Returns None if successful, or error message if not.
+        """
+        #begin nocover
+        if f.intercepting:
+            return "Can't replay while intercepting..."
+        if f.request:
+            f.backup()
+            conn = self.get_connection(f)
+            del self.flow_map[conn]
+            rp = ReplayConnection()
+            f.connection = rp
+            f.request.connection = rp
+            if f.request.content:
+                f.request.headers["content-length"] = [str(len(f.request.content))]
+            f.response = None
+            f.error = None
+            self.flow_map[rp] = f
+            rt = ReplayThread(f, masterq)
+            rt.start()
+        #end nocover
+
+
+#begin nocover
+
+class ConsoleMaster(controller.Master):
+    # The interactive curses UI: owns the urwid screen, the State model,
+    # the view stack (connection list / detail / help), the prompt
+    # machinery, and the proxy event handlers.
+    palette = [
+        ('body', 'black', 'dark cyan', 'standout'),
+        ('foot', 'light gray', 'black'),
+        ('title', 'white', 'black',),
+        ('editline', 'white', 'black',),
+
+        # Help
+        ('key', 'light cyan', 'black', 'underline'),
+        ('head', 'white', 'black'),
+        ('text', 'light gray', 'black'),
+
+        # List and Connections
+        ('method', 'dark cyan', 'black'),
+        ('focus', 'yellow', 'black'),
+        ('goodcode', 'light green', 'black'),
+        ('error', 'light red', 'black'),
+        ('header', 'dark cyan', 'black'),
+        ('heading', 'white', 'dark blue'),
+        ('inactive', 'dark gray', 'black'),
+        ('ack', 'light red', 'black'),
+
+        # Hex view
+        ('offset', 'dark cyan', 'black'),
+    ]
+    footer_text_default = [
+        ('key', "?"), ":help ",
+        ('key', "q"), ":exit ",
+    ]
+    footer_text_connview = [
+        ('key', "tab"), ":toggle view ",
+        ('key', "?"), ":help ",
+        ('key', "q"), ":back ",
+    ]
+    def __init__(self, server, config):
+        controller.Master.__init__(self, server)
+        self.config = config
+        self.state = State()
+
+        # Sticky-cookie filter (compiled filt expression) and the cookies
+        # captured per (host, port) while it is active.
+        self.stickycookie = None
+        self.stickyhosts = {}
+
+    def run(self):
+        self.ui = urwid.curses_display.Screen()
+        self.ui.register_palette(self.palette)
+        self.conn_list_view = ConnectionListView(self, self.state)
+
+        self.view = None
+        self.statusbar = None
+        self.header = None
+        self.body = None
+
+        self.prompting = False
+        self.onekey = False
+        self.view_connlist()
+
+        self.ui.run_wrapper(self.loop)
+        # If True, quit just pops out to connection list view.
+        # NOTE(review): this assignment runs only after the UI loop has
+        # already exited, so it has no effect -- view_connlist() is what
+        # actually initializes nested. Looks like dead code.
+        self.nested = False
+
+    def make_view(self):
+        self.view = urwid.Frame(
+            self.body,
+            header = self.header,
+            footer = self.statusbar
+        )
+        self.view.set_focus("body")
+
+    def view_connlist(self):
+        self.body = urwid.ListBox(self.conn_list_view)
+        self.statusbar = StatusBar(self, self.footer_text_default)
+        self.header = None
+        self.nested = False
+        self.make_view()
+
+    def view_connection(self, flow):
+        self.statusbar = StatusBar(self, self.footer_text_connview)
+        self.body = ConnectionView(self, self.state, flow)
+        self.header = ConnectionViewHeader(flow)
+        self.nested = True
+        self.make_view()
+
+    def helptext(self):
+        # Assemble the help screen from the key tables and the filter
+        # operators registered in filt.
+        text = []
+        text.extend([("head", "Global keys:\n")])
+        keys = [
+            ("a", "accept intercepted request or response"),
+            ("i", "set interception pattern"),
+            ("j, k", "up, down"),
+            ("l", "set limit filter pattern"),
+            ("q", "quit / return to connection list"),
+            ("r", "replay request"),
+            ("s", "set sticky cookie expression"),
+            ("R", "revert changes to request"),
+            ("page up/down", "page up/down"),
+            ("space", "page down"),
+            ("enter", "view connection"),
+        ]
+        text.extend(format_keyvals(keys, key="key", val="text", indent=4))
+
+        text.extend([("head", "\n\nConnection list keys:\n")])
+        keys = [
+            ("C", "clear connection list"),
+            ("d", "delete connection from view"),
+            ("z", "kill and delete connection, even if it's mid-intercept"),
+        ]
+        text.extend(format_keyvals(keys, key="key", val="text", indent=4))
+
+        text.extend([("head", "\n\nConnection view keys:\n")])
+        keys = [
+            ("b", "toggle hexdump view"),
+            ("e", "edit response/request"),
+            ("v", "view contents in external viewer"),
+            ("tab", "toggle response/request view"),
+        ]
+        text.extend(format_keyvals(keys, key="key", val="text", indent=4))
+
+        text.extend([("head", "\n\nFilter expressions:\n")])
+        f = []
+        for i in filt.filt_unary:
+            f.append(
+                ("~%s"%i.code, i.help)
+            )
+        for i in filt.filt_rex:
+            f.append(
+                ("~%s regex"%i.code, i.help)
+            )
+        for i in filt.filt_int:
+            f.append(
+                ("~%s int"%i.code, i.help)
+            )
+        f.sort()
+        f.extend(
+            [
+                ("!", "unary not"),
+                ("&", "and"),
+                ("|", "or"),
+                ("(...)", "grouping"),
+            ]
+        )
+        text.extend(format_keyvals(f, key="key", val="text", indent=4))
+
+        text.extend(
+            [
+                "\n",
+                ("text", "    Regexes are Python-style.\n"),
+                ("text", "    Regexes can be specified as quoted strings.\n"),
+                ("text", "    Header matching (~h, ~hq, ~hs) is against a string of the form \"name: value\".\n"),
+                ("text", "    Expressions with no operators are regex matches against URL.\n"),
+                ("text", "    Default binary operator is &.\n"),
+                ("head", "\n    Examples:\n"),
+            ]
+        )
+        # NOTE(review): the first example's description is missing its
+        # closing quote after "google.com".
+        examples = [
+            ("google\.com", "Url containing \"google.com"),
+            ("~r ~b test", "Requests where body contains \"test\""),
+            ("!(~r & ~t \"text/html\")", "Anything but requests with a text/html content type."),
+        ]
+        text.extend(format_keyvals(examples, key="key", val="text", indent=4))
+        return urwid.ListBox([urwid.Text(text)])
+
+    def view_help(self):
+        self.body = self.helptext()
+        self.header = None
+        self.nested = True
+        self.make_view()
+
+    def prompt(self, prompt, callback):
+        # Enter prompt mode: callback receives the entered text and may
+        # return an error message to display.
+        self.statusbar.prompt(prompt)
+        self.view.set_focus("footer")
+        self.prompting = callback
+
+    def prompt_onekey(self, prompt, keys, callback):
+        """
+            Keys are a set of (word, key) tuples. The appropriate key in the
+            word is highlighted.
+        """
+        prompt = [prompt, "("]
+        mkup = []
+        for i, e in enumerate(keys):
+            parts = e[0].split(e[1], 1)
+            if parts[0]:
+                mkup.append(("text", parts[0]))
+            mkup.append(("key", e[1]))
+            if parts[1]:
+                mkup.append(("text", parts[1]))
+            if i < len(keys)-1:
+                mkup.append(",")
+        prompt.extend(mkup)
+        prompt.append(")? ")
+        # The set of keys that immediately complete the prompt.
+        self.onekey = "".join([i[1] for i in keys])
+        self.prompt(prompt, callback)
+
+    def prompt_done(self):
+        self.prompting = False
+        self.onekey = False
+        self.view.set_focus("body")
+        self.statusbar.message("")
+
+    def prompt_execute(self, txt=None):
+        if not txt:
+            txt = self.statusbar.get_edit_text()
+        p = self.prompting
+        self.prompt_done()
+        msg = p(txt)
+        if msg:
+            self.statusbar.message(msg)
+
+    def prompt_cancel(self):
+        self.prompt_done()
+
+    def search(self, txt):
+        # NOTE(review): not implemented, and no key is bound to it yet.
+        pass
+
+    def set_limit(self, txt):
+        # Prompt callback: empty text clears the limit.
+        if txt:
+            f = filt.parse(txt)
+            if not f:
+                return "Invalid filter expression."
+            self.state.set_limit(f)
+        else:
+            self.state.set_limit(None)
+        self.sync_list_view()
+
+    def set_intercept(self, txt):
+        if txt:
+            self.state.intercept = filt.parse(txt)
+            if not self.state.intercept:
+                return "Invalid filter expression."
+        else:
+            self.state.intercept = None
+        self.sync_list_view()
+
+    def set_stickycookie(self, txt):
+        # Clearing the expression also drops all captured cookies.
+        if txt:
+            self.stickycookie = filt.parse(txt)
+            if not self.stickycookie:
+                return "Invalid filter expression."
+        else:
+            self.stickyhosts = {}
+            self.stickycookie = None
+
+    def drawscreen(self):
+        size = self.ui.get_cols_rows()
+        canvas = self.view.render(size, focus=1)
+        self.ui.draw_screen(size, canvas)
+        return size
+
+    def loop(self):
+        # Main UI loop: drain proxy events, then dispatch keys. Prompt
+        # mode is handled first; otherwise global keys are translated and
+        # anything left over falls through to the active view's keypress.
+        q = Queue.Queue()
+        self.masterq = q
+        slave = controller.Slave(q, self.server)
+        slave.start()
+        try:
+            while not self._shutdown:
+                size = self.drawscreen()
+                self.statusbar.redraw()
+                self.tick(q)
+                keys = self.ui.get_input()
+                for k in keys:
+                    if self.prompting:
+                        if k == "esc":
+                            self.prompt_cancel()
+                            k = None
+                        elif self.onekey:
+                            if k == "enter":
+                                self.prompt_cancel()
+                            elif k in self.onekey:
+                                self.prompt_execute(k)
+                            k = None
+                        elif k == "enter":
+                            self.prompt_execute()
+                            k = None
+                    else:
+                        self.statusbar.message("")
+                        if k == "?":
+                            self.view_help()
+                        elif k == "l":
+                            self.prompt("Limit: ", self.set_limit)
+                            k = None
+                        elif k == "i":
+                            self.prompt("Intercept: ", self.set_intercept)
+                            k = None
+                        elif k == "C":
+                            self.clear_connections()
+                        elif k == "j":
+                            k = "down"
+                        elif k == "k":
+                            k = "up"
+                        elif k == " ":
+                            k = "page down"
+                        elif k in ('q','Q'):
+                            if self.nested:
+                                self.view_connlist()
+                            else:
+                                raise Stop
+                        elif k == "s":
+                            self.prompt("Sticky cookie: ", self.set_stickycookie)
+                            k = None
+                    if k:
+                        self.view.keypress(size, k)
+        except (Stop, KeyboardInterrupt):
+            pass
+        self.shutdown()
+
+    def shutdown(self):
+        # Release any intercepted messages before stopping the proxy, so
+        # worker threads blocked on an ack can exit.
+        for i in self.state.flow_list:
+            i.kill()
+        controller.Master.shutdown(self)
+
+    def sync_list_view(self):
+        self.conn_list_view._modified()
+
+    def clear_connections(self):
+        self.state.clear()
+        self.sync_list_view()
+
+    def delete_connection(self, f):
+        self.state.delete_flow(f)
+        self.sync_list_view()
+
+    def kill_connection(self, f):
+        self.state.kill_flow(f)
+
+    def refresh_connection(self, c):
+        # Fan out to whichever current widgets care about this flow.
+        if hasattr(self.header, "refresh_connection"):
+            self.header.refresh_connection(c)
+        if hasattr(self.body, "refresh_connection"):
+            self.body.refresh_connection(c)
+        if hasattr(self.statusbar, "refresh_connection"):
+            self.statusbar.refresh_connection(c)
+
+    # Handlers
+    def handle_browserconnection(self, r):
+        f = Flow(r)
+        self.state.add_browserconnect(f)
+        r.ack()
+        self.sync_list_view()
+
+    def handle_error(self, r):
+        f = self.state.add_error(r)
+        if not f:
+            r.ack()
+        else:
+            self.sync_list_view()
+        self.refresh_connection(f)
+
+    def handle_request(self, r):
+        f = self.state.add_request(r)
+        if not f:
+            r.ack()
+        else:
+            # Sticky cookies: re-inject the last cookie seen for this host
+            # if the request carries none.
+            if f.match(self.stickycookie):
+                hid = (f.request.host, f.request.port)
+                if f.request.headers.has_key("cookie"):
+                    self.stickyhosts[hid] = f.request.headers["cookie"]
+                elif hid in self.stickyhosts:
+                    f.request.headers["cookie"] = self.stickyhosts[hid]
+
+            # Hold the message (no ack) if it matches the intercept filter.
+            if f.match(self.state.intercept):
+                f.intercept()
+            else:
+                r.ack()
+            self.sync_list_view()
+            self.refresh_connection(f)
+
+    def handle_response(self, r):
+        f = self.state.add_response(r)
+        if not f:
+            r.ack()
+        else:
+            if f.match(self.stickycookie):
+                hid = (f.request.host, f.request.port)
+                if f.response.headers.has_key("set-cookie"):
+                    self.stickyhosts[hid] = f.response.headers["set-cookie"]
+
+            if f.match(self.state.intercept):
+                f.intercept()
+            else:
+                r.ack()
+            self.sync_list_view()
+            self.refresh_connection(f)
diff --git a/libmproxy/controller.py b/libmproxy/controller.py
new file mode 100644
index 00000000..4955aae3
--- /dev/null
+++ b/libmproxy/controller.py
@@ -0,0 +1,119 @@
+
+# Copyright (C) 2010 Aldo Cortesi
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import Queue, threading
+
+#begin nocover
+
+class Msg:
+    # Base event message: send() posts the message to the master queue and
+    # blocks until some handler calls ack(), making handling synchronous
+    # from the worker thread's point of view.
+    def __init__(self):
+        self.q = Queue.Queue()
+        self.acked = False
+
+    def ack(self, data=None):
+        self.acked = True
+        # The reply defaults to the message itself when no data is given.
+        self.q.put(data or self)
+
+    def send(self, masterq):
+        self.acked = False
+        masterq.put(self)
+        return self.q.get()
+
+
+class Slave(threading.Thread):
+    # Runs the proxy server in a background thread, with the master queue
+    # wired in so server events reach the master.
+    def __init__(self, masterq, server):
+        self.masterq, self.server = masterq, server
+        self.server.set_mqueue(masterq)
+        threading.Thread.__init__(self)
+
+    def run(self):
+        self.server.serve_forever()
+
+
+class Master:
+    # Event-loop base class: drains messages off the master queue and
+    # dispatches each to handle_<classname>() if defined, else acks it.
+    def __init__(self, server):
+        self.server = server
+        self._shutdown = False
+        self.masterq = None
+
+    def tick(self, q):
+        try:
+            # Small timeout to prevent pegging the CPU
+            msg = q.get(timeout=0.01)
+            self.handle(msg)
+        except Queue.Empty:
+            pass
+
+    def run(self):
+        q = Queue.Queue()
+        self.masterq = q
+        slave = Slave(q, self.server)
+        slave.start()
+        while not self._shutdown:
+            self.tick(q)
+        self.shutdown()
+
+    def handle(self, msg):
+        # Dispatch by lower-cased class name, e.g. Request -> handle_request.
+        c = "handle_" + msg.__class__.__name__.lower()
+        m = getattr(self, c, None)
+        if m:
+            m(msg)
+        else:
+            # Always ack unhandled messages so the sender is not left blocked.
+            msg.ack()
+
+    def shutdown(self):
+        # Idempotent: safe to call from both the loop and signal handlers.
+        if not self._shutdown:
+            self._shutdown = True
+            self.server.shutdown()
+
+
+class DumpMaster(Master):
+    """
+        A simple master that just dumps to screen.
+    """
+    # verbosity levels (as implemented below): 1 = request + response
+    # summary lines, 2 = request summary + full response, 3 = full request
+    # and response. 0 prints nothing.
+    def __init__(self, server, verbosity):
+        self.verbosity = verbosity
+        Master.__init__(self, server)
+
+    def run(self):
+        try:
+            return Master.run(self)
+        except KeyboardInterrupt:
+            self.shutdown()
+
+    def handle_response(self, msg):
+        if 0 < self.verbosity < 3:
+            print >> sys.stderr, ">>",
+            print >> sys.stderr, msg.request.short()
+        if self.verbosity == 1:
+            print >> sys.stderr, "<<",
+            print >> sys.stderr, msg.short()
+        elif self.verbosity == 2:
+            print >> sys.stderr, "<<"
+            for i in msg.assemble().splitlines():
+                print >> sys.stderr, "\t", i
+            print >> sys.stderr, "<<"
+        elif self.verbosity == 3:
+            print >> sys.stderr, ">>"
+            for i in msg.request.assemble().splitlines():
+                print >> sys.stderr, "\t", i
+            print >> sys.stderr, ">>"
+            print >> sys.stderr, "<<"
+            for i in msg.assemble().splitlines():
+                print >> sys.stderr, "\t", i
+            print >> sys.stderr, "<<"
+        # Ack so the proxy worker thread can continue.
+        msg.ack()
diff --git a/libmproxy/filt.py b/libmproxy/filt.py
new file mode 100644
index 00000000..49fff6c7
--- /dev/null
+++ b/libmproxy/filt.py
@@ -0,0 +1,316 @@
+
+# Copyright (C) 2010 Aldo Cortesi
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+ The following operators are understood:
+
+ ~q Request
+ ~s Response
+
+ Headers:
+
+ Patterns are matched against "name: value" strings. Field names are
+ all-lowercase.
+
+ ~h rex Header line in either request or response
+ ~hq rex Header in request
+ ~hs rex Header in response
+
+ ~b rex Expression in the body of either request or response
+ ~bq rex Expression in the body of request
+    ~bs rex         Expression in the body of response
+ ~t rex Shortcut for content-type header.
+
+ ~u rex URL
+ ~c CODE Response code.
+ rex Equivalent to ~u rex
+"""
+import re, sys
+import pyparsing as pp
+
+
class _Token:
    # Base class for every node in the filter expression tree.
    def dump(self, indent=0, fp=sys.stdout):
        # Debug pretty-printer: node class name plus its regex, if any.
        print >> fp, "\t"*indent, self.__class__.__name__,
        if hasattr(self, "expr"):
            print >> fp, "(%s)"%self.expr,
        print >> fp
+
+
class _Action(_Token):
    # Base class for leaf filters built directly by the parser.
    @classmethod
    def make(klass, s, loc, toks):
        # toks[0] is the "~x" literal itself; the rest are the arguments.
        return klass(*toks[1:])
+
+
class FReq(_Action):
    code = "q"
    help = "Match request"
    def __call__(self, conn):
        # Anything that is not a response is a request.
        if conn.is_response():
            return False
        return True
+
+
class FResp(_Action):
    code = "s"
    help = "Match response"
    def __call__(self, conn):
        # Only responses match.
        if conn.is_response():
            return True
        return False
+
+
class _Rex(_Action):
    """Base class for filters whose single argument is a regex."""
    def __init__(self, expr):
        # Compile eagerly so an invalid pattern fails at parse time.
        self.re = re.compile(expr)
        self.expr = expr
+
+
+def _check_content_type(expr, o):
+ val = o.headers.get("content-type")
+ if val and re.search(expr, val[0]):
+ return True
+ return False
+
+
class FContentType(_Rex):
    code = "t"
    help = "Content-type header"
    def __call__(self, o):
        # Check this message first; for a response, fall back to its request.
        matched = _check_content_type(self.expr, o)
        if not matched and o.is_response():
            matched = _check_content_type(self.expr, o.request)
        return matched
+
+
class FRequestContentType(_Rex):
    code = "tq"
    help = "Request Content-Type header"
    def __call__(self, o):
        # Always inspect the request side, even when handed a response.
        target = o.request if o.is_response() else o
        return _check_content_type(self.expr, target)
+
+
class FResponseContentType(_Rex):
    code = "ts"
    # Fixed: this help text previously read "Request Content-Type header",
    # copied from FRequestContentType above.
    help = "Response Content-Type header"
    def __call__(self, o):
        # Requests never match; only a response's own content-type is checked.
        if o.is_response():
            return _check_content_type(self.expr, o)
        else:
            return False
+
+
class FHead(_Rex):
    code = "h"
    help = "Header"
    def __call__(self, o):
        # Try this message's headers first; for a response, fall back
        # to the headers of the originating request.
        match = o.headers.match_re(self.expr)
        if not match and o.is_response():
            match = o.request.headers.match_re(self.expr)
        return match
+
+
class FHeadRequest(_Rex):
    code = "hq"
    help = "Request header"
    def __call__(self, o):
        # Always match against the request-side headers.
        headers = o.request.headers if o.is_response() else o.headers
        return headers.match_re(self.expr)
+
+
class FHeadResponse(_Rex):
    code = "hs"
    help = "Response header"
    def __call__(self, o):
        # Requests never match.
        if o.is_response():
            return o.headers.match_re(self.expr)
        return False
+
+
class FBod(_Rex):
    code = "b"
    help = "Body"
    def __call__(self, o):
        # Search this message's body; for a response, also search the
        # body of the originating request.
        targets = [o]
        if o.is_response():
            targets.append(o.request)
        for t in targets:
            if t.content and re.search(self.expr, t.content):
                return True
        return False
+
+
class FBodRequest(_Rex):
    code = "bq"
    help = "Request body"
    def __call__(self, o):
        # Always search the request-side body.
        req = o.request if o.is_response() else o
        if req.content and re.search(self.expr, req.content):
            return True
        return False
+
+
class FBodResponse(_Rex):
    code = "bs"
    help = "Response body"
    def __call__(self, o):
        # Requests never match; search only a response's own body.
        if o.is_response() and o.content and re.search(self.expr, o.content):
            return True
        return False
+
+
class FUrl(_Rex):
    code = "u"
    help = "URL"
    # FUrl is special, because it can be "naked".
    @classmethod
    def make(klass, s, loc, toks):
        # With an explicit "~u" the first token is the operator literal;
        # drop it. A naked pattern arrives as a single token.
        if len(toks) > 1:
            toks = toks[1:]
        return klass(*toks)

    def __call__(self, o):
        # The URL always lives on the request side.
        target = o.request if o.is_response() else o
        return re.search(self.expr, target.url())
+
+
class _Int(_Action):
    # Base class for filters whose single argument is an integer token.
    def __init__(self, num):
        # The parser hands us a string; store it as an int for comparison.
        self.num = int(num)
+
+
class FCode(_Int):
    code = "c"
    help = "HTTP response code"
    def __call__(self, o):
        # Requests have no status code, so they never match.
        if not o.is_response():
            return False
        return o.code == self.num
+
+
class FAnd(_Token):
    # Conjunction node: matches only when every child filter matches.
    def __init__(self, lst):
        self.lst = lst

    def dump(self, indent=0, fp=sys.stdout):
        print >> fp, "\t"*indent, self.__class__.__name__
        for i in self.lst:
            i.dump(indent+1, fp)

    def __call__(self, o):
        return all([i(o) for i in self.lst])
+
+
class FOr(_Token):
    # Disjunction node: matches when any child filter matches.
    def __init__(self, lst):
        self.lst = lst

    def dump(self, indent=0, fp=sys.stdout):
        print >> fp, "\t"*indent, self.__class__.__name__
        for i in self.lst:
            i.dump(indent+1, fp)

    def __call__(self, o):
        return any([i(o) for i in self.lst])
+
+
class FNot(_Token):
    # Negation node; the parser hands us a one-element token sequence.
    def __init__(self, itm):
        self.itm = itm[0]

    def dump(self, indent=0, fp=sys.stdout):
        print >> fp, "\t"*indent, self.__class__.__name__
        self.itm.dump(indent + 1, fp)

    def __call__(self, o):
        return not self.itm(o)
+
# Registries consumed by _make() below; one grammar branch per entry.
# Filters that take no argument.
filt_unary = [
    FReq,
    FResp
]
# Filters that take a single regex argument.
filt_rex = [
    FHeadRequest,
    FHeadResponse,
    FHead,
    FBodRequest,
    FBodResponse,
    FBod,
    FUrl,
    FRequestContentType,
    FResponseContentType,
    FContentType,
]
# Filters that take a single integer argument.
filt_int = [
    FCode
]
def _make():
    """
        Builds the pyparsing grammar for filter expressions. Called once
        at import time to produce the module-level "bnf" object.
    """
    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = []
    for klass in filt_unary:
        f = pp.Literal("~%s"%klass.code)
        f.setParseAction(klass.make)
        parts.append(f)

    # A regex may be bare, or quoted (single or double) when it contains
    # grammar characters such as parentheses or "~".
    simplerex = "".join([c for c in pp.printables if c not in "()~'\""])
    rex = pp.Word(simplerex) |\
          pp.QuotedString("\"", escChar='\\') |\
          pp.QuotedString("'", escChar='\\')
    for klass in filt_rex:
        f = pp.Literal("~%s"%klass.code) + rex.copy()
        f.setParseAction(klass.make)
        parts.append(f)

    for klass in filt_int:
        f = pp.Literal("~%s"%klass.code) + pp.Word(pp.nums)
        f.setParseAction(klass.make)
        parts.append(f)

    # A naked rex is a URL rex:
    f = rex.copy()
    f.setParseAction(FUrl.make)
    parts.append(f)

    atom = pp.MatchFirst(parts)
    expr = pp.operatorPrecedence(
        atom,
        [
            (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT, lambda x: FNot(*x)),
            (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT, lambda x: FAnd(*x)),
            (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT, lambda x: FOr(*x)),
        ]
    )
    # Adjacent expressions with no explicit operator are ANDed together.
    expr = pp.OneOrMore(expr)
    return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
bnf = _make()
+
+
def parse(s):
    """
        Parse a filter expression, returning the root filter object, or
        None if the expression is invalid.
    """
    try:
        return bnf.parseString(s, parseAll=True)[0]
    except pp.ParseException:
        return None
+
diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py
new file mode 100644
index 00000000..c2a9b494
--- /dev/null
+++ b/libmproxy/proxy.py
@@ -0,0 +1,374 @@
+"""
+ A simple proxy server implementation, which always reads all of a server
+ response into memory, performs some transformation, and then writes it back
+ to the client.
+
+ Development started from Neil Schemenauer's munchy.py
+"""
+import sys, os, time, string, socket, urlparse, re, select, copy
+import SocketServer, ssl
+import utils, controller
+
+NAME = "mitmproxy"
+config = None
+
+
class ProxyError(Exception):
    """A proxy-level failure, carrying the HTTP status code to report."""
    def __init__(self, code, msg):
        self.code = code
        self.msg = msg

    def __str__(self):
        return "ProxyError(%s, %s)"%(self.code, self.msg)
+
+
class Config:
    """Run-time configuration for the proxy server."""
    def __init__(self, pemfile):
        # PEM file used for SSL interception (served as both cert and key).
        self.pemfile = pemfile
+
+
def try_del(d, key):
    """
        Delete d[key], silently ignoring a missing key.
    """
    # Parameter renamed from "dict", which shadowed the builtin; callers
    # pass it positionally, so this is interface-compatible.
    try:
        del d[key]
    except KeyError:
        pass
+
+
def parse_url(url):
    """
        Returns a (scheme, host, port, path) tuple, or None on error.
    """
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    if not scheme:
        return None
    if ':' in netloc:
        # rsplit tolerates extra colons in the host portion, where a
        # plain two-way split would raise ValueError.
        host, port = netloc.rsplit(':', 1)
        try:
            port = int(port)
        except ValueError:
            # Non-numeric port: honour the documented "None on error"
            # contract instead of propagating ValueError.
            return None
    else:
        host = netloc
        port = 80
    path = urlparse.urlunparse(('', '', path, params, query, fragment))
    if not path:
        path = "/"
    return scheme, host, port, path
+
+
def parse_proxy_request(request):
    """
        Parse a proxy request line. Return (method, scheme, host, port, path).
        Scheme/host/port are None for origin-form requests; scheme and path
        are None for CONNECT. Raise ProxyError on error.
    """
    try:
        method, url, protocol = request.split()
    except ValueError:
        raise ProxyError(400, "Can't parse request")
    if method in ['GET', 'HEAD', 'POST']:
        if url.startswith("/"):
            # Origin-form request line: no host information available.
            scheme, port, host, path = None, None, None, url
        else:
            parts = parse_url(url)
            if not parts:
                raise ProxyError(400, "Invalid url: %s"%url)
            scheme, host, port, path = parts
    elif method == 'CONNECT':
        scheme = None
        path = None
        try:
            # rsplit tolerates extra colons in the host portion; previously
            # a malformed "host:port" escaped as an uncaught ValueError.
            host, port = url.rsplit(":", 1)
            port = int(port)
        except ValueError:
            raise ProxyError(400, "Can't parse request")
    else:
        raise ProxyError(501, "Unknown request method: %s" % method)
    return method, scheme, host, port, path
+
+
class Request(controller.Msg):
    """
        An HTTP request from the browser, routed through the master so it
        can be inspected or modified before going to the server.
    """
    # Request line + headers + blank line + body.
    FMT = '%s %s HTTP/1.0\r\n%s\r\n%s'
    def __init__(self, connection, host, port, scheme, method, path, headers, content):
        self.connection = connection
        self.host, self.port, self.scheme = host, port, scheme
        self.method, self.path, self.headers, self.content = method, path, headers, content
        # Set by a handler to abort the request before it reaches the server.
        self.kill = False
        controller.Msg.__init__(self)

    def copy(self):
        # Shallow copy, but with an independent copy of the headers.
        c = copy.copy(self)
        c.headers = self.headers.copy()
        return c

    def url(self):
        # Omit the port when it is the default for the scheme.
        if (self.port, self.scheme) in [(80, "http"), (443, "https")]:
            host = self.host
        else:
            host = "%s:%s"%(self.host, self.port)
        return "%s://%s%s"%(self.scheme, host, self.path)

    def set_url(self, url):
        # Returns False (request unchanged) if the url doesn't parse.
        parts = parse_url(url)
        if not parts:
            return False
        self.scheme, self.host, self.port, self.path = parts
        return True

    def is_response(self):
        return False

    def short(self):
        # One-line summary, e.g. "GET http://host/path".
        return "%s %s"%(self.method, self.url())

    def assemble(self):
        """
            Assembles the request for transmission to the server. We make some
            modifications to make sure interception works properly.
        """
        # Strip encoding/keep-alive negotiation and force a non-persistent
        # connection, so the response body is bounded by connection close.
        headers = self.headers.copy()
        try_del(headers, 'accept-encoding')
        try_del(headers, 'proxy-connection')
        try_del(headers, 'keep-alive')
        try_del(headers, 'connection')
        headers["connection"] = ["close"]
        data = (self.method, self.path, str(headers), self.content)
        return self.FMT%data
+
+
class Response(controller.Msg):
    """
        An HTTP response from the server, routed through the master so it
        can be inspected or modified before going to the browser.
    """
    # Status line + headers + blank line + body.
    FMT = '%s\r\n%s\r\n%s'
    def __init__(self, request, code, proto, msg, headers, content):
        self.request = request
        self.code, self.proto, self.msg = code, proto, msg
        self.headers, self.content = headers, content
        # Set by a handler to drop the response instead of delivering it.
        self.kill = False
        controller.Msg.__init__(self)

    def copy(self):
        # Shallow copy, but with an independent copy of the headers.
        c = copy.copy(self)
        c.headers = self.headers.copy()
        return c

    def is_response(self):
        return True

    def short(self):
        return "%s %s"%(self.code, self.proto)

    def assemble(self):
        """
            Assembles the response for transmission to the client. We make some
            modifications to make sure interception works properly.
        """
        # Same hop-by-hop header stripping as Request.assemble.
        headers = self.headers.copy()
        try_del(headers, 'accept-encoding')
        try_del(headers, 'proxy-connection')
        try_del(headers, 'connection')
        try_del(headers, 'keep-alive')
        headers["connection"] = ["close"]
        proto = "%s %s %s"%(self.proto, self.code, self.msg)
        data = (proto, str(headers), self.content)
        return self.FMT%data
+
+
class BrowserConnection(controller.Msg):
    """Event sent to the master when a client connects to the proxy."""
    def __init__(self, address, port):
        self.address = address
        self.port = port
        controller.Msg.__init__(self)

    def copy(self):
        # A shallow copy suffices: only simple attributes are held.
        return copy.copy(self)
+
+
class Error(controller.Msg):
    """Event sent to the master when handling a connection fails."""
    def __init__(self, connection, msg):
        self.connection = connection
        self.msg = msg
        controller.Msg.__init__(self)

    def copy(self):
        # A shallow copy suffices: only simple attributes are held.
        return copy.copy(self)
+
+
class FileLike:
    """
        Wraps an object exposing read() (e.g. an ssl-wrapped socket) so it
        can be used where a file-like object is expected.
    """
    def __init__(self, o):
        self.o = o

    def __getattr__(self, attr):
        # Anything we don't implement is delegated to the wrapped object.
        return getattr(self.o, attr)

    def flush(self):
        # No buffering of our own, so nothing to do.
        pass

    def readline(self):
        # The wrapped object has no readline(); emulate it byte-by-byte.
        chunks = []
        while True:
            ch = self.read(1)
            if not ch:
                break
            chunks.append(ch)
            if ch == '\n':
                break
        return ''.join(chunks)
+
+
+class ServerConnection:
+ def __init__(self, request):
+ self.request = request
+ self.server, self.rfile, self.wfile = None, None, None
+ self.connect()
+ self.send_request()
+
+ def connect(self):
+ try:
+ addr = socket.gethostbyname(self.request.host)
+ server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ if self.request.scheme == "https":
+ server = ssl.wrap_socket(server)
+ server.connect((addr, self.request.port))
+ except socket.error, err:
+ raise ProxyError(200, 'Error connecting to "%s": %s' % (self.request.host, err))
+ self.server = server
+ self.rfile, self.wfile = server.makefile('rb'), server.makefile('wb')
+
+ def send_request(self):
+ try:
+ self.wfile.write(self.request.assemble())
+ self.wfile.flush()
+ except socket.error, err:
+ raise ProxyError(500, 'Error sending data to "%s": %s' % (request.host, err))
+
+ def read_response(self):
+ proto = self.rfile.readline()
+ parts = proto.strip().split(" ", 2)
+ if not len(parts) == 3:
+ raise ProxyError(200, "Invalid server response.")
+ proto, code, msg = parts
+ code = int(code)
+ headers = utils.Headers()
+ headers.read(self.rfile)
+ if headers.has_key("content-length"):
+ content = self.rfile.read(int(headers["content-length"][0]))
+ else:
+ content = self.rfile.read()
+ return Response(self.request, code, proto, msg, headers, content)
+
+ def terminate(self):
+ try:
+ if not self.wfile.closed:
+ self.wfile.flush()
+ self.server.close()
+ except IOError:
+ pass
+
+
class ProxyHandler(SocketServer.StreamRequestHandler):
    """
        Handles one browser connection: reads the request, relays it via
        ServerConnection, and writes the response back, sending each flow
        event through the master queue along the way.
    """
    def __init__(self, request, client_address, server, q):
        self.mqueue = q
        SocketServer.StreamRequestHandler.__init__(self, request, client_address, server)

    def handle(self):
        server = None
        bc = BrowserConnection(*self.client_address)
        bc.send(self.mqueue)
        try:
            request = self.read_request(bc)
            # send() returns the (possibly modified) message from the master.
            request = request.send(self.mqueue)
            if request.kill:
                self.finish()
                return
            server = ServerConnection(request)
            response = server.read_response()
            response = response.send(self.mqueue)
            if response.kill:
                server.terminate()
                self.finish()
                return
            self.send_response(response)
        except IOError:
            pass
        except ProxyError, e:
            err = Error(bc, e.msg)
            err.send(self.mqueue)
            self.send_error(e.code, e.msg)
        if server:
            server.terminate()
        self.finish()

    def read_request(self, connection):
        request = self.rfile.readline()
        method, scheme, host, port, path = parse_proxy_request(request)
        if not host:
            raise ProxyError(200, 'Invalid request: %s'%request)
        if method == "CONNECT":
            # Discard additional headers sent to the proxy. Should I expose
            # these to users?
            while 1:
                d = self.rfile.readline()
                if not d.strip():
                    break
            # Claim the tunnel is established, then negotiate SSL with the
            # client ourselves so the tunnelled request is readable.
            self.wfile.write('HTTP/1.1 200 Connection established\r\n')
            self.wfile.write('Proxy-agent: %s\r\n'%NAME)
            self.wfile.write('\r\n')
            self.wfile.flush()
            self.connection = ssl.wrap_socket(
                self.connection,
                certfile = config.pemfile,
                keyfile = config.pemfile,
                server_side = True,
                ssl_version = ssl.PROTOCOL_SSLv23,
                do_handshake_on_connect = False
            )
            # The wrapped socket lacks readline(); wrap it in FileLike.
            self.rfile = FileLike(self.connection)
            self.wfile = FileLike(self.connection)
            method, _, _, _, path = parse_proxy_request(self.rfile.readline())
            scheme = "https"
        headers = utils.Headers()
        headers.read(self.rfile)
        if method == 'POST' and not headers.has_key('content-length'):
            raise ProxyError(400, "Missing Content-Length for POST method")
        if headers.has_key("content-length"):
            content = self.rfile.read(int(headers["content-length"][0]))
        else:
            content = ""
        return Request(connection, host, port, scheme, method, path, headers, content)

    def send_response(self, response):
        self.wfile.write(response.assemble())
        self.wfile.flush()

    def terminate(self, connection, wfile, rfile):
        # Flush and close, swallowing errors from already-dead sockets.
        try:
            if not getattr(wfile, "closed", False):
                wfile.flush()
            connection.close()
        except IOError:
            pass

    def finish(self):
        self.terminate(self.connection, self.wfile, self.rfile)

    def send_error(self, code, body):
        # BaseHTTPServer is only needed for its code->phrase table.
        import BaseHTTPServer
        response = BaseHTTPServer.BaseHTTPRequestHandler.responses[code][0]
        self.wfile.write("HTTP/1.0 %s %s\r\n" % (code, response))
        self.wfile.write("Server: %s\r\n"%NAME)
        self.wfile.write("Content-type: text/html\r\n")
        self.wfile.write("\r\n")
        self.wfile.write('<html><head>\n<title>%d %s</title>\n</head>\n'
            '<body>\n%s\n</body>\n</html>' % (code, response, body))
        self.wfile.flush()
        self.wfile.close()
        self.rfile.close()
+
+
ServerBase = SocketServer.ThreadingTCPServer
class ProxyServer(ServerBase):
    # Allows quick restarts without waiting for TIME_WAIT sockets to clear.
    allow_reuse_address = True
    def __init__(self, port):
        self.port = port
        ServerBase.__init__(self, ('', port), ProxyHandler)
        self.masterq = None

    def set_mqueue(self, q):
        # Called by the controller's Slave thread before serving starts.
        self.masterq = q

    def process_request(self, request, client_address):
        # NOTE(review): pure pass-through; presumably kept as a hook for
        # later interception — confirm before removing.
        return ServerBase.process_request(self, request, client_address)

    def finish_request(self, request, client_address):
        # Unlike the base class, hand the master queue to each handler.
        self.RequestHandlerClass(request, client_address, self, self.masterq)
+
diff --git a/libmproxy/pyparsing.py b/libmproxy/pyparsing.py
new file mode 100644
index 00000000..06b11d98
--- /dev/null
+++ b/libmproxy/pyparsing.py
@@ -0,0 +1,3707 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2009 Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#from __future__ import generators
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
+
+ from pyparsing import Word, alphas
+
+ # define grammar of a greeting
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+
+ hello = "Hello, World!"
+ print hello, "->", greet.parseString( hello )
+
+The program outputs the following::
+
+ Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+__version__ = "1.5.2"
+__versionTime__ = "17 February 2009 19:45"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
+'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor',
+]
+
+
"""
Detect if we are running version 3.X and make appropriate changes
Robert A. Clark
"""
if sys.version_info[0] > 2:
    _PY3K = True
    _MAX_INT = sys.maxsize
    # basestring is gone on Python 3; str is the only string base type.
    basestring = str
else:
    _PY3K = False
    _MAX_INT = sys.maxint
+
if not _PY3K:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
           then < returns the unicode object | encodes it with the default encoding | ... >.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object". However, does a
            # unicode object (being a subclass of basestring) count as a "string
            # object"?
            # If so, then return a unicode object:
            return unicode(obj)
            # Else encode it... but how? There are many choices... :)
            # Replace unprintables with escape codes?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
            # Replace unprintables with question marks?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
            # ...
else:
    # On Python 3 every str is already unicode, and unichr is spelled chr.
    _ustr = str
    unichr = chr
+
if not _PY3K:
    def _str2dict(strg):
        # Pseudo-set of characters for O(1) membership tests (pre-set idiom).
        return dict( [(c,0) for c in strg] )
else:
    _str2dict = set
+
+def _xml_escape(data):
+ """Escape &, <, >, ", ', etc. in a string of data."""
+
+ # ampersand must be replaced first
+ from_symbols = '&><"\''
+ to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+ for from_,to_ in zip(from_symbols, to_symbols):
+ data = data.replace(from_, to_)
+ return data
+
class _Constants(object):
    # Bare namespace; instances hold groups of related constants.
    pass
+
# Commonly used character classes for Word()-style expressions.
if not _PY3K:
    alphas = string.lowercase + string.uppercase
else:
    alphas = string.ascii_lowercase + string.ascii_uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
+
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # Single-argument form: the message doubles as the parse string.
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # Computed lazily here instead of in __init__, keeping construction cheap.
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        return "loc msg pstr parserElement lineno col line " \
               "markInputLine __str__ __repr__".split()
+
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    # Raised on an ordinary match failure; see ParseBaseException for details.
    pass
+
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    # Unlike ParseException, this is documented to halt parsing outright.
    pass
+
class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # Copy location/message details from the originating exception.
        super(ParseSyntaxException, self).__init__(
                pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+#~ class ReparseException(ParseBaseException):
+ #~ """Experimental class - parse actions can raise this exception to cause
+ #~ pyparsing to reparse the input string:
+ #~ - with a modified input string, and/or
+ #~ - with a modified start location
+ #~ Set the values of the ReparseException in the constructor, and raise the
+ #~ exception in a parse action to cause pyparsing to use the new string/location.
+ #~ Setting the values as None causes no change to be made.
+ #~ """
+ #~ def __init_( self, newstring, restartLoc ):
+ #~ self.newParseText = newstring
+ #~ self.reparseLoc = restartLoc
+
class RecursiveGrammarException(Exception):
    """Raised by validate() if the grammar could be improperly recursive."""
    def __init__(self, parseElementList):
        # The chain of elements forming the suspected recursive cycle.
        self.parseElementTrace = parseElementList

    def __str__(self):
        return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+ def __init__(self,p1,p2):
+ self.tup = (p1,p2)
+ def __getitem__(self,i):
+ return self.tup[i]
+ def __repr__(self):
+ return repr(self.tup)
+ def setOffset(self,i):
+ self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+ """Structured parse results, to provide multiple means of access to the parsed data:
+ - as a list (len(results))
+ - by list index (results[0], results[1], etc.)
+ - by attribute (results.<resultsName>)
+ """
+ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
+ def __new__(cls, toklist, name=None, asList=True, modal=True ):
+ if isinstance(toklist, cls):
+ return toklist
+ retobj = object.__new__(cls)
+ retobj.__doinit = True
+ return retobj
+
+ # Performance tuning: we construct a *lot* of these, so keep this
+ # constructor as small and fast as possible
+ def __init__( self, toklist, name=None, asList=True, modal=True ):
+ if self.__doinit:
+ self.__doinit = False
+ self.__name = None
+ self.__parent = None
+ self.__accumNames = {}
+ if isinstance(toklist, list):
+ self.__toklist = toklist[:]
+ else:
+ self.__toklist = [toklist]
+ self.__tokdict = dict()
+
+ if name:
+ if not modal:
+ self.__accumNames[name] = 0
+ if isinstance(name,int):
+ name = _ustr(name) # will always return a str, but use _ustr for consistency
+ self.__name = name
+ if not toklist in (None,'',[]):
+ if isinstance(toklist,basestring):
+ toklist = [ toklist ]
+ if asList:
+ if isinstance(toklist,ParseResults):
+ self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+ else:
+ self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+ self[name].__name = name
+ else:
+ try:
+ self[name] = toklist[0]
+ except (KeyError,TypeError,IndexError):
+ self[name] = toklist
+
+ def __getitem__( self, i ):
+ if isinstance( i, (int,slice) ):
+ return self.__toklist[i]
+ else:
+ if i not in self.__accumNames:
+ return self.__tokdict[i][-1][0]
+ else:
+ return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+ def __setitem__( self, k, v ):
+ if isinstance(v,_ParseResultsWithOffset):
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+ sub = v[0]
+ elif isinstance(k,int):
+ self.__toklist[k] = v
+ sub = v
+ else:
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+ sub = v
+ if isinstance(sub,ParseResults):
+ sub.__parent = wkref(self)
+
+ def __delitem__( self, i ):
+ if isinstance(i,(int,slice)):
+ mylen = len( self.__toklist )
+ del self.__toklist[i]
+
+ # convert int to slice
+ if isinstance(i, int):
+ if i < 0:
+ i += mylen
+ i = slice(i, i+1)
+ # get removed indices
+ removed = list(range(*i.indices(mylen)))
+ removed.reverse()
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for j in removed:
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+ else:
+ del self.__tokdict[i]
+
+ def __contains__( self, k ):
+ return k in self.__tokdict
+
+ def __len__( self ): return len( self.__toklist )
+ def __bool__(self): return len( self.__toklist ) > 0
+ __nonzero__ = __bool__
+ def __iter__( self ): return iter( self.__toklist )
+ def __reversed__( self ): return iter( reversed(self.__toklist) )
+ def keys( self ):
+ """Returns all named result keys."""
+ return self.__tokdict.keys()
+
+ def pop( self, index=-1 ):
+ """Removes and returns item at specified index (default=last).
+ Will work with either numeric indices or dict-key indicies."""
+ ret = self[index]
+ del self[index]
+ return ret
+
+ def get(self, key, defaultValue=None):
+ """Returns named result matching the given key, or if there is no
+ such name, then returns the given defaultValue or None if no
+ defaultValue is specified."""
+ if key in self:
+ return self[key]
+ else:
+ return defaultValue
+
+ def insert( self, index, insStr ):
+ self.__toklist.insert(index, insStr)
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+ def items( self ):
+ """Returns all named result keys and values as a list of tuples."""
+ return [(k,self[k]) for k in self.__tokdict]
+
+ def values( self ):
+ """Returns all named result values."""
+ return [ v[-1][0] for v in self.__tokdict.values() ]
+
+    def __getattr__( self, name ):
+        # Attribute access falls through to named results: modal names yield
+        # the last-seen value, accumulating names yield every value wrapped
+        # in a fresh ParseResults. Unknown names return "" (not AttributeError)
+        # so grammars can probe optional results without try/except.
+        if name not in self.__slots__:
+            if name in self.__tokdict:
+                if name not in self.__accumNames:
+                    return self.__tokdict[name][-1][0]
+                else:
+                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
+            else:
+                return ""
+        return None
+
+    def __add__( self, other ):
+        # Non-destructive concatenation: copy self, then delegate to __iadd__.
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__( self, other ):
+        # Merge other's named results into self, rebasing their stored
+        # offsets past the end of self's current token list.
+        if other.__tokdict:
+            offset = len(self.__toklist)
+            # Negative offsets are sentinels (see getName); map them to the
+            # join point rather than rebasing them.
+            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+                                for (k,vlist) in otheritems for v in vlist]
+            for k,v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0],ParseResults):
+                    # Re-parent nested results via weakref so getName() works.
+                    v[0].__parent = wkref(self)
+
+        self.__toklist += other.__toklist
+        self.__accumNames.update( other.__accumNames )
+        # NOTE(review): 'del other' only drops the local binding; it has no
+        # effect on the caller's reference and could be removed.
+        del other
+        return self
+
+    def __repr__( self ):
+        # Debug form: shows both the raw token list and the name->offset map.
+        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+    def __str__( self ):
+        # List-like rendering; nested ParseResults recurse through _ustr,
+        # plain tokens are repr()'d.
+        out = "["
+        sep = ""
+        for i in self.__toklist:
+            if isinstance(i, ParseResults):
+                out += sep + _ustr(i)
+            else:
+                out += sep + repr(i)
+            sep = ", "
+        out += "]"
+        return out
+
+    def _asStringList( self, sep='' ):
+        # Flatten all tokens (recursing into nested ParseResults) into a
+        # single list of strings, with `sep` interleaved between items.
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance( item, ParseResults ):
+                out += item._asStringList()
+            else:
+                out.append( _ustr(item) )
+        return out
+
+    def asList( self ):
+        """Returns the parse results as a nested list of matching tokens, all converted to strings."""
+        # NOTE(review): despite the docstring, non-ParseResults tokens are
+        # appended as-is, not converted to strings.
+        out = []
+        for res in self.__toklist:
+            if isinstance(res,ParseResults):
+                out.append( res.asList() )
+            else:
+                out.append( res )
+        return out
+
+    def asDict( self ):
+        """Returns the named parse results as dictionary."""
+        return dict( self.items() )
+
+    def copy( self ):
+        """Returns a new copy of a ParseResults object."""
+        # Shallow copy: the token list and tokdict containers are new, but
+        # the contained tokens/offset tuples are shared with the original.
+        ret = ParseResults( self.__toklist )
+        ret.__tokdict = self.__tokdict.copy()
+        ret.__parent = self.__parent
+        ret.__accumNames.update( self.__accumNames )
+        ret.__name = self.__name
+        return ret
+
+    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+
+           doctag         - tag to use for this node (defaults to this node's
+                            results name, or "ITEM" if it has none)
+           namedItemsOnly - if True, unnamed tokens are omitted entirely
+           indent         - current indentation prefix (used when recursing)
+           formatted      - if False, emit a single line with no whitespace
+        """
+        nl = "\n"
+        out = []
+        # Invert tokdict into position -> name so tokens can be tagged by
+        # their index in the token list.
+        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
+                                                            for v in vlist ] )
+        nextLevelIndent = indent + "  "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [ nl, indent, "<", selfTag, ">" ]
+
+        worklist = self.__toklist
+        for i,res in enumerate(worklist):
+            if isinstance(res,ParseResults):
+                # Nested results recurse; pass the positional name if one
+                # was assigned to this slot.
+                if i in namedItems:
+                    out += [ res.asXML(namedItems[i],
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+                else:
+                    out += [ res.asXML(None,
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [ nl, nextLevelIndent, "<", resTag, ">",
+                                                xmlBodyText,
+                                                "</", resTag, ">" ]
+
+        out += [ nl, indent, "</", selfTag, ">" ]
+        return "".join(out)
+
+    def __lookup(self,sub):
+        # Reverse lookup: find the results name under which `sub` is stored,
+        # comparing by identity ('is'), not equality.
+        for k,vlist in self.__tokdict.items():
+            for v,loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        """Returns the results name for this token expression."""
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            # Ask the parent (held via weakref) what name it stored us under.
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        elif (len(self) == 1 and
+               len(self.__tokdict) == 1 and
+               # NOTE(review): indexing dict.values()/keys() is Python 2 only;
+               # under Python 3 these are views and this would raise.
+               self.__tokdict.values()[0][0][1] in (0,-1)):
+            return self.__tokdict.keys()[0]
+        else:
+            return None
+
+    def dump(self,indent='',depth=0):
+        """Diagnostic method for listing out the contents of a ParseResults.
+           Accepts an optional indent argument so that this string can be embedded
+           in a nested display of other data."""
+        out = []
+        out.append( indent+_ustr(self.asList()) )
+        # items() returns a list of (key, value) tuples; sorting it in place
+        # is Python 2 idiom (Python 3 would need sorted()).
+        keys = self.items()
+        keys.sort()
+        for k,v in keys:
+            if out:
+                out.append('\n')
+            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
+            if isinstance(v,ParseResults):
+                if v.keys():
+                    # Named sub-results recurse one level deeper.
+                    #~ out.append('\n')
+                    out.append( v.dump(indent,depth+1) )
+                    #~ out.append('\n')
+                else:
+                    out.append(_ustr(v))
+            else:
+                out.append(_ustr(v))
+        #~ out.append('\n')
+        return "".join(out)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        # The weakref parent cannot be pickled; dereference it (or None) so
+        # __setstate__ can rebuild the weakref on the other side.
+        return ( self.__toklist,
+                 ( self.__tokdict.copy(),
+                   self.__parent is not None and self.__parent() or None,
+                   self.__accumNames,
+                   self.__name ) )
+
+    def __setstate__(self,state):
+        self.__toklist = state[0]
+        self.__tokdict, \
+        par, \
+        inAccumNames, \
+        self.__name = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            # Restore the parent link as a weakref, matching normal operation.
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __dir__(self):
+        # Expose named results as attributes for introspection/completion.
+        # (list + keys() concatenation is Python 2 idiom.)
+        return dir(super(ParseResults,self)) + self.keys()
+
+def col (loc,strg):
+    """Returns current column within a string, counting newlines as line separators.
+   The first column is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing <TAB>s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    # If loc sits on a newline, the column is 1; otherwise measure the
+    # distance from the previous newline (rfind returns -1 at line 1,
+    # which makes the arithmetic come out 1-based).
+    return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+    """Returns current line number within a string, counting newlines as line separators.
+   The first line is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing <TAB>s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+    """Returns the line of text containing loc within a string, counting newlines as line separators.
+       """
+    lastCR = strg.rfind("\n", 0, loc)
+    nextCR = strg.find("\n", loc)
+    if nextCR > 0:
+        return strg[lastCR+1:nextCR]
+    else:
+        # loc is on the final line (no trailing newline).
+        return strg[lastCR+1:]
+
+# Default debug callbacks installed by ParserElement.setDebug(); they print
+# attempt/match/failure events to stdout.
+def _defaultStartDebugAction( instring, loc, expr ):
+    print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+    print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+    print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing."""
+    pass
+
+class ParserElement(object):
+    """Abstract base level parser element class."""
+    # Characters skipped between tokens unless overridden per-element.
+    DEFAULT_WHITE_CHARS = " \n\t\r"
+
+    def setDefaultWhitespaceChars( chars ):
+        """Overrides the default whitespace chars
+        """
+        ParserElement.DEFAULT_WHITE_CHARS = chars
+    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
+
+    def __init__( self, savelist=False ):
+        # savelist: whether results of this element are kept as a list
+        # (saveAsList) when building ParseResults.
+        self.parseAction = list()
+        self.failAction = None
+        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
+        self.strRepr = None
+        self.resultsName = None
+        self.saveAsList = savelist
+        self.skipWhitespace = True
+        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        self.copyDefaultWhiteChars = True
+        self.mayReturnEmpty = False # used when checking for left-recursion
+        self.keepTabs = False
+        self.ignoreExprs = list()
+        self.debug = False
+        self.streamlined = False
+        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+        self.errmsg = ""
+        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+        self.debugActions = ( None, None, None ) #custom debug actions
+        self.re = None
+        self.callPreparse = True # used to avoid redundant calls to preParse
+        self.callDuringTry = False
+
+    def copy( self ):
+        """Make a copy of this ParserElement.  Useful for defining different parse actions
+           for the same parsing pattern, using copies of the original parse element."""
+        # Shallow copy plus fresh lists for the mutable per-element state.
+        cpy = copy.copy( self )
+        cpy.parseAction = self.parseAction[:]
+        cpy.ignoreExprs = self.ignoreExprs[:]
+        if self.copyDefaultWhiteChars:
+            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        return cpy
+
+    def setName( self, name ):
+        """Define name for this expression, for use in debugging."""
+        self.name = name
+        self.errmsg = "Expected " + self.name
+        if hasattr(self,"exception"):
+            self.exception.msg = self.errmsg
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """Define name for referencing matching tokens as a nested attribute
+           of the returned parse results.
+           NOTE: this returns a *copy* of the original ParserElement object;
+           this is so that the client can define a basic element, such as an
+           integer, and reference it in multiple places with different names.
+        """
+        newself = self.copy()
+        newself.resultsName = name
+        newself.modalResults = not listAllMatches
+        return newself
+
+    def setBreak(self,breakFlag = True):
+        """Method to invoke the Python pdb debugger when this element is
+           about to be parsed. Set breakFlag to True to enable, False to
+           disable.
+        """
+        if breakFlag:
+            # Wrap _parse in a closure that drops into pdb first; keep a
+            # handle on the original so setBreak(False) can restore it.
+            _parseMethod = self._parse
+            def breaker(instring, loc, doActions=True, callPreParse=True):
+                import pdb
+                pdb.set_trace()
+                return _parseMethod( instring, loc, doActions, callPreParse )
+            breaker._originalParseMethod = _parseMethod
+            self._parse = breaker
+        else:
+            if hasattr(self._parse,"_originalParseMethod"):
+                self._parse = self._parse._originalParseMethod
+        return self
+
+    def _normalizeParseActionArgs( f ):
+        """Internal method used to decorate parse actions that take fewer than 3 arguments,
+           so that all parse actions can be called as f(s,l,t)."""
+        # CO_VARARGS flag: a *args signature accepts anything, pass through.
+        STAR_ARGS = 4
+
+        try:
+            restore = None
+            if isinstance(f,type):
+                # A class used as a parse action: inspect its __init__.
+                restore = f
+                f = f.__init__
+            if not _PY3K:
+                codeObj = f.func_code
+            else:
+                # NOTE(review): 'f.code' looks like a typo for 'f.__code__';
+                # plain functions have no 'code' attribute in Python 3 —
+                # verify against upstream pyparsing.
+                codeObj = f.code
+            if codeObj.co_flags & STAR_ARGS:
+                return f
+            numargs = codeObj.co_argcount
+            # Bound methods: discount the implicit self argument.
+            if not _PY3K:
+                if hasattr(f,"im_self"):
+                    numargs -= 1
+            else:
+                if hasattr(f,"__self__"):
+                    numargs -= 1
+            if restore:
+                f = restore
+        except AttributeError:
+            try:
+                if not _PY3K:
+                    call_im_func_code = f.__call__.im_func.func_code
+                else:
+                    call_im_func_code = f.__code__
+
+                # not a function, must be a callable object, get info from the
+                # im_func binding of its bound __call__ method
+                if call_im_func_code.co_flags & STAR_ARGS:
+                    return f
+                numargs = call_im_func_code.co_argcount
+                if not _PY3K:
+                    if hasattr(f.__call__,"im_self"):
+                        numargs -= 1
+                else:
+                    if hasattr(f.__call__,"__self__"):
+                        # NOTE(review): '-= 0' is inconsistent with the
+                        # Python 2 branch above ('-= 1') and with the twin
+                        # branches below — confirm which is intended.
+                        numargs -= 0
+            except AttributeError:
+                if not _PY3K:
+                    call_func_code = f.__call__.func_code
+                else:
+                    call_func_code = f.__call__.__code__
+                # not a bound method, get info directly from __call__ method
+                if call_func_code.co_flags & STAR_ARGS:
+                    return f
+                numargs = call_func_code.co_argcount
+                if not _PY3K:
+                    if hasattr(f.__call__,"im_self"):
+                        numargs -= 1
+                else:
+                    if hasattr(f.__call__,"__self__"):
+                        numargs -= 1
+
+
+        #~ print ("adding function %s with %d args" % (f.func_name,numargs))
+        if numargs == 3:
+            return f
+        else:
+            # Adapt lower-arity actions to the canonical (s, l, t) signature.
+            if numargs > 3:
+                def tmp(s,l,t):
+                    # presumably re-binds the instance for an unbound
+                    # __call__ — TODO confirm this path is exercised
+                    return f(f.__call__.__self__, s,l,t)
+            if numargs == 2:
+                def tmp(s,l,t):
+                    return f(l,t)
+            elif numargs == 1:
+                def tmp(s,l,t):
+                    return f(t)
+            else: #~ numargs == 0:
+                def tmp(s,l,t):
+                    return f()
+            # Preserve the wrapped action's metadata where possible.
+            try:
+                tmp.__name__ = f.__name__
+            except (AttributeError,TypeError):
+                # no need for special handling if attribute doesnt exist
+                pass
+            try:
+                tmp.__doc__ = f.__doc__
+            except (AttributeError,TypeError):
+                # no need for special handling if attribute doesnt exist
+                pass
+            try:
+                tmp.__dict__.update(f.__dict__)
+            except (AttributeError,TypeError):
+                # no need for special handling if attribute doesnt exist
+                pass
+            return tmp
+    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
+
+    def setParseAction( self, *fns, **kwargs ):
+        """Define action to perform when successfully matching parse element definition.
+           Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
+           fn(loc,toks), fn(toks), or just fn(), where:
+            - s   = the original string being parsed (see note below)
+            - loc = the location of the matching substring
+            - toks = a list of the matched tokens, packaged as a ParseResults object
+           If the functions in fns modify the tokens, they can return them as the return
+           value from fn, and the modified list of tokens will replace the original.
+           Otherwise, fn does not need to return any value.
+
+           Note: the default parsing behavior is to expand tabs in the input string
+           before starting the parsing process.  See L{I{parseString}<parseString>} for more information
+           on parsing strings containing <TAB>s, and suggested methods to maintain a
+           consistent view of the parsed string, the parse location, and line and column
+           positions within the parsed string.
+        """
+        # Replaces any existing actions; addParseAction appends instead.
+        self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
+        self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+        return self
+
+    def addParseAction( self, *fns, **kwargs ):
+        """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
+        self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
+        self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+        return self
+
+    def setFailAction( self, fn ):
+        """Define action to perform if parsing fails at this expression.
+           Fail acton fn is a callable function that takes the arguments
+           fn(s,loc,expr,err) where:
+            - s = string being parsed
+            - loc = location where expression match was attempted and failed
+            - expr = the parse expression that failed
+            - err = the exception thrown
+           The function returns no value.  It may throw ParseFatalException
+           if it is desired to stop parsing immediately."""
+        self.failAction = fn
+        return self
+
+    def _skipIgnorables( self, instring, loc ):
+        # Repeatedly consume any registered ignore expressions (comments
+        # etc.) until none of them match at the current location.
+        exprsFound = True
+        while exprsFound:
+            exprsFound = False
+            for e in self.ignoreExprs:
+                try:
+                    while 1:
+                        loc,dummy = e._parse( instring, loc )
+                        exprsFound = True
+                except ParseException:
+                    pass
+        return loc
+
+    def preParse( self, instring, loc ):
+        # Skip ignorables, then leading whitespace, returning the position
+        # where real matching should begin.
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+
+        if self.skipWhitespace:
+            wt = self.whiteChars
+            instrlen = len(instring)
+            while loc < instrlen and instring[loc] in wt:
+                loc += 1
+
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # Default implementation matches nothing; subclasses override.
+        return loc, []
+
+    def postParse( self, instring, loc, tokenlist ):
+        # Hook for subclasses to massage the token list after a match.
+        return tokenlist
+
+    #~ @profile
+    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+        # Core match driver: preparse (skip whitespace/ignorables), run
+        # parseImpl, then postParse and any parse actions. Two nearly
+        # identical paths exist so the non-debug/non-failAction path avoids
+        # try/except overhead where mayIndexError allows it.
+        debugging = ( self.debug ) #and doActions )
+
+        if debugging or self.failAction:
+            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+            if (self.debugActions[0] ):
+                self.debugActions[0]( instring, loc, self )
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = loc
+            try:
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    # Ran off the end of the input: report as a normal miss.
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            except ParseBaseException, err:
+                #~ print ("Exception raised:", err)
+                if self.debugActions[2]:
+                    self.debugActions[2]( instring, tokensStart, self, err )
+                if self.failAction:
+                    self.failAction( instring, tokensStart, self, err )
+                raise
+        else:
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = loc
+            if self.mayIndexError or loc >= len(instring):
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            else:
+                # Subclass guarantees no IndexError; skip the try block.
+                loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+        tokens = self.postParse( instring, loc, tokens )
+
+        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+        if self.parseAction and (doActions or self.callDuringTry):
+            if debugging:
+                try:
+                    for fn in self.parseAction:
+                        tokens = fn( instring, tokensStart, retTokens )
+                        if tokens is not None:
+                            # A non-None return replaces the token set.
+                            retTokens = ParseResults( tokens,
+                                                      self.resultsName,
+                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                      modal=self.modalResults )
+                except ParseBaseException, err:
+                    #~ print "Exception raised in user parse action:", err
+                    if (self.debugActions[2] ):
+                        self.debugActions[2]( instring, tokensStart, self, err )
+                    raise
+            else:
+                for fn in self.parseAction:
+                    tokens = fn( instring, tokensStart, retTokens )
+                    if tokens is not None:
+                        retTokens = ParseResults( tokens,
+                                                  self.resultsName,
+                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                  modal=self.modalResults )
+
+        if debugging:
+            #~ print ("Matched",self,"->",retTokens.asList())
+            if (self.debugActions[1] ):
+                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+        return loc, retTokens
+
+    def tryParse( self, instring, loc ):
+        # Probe a match without running parse actions; returns only the end
+        # location. Fatal errors are downgraded to ordinary ParseExceptions.
+        try:
+            return self._parse( instring, loc, doActions=False )[0]
+        except ParseFatalException:
+            raise ParseException( instring, loc, self.errmsg, self)
+
+    # this method gets repeatedly called during backtracking with the same arguments -
+    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+        lookup = (self,instring,loc,callPreParse,doActions)
+        if lookup in ParserElement._exprArgCache:
+            value = ParserElement._exprArgCache[ lookup ]
+            if isinstance(value,Exception):
+                # Failures are memoized too: re-raise the cached exception.
+                raise value
+            return value
+        else:
+            try:
+                value = self._parseNoCache( instring, loc, doActions, callPreParse )
+                # Cache a copy of the results so later mutation by parse
+                # actions doesn't corrupt the cached value.
+                ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
+                return value
+            except ParseBaseException, pe:
+                ParserElement._exprArgCache[ lookup ] = pe
+                raise
+
+    _parse = _parseNoCache
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    _exprArgCache = {}
+    def resetCache():
+        ParserElement._exprArgCache.clear()
+    resetCache = staticmethod(resetCache)
+
+    _packratEnabled = False
+    def enablePackrat():
+        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+           Repeated parse attempts at the same string location (which happens
+           often in many complex grammars) can immediately return a cached value,
+           instead of re-executing parsing/validating code.  Memoizing is done of
+           both valid results and parsing exceptions.
+
+           This speedup may break existing programs that use parse actions that
+           have side-effects.  For this reason, packrat parsing is disabled when
+           you first import pyparsing.  To activate the packrat feature, your
+           program must call the class method ParserElement.enablePackrat().  If
+           your program uses psyco to "compile as you go", you must call
+           enablePackrat before calling psyco.full().  If you do not do this,
+           Python will crash.  For best results, call enablePackrat() immediately
+           after importing pyparsing.
+        """
+        if not ParserElement._packratEnabled:
+            ParserElement._packratEnabled = True
+            # Swap the class-level _parse alias to the caching variant.
+            ParserElement._parse = ParserElement._parseCache
+    enablePackrat = staticmethod(enablePackrat)
+
+    def parseString( self, instring, parseAll=False ):
+        """Execute the parse expression with the given string.
+           This is the main interface to the client code, once the complete
+           expression has been built.
+
+           If you want the grammar to require that the entire input string be
+           successfully parsed, then set parseAll to True (equivalent to ending
+           the grammar with StringEnd()).
+
+           Note: parseString implicitly calls expandtabs() on the input string,
+           in order to report proper column numbers in parse actions.
+           If the input string contains tabs and
+           the grammar uses parse actions that use the loc argument to index into the
+           string being parsed, you can ensure you have a consistent view of the input
+           string by:
+            - calling parseWithTabs on your grammar before calling parseString
+              (see L{I{parseWithTabs}<parseWithTabs>})
+            - define your parse action using the full (s,loc,toks) signature, and
+              reference the input string using the parse action's s argument
+            - explictly expand the tabs in your input string before calling
+              parseString
+        """
+        # A fresh top-level parse must not reuse packrat state from a
+        # previous input.
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+            #~ self.saveAsList = True
+        for e in self.ignoreExprs:
+            e.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            loc, tokens = self._parse( instring, 0 )
+            if parseAll:
+                # Require only trailing whitespace/ignorables after the match.
+                loc = self.preParse( instring, loc )
+                StringEnd()._parse( instring, loc )
+        except ParseBaseException, exc:
+            # catch and re-raise exception from here, clears out pyparsing internal stack trace
+            raise exc
+        else:
+            return tokens
+
+    def scanString( self, instring, maxMatches=_MAX_INT ):
+        """Scan the input string for expression matches.  Each match will return the
+           matching tokens, start location, and end location.  May be called with optional
+           maxMatches argument, to clip scanning after 'n' matches are found.
+
+           Note that the start and end locations are reported relative to the string
+           being parsed.  See L{I{parseString}<parseString>} for more information on parsing
+           strings with embedded tabs.
+
+           This is a generator yielding (tokens, startLoc, endLoc) tuples."""
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        # Bind method lookups once outside the scan loop.
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn( instring, loc )
+                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+                except ParseException:
+                    # No match here: slide one character forward and retry.
+                    loc = preloc+1
+                else:
+                    matches += 1
+                    yield tokens, preloc, nextLoc
+                    loc = nextLoc
+        except ParseBaseException, pe:
+            # Re-raise from here to trim pyparsing internals off the traceback.
+            raise pe
+
+    def transformString( self, instring ):
+        """Extension to scanString, to modify matching text with modified tokens that may
+           be returned from a parse action.  To use transformString, define a grammar and
+           attach a parse action to it that modifies the returned token list.
+           Invoking transformString() on a target string will then scan for matches,
+           and replace the matched text patterns according to the logic in the parse
+           action.  transformString() returns the resulting transformed string."""
+        out = []
+        lastE = 0
+        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for t,s,e in self.scanString( instring ):
+                # Copy the unmatched gap, then the (possibly transformed) tokens.
+                out.append( instring[lastE:s] )
+                if t:
+                    if isinstance(t,ParseResults):
+                        out += t.asList()
+                    elif isinstance(t,list):
+                        out += t
+                    else:
+                        out.append(t)
+                lastE = e
+            out.append(instring[lastE:])
+            return "".join(map(_ustr,out))
+        except ParseBaseException, pe:
+            raise pe
+
+    def searchString( self, instring, maxMatches=_MAX_INT ):
+        """Another extension to scanString, simplifying the access to the tokens found
+           to match the given parse expression.  May be called with optional
+           maxMatches argument, to clip searching after 'n' matches are found.
+        """
+        try:
+            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
+        except ParseBaseException, pe:
+            raise pe
+
+    # Operator overloads below follow a common pattern: bare strings are
+    # promoted to Literal, and combining with any other type warns and
+    # returns None rather than raising.
+    def __add__(self, other ):
+        """Implementation of + operator - returns And"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, other ] )
+
+    def __radd__(self, other ):
+        """Implementation of + operator when left operand is not a ParserElement"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other + self
+
+    def __sub__(self, other):
+        """Implementation of - operator, returns And with error stop"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        # _ErrorStop makes failure after this point a fatal (non-backtracking) error.
+        return And( [ self, And._ErrorStop(), other ] )
+
+    def __rsub__(self, other ):
+        """Implementation of - operator when left operand is not a ParserElement"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other - self
+
+    def __mul__(self,other):
+        # expr * n        -> exactly n repetitions (And of n copies)
+        # expr * (n, m)   -> n to m repetitions (n required + m-n Optionals)
+        # expr * (n,None) -> n or more; (None,m) -> 0 to m
+        if isinstance(other,int):
+            minElements, optElements = other,0
+        elif isinstance(other,tuple):
+            # Normalize to a 2-tuple, filling missing slots with None.
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                optElements -= minElements
+            else:
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+        if (optElements):
+            # Nest Optionals right-to-left so shorter matches are allowed.
+            def makeOptionalList(n):
+                if n>1:
+                    return Optional(self + makeOptionalList(n-1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self]*minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self]*minElements)
+        return ret
+
+    def __rmul__(self, other):
+        # Multiplication is commutative here: n * expr == expr * n.
+        return self.__mul__(other)
+
+    def __or__(self, other ):
+        """Implementation of | operator - returns MatchFirst"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return MatchFirst( [ self, other ] )
+
+    def __ror__(self, other ):
+        """Implementation of | operator when left operand is not a ParserElement"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other | self
+
+    def __xor__(self, other ):
+        """Implementation of ^ operator - returns Or"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Or( [ self, other ] )
+
+    def __rxor__(self, other ):
+        """Implementation of ^ operator when left operand is not a ParserElement"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other ^ self
+
+    def __and__(self, other ):
+        """Implementation of & operator - returns Each"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Each( [ self, other ] )
+
+    def __rand__(self, other ):
+        """Implementation of & operator when left operand is not a ParserElement"""
+        if isinstance( other, basestring ):
+            other = Literal( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other & self
+
+    def __invert__( self ):
+        """Implementation of ~ operator - returns NotAny"""
+        return NotAny( self )
+
+    def __call__(self, name):
+        """Shortcut for setResultsName, with listAllMatches=default::
+             userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+           could be written as::
+             userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+           """
+        return self.setResultsName(name)
+
+    def suppress( self ):
+        """Suppresses the output of this ParserElement; useful to keep punctuation from
+           cluttering up returned output.
+        """
+        return Suppress( self )
+
+    def leaveWhitespace( self ):
+        """Disables the skipping of whitespace before matching the characters in the
+           ParserElement's defined pattern.  This is normally only used internally by
+           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars( self, chars ):
+        """Overrides the default whitespace chars
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        # Mark as customized so copy() won't reset to the class default.
+        self.copyDefaultWhiteChars = False
+        return self
+
+    def parseWithTabs( self ):
+        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
+           Must be called before parseString when the input grammar contains elements that
+           match <TAB> characters."""
+        self.keepTabs = True
+        return self
+
+    def ignore( self, other ):
+        """Define expression to be ignored (e.g., comments) while doing pattern
+           matching; may be called repeatedly, to define multiple comment or other
+           ignorable patterns.
+        """
+        # Already-suppressed expressions are deduplicated; anything else is
+        # wrapped in Suppress so ignored text never appears in results.
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                self.ignoreExprs.append( other )
+        else:
+            self.ignoreExprs.append( Suppress( other ) )
+        return self
+
+    def setDebugActions( self, startAction, successAction, exceptionAction ):
+        """Enable display of debugging messages while doing pattern matching."""
+        # None for any action falls back to the module-level default printer.
+        self.debugActions = (startAction or _defaultStartDebugAction,
+                             successAction or _defaultSuccessDebugAction,
+                             exceptionAction or _defaultExceptionDebugAction)
+        self.debug = True
+        return self
+
+ def setDebug( self, flag=True ):
+ """Enable display of debugging messages while doing pattern matching.
+ Set flag to True to enable, False to disable."""
+ if flag:
+ self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+ else:
+ self.debug = False
+ return self
+
+ def __str__( self ):
+ return self.name
+
+ def __repr__( self ):
+ return _ustr(self)
+
+ def streamline( self ):
+ self.streamlined = True
+ self.strRepr = None
+ return self
+
+ def checkRecursion( self, parseElementList ):
+ pass
+
+ def validate( self, validateTrace=[] ):
+ """Check defined expressions for valid structure, check for infinite recursive definitions."""
+ self.checkRecursion( [] )
+
    def parseFile( self, file_or_filename, parseAll=False ):
        """Execute the parse expression on the given file or filename.
        If a filename is specified (instead of a file object),
        the entire file is opened, read, and closed before parsing.
        """
        # EAFP: try the argument as a file-like object first, and fall back
        # to opening it as a path when it has no read() method
        try:
            file_contents = file_or_filename.read()
        except AttributeError:
            f = open(file_or_filename, "rb")
            file_contents = f.read()
            f.close()
        try:
            return self.parseString(file_contents, parseAll)
        except ParseBaseException, exc:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
+
    def getException(self):
        # build a fresh ParseException carrying this element's error message;
        # used by __getattr__ below to lazily populate self.myException
        return ParseException("",0,self.errmsg,self)
+
    def __getattr__(self,aname):
        # Lazily create and cache the per-element exception object the first
        # time "myException" is accessed (parseImpl methods reuse and mutate
        # this single instance for speed).  Any other missing attribute is a
        # genuine error.
        if aname == "myException":
            self.myException = ret = self.getException();
            return ret;
        else:
            raise AttributeError("no such attribute " + aname)
+
    def __eq__(self,other):
        # Three-way equality: state comparison against another element,
        # a full-string match test when compared against a plain string,
        # or deferral upward for anything else.
        if isinstance(other, ParserElement):
            return self is other or self.__dict__ == other.__dict__
        elif isinstance(other, basestring):
            try:
                # element == "text" means: the element matches the entire text
                self.parseString(_ustr(other), parseAll=True)
                return True
            except ParseBaseException:
                return False
        else:
            return super(ParserElement,self)==other
+
    def __ne__(self,other):
        # defined in terms of __eq__ so the two stay consistent
        return not (self == other)
+
    def __hash__(self):
        # NOTE(review): identity-based hash can disagree with __eq__'s
        # __dict__ comparison for equal-but-distinct elements — confirm
        # elements are never used as dict keys relying on value equality
        return hash(id(self))
+
    def __req__(self,other):
        # reflected-equality helper, mirrors __eq__
        return self == other
+
    def __rne__(self,other):
        # reflected-inequality helper, mirrors __ne__
        return not (self == other)
+
+
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        # tokens never keep per-match result lists
        super(Token,self).__init__( savelist=False )
        #self.myException = ParseException("",0,"",self)

    def setName(self, name):
        # refresh the cached error message whenever the display name changes
        s = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        #s.myException.msg = self.errmsg
        return s
+
+
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # consumes no input, so it can always succeed and never indexes
        # past the end of the string
        self.mayReturnEmpty = True
        self.mayIndexError = False
+
+
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        # unconditionally fail by raising the cached per-element exception
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+
+
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string is demoted to an always-matching Empty
            # by reassigning __class__ in place
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
# short internal alias for Literal, used when building helper expressions
_L = Literal
+
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character. Compare with Literal::
    Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
    Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
    Accepts two optional constructor arguments in addition to the keyword string:
    identChars is a string of characters that would be valid identifier characters,
    defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
    matching, default is False.
    """
    # class-level default; may be replaced via setDefaultKeywordChars below
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): unlike Literal, an empty keyword only warns and is
            # not demoted to Empty — firstMatchChar stays unset; confirm intended
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # normalize both the keyword and the identifier set to upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # a keyword matches only when it is not flanked by identifier chars
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        # copies revert to the class-level default identifier set
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # store the upper-cased form as the comparison key
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        # compare upper-cased input slice against the upper-cased match,
        # but return the original-case defining literal
        if instring[ loc:loc+self.matchLen ].upper() == self.match:
            return loc+self.matchLen, self.returnString
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+
class CaselessKeyword(Keyword):
    """Caseless variant of Keyword; matched results keep the defining case."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        # NOTE(review): unlike Keyword.parseImpl, the character *preceding*
        # loc is not checked here — confirm this asymmetry is intended
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length. The default value for min is 1 (a
    minimum value < 1 is not valid); the default values for max and exact
    are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            # body character set defaults to the initial character set
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both min and max
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Build an equivalent regular expression for the common
        # length-unconstrained case so parseImpl can use the faster re path.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.bodyCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                (re.escape(self.initCharsOrig),
                                _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                (_escapeRegexRangeChars(self.initCharsOrig),
                                _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except:
                # NOTE(review): bare except — any compile failure silently
                # falls back to the character-scanning path below
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        # fast path: use the precompiled regex when one was built in __init__
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        # slow path: character-by-character scan
        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: match must not be embedded in a longer run of body chars
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except:
            # no explicit name assigned yet — build an abbreviated repr below
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets for display
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
+
+
class Regex(Token):
    """Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn with caller context, then propagate the compile error
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        # expose any named groups as named results on the returned tokens
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except:
            # no explicit name assigned yet — fall back to pattern repr
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
+
+
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
        Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Build a single regex for the whole quoted string.  The body is an
        # alternation of: any char that is not an end-quote/escape (newlines
        # excluded unless multiline), partial-end-quote prefixes, the escaped
        # quote sequence, and escChar followed by any char.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # cheap first-char check before running the full regex
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except:
            # no explicit name assigned yet — describe the delimiters
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
+
+
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length. The default value for min is 1 (a
    minimum value < 1 is not valid); the default values for max and exact
    are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # whitespace may be part of the match, so never skip it
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both min and max
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] in self.notChars:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        # scan forward while characters stay outside the disallowed set,
        # bounded by maxLen and the end of the input
        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
                (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # no explicit name assigned yet — build an abbreviated repr below
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
+
class White(Token):
    """Special matching class for matching whitespace. Normally, whitespace is ignored
    by pyparsing grammars. This class is included when some whitespace structures
    are significant. Define with a string containing the whitespace characters to be
    matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments,
    as defined for the Word class."""
    # display names for each recognized whitespace character
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters this element matches must not also be skipped as
        # whitespace, so remove them from the skip set
        self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
        #~ self.leaveWhitespace()
        self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both min and max
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        if not(instring[ loc ] in self.matchWhite):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min( maxloc, len(instring) )
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
+
+
class _PositionToken(Token):
    """Base for tokens that assert a position in the input without consuming it."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name=self.__class__.__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
+
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        # advance over ignorables and whitespace until the target column
        # (or end of input) is reached
        if col(loc,instring) != self.col:
            instrlen = len(instring)
            if self.ignoreExprs:
                loc = self._skipIgnorables( instring, loc )
            while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
                loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        thiscol = col( loc, instring )
        if thiscol > self.col:
            # already past the target column — cannot back up
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        ret = instring[ loc: newloc ]
        return newloc, ret
+
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # skip all default whitespace except newlines, which are significant here
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        preloc = super(LineStart,self).preParse(instring,loc)
        # step past a newline at the pre-parsed position
        if instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        # succeeds at offset 0, at a position equal to the pre-parse of the
        # start of input, or just after a newline
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            #~ raise ParseException( instring, loc, "Expected start of line" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
+
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # skip all default whitespace except newlines, which are significant here
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        if loc<len(instring):
            if instring[loc] == "\n":
                return loc+1, "\n"
            else:
                #~ raise ParseException( instring, loc, "Expected end of line" )
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        elif loc == len(instring):
            # NOTE(review): returns loc+1, one past end of input — appears to
            # deliberately mirror consuming a newline; confirm callers tolerate it
            return loc+1, []
        else:
            # unreachable: loc cannot exceed len(instring) given the branches above
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
+
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        if loc != 0:
            # see if entire string up to here is just whitespace and ignoreables
            if loc != self.preParse( instring, 0 ):
                #~ raise ParseException( instring, loc, "Expected start of text" )
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
+
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        if loc < len(instring):
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        elif loc == len(instring):
            # NOTE(review): returns loc+1, one past end of input — matches
            # LineEnd's convention; confirm callers tolerate it
            return loc+1, []
        elif loc > len(instring):
            return loc, []
        else:
            # unreachable: the three branches above cover all orderings
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
+
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of wordChars
    (default=printables). To emulate the \b behavior of regular expressions,
    use WordStart(alphanums). WordStart will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        # position 0 always qualifies; otherwise the previous char must be
        # outside the word set and the current char inside it
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
+
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of wordChars
    (default=printables). To emulate the \b behavior of regular expressions,
    use WordEnd(alphanums). WordEnd will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        # trailing whitespace is part of the position being asserted
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        instrlen = len(instring)
        # end of string always qualifies; otherwise the current char must be
        # outside the word set and the previous char inside it
        if instrlen>0 and loc<instrlen:
            if (instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                #~ raise ParseException( instring, loc, "Expected end of word" )
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
+
+
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # normalize exprs to a list: accept a list, a bare string (wrapped
        # as a Literal), any iterable, or a single expression
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        self.exprs.append( other )
        # invalidate the cached string repr
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
        all contained expressions."""
        self.skipWhitespace = False
        # copy children first so shared sub-expressions elsewhere are unaffected
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        # register via the base class, then propagate the (possibly wrapped)
        # ignorable to every child expression
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except:
            # no explicit name assigned yet — build a class-based repr below
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        # thin wrapper kept for subclass hook symmetry
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        # validate children with self appended to the trace, then self
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )
+
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        # marker element: expressions after it raise ParseSyntaxException
        # (fatal) instead of the backtrackable ParseException
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty,...) skips Empty.__init__ entirely
            # (name/mayReturnEmpty are not set here) — later pyparsing
            # releases changed this; confirm intended for this snapshot
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence may return empty only if every child may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # adopt the first child's whitespace behavior for pre-parsing
        self.setWhitespaceChars( exprs[0].whiteChars )
        self.skipWhitespace = exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # after an _ErrorStop, failures become non-backtrackable
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException, pe:
                    raise ParseSyntaxException(pe)
                except IndexError, ie:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # once a child must consume input, later children cannot be
            # reached with the same start position
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
+
+
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # the alternation may return empty if any child may
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        # try every alternative and keep the one that matches the most input;
        # track the furthest failure for error reporting
        maxExcLoc = -1
        maxMatchLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                loc2 = e.tryParse( instring, loc )
            except ParseException, err:
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)
            else:
                if loc2 > maxMatchLoc:
                    maxMatchLoc = loc2
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if maxException is not None:
                raise maxException
            else:
                raise ParseException(instring, loc, "no defined alternatives to match", self)

        # re-parse the winning alternative, this time running parse actions
        return maxMatchExp._parse( instring, loc, doActions )

    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
+
+
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            # can match empty input iff at least one alternative can
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            # no alternatives at all - trivially "matches" by consuming nothing
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # first-match-wins: return as soon as any alternative parses
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                ret = e._parse( instring, loc, doActions )
                return ret
            except ParseException, err:
                # keep the failure that got furthest, for the best error message
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        else:
            if maxException is not None:
                raise maxException
            else:
                raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        # implements expr |= other by appending in place rather than nesting
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        # recursion check must descend into every alternative
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
+
+
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # an Each can match empty input only if every contained expression can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        # lazily partition the expressions by repetition kind on the first parse
        if self.initExprGroups:
            self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the outstanding expressions until a full pass
        # makes no progress (every remaining expression failed to match)
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)

        # now re-parse in the discovered order, this time for real (actions on)
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # merge all results; duplicate result names are accumulated, not overwritten
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
+
+
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    Wraps a single contained expression (``self.expr``) and mirrors its
    whitespace, ignore-expression, and list-saving configuration.
    """
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # adopt the wrapped expression's parsing configuration
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        # delegate to the contained expression; an unset expression cannot match
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        self.skipWhitespace = False
        # BUGFIX: the copy was previously made *before* the None check, so a
        # ParseElementEnhance with no expression raised AttributeError here
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        # Suppress-wrapped ignorables are added only once; everything else is
        # forwarded unconditionally; either way, propagate to the inner expression
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        # seeing ourselves again on the walk means a left-recursive grammar
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # base __str__ may fail before construction completes; fall through
            # (was a bare except, narrowed so SystemExit and friends escape)
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
+
+
class FollowedBy(ParseElementEnhance):
    """Positive lookahead: succeed only if the wrapped expression matches here.

    The parse position is never advanced and no tokens are produced; failure
    of the wrapped expression propagates as the match failure.
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # matching consumes no input, so an empty remainder is acceptable
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # probe without advancing; tryParse raises on failure
        self.expr.tryParse( instring, loc )
        return loc, []
+
+
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        # success here means the wrapped expression FAILED to match
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            pass
        else:
            #~ raise ParseException(instring, loc, self.errmsg )
            # NOTE(review): self.myException is presumably supplied by the
            # ParserElement base class (the local assignment above is commented
            # out) - confirm against the full file.
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
+
+
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        # zero repetitions is a valid (empty) match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        tokens = []
        try:
            # first repetition skips the preparse (the caller already did it)
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            while 1:
                if hasIgnoreExprs:
                    preloc = self._skipIgnorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                # only accumulate non-empty results (value tokens or named results)
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # the first failure ends the repetition; zero matches is still success
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        # repeated matches must always be collected as a list
        ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        ret.saveAsList = True
        return ret
+
+
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        # must be at least one - a failure here propagates as the match failure
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            while 1:
                if hasIgnoreExprs:
                    preloc = self._skipIgnorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                # only accumulate non-empty results (value tokens or named results)
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # the first failure after the mandatory match ends the repetition
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        # repeated matches must always be collected as a list
        ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
        ret.saveAsList = True
        return ret
+
+class _NullToken(object):
+ def __bool__(self):
+ return False
+ __nonzero__ = __bool__
+ def __str__(self):
+ return ""
+
# module-level sentinel distinguishing "no default supplied" from any real default
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
    A default return string can also be specified, if the optional expression
    is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        # absence of the expression is itself a valid (empty) match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            pass
        # no match: substitute the default (if any) without consuming input
        if self.defaultValue is _optionalNotMatched:
            return loc, []
        if self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        try:
            return self.name
        except AttributeError:
            pass
        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
+
+
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
    If include is set to true, the matched expression is also parsed (the skipped text
    and matched expression are returned as a 2-element list).  The ignore
    argument is used to define grammars (typically quoted strings and comments) that
    might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # failOn: abort the scan with a parse failure if this matches first
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        # advance one character at a time until the target expression matches
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                failParse = False
                if self.ignoreExpr is not None:
                    # hop over ignorable regions (e.g. quoted strings, comments)
                    # BUGFIX: removed a leftover debug print statement that was
                    # emitted on every ignore-expression match
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # consume the target too and return [skipped + matched]
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    # the failOn expression matched - propagate as a real failure
                    raise
                else:
                    loc += 1
        # NOTE(review): myException is presumably provided by ParserElement
        # (the local assignment in __init__ is commented out) - confirm.
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the Forward variable using the '<<' operator.

    Note: take care when assigning to Forward not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the Forward::
        fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind the deferred expression and adopt its parsing configuration."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        # (was assigned twice - once from 'other' and once from self.expr;
        # they are the same object, so a single assignment suffices)
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # deliberately returns None so '<<' cannot be chained by accident
        return None

    def leaveWhitespace( self ):
        self.skipWhitespace = False
        return self

    def streamline( self ):
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # guard against infinite recursion through self-referential grammars
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # temporarily swap in a non-recursing class so self-referential
        # expressions render as "..." instead of recursing forever
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            # an empty Forward is copied by creating a new one deferring to us
            ret = Forward()
            ret << self
            return ret
+
class _ForwardNoRecurse(Forward):
    """Temporary stand-in class used by Forward.__str__ to cut infinite recursion."""
    def __str__( self ):
        return "..."
+
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but not forwarded
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
+
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated - prefer the upcaseTokens parse action.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        # string.upper mirrors the historical behavior of this deprecated class
        return [ string.upper(tok) for tok in tokenlist ]
+
+
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        # when adjacent, ignorables inside the expression would break
        # contiguity, so register them only at this level
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super( Combine, self).ignore( other )
        return self

    def postParse( self, instring, loc, tokenlist ):
        # copy preserves any named results, then the token content is replaced
        # with the single joined string
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        else:
            return retToks
+
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # grouped results are always kept as a nested list
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the whole token list inside a single-element list
        return [ tokenlist ]
+
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # use each sub-token's first element as its dictionary key
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                # a bare key with no value
                tokenlist[ikey] = _ParseResultsWithOffset("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                # simple key/value pair
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
            else:
                # multi-token value: strip the key and store the remainder
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
+
+
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # discard every token the wrapped expression produced
        return []

    def suppress( self ):
        # already suppressing - nothing further to wrap
        return self
+
+
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        # a second invocation is a parse failure
        if self.called:
            raise ParseException(s,l,"")
        # mark as called only after the action returns normally
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        # re-arm the wrapper so the action may fire once more
        self.called = False
+
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Logs entry, normal exit, and exceptions of the wrapped parse action to
    stderr, then re-raises/returns unchanged.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        thisFunc = f.func_name
        # the parse-action arguments are always the last three: (s, l, t);
        # an extra leading argument means a bound method - include its class name
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception, exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        # preserve the wrapped function's name for nicer diagnostics
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
+
+#
+# global helpers
+#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing 'combine=True' in the constructor.
    If combine is set to True, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
+
def countedArray( expr ):
    """Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # rebind the deferred expression to exactly n copies of expr
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        return []
    intCount = Word(nums).setName("arrayLen")
    intCount.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( intCount + arrayExpr )
+
+def _flatten(L):
+ if type(L) is not list: return [L]
+ if L == []: return L
+ return _flatten(L[0]) + _flatten(L[1:])
+
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a
    previous literal, will also match the leading "1:1" in "1:10".
    If this is not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # rebind the deferred expression to literal copies of whatever matched
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.asList())
                rep << And( [ Literal(tt) for tt in tflat ] )
        else:
            # an empty match repeats as the empty expression
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
+
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by
    expressions, will *not* match the leading "1:1" in "1:10";
    the expressions are evaluated first, and then compared, so
    "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of the same expression, then verify token equality
    e2 = expr.copy()
    rep << e2
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
+
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters that are special inside a regex [] range."""
    # escape these chars: \ ^ - ]
    for ch in r"\^-]":
        s = s.replace(ch, _bslash + ch)
    # literal newline/tab become their printable escape sequences
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
+
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    Raises TypeError if strs is neither a string nor a list/tuple.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # BUGFIX: previously fell through and crashed with a NameError on the
        # unbound 'symbols'; fail explicitly with a meaningful exception
        raise TypeError("Invalid argument to oneOf, expected string or list")

    # remove duplicates, and reorder so that a symbol masked by an earlier
    # prefix (e.g. ">" before ">=") is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all single-character symbols - a character class is fastest
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # was a bare except; narrowed so SystemExit/KeyboardInterrupt escape
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
+
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the Dict results can include named token
    fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
+
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. Simpler to use than the parse action keepOriginalText, and does not
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return value is a
    ParseResults containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to originalTextFor contains expressions with defined
    results names, you must set asString to False if you want to preserve those
    results name values."""
    # record the input locations just before and just after the expression
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
    if asString:
        # collapse the whole match to the raw input slice
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        # keep named results; replace value tokens with the raw input slice
        def extractText(s,l,t):
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    matchExpr.setParseAction(extractText)
    return matchExpr
+
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# building blocks for srange(): a tiny grammar for regex-style [] set bodies
# backslash-escaped punctuation, e.g. \] -> ]
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# hex escape: \0x41 -> 'A'
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# octal escape: \041 -> '!'
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a (start,end) range pair into the full run of characters; pass single chars through
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
+
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
        a single character
        an escaped character with a leading backslash (such as \- or \])
        an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
        an escaped octal character with a leading '\0' (\041, which is a '!' character)
        a range of any of the above, separated by a dash ('a-z', etc.)
        any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

    Returns "" if the input cannot be parsed as a range specification.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # malformed range spec: preserve the historical "return empty" contract,
        # but no longer swallow SystemExit/KeyboardInterrupt via a bare except
        return ""
+
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
    column in the input text.
    """
    def _checkColumn(strg,locn,toks):
        # reject the match unless it starts at the requested column
        actual = col(locn,strg)
        if actual != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return _checkColumn
+
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with transformString().
    """
    # the returned action ignores its (s, l, t) arguments entirely
    return lambda *args: [replStr]
+
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
    To use, add this parse action to quoted string using::
      quotedString.setParseAction( removeQuotes )
    """
    # strip exactly one leading and one trailing quote character
    quoted = t[0]
    return quoted[1:-1]
+
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
+
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
+
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
    overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        # NOTE(review): getTokensEndLoc raises ParseFatalException, which may
        # not be a subclass of ParseException - confirm this handler can fire.
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # replace whatever tokens were produced with the raw matched input slice
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
+
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
    location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                # frame record: f[0] is the frame object, f[3] the function name
                endloc = f[0].f_locals["loc"]
                return endloc
        else:
            # for/else: no _parseNoCache frame found - not inside a parse action
            raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # break the frame-object reference cycle created by inspect.stack()
        del fstack
+
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: attribute values must be double-quoted
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: values may be quoted or bare; attribute names are lowercased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names like "startTagName"/"endTagName", camel-cased from the tag
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)

    return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+ """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
+ return _makeTags( tagStr, False )
+
+def makeXMLTags(tagStr):
+ """Helper to construct opening and closing tag expressions for XML, given a tag name"""
+ return _makeTags( tagStr, True )
+
+def withAttribute(*args,**attrDict):
+ """Helper to create a validating parse action to be used with start tags created
+ with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
+ <TD> or <DIV>.
+
+ Call withAttribute with a series of attribute names and values. Specify the list
+ of filter attributes names and values as:
+ - keyword arguments, as in (class="Customer",align="right"), or
+ - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+ For attribute names with a namespace prefix, you must use the second form. Attribute
+ names are matched insensitive to upper/lower case.
+
+ To verify that the attribute exists, but without specifying a value, pass
+ withAttribute.ANY_VALUE as the value.
+ """
+ if args:
+ attrs = args[:]
+ else:
+ attrs = attrDict.items()
+ attrs = [(k,v) for k,v in attrs]
+ def pa(s,l,tokens):
+ for attrName,attrValue in attrs:
+ if attrName not in tokens:
+ raise ParseException(s,l,"no matching attribute " + attrName)
+ if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
+ raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+ (attrName, tokens[attrName], attrValue))
+ return pa
+withAttribute.ANY_VALUE = object()
+
+opAssoc = _Constants()
+opAssoc.LEFT = object()
+opAssoc.RIGHT = object()
+
+def operatorPrecedence( baseExpr, opList ):
+ """Helper method for constructing grammars of expressions made up of
+ operators working in a precedence hierarchy. Operators may be unary or
+ binary, left- or right-associative. Parse actions can also be attached
+ to operator expressions.
+
+ Parameters:
+ - baseExpr - expression representing the most basic element for the nested
+ - opList - list of tuples, one for each operator precedence level in the
+ expression grammar; each tuple is of the form
+ (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+ - opExpr is the pyparsing expression for the operator;
+ may also be a string, which will be converted to a Literal;
+ if numTerms is 3, opExpr is a tuple of two expressions, for the
+ two operators separating the 3 terms
+ - numTerms is the number of terms for this operator (must
+ be 1, 2, or 3)
+ - rightLeftAssoc is the indicator whether the operator is
+ right or left associative, using the pyparsing-defined
+ constants opAssoc.RIGHT and opAssoc.LEFT.
+ - parseAction is the parse action to be associated with
+ expressions matching this operator expression (the
+ parse action tuple member may be omitted)
+ """
+ ret = Forward()
+ lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
+ for i,operDef in enumerate(opList):
+ opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
+ if arity == 3:
+ if opExpr is None or len(opExpr) != 2:
+ raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
+ opExpr1, opExpr2 = opExpr
+ thisExpr = Forward()#.setName("expr%d" % i)
+ if rightLeftAssoc == opAssoc.LEFT:
+ if arity == 1:
+ matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
+ else:
+ matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
+ elif arity == 3:
+ matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
+ Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
+ else:
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+ elif rightLeftAssoc == opAssoc.RIGHT:
+ if arity == 1:
+ # try to avoid LR with this extra test
+ if not isinstance(opExpr, Optional):
+ opExpr = Optional(opExpr)
+ matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
+ else:
+ matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
+ elif arity == 3:
+ matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
+ Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
+ else:
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+ else:
+ raise ValueError("operator must indicate right or left associativity")
+ if pa:
+ matchExpr.setParseAction( pa )
+ thisExpr << ( matchExpr | lastExpr )
+ lastExpr = thisExpr
+ ret << lastExpr
+ return ret
+
+dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
+sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
+quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
+unicodeString = Combine(_L('u') + quotedString.copy())
+
+def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
+ """Helper method for defining nested lists enclosed in opening and closing
+ delimiters ("(" and ")" are the default).
+
+ Parameters:
+ - opener - opening character for a nested list (default="("); can also be a pyparsing expression
+ - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
+ - content - expression for items within the nested lists (default=None)
+ - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
+
+ If an expression is not provided for the content argument, the nested
+ expression will capture all whitespace-delimited content between delimiters
+ as a list of separate values.
+
+ Use the ignoreExpr argument to define expressions that may contain
+ opening or closing characters that should not be treated as opening
+ or closing characters for nesting, such as quotedString or a comment
+ expression. Specify multiple expressions using an Or or MatchFirst.
+ The default is quotedString, but if no expressions are to be ignored,
+ then pass None for this argument.
+ """
+ if opener == closer:
+ raise ValueError("opening and closing strings cannot be the same")
+ if content is None:
+ if isinstance(opener,basestring) and isinstance(closer,basestring):
+ if len(opener) == 1 and len(closer)==1:
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ ~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ raise ValueError("opening and closing arguments must be strings if no content expression is given")
+ ret = Forward()
+ if ignoreExpr is not None:
+ ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
+ else:
+ ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
+ return ret
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True):
+ """Helper method for defining space-delimited indentation blocks, such as
+ those used to define block statements in Python source code.
+
+ Parameters:
+ - blockStatementExpr - expression defining syntax of statement that
+ is repeated within the indented block
+ - indentStack - list created by caller to manage indentation stack
+ (multiple statementWithIndentedBlock expressions within a single grammar
+ should share a common indentStack)
+      - indent - boolean indicating whether block must be indented beyond
+ the current level; set to False for block of left-most statements
+ (default=True)
+
+ A valid block must contain at least one blockStatement.
+ """
+ def checkPeerIndent(s,l,t):
+ if l >= len(s): return
+ curCol = col(l,s)
+ if curCol != indentStack[-1]:
+ if curCol > indentStack[-1]:
+ raise ParseFatalException(s,l,"illegal nesting")
+ raise ParseException(s,l,"not a peer entry")
+
+ def checkSubIndent(s,l,t):
+ curCol = col(l,s)
+ if curCol > indentStack[-1]:
+ indentStack.append( curCol )
+ else:
+ raise ParseException(s,l,"not a subentry")
+
+ def checkUnindent(s,l,t):
+ if l >= len(s): return
+ curCol = col(l,s)
+ if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
+ raise ParseException(s,l,"not an unindent")
+ indentStack.pop()
+
+ NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
+ INDENT = Empty() + Empty().setParseAction(checkSubIndent)
+ PEER = Empty().setParseAction(checkPeerIndent)
+ UNDENT = Empty().setParseAction(checkUnindent)
+ if indent:
+ smExpr = Group( Optional(NL) +
+ FollowedBy(blockStatementExpr) +
+ INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
+ else:
+ smExpr = Group( Optional(NL) +
+ (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
+ blockStatementExpr.ignore(_bslash + LineEnd())
+ return smExpr
+
+alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
+punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
+
+anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
+commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
+replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
+
+htmlComment = Regex(r"<!--[\s\S]*?-->")
+restOfLine = Regex(r".*").leaveWhitespace()
+dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
+cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
+
+javaStyleComment = cppStyleComment
+pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+_noncomma = "".join( [ c for c in printables if c != "," ] )
+_commasepitem = Combine(OneOrMore(Word(_noncomma) +
+ Optional( Word(" \t") +
+ ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
+commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
+
+
+if __name__ == "__main__":
+
+ def test( teststring ):
+ try:
+ tokens = simpleSQL.parseString( teststring )
+ tokenlist = tokens.asList()
+ print (teststring + "->" + str(tokenlist))
+ print ("tokens = " + str(tokens))
+ print ("tokens.columns = " + str(tokens.columns))
+ print ("tokens.tables = " + str(tokens.tables))
+ print (tokens.asXML("SQL",True))
+ except ParseBaseException,err:
+ print (teststring + "->")
+ print (err.line)
+ print (" "*(err.column-1) + "^")
+ print (err)
+ print()
+
+ selectToken = CaselessLiteral( "select" )
+ fromToken = CaselessLiteral( "from" )
+
+ ident = Word( alphas, alphanums + "_$" )
+ columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+ columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
+ tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+ tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
+ simpleSQL = ( selectToken + \
+ ( '*' | columnNameList ).setResultsName( "columns" ) + \
+ fromToken + \
+ tableNameList.setResultsName( "tables" ) )
+
+ test( "SELECT * from XYZZY, ABC" )
+ test( "select * from SYS.XYZZY" )
+ test( "Select A from Sys.dual" )
+ test( "Select AA,BB,CC from Sys.dual" )
+ test( "Select A, B, C from Sys.dual" )
+ test( "Select A, B, C from Sys.dual" )
+ test( "Xelect A, B, C from Sys.dual" )
+ test( "Select A, B, C frox Sys.dual" )
+ test( "Select" )
+ test( "Select ^^^ frox Sys.dual" )
+ test( "Select A, B, C from Sys.dual, Table2 " )
diff --git a/libmproxy/resources/bogus_template b/libmproxy/resources/bogus_template
new file mode 100644
index 00000000..afa7281c
--- /dev/null
+++ b/libmproxy/resources/bogus_template
@@ -0,0 +1,11 @@
+[ req ]
+prompt = no
+distinguished_name = req_distinguished_name
+
+[ req_distinguished_name ]
+C = NZ
+ST = none
+L = none
+O = none
+OU = none
+emailAddress = none
diff --git a/libmproxy/utils.py b/libmproxy/utils.py
new file mode 100644
index 00000000..82ddf8fc
--- /dev/null
+++ b/libmproxy/utils.py
@@ -0,0 +1,277 @@
+# Copyright (C) 2010 Aldo Cortesi
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re, os, subprocess
+
+def isBin(s):
+ """
+    Does this string have any non-printable or non-ASCII characters?
+ """
+ for i in s:
+ i = ord(i)
+ if i < 9:
+ return True
+ elif i > 13 and i < 32:
+ return True
+ elif i > 126:
+ return True
+ return False
+
+
+def cleanBin(s):
+ parts = []
+ for i in s:
+ o = ord(i)
+ if o > 31 and o < 127:
+ parts.append(i)
+ else:
+ parts.append(".")
+ return "".join(parts)
+
+
+def hexdump(s):
+ """
+        Returns a set of tuples:
+ (offset, hex, str)
+ """
+ parts = []
+ for i in range(0, len(s), 16):
+ o = "%.10x"%i
+ part = s[i:i+16]
+ x = " ".join(["%.2x"%ord(i) for i in part])
+ if len(part) < 16:
+ x += " "
+ x += " ".join([" " for i in range(16-len(part))])
+ parts.append(
+ (o, x, cleanBin(part))
+ )
+ return parts
+
+
+def isStringLike(anobj):
+ try:
+ # Avoid succeeding expensively if anobj is large.
+ anobj[:0]+''
+ except:
+ return 0
+ else:
+ return 1
+
+
+def isSequenceLike(anobj):
+ """
+ Is anobj a non-string sequence type (list, tuple, iterator, or
+ similar)? Crude, but mostly effective.
+ """
+ if not hasattr(anobj, "next"):
+ if isStringLike(anobj):
+ return 0
+ try:
+ anobj[:0]
+ except:
+ return 0
+ return 1
+
+
+def _caseless(s):
+ return s.lower()
+
+
+class MultiDict:
+ """
+ Simple wrapper around a dictionary to make holding multiple objects per
+ key easier.
+
+ Note that this class assumes that keys are strings.
+
+ Keys have no order, but the order in which values are added to a key is
+ preserved.
+ """
+ # This ridiculous bit of subterfuge is needed to prevent the class from
+ # treating this as a bound method.
+ _helper = (str,)
+ def __init__(self):
+ self._d = dict()
+
+ def copy(self):
+ m = self.__class__()
+ m._d = self._d.copy()
+ return m
+
+ def clear(self):
+ return self._d.clear()
+
+ def get(self, key, d=None):
+ key = self._helper[0](key)
+ return self._d.get(key, d)
+
+ def __eq__(self, other):
+ return dict(self) == dict(other)
+
+ def __delitem__(self, key):
+ self._d.__delitem__(key)
+
+ def __getitem__(self, key):
+ key = self._helper[0](key)
+ return self._d.__getitem__(key)
+
+ def __setitem__(self, key, value):
+ if not isSequenceLike(value):
+ raise ValueError, "Cannot insert non-sequence."
+ key = self._helper[0](key)
+ return self._d.__setitem__(key, value)
+
+ def has_key(self, key):
+ key = self._helper[0](key)
+ return self._d.has_key(key)
+
+ def keys(self):
+ return self._d.keys()
+
+ def extend(self, key, value):
+ if not self.has_key(key):
+ self[key] = []
+ self[key].extend(value)
+
+ def append(self, key, value):
+ self.extend(key, [value])
+
+ def itemPairs(self):
+ """
+ Yield all possible pairs of items.
+ """
+ for i in self.keys():
+ for j in self[i]:
+ yield (i, j)
+
+
+class Headers(MultiDict):
+ """
+ A dictionary-like class for keeping track of HTTP headers.
+
+    It is case insensitive, and __repr__ formats the headers correctly for
+ output to the server.
+ """
+ _helper = (_caseless,)
+ def __repr__(self):
+ """
+ Returns a string containing a formatted header string.
+ """
+ headerElements = []
+ for key in self.keys():
+ for val in self[key]:
+ headerElements.append(key + ": " + val)
+ headerElements.append("")
+ return "\r\n".join(headerElements)
+
+ def match_re(self, expr):
+ """
+ Match the regular expression against each header (key, value) pair.
+ """
+ for k, v in self.itemPairs():
+ s = "%s: %s"%(k, v)
+ if re.search(expr, s):
+ return True
+ return False
+
+ def read(self, fp):
+ """
+ Read a set of headers from a file pointer. Stop once a blank line
+ is reached.
+ """
+ name = ''
+ while 1:
+ line = fp.readline()
+ if not line or line == '\r\n' or line == '\n':
+ break
+ if line[0] in ' \t':
+ # continued header
+ self[name][-1] = self[name][-1] + '\r\n ' + line.strip()
+ else:
+ i = line.find(':')
+ # We're being liberal in what we accept, here.
+ if i > 0:
+ name = line[:i]
+ value = line[i+1:].strip()
+ if self.has_key(name):
+ # merge value
+ self.append(name, value)
+ else:
+ self[name] = [value]
+
+
+def pretty_size(size):
+ suffixes = [
+ ("B", 2**10),
+ ("kB", 2**20),
+ ("M", 2**30),
+ ]
+ for suf, lim in suffixes:
+ if size >= lim:
+ continue
+ else:
+ x = round(size/float(lim/2**10), 2)
+ if x == int(x):
+ x = int(x)
+ return str(x) + suf
+
+
+class Data:
+ def __init__(self, name):
+ m = __import__(name)
+ dirname, _ = os.path.split(m.__file__)
+ self.dirname = os.path.abspath(dirname)
+
+ def path(self, path):
+ """
+ Returns a path to the package data housed at 'path' under this
+            module. Path can be a path to a file, or to a directory.
+
+ This function will raise ValueError if the path does not exist.
+ """
+ fullpath = os.path.join(self.dirname, path)
+ if not os.path.exists(fullpath):
+ raise ValueError, "dataPath: %s does not exist."%fullpath
+ return fullpath
+data = Data(__name__)
+
+
+def make_bogus_cert(path):
+ # Generates a bogus certificate like so:
+ # openssl req -config template -x509 -nodes -days 9999 -newkey rsa:1024 \
+ # -keyout cert.pem -out cert.pem
+
+ d = os.path.dirname(path)
+ if not os.path.exists(d):
+ os.makedirs(d)
+
+ cmd = [
+ "openssl",
+ "req",
+ "-config", data.path("resources/bogus_template"),
+ "-x509" ,
+ "-nodes",
+ "-days", "9999",
+ "-newkey", "rsa:1024",
+ "-keyout", path,
+ "-out", path,
+ ]
+ subprocess.call(
+ cmd,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stdin=subprocess.PIPE
+ )
+
diff --git a/mitmproxy b/mitmproxy
new file mode 100755
index 00000000..692d7bc6
--- /dev/null
+++ b/mitmproxy
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Aldo Cortesi
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys, os.path
+from libmproxy import proxy, controller, console, utils
+from optparse import OptionParser, OptionGroup
+
+
+if __name__ == '__main__':
+ parser = OptionParser(
+ usage = "%prog [options] output",
+ version="%prog 0.1",
+ )
+ parser.add_option(
+ "-d", "--dump", action="store_true",
+ dest="dump", default=False,
+ help = "Just dump data to screen."
+ )
+ parser.add_option(
+ "-c", "--cert", action="store",
+ type = "str", dest="cert", default="~/.mitmproxy/cert.pem",
+ help = "SSL certificate file."
+ )
+ parser.add_option(
+ "-p", "--port", action="store",
+ type = "int", dest="port", default=8080,
+ help = "Port."
+ )
+ parser.add_option("-q", "--quiet",
+ action="store_true", dest="quiet",
+ help="Quiet.")
+ parser.add_option("-v", "--verbose",
+ action="count", dest="verbose", default=1,
+ help="Increase verbosity. Can be passed multiple times.")
+ options, args = parser.parse_args()
+
+ if options.quiet:
+ options.verbose = 0
+
+ certpath = os.path.expanduser(options.cert)
+
+ if not os.path.exists(certpath):
+ print >> sys.stderr, "Creating bogus certificate at %s"%options.cert
+ utils.make_bogus_cert(certpath)
+
+ proxy.config = proxy.Config(
+ certpath
+ )
+ server = proxy.ProxyServer(options.port)
+ if options.dump:
+ m = controller.DumpMaster(server, options.verbose)
+ else:
+ m = console.ConsoleMaster(server, options.verbose)
+ if options.verbose > 0:
+ print >> sys.stderr, "Running on port %s"%options.port
+ m.run()
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..45728608
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,97 @@
+from distutils.core import setup
+import fnmatch, os.path
+
+def _fnmatch(name, patternList):
+ for i in patternList:
+ if fnmatch.fnmatch(name, i):
+ return True
+ return False
+
+
+def _splitAll(path):
+ parts = []
+ h = path
+ while 1:
+ if not h:
+ break
+ h, t = os.path.split(h)
+ parts.append(t)
+ parts.reverse()
+ return parts
+
+
+def findPackages(path, dataExclude=[]):
+ """
+ Recursively find all packages and data directories rooted at path. Note
+ that only data _directories_ and their contents are returned -
+ non-Python files at module scope are not, and should be manually
+ included.
+
+ dataExclude is a list of fnmatch-compatible expressions for files and
+        directories that should not be included in package_data.
+
+ Returns a (packages, package_data) tuple, ready to be passed to the
+ corresponding distutils.core.setup arguments.
+ """
+ packages = []
+ datadirs = []
+ for root, dirs, files in os.walk(path, topdown=True):
+ if "__init__.py" in files:
+ p = _splitAll(root)
+ packages.append(".".join(p))
+ else:
+ dirs[:] = []
+ if packages:
+ datadirs.append(root)
+
+ # Now we recurse into the data directories
+ package_data = {}
+ for i in datadirs:
+ if not _fnmatch(i, dataExclude):
+ parts = _splitAll(i)
+ module = ".".join(parts[:-1])
+ acc = package_data.get(module, [])
+ for root, dirs, files in os.walk(i, topdown=True):
+ sub = os.path.join(*_splitAll(root)[1:])
+ if not _fnmatch(sub, dataExclude):
+ for fname in files:
+ path = os.path.join(sub, fname)
+ if not _fnmatch(path, dataExclude):
+ acc.append(path)
+ else:
+ dirs[:] = []
+ package_data[module] = acc
+ return packages, package_data
+
+
+
+
+long_description = """
+A man-in-the-middle intercepting proxy written in Python.
+
+Features
+========
+
+ * Intercept HTTP and HTTPS traffic.
+ * Modify, manipulate and replay requests and responses on the fly.
+"""
+packages, package_data = findPackages("libmproxy")
+print packages, package_data
+version = "0.1"
+setup(
+ name = "mitmproxy",
+ version = version,
+ description = "An interactive intercepting proxy server.",
+ long_description = long_description,
+ author = "Aldo Cortesi",
+ author_email = "aldo@corte.si",
+ url = "http://corte.si/software/mitmproxy",
+ packages = packages,
+ package_data = package_data,
+ scripts = ["mitmproxy"],
+ classifiers = [
+ "Development Status :: 4 - Beta",
+ "Programming Language :: Python",
+ "Operating System :: OS Independent",
+ ]
+)
diff --git a/test/.pry b/test/.pry
new file mode 100644
index 00000000..0e4b2e23
--- /dev/null
+++ b/test/.pry
@@ -0,0 +1,5 @@
+base = ..
+coverage = ../libmproxy
+exclude = ../libmproxy/pyparsing.py
+ .
+
diff --git a/test/data/serverkey.pem b/test/data/serverkey.pem
new file mode 100644
index 00000000..289bfa71
--- /dev/null
+++ b/test/data/serverkey.pem
@@ -0,0 +1,32 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIICXQIBAAKBgQC+N+9bv1YC0GKbGdv2wMuuWTGSNwE/Hq5IIxYN1eITsvbD1GgB
+69x++XJd6KTIthnta0KCpCAtbaYbCkhUfxCVv2bP+iQt2AjwMOZlgRZ+RGJ25dBu
+AjAxQmqDJcAdS6MoRHWziomnUNfNogVrfqjpvJor+1iRnrj2q00ab9WYCwIDAQAB
+AoGBAIM7V9l2UcKzPbQ/zO+Z52urgXWcmTGQ2zBNdIOrEcQBbhmAyxi4PnEja3G6
+dSU77PtNSp+S19g/k5+IIoqY9zkGigdaPhRVRKJgBTAzFzMz+WHpQIffDojFKCnL
+gyDnzMRJY8+cnsCqbHRY4hqFiCr8Rq9sCdlynAytdtrnxzqhAkEA9bha6MO+L0JA
+6IEEbVY1vtaUO9Xg5DUDjRxQcfniSJACb/2IvF0tvxAnG7I/S8AavCXqtlDPtYkI
+WOxY5Sd62QJBAMYtKUxGka4XxwCyBK8EUNaN8m9C++mpjoHD1kFri9B1bXm91nCO
+iGWqtqdarwyEc/pAHw5UGzVyBXticPIcs4MCQQCcPvsHsZhYoq91aLyw7bXFQNsH
+ZUvYsOEuNIfuwa+i5ne2UKhG5pU1PgcwNFrNRz140D98aMx7KcS2DqvEIyOZAkBF
+6Yi4L+0Uza6WwDaGx679AfaU6byVIgv0G3JqgdZBJCwK1r3f12im9SKax5MZh2Ci
+2Bwcoe83W5IzhPbzcsyhAkBo8O2U2vig5PQWQ0BUKJrCGHLq//D/ttdLVtmc6eWc
+zqssCF3Unkk3bOq35swSKeAx8WotPPVsALWr87N2hCB+
+-----END RSA PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIICsDCCAhmgAwIBAgIJANwogM9sqMHLMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV
+BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX
+aWRnaXRzIFB0eSBMdGQwHhcNMTAwMTMxMDEzOTEzWhcNMTEwMTMxMDEzOTEzWjBF
+MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50
+ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB
+gQC+N+9bv1YC0GKbGdv2wMuuWTGSNwE/Hq5IIxYN1eITsvbD1GgB69x++XJd6KTI
+thnta0KCpCAtbaYbCkhUfxCVv2bP+iQt2AjwMOZlgRZ+RGJ25dBuAjAxQmqDJcAd
+S6MoRHWziomnUNfNogVrfqjpvJor+1iRnrj2q00ab9WYCwIDAQABo4GnMIGkMB0G
+A1UdDgQWBBTTnBZyw7ZZsb8+/6gvZFIHhVgtDzB1BgNVHSMEbjBsgBTTnBZyw7ZZ
+sb8+/6gvZFIHhVgtD6FJpEcwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUt
+U3RhdGUxITAfBgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZIIJANwogM9s
+qMHLMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEApz428aOar0EBuAib
+I+liefRlK4I3MQQxq3tOeB1dgAIo0ivKtdVJGi1kPg8EO0KMvFfn6IRtssUmFgCp
+JBD+HoDzFxwI1bLMVni+g7OzaNSwL3nQ94lZUdpWMYDxqY4bLUv3goX1TlN9lmpG
+8FiBLYUC0RNTCCRDFGfDr/wUT/M=
+-----END CERTIFICATE-----
diff --git a/test/data/testkey.pem b/test/data/testkey.pem
new file mode 100644
index 00000000..af8d9d8f
--- /dev/null
+++ b/test/data/testkey.pem
@@ -0,0 +1,32 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIICXQIBAAKBgQC+6rG6A/BGD0dI+mh2FZIqQZn82z/pGs4f3pyxbHb+ROxjjQOr
+fDCw2jc11XDxK7CXpDQAnkO6au/sQ5t50vSZ+PGhFD+t558VV2ausB5OYZsR7RRx
+gl1jsxWdde3EHGjxSK+aXRgFpVrZzPLSy6dl8tMoqUMWIBi0u1WTbmyYjwIDAQAB
+AoGBAKyqhmK9/Sjf2JDgKGnjyHX/Ls3JXVvtqk6Yfw7YEiaVH1ZJyu/lOgQ414YQ
+rDzyTpxXHdERUh/fZ24/FvZvHFgy5gWEQjQPpprIxvqCLKJhX73L2+TnXmfYDApb
+J7V/JfnTeOaK9LTpHsofB98A1s9DWX/ccOgKTtZIYMjYpdoBAkEA9hLvtixbO2A2
+ZgDcA9ftVX2WwdpRH+mYXl1G60Fem5nlO3Rl3FDoafRvSQNZiqyOlObvKbbYh/S2
+L7ihEMMNYQJBAMaeLnAc9jO/z4ApTqSBGUpM9b7ul16aSgq56saUI0VULIZcXeo3
+3BwdL2fEOOnzjNy6NpH2BW63h/+2t7lV++8CQQDK+S+1Sr0uKtx0Iv1YRkHEJMW3
+vQbxldNS8wnOf6s0GisVcZubsTkkPLWWuiaf1ln9xMc9106gRmAI2PgyRVHBAkA6
+iI+C9uYP5i1Oxd2pWWqMnRWnSUVO2gWMF7J7B1lFq0Lb7gi3Z/L0Th2UZR2oxN/0
+hORkK676LBhmYgDPG+n9AkAJOnPIFQVAEBAO9bAxFrje8z6GRt332IlgxuiTeDE3
+EAlH9tmZma4Tri4sWnhJwCsxl+5hWamI8NL4EIeXRvPw
+-----END RSA PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIICsDCCAhmgAwIBAgIJAI7G7a/d5YwEMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV
+BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX
+aWRnaXRzIFB0eSBMdGQwHhcNMTAwMjAyMDM0MTExWhcNMTEwMjAyMDM0MTExWjBF
+MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50
+ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB
+gQC+6rG6A/BGD0dI+mh2FZIqQZn82z/pGs4f3pyxbHb+ROxjjQOrfDCw2jc11XDx
+K7CXpDQAnkO6au/sQ5t50vSZ+PGhFD+t558VV2ausB5OYZsR7RRxgl1jsxWdde3E
+HGjxSK+aXRgFpVrZzPLSy6dl8tMoqUMWIBi0u1WTbmyYjwIDAQABo4GnMIGkMB0G
+A1UdDgQWBBS+MFJTsriCPNYsj8/4f+PympPEkzB1BgNVHSMEbjBsgBS+MFJTsriC
+PNYsj8/4f+PympPEk6FJpEcwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUt
+U3RhdGUxITAfBgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZIIJAI7G7a/d
+5YwEMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAlpan/QX2fpXVRihV
+lQic2DktF4xd5unrZnFC8X8ScNX1ClU+AO79ejaobt4YGjeVYs0iQQsUL2E0G43c
+mOXfsq1b970Ep6xRS76EmZ+tTdFBd86tFTIhZJrOi67gs+twj5V2elyp3tQpg2ze
+G/jwDQS8V1X9CbfqBQriL7x5Tk4=
+-----END CERTIFICATE-----
diff --git a/test/handler.py b/test/handler.py
new file mode 100644
index 00000000..5803b4d1
--- /dev/null
+++ b/test/handler.py
@@ -0,0 +1,25 @@
+import socket
+from BaseHTTPServer import BaseHTTPRequestHandler
+
+
+class TestRequestHandler(BaseHTTPRequestHandler):
+ default_request_version = "HTTP/1.1"
+ def setup(self):
+ self.connection = self.request
+ self.rfile = socket._fileobject(self.request, "rb", self.rbufsize)
+ self.wfile = socket._fileobject(self.request, "wb", self.wbufsize)
+
+ def log_message(self, *args, **kwargs):
+ pass
+
+ def do_GET(self):
+ data = "data: %s\npath: %s\n"%(self.headers, self.path)
+ self.send_response(200)
+ self.send_header("proxtest", "testing")
+ self.send_header("Content-type", "text-html")
+ self.send_header("Content-length", len(data))
+ self.end_headers()
+ self.wfile.write(data)
+
+
+
diff --git a/test/serv.py b/test/serv.py
new file mode 100644
index 00000000..9e43c08f
--- /dev/null
+++ b/test/serv.py
@@ -0,0 +1,10 @@
+import socket, os, cStringIO, tempfile
+from SocketServer import BaseServer
+from BaseHTTPServer import HTTPServer
+import handler
+
+def make(port):
+ server_address = ('', port)
+ return HTTPServer(server_address, handler.TestRequestHandler)
+
+
diff --git a/test/sslserv.py b/test/sslserv.py
new file mode 100644
index 00000000..5153d0da
--- /dev/null
+++ b/test/sslserv.py
@@ -0,0 +1,22 @@
+import socket, os, cStringIO, tempfile
+from SocketServer import BaseServer
+from BaseHTTPServer import HTTPServer
+import ssl
+import handler
+
+
+class SecureHTTPServer(HTTPServer):
+ def __init__(self, server_address, HandlerClass):
+ BaseServer.__init__(self, server_address, HandlerClass)
+ self.socket = ssl.wrap_socket(
+ socket.socket(self.address_family, self.socket_type),
+ keyfile = "data/serverkey.pem",
+ certfile = "data/serverkey.pem"
+ )
+ self.server_bind()
+ self.server_activate()
+
+
+def make(port):
+ server_address = ('', port)
+ return SecureHTTPServer(server_address, handler.TestRequestHandler)
diff --git a/test/test_console.py b/test/test_console.py
new file mode 100644
index 00000000..50780aa5
--- /dev/null
+++ b/test/test_console.py
@@ -0,0 +1,269 @@
+from libmproxy import console, proxy, utils, filt
+import libpry
+
def treq(conn=None):
    """Return a canned GET request; builds a fresh connection if none given."""
    if not conn:
        conn = proxy.BrowserConnection("address", 22)
    hdrs = utils.Headers()
    hdrs["header"] = ["qvalue"]
    return proxy.Request(conn, "host", 80, "http", "GET", "/path", hdrs, "content")
+
+
def tresp(req=None):
    """Return a canned 200 response; builds a request for it if none given."""
    if not req:
        req = treq()
    hdrs = utils.Headers()
    hdrs["header_response"] = ["svalue"]
    return proxy.Response(req, 200, "HTTP/1.1", "message", hdrs, "content_response")
+
+
def tflow():
    """Return a console.Flow wrapping a fresh browser connection."""
    return console.Flow(proxy.BrowserConnection("address", 22))
+
+
class uState(libpry.AutoTree):
    """Tests for console.State: flow bookkeeping, focus and view filtering."""
    def test_backup(self):
        # backup() then revert() on a tracked flow must not raise.
        bc = proxy.BrowserConnection("address", 22)
        c = console.State()
        f = console.Flow(bc)
        c.add_browserconnect(f)

        f.backup()
        c.revert(f)

    def test_flow(self):
        """
        Walk a normal flow (connect -> request -> response) and check that
        only messages belonging to a tracked connection are accepted.
        """
        bc = proxy.BrowserConnection("address", 22)
        c = console.State()
        f = console.Flow(bc)
        c.add_browserconnect(f)
        assert c.lookup(bc)
        assert c.get_focus() == (f, 0)

        req = treq(bc)
        assert c.add_request(req)
        assert len(c.flow_list) == 1
        assert c.lookup(req)

        # A request on an untracked connection is rejected.
        newreq = treq()
        assert not c.add_request(newreq)
        assert not c.lookup(newreq)

        resp = tresp(req)
        assert c.add_response(resp)
        assert len(c.flow_list) == 1
        assert f.waiting == False
        assert c.lookup(resp)

        # Likewise a response to an untracked request is rejected.
        newresp = tresp()
        assert not c.add_response(newresp)
        assert not c.lookup(newresp)

    def test_err(self):
        # Errors attach only to connections the state already knows about.
        bc = proxy.BrowserConnection("address", 22)
        c = console.State()
        f = console.Flow(bc)
        c.add_browserconnect(f)
        e = proxy.Error(bc, "message")
        assert c.add_error(e)

        e = proxy.Error(proxy.BrowserConnection("address", 22), "message")
        assert not c.add_error(e)

    def test_view(self):
        # The view reflects the currently set limit filter.
        c = console.State()

        f = tflow()
        c.add_browserconnect(f)
        assert len(c.view) == 1
        c.set_limit(filt.parse("~q"))  # no request yet -> hidden
        assert len(c.view) == 0
        c.set_limit(None)


        f = tflow()
        req = treq(f.connection)
        c.add_browserconnect(f)
        c.add_request(req)
        assert len(c.view) == 2
        c.set_limit(filt.parse("~q"))  # only the flow that has a request
        assert len(c.view) == 1
        c.set_limit(filt.parse("~s"))  # no responses at all
        assert len(c.view) == 0

    def test_focus(self):
        """
        Focus tracking as flows are added, navigated and deleted
        (connect -> request -> response).
        """
        c = console.State()

        bc = proxy.BrowserConnection("address", 22)
        f = console.Flow(bc)
        c.add_browserconnect(f)
        assert c.get_focus() == (f, 0)
        assert c.get_from_pos(0) == (f, 0)
        assert c.get_from_pos(1) == (None, None)
        assert c.get_next(0) == (None, None)

        # New connections are prepended, pushing the focused flow down.
        bc2 = proxy.BrowserConnection("address", 22)
        f2 = console.Flow(bc2)
        c.add_browserconnect(f2)
        assert c.get_focus() == (f, 1)
        assert c.get_next(0) == (f, 1)
        assert c.get_prev(1) == (f2, 0)
        assert c.get_next(1) == (None, None)

        c.set_focus(0)
        assert c.get_focus() == (f2, 0)
        c.set_focus(-1)  # out-of-range focus is clamped
        assert c.get_focus() == (f2, 0)

        c.delete_flow(f2)
        assert c.get_focus() == (f, 0)
        c.delete_flow(f)
        assert c.get_focus() == (None, None)

    def _add_request(self, state):
        # Helper: track a connection plus request; returns the flow.
        f = tflow()
        state.add_browserconnect(f)
        q = treq(f.connection)
        state.add_request(q)
        return f

    def _add_response(self, state):
        # Helper: track a full request/response pair.
        f = self._add_request(state)
        r = tresp(f.request)
        state.add_response(r)

    def test_focus_view(self):
        # Focus index is relative to the filtered view.
        c = console.State()
        self._add_request(c)
        self._add_response(c)
        self._add_request(c)
        self._add_response(c)
        self._add_request(c)
        self._add_response(c)
        c.set_limit(filt.parse("~q"))
        assert len(c.view) == 3
        assert c.focus == 2

    def test_delete_last(self):
        # Deleting the focused final entry clamps focus to the new end.
        c = console.State()
        f1 = tflow()
        f2 = tflow()
        c.add_browserconnect(f1)
        c.add_browserconnect(f2)
        c.set_focus(1)
        c.delete_flow(f1)
        assert c.focus == 0

    def test_kill_flow(self):
        c = console.State()
        f = tflow()
        c.add_browserconnect(f)
        c.kill_flow(f)
        assert not c.flow_list

    def test_clear(self):
        # clear() keeps intercepting flows, removes everything else.
        c = console.State()
        f = tflow()
        c.add_browserconnect(f)
        f.intercepting = True

        c.clear()
        assert len(c.flow_list) == 1
        f.intercepting = False
        c.clear()
        assert len(c.flow_list) == 0
+
+
class uFlow(libpry.AutoTree):
    """Tests for console.Flow: matching, backup, rendering, interception."""
    def test_match(self):
        f = tflow()
        f.response = tresp()
        f.request = f.response.request
        assert not f.match(filt.parse("~b test"))

    def test_backup(self):
        # backup()/revert() round-trip must not raise.
        f = tflow()
        f.backup()
        f.revert()

    def test_simple(self):
        # get_text() must render the flow at every stage of its life cycle.
        f = tflow()
        assert f.get_text()

        f.request = treq()
        assert f.get_text()

        f.response = tresp()
        f.response.headers["content-type"] = ["text/html"]
        assert f.get_text()
        f.response.code = 404  # non-2xx responses render too
        assert f.get_text()

        f.focus = True
        assert f.get_text()

        f.connection = console.ReplayConnection()
        assert f.get_text()

        f.response = None
        assert f.get_text()

        f.error = proxy.Error(200, "test")
        assert f.get_text()

    def test_kill(self):
        # kill() acks whichever message is currently held by the intercept.
        f = tflow()
        f.request = treq()
        f.intercept()
        assert not f.request.acked
        f.kill()
        assert f.request.acked
        f.intercept()
        f.response = tresp()
        f.request = f.response.request
        f.request.ack()
        assert not f.response.acked
        f.kill()
        assert f.response.acked

    def test_accept_intercept(self):
        # accept_intercept() releases (acks) the held message.
        f = tflow()
        f.request = treq()
        f.intercept()
        assert not f.request.acked
        f.accept_intercept()
        assert f.request.acked
        f.response = tresp()
        f.request = f.response.request
        f.intercept()
        f.request.ack()
        assert not f.response.acked
        f.accept_intercept()
        assert f.response.acked
+
+
class uformat_keyvals(libpry.AutoTree):
    def test_simple(self):
        """format_keyvals should render a simple key/value table."""
        pairs = [
            ("aa", "bb"),
            ("cc", "dd"),
        ]
        assert console.format_keyvals(pairs)
+
+
# Suite roots collected by the libpry test runner.
tests = [
    uFlow(),
    uformat_keyvals(),
    uState()
]
diff --git a/test/test_filt.py b/test/test_filt.py
new file mode 100644
index 00000000..3cf0f6cd
--- /dev/null
+++ b/test/test_filt.py
@@ -0,0 +1,220 @@
+import cStringIO
+from libmproxy import filt, proxy, utils
+import libpry
+
+
class uParsing(libpry.AutoTree):
    """Tests for the filter expression parser."""
    def _dump(self, x):
        # Every parsed filter must be dump()-able to a file object.
        c = cStringIO.StringIO()
        x.dump(fp=c)
        assert c.getvalue()

    def test_simple(self):
        assert not filt.parse("~b")  # ~b requires an argument
        assert filt.parse("~q")
        assert filt.parse("~c 10")
        assert filt.parse("~u foobar")
        assert filt.parse("~q ~c 10")
        p = filt.parse("~q ~c 10")
        self._dump(p)
        assert len(p.lst) == 2  # adjacent terms are collected into a list

    def test_naked_url(self):
        #a = filt.parse("foobar")
        #assert a.lst[0].expr == "foobar"

        # A bare word is treated as a URL expression.
        a = filt.parse("foobar ~h rex")
        assert a.lst[0].expr == "foobar"
        assert a.lst[1].expr == "rex"
        self._dump(a)

    def test_quoting(self):
        # Quoted arguments may contain spaces and filter syntax.
        a = filt.parse("~u 'foo ~u bar' ~u voing")
        assert a.lst[0].expr == "foo ~u bar"
        assert a.lst[1].expr == "voing"
        self._dump(a)

        a = filt.parse("~u foobar")
        assert a.expr == "foobar"

        # Escaped quotes inside quoted strings.
        a = filt.parse(r"~u 'foobar\"\''")
        assert a.expr == "foobar\"'"

        a = filt.parse(r'~u "foo \'bar"')
        assert a.expr == "foo 'bar"

    def test_nesting(self):
        a = filt.parse("(~u foobar & ~h voing)")
        assert a.lst[0].expr == "foobar"
        self._dump(a)

    def test_not(self):
        # "!" negates a single term or a parenthesized group.
        a = filt.parse("!~h test")
        assert a.itm.expr == "test"
        a = filt.parse("!(~u test & ~h bar)")
        assert a.itm.lst[0].expr == "test"
        self._dump(a)

    def test_binaryops(self):
        a = filt.parse("~u foobar | ~h voing")
        isinstance(a, filt.FOr)
        self._dump(a)

        a = filt.parse("~u foobar & ~h voing")
        isinstance(a, filt.FAnd)
        self._dump(a)

    def test_wideops(self):
        # ~hq narrows header matching to the request side.
        a = filt.parse("~hq 'header: qvalue'")
        assert isinstance(a, filt.FHeadRequest)
        self._dump(a)
+
+
class uMatching(libpry.AutoTree):
    """Tests that parsed filters match requests and responses correctly."""
    def req(self):
        # Canned request carrying a single "header: qvalue" header.
        conn = proxy.BrowserConnection("one", 2222)
        headers = utils.Headers()
        headers["header"] = ["qvalue"]
        return proxy.Request(
            conn,
            "host",
            80,
            "http",
            "GET",
            "/path",
            headers,
            "content_request"
        )

    def resp(self):
        # Canned 200 response to self.req(), with its own header.
        q = self.req()
        headers = utils.Headers()
        headers["header_response"] = ["svalue"]
        return proxy.Response(
            q,
            200,
            "HTTP/1.1",
            "message",
            headers,
            "content_response"
        )

    def q(self, q, o):
        # Parse filter expression `q` and apply it to object `o`.
        return filt.parse(q)(o)

    def test_fcontenttype(self):
        # ~t matches content-type on either side; ~tq/~ts are side-specific.
        q = self.req()
        s = self.resp()
        assert not self.q("~t content", q)
        assert not self.q("~t content", s)

        q.headers["content-type"] = ["text/json"]
        assert self.q("~t json", q)
        assert self.q("~tq json", q)
        assert not self.q("~ts json", q)

        s.headers["content-type"] = ["text/json"]
        assert self.q("~t json", s)

        # When applied to a response, ~tq inspects the associated request.
        del s.headers["content-type"]
        s.request.headers["content-type"] = ["text/json"]
        assert self.q("~t json", s)
        assert self.q("~tq json", s)
        assert not self.q("~ts json", s)

    def test_freq_fresp(self):
        # ~q matches only requests, ~s only responses.
        q = self.req()
        s = self.resp()

        assert self.q("~q", q)
        assert not self.q("~q", s)

        assert not self.q("~s", q)
        assert self.q("~s", s)

    def test_head(self):
        # ~h matches key or value on either side; ~hq/~hs are side-specific.
        q = self.req()
        s = self.resp()
        assert not self.q("~h nonexistent", q)
        assert self.q("~h qvalue", q)
        assert self.q("~h header", q)
        assert self.q("~h 'header: qvalue'", q)

        assert self.q("~h 'header: qvalue'", s)
        assert self.q("~h 'header_response: svalue'", s)

        assert self.q("~hq 'header: qvalue'", s)
        assert not self.q("~hq 'header_response: svalue'", s)

        assert self.q("~hq 'header: qvalue'", q)
        assert not self.q("~hq 'header_request: svalue'", q)

        assert not self.q("~hs 'header: qvalue'", s)
        assert self.q("~hs 'header_response: svalue'", s)
        assert not self.q("~hs 'header: qvalue'", q)

    def test_body(self):
        # ~b matches body content on either side; ~bq/~bs are side-specific.
        q = self.req()
        s = self.resp()
        assert not self.q("~b nonexistent", q)
        assert self.q("~b content", q)
        assert self.q("~b response", s)
        assert self.q("~b content_request", s)

        assert self.q("~bq content", q)
        assert self.q("~bq content", s)
        assert not self.q("~bq response", q)
        assert not self.q("~bq response", s)

        assert not self.q("~bs content", q)
        assert self.q("~bs content", s)
        assert not self.q("~bs nomatch", s)
        assert not self.q("~bs response", q)
        assert self.q("~bs response", s)

    def test_url(self):
        q = self.req()
        s = self.resp()
        assert self.q("~u host", q)
        assert self.q("~u host/path", q)
        assert not self.q("~u moo/path", q)

        assert self.q("~u host", s)
        assert self.q("~u host/path", s)
        assert not self.q("~u moo/path", s)

    def test_code(self):
        # ~c matches the response code; requests never match.
        q = self.req()
        s = self.resp()
        assert not self.q("~c 200", q)
        assert self.q("~c 200", s)
        assert not self.q("~c 201", s)

    def test_and(self):
        s = self.resp()
        assert self.q("~c 200 & ~h head", s)
        assert not self.q("~c 200 & ~h nohead", s)
        assert self.q("(~c 200 & ~h head) & ~b content", s)
        assert not self.q("(~c 200 & ~h head) & ~b nonexistent", s)
        assert not self.q("(~c 200 & ~h nohead) & ~b content", s)

    def test_or(self):
        s = self.resp()
        assert self.q("~c 200 | ~h nohead", s)
        assert self.q("~c 201 | ~h head", s)
        assert not self.q("~c 201 | ~h nohead", s)
        assert self.q("(~c 201 | ~h nohead) | ~s", s)
        assert not self.q("(~c 201 | ~h nohead) | ~q", s)

    def test_not(self):
        # Juxtaposed negations combine conjunctively.
        s = self.resp()
        assert not self.q("! ~c 200", s)
        assert self.q("! ~c 201", s)
        assert self.q("!~c 201 !~c 202", s)
        assert not self.q("!~c 201 !~c 200", s)
+ assert not self.q("!~c 201 !~c 200", s)
+
+
# Suite roots collected by the libpry test runner.
tests = [
    uMatching(),
    uParsing()
]
diff --git a/test/test_proxy.py b/test/test_proxy.py
new file mode 100644
index 00000000..90cfbbfb
--- /dev/null
+++ b/test/test_proxy.py
@@ -0,0 +1,259 @@
+import threading, urllib, Queue, urllib2, cStringIO
+import libpry
+import serv, sslserv
+from libmproxy import proxy, controller, utils
+import random
+
# Yes, the random ports are horrible. During development, sockets are often not
# properly closed during error conditions, which means you have to wait until
# you can re-bind to the same port. This is a pain in the ass, so we just pick
# a random port and keep moving.
# The ranges are disjoint, so the proxy, HTTP and HTTPS servers never collide.
PROXL_PORT = random.randint(10000, 20000)
HTTP_PORT = random.randint(20000, 30000)
HTTPS_PORT = random.randint(30000, 40000)
+
+
class TestMaster(controller.Master):
    """Controller that records every proxy message and immediately acks it."""
    def __init__(self, port, testq):
        controller.Master.__init__(self, proxy.ProxyServer(port))
        self.testq = testq
        self.log = []

    def clear(self):
        """Forget all recorded messages."""
        self.log = []

    def handle(self, m):
        """Append the message to the log, then ack so the flow proceeds."""
        self.log.append(m)
        m.ack()
+
+
class ProxyThread(threading.Thread):
    """Background thread driving a TestMaster-controlled proxy."""
    def __init__(self, port, testq):
        threading.Thread.__init__(self)
        self.tmaster = TestMaster(port, testq)

    def run(self):
        self.tmaster.run()

    def shutdown(self):
        self.tmaster.shutdown()
+
+
class ServerThread(threading.Thread):
    """Background thread running a (possibly SSL-wrapped) test HTTP server."""
    def __init__(self, server):
        threading.Thread.__init__(self)
        self.server = server

    def run(self):
        self.server.serve_forever()

    def shutdown(self):
        self.server.shutdown()
+
+
class _TestServers(libpry.TestContainer):
    """Starts the proxy plus HTTP/HTTPS backends once for all child tests."""
    def setUpAll(self):
        proxy.config = proxy.Config("data/testkey.pem")
        self.tqueue = Queue.Queue()
        # We don't make any concurrent requests, so we can access
        # the attributes on this object safely.
        self.proxthread = ProxyThread(PROXL_PORT, self.tqueue)
        self.threads = [
            ServerThread(serv.make(HTTP_PORT)),
            ServerThread(sslserv.make(HTTPS_PORT)),
            self.proxthread
        ]
        for i in self.threads:
            i.start()

    def setUp(self):
        # Each test starts with an empty proxy message log.
        self.proxthread.tmaster.clear()

    def tearDownAll(self):
        for i in self.threads:
            i.shutdown()
+
+
class _ProxTests(libpry.AutoTree):
    """Base for tests that need the proxy's recorded message log."""
    def log(self):
        return self.findAttr("proxthread").tmaster.log
+
+
class uSanity(_ProxTests):
    def test_http(self):
        """The plain HTTP backend must be up and serving content."""
        resp = urllib.urlopen("http://127.0.0.1:%s"%HTTP_PORT)
        assert resp.read()

    def test_https(self):
        """The HTTPS backend must be up and serving content."""
        resp = urllib.urlopen("https://127.0.0.1:%s"%HTTPS_PORT)
        assert resp.read()
+
+
class uProxy(_ProxTests):
    """End-to-end requests routed through the running proxy instance."""
    HOST = "127.0.0.1"
    def _get(self, host=HOST):
        # Plain HTTP request sent via the proxy.
        r = urllib2.Request("http://%s:%s"%(host, HTTP_PORT))
        r.set_proxy("127.0.0.1:%s"%PROXL_PORT, "http")
        return urllib2.urlopen(r)

    def _sget(self, host=HOST):
        # HTTPS request sent via an https proxy handler.
        proxy_support = urllib2.ProxyHandler(
            {"https" : "https://127.0.0.1:%s"%PROXL_PORT}
        )
        opener = urllib2.build_opener(proxy_support)
        r = urllib2.Request("https://%s:%s"%(host, HTTPS_PORT))
        return opener.open(r)

    def test_http(self):
        f = self._get()
        assert f.code == 200
        assert f.read()
        f.close()

        # Proxy log order: connection, request, response.
        l = self.log()
        assert l[0].address
        assert l[1].headers.has_key("host")
        assert l[2].code == 200

    def test_https(self):
        f = self._sget()
        assert f.code == 200
        assert f.read()
        f.close()

        # Proxy log order: connection, request, response.
        l = self.log()
        assert l[0].address
        assert l[1].headers.has_key("host")
        assert l[2].code == 200

    # Disable these two for now: they take a long time.
    def _test_http_nonexistent(self):
        f = self._get("nonexistent")
        assert f.code == 200
        assert "Error" in f.read()

    def _test_https_nonexistent(self):
        f = self._sget("nonexistent")
        assert f.code == 200
        assert "Error" in f.read()
+
+
+
class u_parse_proxy_request(libpry.AutoTree):
    """Tests for parsing the first line of a proxy request."""
    def test_simple(self):
        libpry.raises(proxy.ProxyError, proxy.parse_proxy_request, "")

        u = "GET ... HTTP/1.1"
        libpry.raises("invalid url", proxy.parse_proxy_request, u)

        u = "MORK / HTTP/1.1"
        libpry.raises("unknown request method", proxy.parse_proxy_request, u)

        # Absolute-form request line: all five fields populated.
        u = "GET http://foo.com:8888/test HTTP/1.1"
        m, s, h, po, pa = proxy.parse_proxy_request(u)
        assert m == "GET"
        assert s == "http"
        assert h == "foo.com"
        assert po == 8888
        assert pa == "/test"

    def test_connect(self):
        # CONNECT carries only host/port; scheme and path come back None.
        u = "CONNECT host.com:443 HTTP/1.0"
        expected = ('CONNECT', None, 'host.com', 443, None)
        ret = proxy.parse_proxy_request(u)
        assert expected == ret

    def test_inner(self):
        # Origin-form request line: only method and path are known.
        u = "GET / HTTP/1.1"
        assert proxy.parse_proxy_request(u) == ('GET', None, None, None, '/')
+
+
class u_parse_url(libpry.AutoTree):
    def test_simple(self):
        """parse_url splits scheme/host/port/path and applies defaults."""
        assert not proxy.parse_url("")

        scheme, host, port, path = proxy.parse_url("http://foo.com:8888/test")
        assert (scheme, host, port, path) == ("http", "foo.com", 8888, "/test")

        # Port defaults to 80, path to "/".
        assert proxy.parse_url("http://foo/bar") == ("http", "foo", 80, "/bar")
        assert proxy.parse_url("http://foo")[3] == "/"
+
+
class uConfig(libpry.AutoTree):
    def test_pem(self):
        """A Config built with a pemfile keeps the path."""
        conf = proxy.Config(pemfile="data/testkey.pem")
        assert conf.pemfile
+
+
class uFileLike(libpry.AutoTree):
    def test_wrap(self):
        """FileLike proxies flush/readline through to the wrapped object."""
        fl = proxy.FileLike(cStringIO.StringIO("foobar\nfoobar"))
        fl.flush()
        assert fl.readline() == "foobar\n"
        assert fl.readline() == "foobar"
+
+
class uRequest(libpry.AutoTree):
    def test_simple(self):
        """Round-trip a request URL and exercise the renderers."""
        headers = utils.Headers()
        headers["test"] = ["test"]
        conn = proxy.BrowserConnection("addr", 2222)
        req = proxy.Request(conn, "host", 22, "https", "GET", "/", headers, "content")
        url = req.url()
        assert req.set_url(url)
        assert not req.set_url("")
        assert req.url() == url
        assert req.short()
        assert req.assemble()
+
+
class uResponse(libpry.AutoTree):
    def test_simple(self):
        """A response built on a request should render and assemble."""
        headers = utils.Headers()
        headers["test"] = ["test"]
        conn = proxy.BrowserConnection("addr", 2222)
        req = proxy.Request(conn, "host", 22, "https", "GET", "/", headers, "content")
        resp = proxy.Response(req, 200, "HTTP", "msg", headers.copy(), "content")
        assert resp.short()
        assert resp.assemble()
+
+
class uProxyError(libpry.AutoTree):
    def test_simple(self):
        """ProxyError must have a usable repr."""
        err = proxy.ProxyError(111, "msg")
        assert repr(err)
+
+
+
# Suite roots; the server-backed suites nest under _TestServers so they
# share its setUpAll/tearDownAll.
tests = [
    uProxyError(),
    uRequest(),
    uResponse(),
    uFileLike(),
    uConfig(),
    u_parse_proxy_request(),
    u_parse_url(),
    _TestServers(), [
        uSanity(),
        uProxy(),
    ]
]
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 00000000..8a4da968
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,221 @@
+import textwrap, cStringIO, os
+import libpry
+from libmproxy import utils
+
+
class uisBin(libpry.AutoTree):
    def test_simple(self):
        """Control characters mark a string as binary; \\n and \\r do not."""
        assert not utils.isBin("testing\n\r")
        for ch in ("\x01", "\x0e", "\x7f"):
            assert utils.isBin("testing" + ch)
+
+
class uhexdump(libpry.AutoTree):
    def test_simple(self):
        """hexdump should produce output for data containing NULs."""
        data = "one\0" * 10
        assert utils.hexdump(data)
+
+
class upretty_size(libpry.AutoTree):
    def test_simple(self):
        """pretty_size formats bytes, whole kB, fractional kB and MB."""
        cases = [
            (100, "100B"),
            (1024, "1kB"),
            (1024 + (1024/2), "1.5kB"),
            (1024*1024, "1M"),
        ]
        for size, expected in cases:
            assert utils.pretty_size(size) == expected
+
+
class uData(libpry.AutoTree):
    def test_nonexistent(self):
        """Requesting a missing data file must raise a descriptive error."""
        libpry.raises("does not exist", utils.data.path, "nonexistent")
+
+
class uMultiDict(libpry.AutoTree):
    """Tests for utils.MultiDict: a dict mapping keys to lists of values."""
    def setUp(self):
        self.md = utils.MultiDict()

    def test_setget(self):
        assert not self.md.has_key("foo")
        self.md.append("foo", 1)
        assert self.md["foo"] == [1]
        assert self.md.has_key("foo")

    def test_del(self):
        self.md.append("foo", 1)
        del self.md["foo"]
        assert not self.md.has_key("foo")

    def test_extend(self):
        self.md.append("foo", 1)
        self.md.extend("foo", [2, 3])
        assert self.md["foo"] == [1, 2, 3]

    def test_extend_err(self):
        # extend() requires an iterable of values.
        self.md.append("foo", 1)
        libpry.raises("not iterable", self.md.extend, "foo", 2)

    def test_get(self):
        self.md.append("foo", 1)
        self.md.append("foo", 2)
        assert self.md.get("foo") == [1, 2]
        assert self.md.get("bar") == None

    def test_caseSensitivity(self):
        # With the caseless helper, keys differing only in case collapse.
        self.md._helper = (utils._caseless,)
        self.md["foo"] = [1]
        self.md.append("FOO", 2)
        assert self.md["foo"] == [1, 2]
        assert self.md["FOO"] == [1, 2]
        assert self.md.has_key("FoO")

    def test_dict(self):
        # A MultiDict compares equal to a plain dict with the same content.
        self.md.append("foo", 1)
        self.md.append("foo", 2)
        self.md["bar"] = [3]
        assert self.md == self.md
        assert dict(self.md) == self.md

    def test_copy(self):
        # copy() must be equal but not the same object.
        self.md["foo"] = [1, 2]
        self.md["bar"] = [3, 4]
        md2 = self.md.copy()
        assert md2 == self.md
        assert id(md2) != id(self.md)

    def test_clear(self):
        self.md["foo"] = [1, 2]
        self.md["bar"] = [3, 4]
        self.md.clear()
        assert not self.md.keys()

    def test_setitem(self):
        # Assigned values must be lists.
        libpry.raises(ValueError, self.md.__setitem__, "foo", "bar")
        self.md["foo"] = ["bar"]
        assert self.md["foo"] == ["bar"]

    def test_itemPairs(self):
        # itemPairs() flattens each value list into (key, value) pairs.
        self.md.append("foo", 1)
        self.md.append("foo", 2)
        self.md.append("bar", 3)
        l = list(self.md.itemPairs())
        assert len(l) == 3
        assert ("foo", 1) in l
        assert ("foo", 2) in l
        assert ("bar", 3) in l
+
+
class uHeaders(libpry.AutoTree):
    """Tests for utils.Headers: HTTP header parsing and serialization."""
    def setUp(self):
        self.hd = utils.Headers()

    def test_read_simple(self):
        # Keys are lower-cased on read.
        data = """
            Header: one
            Header2: two
            \r\n
        """
        data = textwrap.dedent(data)
        data = data.strip()
        s = cStringIO.StringIO(data)
        self.hd.read(s)
        assert self.hd["header"] == ["one"]
        assert self.hd["header2"] == ["two"]

    def test_read_multi(self):
        # Repeated keys accumulate their values in order.
        data = """
            Header: one
            Header: two
            \r\n
        """
        data = textwrap.dedent(data)
        data = data.strip()
        s = cStringIO.StringIO(data)
        self.hd.read(s)
        assert self.hd["header"] == ["one", "two"]

    def test_read_continued(self):
        # A leading-whitespace line continues the previous header value.
        data = """
            Header: one
            \ttwo
            Header2: three
            \r\n
        """
        data = textwrap.dedent(data)
        data = data.strip()
        s = cStringIO.StringIO(data)
        self.hd.read(s)
        assert self.hd["header"] == ['one\r\n two']

    def test_dictToHeader1(self):
        # repr() emits "key: value\r\n" lines terminated by a blank line.
        self.hd.append("one", "uno")
        self.hd.append("two", "due")
        self.hd.append("two", "tre")
        expected = [
            "one: uno\r\n",
            "two: due\r\n",
            "two: tre\r\n",
            "\r\n"
        ]
        out = repr(self.hd)
        for i in expected:
            assert out.find(i) >= 0

    def test_dictToHeader2(self):
        self.hd["one"] = ["uno"]
        expected1 = "one: uno\r\n"
        expected2 = "\r\n"
        out = repr(self.hd)
        assert out.find(expected1) >= 0
        assert out.find(expected2) >= 0

    def test_match_re(self):
        # match_re matches against the serialized "key: value" form.
        h = utils.Headers()
        h.append("one", "uno")
        h.append("two", "due")
        h.append("two", "tre")
        assert h.match_re("uno")
        assert h.match_re("two: due")
        assert not h.match_re("nonono")
+
+
+
class uisStringLike(libpry.AutoTree):
    def test_all(self):
        """Strings qualify; lists and tuples never do, even of strings."""
        assert utils.isStringLike("foo")
        for nonstring in ([1, 2, 3], (1, 2, 3), ["1", "2", "3"]):
            assert not utils.isStringLike(nonstring)
+
+
class uisSequenceLike(libpry.AutoTree):
    def test_all(self):
        """Lists, tuples and iterators qualify; strings and ints do not."""
        for seq in ([1, 2, 3], (1, 2, 3), ["foobar", "foo"], iter([1, 2, 3])):
            assert utils.isSequenceLike(seq)
        assert not utils.isSequenceLike("foobar")
        assert not utils.isSequenceLike(1)
+
+
class umake_bogus_cert(libpry.AutoTree):
    def test_all(self):
        """make_bogus_cert writes a combined key+cert PEM at the given path."""
        base = self.tmpdir()
        path = os.path.join(base, "foo", "cert")
        utils.make_bogus_cert(path)

        pem = open(path).read()
        assert "PRIVATE KEY" in pem
        assert "CERTIFICATE" in pem
+
+
# Suite roots collected by the libpry test runner.
tests = [
    umake_bogus_cert(),
    uisBin(),
    uhexdump(),
    upretty_size(),
    uisStringLike(),
    uisSequenceLike(),
    uMultiDict(),
    uHeaders(),
    uData(),
]
diff --git a/test/tserv b/test/tserv
new file mode 100755
index 00000000..5b35b72a
--- /dev/null
+++ b/test/tserv
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""
+ A simple program for testing the test HTTP/S servers.
+"""
+from optparse import OptionParser, OptionGroup
+import sslserv, serv
+
if __name__ == "__main__":
    parser = OptionParser(
        usage = "%prog [options] output",
        version="%prog 0.1",
    )
    parser.add_option(
        "-s", "--ssl", action="store_true",
        dest="ssl", default=False
    )
    options, args = parser.parse_args()

    # Conventional alternate ports: 8080 for HTTP, 8443 for HTTPS.
    if options.ssl:
        port = 8443
        print "Running on port %s"%port
        s = sslserv.make(port)
    else:
        port = 8080
        print "Running on port %s"%port
        s = serv.make(port)
    try:
        s.serve_forever()
    except KeyboardInterrupt:
        # Allow a clean Ctrl-C shutdown without a traceback.
        pass
diff --git a/todo b/todo
new file mode 100644
index 00000000..5ba41f76
--- /dev/null
+++ b/todo
@@ -0,0 +1,17 @@
+
+Future:
+
+ - Strings view.
+ - Field parsing and editing.
+ - On-the-fly generation of keys, signed with a CA
+ - Pass-through fast-track for things that don't match filter?
+ - Reading contents from file
+ - Saving contents to file
+ - Shortcut for viewing in pager
+ - Serializing and de-serializing requests and responses.
+
+
+Bugs:
+
+ - In some circumstances, long URLs in list view are line-broken oddly.
+ - Termination sometimes hangs.