diff options
| author | Maximilian Hils <git@maximilianhils.com> | 2017-08-26 18:18:58 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-08-26 18:18:58 +0200 | 
| commit | 9942f782177b83d57da1d579bd930209e99002d3 (patch) | |
| tree | dc7a6ac2dfff6072f0591d7e8fd4305c6c9707f9 | |
| parent | 6350d5a19edafa992b53073610f05c0d38f28ddd (diff) | |
| parent | e1af76e71f6b2230847767c0bbccdc40fd98bb3b (diff) | |
| download | mitmproxy-9942f782177b83d57da1d579bd930209e99002d3.tar.gz mitmproxy-9942f782177b83d57da1d579bd930209e99002d3.tar.bz2 mitmproxy-9942f782177b83d57da1d579bd930209e99002d3.zip | |
Merge pull request #2542 from mhils/fast-css
Replace cssutils with custom, faster, css prettifier
25 files changed, 425 insertions, 37 deletions
| diff --git a/mitmproxy/contentviews/css.py b/mitmproxy/contentviews/css.py index 353a3257..8fa09ed3 100644 --- a/mitmproxy/contentviews/css.py +++ b/mitmproxy/contentviews/css.py @@ -1,8 +1,51 @@ -import logging +import re +import time -import cssutils +from mitmproxy.contentviews import base +from mitmproxy.utils import strutils -from . import base +""" +A custom CSS prettifier. Compared to other prettifiers, its main features are: + +- Implemented in pure Python. +- Modifies whitespace only. +- Works with any input. +- Considerably faster than e.g. cssutils. +""" + +CSS_SPECIAL_AREAS = ( +    ("'", strutils.NO_ESCAPE + "'"), +    ('"', strutils.NO_ESCAPE + '"'), +    (r"/\*", r"\*/"), +    ("//", "$") +) +CSS_SPECIAL_CHARS = "{};:" + + +def beautify(data: str, indent: str = "    "): +    """Beautify a string containing CSS code""" +    data = strutils.escape_special_areas( +        data.strip(), +        CSS_SPECIAL_AREAS, +        CSS_SPECIAL_CHARS, +    ) + +    # Add newlines +    data = re.sub(r"\s*;\s*", ";\n", data) +    data = re.sub(r"\s*{\s*", " {\n", data) +    data = re.sub(r"\s*}\s*", "\n}\n\n", data) + +    # Fix incorrect ":" placement +    data = re.sub(r"\s*:\s*(?=[^{]+})", ": ", data) +    # Fix no space after "," +    data = re.sub(r"\s*,\s*", ", ", data) + +    # indent +    data = re.sub("\n[ \t]+", "\n", data) +    data = re.sub("\n(?![}\n])(?=[^{]*})", "\n" + indent, data) + +    data = strutils.unescape_special_areas(data) +    return data.rstrip("\n") + "\n"  class ViewCSS(base.View): @@ -13,13 +56,15 @@ class ViewCSS(base.View):      ]      def __call__(self, data, **metadata): -        cssutils.log.setLevel(logging.CRITICAL) -        cssutils.ser.prefs.keepComments = True -        cssutils.ser.prefs.omitLastSemicolon = False -        cssutils.ser.prefs.indentClosingBrace = False -        cssutils.ser.prefs.validOnly = False +        data = data.decode("utf8", "surrogateescape") +        beautified = beautify(data) +        return 
"CSS", base.format_text(beautified) -        sheet = cssutils.parseString(data) -        beautified = sheet.cssText -        return "CSS", base.format_text(beautified) +if __name__ == "__main__":  # pragma: no cover +    with open("../tools/web/static/vendor.css") as f: +        data = f.read() + +    t = time.time() +    x = beautify(data) +    print("Beautifying vendor.css took {:.2}s".format(time.time() - t)) diff --git a/mitmproxy/utils/strutils.py b/mitmproxy/utils/strutils.py index db0cfd2e..37bed7de 100644 --- a/mitmproxy/utils/strutils.py +++ b/mitmproxy/utils/strutils.py @@ -1,6 +1,7 @@ +import io  import re  import codecs -from typing import AnyStr, Optional, cast +from typing import AnyStr, Optional, cast, Iterable, Tuple  def always_bytes(str_or_bytes: Optional[AnyStr], *encode_args) -> Optional[bytes]: @@ -141,3 +142,87 @@ def hexdump(s):              False          ))          yield (offset, x, part_repr) + + +def _move_to_private_code_plane(matchobj): +    return chr(ord(matchobj.group(0)) + 0xE000) + + +def _restore_from_private_code_plane(matchobj): +    return chr(ord(matchobj.group(0)) - 0xE000) + + +NO_ESCAPE = r"(?<!\\)(?:\\\\)*" + + +def split_special_areas( +        data: str, +        area_delimiter: Iterable[Tuple[str, str]], +): +    """ +    Split a string of code into a [code, special area, code, special area, ..., code] list. + +    For example, + +    >>> split_special_areas( +    >>>     "test /* don't modify me */ foo", +    >>>     [(r"/\*", r"\*/")])  # (left delimiter regex, right delimiter regex) +    ["test ", "/* don't modify me */", " foo"] + +    "".join(split_special_areas(x, ...)) == x always holds true. 
+    """ +    patterns = "|".join( +        r"{lchar}[\s\S]*?{rchar}".format( +            lchar=a, +            rchar=b, +        ) for (a, b) in area_delimiter) +    return re.split( +        "({})".format(patterns), +        data, +        flags=re.MULTILINE +    ) + + +def escape_special_areas( +        data: str, +        area_delimiter: Iterable[Tuple[str, str]], +        control_characters, +): +    """ +    Escape all control characters present in special areas with UTF8 symbols +    in the private use plane (U+E000 + ord(char)). +    This is useful so that one can then use regex replacements on the resulting string without +    interfering with special areas. + +    control_characters must be 0 < ord(x) < 256. + +    Example: + +    >>> print(x) +    if (true) { console.log('{}'); } +    >>> x = escape_special_areas(x, [("'", "'")], "{") +    >>> print(x) +    if (true) { console.log('\ue07b}'); } +    >>> x = re.sub(r"\s*{\s*", " {\n    ", x) +    >>> x = unescape_special_areas(x) +    >>> print(x) +    if (true) { +        console.log('{}'); } +    """ +    buf = io.StringIO() +    parts = split_special_areas(data, area_delimiter) +    rex = re.compile(r"[{}]".format(control_characters)) +    for i, x in enumerate(parts): +        if i % 2: +            x = rex.sub(_move_to_private_code_plane, x) +        buf.write(x) +    return buf.getvalue() + + +def unescape_special_areas(data: str): +    """ +    Invert escape_special_areas. + +    x == unescape_special_areas(escape_special_areas(x)) always holds true. +    """ +    return re.sub(r"[\ue000-\ue0ff]", _restore_from_private_code_plane, data) @@ -65,7 +65,6 @@ setup(          "certifi>=2015.11.20.1",  # no semver here - this should always be on the last release!          
"click>=6.2, <7",          "cryptography>=2.0,<2.1", -        "cssutils>=1.0.1, <1.1",          "h2>=3.0, <4",          "html2text>=2016.1.8, <=2016.9.19",          "hyperframe>=5.0, <6", diff --git a/test/mitmproxy/contentviews/test_css.py b/test/mitmproxy/contentviews/test_css.py index ecb9259b..814f6e83 100644 --- a/test/mitmproxy/contentviews/test_css.py +++ b/test/mitmproxy/contentviews/test_css.py @@ -1,29 +1,42 @@ +import pytest +  from mitmproxy.contentviews import css  from mitmproxy.test import tutils  from . import full_eval -try: -    import cssutils -except: -    cssutils = None - - -def test_view_css(): +data = tutils.test_data.push("mitmproxy/contentviews/test_css_data/") + + +@pytest.mark.parametrize("filename", [ +    "animation-keyframe.css", +    "blank-lines-and-spaces.css", +    "block-comment.css", +    "empty-rule.css", +    "import-directive.css", +    "indentation.css", +    "media-directive.css", +    "quoted-string.css", +    "selectors.css", +    "simple.css", +]) +def test_beautify(filename): +    path = data.path(filename) +    with open(path) as f: +        input = f.read() +    with open("-formatted.".join(path.rsplit(".", 1))) as f: +        expected = f.read() +    formatted = css.beautify(input) +    assert formatted == expected + + +def test_simple():      v = full_eval(css.ViewCSS()) - -    with open(tutils.test_data.path('mitmproxy/data/1.css'), 'r') as fp: -        fixture_1 = fp.read() - -    result = v('a') - -    if cssutils: -        assert len(list(result[1])) == 0 -    else: -        assert len(list(result[1])) == 1 - -    result = v(fixture_1) - -    if cssutils: -        assert len(list(result[1])) > 1 -    else: -        assert len(list(result[1])) == 1 +    assert v(b"#foo{color:red}") == ('CSS', [ +        [('text', '#foo {')], +        [('text', '    color: red')], +        [('text', '}')] +    ]) +    assert v(b"") == ('CSS', [[('text', '')]]) +    assert v(b"console.log('not really css')") == ( +        'CSS', 
[[('text', "console.log('not really css')")]] +    ) diff --git a/test/mitmproxy/contentviews/test_css_data/animation-keyframe-formatted.css b/test/mitmproxy/contentviews/test_css_data/animation-keyframe-formatted.css new file mode 100644 index 00000000..3f91d508 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/animation-keyframe-formatted.css @@ -0,0 +1,11 @@ +@-webkit-keyframes anim { +0% { +    -webkit-transform: translate3d(0px, 0px, 0px); +} + +100% { +    -webkit-transform: translate3d(150px, 0px, 0px) +} + + +} diff --git a/test/mitmproxy/contentviews/test_css_data/animation-keyframe.css b/test/mitmproxy/contentviews/test_css_data/animation-keyframe.css new file mode 100644 index 00000000..ce63da5c --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/animation-keyframe.css @@ -0,0 +1,3 @@ +@-webkit-keyframes anim { +0% { -webkit-transform: translate3d(0px, 0px, 0px); } +100% { -webkit-transform: translate3d(150px, 0px, 0px) }} diff --git a/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces-formatted.css b/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces-formatted.css new file mode 100644 index 00000000..de6bd045 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces-formatted.css @@ -0,0 +1,35 @@ +/* only one blank line between */ +menu { +    color: red +} + +navi { +    color: black +} + +/* automatically insert a blank line */ +button { +    border: 1px +} + +sidebar { +    color: #ffe +} + +/* always whitespace before { */ +hidden { +    opacity: 0% +} + +/* no blank lines inside ruleset */ +imprint { +    color: blue; +    opacity: 0.5; +    font-size: small +} + +/* before colon: no space, after colon: one space only */ +footer { +    font-family: Arial; +    float: right; +} diff --git a/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces.css b/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces.css new file mode 100644 index 00000000..c6892105 
--- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/blank-lines-and-spaces.css @@ -0,0 +1,30 @@ +/* only one blank line between */ +menu { color: red } + + + + +navi { color: black } + +/* automatically insert a blank line */ +button { border: 1px } sidebar { color: #ffe } + +/* always whitespace before { */ +hidden{opacity:0%} + +/* no blank lines inside ruleset */ +imprint { +  color: blue; + + +    opacity: 0.5; + +   font-size: small +} + +/* before colon: no space, after colon: one space only */ +footer { +      font-family:     Arial; + +  float   :right; +  } diff --git a/test/mitmproxy/contentviews/test_css_data/block-comment-formatted.css b/test/mitmproxy/contentviews/test_css_data/block-comment-formatted.css new file mode 100644 index 00000000..83e0f4e6 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/block-comment-formatted.css @@ -0,0 +1,22 @@ +/* line comment */ +navigation { +    color: blue +} + +menu { +    /* line comment inside */ +    border: 2px +} + +/* block +comment */ +sidebar { +    color: red +} + +invisible { +    /* block +    * comment +    * inside */ +    color: #eee +} diff --git a/test/mitmproxy/contentviews/test_css_data/block-comment.css b/test/mitmproxy/contentviews/test_css_data/block-comment.css new file mode 100644 index 00000000..3ba26540 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/block-comment.css @@ -0,0 +1,18 @@ +/* line comment */ +navigation { color: blue } + +menu { +    /* line comment inside */ +    border: 2px +} + +/* block + comment */ +sidebar { color: red } + +invisible { +    /* block +     * comment +     * inside */ +    color: #eee +} diff --git a/test/mitmproxy/contentviews/test_css_data/empty-rule-formatted.css b/test/mitmproxy/contentviews/test_css_data/empty-rule-formatted.css new file mode 100644 index 00000000..7c0a78f4 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/empty-rule-formatted.css @@ -0,0 +1,2 @@ +menu { +} diff --git 
a/test/mitmproxy/contentviews/test_css_data/empty-rule.css b/test/mitmproxy/contentviews/test_css_data/empty-rule.css new file mode 100644 index 00000000..7d6ecfcd --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/empty-rule.css @@ -0,0 +1 @@ +menu{} diff --git a/test/mitmproxy/contentviews/test_css_data/import-directive-formatted.css b/test/mitmproxy/contentviews/test_css_data/import-directive-formatted.css new file mode 100644 index 00000000..08a0ad57 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/import-directive-formatted.css @@ -0,0 +1,8 @@ +menu { +    background-color: red +} + +@import url('foobar.css') screen; +nav { +    margin: 0 +} diff --git a/test/mitmproxy/contentviews/test_css_data/import-directive.css b/test/mitmproxy/contentviews/test_css_data/import-directive.css new file mode 100644 index 00000000..61979f0a --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/import-directive.css @@ -0,0 +1,2 @@ +menu{background-color:red} @import url('foobar.css') screen; +nav{margin:0} diff --git a/test/mitmproxy/contentviews/test_css_data/indentation-formatted.css b/test/mitmproxy/contentviews/test_css_data/indentation-formatted.css new file mode 100644 index 00000000..18ea527d --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/indentation-formatted.css @@ -0,0 +1,3 @@ +navigation { +    color: blue +} diff --git a/test/mitmproxy/contentviews/test_css_data/indentation.css b/test/mitmproxy/contentviews/test_css_data/indentation.css new file mode 100644 index 00000000..77e00f83 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/indentation.css @@ -0,0 +1,3 @@ +     navigation { +    color: blue +  } diff --git a/test/mitmproxy/contentviews/test_css_data/media-directive-formatted.css b/test/mitmproxy/contentviews/test_css_data/media-directive-formatted.css new file mode 100644 index 00000000..84d95421 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/media-directive-formatted.css @@ -0,0 +1,17 
@@ +@import "subs.css"; +@import "print-main.css" print; +@media print { +body { +    font-size: 10pt +} + +nav { +    color: blue; +} + + +} + +h1 { +    color: red; +} diff --git a/test/mitmproxy/contentviews/test_css_data/media-directive.css b/test/mitmproxy/contentviews/test_css_data/media-directive.css new file mode 100644 index 00000000..ddf67c58 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/media-directive.css @@ -0,0 +1,7 @@ +@import "subs.css"; +@import "print-main.css" print; +@media print { +  body { font-size: 10pt } +  nav { color: blue; } +} +h1 {color: red; } diff --git a/test/mitmproxy/contentviews/test_css_data/quoted-string-formatted.css b/test/mitmproxy/contentviews/test_css_data/quoted-string-formatted.css new file mode 100644 index 00000000..ab4c3412 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/quoted-string-formatted.css @@ -0,0 +1,7 @@ +nav:after { +    content: '}' +} + +nav:before { +    content: "}" +} diff --git a/test/mitmproxy/contentviews/test_css_data/quoted-string.css b/test/mitmproxy/contentviews/test_css_data/quoted-string.css new file mode 100644 index 00000000..f5f3279e --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/quoted-string.css @@ -0,0 +1,2 @@ +nav:after{content:'}'} +nav:before{content:"}"} diff --git a/test/mitmproxy/contentviews/test_css_data/selectors-formatted.css b/test/mitmproxy/contentviews/test_css_data/selectors-formatted.css new file mode 100644 index 00000000..166251cb --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/selectors-formatted.css @@ -0,0 +1,19 @@ +* { +    border: 0px solid blue; +} + +div[class="{}"] { +    color: red; +} + +a[id=\"foo"] { +    padding: 0; +} + +[id=\"foo"] { +    margin: 0; +} + +#menu, #nav, #footer { +    color: royalblue; +} diff --git a/test/mitmproxy/contentviews/test_css_data/selectors.css b/test/mitmproxy/contentviews/test_css_data/selectors.css new file mode 100644 index 00000000..dc36f9e5 --- /dev/null +++ 
b/test/mitmproxy/contentviews/test_css_data/selectors.css @@ -0,0 +1,5 @@ +* { border: 0px solid blue; } +div[class="{}"] { color: red; } +a[id=\"foo"] { padding: 0; } +[id=\"foo"] { margin: 0; } +#menu, #nav, #footer { color: royalblue; } diff --git a/test/mitmproxy/contentviews/test_css_data/simple-formatted.css b/test/mitmproxy/contentviews/test_css_data/simple-formatted.css new file mode 100644 index 00000000..9435236b --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/simple-formatted.css @@ -0,0 +1,16 @@ +menu { +    color: blue; +} + +box { +    border-radius: 4px; +    background-color: red +} + +a { +    color: green +} + +b { +    color: red +} diff --git a/test/mitmproxy/contentviews/test_css_data/simple.css b/test/mitmproxy/contentviews/test_css_data/simple.css new file mode 100644 index 00000000..33b29a03 --- /dev/null +++ b/test/mitmproxy/contentviews/test_css_data/simple.css @@ -0,0 +1,5 @@ +menu { color: blue; } + +box { border-radius: 4px; background-color: red } +a { color: green } +b { color: red } diff --git a/test/mitmproxy/utils/test_strutils.py b/test/mitmproxy/utils/test_strutils.py index bacd7f62..7ec72e4e 100644 --- a/test/mitmproxy/utils/test_strutils.py +++ b/test/mitmproxy/utils/test_strutils.py @@ -96,3 +96,33 @@ def test_clean_hanging_newline():  def test_hexdump():      assert list(strutils.hexdump(b"one\0" * 10)) + + +ESCAPE_QUOTES = [ +    ("'", strutils.NO_ESCAPE + "'"), +    ('"', strutils.NO_ESCAPE + '"') +] + + +def test_split_special_areas(): +    assert strutils.split_special_areas("foo", ESCAPE_QUOTES) == ["foo"] +    assert strutils.split_special_areas("foo 'bar' baz", ESCAPE_QUOTES) == ["foo ", "'bar'", " baz"] +    assert strutils.split_special_areas( +        """foo 'b\\'a"r' baz""", +        ESCAPE_QUOTES +    ) == ["foo ", "'b\\'a\"r'", " baz"] +    assert strutils.split_special_areas( +        "foo\n/*bar\nbaz*/\nqux", +        [(r'/\*', r'\*/')] +    ) == ["foo\n", "/*bar\nbaz*/", "\nqux"] +    assert 
strutils.split_special_areas( +        "foo\n//bar\nbaz", +        [(r'//', r'$')] +    ) == ["foo\n", "//bar", "\nbaz"] + + +def test_escape_special_areas(): +    assert strutils.escape_special_areas('foo "bar" baz', ESCAPE_QUOTES, "*") == 'foo "bar" baz' +    esc = strutils.escape_special_areas('foo "b*r" b*z', ESCAPE_QUOTES, "*") +    assert esc == 'foo "b\ue02ar" b*z' +    assert strutils.unescape_special_areas(esc) == 'foo "b*r" b*z' | 
