From 73ce169e3d11eeabeb78143bd86edfdbc3e07fd9 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 12 Apr 2015 10:26:09 +1200 Subject: Initial outline of a cookie parsing and serialization module. --- test/test_http_cookies.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 test/test_http_cookies.py (limited to 'test/test_http_cookies.py') diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py new file mode 100644 index 00000000..b3f1f914 --- /dev/null +++ b/test/test_http_cookies.py @@ -0,0 +1,106 @@ +from netlib import http_cookies, odict +import nose.tools + + +def test_read_token(): + tokens = [ + [("foo", 0), ("foo", 3)], + [("foo", 1), ("oo", 3)], + [(" foo", 1), ("foo", 4)], + [(" foo;", 1), ("foo", 4)], + [(" foo=", 1), ("foo", 4)], + [(" foo=bar", 1), ("foo", 4)], + ] + for q, a in tokens: + nose.tools.eq_(http_cookies._read_token(*q), a) + + +def test_read_quoted_string(): + tokens = [ + [('"foo" x', 0), ("foo", 5)], + [('"f\oo" x', 0), ("foo", 6)], + [(r'"f\\o" x', 0), (r"f\o", 6)], + [(r'"f\\" x', 0), (r"f" + '\\', 5)], + [('"fo\\\"" x', 0), ("fo\"", 6)], + ] + for q, a in tokens: + nose.tools.eq_(http_cookies._read_quoted_string(*q), a) + + +def test_read_pairs(): + vals = [ + [ + "one", + [["one", None]] + ], + [ + "one=two", + [["one", "two"]] + ], + [ + 'one="two"', + [["one", "two"]] + ], + [ + 'one="two"; three=four', + [["one", "two"], ["three", "four"]] + ], + [ + 'one="two"; three=four; five', + [["one", "two"], ["three", "four"], ["five", None]] + ], + [ + 'one="\\"two"; three=four', + [["one", '"two'], ["three", "four"]] + ], + ] + for s, lst in vals: + ret, off = http_cookies._read_pairs(s) + nose.tools.eq_(ret, lst) + + +def test_pairs_roundtrips(): + pairs = [ + [ + "one=uno", + [["one", "uno"]] + ], + [ + "one", + [["one", None]] + ], + [ + "one=uno; two=due", + [["one", "uno"], ["two", "due"]] + ], + [ + 'one="uno"; two="\due"', + [["one", "uno"], ["two", "due"]] + ], + [ + 'one="un\\"o"', + [["one", 'un"o']] + ], + [ + "one=uno; two; three=tre", + [["one", "uno"], ["two", None], ["three", "tre"]] + ], + [ + "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; " + "_rcc2=53VdltWl+Ov6ordflA==;", + [ + ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="], + ["_rcc2", "53VdltWl+Ov6ordflA=="] + ] + ] + ] + for s, lst in pairs: + ret, off = http_cookies._read_pairs(s) + nose.tools.eq_(ret, lst) + s2 = http_cookies._format_pairs(lst) + ret, off = http_cookies._read_pairs(s2) + nose.tools.eq_(ret, lst) + + +def test_parse_set_cookie(): + pass -- cgit v1.2.3 From 2630da7263242411d413b5e4b2c520d29848c918 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 12 Apr 2015 11:26:02 +1200 Subject: cookies: Cater for special values, fix some bugs found in real-world testing --- test/test_http_cookies.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'test/test_http_cookies.py') diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py index b3f1f914..31e5f0b0 100644 --- a/test/test_http_cookies.py +++ b/test/test_http_cookies.py @@ -37,6 +37,10 @@ def test_read_pairs(): "one=two", [["one", "two"]] ], + [ + "one=", + [["one", ""]] + ], [ 'one="two"', [["one", "two"]] @@ -81,6 +85,10 @@ def test_pairs_roundtrips(): 'one="un\\"o"', [["one", 'un"o']] ], + [ + 'one="uno,due"', + [["one", 'uno,due']] + ], [ "one=uno; two; three=tre", [["one", "uno"], ["two", None], ["three", "tre"]] -- cgit v1.2.3 From de9e7411253c4f67ea4d0b96f6f9e952024c5fa3 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 10:02:10 +1200 Subject: Firm up cookie parsing and formatting API Make a tough call: we won't support old-style comma-separated set-cookie headers. Real world testing has shown that the latest rfc (6265) is often violated in ways that make the parsing problem indeterminate. Since this is much more common than the old style deprecated set-cookie variant, we focus on the most useful case. --- test/test_http_cookies.py | 115 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) (limited to 'test/test_http_cookies.py') diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py index 31e5f0b0..c0e5a5b7 100644 --- a/test/test_http_cookies.py +++ b/test/test_http_cookies.py @@ -1,6 +1,8 @@ -from netlib import http_cookies, odict +import pprint import nose.tools +from netlib import http_cookies, odict + def test_read_token(): tokens = [ @@ -65,6 +67,10 @@ def test_read_pairs(): def test_pairs_roundtrips(): pairs = [ + [ + "", + [] + ], [ "one=uno", [["one", "uno"]] @@ -110,5 +116,108 @@ def test_pairs_roundtrips(): nose.tools.eq_(ret, lst) -def test_parse_set_cookie(): - pass +def test_cookie_roundtrips(): + pairs = [ + [ + "one=uno", + [["one", "uno"]] + ], + [ + "one=uno; two=due", + [["one", "uno"], ["two", "due"]] + ], + ] + for s, lst in pairs: + ret = http_cookies.parse_cookie_header(s) + nose.tools.eq_(ret.lst, lst) + s2 = http_cookies.format_cookie_header(ret) + ret = http_cookies.parse_cookie_header(s2) + nose.tools.eq_(ret.lst, lst) + + +# TODO +# I've seen the following pathological cookie in the wild: +# +# cid=09,0,0,0,0; expires=Wed, 10-Jun-2015 21:54:53 GMT; path=/ +# +# It's not compliant under any RFC - the latest RFC prohibits commas in cookie +# values completely, earlier RFCs require them to be within a quoted string. +# +# If we ditch support for earlier RFCs, we can handle this correctly. This +# leaves us with the question: what's more common, multiple-value Set-Cookie +# headers, or Set-Cookie headers that violate the standards? + +def test_parse_set_cookie_pairs(): + pairs = [ + [ + "one=uno", + [ + ["one", "uno"] + ] + ], + [ + "one=uno; foo", + [ + ["one", "uno"], + ["foo", None] + ] + ], + [ + "mun=1.390.f60; " + "expires=sun, 11-oct-2015 12:38:31 gmt; path=/; " + "domain=b.aol.com", + [ + ["mun", "1.390.f60"], + ["expires", "sun, 11-oct-2015 12:38:31 gmt"], + ["path", "/"], + ["domain", "b.aol.com"] + ] + ], + [ + r'rpb=190%3d1%2616726%3d1%2634832%3d1%2634874%3d1; ' + 'domain=.rubiconproject.com; ' + 'expires=mon, 11-may-2015 21:54:57 gmt; ' + 'path=/', + [ + ['rpb', r'190%3d1%2616726%3d1%2634832%3d1%2634874%3d1'], + ['domain', '.rubiconproject.com'], + ['expires', 'mon, 11-may-2015 21:54:57 gmt'], + ['path', '/'] + ] + ], + ] + for s, lst in pairs: + ret = http_cookies._parse_set_cookie_pairs(s) + nose.tools.eq_(ret, lst) + s2 = http_cookies._format_set_cookie_pairs(ret) + ret2 = http_cookies._parse_set_cookie_pairs(s2) + nose.tools.eq_(ret2, lst) + + +def test_parse_set_cookie_header(): + vals = [ + [ + "", None + ], + [ + "one=uno", + ("one", "uno", []) + ], + [ + "one=uno; foo=bar", + ("one", "uno", [["foo", "bar"]]) + ] + ] + for s, expected in vals: + ret = http_cookies.parse_set_cookie_header(s) + if expected: + assert ret[0] == expected[0] + assert ret[1] == expected[1] + nose.tools.eq_(ret[2].lst, expected[2]) + s2 = http_cookies.format_set_cookie_header(*ret) + ret2 = http_cookies.parse_set_cookie_header(s2) + assert ret2[0] == expected[0] + assert ret2[1] == expected[1] + nose.tools.eq_(ret2[2].lst, expected[2]) + else: + assert ret is None -- cgit v1.2.3 From 6db5e0a4a133e6e6150f9cab87cd56b40d6db0b2 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 14 Apr 2015 10:13:03 +1200 Subject: Remove old-style set-cookie cruft, unit tests to 100% --- test/test_http_cookies.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test/test_http_cookies.py') diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py index c0e5a5b7..ad509254 100644 --- a/test/test_http_cookies.py +++ b/test/test_http_cookies.py @@ -155,6 +155,12 @@ def test_parse_set_cookie_pairs(): ["one", "uno"] ] ], + [ + "one=un\x20", + [ + ["one", "un\x20"] + ] + ], [ "one=uno; foo", [ -- cgit v1.2.3 From 0c85c72dc43d0d017e2bf5af9c2def46968d0499 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 15 Apr 2015 10:28:17 +1200 Subject: ODict improvements - Setting values now tries to preserve the existing order, rather than just appending to the end. - __repr__ now returns a repr of the tuple list. The old repr becomes a .format() method. This is clearer, makes troubleshooting easier, and doesn't assume all data in ODicts are header-like --- test/test_http_cookies.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'test/test_http_cookies.py') diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py index ad509254..7438af7c 100644 --- a/test/test_http_cookies.py +++ b/test/test_http_cookies.py @@ -135,18 +135,6 @@ def test_cookie_roundtrips(): nose.tools.eq_(ret.lst, lst) -# TODO -# I've seen the following pathological cookie in the wild: -# -# cid=09,0,0,0,0; expires=Wed, 10-Jun-2015 21:54:53 GMT; path=/ -# -# It's not compliant under any RFC - the latest RFC prohibits commas in cookie -# values completely, earlier RFCs require them to be within a quoted string. -# -# If we ditch support for earlier RFCs, we can handle this correctly. This -# leaves us with the question: what's more common, multiple-value Set-Cookie -# headers, or Set-Cookie headers that violate the standards? - def test_parse_set_cookie_pairs(): pairs = [ [ @@ -205,6 +193,9 @@ def test_parse_set_cookie_header(): [ "", None ], + [ + ";", None + ], [ "one=uno", ("one", "uno", []) -- cgit v1.2.3