@@ -42,7 +42,7 @@ def fail(s):
4242 duplicate_allowed_list = [x .rstrip ('\n ' ) for x in duplicate_fh .readlines ()]
4343
4444
45- def test_bad_regexp (tree , filename , from_attrib , to ):
45+ def test_bad_regexp (tree , rulename , from_attrib , to ):
4646 # Rules with invalid regular expressions.
4747 """The 'from' rule contains an invalid extended regular expression."""
4848 patterns = from_attrib + xpath_exlusion_pattern (tree ) + xpath_cookie_pattern (tree )
@@ -53,33 +53,46 @@ def test_bad_regexp(tree, filename, from_attrib, to):
5353 return False
5454 return True
5555
56- def test_unescaped_dots (tree , filename , from_attrib , to ):
def unescaped_dot(s):
    """Return True if ``s`` has an unescaped dot before its first slash.

    ``s`` is scanned one character at a time.  A ``.`` counts as
    unescaped when it is not preceded by an escaping backslash and is not
    inside a ``[...]`` bracket expression.  Scanning stops at the first
    ``/`` found outside brackets, so anything after it is ignored.

    Args:
        s: Regular-expression text to scan.

    Returns:
        True if an unescaped, unbracketed dot is found before the first
        unbracketed slash; False otherwise (including for an empty string).
    """
    escaped = False    # the previous character was an unpaired backslash
    bracketed = False  # currently inside a [...] bracket expression
    for c in s:
        if c == "\\":
            # Toggle rather than set: in "\\." the two backslashes pair up,
            # leaving the dot that follows them unescaped.
            escaped = not escaped
        elif not escaped and c == "[":
            bracketed = True
        elif not escaped and c == "]":
            bracketed = False
        elif not escaped and not bracketed and c == ".":
            return True
        elif not bracketed and c == "/":
            # Only the text before the first slash is checked.
            break
        else:
            # Any other character consumes a pending escape.
            escaped = False
    return False
73+
74+ def test_unescaped_dots (tree , rulename , from_attrib , to ):
5775 # Rules containing unescaped dots outside of brackets and before slash.
5876 # Note: this is meant to require example\.com instead of example.com,
5977 # but it also forbids things like .* which usually ought to be replaced
6078 # with something like ([^/:@\.]+)
6179 """The 'from' rule contains unescaped period in regular expression. Try escaping it with a backslash."""
6280 for f in from_attrib :
63- escaped = False
64- bracketed = False
6581 s = re .sub ("^\^https?://" , "" , f )
66- for c in s :
67- if c == "\\ " :
68- escaped = not escaped
69- elif not escaped and c == "[" :
70- bracketed = True
71- elif not escaped and c == "]" :
72- bracketed = False
73- elif not escaped and not bracketed and c == "." :
74- return False
75- elif not bracketed and c == "/" :
76- break
77- else :
78- escaped = False
82+ if unescaped_dot (s ):
83+ return False
84+ return True
85+
def test_unescaped_dots_in_exclusion(tree, rulename, from_attrib, to):
    # Exclusion patterns containing unescaped dots outside of brackets and
    # before the first slash that follows the scheme.
    #
    # Only tree is read here; rulename, from_attrib and to are accepted so
    # the function has the same signature as the other tests.
    # Returns False as soon as one /ruleset/exclusion/@pattern value has an
    # unescaped dot, True otherwise (including when there are no exclusions).
    # The docstring below is the failure message, so it is kept verbatim.
    """The 'exclusion' tag contains unescaped period in regular expression. Try escaping it with a backslash."""
    pattern_attrib = etree.XPath("/ruleset/exclusion/@pattern")(tree)
    for f in pattern_attrib:
        # Strip a leading literal "^http://" or "^https://" exactly as
        # test_unescaped_dots does.  Without this, unescaped_dot() stops at
        # the first "/" of the scheme and never inspects the rest.
        s = re.sub(r"^\^https?://", "", f)
        if unescaped_dot(s):
            return False
    return True
8093
8194xpath_rule = etree .XPath ("/ruleset/rule" )
82- def test_unencrypted_to (tree , filename , from_attrib , to ):
95+ def test_unencrypted_to (tree , rulename , from_attrib , to ):
8396 # Rules that redirect to something other than https or http.
8497 # This used to test for http: but testing for lack of https: will
8598 # catch more kinds of mistakes.
@@ -91,19 +104,19 @@ def test_unencrypted_to(tree, filename, from_attrib, to):
91104 if to [:6 ] != "https:" and to [:5 ] != "http:" :
92105 return False
93106 elif to [:5 ] == "http:" and downgrade :
94- if filename in downgrade_allowed_list :
95- warn ("whitelisted downgrade rule in %s redirects to http." % filename )
107+ if rulename in downgrade_allowed_list :
108+ warn ("whitelisted downgrade rule in %s redirects to http." % rulename )
96109 else :
97- fail ("non-whitelisted downgrade rule in %s redirects to http." % filename )
110+ fail ("non-whitelisted downgrade rule in %s redirects to http." % rulename )
98111 return False
99112 elif to [:5 ] == "http:" :
100- fail ("non-downgrade rule in %s redirects to http." % filename )
113+ fail ("non-downgrade rule in %s redirects to http." % rulename )
101114 return False
102115 return True
103116
104117printable_characters = set (map (chr , list (range (32 , 127 ))))
105118
106- def test_non_ascii (tree , filename , from_attrib , to ):
119+ def test_non_ascii (tree , rulename , from_attrib , to ):
107120 # Rules containing non-printable characters.
108121 """Rule contains non-printable character in 'to' pattern."""
109122 for t in to :
@@ -135,6 +148,7 @@ def nomes_all(where=sys.argv[1:]):
# Checks applied to every ruleset, in list order.  Each entry is called as
# test(tree, name, from_attrib=..., to=...); a falsy result is reported
# using the function's docstring as the failure message.
tests = [
    test_bad_regexp,
    test_unescaped_dots,
    test_unescaped_dots_in_exclusion,
    test_unencrypted_to,
    test_non_ascii,
]
@@ -157,18 +171,18 @@ def nomes_all(where=sys.argv[1:]):
157171 except Exception as oops :
158172 failure = 1
159173 print ("failed XML validity: %s\n " % (oops ))
160- if failure or not xpath_ruleset (tree ):
174+ if not xpath_ruleset (tree ):
161175 continue
162176 rn = xpath_ruleset_name (tree )[0 ]
163177 if not rn :
164178 failure = 1
165179 fail ("unnamed ruleset" )
166180 continue
167- rf = xpath_ruleset_name (tree )[0 ]
181+ rf = xpath_ruleset_file (tree )[0 ]
168182 from_attrib = xpath_from (tree )
169183 to = xpath_to (tree )
170184 for test in tests :
171- if not test (tree , rf , from_attrib = from_attrib , to = to ):
185+ if not test (tree , rn , from_attrib = from_attrib , to = to ):
172186 failure = 1
173187 fail ("%s failed test: %s" % (rf , test .__doc__ ))
174188
0 commit comments