Skip to content

Commit fb2d2bf

Browse files
committed
Add detection of unescaped dots in exclusion rules
Also fix up two cases.
1 parent 0953898 commit fb2d2bf

File tree

3 files changed

+44
-30
lines changed

3 files changed

+44
-30
lines changed

src/chrome/content/rules/A2z.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
<ruleset name="a2z, Inc (partial)">
1+
<ruleset name="a2z, Inc (partial)" default_off="Needs ruleset tests">
22

33
<target host="*.a2zinc.net"/>
44

5-
<exclusion pattern="http://www\..+/$"/>
6-
<exclusion pattern=".+\.aspx$"/>
5+
<exclusion pattern="^http://www\.a2zinc\.net/"/>
6+
<exclusion pattern="\.aspx$"/>
77

88
<rule from="^http://(\w+)\.a2zinc\.net/"
99
to="https://$1.a2zinc.net/"/>

src/chrome/content/rules/kantonalbanken.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Fetch error: http://glkb.ch/ => https://www.glkb.ch/: (60, 'SSL certificate prob
6464
<exclusion pattern="test\.bcf\.ch"/>
6565
<exclusion pattern="structures\.bvc\.ch"/>
6666
<exclusion pattern="art\.bcvs\.ch"/>
67-
<exclusion pattern="www\.mobile\.gkb.ch"/>
67+
<exclusion pattern="www\.mobile\.gkb\.ch"/>
6868
<exclusion pattern="mobile\.(?:glkb|nkb|owkb|sgkb|tkb)\.ch"/>
6969
<exclusion pattern="(?:wap|ina)\.lukb\.ch"/>
7070
<exclusion pattern="galerie\.sgkb\.ch"/>

utils/trivial-validate.py

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def fail(s):
4242
duplicate_allowed_list = [x.rstrip('\n') for x in duplicate_fh.readlines()]
4343

4444

45-
def test_bad_regexp(tree, filename, from_attrib, to):
45+
def test_bad_regexp(tree, rulename, from_attrib, to):
4646
# Rules with invalid regular expressions.
4747
"""The 'from' rule contains an invalid extended regular expression."""
4848
patterns = from_attrib + xpath_exlusion_pattern(tree) + xpath_cookie_pattern(tree)
@@ -53,33 +53,46 @@ def test_bad_regexp(tree, filename, from_attrib, to):
5353
return False
5454
return True
5555

56-
def test_unescaped_dots(tree, filename, from_attrib, to):
56+
def unescaped_dot(s):
    """Return True if ``s`` has a bare ``.`` before its first slash.

    The string is scanned left to right. A ``.`` counts only when it is
    neither preceded by an odd run of backslashes nor inside a ``[...]``
    character class. The scan ends, with a False result, at the first
    ``/`` found outside a character class, so anything after that slash
    is never inspected.
    """
    after_backslash = False
    in_class = False
    for ch in s:
        if ch == "\\":
            # A second backslash in a row cancels the first one.
            after_backslash = not after_backslash
            continue
        if not after_backslash:
            if ch == "[":
                in_class = True
                continue
            if ch == "]":
                in_class = False
                continue
            if ch == "." and not in_class:
                return True
        # A slash ends the scan even when it was backslash-escaped.
        if ch == "/" and not in_class:
            return False
        # Any other character consumes a pending backslash escape.
        after_backslash = False
    return False
73+
74+
def test_unescaped_dots(tree, rulename, from_attrib, to):
5775
# Rules containing unescaped dots outside of brackets and before slash.
5876
# Note: this is meant to require example\.com instead of example.com,
5977
# but it also forbids things like .* which usually ought to be replaced
6078
# with something like ([^/:@\.]+)
6179
"""The 'from' rule contains unescaped period in regular expression. Try escaping it with a backslash."""
6280
for f in from_attrib:
63-
escaped = False
64-
bracketed = False
6581
s = re.sub("^\^https?://", "", f)
66-
for c in s:
67-
if c == "\\":
68-
escaped = not escaped
69-
elif not escaped and c == "[":
70-
bracketed = True
71-
elif not escaped and c == "]":
72-
bracketed = False
73-
elif not escaped and not bracketed and c == ".":
74-
return False
75-
elif not bracketed and c == "/":
76-
break
77-
else:
78-
escaped = False
82+
if unescaped_dot(s):
83+
return False
84+
return True
85+
86+
def test_unescaped_dots_in_exclusion(tree, rulename, from_attrib, to):
    # Exclusion patterns containing unescaped dots outside of brackets and
    # before the first slash that follows the (optional) URL scheme.
    #
    # tree is the parsed ruleset document. rulename, from_attrib and to are
    # unused here; they are accepted so this check can be called with the
    # same arguments as every other entry in the tests list.
    # Returns False as soon as one exclusion pattern has a bare dot, and
    # True when none does (including when there are no exclusions).
    #
    # The docstring is printed verbatim as the failure message, so it must
    # stay the first statement and keep its exact wording.
    """The 'exclusion' tag contains unescaped period in regular expression. Try escaping it with a backslash."""
    for pattern in etree.XPath("/ruleset/exclusion/@pattern")(tree):
        # unescaped_dot() stops at the first "/", so a leading scheme such
        # as "^http://", "http://" or "^https?://" would end the scan
        # before the host name is looked at. Strip it first, the same way
        # the check for 'from' patterns strips its scheme prefix.
        host_and_path = re.sub(r"^\^?https?\??://", "", pattern)
        if unescaped_dot(host_and_path):
            return False
    return True
8093

8194
xpath_rule = etree.XPath("/ruleset/rule")
82-
def test_unencrypted_to(tree, filename, from_attrib, to):
95+
def test_unencrypted_to(tree, rulename, from_attrib, to):
8396
# Rules that redirect to something other than https or http.
8497
# This used to test for http: but testing for lack of https: will
8598
# catch more kinds of mistakes.
@@ -91,19 +104,19 @@ def test_unencrypted_to(tree, filename, from_attrib, to):
91104
if to[:6] != "https:" and to[:5] != "http:":
92105
return False
93106
elif to[:5] == "http:" and downgrade:
94-
if filename in downgrade_allowed_list:
95-
warn("whitelisted downgrade rule in %s redirects to http." % filename)
107+
if rulename in downgrade_allowed_list:
108+
warn("whitelisted downgrade rule in %s redirects to http." % rulename)
96109
else:
97-
fail("non-whitelisted downgrade rule in %s redirects to http." % filename)
110+
fail("non-whitelisted downgrade rule in %s redirects to http." % rulename)
98111
return False
99112
elif to[:5] == "http:":
100-
fail("non-downgrade rule in %s redirects to http." % filename)
113+
fail("non-downgrade rule in %s redirects to http." % rulename)
101114
return False
102115
return True
103116

104117
printable_characters = set(map(chr, list(range(32, 127))))
105118

106-
def test_non_ascii(tree, filename, from_attrib, to):
119+
def test_non_ascii(tree, rulename, from_attrib, to):
107120
# Rules containing non-printable characters.
108121
"""Rule contains non-printable character in 'to' pattern."""
109122
for t in to:
@@ -135,6 +148,7 @@ def nomes_all(where=sys.argv[1:]):
135148
tests = [
136149
test_bad_regexp,
137150
test_unescaped_dots,
151+
test_unescaped_dots_in_exclusion,
138152
test_unencrypted_to,
139153
test_non_ascii
140154
]
@@ -157,18 +171,18 @@ def nomes_all(where=sys.argv[1:]):
157171
except Exception as oops:
158172
failure = 1
159173
print("failed XML validity: %s\n" % (oops))
160-
if failure or not xpath_ruleset(tree):
174+
if not xpath_ruleset(tree):
161175
continue
162176
rn = xpath_ruleset_name(tree)[0]
163177
if not rn:
164178
failure = 1
165179
fail("unnamed ruleset")
166180
continue
167-
rf = xpath_ruleset_name(tree)[0]
181+
rf = xpath_ruleset_file(tree)[0]
168182
from_attrib = xpath_from(tree)
169183
to = xpath_to(tree)
170184
for test in tests:
171-
if not test(tree, rf, from_attrib=from_attrib, to=to):
185+
if not test(tree, rn, from_attrib=from_attrib, to=to):
172186
failure = 1
173187
fail("%s failed test: %s" % (rf, test.__doc__))
174188

0 commit comments

Comments
 (0)