Skip to content

Commit 32ca6c6

Browse files
committed
Move some validations into the RELAX NG grammar.
1 parent bdc0977 commit 32ca6c6

File tree

2 files changed

+20
-61
lines changed

2 files changed

+20
-61
lines changed

utils/relaxng.xml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,21 @@
4444

4545
<oneOrMore>
4646
<element name="rule">
47-
<attribute name="from"/>
47+
<attribute name="from">
48+
<data type="string">
49+
<!-- The from attribute of a rule must start with a caret.
50+
It also must contain two slashes (separating the protocol from
51+
the hostname), and at least one other slash (terminating the
52+
hostname, and possibly beginning a path). -->
53+
<param name="pattern">\^.*//.*/.*</param>
54+
</data>
55+
</attribute>
4856
<attribute name="to">
4957
<data type="string">
50-
<param name="pattern">https?://.*</param>
58+
<!-- The to attribute of a rule must not contain spaces or
59+
backslashes. It also must contain at least one slash after the
60+
hostname. -->
61+
<param name="pattern">https?://[^ \\]*/[^ \\]*</param>
5162
</data>
5263
</attribute>
5364
<optional>

utils/trivial-validate.py

Lines changed: 7 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,6 @@ def warn(s):
3535
def fail(s):
3636
sys.stdout.write("failure: %s\n" % s)
3737

38-
def test_not_anchored(tree, filename, from_attrib, to):
39-
# Rules not anchored to the beginning of a line.
40-
"""The 'from' rule is not anchored to beginning of line using the ^ symbol."""
41-
for f in from_attrib:
42-
if not f or f[0] != "^":
43-
return False
44-
return True
45-
4638
# Precompile xpath expressions that get run repeatedly.
4739
xpath_exlusion_pattern = etree.XPath("/ruleset/exclusion/@pattern")
4840
xpath_cookie_pattern = etree.XPath("/ruleset/securecookie/@host")
@@ -58,19 +50,6 @@ def test_bad_regexp(tree, filename, from_attrib, to):
5850
return False
5951
return True
6052

61-
xpath_rule = etree.XPath("/ruleset/rule")
62-
def test_missing_to(tree, filename, from_attrib, to):
63-
# Rules that are terminated before setting 'to'.
64-
# These cases are probably either due to a misplaced
65-
# rule end or intended to be different elements.
66-
"""Rule is missing a 'to' value."""
67-
for rule in xpath_rule(tree):
68-
if not rule.get("to"):
69-
warn("'to' attribute missing in %s. " % filename)
70-
warn("Misplaced end or misnamed element?")
71-
return False
72-
return True
73-
7453
def test_unescaped_dots(tree, filename, from_attrib, to):
7554
# Rules containing unescaped dots outside of brackets and before slash.
7655
# Note: this is meant to require example\.com instead of example.com,
@@ -96,14 +75,7 @@ def test_unescaped_dots(tree, filename, from_attrib, to):
9675
escaped = False
9776
return True
9877

99-
def test_space_in_to(tree, filename, from_attrib, to):
100-
# Rules where the to pattern contains a space.
101-
"""The 'to' rule contains a space."""
102-
for t in to:
103-
if ' ' in t:
104-
return False
105-
return True
106-
78+
xpath_rule = etree.XPath("/ruleset/rule")
10779
def test_unencrypted_to(tree, filename, from_attrib, to):
10880
# Rules that redirect to something other than https or http.
10981
# This used to test for http: but testing for lack of https: will
@@ -122,27 +94,6 @@ def test_unencrypted_to(tree, filename, from_attrib, to):
12294
return False
12395
return True
12496

125-
def test_backslash_in_to(tree, filename, from_attrib, to):
126-
# Rules containing backslashes in to pattern.
127-
"""The 'to' rule contains a backslash."""
128-
for t in to:
129-
if '\\' in t:
130-
return False
131-
return True
132-
133-
RE_TRAILING_SLASH = re.compile("//.*/")
134-
135-
def test_no_trailing_slash(tree, filename, from_attrib, to):
136-
# Rules not containing trailing slash in from or to pattern.
137-
"""Rule omits forward slash after host name."""
138-
for r in xpath_rule(tree):
139-
f, t = r.get("from"), r.get("to")
140-
if not RE_TRAILING_SLASH.search(f):
141-
return False
142-
if not RE_TRAILING_SLASH.search(t):
143-
return False
144-
return True
145-
14697
printable_characters = set(map(chr, list(range(32, 127))))
14798

14899
def test_non_ascii(tree, filename, from_attrib, to):
@@ -174,15 +125,12 @@ def nomes_all(where=sys.argv[1:]):
174125
for filename in f:
175126
yield os.path.join(r, filename)
176127

177-
tests = [test_not_anchored,
178-
test_bad_regexp,
179-
test_unescaped_dots,
180-
test_missing_to,
181-
test_space_in_to,
182-
test_unencrypted_to,
183-
test_backslash_in_to,
184-
test_no_trailing_slash,
185-
test_non_ascii]
128+
tests = [
129+
test_bad_regexp,
130+
test_unescaped_dots,
131+
test_unencrypted_to,
132+
test_non_ascii
133+
]
186134

187135
failure = 0
188136
seen_file = False

0 commit comments

Comments
 (0)