Skip to content

Commit b6604b1

Browse files
committed
Merge single and multi file trivial validation
1. Add option --dupdir to specify a directory of existing rulesets to check for duplicates against, for use when that directory is not among the rulesets being validated. 2. Remove the distinction between single-file and multi-file validation. 3. Add error handling for non-ruleset XML files and for rulesets that have no name.
1 parent a606e38 commit b6604b1

File tree

1 file changed

+46
-63
lines changed

1 file changed

+46
-63
lines changed

utils/trivial-validate.py

Lines changed: 46 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,13 @@
1010
sys.stderr.write("** Please install libxml2 and lxml to permit validation!\n")
1111
sys.exit(0)
1212

13-
longargs, args = getopt.gnu_getopt(sys.argv[1:], "", ["ignoredups="])
13+
longargs, args = getopt.gnu_getopt(sys.argv[1:], "", ["ignoredups=", "dupdir="])
1414

1515
ignoredups = [re.compile(val) for opt, val in longargs if opt == "--ignoredups"]
16+
dupdir = [val for opt, val in longargs if opt == "--dupdir"]
1617

1718
multi_file_validate = True
1819

19-
if args:
20-
if os.path.isfile(args[0]):
21-
multi_file_validate = False
22-
else:
23-
try:
24-
os.chdir(args[0])
25-
except:
26-
sys.stderr.write("could not chdir to %s\n" % args[0])
27-
sys.stderr.write("usage: %s (<directoryname> or <file to validate><directory of all rulesets (optional)>\n" % sys.argv[0])
28-
sys.exit(2)
29-
3020
def test_not_anchored(tree):
3121
# Rules not anchored to the beginning of a line.
3222
"""The 'from' rule is not anchored to beginning of line using the ^ symbol."""
@@ -164,23 +154,33 @@ def test_non_ascii(tree):
164154
return False
165155
return True
166156

167-
def get_all_names_and_targets(d):
157+
def test_ruleset_name(tree):
    """Rule has name"""
    # The XPath query returns a list of matches; any non-empty result means
    # the root <ruleset> element has a name attribute.
    return bool(tree.xpath("/ruleset/@name"))
163+
164+
def get_all_names_and_targets(ds):
    """Extract unique ruleset names and target hosts from directories of XML files.

    ds: iterable of directory paths. Each directory is listed
        non-recursively.

    Returns a (names, targets) tuple of sets holding the /ruleset/@name
    and /ruleset/target/@host values of every entry that parses and has
    a ruleset name.

    Entries that cannot be parsed, or that have no ruleset name, are
    skipped. An error from os.listdir (for example a directory that does
    not exist) is not caught here.
    """
    names = set()
    targets = set()
    for d in ds:
        for fi in os.listdir(d):
            try:
                # os.listdir returns bare entry names, so the path must be
                # joined with the directory; parsing the bare name would
                # look the file up relative to the current directory.
                tree = etree.parse(os.path.join(d, fi))
                ruleset_name = tree.xpath("/ruleset/@name")[0]
                target_names = tree.xpath("/ruleset/target/@host")
            except Exception:
                # Best effort: ignore anything that is not a named ruleset.
                continue
            names.add(ruleset_name)
            targets.update(target_names)
    return names, targets
181180

182181
def nomes(where=sys.argv[1:]):
183-
"""Returns generator to extract files from a list of files / directories"""
182+
"""Returns generator to extract files from a list of files / directories:
183+
Note that this only works to depth 1 in directories"""
184184
# TODO: extract files recursively to a certain depth?
185185
orig = os.getcwd()
186186
if not where: where=["."]
@@ -193,66 +193,48 @@ def nomes(where=sys.argv[1:]):
193193
elif os.path.isfile(i):
194194
yield open(i)
195195

196+
def nomes_all(where=sys.argv[1:]):
    """Return a generator over the paths of all files in a list of files/dirs.

    where: list of file and/or directory paths. An empty list means the
        current directory. The default is the command-line argument list
        as it was when this function was defined.

    Yields each regular file named in `where` exactly as given, and, for
    each directory, the path of every file found by walking it
    recursively. A path that is neither a file nor a directory yields
    nothing.
    """
    if not where:
        where = ['.']
    for path in where:
        if os.path.isfile(path):
            # os.walk() produces nothing for a non-directory, so a file
            # named directly has to be yielded here or it would be skipped.
            yield path
            continue
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                yield os.path.join(dirpath, filename)
203+
196204
tests = [test_not_anchored, test_bad_regexp, test_unescaped_dots, test_missing_to,
197205
test_space_in_to, test_unencrypted_to, test_backslash_in_to,
198206
test_no_trailing_slash, test_lacks_target_host, test_bad_target_host,
199207
test_duplicated_target_host, test_non_ascii]
200208

201209
failure = 0
202210
seen_file = False
203-
all_targets = set()
204-
all_names = set()
211+
all_names, all_targets = get_all_names_and_targets(dupdir)
205212

206-
if multi_file_validate:
207-
for fi in nomes():
208-
try:
209-
tree = etree.parse(fi)
210-
if fi[-4:] != ".xml":
211-
if tree.xpath("/ruleset"):
212-
sys.stdout.write("warning: ruleset in file without .xml extension: %s\n" % fi)
213-
else:
214-
continue
215-
seen_file = True
216-
except Exception as oops:
217-
if fi[-4:] != ".xml":
218-
continue
219-
failure = 1
220-
sys.stdout.write("%s failed XML validity: %s\n" % (fi, oops))
221-
ruleset_name = tree.xpath("/ruleset/@name")[0]
222-
if ruleset_name in all_names:
223-
failure = 1
224-
sys.stdout.write("failure: duplicate ruleset name %s\n" % ruleset_name)
225-
all_names.add(ruleset_name)
226-
for test in tests:
227-
if not test(tree):
228-
failure = 1
229-
sys.stdout.write("failure: %s failed test: %s\n" % (fi, test.__doc__))
230-
for target in tree.xpath("/ruleset/target/@host"):
231-
if target in all_targets and not any(ign.search(target) for ign in ignoredups):
232-
# suppress warning about duplicate targets if an --ignoredups
233-
# pattern matches target
234-
sys.stdout.write("warning: duplicate target: %s\n" % target)
235-
all_targets.add(target)
236-
fi.close()
237-
else:
238-
fi = os.path.basename(args[0])
239-
if len(args) > 1:
240-
all_names, all_targets = get_all_names_and_targets(args[1])
241-
else:
242-
sys.stdout.write("warning: pass a directory of existing rulesets as the second argument to check for duplicates \n")
213+
for fi in nomes_all():
243214
try:
244215
tree = etree.parse(fi)
245216
if fi[-4:] != ".xml":
246217
if tree.xpath("/ruleset"):
247218
sys.stdout.write("warning: ruleset in file without .xml extension: %s\n" % fi)
219+
else:
220+
continue
248221
seen_file = True
249222
except Exception as oops:
223+
if fi[-4:] != ".xml":
224+
continue
250225
failure = 1
251226
sys.stdout.write("%s failed XML validity: %s\n" % (fi, oops))
227+
if not tree.xpath("/ruleset"):
228+
continue
229+
if not test_ruleset_name(tree):
230+
failure = 1
231+
sys.stdout.write("failure: unnamed ruleset: %s\n" % fi)
232+
continue
252233
ruleset_name = tree.xpath("/ruleset/@name")[0]
253234
if ruleset_name in all_names:
254235
failure = 1
255236
sys.stdout.write("failure: duplicate ruleset name %s\n" % ruleset_name)
237+
all_names.add(ruleset_name)
256238
for test in tests:
257239
if not test(tree):
258240
failure = 1
@@ -262,6 +244,7 @@ def nomes(where=sys.argv[1:]):
262244
# suppress warning about duplicate targets if an --ignoredups
263245
# pattern matches target
264246
sys.stdout.write("warning: duplicate target: %s\n" % target)
247+
all_targets.add(target)
265248

266249
if not seen_file:
267250
which = "specified" if args else "current"

0 commit comments

Comments
 (0)