11#!/usr/bin/env python2.7
22
33import argparse
4- import sys , re , os
4+ import os
5+ import re
56import sqlite3
7+ import sys
68
7- try :
8- from lxml import etree
9- except ImportError :
10- sys .stderr .write ("** Could not import lxml! Rule validation SKIPPED.\n " )
11- sys .stderr .write ("** Caution: A resulting build MAY CONTAIN INVALID RULES.\n " )
12- sys .stderr .write ("** Please install libxml2 and lxml to permit validation!\n " )
13- sys .exit (0 )
9+ from lxml import etree
1410
1511parser = argparse .ArgumentParser (
1612 formatter_class = argparse .RawDescriptionHelpFormatter ,
1915 default = "" ,
2016 help = "Ignore entries."
2117 )
# --quiet silences warn() output (see the module-level ``quiet`` flag).
parser.add_argument('--quiet', action="store_true",
                    default=False, help="Suppress debug output.")
# --db takes exactly one path.  It is deliberately declared without
# nargs="*": with nargs a user-supplied value would arrive as a list, which
# sqlite3.connect() cannot open, while the default would stay a plain string.
parser.add_argument('--db', type=str,
                    default=os.path.join(os.path.dirname(__file__),
                                         "../src/defaults/rulesets.sqlite"),
                    help='SQLite db with rules')
3225
# Parse the command line once; everything below reads from ``args``.
args = parser.parse_args()

# Each --ignoredups value is a regular expression; compile them up front so
# they can be searched against every target host later on.
ignoredups = [re.compile(val) for val in args.ignoredups]
# Read by warn() to decide whether warnings are emitted.
quiet = args.quiet
3830
def warn(s):
    """Write ``s`` to stdout as a ``warning:`` line unless quiet mode is on.

    Reads the module-level ``quiet`` flag; nothing is written when it is true.
    """
    if not quiet:
        sys.stdout.write("warning: %s\n" % s)
4134
def fail(s):
    """Write ``s`` to stdout as a ``failure:`` line (never suppressed)."""
    sys.stdout.write("failure: %s\n" % s)
4437
45- def test_not_anchored (tree , fi ):
38+ def test_not_anchored (tree , fi , host , fm , to ):
4639 # Rules not anchored to the beginning of a line.
4740 """The 'from' rule is not anchored to beginning of line using the ^ symbol."""
48- for f in tree . xpath ( "/ruleset/rule/@from" ) :
41+ for f in fm :
4942 if not f or f [0 ] != "^" :
5043 return False
5144 return True
5245
# Precompile xpath expressions that get run repeatedly.
# NOTE: the "exlusion" spelling is kept because test_bad_regexp refers to
# this exact name.
xpath_exlusion_pattern = etree.XPath("/ruleset/exclusion/@pattern")
xpath_cookie_pattern = etree.XPath("/ruleset/securecookie/@host")
49+
def test_bad_regexp(tree, fi, host, fm, to):
    # Rules with invalid regular expressions.
    # Checks the rules' 'from' patterns (``fm``) together with every
    # exclusion 'pattern' and securecookie 'host' found in ``tree``.
    # Returns False as soon as one of them fails to compile.
    # The docstring below doubles as the failure message printed by the driver.
    """The 'from' rule contains an invalid extended regular expression."""
    patterns = fm + xpath_exlusion_pattern(tree) + xpath_cookie_pattern(tree)
    for pat in patterns:
        try:
            re.compile(pat)
        except Exception:
            # Any failure to compile marks the pattern as bad.  Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            return False
    return True
6460
# Precompiled once: selects every <rule> element directly under <ruleset>.
xpath_rule = etree.XPath("/ruleset/rule")
def test_missing_to(tree, fi, host, fm, to):
    # Rules that are terminated before setting 'to'.
    # These cases are probably either due to a misplaced
    # rule end or intended to be different elements.
    # ``fi`` is only used to label the warning; the rules are read from
    # ``tree``.  Returns False at the first rule whose 'to' is absent or empty.
    # The docstring below doubles as the failure message printed by the driver.
    """Rule is missing a 'to' value."""
    for rule in xpath_rule(tree):
        if not rule.get("to"):
            warn("'to' attribute missing in %s. " % fi)
            warn("Misplaced end or misnamed element?")
            return False
    return True
7673
77- def test_unescaped_dots (tree , fi ):
74+ def test_unescaped_dots (tree , fi , host , fm , to ):
7875 # Rules containing unescaped dots outside of brackets and before slash.
7976 # Note: this is meant to require example\.com instead of example.com,
8077 # but it also forbids things like .* which usually ought to be replaced
8178 # with something like ([^/:@\.]+)
8279 """The 'from' rule contains unescaped period in regular expression. Try escaping it with a backslash."""
83- for f in tree . xpath ( "/ruleset/rule/@from" ) :
80+ for f in fm :
8481 escaped = False
8582 bracketed = False
8683 s = re .sub ("^\^https?://" , "" , f )
@@ -99,22 +96,22 @@ def test_unescaped_dots(tree, fi):
9996 escaped = False
10097 return True
10198
102- def test_space_in_to (tree , fi ):
99+ def test_space_in_to (tree , fi , host , fm , to ):
103100 # Rules where the to pattern contains a space.
104101 """The 'to' rule contains a space."""
105- for t in tree . xpath ( "/ruleset/rule/@to" ) :
102+ for t in to :
106103 if ' ' in t :
107104 return False
108105 return True
109106
110- def test_unencrypted_to (tree , fi ):
107+ def test_unencrypted_to (tree , fi , host , fm , to ):
111108 # Rules that redirect to something other than https or http.
112109 # This used to test for http: but testing for lack of https: will
113110 # catch more kinds of mistakes.
114111 # Now warn if the rule author indicates they intended it, with the
115112 # downgrade attribute. Error if this attribute is not present.
116113 """Rule redirects to something other than https."""
117- for rule in tree . xpath ( "/ruleset/rule" ):
114+ for rule in xpath_rule ( tree ):
118115 to , downgrade = rule .get ("to" ), rule .get ("downgrade" )
119116 if to [:6 ] != "https:" and to [:5 ] != "http:" :
120117 return False
@@ -125,82 +122,58 @@ def test_unencrypted_to(tree, fi):
125122 return False
126123 return True
127124
128- def test_backslash_in_to (tree , fi ):
125+ def test_backslash_in_to (tree , fi , host , fm , to ):
129126 # Rules containing backslashes in to pattern.
130127 """The 'to' rule contains a backslash."""
131- for t in tree . xpath ( "/ruleset/rule/@to" ) :
128+ for t in to :
132129 if '\\ ' in t :
133130 return False
134131 return True
135132
# Matches "//", any run of characters, then a "/" -- i.e. a slash somewhere
# after the scheme separator.  Compiled once because it is applied to every
# rule's 'from' and 'to'.
RE_TRAILING_SLASH = re.compile(r"//.*/")
134+
def test_no_trailing_slash(tree, fi, host, fm, to):
    # Rules not containing trailing slash in from or to pattern.
    # Every <rule> in ``tree`` must have both a 'from' and a 'to' that match
    # RE_TRAILING_SLASH.
    # The docstring below doubles as the failure message printed by the driver.
    """Rule omits forward slash after host name."""
    for r in xpath_rule(tree):
        for value in (r.get("from"), r.get("to")):
            # A missing attribute comes back as None; report it as a failed
            # check instead of letting the regex search raise TypeError.
            if not value or not RE_TRAILING_SLASH.search(value):
                return False
    return True
146145
147- def test_lacks_target_host (tree , fi ):
146+ def test_lacks_target_host (tree , fi , host , fm , to ):
148147 # Rules that lack at least one target host (target tag with host attr).
149148 """Rule fails to specify at least one target host."""
150- return not not tree . xpath ( "/ruleset/target/@ host" )
149+ return not not host
151150
152- def test_bad_target_host (tree , fi ):
151+ def test_bad_target_host (tree , fi , host , fm , to ):
153152 # Rules where a target host contains multiple wildcards or a slash.
154153 """The target host must be a hostname, not URL, and must use at most one wildcard."""
155- for target in tree . xpath ( "/ruleset/target/@ host" ) :
154+ for target in host :
156155 if "/" in target :
157156 return False
158157 if target .count ("*" ) > 1 :
159158 return False
160159 return True
161160
162- def test_duplicated_target_host (tree , fi ):
161+ def test_duplicated_target_host (tree , fi , host , fm , to ):
163162 # Rules where a single target host appears more than once.
164163 """Rule contains the same target host more than once."""
165- targets = tree .xpath ("/ruleset/target/@host" )
166- return len (set (targets )) == len (targets )
164+ return len (set (host )) == len (host )
167165
168166printable_characters = set (map (chr , list (range (32 , 127 ))))
169167
170- def test_non_ascii (tree , fi ):
168+ def test_non_ascii (tree , fi , host , fm , to ):
171169 # Rules containing non-printable characters.
172170 """Rule contains non-printable character in 'to' pattern."""
173- for t in tree . xpath ( "/ruleset/rule/@to" ) :
171+ for t in to :
174172 for c in t :
175173 if c not in printable_characters :
176174 return False
177175 return True
178176
179- def test_ruleset_name (tree ):
180- """Rule has name"""
181- if tree .xpath ("/ruleset/@name" ):
182- return True
183- else :
184- return False
185-
186- def get_all_names_and_targets (ds ):
187- """extract unique names and targets from a list of dirs of xml files"""
188- names = set ()
189- targets = set ()
190- for d in ds :
191- for fi in os .listdir (d ):
192- fi = os .path .join (d , fi )
193- try :
194- tree = etree .parse (fi )
195- ruleset_name = tree .xpath ("/ruleset/@name" )[0 ]
196- target_names = tree .xpath ("/ruleset/target/@host" )
197- except Exception :
198- continue
199- names .add (ruleset_name )
200- for target in target_names :
201- targets .add (target )
202- return names , targets
203-
204177def nomes_all (where = sys .argv [1 :]):
205178 """Returns generator to extract all files from a list of files/dirs"""
206179 if not where : where = ['.' ]
@@ -212,34 +185,53 @@ def nomes_all(where=sys.argv[1:]):
212185 for fi in f :
213186 yield os .path .join (r , fi )
214187
# Checks applied, in this order, to every ruleset.  Each entry is called as
# test(tree, rf, host=..., fm=..., to=...) and its docstring is printed when
# it returns a false value.
tests = [
    test_not_anchored,
    test_bad_regexp,
    test_unescaped_dots,
    test_missing_to,
    test_space_in_to,
    test_unencrypted_to,
    test_backslash_in_to,
    test_no_trailing_slash,
    test_bad_target_host,
    test_duplicated_target_host,
    test_non_ascii,
]
219199
# Set to 1 as soon as any ruleset fails a hard check.
failure = 0
seen_file = False

# Precompiled xpath expressions used for every row read from the database.
xpath_ruleset = etree.XPath("/ruleset")
xpath_ruleset_name = etree.XPath("/ruleset/@name")
xpath_ruleset_file = etree.XPath("/ruleset/@f")
xpath_host = etree.XPath("/ruleset/target/@host")
xpath_from = etree.XPath("/ruleset/rule/@from")
xpath_to = etree.XPath("/ruleset/rule/@to")
209+
# Resolve the database path.  argparse hands back a list when --db is
# declared with nargs="*" and a value is supplied, but a plain string when the
# default is used; sqlite3.connect() needs a single path either way.
db_path = args.db
if isinstance(db_path, (list, tuple)):
    if len(db_path) != 1:
        parser.error("--db expects exactly one path")
    db_path = db_path[0]
# Echoing the path is debug output, so it honours --quiet.
if not quiet:
    sys.stdout.write("%s\n" % db_path)
conn = sqlite3.connect(db_path)
c = conn.cursor()
for row in c.execute('''SELECT contents from rulesets'''):
    try:
        tree = etree.fromstring(row[0])
    except Exception as oops:
        failure = 1
        print("failed XML validity: %s\n" % (oops))
        # Skip only this row: ``tree`` is unusable, but later rows must still
        # be validated instead of being silently skipped once failure is set.
        continue
    if not xpath_ruleset(tree):
        continue
    # The xpath call returns a list; check it before indexing so an unnamed
    # ruleset is reported rather than raising IndexError.
    names = xpath_ruleset_name(tree)
    if not names or not names[0]:
        failure = 1
        fail("unnamed ruleset")
        continue
    # Label messages with the ruleset's file attribute (@f); fall back to its
    # name when the attribute is absent.
    files = xpath_ruleset_file(tree)
    rf = files[0] if files else names[0]
    host = xpath_host(tree)
    fm = xpath_from(tree)
    to = xpath_to(tree)
    for test in tests:
        if not test(tree, rf, host=host, fm=fm, to=to):
            print("%s failed test: %s" % (rf, test.__doc__))
233+ for target in xpath_host (tree ):
234+ print target
243235 if target in all_targets and not any (ign .search (target ) for ign in ignoredups ):
244236 # suppress warning about duplicate targets if an --ignoredups
245237 # pattern matches target
0 commit comments