#!/usr/bin/env python2.7 # # Builds a JSON DB containing all the rulesets, indexed by target. # The JSON DB is used by the Firefox addon. # import glob import locale import json import os import re import sqlite3 import subprocess import sys import collections from lxml import etree # Explicitly set locale so sorting order for filenames is consistent. # This is important for deterministic builds. # https://trac.torproject.org/projects/tor/ticket/11630#comment:20 # It's also helpful to ensure consistency for the lowercase check below. locale.setlocale(locale.LC_ALL, 'C') json_path = os.path.join(os.path.dirname(__file__), '../pkg/rulesets.json') json_output = { "rulesetStrings": [], "targets": collections.defaultdict(list) } parser = etree.XMLParser(remove_blank_text=True) # Precompile xpath expressions that get run repeatedly. xpath_host = etree.XPath("/ruleset/target/@host") xpath_ruleset = etree.XPath("/ruleset") # Sort filenames so output is deterministic. filenames = sorted(glob.glob('src/chrome/content/rules/*')) counted_lowercase_names = collections.Counter([name.lower() for name in filenames]) most_common_entry = counted_lowercase_names.most_common(1)[0] if most_common_entry[1] > 1: dupe_filename = re.compile(re.escape(most_common_entry[0]), re.IGNORECASE) print("%s failed case-insensitivity testing." % filter(dupe_filename.match, filenames)) print("Rules exist with identical case-insensitive names, which breaks some filesystems.") sys.exit(1) for fi in filenames: if fi.endswith('/00README') or fi.endswith('/make-trivial-rule') or fi.endswith('/default.rulesets'): continue if " " in fi: print("%s failed validity: Rule filenames cannot contain spaces" % (fi)) sys.exit(1) if not fi.endswith('.xml'): print("%s failed validity: Rule filenames must end in .xml" % (fi)) sys.exit(1) try: tree = etree.parse(fi, parser) except Exception as oops: print("%s failed XML validity: %s\n" % (fi, oops)) sys.exit(1) # Remove comments to save space. etree.strip_tags(tree, etree.Comment) targets = xpath_host(tree) if not targets: print('File %s has no targets' % fi) sys.exit(1) # Strip out the target tags. These aren't necessary in the DB because # targets are looked up in the target table, which has a foreign key # pointing into the ruleset table. etree.strip_tags(tree, 'target') etree.strip_tags(tree, 'test') # Store the filename in the `f' attribute so "view source XML" for rules in # FF version can find it. xpath_ruleset(tree)[0].attrib["f"] = os.path.basename(fi).decode(encoding="UTF-8") for target in targets: # id is the current length of the rules list - i.e. the offset at which # this rule will be added in the list. json_output["targets"][target].append(len(json_output["rulesetStrings"])) json_output["rulesetStrings"].append(etree.tostring(tree)) with open(json_path, 'w') as f: f.write(json.dumps(json_output))