Skip to content

Commit 4c05c9e

Browse files
committed
Merge branch 'fix-ruleset-loading' of https://github.com/cschanaj/https-everywhere
2 parents 8362250 + d437c69 commit 4c05c9e

3 files changed

Lines changed: 174 additions & 100 deletions

File tree

chromium/background.js

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"use strict";
22
/**
3-
* Fetch and parse XML to be loaded as RuleSets.
3+
* Load a file packaged with the extension
44
*
5-
* @param url: a relative URL to local XML
5+
* @param url: a relative URL to local file
66
*/
77
function loadExtensionFile(url, returnType) {
88
var xhr = new XMLHttpRequest();
@@ -17,6 +17,9 @@ function loadExtensionFile(url, returnType) {
1717
if (returnType === 'xml') {
1818
return xhr.responseXML;
1919
}
20+
if (returnType === 'json') {
21+
return JSON.parse(xhr.responseText);
22+
}
2023
return xhr.responseText;
2124
}
2225

@@ -34,7 +37,7 @@ all_rules = new RuleSets(ls);
3437
var enableMixedRulesets = false;
3538
storage.get({enableMixedRulesets: false}, function(item) {
3639
enableMixedRulesets = item.enableMixedRulesets;
37-
all_rules.addFromXml(loadExtensionFile('rules/default.rulesets', 'xml'));
40+
all_rules.addFromJson(loadExtensionFile('rules/default.rulesets', 'json'));
3841
});
3942

4043
// Load in the legacy custom rulesets, if any
@@ -649,7 +652,7 @@ async function import_settings(settings) {
649652
}
650653

651654
all_rules = new RuleSets(ls);
652-
all_rules.addFromXml(loadExtensionFile('rules/default.rulesets', 'xml'));
655+
all_rules.addFromJson(loadExtensionFile('rules/default.rulesets', 'json'));
653656

654657
// Load custom rulesets
655658
load_legacy_custom_rulesets(settings.custom_rulesets);

chromium/rules.js

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,13 +196,92 @@ RuleSets.prototype = {
196196
var sets = ruleXml.getElementsByTagName("ruleset");
197197
for (let s of sets) {
198198
try {
199-
this.parseOneRuleset(s);
199+
this.parseOneXmlRuleset(s);
200200
} catch (e) {
201201
log(WARN, 'Error processing ruleset:' + e);
202202
}
203203
}
204204
},
205205

206+
addFromJson: function(ruleJson) {
207+
for (let ruleset of ruleJson) {
208+
try {
209+
this.parseOneJsonRuleset(ruleset);
210+
} catch(e) {
211+
log(WARN, 'Error processing ruleset:' + e);
212+
}
213+
}
214+
},
215+
216+
parseOneJsonRuleset: function(ruletag) {
217+
var default_state = true;
218+
var note = "";
219+
var default_off = ruletag["default_off"];
220+
if (default_off) {
221+
default_state = false;
222+
note += default_off + "\n";
223+
}
224+
225+
// If a ruleset declares a platform, and we don't match it, treat it as
226+
// off-by-default. In practice, this excludes "mixedcontent" & "cacert" rules.
227+
var platform = ruletag["platform"]
228+
if (platform) {
229+
default_state = false;
230+
if (platform == "mixedcontent" && enableMixedRulesets) {
231+
default_state = true;
232+
}
233+
note += "Platform(s): " + platform + "\n";
234+
}
235+
236+
var rule_set = new RuleSet(ruletag["name"], default_state, note.trim());
237+
238+
// Read user prefs
239+
if (rule_set.name in this.ruleActiveStates) {
240+
rule_set.active = (this.ruleActiveStates[rule_set.name] == "true");
241+
}
242+
243+
var rules = ruletag["rule"];
244+
for (let rule of rules) {
245+
if (rule["from"] != null && rule["to"] != null) {
246+
rule_set.rules.push(new Rule(rule["from"], rule["to"]));
247+
}
248+
}
249+
250+
var exclusions = ruletag["exclusion"];
251+
if (exclusions != null) {
252+
for (let exclusion of exclusions) {
253+
if (exclusion != null) {
254+
if (!rule_set.exclusions) {
255+
rule_set.exclusions = [];
256+
}
257+
rule_set.exclusions.push(new Exclusion(exclusion));
258+
}
259+
}
260+
}
261+
262+
var cookierules = ruletag["securecookie"];
263+
if (cookierules != null) {
264+
for (let cookierule of cookierules) {
265+
if (cookierule["host"] != null && cookierule["name"] != null) {
266+
if (!rule_set.cookierules) {
267+
rule_set.cookierules = [];
268+
}
269+
rule_set.cookierules.push(new CookieRule(cookierule["host"], cookierule["name"]));
270+
}
271+
}
272+
}
273+
274+
var targets = ruletag["target"];
275+
for (let target of targets) {
276+
if (target != null) {
277+
if (!this.targets.has(target)) {
278+
this.targets.set(target, []);
279+
}
280+
this.targets.get(target).push(rule_set);
281+
}
282+
}
283+
},
284+
206285
/**
207286
* Load a user rule
208287
* @param params
@@ -253,7 +332,7 @@ RuleSets.prototype = {
253332
* Does the loading of a ruleset.
254333
* @param ruletag The whole <ruleset> tag to parse
255334
*/
256-
parseOneRuleset: function(ruletag) {
335+
parseOneXmlRuleset: function(ruletag) {
257336
var default_state = true;
258337
var note = "";
259338
var default_off = ruletag.getAttribute("default_off");

utils/merge-rulesets.py

Lines changed: 86 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,97 @@
11
#!/usr/bin/env python2.7
22

33
# Merge all the .xml rulesets into a single "default.rulesets" file -- this
4-
# prevents inodes from wasting disk space, but more importantly, works around
5-
# the fact that zip does not perform well on a pile of small files.
4+
# prevents inodes from wasting disk space, but more importantly, this works
5+
# around the fact that zip does not perform well on a pile of small files.
66

7-
# currently a very literal translation of merge-rulesets.sh, but about five
8-
# times faster
9-
from __future__ import print_function
10-
pass
7+
# Currently, it merges rulesets into a JSON Object for minimal overhead,
8+
# in both storage and parsing speed.
9+
10+
import argparse
11+
import glob
12+
import json
1113
import os
12-
from glob import glob
13-
from subprocess import call
14+
import subprocess
1415
import sys
15-
import traceback
16-
import re
1716
import unicodedata
18-
import argparse
17+
import xml.etree.ElementTree
18+
19+
def normalize(f):
    """
    OSX and Linux filesystems encode composite characters differently in
    filenames. We should normalize to NFC: http://unicode.org/reports/tr15/

    :param f: a filename, either as UTF-8 encoded bytes or as text
    :return: the NFC-normalized filename as UTF-8 encoded bytes
    """
    # Decode only byte strings: text input needs no decoding, and decoding
    # it again would raise a TypeError.
    if isinstance(f, bytes):
        f = f.decode("utf-8")
    return unicodedata.normalize("NFC", f).encode("utf-8")
26+
27+
# command-line argument parsing (nobody uses it, though)
28+
parser = argparse.ArgumentParser(description="Merge rulesets")
29+
parser.add_argument("--source_dir", default="src/chrome/content/rules")
1930

20-
parser = argparse.ArgumentParser(description='Merge rulesets.')
21-
parser.add_argument('--source_dir', default='src/chrome/content/rules', help='source directory')
22-
parser.add_argument('--fast', help='fast merge', action='store_true')
2331
args = parser.parse_args()
2432

25-
def normalize(f):
26-
"""
27-
OSX and Linux filesystems encode composite characters differently in filenames.
28-
We should normalize to NFC: http://unicode.org/reports/tr15/.
29-
"""
30-
f = unicodedata.normalize('NFC', unicode(f, 'utf-8')).encode('utf-8')
31-
return f
32-
33-
rulesets_fn= args.source_dir + "/default.rulesets"
34-
xml_ruleset_files = map(normalize, glob(args.source_dir + "/*.xml"))
35-
36-
# cleanup after bugs :/
37-
misfile = rulesets_fn + "r"
38-
if os.path.exists(misfile):
39-
print("Cleaning up malformed rulesets file...")
40-
os.unlink(misfile)
41-
42-
if args.fast:
43-
library_compiled_time = os.path.getmtime(rulesets_fn)
44-
newest_xml = max([os.path.getmtime(f) for f in xml_ruleset_files])
45-
if library_compiled_time >= newest_xml:
46-
print("Library is newer that all rulesets, skipping rebuild...")
47-
sys.exit(0)
48-
49-
print("Creating ruleset library...")
50-
51-
# Under git bash, sed -i issues errors and sets the file "read only". Thanks.
52-
if os.path.isfile(rulesets_fn):
53-
os.system("chmod u+w " + rulesets_fn)
54-
55-
def rulesize():
56-
return len(open(rulesets_fn).read())
57-
58-
def clean_up(rulefile):
59-
"""Remove extra whitespace, comments and tests from a ruleset"""
60-
comment_and_newline_pattern = re.compile(r"<!--.*?-->|\n|\r", flags=re.DOTALL)
61-
rulefile = comment_and_newline_pattern.sub('', rulefile)
62-
to_and_from_pattern = re.compile(r'\s*(from=)')
63-
rulefile = to_and_from_pattern.sub(r' \1', rulefile)
64-
rulefile = re.sub(r'"\s*(to=)', r'" \1', rulefile)
65-
rulefile = re.sub(r">\s*<", r"><", rulefile)
66-
rulefile = re.sub(r"</ruleset>\s*", r"</ruleset>\n", rulefile)
67-
rulefile = re.sub(r"\s*(/>|<ruleset)", r"\1", rulefile)
68-
rulefile = re.sub(r"<test.+?/>", r"", rulefile)
69-
return rulefile
70-
71-
library = open(rulesets_fn,"w")
72-
73-
try:
74-
commit_id = os.environ["GIT_COMMIT_ID"]
75-
library.write('<rulesetlibrary gitcommitid="%s">' % commit_id)
76-
except:
77-
# Chromium
78-
library.write('<rulesetlibrary>')
79-
80-
# Include the filename.xml as the "f" attribute
81-
print("Removing whitespaces and comments...")
82-
83-
for rfile in sorted(xml_ruleset_files):
84-
ruleset = open(rfile).read()
85-
fn = os.path.basename(rfile)
86-
ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
87-
library.write(clean_up(ruleset))
88-
library.write("</rulesetlibrary>\n")
89-
library.close()
90-
91-
try:
92-
if 0 == call(["xmllint", "--noout", rulesets_fn]):
93-
print(rulesets_fn, "passed XML validity test.")
94-
else:
95-
print("ERROR:", rulesets_fn, "failed XML validity test!")
96-
sys.exit(1)
97-
except OSError as e:
98-
if "No such file or directory" not in traceback.format_exc():
99-
raise
100-
print("WARNING: xmllint not present; validation of", rulesets_fn, " skipped.")
101-
102-
# We make default.rulesets at build time, but it shouldn't have a variable
103-
# timestamp
104-
call(["touch", "-r", "src/install.rdf", rulesets_fn])
33+
# Output filename: the merged ruleset library this script (re)generates.
ofn = os.path.join(args.source_dir, "default.rulesets")

# XML ruleset files, with their filenames passed through normalize().
files = map(normalize, glob.glob(os.path.join(args.source_dir, "*.xml")))

# Under git bash, sed -i issues errors and sets the file "read-only".
# The path is passed as its own argument rather than pasted into a shell
# command line, so a source_dir containing spaces or shell metacharacters
# cannot break (or inject into) the command.
if os.path.isfile(ofn):
    subprocess.call(["chmod", "u+w", ofn])


def _ruleset_to_dict(root):
    """
    Convert the root element of one ruleset XML file into a dict that can be
    serialized to JSON.

    Every attribute of the root element is copied as-is. The child elements
    "target", "rule", "securecookie" and "exclusion" are collected into lists
    stored under their tag name; a key is only created when at least one such
    child exists. Any other child element is ignored.
    """
    ruleset = dict(root.attrib)

    for child in root:
        if child.tag == "target":
            entry = child.attrib["host"]
        elif child.tag == "rule":
            entry = {"from": child.attrib["from"], "to": child.attrib["to"]}
        elif child.tag == "securecookie":
            entry = {"host": child.attrib["host"], "name": child.attrib["name"]}
        elif child.tag == "exclusion":
            entry = child.attrib["pattern"]
        else:
            continue

        ruleset.setdefault(child.tag, []).append(entry)

    return ruleset


# Library (JSON array holding one object per ruleset file)
library = []

# Parse XML rulesets and construct the JSON library. The files are visited
# in sorted order.
print(" * Parsing XML ruleset and constructing JSON library...")
for filename in sorted(files):
    root = xml.etree.ElementTree.parse(filename).getroot()
    library.append(_ruleset_to_dict(root))

# Write to default.rulesets; the with-block closes the file even if
# serialization fails part-way.
print(" * Writing JSON library to %s" % ofn)
with open(ofn, "w") as outfile:
    json.dump(library, outfile)

# We make default.rulesets at build time,
# but it shouldn't have a variable timestamp
subprocess.call(["touch", "-r", "src/install.rdf", ofn])

# Everything is okay.
print(" * Everything is Okay.")

0 commit comments

Comments
 (0)