Skip to content

Commit 31a0018

Browse files
committed
Switch to using JSON to store rulesets.
This loads much faster and avoids reads from disk after startup. Previously, loading all targets from SQLite took about 268 ms; now it takes about 60 ms, including reading the file from disk and parsing the JSON.
1 parent 31e70c9 commit 31a0018

File tree

4 files changed

+29
-43
lines changed

4 files changed

+29
-43
lines changed

src/chrome/content/code/HTTPSRules.js

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ const RuleWriter = {
324324
// Add this ruleset id into HTTPSRules.targets if it's not already there.
325325
// This should only happen for custom user rules. Built-in rules get
326326
// their ids preloaded into the targets map, and have their <target>
327-
// tags stripped when the sqlite database is built.
327+
// tags stripped when the JSON database is built.
328328
var targets = xmlruleset.getElementsByTagName("target");
329329
for (var i = 0; i < targets.length; i++) {
330330
var host = targets[i].getAttribute("host");
@@ -392,29 +392,7 @@ const HTTPSRules = {
392392
var rulefiles = RuleWriter.enumerate(RuleWriter.getCustomRuleDir());
393393
this.scanRulefiles(rulefiles);
394394

395-
// Initialize database connection.
396-
var dbFile = new FileUtils.File(RuleWriter.chromeToPath("chrome://https-everywhere/content/rulesets.sqlite"));
397-
var rulesetDBConn = Services.storage.openDatabase(dbFile);
398-
this.queryForRuleset = rulesetDBConn.createStatement(
399-
"select contents from rulesets where id = :id");
400-
401-
// Preload the mapping of hostname target -> ruleset ID from DB.
402-
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
403-
// but is faster than loading all of the rulesets. If this becomes a
404-
// bottleneck, change it to load in a background webworker, or load
405-
// a smaller bloom filter instead.
406-
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
407-
this.log(DBUG, "Loading targets...");
408-
while (targetsQuery.executeStep()) {
409-
var host = targetsQuery.row.host;
410-
var id = targetsQuery.row.ruleset_id;
411-
if (!this.targets[host]) {
412-
this.targets[host] = [id];
413-
} else {
414-
this.targets[host].push(id);
415-
}
416-
}
417-
this.log(DBUG, "Loading adding targets.");
395+
this.loadTargets();
418396
} catch(e) {
419397
this.log(DBUG,"Rules Failed: "+e);
420398
}
@@ -424,6 +402,13 @@ const HTTPSRules = {
424402
return;
425403
},
426404

405+
loadTargets: function() {
406+
var file = new FileUtils.File(RuleWriter.chromeToPath("chrome://https-everywhere/content/rulesets.json"));
407+
var rules = JSON.parse(RuleWriter.read(file));
408+
this.targets = rules.targets;
409+
this.rules_list = rules.rules_list;
410+
},
411+
427412
checkMixedContentHandling: function() {
428413
// Firefox 23+ blocks mixed content by default, so rulesets that create
429414
// mixed content situations should be disabled there
@@ -576,22 +561,8 @@ const HTTPSRules = {
576561
},
577562

578563
// Load a ruleset by numeric id, e.g. 234
579-
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
580-
// there any way to fix that, given that we need to run blocking in the request
581-
// flow? Perhaps we can preload all targets from the DB into memory at startup
582-
// so we only hit the DB when we know there is something to be had.
583564
loadRulesetById: function(ruleset_id) {
584-
this.queryForRuleset.params.id = ruleset_id;
585-
586-
try {
587-
if (this.queryForRuleset.executeStep()) {
588-
RuleWriter.readFromString(this.queryForRuleset.row.contents, this, ruleset_id);
589-
} else {
590-
this.log(WARN,"Couldn't find ruleset for id " + ruleset_id);
591-
}
592-
} finally {
593-
this.queryForRuleset.reset();
594-
}
565+
RuleWriter.readFromString(this.rules_list[ruleset_id], this, ruleset_id);
595566
},
596567

597568
// Get all rulesets matching a given target, lazy-loading from the JSON DB as necessary.

src/components/https-everywhere.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ HTTPSEverywhere.prototype = {
569569
break;
570570
}
571571
} else if (topic == "browser:purge-session-history") {
572-
// The list of rulesets that have been loaded from the sqlite DB
572+
// The list of rulesets that have been loaded from the JSON DB
573573
// constitutes a parallel history store, so we have to clear it.
574574
this.log(DBUG, "History cleared, reloading HTTPSRules to avoid information leak.");
575575
HTTPSRules.init();

utils/make-sqlite.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44

55
import glob
66
import locale
7+
import json
78
import os
89
import re
910
import sqlite3
1011
import subprocess
1112
import sys
1213

13-
from collections import Counter
14+
import collections
1415
from lxml import etree
1516

1617
# Explicitly set locale so sorting order for filenames is consistent.
@@ -19,6 +20,7 @@
1920
# It's also helpful to ensure consistency for the lowercase check below.
2021
locale.setlocale(locale.LC_ALL, 'C')
2122

23+
json_path = os.path.join(os.path.dirname(__file__), '../pkg/rulesets.json')
2224
# Removing the file before we create it avoids some non-determinism.
2325
db_path = os.path.join(os.path.dirname(__file__), '../pkg/rulesets.unvalidated.sqlite')
2426
if os.path.isfile(db_path):
@@ -34,6 +36,11 @@
3436
(host TEXT,
3537
ruleset_id INTEGER)''')
3638

39+
json_output = {
40+
"rules_list": [],
41+
"targets": collections.defaultdict(list)
42+
}
43+
3744
parser = etree.XMLParser(remove_blank_text=True)
3845

3946
# Precompile xpath expressions that get run repeatedly.
@@ -43,7 +50,7 @@
4350
# Sort filenames so output is deterministic.
4451
filenames = sorted(glob.glob('src/chrome/content/rules/*'))
4552

46-
counted_lowercase_names = Counter([name.lower() for name in filenames])
53+
counted_lowercase_names = collections.Counter([name.lower() for name in filenames])
4754
most_common_entry = counted_lowercase_names.most_common(1)[0]
4855
if most_common_entry[1] > 1:
4956
dupe_filename = re.compile(re.escape(most_common_entry[0]), re.IGNORECASE)
@@ -80,6 +87,7 @@
8087
# targets are looked up in the target table, which has a foreign key
8188
# pointing into the ruleset table.
8289
etree.strip_tags(tree, 'target')
90+
etree.strip_tags(tree, 'test')
8391

8492
# Store the filename in the `f' attribute so "view source XML" for rules in
8593
# FF version can find it.
@@ -89,7 +97,14 @@
8997
ruleset_id = c.lastrowid
9098
for target in targets:
9199
c.execute('''INSERT INTO targets (host, ruleset_id) VALUES(?, ?)''', (target, ruleset_id))
100+
# id is the current length of the rules list - i.e. the offset at which
101+
# this rule will be added in the list.
102+
json_output["targets"][target].append(len(json_output["rules_list"]))
103+
json_output["rules_list"].append(etree.tostring(tree))
92104

93105
conn.commit()
94106
conn.execute("VACUUM")
95107
conn.close()
108+
109+
with open(json_path, 'w') as f:
110+
f.write(json.dumps(json_output))

utils/validate.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,4 @@ else
5656
die "Validation of rulesets against $GRAMMAR failed."
5757
fi
5858

59-
cp "$INPUT" ../src/defaults/rulesets.sqlite
59+
cp "../pkg/rulesets.json" ../src/chrome/content/rulesets.json

0 commit comments

Comments
 (0)