Skip to content

Commit 915f914

Browse files
committed
Use an SQLite ruleset DB to speed Firefox startup.
Note that this queries the DB synchronously on many requests, potentially slowing down browsing. Needs additional work.
1 parent 6c8cd5d commit 915f914

File tree

4 files changed

+145
-41
lines changed

4 files changed

+145
-41
lines changed

makexpi.sh

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ APP_NAME=https-everywhere
1515
# ./makexpi.sh 0.2.3.development.2
1616

1717
cd "`dirname $0`"
18+
RULESETS_SQLITE="$PWD/src/defaults/rulesets.sqlite"
1819

1920
[ -d pkg ] || mkdir pkg
2021

@@ -97,6 +98,11 @@ if [ "$1" != "--fast" ] ; then
9798
fi
9899
# =============== END VALIDATION ================
99100

101+
# Regenerate the SQLite ruleset DB on every normal build. With --fast, only
# regenerate it when the DB file does not exist yet.
# Two separate tests joined by || are used instead of `[ ... -o ... ]`, which
# POSIX marks obsolescent and leaves unspecified for this many operands.
if [ "$1" != "--fast" ] || [ ! -f "$RULESETS_SQLITE" ] ; then
  echo "Generating sqlite DB"
  ./utils/make-sqlite.py src/chrome/content/rules
fi
105+
100106
# The name/version of the XPI we're building comes from src/install.rdf
101107
XPI_NAME="pkg/$APP_NAME-`grep em:version src/install.rdf | sed -e 's/[<>]/ /g' | cut -f3`"
102108
if [ "$1" ] && [ "$1" != "--fast" ] ; then
@@ -114,14 +120,6 @@ if [ -e "$GIT_OBJECT_FILE" ]; then
114120
export GIT_COMMIT_ID=$(cat "$GIT_OBJECT_FILE")
115121
fi
116122

117-
# Unless we're in a hurry and there's already a ruleset library, build it from
118-
# the ruleset .xml files
119-
120-
if [ "$1" = "--fast" ] ; then
121-
FAST="--fast"
122-
fi
123-
python ./utils/merge-rulesets.py $FAST
124-
125123
cd src
126124

127125
# Build the XPI!
@@ -135,7 +133,7 @@ if [ "$ret" != 0 ]; then
135133
rm -f "../$XPI_NAME"
136134
exit "$?"
137135
else
138-
echo >&2 "Total included rules: `find chrome/content/rules -name "*.xml" | wc -l`"
136+
# Count rules from the generated DB. The path is derived from $PWD, so quote it
# to survive directories containing spaces or glob characters.
echo >&2 "Total included rules: $(sqlite3 "$RULESETS_SQLITE" 'select count(*) from rulesets')"
139137
echo >&2 "Rules disabled by default: `find chrome/content/rules -name "*.xml" | xargs grep -F default_off | wc -l`"
140138
echo >&2 "Created $XPI_NAME"
141139
if [ -n "$BRANCH" ]; then

src/chrome/content/code/HTTPSRules.js

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,12 @@ const RuleWriter = {
280280

281281
sstream.close();
282282
fstream.close();
283+
return this.readFromString(data, rule_store, file);
284+
},
285+
286+
readFromString: function(data, rule_store, file) {
287+
if (typeof file === 'undefined') file = {path: 'fromString'};
288+
283289
// XXX: With DOMParser, we probably do not need to throw away the XML
284290
// declaration anymore nowadays.
285291
data = data.replace(/<\?xml[^>]*\?>/, "");
@@ -410,32 +416,19 @@ const HTTPSRules = {
410416
this.rulesets = [];
411417
this.targets = {}; // dict mapping target host patterns -> lists of
412418
// applicable rules
419+
// dict listing target host patterns that don't exist in the DB
420+
// (aka negative cache)
421+
// TODO: Make this an LRU cache; clear it on history clear
422+
this.nonTargets = {};
413423
this.rulesetsByID = {};
414424
this.rulesetsByName = {};
415425
var t1 = new Date().getTime();
416426
this.checkMixedContentHandling();
417-
var rulefiles = RuleWriter.enumerate(RuleWriter.getCustomRuleDir());
418-
this.scanRulefiles(rulefiles);
419-
rulefiles = RuleWriter.enumerate(RuleWriter.getRuleDir());
420-
this.scanRulefiles(rulefiles);
421-
var t,i;
422-
for (t in this.targets) {
423-
for (i = 0 ; i < this.targets[t].length ; i++) {
424-
this.log(INFO, t + " -> " + this.targets[t][i].name);
425-
}
426-
}
427427

428-
// for any rulesets with <target host="*">
429-
// every URI needs to be checked against these rulesets
430-
// (though currently we don't ship any)
431-
this.global_rulesets = this.targets["*"] ? this.targets["*"] : [];
432-
433-
this.rulesets.sort(
434-
function(r1,r2) {
435-
if (r1.name.toLowerCase() < r2.name.toLowerCase()) return -1;
436-
else return 1;
437-
}
438-
);
428+
// Initialize database connection.
429+
var dbFile = FileUtils.getFile("ProfD", ["extensions", "https-everywhere@eff.org", "defaults", "rulesets.sqlite"]);
430+
var mDBConn = Services.storage.openDatabase(dbFile);
431+
this.queryForTarget = mDBConn.createStatement("select id, contents from targets, rulesets where targets.ruleset_id = rulesets.id and host = :target;");
439432
} catch(e) {
440433
this.log(WARN,"Rules Failed: "+e);
441434
}
@@ -491,6 +484,8 @@ const HTTPSRules = {
491484
}
492485
},
493486

487+
httpMatch: /^http/i,
488+
494489
rewrittenURI: function(alist, input_uri) {
495490
// This function oversees the task of working out if a uri should be
496491
// rewritten, what it should be rewritten to, and recordkeeping of which
@@ -511,7 +506,7 @@ const HTTPSRules = {
511506
try {
512507
var rs = this.potentiallyApplicableRulesets(uri.host);
513508
} catch(e) {
514-
this.log(WARN, 'Could not check applicable rules for '+uri.spec);
509+
this.log(WARN, 'Could not check applicable rules for '+uri.spec + '\n'+e);
515510
return null;
516511
}
517512

@@ -595,31 +590,66 @@ const HTTPSRules = {
595590
intoList.push(fromList[i]);
596591
},
597592

593+
// Try to find a ruleset in the SQLite database for a given target (e.g.
594+
// '*.openssl.org')
595+
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
596+
// there any way to fix that, given that we need to run blocking in the request
597+
// flow? Perhaps we can preload all targets from the DB into memory at startup
598+
// so we only hit the DB when we know there is something to be had.
599+
queryTarget: function(target) {
600+
this.log(WARN, "Querying DB for " + target);
601+
var statement = this.queryForTarget.clone();
602+
statement.params.target = target;
603+
604+
try {
605+
if (statement.executeStep())
606+
return statement.row.contents;
607+
} finally {
608+
statement.reset();
609+
}
610+
},
611+
598612
potentiallyApplicableRulesets: function(host) {
599613
// Return a list of rulesets that declare targets matching this host
600614
var i, tmp, t;
601-
var results = this.global_rulesets.slice(0); // copy global_rulesets
602-
try {
603-
if (this.targets[host])
604-
results = results.concat(this.targets[host]);
605-
} catch(e) {
606-
this.log(DBUG,"Couldn't check for ApplicableRulesets: " + e);
607-
return [];
608-
}
615+
var results = [];
616+
617+
// Add to `results` the rulesets registered for one target pattern, loading
// them from the SQLite DB the first time the pattern is seen.
var attempt = function(target) {
  // First check for this target in our in-memory negative cache
  if (this.nonTargets[target]) {
    return;
  }
  // Then our positive cache
  if (this.targets[target] && this.targets[target].length > 0) {
    this.setInsert(results, this.targets[target]);
    return;
  }
  // Not cached either way: check the DB and load the ruleset as appropriate
  var ruleset = this.queryTarget(target);
  if (ruleset != null) {
    this.log(INFO, "Found ruleset in DB for " + host + ": " + ruleset);
    RuleWriter.readFromString(ruleset, this);
    this.setInsert(results, this.targets[target]);
  } else {
    // Remember the miss so we don't query the DB again for this target
    this.nonTargets[target] = 1;
  }
}.bind(this);

// A ruleset may name the host literally rather than through a wildcard, so
// try the exact host before the wildcard variants generated below.
attempt(host);
638+
609639
// replace each portion of the domain with a * in turn
610640
var segmented = host.split(".");
611641
for (i = 0; i < segmented.length; ++i) {
612642
tmp = segmented[i];
613643
segmented[i] = "*";
614644
t = segmented.join(".");
615645
segmented[i] = tmp;
616-
this.setInsert(results, this.targets[t]);
646+
attempt(t);
617647
}
618648
// now eat away from the left, with *, so that for x.y.z.google.com we
619649
// check *.z.google.com and *.google.com (we did *.y.z.google.com above)
620650
for (i = 1; i <= segmented.length - 2; ++i) {
621651
t = "*." + segmented.slice(i,segmented.length).join(".");
622-
this.setInsert(results, this.targets[t]);
652+
attempt(t);
623653
}
624654
this.log(DBUG,"Potentially applicable rules for " + host + ":");
625655
for (i = 0; i < results.length; ++i)

src/components/https-everywhere.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ const Cc = Components.classes;
3131
const Cu = Components.utils;
3232
const Cr = Components.results;
3333

34+
Cu.import("resource://gre/modules/Services.jsm");
35+
Cu.import("resource://gre/modules/FileUtils.jsm");
36+
3437
const CP_SHOULDPROCESS = 4;
3538

3639
const SERVICE_CTRID = "@eff.org/https-everywhere;1";

utils/make-sqlite.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/python2.7
2+
#
3+
# Builds an sqlite DB containing all the rulesets, indexed by target.
4+
5+
import sqlite3
6+
import argparse
7+
import sys, re, os
8+
9+
from lxml import etree
10+
11+
parser = argparse.ArgumentParser(
12+
formatter_class=argparse.RawDescriptionHelpFormatter,
13+
description="Ruleset validation script.")
14+
parser.add_argument('ruleset', metavar='XML directory', type=str, nargs="*",
15+
default="src/chrome/content/rules",
16+
help='Directory of XML files to validate.')
17+
18+
args = parser.parse_args()
19+
20+
def nomes_all(where=None):
    """Yield the path of every file named by, or found beneath, `where`.

    where -- iterable of file and/or directory paths. When omitted, the
             command-line arguments (sys.argv[1:]) are used, read when the
             generator starts running rather than when this function is
             defined. If the resulting list is empty, the current directory
             is used.

    Directories are walked recursively in sorted order so that repeated runs
    over the same tree yield files in the same sequence. Paths that are
    neither an existing file nor a directory are skipped.
    """
    if where is None:
        where = sys.argv[1:]
    if not where:
        where = ['.']
    for i in where:
        if os.path.isfile(i):
            yield i
        elif os.path.isdir(i):
            for r, d, f in os.walk(i):
                # Sorting d in place also fixes the order os.walk descends in.
                d.sort()
                for fi in sorted(f):
                    yield os.path.join(r, fi)
30+
31+
32+
# The output DB path is resolved relative to this script's own location.
conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), '../src/defaults/rulesets.sqlite'))
c = conn.cursor()
# Both tables are rebuilt from scratch on every run.
c.execute('''DROP TABLE IF EXISTS rulesets''')
c.execute('''CREATE TABLE rulesets
             (id INTEGER PRIMARY KEY,
              name TEXT,
              contents TEXT)''')
c.execute('''DROP TABLE IF EXISTS targets''')
c.execute('''CREATE TABLE targets
             (
              host TEXT,
              ruleset_id INTEGER)''')

# Named xml_parser so it does not shadow the argparse parser defined above.
xml_parser = etree.XMLParser(remove_blank_text=True)

for fi in nomes_all():
    try:
        tree = etree.parse(fi, xml_parser)
    except Exception as oops:
        # Only report parse failures for files that claim to be XML; other
        # files in the tree are ignored silently.
        if fi[-4:] == ".xml":
            print("%s failed XML validity: %s\n" % (fi, oops))
        # Either way there is no usable tree for this file. Skip it instead
        # of continuing with `tree` unset or left over from the previous file.
        continue
    if not tree.xpath("/ruleset"):
        continue

    # Remove comments to save space.
    etree.strip_tags(tree, etree.Comment)

    targets = tree.xpath("/ruleset/target/@host")
    # TODO: Strip target tags too. Right now the JS code requires there be a
    # target tag.
    #etree.strip_tags(tree,'target')

    ruleset_name = tree.xpath("/ruleset/@name")[0]
    c.execute('''INSERT INTO rulesets (name, contents) VALUES(?, ?)''', (ruleset_name, etree.tostring(tree)))
    ruleset_id = c.lastrowid
    for target in targets:
        c.execute('''INSERT INTO targets (host, ruleset_id) VALUES(?, ?)''', (target, ruleset_id))

# Rulesets are looked up by exact target host, so index that column to keep
# each lookup from scanning the whole table. Created after the bulk insert so
# the inserts themselves stay cheap; DROP TABLE above removes any old index.
c.execute('''CREATE INDEX targets_host ON targets (host)''')

conn.commit()
conn.close()

0 commit comments

Comments
 (0)