Skip to content

Commit 414c3e9

Browse files
committed
Merge remote-tracking branch 'jsha/sqlite'
Conflicts: src/chrome/content/code/HTTPSRules.js
2 parents 5fba110 + ba496d7 commit 414c3e9

File tree

4 files changed

+160
-43
lines changed

4 files changed

+160
-43
lines changed

makexpi.sh

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ APP_NAME=https-everywhere
1515
# ./makexpi.sh 0.2.3.development.2
1616

1717
cd "`dirname $0`"
18+
RULESETS_SQLITE="$PWD/src/defaults/rulesets.sqlite"
1819

1920
[ -d pkg ] || mkdir pkg
2021

@@ -97,6 +98,11 @@ if [ "$1" != "--fast" ] ; then
9798
fi
9899
# =============== END VALIDATION ================
99100

101+
# Regenerate the ruleset database on every full build, and also in --fast
# mode when no database has been generated yet.  Two separate tests joined
# with || are used instead of the obsolescent "-o" operator, which can be
# misparsed when "$1" happens to look like a test operator.
if [ "$1" != "--fast" ] || [ ! -f "$RULESETS_SQLITE" ] ; then
  echo "Generating sqlite DB"
  ./utils/make-sqlite.py src/chrome/content/rules
fi
105+
100106
# The name/version of the XPI we're building comes from src/install.rdf
101107
XPI_NAME="pkg/$APP_NAME-`grep em:version src/install.rdf | sed -e 's/[<>]/ /g' | cut -f3`"
102108
if [ "$1" ] && [ "$1" != "--fast" ] ; then
@@ -114,14 +120,6 @@ if [ -e "$GIT_OBJECT_FILE" ]; then
114120
export GIT_COMMIT_ID=$(cat "$GIT_OBJECT_FILE")
115121
fi
116122

117-
# Unless we're in a hurry and there's already a ruleset library, build it from
118-
# the ruleset .xml files
119-
120-
if [ "$1" = "--fast" ] ; then
121-
FAST="--fast"
122-
fi
123-
python ./utils/merge-rulesets.py $FAST
124-
125123
cd src
126124

127125
# Build the XPI!
@@ -135,7 +133,7 @@ if [ "$ret" != 0 ]; then
135133
rm -f "../$XPI_NAME"
136134
# Exit with the saved build status; "$?" at this point would be the status
# of the preceding "rm -f" (normally 0), which would mask the failure.
exit "$ret"
137135
else
138-
echo >&2 "Total included rules: `find chrome/content/rules -name "*.xml" | wc -l`"
136+
# Count the rulesets stored in the generated DB.  The path is quoted because
# it is built from $PWD and may contain spaces.
echo >&2 "Total included rules: $(sqlite3 "$RULESETS_SQLITE" 'select count(*) from rulesets')"
139137
echo >&2 "Rules disabled by default: `find chrome/content/rules -name "*.xml" | xargs grep -F default_off | wc -l`"
140138
echo >&2 "Created $XPI_NAME"
141139
if [ -n "$BRANCH" ]; then

src/chrome/content/code/HTTPSRules.js

Lines changed: 78 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,12 @@ const RuleWriter = {
280280

281281
sstream.close();
282282
fstream.close();
283+
return this.readFromString(data, rule_store, file);
284+
},
285+
286+
readFromString: function(data, rule_store, file) {
287+
if (typeof file === 'undefined') file = {path: 'fromString'};
288+
283289
// XXX: With DOMParser, we probably do not need to throw away the XML
284290
// declaration anymore nowadays.
285291
data = data.replace(/<\?xml[^>]*\?>/, "");
@@ -414,30 +420,29 @@ const HTTPSRules = {
414420
this.rulesetsByName = {};
415421
var t1 = new Date().getTime();
416422
this.checkMixedContentHandling();
417-
var rulefiles = RuleWriter.enumerate(RuleWriter.getCustomRuleDir());
418-
this.scanRulefiles(rulefiles);
419-
rulefiles = RuleWriter.enumerate(RuleWriter.getRuleDir());
420-
this.scanRulefiles(rulefiles);
421-
var t,i;
422-
for (t in this.targets) {
423-
for (i = 0 ; i < this.targets[t].length ; i++) {
424-
this.log(INFO, t + " -> " + this.targets[t][i].name);
425-
}
426-
}
427-
428-
// for any rulesets with <target host="*">
429-
// every URI needs to be checked against these rulesets
430-
// (though currently we don't ship any)
431-
this.global_rulesets = this.targets["*"] ? this.targets["*"] : [];
432423

433-
this.rulesets.sort(
434-
function(r1,r2) {
435-
if (r1.name.toLowerCase() < r2.name.toLowerCase()) return -1;
436-
else return 1;
437-
}
438-
);
424+
// Initialize database connection.
425+
var dbFile = FileUtils.getFile("ProfD",
426+
["extensions", "https-everywhere@eff.org", "defaults", "rulesets.sqlite"]);
427+
var rulesetDBConn = Services.storage.openDatabase(dbFile);
428+
this.queryForRuleset = rulesetDBConn.createStatement(
429+
"select contents from rulesets where id = :id");
430+
431+
// Preload the list of which targets are available in the DB.
432+
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
433+
// but is faster than loading all of the rulesets. If this becomes a
434+
// bottleneck, change it to load in a background webworker, or load
435+
// a smaller bloom filter instead.
436+
this.targetsAvailable = {};
437+
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
438+
this.log(DBUG, "Adding targets...");
439+
while (targetsQuery.executeStep()) {
440+
var host = targetsQuery.row.host;
441+
this.targetsAvailable[host] = targetsQuery.row.ruleset_id;
442+
}
443+
this.log(DBUG, "Done adding targets.");
439444
} catch(e) {
440-
this.log(WARN,"Rules Failed: "+e);
445+
// A failure while opening the ruleset DB or preloading its targets leaves
// the target list missing or incomplete, so report it at WARN instead of
// hiding it at debug level.
this.log(WARN,"Rules Failed: "+e);
441446
}
442447
var t2 = new Date().getTime();
443448
this.log(NOTE,"Loading rulesets took " + (t2 - t1) / 1000.0 + " seconds");
@@ -498,6 +503,8 @@ const HTTPSRules = {
498503
}
499504
},
500505

506+
httpMatch: /^http/i,
507+
501508
rewrittenURI: function(alist, input_uri) {
502509
// This function oversees the task of working out if a uri should be
503510
// rewritten, what it should be rewritten to, and recordkeeping of which
@@ -518,7 +525,7 @@ const HTTPSRules = {
518525
try {
519526
var rs = this.potentiallyApplicableRulesets(uri.host);
520527
} catch(e) {
521-
this.log(WARN, 'Could not check applicable rules for '+uri.spec);
528+
this.log(WARN, 'Could not check applicable rules for '+uri.spec + '\n'+e);
522529
return null;
523530
}
524531

@@ -602,17 +609,54 @@ const HTTPSRules = {
602609
intoList.push(fromList[i]);
603610
},
604611

612+
// Try to find a ruleset in the SQLite database for a given target (e.g.
613+
// '*.openssl.org')
614+
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
615+
// there any way to fix that, given that we need to run blocking in the request
616+
// flow? Perhaps we can preload all targets from the DB into memory at startup
617+
// so we only hit the DB when we know there is something to be had.
618+
queryTarget: function(target) {
619+
this.log(DBUG, "Querying DB for " + target);
620+
var output = [];
621+
622+
this.queryForRuleset.params.id = this.targetsAvailable[target];
623+
624+
try {
625+
while (this.queryForRuleset.executeStep())
626+
output.push(this.queryForRuleset.row.contents);
627+
} finally {
628+
this.queryForRuleset.reset();
629+
}
630+
return output;
631+
},
632+
605633
potentiallyApplicableRulesets: function(host) {
606634
// Return a list of rulesets that declare targets matching this host
607635
var i, tmp, t;
608-
var results = this.global_rulesets.slice(0); // copy global_rulesets
609-
try {
610-
if (this.targets[host])
611-
results = results.concat(this.targets[host]);
612-
} catch(e) {
613-
this.log(DBUG,"Couldn't check for ApplicableRulesets: " + e);
614-
return [];
615-
}
636+
var results = [];
637+
638+
// Collect into `results` the rulesets registered for one target pattern.
// Rulesets already loaded in memory are used directly; otherwise, if the
// target is listed in targetsAvailable, its ruleset contents are fetched from
// the DB, parsed via RuleWriter.readFromString, and then collected.
var attempt = function(target) {
  // First try the in-memory rulesets
  var loaded = this.targets[target];
  if (loaded && loaded.length > 0) {
    this.setInsert(results, loaded);
    return;
  }

  // Nothing to load for targets the DB does not list.
  if (!this.targetsAvailable[target]) {
    return;
  }

  // If not found in memory, check the DB and load the ruleset as appropriate
  var stored = this.queryTarget(target);
  if (stored.length === 0) {
    this.nonTargets[target] = 1;
    return;
  }

  stored.forEach(function(xml) {
    this.log(INFO, "Found ruleset in DB for " + host + ": " + xml);
    RuleWriter.readFromString(xml, this);
    this.setInsert(results, this.targets[target]);
  }, this);
}.bind(this);
658+
659+
attempt(host);
616660

617661
// replace each portion of the domain with a * in turn
618662
var segmented = host.split(".");
@@ -621,13 +665,13 @@ const HTTPSRules = {
621665
segmented[i] = "*";
622666
t = segmented.join(".");
623667
segmented[i] = tmp;
624-
this.setInsert(results, this.targets[t]);
668+
attempt(t);
625669
}
626670
// now eat away from the left, with *, so that for x.y.z.google.com we
627671
// check *.z.google.com and *.google.com (we did *.y.z.google.com above)
628-
for (i = 1; i <= segmented.length - 2; ++i) {
672+
for (i = 2; i <= segmented.length - 2; ++i) {
629673
t = "*." + segmented.slice(i,segmented.length).join(".");
630-
this.setInsert(results, this.targets[t]);
674+
attempt(t);
631675
}
632676
this.log(DBUG,"Potentially applicable rules for " + host + ":");
633677
for (i = 0; i < results.length; ++i)

src/components/https-everywhere.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ const Cc = Components.classes;
3131
const Cu = Components.utils;
3232
const Cr = Components.results;
3333

34+
Cu.import("resource://gre/modules/Services.jsm");
35+
Cu.import("resource://gre/modules/FileUtils.jsm");
36+
3437
const CP_SHOULDPROCESS = 4;
3538

3639
const SERVICE_CTRID = "@eff.org/https-everywhere;1";

utils/make-sqlite.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/python2.7
2+
#
3+
# Builds an sqlite DB containing all the rulesets, indexed by target.
4+
5+
import sqlite3
6+
import argparse
7+
import sys, re, os
8+
9+
from lxml import etree
10+
11+
parser = argparse.ArgumentParser(
12+
formatter_class=argparse.RawDescriptionHelpFormatter,
13+
description="Ruleset validation script.")
14+
parser.add_argument('ruleset', metavar='XML directory', type=str, nargs="*",
15+
default="src/chrome/content/rules",
16+
help='Directory of XML files to validate.')
17+
18+
args = parser.parse_args()
19+
20+
def nomes_all(where=sys.argv[1:]):
    """Yield the path of every file named by the entries of `where`.

    An entry that is a regular file is yielded as given; a directory is
    walked recursively and each file beneath it is yielded; any other entry
    is ignored.  An empty `where` means the current directory.
    """
    roots = where if where else ['.']
    for root in roots:
        if os.path.isfile(root):
            yield root
            continue
        if not os.path.isdir(root):
            continue
        for dirpath, _subdirs, filenames in os.walk(root):
            for filename in filenames:
                yield os.path.join(dirpath, filename)
30+
31+
32+
conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), '../src/defaults/rulesets.sqlite'))
33+
c = conn.cursor()
34+
c.execute('''DROP TABLE IF EXISTS rulesets''')
35+
c.execute('''CREATE TABLE rulesets
36+
(id INTEGER PRIMARY KEY,
37+
name TEXT,
38+
contents TEXT)''')
39+
c.execute('''DROP TABLE IF EXISTS targets''')
40+
c.execute('''CREATE TABLE targets
41+
(host TEXT,
42+
ruleset_id INTEGER)''')
43+
44+
parser = etree.XMLParser(remove_blank_text=True)
45+
46+
# Parse every candidate file and store each ruleset, plus one row per target
# host, in the database.
for fi in nomes_all():
    try:
        tree = etree.parse(fi, parser)
    except Exception as oops:
        # Files without an .xml suffix are skipped quietly; only files that
        # claim to be XML but fail to parse are reported.
        if fi[-4:] != ".xml":
            continue
        print("%s failed XML validity: %s\n" % (fi, oops))
        # Skip the broken file.  Falling through would reuse `tree` from the
        # previous iteration (inserting that ruleset a second time) or raise
        # NameError if this is the first file.
        continue
    if not tree.xpath("/ruleset"):
        continue

    # Remove comments to save space.
    etree.strip_tags(tree, etree.Comment)

    targets = tree.xpath("/ruleset/target/@host")
    # TODO: Strip target tags too. Right now the JS code requires there be a
    # target tag.
    #etree.strip_tags(tree,'target')

    # TODO: filter out comments and targets to save storage bytes
    ruleset_name = tree.xpath("/ruleset/@name")[0]
    c.execute('''INSERT INTO rulesets (name, contents) VALUES(?, ?)''',
              (ruleset_name, etree.tostring(tree)))
    # The auto-assigned primary key links each target row to its ruleset.
    ruleset_id = c.lastrowid
    for target in targets:
        c.execute('''INSERT INTO targets (host, ruleset_id) VALUES(?, ?)''',
                  (target, ruleset_id))
70+
71+
conn.commit()
72+
conn.close()

0 commit comments

Comments
 (0)