Skip to content

Commit 14ac196

Browse files
author
MB
committed
Merge remote-tracking branch 'origin/master'
2 parents e575b6c + 3ee1011 commit 14ac196

40 files changed

+523
-384
lines changed

src/Changelog

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,27 @@
1+
4.0development.15 (2014-02-??)
2+
* Replace the single XML ruleset library with an sqlite database of rulesets
3+
that are loaded on demand
4+
- reduces startup time by a factor of 10-20:
5+
https://trac.torproject.org/projects/tor/ticket/10174
6+
- reduces RAM usage https://trac.torproject.org/projects/tor/ticket/4804
7+
- is scalable: https://trac.torproject.org/projects/tor/ticket/6118
8+
Further analysis in this thread:
9+
https://lists.eff.org/pipermail/https-everywhere/2014-January/001919.html
10+
* Implement a cleanup case to recover from some Observatory UI code bugs that
11+
would leave the Observatory off incorrectly.
12+
https://trac.torproject.org/projects/tor/ticket/10728
13+
* Fix Observatory - private browsing mode interaction
14+
https://trac.torproject.org/projects/tor/ticket/10208
15+
* Ship 848 new rulesets
16+
* Update cert whitelist
17+
18+
3.5android.0 (2014-01-31)
19+
* First Firefox for Android release! :D
20+
* Major UI changes for mobile compatibility
21+
* Android channel update URL set to
22+
https://www.eff.org/files/https-everywhere-android-update-2048.rdf
23+
* Updated rulesets: Freenode, Imgur
24+
125
3.4.5 (2014-01-03)
226
* Updated license
327
* Updated README.md

src/chrome/content/code/HTTPSRules.js

Lines changed: 63 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,16 @@ function CookieRule(host, cookiename) {
1919
//this.name_c = new RegExp(cookiename);
2020
}
2121

22-
ruleset_counter = 0;
23-
function RuleSet(name, xmlName, match_rule, default_off, platform) {
22+
function RuleSet(id, name, xmlName, match_rule, default_off, platform) {
2423
if(xmlName == "WordPress.xml" || xmlName == "Github.xml") {
2524
this.log(NOTE, "RuleSet( name="+name+", xmlName="+xmlName+", match_rule="+match_rule+", default_off="+default_off+", platform="+platform+" )");
2625
}
2726

28-
this.id="httpseR" + ruleset_counter;
29-
ruleset_counter += 1;
27+
this.id=id;
3028
this.on_by_default = true;
3129
this.compiled = false;
3230
this.name = name;
3331
this.xmlName = xmlName;
34-
//this.ruleset_match = match_rule;
3532
this.notes = "";
3633
if (match_rule) this.ruleset_match_c = new RegExp(match_rule);
3734
else this.ruleset_match_c = null;
@@ -51,7 +48,7 @@ function RuleSet(name, xmlName, match_rule, default_off, platform) {
5148
this.rules = [];
5249
this.exclusions = [];
5350
this.cookierules = [];
54-
51+
5552
this.rule_toggle_prefs = HTTPSEverywhere.instance.rule_toggle_prefs;
5653

5754
try {
@@ -244,130 +241,37 @@ const RuleWriter = {
244241
return rv;
245242
},
246243

247-
getRuleDir: function() {
248-
var loc = "chrome://https-everywhere/content/rules/";
249-
250-
var file =
251-
CC["@mozilla.org/file/local;1"]
252-
.createInstance(CI.nsILocalFile);
253-
file.initWithPath(this.chromeToPath(loc));
254-
255-
if (!file.isDirectory()) {
256-
// XXX: Arg, death!
257-
this.log(WARN,"Catastrophic failure: extension directory is not a directory");
258-
}
259-
return file;
260-
},
261-
262-
read: function(file, rule_store) {
263-
if (!file.exists())
264-
return null;
265-
if ((rule_store.targets == null) && (rule_store.targets != {}))
266-
this.log(WARN, "TARGETS IS NULL");
267-
var data = "";
268-
var fstream = CC["@mozilla.org/network/file-input-stream;1"]
269-
.createInstance(CI.nsIFileInputStream);
270-
var sstream = CC["@mozilla.org/scriptableinputstream;1"]
271-
.createInstance(CI.nsIScriptableInputStream);
272-
fstream.init(file, -1, 0, 0);
273-
sstream.init(fstream);
274-
275-
var str = sstream.read(4096);
276-
while (str.length > 0) {
277-
data += str;
278-
str = sstream.read(4096);
279-
}
280-
281-
sstream.close();
282-
fstream.close();
283-
return this.readFromString(data, rule_store, file);
284-
},
285-
286-
readFromString: function(data, rule_store, file) {
287-
if (typeof file === 'undefined') file = {path: 'fromString'};
288-
289-
// XXX: With DOMParser, we probably do not need to throw away the XML
290-
// declaration anymore nowadays.
291-
data = data.replace(/<\?xml[^>]*\?>/, "");
244+
readFromString: function(data, rule_store, ruleset_id) {
292245
try {
293-
var xmlrulesets = dom_parser.parseFromString(data, "text/xml");
246+
var xmlruleset = dom_parser.parseFromString(data, "text/xml");
294247
} catch(e) { // file has been corrupted; XXX: handle error differently
295-
this.log(WARN,"Error in XML file: " + file.path + "\n" + e);
248+
this.log(WARN,"Error in XML data: " + e + "\n" + data);
296249
return null;
297250
}
298-
this.parseXmlRulesets(xmlrulesets, rule_store, file);
251+
this.parseOneRuleset(xmlruleset.documentElement, rule_store, ruleset_id);
299252
},
300253

301-
parseXmlRulesets: function(xmldom, rule_store, file) {
302-
// XML input files can either be a <ruleset> in a file, or a
303-
// <rulesetlibrary> with many <rulesets> inside it (the latter form exists
304-
// because ZIP does a much better job of compressing it).
305-
if (xmldom.documentElement.nodeName == "ruleset") {
306-
// This is a single ruleset.
307-
this.parseOneRuleset(xmldom.documentElement, rule_store, file);
308-
} else {
309-
// The root of the XML tree is assumed to look like a <rulesetlibrary>
310-
if (!xmldom.documentElement.getAttribute("gitcommitid")) {
311-
// The gitcommitid is a tricky hack to let us display the true full
312-
// source code of a ruleset, even though we strip out comments at build
313-
// time, by having the UI fetch the ruleset from the public https git repo.
314-
this.log(DBUG, "gitcommitid tag not found in <xmlruleset>");
315-
rule_store.GITCommitID = "HEAD";
316-
} else {
317-
rule_store.GITCommitID = xmldom.documentElement.getAttribute("gitcommitid");
318-
}
319-
320-
var rulesets = xmldom.documentElement.getElementsByTagName("ruleset");
321-
if (rulesets.length == 0 && (file.path.search("00README") == -1))
322-
this.log(WARN, "Probable <rulesetlibrary> with no <rulesets> in "
323-
+ file.path + "\n" + xmldom);
324-
for (var j = 0; j < rulesets.length; j++)
325-
this.parseOneRuleset(rulesets[j], rule_store, file);
326-
}
327-
},
328-
329-
parseOneRuleset: function(xmlruleset, rule_store, file) {
254+
parseOneRuleset: function(xmlruleset, rule_store, ruleset_id) {
330255
// Extract an xmlruleset into the rulestore
331256
if (!xmlruleset.getAttribute("name")) {
332257
this.log(WARN, "This blob: '" + xmlruleset + "' is not a ruleset\n");
333258
return null;
334259
}
335260

336-
this.log(DBUG, "Parsing " + xmlruleset.getAttribute("name") + " from " + file.path);
261+
this.log(DBUG, "Parsing " + xmlruleset.getAttribute("name"));
337262

338263
var match_rl = xmlruleset.getAttribute("match_rule");
339264
var dflt_off = xmlruleset.getAttribute("default_off");
340265
var platform = xmlruleset.getAttribute("platform");
341-
var rs = new RuleSet(xmlruleset.getAttribute("name"), xmlruleset.getAttribute("f"), match_rl, dflt_off, platform);
342-
343-
var targets = xmlruleset.getElementsByTagName("target");
344-
if (targets.length == 0) {
345-
var msg = "Error: As of v0.3.0, XML rulesets require a target domain entry,";
346-
msg = msg + "\nbut " + file.path + " is missing one.";
347-
this.log(WARN, msg);
348-
return null;
349-
}
266+
var rs = new RuleSet(ruleset_id, xmlruleset.getAttribute("name"), xmlruleset.getAttribute("f"), match_rl, dflt_off, platform);
350267

351268
// see if this ruleset has the same name as an existing ruleset;
352269
// if so, this ruleset is ignored; DON'T add or return it.
353270
if (rs.name in rule_store.rulesetsByName) {
354-
this.log(WARN, "Error: found duplicate rule name " + rs.name + " in file " + file.path);
271+
this.log(WARN, "Error: found duplicate rule name " + rs.name);
355272
return null;
356273
}
357274

358-
// add this ruleset into HTTPSRules.targets with all of the applicable
359-
// target host indexes
360-
for (var i = 0; i < targets.length; i++) {
361-
var host = targets[i].getAttribute("host");
362-
if (!host) {
363-
this.log(WARN, "<target> missing host in " + file.path);
364-
return null;
365-
}
366-
if (! rule_store.targets[host])
367-
rule_store.targets[host] = [];
368-
rule_store.targets[host].push(rs);
369-
}
370-
371275
var exclusions = xmlruleset.getElementsByTagName("exclusion");
372276
for (var i = 0; i < exclusions.length; i++) {
373277
var exclusion = new Exclusion(exclusions[i].getAttribute("pattern"));
@@ -414,8 +318,8 @@ const HTTPSRules = {
414318
init: function() {
415319
try {
416320
this.rulesets = [];
417-
this.targets = {}; // dict mapping target host patterns -> lists of
418-
// applicable rules
321+
this.targets = {}; // dict mapping target host pattern -> list of
322+
// applicable ruleset ids
419323
this.rulesetsByID = {};
420324
this.rulesetsByName = {};
421325
var t1 = new Date().getTime();
@@ -428,30 +332,40 @@ const HTTPSRules = {
428332
this.queryForRuleset = rulesetDBConn.createStatement(
429333
"select contents from rulesets where id = :id");
430334

431-
// Preload the list of which targets are available in the DB.
335+
// Preload the mapping of hostname target -> ruleset ID from DB.
432336
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
433337
// but is faster than loading all of the rulesets. If this becomes a
434338
// bottleneck, change it to load in a background webworker, or load
435339
// a smaller bloom filter instead.
436-
this.targetsAvailable = {};
437340
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
438-
this.log(DBUG, "Adding targets...");
341+
this.log(DBUG, "Loading targets...");
439342
while (targetsQuery.executeStep()) {
440343
var host = targetsQuery.row.host;
441-
this.targetsAvailable[host] = targetsQuery.row.ruleset_id;
344+
var id = targetsQuery.row.ruleset_id;
345+
if (!this.targets[host]) {
346+
this.targets[host] = [id];
347+
} else {
348+
this.targets[host].push(id);
349+
}
442350
}
443-
this.log(DBUG, "Done adding targets.");
351+
this.log(DBUG, "Loading adding targets.");
444352
} catch(e) {
445353
this.log(DBUG,"Rules Failed: "+e);
446354
}
447355
var t2 = new Date().getTime();
448-
this.log(NOTE,"Loading rulesets took " + (t2 - t1) / 1000.0 + " seconds");
356+
this.log(NOTE,"Loading targets took " + (t2 - t1) / 1000.0 + " seconds");
357+
358+
var gitCommitQuery = rulesetDBConn.createStatement("select git_commit from git_commit");
359+
if (gitCommitQuery.executeStep()) {
360+
this.GITCommitID = gitCommitQuery.row.git_commit;
361+
}
362+
449363
try {
450364
if (HTTPSEverywhere.instance.prefs.getBoolPref("performance_tests")) {
451365
this.testRulesetRetrievalPerformance();
452366
}
453367
} catch(e) {
454-
this.log(WARN, "Explosion during testing " + e);
368+
this.log(WARN, "Exception during testing " + e);
455369
}
456370
return;
457371
},
@@ -480,21 +394,6 @@ const HTTPSRules = {
480394
}
481395
},
482396

483-
scanRulefiles: function(rulefiles) {
484-
var i = 0;
485-
var r = null;
486-
for(i = 0; i < rulefiles.length; ++i) {
487-
try {
488-
this.log(DBUG,"Loading ruleset file: "+rulefiles[i].path);
489-
RuleWriter.read(rulefiles[i], this);
490-
} catch(e) {
491-
this.log(WARN, "Error in ruleset file: " + e);
492-
if (e.lineNumber)
493-
this.log(WARN, "(line number: " + e.lineNumber + ")");
494-
}
495-
}
496-
},
497-
498397
resetRulesetsToDefaults: function() {
499398
// Callable from within the prefs UI and also for cleaning up buggy
500399
// configurations...
@@ -503,8 +402,6 @@ const HTTPSRules = {
503402
}
504403
},
505404

506-
httpMatch: /^http/i,
507-
508405
rewrittenURI: function(alist, input_uri) {
509406
// This function oversees the task of working out if a uri should be
510407
// rewritten, what it should be rewritten to, and recordkeeping of which
@@ -525,7 +422,7 @@ const HTTPSRules = {
525422
try {
526423
var rs = this.potentiallyApplicableRulesets(uri.host);
527424
} catch(e) {
528-
this.log(WARN, 'Could not check applicable rules for '+uri.spec + '\n'+e);
425+
this.log(NOTE, 'Could not check applicable rules for '+uri.spec + '\n'+e);
529426
return null;
530427
}
531428

@@ -609,24 +506,46 @@ const HTTPSRules = {
609506
intoList.push(fromList[i]);
610507
},
611508

612-
// Try to find a ruleset in the SQLite database for a given target (e.g.
613-
// '*.openssl.org')
509+
// Load a ruleset by numeric id, e.g. 234
614510
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
615511
// there any way to fix that, given that we need to run blocking in the request
616512
// flow? Perhaps we can preload all targets from the DB into memory at startup
617513
// so we only hit the DB when we know there is something to be had.
618-
queryTarget: function(target) {
619-
this.log(DBUG, "Querying DB for " + target);
620-
var output = [];
621-
622-
this.queryForRuleset.params.id = this.targetsAvailable[target];
514+
loadRulesetById: function(ruleset_id) {
515+
this.log(DBUG, "Querying DB for ruleset id " + ruleset_id);
516+
this.queryForRuleset.params.id = ruleset_id;
623517

624518
try {
625-
while (this.queryForRuleset.executeStep())
626-
output.push(this.queryForRuleset.row.contents);
519+
if (this.queryForRuleset.executeStep()) {
520+
this.log(INFO, "Found ruleset in DB for id " + ruleset_id);
521+
RuleWriter.readFromString(this.queryForRuleset.row.contents, this, ruleset_id);
522+
} else {
523+
this.log(WARN,"Couldn't find ruleset for id " + ruleset_id);
524+
}
627525
} finally {
628526
this.queryForRuleset.reset();
629527
}
528+
},
529+
530+
// Get all rulesets matching a given target, lazy-loading from DB as necessary.
531+
rulesetsByTarget: function(target) {
532+
var rulesetIds = this.targets[target];
533+
534+
var output = [];
535+
if (rulesetIds) {
536+
this.log(INFO, "For target " + target + ", found ids " + rulesetIds.toString());
537+
for (var i = 0; i < rulesetIds.length; i++) {
538+
var id = rulesetIds[i];
539+
if (!this.rulesetsByID[id]) {
540+
this.loadRulesetById(id);
541+
}
542+
if (this.rulesetsByID[id]) {
543+
output.push(this.rulesetsByID[id]);
544+
}
545+
}
546+
} else {
547+
this.log(INFO, "For target " + target + ", found no ids in DB");
548+
}
630549
return output;
631550
},
632551

@@ -636,24 +555,7 @@ const HTTPSRules = {
636555
var results = [];
637556

638557
var attempt = function(target) {
639-
// First try the in-memory rulesets
640-
if (this.targets[target] &&
641-
this.targets[target].length > 0) {
642-
this.setInsert(results, this.targets[target]);
643-
} else if (this.targetsAvailable[target]) {
644-
// If not found there, check the DB and load the ruleset as appropriate
645-
var rulesets = this.queryTarget(target);
646-
if (rulesets.length > 0) {
647-
for (var i = 0; i < rulesets.length; i++) {
648-
var ruleset = rulesets[i];
649-
this.log(INFO, "Found ruleset in DB for " + host + ": " + ruleset);
650-
RuleWriter.readFromString(ruleset, this);
651-
this.setInsert(results, this.targets[target]);
652-
}
653-
} else {
654-
this.nonTargets[target] = 1;
655-
}
656-
}
558+
this.setInsert(results, this.rulesetsByTarget(target));
657559
}.bind(this);
658560

659561
attempt(host);

0 commit comments

Comments
 (0)