Skip to content

Commit 220466d

Browse files
committed
Strip target attributes from XML on DB creation, and index rulesets by id in JS
1 parent d948373 commit 220466d

2 files changed

Lines changed: 73 additions & 175 deletions

File tree

src/chrome/content/code/HTTPSRules.js

Lines changed: 62 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -20,38 +20,34 @@ function CookieRule(host, cookiename) {
2020
}
2121

2222
ruleset_counter = 0;
23-
function RuleSet(name, xmlName, match_rule, default_off, platform) {
23+
function RuleSet(id, name, xmlName, match_rule, default_off, platform) {
2424
if(xmlName == "WordPress.xml" || xmlName == "Github.xml") {
2525
this.log(NOTE, "RuleSet( name="+name+", xmlName="+xmlName+", match_rule="+match_rule+", default_off="+default_off+", platform="+platform+" )");
2626
}
2727

28-
this.id="httpseR" + ruleset_counter;
28+
this.id=id;
2929
ruleset_counter += 1;
3030
this.on_by_default = true;
3131
this.compiled = false;
3232
this.name = name;
3333
this.xmlName = xmlName;
34-
//this.ruleset_match = match_rule;
35-
this.notes = "";
3634
if (match_rule) this.ruleset_match_c = new RegExp(match_rule);
3735
else this.ruleset_match_c = null;
3836
if (default_off) {
3937
// Perhaps problematically, this currently ignores the actual content of
4038
// the default_off XML attribute. Ideally we'd like this attribute to be
4139
// "valueless"
42-
this.notes = default_off;
4340
this.on_by_default = false;
4441
}
4542
if (platform)
4643
if (platform.search(HTTPSRules.localPlatformRegexp) == -1) {
4744
this.on_by_default = false;
48-
this.notes = "Only for " + platform;
4945
}
5046

5147
this.rules = [];
5248
this.exclusions = [];
5349
this.cookierules = [];
54-
50+
5551
this.rule_toggle_prefs = HTTPSEverywhere.instance.rule_toggle_prefs;
5652

5753
try {
@@ -244,130 +240,37 @@ const RuleWriter = {
244240
return rv;
245241
},
246242

247-
getRuleDir: function() {
248-
var loc = "chrome://https-everywhere/content/rules/";
249-
250-
var file =
251-
CC["@mozilla.org/file/local;1"]
252-
.createInstance(CI.nsILocalFile);
253-
file.initWithPath(this.chromeToPath(loc));
254-
255-
if (!file.isDirectory()) {
256-
// XXX: Arg, death!
257-
this.log(WARN,"Catastrophic failure: extension directory is not a directory");
258-
}
259-
return file;
260-
},
261-
262-
read: function(file, rule_store) {
263-
if (!file.exists())
264-
return null;
265-
if ((rule_store.targets == null) && (rule_store.targets != {}))
266-
this.log(WARN, "TARGETS IS NULL");
267-
var data = "";
268-
var fstream = CC["@mozilla.org/network/file-input-stream;1"]
269-
.createInstance(CI.nsIFileInputStream);
270-
var sstream = CC["@mozilla.org/scriptableinputstream;1"]
271-
.createInstance(CI.nsIScriptableInputStream);
272-
fstream.init(file, -1, 0, 0);
273-
sstream.init(fstream);
274-
275-
var str = sstream.read(4096);
276-
while (str.length > 0) {
277-
data += str;
278-
str = sstream.read(4096);
279-
}
280-
281-
sstream.close();
282-
fstream.close();
283-
return this.readFromString(data, rule_store, file);
284-
},
285-
286-
readFromString: function(data, rule_store, file) {
287-
if (typeof file === 'undefined') file = {path: 'fromString'};
288-
289-
// XXX: With DOMParser, we probably do not need to throw away the XML
290-
// declaration anymore nowadays.
291-
data = data.replace(/<\?xml[^>]*\?>/, "");
243+
readFromString: function(data, rule_store, ruleset_id) {
292244
try {
293-
var xmlrulesets = dom_parser.parseFromString(data, "text/xml");
245+
var xmlruleset = dom_parser.parseFromString(data, "text/xml");
294246
} catch(e) { // file has been corrupted; XXX: handle error differently
295-
this.log(WARN,"Error in XML file: " + file.path + "\n" + e);
247+
this.log(WARN,"Error in XML data: " + e + "\n" + data);
296248
return null;
297249
}
298-
this.parseXmlRulesets(xmlrulesets, rule_store, file);
250+
this.parseOneRuleset(xmlruleset.documentElement, rule_store, ruleset_id);
299251
},
300252

301-
parseXmlRulesets: function(xmldom, rule_store, file) {
302-
// XML input files can either be a <ruleset> in a file, or a
303-
// <rulesetlibrary> with many <rulesets> inside it (the latter form exists
304-
// because ZIP does a much better job of compressing it).
305-
if (xmldom.documentElement.nodeName == "ruleset") {
306-
// This is a single ruleset.
307-
this.parseOneRuleset(xmldom.documentElement, rule_store, file);
308-
} else {
309-
// The root of the XML tree is assumed to look like a <rulesetlibrary>
310-
if (!xmldom.documentElement.getAttribute("gitcommitid")) {
311-
// The gitcommitid is a tricky hack to let us display the true full
312-
// source code of a ruleset, even though we strip out comments at build
313-
// time, by having the UI fetch the ruleset from the public https git repo.
314-
this.log(DBUG, "gitcommitid tag not found in <xmlruleset>");
315-
rule_store.GITCommitID = "HEAD";
316-
} else {
317-
rule_store.GITCommitID = xmldom.documentElement.getAttribute("gitcommitid");
318-
}
319-
320-
var rulesets = xmldom.documentElement.getElementsByTagName("ruleset");
321-
if (rulesets.length == 0 && (file.path.search("00README") == -1))
322-
this.log(WARN, "Probable <rulesetlibrary> with no <rulesets> in "
323-
+ file.path + "\n" + xmldom);
324-
for (var j = 0; j < rulesets.length; j++)
325-
this.parseOneRuleset(rulesets[j], rule_store, file);
326-
}
327-
},
328-
329-
parseOneRuleset: function(xmlruleset, rule_store, file) {
253+
parseOneRuleset: function(xmlruleset, rule_store, ruleset_id) {
330254
// Extract an xmlruleset into the rulestore
331255
if (!xmlruleset.getAttribute("name")) {
332256
this.log(WARN, "This blob: '" + xmlruleset + "' is not a ruleset\n");
333257
return null;
334258
}
335259

336-
this.log(DBUG, "Parsing " + xmlruleset.getAttribute("name") + " from " + file.path);
260+
this.log(DBUG, "Parsing " + xmlruleset.getAttribute("name"));
337261

338262
var match_rl = xmlruleset.getAttribute("match_rule");
339263
var dflt_off = xmlruleset.getAttribute("default_off");
340264
var platform = xmlruleset.getAttribute("platform");
341-
var rs = new RuleSet(xmlruleset.getAttribute("name"), xmlruleset.getAttribute("f"), match_rl, dflt_off, platform);
342-
343-
var targets = xmlruleset.getElementsByTagName("target");
344-
if (targets.length == 0) {
345-
var msg = "Error: As of v0.3.0, XML rulesets require a target domain entry,";
346-
msg = msg + "\nbut " + file.path + " is missing one.";
347-
this.log(WARN, msg);
348-
return null;
349-
}
265+
var rs = new RuleSet(ruleset_id, xmlruleset.getAttribute("name"), xmlruleset.getAttribute("f"), match_rl, dflt_off, platform);
350266

351267
// see if this ruleset has the same name as an existing ruleset;
352268
// if so, this ruleset is ignored; DON'T add or return it.
353269
if (rs.name in rule_store.rulesetsByName) {
354-
this.log(WARN, "Error: found duplicate rule name " + rs.name + " in file " + file.path);
270+
this.log(WARN, "Error: found duplicate rule name " + rs.name);
355271
return null;
356272
}
357273

358-
// add this ruleset into HTTPSRules.targets with all of the applicable
359-
// target host indexes
360-
for (var i = 0; i < targets.length; i++) {
361-
var host = targets[i].getAttribute("host");
362-
if (!host) {
363-
this.log(WARN, "<target> missing host in " + file.path);
364-
return null;
365-
}
366-
if (! rule_store.targets[host])
367-
rule_store.targets[host] = [];
368-
rule_store.targets[host].push(rs);
369-
}
370-
371274
var exclusions = xmlruleset.getElementsByTagName("exclusion");
372275
for (var i = 0; i < exclusions.length; i++) {
373276
var exclusion = new Exclusion(exclusions[i].getAttribute("pattern"));
@@ -414,8 +317,8 @@ const HTTPSRules = {
414317
init: function() {
415318
try {
416319
this.rulesets = [];
417-
this.targets = {}; // dict mapping target host patterns -> lists of
418-
// applicable rules
320+
this.targets = {}; // dict mapping target host pattern -> list of
321+
// applicable ruleset ids
419322
this.rulesetsByID = {};
420323
this.rulesetsByName = {};
421324
var t1 = new Date().getTime();
@@ -428,30 +331,40 @@ const HTTPSRules = {
428331
this.queryForRuleset = rulesetDBConn.createStatement(
429332
"select contents from rulesets where id = :id");
430333

431-
// Preload the list of which targets are available in the DB.
334+
// Preload the mapping of hostname target -> ruleset ID from DB.
432335
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
433336
// but is faster than loading all of the rulesets. If this becomes a
434337
// bottleneck, change it to load in a background webworker, or load
435338
// a smaller bloom filter instead.
436-
this.targetsAvailable = {};
437339
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
438-
this.log(DBUG, "Adding targets...");
340+
this.log(DBUG, "Loading targets...");
439341
while (targetsQuery.executeStep()) {
440342
var host = targetsQuery.row.host;
441-
this.targetsAvailable[host] = targetsQuery.row.ruleset_id;
343+
var id = targetsQuery.row.ruleset_id;
344+
if (!this.targets[host]) {
345+
this.targets[host] = [id];
346+
} else {
347+
this.targets[host].push(id);
348+
}
442349
}
443-
this.log(DBUG, "Done adding targets.");
350+
this.log(DBUG, "Done loading targets.");
444351
} catch(e) {
445352
this.log(DBUG,"Rules Failed: "+e);
446353
}
447354
var t2 = new Date().getTime();
448-
this.log(NOTE,"Loading rulesets took " + (t2 - t1) / 1000.0 + " seconds");
355+
this.log(NOTE,"Loading targets took " + (t2 - t1) / 1000.0 + " seconds");
356+
357+
var gitCommitQuery = rulesetDBConn.createStatement("select git_commit from git_commit");
358+
if (gitCommitQuery.executeStep()) {
359+
this.GITCommitID = gitCommitQuery.row.git_commit;
360+
}
361+
449362
try {
450363
if (HTTPSEverywhere.instance.prefs.getBoolPref("performance_tests")) {
451364
this.testRulesetRetrievalPerformance();
452365
}
453366
} catch(e) {
454-
this.log(WARN, "Explosion during testing " + e);
367+
this.log(WARN, "Exception during testing " + e);
455368
}
456369
return;
457370
},
@@ -480,21 +393,6 @@ const HTTPSRules = {
480393
}
481394
},
482395

483-
scanRulefiles: function(rulefiles) {
484-
var i = 0;
485-
var r = null;
486-
for(i = 0; i < rulefiles.length; ++i) {
487-
try {
488-
this.log(DBUG,"Loading ruleset file: "+rulefiles[i].path);
489-
RuleWriter.read(rulefiles[i], this);
490-
} catch(e) {
491-
this.log(WARN, "Error in ruleset file: " + e);
492-
if (e.lineNumber)
493-
this.log(WARN, "(line number: " + e.lineNumber + ")");
494-
}
495-
}
496-
},
497-
498396
resetRulesetsToDefaults: function() {
499397
// Callable from within the prefs UI and also for cleaning up buggy
500398
// configurations...
@@ -503,8 +401,6 @@ const HTTPSRules = {
503401
}
504402
},
505403

506-
httpMatch: /^http/i,
507-
508404
rewrittenURI: function(alist, input_uri) {
509405
// This function oversees the task of working out if a uri should be
510406
// rewritten, what it should be rewritten to, and recordkeeping of which
@@ -609,24 +505,46 @@ const HTTPSRules = {
609505
intoList.push(fromList[i]);
610506
},
611507

612-
// Try to find a ruleset in the SQLite database for a given target (e.g.
613-
// '*.openssl.org')
508+
// Load a ruleset by numeric id, e.g. 234
614509
// NOTE: This call runs synchronously, which can lock up the browser UI. Is
615510
// there any way to fix that, given that we need to run blocking in the request
616511
// flow? Perhaps we can preload all targets from the DB into memory at startup
617512
// so we only hit the DB when we know there is something to be had.
618-
queryTarget: function(target) {
619-
this.log(DBUG, "Querying DB for " + target);
620-
var output = [];
621-
622-
this.queryForRuleset.params.id = this.targetsAvailable[target];
513+
loadRulesetById: function(ruleset_id) {
514+
this.log(DBUG, "Querying DB for ruleset id " + ruleset_id);
515+
this.queryForRuleset.params.id = ruleset_id;
623516

624517
try {
625-
while (this.queryForRuleset.executeStep())
626-
output.push(this.queryForRuleset.row.contents);
518+
if (this.queryForRuleset.executeStep()) {
519+
this.log(INFO, "Found ruleset in DB for id " + ruleset_id);
520+
RuleWriter.readFromString(this.queryForRuleset.row.contents, this, ruleset_id);
521+
} else {
522+
this.log(WARN,"Couldn't find ruleset for id " + ruleset_id);
523+
}
627524
} finally {
628525
this.queryForRuleset.reset();
629526
}
527+
},
528+
529+
// Get all rulesets matching a given target, lazy-loading from DB as necessary.
530+
rulesetsByTarget: function(target) {
531+
var rulesetIds = this.targets[target];
532+
533+
var output = [];
534+
if (rulesetIds) {
535+
this.log(INFO, "For target " + target + ", found ids " + rulesetIds.toString());
536+
for (var i = 0; i < rulesetIds.length; i++) {
537+
var id = rulesetIds[i];
538+
if (!this.rulesetsByID[id]) {
539+
this.loadRulesetById(id);
540+
}
541+
if (this.rulesetsByID[id]) {
542+
output.push(this.rulesetsByID[id]);
543+
}
544+
}
545+
} else {
546+
this.log(INFO, "For target " + target + ", found no ids in DB");
547+
}
630548
return output;
631549
},
632550

@@ -636,24 +554,7 @@ const HTTPSRules = {
636554
var results = [];
637555

638556
var attempt = function(target) {
639-
// First try the in-memory rulesets
640-
if (this.targets[target] &&
641-
this.targets[target].length > 0) {
642-
this.setInsert(results, this.targets[target]);
643-
} else if (this.targetsAvailable[target]) {
644-
// If not found there, check the DB and load the ruleset as appropriate
645-
var rulesets = this.queryTarget(target);
646-
if (rulesets.length > 0) {
647-
for (var i = 0; i < rulesets.length; i++) {
648-
var ruleset = rulesets[i];
649-
this.log(INFO, "Found ruleset in DB for " + host + ": " + ruleset);
650-
RuleWriter.readFromString(ruleset, this);
651-
this.setInsert(results, this.targets[target]);
652-
}
653-
} else {
654-
this.nonTargets[target] = 1;
655-
}
656-
}
557+
this.setInsert(results, this.rulesetsByTarget(target));
657558
}.bind(this);
658559

659560
attempt(host);

0 commit comments

Comments
 (0)