@@ -19,19 +19,16 @@ function CookieRule(host, cookiename) {
1919 //this.name_c = new RegExp(cookiename);
2020}
2121
22- ruleset_counter = 0 ;
23- function RuleSet ( name , xmlName , match_rule , default_off , platform ) {
22+ function RuleSet ( id , name , xmlName , match_rule , default_off , platform ) {
2423 if ( xmlName == "WordPress.xml" || xmlName == "Github.xml" ) {
2524 this . log ( NOTE , "RuleSet( name=" + name + ", xmlName=" + xmlName + ", match_rule=" + match_rule + ", default_off=" + default_off + ", platform=" + platform + " )" ) ;
2625 }
2726
28- this . id = "httpseR" + ruleset_counter ;
29- ruleset_counter += 1 ;
27+ this . id = id ;
3028 this . on_by_default = true ;
3129 this . compiled = false ;
3230 this . name = name ;
3331 this . xmlName = xmlName ;
34- //this.ruleset_match = match_rule;
3532 this . notes = "" ;
3633 if ( match_rule ) this . ruleset_match_c = new RegExp ( match_rule ) ;
3734 else this . ruleset_match_c = null ;
@@ -51,7 +48,7 @@ function RuleSet(name, xmlName, match_rule, default_off, platform) {
5148 this . rules = [ ] ;
5249 this . exclusions = [ ] ;
5350 this . cookierules = [ ] ;
54-
51+
5552 this . rule_toggle_prefs = HTTPSEverywhere . instance . rule_toggle_prefs ;
5653
5754 try {
@@ -244,130 +241,37 @@ const RuleWriter = {
244241 return rv ;
245242 } ,
246243
247- getRuleDir : function ( ) {
248- var loc = "chrome://https-everywhere/content/rules/" ;
249-
250- var file =
251- CC [ "@mozilla.org/file/local;1" ]
252- . createInstance ( CI . nsILocalFile ) ;
253- file . initWithPath ( this . chromeToPath ( loc ) ) ;
254-
255- if ( ! file . isDirectory ( ) ) {
256- // XXX: Arg, death!
257- this . log ( WARN , "Catastrophic failure: extension directory is not a directory" ) ;
258- }
259- return file ;
260- } ,
261-
262- read : function ( file , rule_store ) {
263- if ( ! file . exists ( ) )
264- return null ;
265- if ( ( rule_store . targets == null ) && ( rule_store . targets != { } ) )
266- this . log ( WARN , "TARGETS IS NULL" ) ;
267- var data = "" ;
268- var fstream = CC [ "@mozilla.org/network/file-input-stream;1" ]
269- . createInstance ( CI . nsIFileInputStream ) ;
270- var sstream = CC [ "@mozilla.org/scriptableinputstream;1" ]
271- . createInstance ( CI . nsIScriptableInputStream ) ;
272- fstream . init ( file , - 1 , 0 , 0 ) ;
273- sstream . init ( fstream ) ;
274-
275- var str = sstream . read ( 4096 ) ;
276- while ( str . length > 0 ) {
277- data += str ;
278- str = sstream . read ( 4096 ) ;
279- }
280-
281- sstream . close ( ) ;
282- fstream . close ( ) ;
283- return this . readFromString ( data , rule_store , file ) ;
284- } ,
285-
286- readFromString : function ( data , rule_store , file ) {
287- if ( typeof file === 'undefined' ) file = { path : 'fromString' } ;
288-
289- // XXX: With DOMParser, we probably do not need to throw away the XML
290- // declaration anymore nowadays.
291- data = data . replace ( / < \? x m l [ ^ > ] * \? > / , "" ) ;
244+ readFromString : function ( data , rule_store , ruleset_id ) {
292245 try {
293- var xmlrulesets = dom_parser . parseFromString ( data , "text/xml" ) ;
246+ var xmlruleset = dom_parser . parseFromString ( data , "text/xml" ) ;
294247 } catch ( e ) { // file has been corrupted; XXX: handle error differently
295- this . log ( WARN , "Error in XML file : " + file . path + "\n" + e ) ;
248+ this . log ( WARN , "Error in XML data : " + e + "\n" + data ) ;
296249 return null ;
297250 }
298- this . parseXmlRulesets ( xmlrulesets , rule_store , file ) ;
251+ this . parseOneRuleset ( xmlruleset . documentElement , rule_store , ruleset_id ) ;
299252 } ,
300253
301- parseXmlRulesets : function ( xmldom , rule_store , file ) {
302- // XML input files can either be a <ruleset> in a file, or a
303- // <rulesetlibrary> with many <rulesets> inside it (the latter form exists
304- // because ZIP does a much better job of compressing it).
305- if ( xmldom . documentElement . nodeName == "ruleset" ) {
306- // This is a single ruleset.
307- this . parseOneRuleset ( xmldom . documentElement , rule_store , file ) ;
308- } else {
309- // The root of the XML tree is assumed to look like a <rulesetlibrary>
310- if ( ! xmldom . documentElement . getAttribute ( "gitcommitid" ) ) {
311- // The gitcommitid is a tricky hack to let us display the true full
312- // source code of a ruleset, even though we strip out comments at build
313- // time, by having the UI fetch the ruleset from the public https git repo.
314- this . log ( DBUG , "gitcommitid tag not found in <xmlruleset>" ) ;
315- rule_store . GITCommitID = "HEAD" ;
316- } else {
317- rule_store . GITCommitID = xmldom . documentElement . getAttribute ( "gitcommitid" ) ;
318- }
319-
320- var rulesets = xmldom . documentElement . getElementsByTagName ( "ruleset" ) ;
321- if ( rulesets . length == 0 && ( file . path . search ( "00README" ) == - 1 ) )
322- this . log ( WARN , "Probable <rulesetlibrary> with no <rulesets> in "
323- + file . path + "\n" + xmldom ) ;
324- for ( var j = 0 ; j < rulesets . length ; j ++ )
325- this . parseOneRuleset ( rulesets [ j ] , rule_store , file ) ;
326- }
327- } ,
328-
329- parseOneRuleset : function ( xmlruleset , rule_store , file ) {
254+ parseOneRuleset : function ( xmlruleset , rule_store , ruleset_id ) {
330255 // Extract an xmlruleset into the rulestore
331256 if ( ! xmlruleset . getAttribute ( "name" ) ) {
332257 this . log ( WARN , "This blob: '" + xmlruleset + "' is not a ruleset\n" ) ;
333258 return null ;
334259 }
335260
336- this . log ( DBUG , "Parsing " + xmlruleset . getAttribute ( "name" ) + " from " + file . path ) ;
261+ this . log ( DBUG , "Parsing " + xmlruleset . getAttribute ( "name" ) ) ;
337262
338263 var match_rl = xmlruleset . getAttribute ( "match_rule" ) ;
339264 var dflt_off = xmlruleset . getAttribute ( "default_off" ) ;
340265 var platform = xmlruleset . getAttribute ( "platform" ) ;
341- var rs = new RuleSet ( xmlruleset . getAttribute ( "name" ) , xmlruleset . getAttribute ( "f" ) , match_rl , dflt_off , platform ) ;
342-
343- var targets = xmlruleset . getElementsByTagName ( "target" ) ;
344- if ( targets . length == 0 ) {
345- var msg = "Error: As of v0.3.0, XML rulesets require a target domain entry," ;
346- msg = msg + "\nbut " + file . path + " is missing one." ;
347- this . log ( WARN , msg ) ;
348- return null ;
349- }
266+ var rs = new RuleSet ( ruleset_id , xmlruleset . getAttribute ( "name" ) , xmlruleset . getAttribute ( "f" ) , match_rl , dflt_off , platform ) ;
350267
351268 // see if this ruleset has the same name as an existing ruleset;
352269 // if so, this ruleset is ignored; DON'T add or return it.
353270 if ( rs . name in rule_store . rulesetsByName ) {
354- this . log ( WARN , "Error: found duplicate rule name " + rs . name + " in file " + file . path ) ;
271+ this . log ( WARN , "Error: found duplicate rule name " + rs . name ) ;
355272 return null ;
356273 }
357274
358- // add this ruleset into HTTPSRules.targets with all of the applicable
359- // target host indexes
360- for ( var i = 0 ; i < targets . length ; i ++ ) {
361- var host = targets [ i ] . getAttribute ( "host" ) ;
362- if ( ! host ) {
363- this . log ( WARN , "<target> missing host in " + file . path ) ;
364- return null ;
365- }
366- if ( ! rule_store . targets [ host ] )
367- rule_store . targets [ host ] = [ ] ;
368- rule_store . targets [ host ] . push ( rs ) ;
369- }
370-
371275 var exclusions = xmlruleset . getElementsByTagName ( "exclusion" ) ;
372276 for ( var i = 0 ; i < exclusions . length ; i ++ ) {
373277 var exclusion = new Exclusion ( exclusions [ i ] . getAttribute ( "pattern" ) ) ;
@@ -414,8 +318,8 @@ const HTTPSRules = {
414318 init : function ( ) {
415319 try {
416320 this . rulesets = [ ] ;
417- this . targets = { } ; // dict mapping target host patterns -> lists of
418- // applicable rules
321+ this . targets = { } ; // dict mapping target host pattern -> list of
322+ // applicable ruleset ids
419323 this . rulesetsByID = { } ;
420324 this . rulesetsByName = { } ;
421325 var t1 = new Date ( ) . getTime ( ) ;
@@ -428,30 +332,40 @@ const HTTPSRules = {
428332 this . queryForRuleset = rulesetDBConn . createStatement (
429333 "select contents from rulesets where id = :id" ) ;
430334
431- // Preload the list of which targets are available in the DB.
335+ // Preload the mapping of hostname target -> ruleset ID from DB.
432336 // This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
433337 // but is faster than loading all of the rulesets. If this becomes a
434338 // bottleneck, change it to load in a background webworker, or load
435339 // a smaller bloom filter instead.
436- this . targetsAvailable = { } ;
437340 var targetsQuery = rulesetDBConn . createStatement ( "select host, ruleset_id from targets" ) ;
438- this . log ( DBUG , "Adding targets..." ) ;
341+ this . log ( DBUG , "Loading targets..." ) ;
439342 while ( targetsQuery . executeStep ( ) ) {
440343 var host = targetsQuery . row . host ;
441- this . targetsAvailable [ host ] = targetsQuery . row . ruleset_id ;
344+ var id = targetsQuery . row . ruleset_id ;
345+ if ( ! this . targets [ host ] ) {
346+ this . targets [ host ] = [ id ] ;
347+ } else {
348+ this . targets [ host ] . push ( id ) ;
349+ }
442350 }
443- this . log ( DBUG , "Done adding targets." ) ;
351+ this . log ( DBUG , "Loading adding targets." ) ;
444352 } catch ( e ) {
445353 this . log ( DBUG , "Rules Failed: " + e ) ;
446354 }
447355 var t2 = new Date ( ) . getTime ( ) ;
448- this . log ( NOTE , "Loading rulesets took " + ( t2 - t1 ) / 1000.0 + " seconds" ) ;
356+ this . log ( NOTE , "Loading targets took " + ( t2 - t1 ) / 1000.0 + " seconds" ) ;
357+
358+ var gitCommitQuery = rulesetDBConn . createStatement ( "select git_commit from git_commit" ) ;
359+ if ( gitCommitQuery . executeStep ( ) ) {
360+ this . GITCommitID = gitCommitQuery . row . git_commit ;
361+ }
362+
449363 try {
450364 if ( HTTPSEverywhere . instance . prefs . getBoolPref ( "performance_tests" ) ) {
451365 this . testRulesetRetrievalPerformance ( ) ;
452366 }
453367 } catch ( e ) {
454- this . log ( WARN , "Explosion during testing " + e ) ;
368+ this . log ( WARN , "Exception during testing " + e ) ;
455369 }
456370 return ;
457371 } ,
@@ -480,21 +394,6 @@ const HTTPSRules = {
480394 }
481395 } ,
482396
483- scanRulefiles : function ( rulefiles ) {
484- var i = 0 ;
485- var r = null ;
486- for ( i = 0 ; i < rulefiles . length ; ++ i ) {
487- try {
488- this . log ( DBUG , "Loading ruleset file: " + rulefiles [ i ] . path ) ;
489- RuleWriter . read ( rulefiles [ i ] , this ) ;
490- } catch ( e ) {
491- this . log ( WARN , "Error in ruleset file: " + e ) ;
492- if ( e . lineNumber )
493- this . log ( WARN , "(line number: " + e . lineNumber + ")" ) ;
494- }
495- }
496- } ,
497-
498397 resetRulesetsToDefaults : function ( ) {
499398 // Callable from within the prefs UI and also for cleaning up buggy
500399 // configurations...
@@ -503,8 +402,6 @@ const HTTPSRules = {
503402 }
504403 } ,
505404
506- httpMatch : / ^ h t t p / i,
507-
508405 rewrittenURI : function ( alist , input_uri ) {
509406 // This function oversees the task of working out if a uri should be
510407 // rewritten, what it should be rewritten to, and recordkeeping of which
@@ -525,7 +422,7 @@ const HTTPSRules = {
525422 try {
526423 var rs = this . potentiallyApplicableRulesets ( uri . host ) ;
527424 } catch ( e ) {
528- this . log ( WARN , 'Could not check applicable rules for ' + uri . spec + '\n' + e ) ;
425+ this . log ( NOTE , 'Could not check applicable rules for ' + uri . spec + '\n' + e ) ;
529426 return null ;
530427 }
531428
@@ -609,24 +506,46 @@ const HTTPSRules = {
609506 intoList . push ( fromList [ i ] ) ;
610507 } ,
611508
612- // Try to find a ruleset in the SQLite database for a given target (e.g.
613- // '*.openssl.org')
509+ // Load a ruleset by numeric id, e.g. 234
614510 // NOTE: This call runs synchronously, which can lock up the browser UI. Is
615511 // there any way to fix that, given that we need to run blocking in the request
616512 // flow? Perhaps we can preload all targets from the DB into memory at startup
617513 // so we only hit the DB when we know there is something to be had.
618- queryTarget : function ( target ) {
619- this . log ( DBUG , "Querying DB for " + target ) ;
620- var output = [ ] ;
621-
622- this . queryForRuleset . params . id = this . targetsAvailable [ target ] ;
514+ loadRulesetById : function ( ruleset_id ) {
515+ this . log ( DBUG , "Querying DB for ruleset id " + ruleset_id ) ;
516+ this . queryForRuleset . params . id = ruleset_id ;
623517
624518 try {
625- while ( this . queryForRuleset . executeStep ( ) )
626- output . push ( this . queryForRuleset . row . contents ) ;
519+ if ( this . queryForRuleset . executeStep ( ) ) {
520+ this . log ( INFO , "Found ruleset in DB for id " + ruleset_id ) ;
521+ RuleWriter . readFromString ( this . queryForRuleset . row . contents , this , ruleset_id ) ;
522+ } else {
523+ this . log ( WARN , "Couldn't find ruleset for id " + ruleset_id ) ;
524+ }
627525 } finally {
628526 this . queryForRuleset . reset ( ) ;
629527 }
528+ } ,
529+
530+ // Get all rulesets matching a given target, lazy-loading from DB as necessary.
531+ rulesetsByTarget : function ( target ) {
532+ var rulesetIds = this . targets [ target ] ;
533+
534+ var output = [ ] ;
535+ if ( rulesetIds ) {
536+ this . log ( INFO , "For target " + target + ", found ids " + rulesetIds . toString ( ) ) ;
537+ for ( var i = 0 ; i < rulesetIds . length ; i ++ ) {
538+ var id = rulesetIds [ i ] ;
539+ if ( ! this . rulesetsByID [ id ] ) {
540+ this . loadRulesetById ( id ) ;
541+ }
542+ if ( this . rulesetsByID [ id ] ) {
543+ output . push ( this . rulesetsByID [ id ] ) ;
544+ }
545+ }
546+ } else {
547+ this . log ( INFO , "For target " + target + ", found no ids in DB" ) ;
548+ }
630549 return output ;
631550 } ,
632551
@@ -636,24 +555,7 @@ const HTTPSRules = {
636555 var results = [ ] ;
637556
638557 var attempt = function ( target ) {
639- // First try the in-memory rulesets
640- if ( this . targets [ target ] &&
641- this . targets [ target ] . length > 0 ) {
642- this . setInsert ( results , this . targets [ target ] ) ;
643- } else if ( this . targetsAvailable [ target ] ) {
644- // If not found there, check the DB and load the ruleset as appropriate
645- var rulesets = this . queryTarget ( target ) ;
646- if ( rulesets . length > 0 ) {
647- for ( var i = 0 ; i < rulesets . length ; i ++ ) {
648- var ruleset = rulesets [ i ] ;
649- this . log ( INFO , "Found ruleset in DB for " + host + ": " + ruleset ) ;
650- RuleWriter . readFromString ( ruleset , this ) ;
651- this . setInsert ( results , this . targets [ target ] ) ;
652- }
653- } else {
654- this . nonTargets [ target ] = 1 ;
655- }
656- }
558+ this . setInsert ( results , this . rulesetsByTarget ( target ) ) ;
657559 } . bind ( this ) ;
658560
659561 attempt ( host ) ;
0 commit comments