@@ -20,38 +20,34 @@ function CookieRule(host, cookiename) {
2020}
2121
2222ruleset_counter = 0 ;
23- function RuleSet ( name , xmlName , match_rule , default_off , platform ) {
23+ function RuleSet ( id , name , xmlName , match_rule , default_off , platform ) {
2424 if ( xmlName == "WordPress.xml" || xmlName == "Github.xml" ) {
2525 this . log ( NOTE , "RuleSet( name=" + name + ", xmlName=" + xmlName + ", match_rule=" + match_rule + ", default_off=" + default_off + ", platform=" + platform + " )" ) ;
2626 }
2727
28- this . id = "httpseR" + ruleset_counter ;
28+ this . id = id ;
2929 ruleset_counter += 1 ;
3030 this . on_by_default = true ;
3131 this . compiled = false ;
3232 this . name = name ;
3333 this . xmlName = xmlName ;
34- //this.ruleset_match = match_rule;
35- this . notes = "" ;
3634 if ( match_rule ) this . ruleset_match_c = new RegExp ( match_rule ) ;
3735 else this . ruleset_match_c = null ;
3836 if ( default_off ) {
3937 // Perhaps problematically, this currently ignores the actual content of
4038 // the default_off XML attribute. Ideally we'd like this attribute to be
4139 // "valueless"
42- this . notes = default_off ;
4340 this . on_by_default = false ;
4441 }
4542 if ( platform )
4643 if ( platform . search ( HTTPSRules . localPlatformRegexp ) == - 1 ) {
4744 this . on_by_default = false ;
48- this . notes = "Only for " + platform ;
4945 }
5046
5147 this . rules = [ ] ;
5248 this . exclusions = [ ] ;
5349 this . cookierules = [ ] ;
54-
50+
5551 this . rule_toggle_prefs = HTTPSEverywhere . instance . rule_toggle_prefs ;
5652
5753 try {
@@ -244,130 +240,37 @@ const RuleWriter = {
244240 return rv ;
245241 } ,
246242
247- getRuleDir : function ( ) {
248- var loc = "chrome://https-everywhere/content/rules/" ;
249-
250- var file =
251- CC [ "@mozilla.org/file/local;1" ]
252- . createInstance ( CI . nsILocalFile ) ;
253- file . initWithPath ( this . chromeToPath ( loc ) ) ;
254-
255- if ( ! file . isDirectory ( ) ) {
256- // XXX: Arg, death!
257- this . log ( WARN , "Catastrophic failure: extension directory is not a directory" ) ;
258- }
259- return file ;
260- } ,
261-
262- read : function ( file , rule_store ) {
263- if ( ! file . exists ( ) )
264- return null ;
265- if ( ( rule_store . targets == null ) && ( rule_store . targets != { } ) )
266- this . log ( WARN , "TARGETS IS NULL" ) ;
267- var data = "" ;
268- var fstream = CC [ "@mozilla.org/network/file-input-stream;1" ]
269- . createInstance ( CI . nsIFileInputStream ) ;
270- var sstream = CC [ "@mozilla.org/scriptableinputstream;1" ]
271- . createInstance ( CI . nsIScriptableInputStream ) ;
272- fstream . init ( file , - 1 , 0 , 0 ) ;
273- sstream . init ( fstream ) ;
274-
275- var str = sstream . read ( 4096 ) ;
276- while ( str . length > 0 ) {
277- data += str ;
278- str = sstream . read ( 4096 ) ;
279- }
280-
281- sstream . close ( ) ;
282- fstream . close ( ) ;
283- return this . readFromString ( data , rule_store , file ) ;
284- } ,
285-
286- readFromString : function ( data , rule_store , file ) {
287- if ( typeof file === 'undefined' ) file = { path : 'fromString' } ;
288-
289- // XXX: With DOMParser, we probably do not need to throw away the XML
290- // declaration anymore nowadays.
291- data = data . replace ( / < \? x m l [ ^ > ] * \? > / , "" ) ;
243+ readFromString : function ( data , rule_store , ruleset_id ) {
292244 try {
293- var xmlrulesets = dom_parser . parseFromString ( data , "text/xml" ) ;
245+ var xmlruleset = dom_parser . parseFromString ( data , "text/xml" ) ;
294246 } catch ( e ) { // file has been corrupted; XXX: handle error differently
295- this . log ( WARN , "Error in XML file : " + file . path + "\n" + e ) ;
247+ this . log ( WARN , "Error in XML data : " + e + "\n" + data ) ;
296248 return null ;
297249 }
298- this . parseXmlRulesets ( xmlrulesets , rule_store , file ) ;
250+ this . parseOneRuleset ( xmlruleset . documentElement , rule_store , ruleset_id ) ;
299251 } ,
300252
301- parseXmlRulesets : function ( xmldom , rule_store , file ) {
302- // XML input files can either be a <ruleset> in a file, or a
303- // <rulesetlibrary> with many <rulesets> inside it (the latter form exists
304- // because ZIP does a much better job of compressing it).
305- if ( xmldom . documentElement . nodeName == "ruleset" ) {
306- // This is a single ruleset.
307- this . parseOneRuleset ( xmldom . documentElement , rule_store , file ) ;
308- } else {
309- // The root of the XML tree is assumed to look like a <rulesetlibrary>
310- if ( ! xmldom . documentElement . getAttribute ( "gitcommitid" ) ) {
311- // The gitcommitid is a tricky hack to let us display the true full
312- // source code of a ruleset, even though we strip out comments at build
313- // time, by having the UI fetch the ruleset from the public https git repo.
314- this . log ( DBUG , "gitcommitid tag not found in <xmlruleset>" ) ;
315- rule_store . GITCommitID = "HEAD" ;
316- } else {
317- rule_store . GITCommitID = xmldom . documentElement . getAttribute ( "gitcommitid" ) ;
318- }
319-
320- var rulesets = xmldom . documentElement . getElementsByTagName ( "ruleset" ) ;
321- if ( rulesets . length == 0 && ( file . path . search ( "00README" ) == - 1 ) )
322- this . log ( WARN , "Probable <rulesetlibrary> with no <rulesets> in "
323- + file . path + "\n" + xmldom ) ;
324- for ( var j = 0 ; j < rulesets . length ; j ++ )
325- this . parseOneRuleset ( rulesets [ j ] , rule_store , file ) ;
326- }
327- } ,
328-
329- parseOneRuleset : function ( xmlruleset , rule_store , file ) {
253+ parseOneRuleset : function ( xmlruleset , rule_store , ruleset_id ) {
330254 // Extract an xmlruleset into the rulestore
331255 if ( ! xmlruleset . getAttribute ( "name" ) ) {
332256 this . log ( WARN , "This blob: '" + xmlruleset + "' is not a ruleset\n" ) ;
333257 return null ;
334258 }
335259
336- this . log ( DBUG , "Parsing " + xmlruleset . getAttribute ( "name" ) + " from " + file . path ) ;
260+ this . log ( DBUG , "Parsing " + xmlruleset . getAttribute ( "name" ) ) ;
337261
338262 var match_rl = xmlruleset . getAttribute ( "match_rule" ) ;
339263 var dflt_off = xmlruleset . getAttribute ( "default_off" ) ;
340264 var platform = xmlruleset . getAttribute ( "platform" ) ;
341- var rs = new RuleSet ( xmlruleset . getAttribute ( "name" ) , xmlruleset . getAttribute ( "f" ) , match_rl , dflt_off , platform ) ;
342-
343- var targets = xmlruleset . getElementsByTagName ( "target" ) ;
344- if ( targets . length == 0 ) {
345- var msg = "Error: As of v0.3.0, XML rulesets require a target domain entry," ;
346- msg = msg + "\nbut " + file . path + " is missing one." ;
347- this . log ( WARN , msg ) ;
348- return null ;
349- }
265+ var rs = new RuleSet ( ruleset_id , xmlruleset . getAttribute ( "name" ) , xmlruleset . getAttribute ( "f" ) , match_rl , dflt_off , platform ) ;
350266
351267 // see if this ruleset has the same name as an existing ruleset;
352268 // if so, this ruleset is ignored; DON'T add or return it.
353269 if ( rs . name in rule_store . rulesetsByName ) {
354- this . log ( WARN , "Error: found duplicate rule name " + rs . name + " in file " + file . path ) ;
270+ this . log ( WARN , "Error: found duplicate rule name " + rs . name ) ;
355271 return null ;
356272 }
357273
358- // add this ruleset into HTTPSRules.targets with all of the applicable
359- // target host indexes
360- for ( var i = 0 ; i < targets . length ; i ++ ) {
361- var host = targets [ i ] . getAttribute ( "host" ) ;
362- if ( ! host ) {
363- this . log ( WARN , "<target> missing host in " + file . path ) ;
364- return null ;
365- }
366- if ( ! rule_store . targets [ host ] )
367- rule_store . targets [ host ] = [ ] ;
368- rule_store . targets [ host ] . push ( rs ) ;
369- }
370-
371274 var exclusions = xmlruleset . getElementsByTagName ( "exclusion" ) ;
372275 for ( var i = 0 ; i < exclusions . length ; i ++ ) {
373276 var exclusion = new Exclusion ( exclusions [ i ] . getAttribute ( "pattern" ) ) ;
@@ -414,8 +317,8 @@ const HTTPSRules = {
414317 init : function ( ) {
415318 try {
416319 this . rulesets = [ ] ;
417- this . targets = { } ; // dict mapping target host patterns -> lists of
418- // applicable rules
320+ this . targets = { } ; // dict mapping target host pattern -> list of
321+ // applicable ruleset ids
419322 this . rulesetsByID = { } ;
420323 this . rulesetsByName = { } ;
421324 var t1 = new Date ( ) . getTime ( ) ;
@@ -428,30 +331,40 @@ const HTTPSRules = {
428331 this . queryForRuleset = rulesetDBConn . createStatement (
429332 "select contents from rulesets where id = :id" ) ;
430333
// Preload the mapping of hostname target -> list of ruleset IDs from the DB.
// This is a little slow (287 ms on a Core2 Duo @ 2.2GHz with SSD),
// but is faster than loading all of the rulesets. If this becomes a
// bottleneck, change it to load in a background webworker, or load
// a smaller bloom filter instead.
var targetsQuery = rulesetDBConn.createStatement("select host, ruleset_id from targets");
this.log(DBUG, "Loading targets...");
try {
  while (targetsQuery.executeStep()) {
    var host = targetsQuery.row.host;
    var id = targetsQuery.row.ruleset_id;
    // A host may be listed for several rulesets, so each entry is a list.
    if (!this.targets[host]) {
      this.targets[host] = [id];
    } else {
      this.targets[host].push(id);
    }
  }
} finally {
  // This statement is only used here; release it even if a step throws.
  targetsQuery.finalize();
}
this.log(DBUG, "Done loading targets.");
444351 } catch ( e ) {
445352 this . log ( DBUG , "Rules Failed: " + e ) ;
446353 }
447354 var t2 = new Date ( ) . getTime ( ) ;
448- this . log ( NOTE , "Loading rulesets took " + ( t2 - t1 ) / 1000.0 + " seconds" ) ;
355+ this . log ( NOTE , "Loading targets took " + ( t2 - t1 ) / 1000.0 + " seconds" ) ;
356+
// Read the git commit id stored in the ruleset DB into this.GITCommitID.
// This runs outside the try block used for loading targets, so it needs its
// own guard: a failure here (for example an unusable DB connection) must not
// abort the rest of init.
// NOTE(review): assumes rulesetDBConn is declared with `var` earlier in this
// function, so it is undefined rather than undeclared if opening the DB
// failed -- confirm.
try {
  var gitCommitQuery = rulesetDBConn.createStatement("select git_commit from git_commit");
  try {
    if (gitCommitQuery.executeStep()) {
      this.GITCommitID = gitCommitQuery.row.git_commit;
    }
  } finally {
    // One-shot statement; release it whether or not a row was found.
    gitCommitQuery.finalize();
  }
} catch (e) {
  this.log(WARN, "Failed to read git commit id from ruleset DB: " + e);
}
// Fall back to "HEAD" when no commit id could be read, so GITCommitID is
// never left undefined.
if (!this.GITCommitID) {
  this.GITCommitID = "HEAD";
}
361+
449362 try {
450363 if ( HTTPSEverywhere . instance . prefs . getBoolPref ( "performance_tests" ) ) {
451364 this . testRulesetRetrievalPerformance ( ) ;
452365 }
453366 } catch ( e ) {
454- this . log ( WARN , "Explosion during testing " + e ) ;
367+ this . log ( WARN , "Exception during testing " + e ) ;
455368 }
456369 return ;
457370 } ,
@@ -480,21 +393,6 @@ const HTTPSRules = {
480393 }
481394 } ,
482395
483- scanRulefiles : function ( rulefiles ) {
484- var i = 0 ;
485- var r = null ;
486- for ( i = 0 ; i < rulefiles . length ; ++ i ) {
487- try {
488- this . log ( DBUG , "Loading ruleset file: " + rulefiles [ i ] . path ) ;
489- RuleWriter . read ( rulefiles [ i ] , this ) ;
490- } catch ( e ) {
491- this . log ( WARN , "Error in ruleset file: " + e ) ;
492- if ( e . lineNumber )
493- this . log ( WARN , "(line number: " + e . lineNumber + ")" ) ;
494- }
495- }
496- } ,
497-
498396 resetRulesetsToDefaults : function ( ) {
499397 // Callable from within the prefs UI and also for cleaning up buggy
500398 // configurations...
@@ -503,8 +401,6 @@ const HTTPSRules = {
503401 }
504402 } ,
505403
506- httpMatch : / ^ h t t p / i,
507-
508404 rewrittenURI : function ( alist , input_uri ) {
509405 // This function oversees the task of working out if a uri should be
510406 // rewritten, what it should be rewritten to, and recordkeeping of which
@@ -609,24 +505,46 @@ const HTTPSRules = {
609505 intoList . push ( fromList [ i ] ) ;
610506 } ,
611507
612- // Try to find a ruleset in the SQLite database for a given target (e.g.
613- // '*.openssl.org')
508+ // Load a ruleset by numeric id, e.g. 234
614509 // NOTE: This call runs synchronously, which can lock up the browser UI. Is
615510 // there any way to fix that, given that we need to run blocking in the request
616511 // flow? Perhaps we can preload all targets from the DB into memory at startup
617512 // so we only hit the DB when we know there is something to be had.
618- queryTarget : function ( target ) {
619- this . log ( DBUG , "Querying DB for " + target ) ;
620- var output = [ ] ;
621-
622- this . queryForRuleset . params . id = this . targetsAvailable [ target ] ;
513+ loadRulesetById : function ( ruleset_id ) {
514+ this . log ( DBUG , "Querying DB for ruleset id " + ruleset_id ) ;
515+ this . queryForRuleset . params . id = ruleset_id ;
623516
624517 try {
625- while ( this . queryForRuleset . executeStep ( ) )
626- output . push ( this . queryForRuleset . row . contents ) ;
518+ if ( this . queryForRuleset . executeStep ( ) ) {
519+ this . log ( INFO , "Found ruleset in DB for id " + ruleset_id ) ;
520+ RuleWriter . readFromString ( this . queryForRuleset . row . contents , this , ruleset_id ) ;
521+ } else {
522+ this . log ( WARN , "Couldn't find ruleset for id " + ruleset_id ) ;
523+ }
627524 } finally {
628525 this . queryForRuleset . reset ( ) ;
629526 }
527+ } ,
528+
529+ // Get all rulesets matching a given target, lazy-loading from DB as necessary.
530+ rulesetsByTarget : function ( target ) {
531+ var rulesetIds = this . targets [ target ] ;
532+
533+ var output = [ ] ;
534+ if ( rulesetIds ) {
535+ this . log ( INFO , "For target " + target + ", found ids " + rulesetIds . toString ( ) ) ;
536+ for ( var i = 0 ; i < rulesetIds . length ; i ++ ) {
537+ var id = rulesetIds [ i ] ;
538+ if ( ! this . rulesetsByID [ id ] ) {
539+ this . loadRulesetById ( id ) ;
540+ }
541+ if ( this . rulesetsByID [ id ] ) {
542+ output . push ( this . rulesetsByID [ id ] ) ;
543+ }
544+ }
545+ } else {
546+ this . log ( INFO , "For target " + target + ", found no ids in DB" ) ;
547+ }
630548 return output ;
631549 } ,
632550
@@ -636,24 +554,7 @@ const HTTPSRules = {
636554 var results = [ ] ;
637555
// Look up the rulesets for one candidate target and merge them into
// `results` via setInsert.
var attempt = function(target) {
  var matches = this.rulesetsByTarget(target);
  this.setInsert(results, matches);
}.bind(this);
658559
659560 attempt ( host ) ;
0 commit comments