55// Makes a copy of each file at filename.bak.
66//
77// Usage:
8- // (install node and npm)
98// cd https-everywhere
10- // ./makecrx.sh
9+ // ./makecrx.sh # to build default.rulesets
1110// cd rewriter
12- // js rewriter.js ~/path/to/my/webapp
11+ // (install node and npm)
12+ // npm install
13+ // node rewriter.js ~/path/to/my/webapp
14+ // cd ~/path/to/my/webapp
15+ // git diff
1316
1417var path = require ( "path" ) ,
1518 fs = require ( "fs" ) ,
@@ -42,10 +45,23 @@ function processDir(dir) {
4245 . pipe ( es . mapSync ( function ( entry ) {
4346 var filename = path . join ( dir , entry . path ) ;
4447 console . log ( "Rewriting " + filename ) ;
45- processFile ( filename ) ;
48+ try {
49+ processFile ( filename ) ;
50+ } catch ( e ) {
51+ console . log ( filename , e ) ;
52+ }
4653 } ) ) ;
4754}
4855
56+ // Overwrite the default URI find_uri_expression with a modified one that
57+ // mitigates a catastrophic backtracking issue common in CSS.
58+ // The workaround was to insist that URLs start with http, since those are the
59+ // only ones we want to rewrite anyhow. Note that this may still go exponential
60+ // on certain inputs. http://www.regular-expressions.info/catastrophic.html
61+ // Example string that blows up URI.withinString:
62+ // image:url(http://img.youtube.com/vi/x7f
63+ URI . find_uri_expression = / \b ( (?: h t t p : (?: \/ { 1 , 3 } | [ a - z 0 - 9 % ] ) | w w w \d { 0 , 3 } [ . ] | [ a - z 0 - 9 . \- ] + [ . ] [ a - z ] { 2 , 4 } \/ ) (?: [ ^ \s ( ) < > ] + ) + (?: \( ( [ ^ \s ( ) < > ] + | ( \( [ ^ \s ( ) < > ] + \) ) ) * \) | [ ^ \s ` ! ( ) \[ \] { } ; : ' " . , < > ? « » “ ” ‘ ’ ] ) ) / ig;
64+
4965function processFile ( filename ) {
5066 var contents = fs . readFileSync ( filename , 'utf8' ) ;
5167 var rewrittenFile = URI . withinString ( contents , function ( url ) {
@@ -69,14 +85,15 @@ function processFile(filename) {
6985 } ) ;
7086
7187 fs . writeFileSync ( filename + ".new" , rewrittenFile ) ;
72- // fs.renameSync(filename, filename + ".bak");
73- // fs.renameSync(filename + ".new", filename);
88+ fs . renameSync ( filename , filename + ".bak" ) ;
89+ fs . renameSync ( filename + ".new" , filename ) ;
7490}
7591
/**
 * Loads the bundled rulesets into the module-level `ruleSets` variable.
 *
 * Reads `../pkg/crx/rules/default.rulesets` (resolved relative to this
 * script's directory), parses it as XML, creates a fresh `rules.RuleSets`
 * instance and feeds the parsed document to it via `addFromXml`.
 *
 * Throws if the rulesets file cannot be read; per the usage notes at the top
 * of this file it is produced by running `./makecrx.sh` first.
 */
function loadRuleSets() {
  console.log("Loading rules...");
  var fileContents = fs.readFileSync(path.join(__dirname, '../pkg/crx/rules/default.rulesets'), 'utf8');
  // The XML document must be built before it is handed to addFromXml();
  // without this line `xml` is an undeclared identifier and the call below
  // throws a ReferenceError.
  var xml = new DOMParser().parseFromString(fileContents, 'text/xml');
  // NOTE(review): the trailing `{}` is presumably the rule-state/storage
  // object expected by RuleSets -- confirm against rules.js.
  ruleSets = new rules.RuleSets("fake user agent", lrucache.LRUCache, {});
  ruleSets.addFromXml(xml);
}
8198
8299loadRuleSets ( ) ;
0 commit comments