Skip to content

Commit 900f52b

Browse files
committed
Merge branch 'rewriter' of github.com:jsha/https-everywhere into rewriter
Conflicts: rewriter/rewriter.js
2 parents 673924e + dd884e1 commit 900f52b

File tree

4 files changed

+51
-32
lines changed

4 files changed

+51
-32
lines changed

chromium/background.js

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,20 @@
1-
function getRuleXml() {
2-
var output = "";
3-
for(var i = 0; i < rule_list.length; i++) {
4-
var xhr = new XMLHttpRequest();
5-
// Use blocking XHR to ensure everything is loaded by the time
6-
// we return.
7-
xhr.open("GET", chrome.extension.getURL(rule_list[i]), false);
8-
xhr.send(null);
9-
// Get file contents
10-
if (xhr.readyState != 4) {
11-
return;
12-
}
13-
// XXX TODO: I think you can't concat XML objects like this
14-
output += xhr.responseXML;
1+
function getRuleXml(url) {
2+
var xhr = new XMLHttpRequest();
3+
// Use blocking XHR to ensure everything is loaded by the time
4+
// we return.
5+
xhr.open("GET", chrome.extension.getURL(url), false);
6+
xhr.send(null);
7+
// Get file contents
8+
if (xhr.readyState != 4) {
9+
return;
1510
}
16-
return output;
11+
return xhr.responseXML;
1712
}
1813

19-
var all_rules = new RuleSets(navigator.userAgent, LRUCache, getRuleXml(), localStorage);
14+
var all_rules = new RuleSets(navigator.userAgent, LRUCache, localStorage);
15+
for (var i = 0; i < rule_list.length; i++) {
16+
all_rules.addFromXml(getRuleXml(rule_list[i]));
17+
}
2018
var wr = chrome.webRequest;
2119

2220
/*

chromium/rules.js

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ RuleSet.prototype = {
7171
};
7272

7373

74-
function RuleSets(userAgent, cache, ruleXml, ruleActiveStates) {
74+
function RuleSets(userAgent, cache, ruleActiveStates) {
7575
// Load rules into structure
7676
this.targets = {};
7777
this.userAgent = userAgent;
@@ -85,14 +85,15 @@ function RuleSets(userAgent, cache, ruleXml, ruleActiveStates) {
8585

8686
// A hash of rule name -> active status (true/false).
8787
this.ruleActiveStates = ruleActiveStates;
88-
89-
var sets = ruleXml.getElementsByTagName("ruleset");
90-
for (var i = 0; i < sets.length; ++i) {
91-
this.parseOneRuleset(sets[i]);
92-
}
9388
}
9489

9590
RuleSets.prototype = {
91+
addFromXml: function(ruleXml) {
92+
var sets = ruleXml.getElementsByTagName("ruleset");
93+
for (var i = 0; i < sets.length; ++i) {
94+
this.parseOneRuleset(sets[i]);
95+
}
96+
},
9697

9798
localPlatformRegexp: (function() {
9899
if (/(OPR|Opera)[\/\s](\d+\.\d+)/.test(this.userAgent)) {
@@ -314,4 +315,7 @@ RuleSets.prototype = {
314315
return null;
315316
}
316317
};
317-
exports.RuleSets = RuleSets;
318+
319+
// Export for HTTPS Rewriter if applicable.
320+
if (typeof exports != 'undefined')
321+
exports.RuleSets = RuleSets;

rewriter/README.md

Whitespace-only changes.

rewriter/rewriter.js

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
// Makes a copy of each file at filename.bak.
66
//
77
// Usage:
8-
// (install node and npm)
98
// cd https-everywhere
10-
// ./makecrx.sh
9+
// ./makecrx.sh # to build default.rulesets
1110
// cd rewriter
12-
// js rewriter.js ~/path/to/my/webapp
11+
// (install node and npm)
12+
// npm install
13+
// node rewriter.js ~/path/to/my/webapp
14+
// cd ~/path/to/my/webapp
15+
// git diff
1316

1417
var path = require("path"),
1518
fs = require("fs"),
@@ -42,10 +45,23 @@ function processDir(dir) {
4245
.pipe(es.mapSync(function (entry) {
4346
var filename = path.join(dir, entry.path);
4447
console.log("Rewriting " + filename);
45-
processFile(filename);
48+
try {
49+
processFile(filename);
50+
} catch(e) {
51+
console.log(filename, e);
52+
}
4653
}));
4754
}
4855

56+
// Overwrite the default URI find_uri_expression with a modified one that
57+
// mitigates a catastrophic backtracking issue common in CSS.
58+
// The workaround was to insist that URLs start with http, since those are the
59+
// only ones we want to rewrite anyhow. Note that this may still go exponential
60+
// on certain inputs. http://www.regular-expressions.info/catastrophic.html
61+
// Example string that blows up URI.withinString:
62+
// image:url(http://img.youtube.com/vi/x7f
63+
URI.find_uri_expression = /\b((?:http:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+)+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»]))/ig;
64+
4965
function processFile(filename) {
5066
var contents = fs.readFileSync(filename, 'utf8');
5167
var rewrittenFile = URI.withinString(contents, function(url) {
@@ -69,14 +85,15 @@ function processFile(filename) {
6985
});
7086

7187
fs.writeFileSync(filename + ".new", rewrittenFile);
72-
//fs.renameSync(filename, filename + ".bak");
73-
//fs.renameSync(filename + ".new", filename);
88+
fs.renameSync(filename, filename + ".bak");
89+
fs.renameSync(filename + ".new", filename);
7490
}
7591

7692
function loadRuleSets() {
77-
var fileContents = fs.readFileSync(path.join(__dirname, '../pkg/crx/rules/default.rulesets'), 'utf-8');
78-
var xml = new DOMParser().parseFromString(fileContents, 'text/xml');
79-
ruleSets = new rules.RuleSets("fake user agent", lrucache.LRUCache, xml, {});
93+
console.log("Loading rules...");
94+
var fileContents = fs.readFileSync(path.join(__dirname, '../pkg/crx/rules/default.rulesets'), 'utf8');
95+
ruleSets = new rules.RuleSets("fake user agent", lrucache.LRUCache, {});
96+
// addFromXml() calls getElementsByTagName() on its argument, so the raw file
// text must be parsed into an XML document before it is handed over.
var xml = new DOMParser().parseFromString(fileContents, 'text/xml');
ruleSets.addFromXml(xml);
8097
}
8198

8299
loadRuleSets();

0 commit comments

Comments
 (0)