Skip to content

Commit e66896f

Browse files
RReverser authored and Hainish committed
Improve trivialize-rules to detect complex rules
Explode regular expressions and see if it results in a small list of static URLs that can be trivialized. Note: rulesets with <securecookie /> are skipped for now because trivializing those can lead to certain hosts being removed from targets even though cookies should still be secured there. In the future, we can analyze securecookie as well to detect static matches and trivialize more rulesets.
1 parent 2d40847 commit e66896f

File tree

3 files changed

+180
-70
lines changed

3 files changed

+180
-70
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/* eslint-env es6, node */
2+
3+
const { parse } = require('regulex');
4+
5+
class UnsupportedRegExp extends Error {}
6+
7+
/**
 * Enumerates every string pattern a regular expression can match and reports
 * each one through `callback`.
 *
 * The regular expression is parsed with `parse` and its node list is walked
 * recursively. A `*` in an emitted string marks a position where arbitrary
 * text may appear:
 *   - a leading `*` means the expression is not anchored with `^`;
 *   - a trailing `*` means the expression is not anchored with `$`.
 *
 * Supported constructs: literal characters, groups, alternations, `^`, `$`,
 * a trailing lookahead, optional items (`?` / `{0,1}` / `{1}`), and character
 * sets made of literal characters plus at most one small range.
 *
 * The parsed tree is never mutated: the same node can be reached several
 * times (once per alternation branch that precedes it), so every visit must
 * see the node in its original form.
 *
 * @param {string} re - Source of the regular expression to explode.
 * @param {function(string): void} callback - Invoked once per enumerated string.
 * @throws {UnsupportedRegExp} When a construct cannot be enumerated; the
 *   message is the raw text of the offending node.
 */
function explodeRegExp(re, callback) {
  // A charset range must span fewer than this many code units to be expanded.
  const MAX_RANGE_SPAN = 10;

  (function buildUrls(str, items) {
    if (items.length === 0) {
      // Ran out of nodes without hitting `$`: anything may follow.
      callback(str + '*');
      return;
    }

    const [first, ...rest] = items;

    if (first.repeat) {
      const { min, max } = first.repeat;
      if (max !== 1) throw new UnsupportedRegExp(first.raw);

      // Work on a copy without the quantifier instead of deleting it from the
      // shared node, otherwise later alternation branches would no longer see
      // the item as optional.
      const single = Object.assign({}, first, { repeat: undefined });

      if (min === 0) {
        buildUrls(str, rest);
      }
      buildUrls(str, [single].concat(rest));
      return;
    }

    switch (first.type) {
      case 'group': {
        buildUrls(str, first.sub.concat(rest));
        return;
      }

      case 'assert': {
        if (first.assertionType === 'AssertBegin') {
          if (str !== '*') return; // can't match begin not at the beginning
          buildUrls('', rest);
          return;
        }
        if (first.assertionType === 'AssertEnd') {
          // Anchored end: emit without the trailing wildcard.
          callback(str);
          return;
        }
        if (first.assertionType === 'AssertLookahead' && rest.length === 0) {
          buildUrls(str, first.sub.concat(rest));
          return;
        }
        break;
      }

      case 'choice': {
        for (const branch of first.branches) {
          buildUrls(str, branch.concat(rest));
        }
        return;
      }

      case 'exact': {
        buildUrls(str + first.chars, rest);
        return;
      }

      case 'charset': {
        // Decide whether the set is enumerable BEFORE emitting anything, so a
        // negated set is never expanded as if it were positive and no partial
        // results leak out ahead of the exception.
        if (first.exclude || first.classes.length > 0 || first.ranges.length > 1) {
          break;
        }

        const alternatives = [];
        if (first.ranges.length === 1) {
          const range = first.ranges[0];
          const from = range.charCodeAt(0);
          const to = range.charCodeAt(1);
          if (to - from >= MAX_RANGE_SPAN) {
            break; // large range, would explode
          }
          for (let code = from; code <= to; code++) {
            alternatives.push(String.fromCharCode(code));
          }
        }
        alternatives.push(...first.chars);

        for (const c of alternatives) {
          buildUrls(str + c, rest);
        }
        return;
      }
    }

    throw new UnsupportedRegExp(first.raw);
  })('*', parse(re).tree);
}
83+
84+
module.exports = {
85+
UnsupportedRegExp,
86+
explodeRegExp
87+
};

utils/trivialize-rules/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
"version": "1.0.0",
44
"main": "trivialize-rules.js",
55
"dependencies": {
6+
"chalk": "^2.1.0",
67
"highland": "^2.7.1",
7-
"progress": "^1.1.8",
8-
"verbal-expressions": "^0.2.1",
8+
"regulex": "0.0.2",
99
"xml2js": "^0.4.16"
1010
},
1111
"devDependencies": {},

utils/trivialize-rules/trivialize-rules.js

Lines changed: 91 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,18 @@
1-
"use strict";
1+
/* eslint-env es6, node */
2+
3+
'use strict';
4+
25
const _ = require('highland');
36
const fs = require('fs');
47
const readDir = _.wrapCallback(fs.readdir);
58
const readFile = _.wrapCallback(fs.readFile);
69
const writeFile = _.wrapCallback(fs.writeFile);
710
const parseXML = _.wrapCallback(require('xml2js').parseString);
8-
const ProgressBar = require('progress');
9-
const VerEx = require('verbal-expressions');
10-
let bar;
11+
const { explodeRegExp, UnsupportedRegExp } = require('./explode-regexp');
12+
const chalk = require('chalk');
1113

1214
const rulesDir = `${__dirname}/../../src/chrome/content/rules`;
1315

14-
const hostPartRe = VerEx().anyOf(/\w\-/).oneOrMore();
15-
const hostPartWithDotRe = VerEx().find(hostPartRe).then('\\.');
16-
17-
const staticRegExp = VerEx()
18-
.startOfLine()
19-
.then('^http://(')
20-
.beginCapture()
21-
.multiple(VerEx().find(hostPartWithDotRe).then('|'))
22-
.then(hostPartWithDotRe)
23-
.endCapture()
24-
.then(')')
25-
.beginCapture()
26-
.maybe('?')
27-
.endCapture()
28-
.beginCapture()
29-
.multiple(hostPartWithDotRe)
30-
.then(hostPartRe)
31-
.endCapture()
32-
.then('/')
33-
.endOfLine()
34-
.stopAtFirst()
35-
.searchOneLine();
36-
3716
const tagsRegExps = new Map();
3817

3918
function createTagsRegexp(tag) {
@@ -47,25 +26,30 @@ function createTagsRegexp(tag) {
4726
}
4827

4928
function replaceXML(source, tag, newXML) {
50-
let pos;
29+
let pos, indent;
5130
let re = createTagsRegexp(tag);
31+
5232
source = source.replace(re, (match, index) => {
33+
if (source.lastIndexOf('<!--', index) > source.lastIndexOf('-->', index)) {
34+
// inside a comment
35+
return match;
36+
}
5337
if (pos === undefined) {
5438
pos = index;
39+
indent = source.slice(source.lastIndexOf('\n', index) + 1, index);
5540
}
5641
return '';
5742
});
43+
5844
if (pos === undefined) {
5945
throw new Error(`${re}: <${tag} /> was not found in ${source}`);
6046
}
61-
return source.slice(0, pos) + newXML + source.slice(pos);
47+
48+
return source.slice(0, pos) + newXML.join('\n' + indent) + source.slice(pos);
6249
}
6350

6451
const files =
6552
readDir(rulesDir)
66-
.tap(rules => {
67-
bar = new ProgressBar(':bar', { total: rules.length, stream: process.stdout });
68-
})
6953
.sequence()
7054
.filter(name => name.endsWith('.xml'));
7155

@@ -95,73 +79,112 @@ function isTrivial(rule) {
9579
}
9680

9781
files.fork().zipAll([ sources.fork(), rules ]).map(([name, source, ruleset]) => {
98-
bar.tick();
82+
function createTag(tagName, colour, print) {
83+
return (strings, ...values) => {
84+
let result = `[${tagName}] ${chalk.bold(name)}: ${strings[0]}`;
85+
for (let i = 1; i < strings.length; i++) {
86+
let value = values[i - 1];
87+
value = Array.isArray(value) ? value.join(', ') : value.toString();
88+
result += chalk.blue(value) + strings[i];
89+
}
90+
print(colour(result));
91+
};
92+
}
93+
94+
const warn = createTag('WARN', chalk.yellow, console.warn);
95+
const info = createTag('INFO', chalk.green, console.info);
96+
const fail = createTag('FAIL', chalk.red, console.error);
97+
98+
if (ruleset.securecookie) {
99+
return;
100+
}
99101

100-
let target = ruleset.target.map(target => target.$.host);
101-
let rule = ruleset.rule.map(rule => rule.$);
102+
let targets = ruleset.target.map(target => target.$.host);
103+
let rules = ruleset.rule.map(rule => rule.$);
102104

103-
if (rule.length === 1 && isTrivial(rule[0])) {
105+
if (rules.length === 1 && isTrivial(rules[0])) {
104106
return;
105107
}
106108

107-
let targetRe = new RegExp(`^${target.map(target => `(${target.replace(/\./g, '\\.').replace(/\*/g, '.*')})`).join('|')}$`);
109+
let targetRe = new RegExp(`^(?:${targets.map(target => target.replace(/\./g, '\\.').replace(/\*/g, '.*')).join('|')})$`);
108110
let domains = [];
109111

110112
function isStatic(rule) {
111113
if (isTrivial(rule)) {
112-
domains = domains.concat(target);
114+
domains = domains.concat(targets);
113115
return true;
114116
}
115117

116118
const { from, to } = rule;
117-
118-
const match = from.match(staticRegExp);
119-
120-
if (!match) {
121-
// console.error(from);
119+
const fromRe = new RegExp(from);
120+
let localDomains = [];
121+
let unknownDomains = [];
122+
let nonTrivialUrls = [];
123+
let suspiciousStrings = [];
124+
125+
try {
126+
explodeRegExp(from, url => {
127+
let parsed = url.match(/^http(s?):\/\/(.+?)(?::(\d+))?\/(.*)$/);
128+
if (!parsed) {
129+
suspiciousStrings.push(url);
130+
return;
131+
}
132+
let [, secure, host, port = '80', path] = parsed;
133+
if (!targetRe.test(host)) {
134+
unknownDomains.push(host);
135+
} else if (!secure && port === '80' && path === '*' && url.replace(fromRe, to) === url.replace(/^http:/, 'https:')) {
136+
localDomains.push(host);
137+
} else {
138+
nonTrivialUrls.push(url);
139+
}
140+
});
141+
} catch (e) {
142+
if (!(e instanceof UnsupportedRegExp)) {
143+
throw e;
144+
}
145+
if (e.message === '/*' || e.message === '/+') {
146+
fail`Suspicious ${e.message} while traversing ${from} => ${to}`;
147+
} else {
148+
warn`Unsupported regexp part ${e.message} while traversing ${from} => ${to}`;
149+
}
122150
return false;
123151
}
124152

125-
const subDomains = match[1].split('|').map(item => item.slice(0, -2));
126-
const baseDomain = match[3].replace(/\\(.)/g, '$1');
127-
const localDomains = subDomains.map(sub => `${sub}.${baseDomain}`);
128-
129-
if (to !== `https://$1${baseDomain}/`) {
130-
console.error(from, to);
131-
return false;
153+
if (suspiciousStrings.length > 0) {
154+
fail`${from} matches ${suspiciousStrings} which don't look like URLs`;
132155
}
133-
134-
let mismatch = false;
135-
136-
for (const domain of localDomains) {
137-
if (!targetRe.test(domain)) {
138-
console.error(target, domain, from);
139-
mismatch = true;
140-
}
156+
157+
if (unknownDomains.length > 0) {
158+
fail`${from} matches ${unknownDomains} which are not in targets ${targets}`;
141159
}
142160

143-
if (mismatch) {
161+
if (suspiciousStrings.length > 0 || unknownDomains.length > 0) {
144162
return false;
145163
}
146164

147-
if (match[2] || targetRe.test(baseDomain)) {
148-
localDomains.unshift(baseDomain);
165+
if (nonTrivialUrls.length > 0) {
166+
if (localDomains.length > 0) {
167+
warn`${from} => ${to} can trivialize ${localDomains} but not urls like ${nonTrivialUrls}`;
168+
}
169+
return false;
149170
}
150-
171+
151172
domains = domains.concat(localDomains);
152-
173+
153174
return true;
154175
}
155176

156-
if (!rule.every(isStatic)) return;
157-
177+
if (!rules.every(isStatic)) return;
178+
179+
info`trivialized`;
180+
158181
domains = Array.from(new Set(domains));
159182

160-
if (domains.slice().sort().join('\n') !== target.sort().join('\n')) {
161-
source = replaceXML(source, 'target', domains.map(domain => `<target host="${domain}" />`).join('\n\t'));
183+
if (domains.slice().sort().join('\n') !== targets.sort().join('\n')) {
184+
source = replaceXML(source, 'target', domains.map(domain => `<target host="${domain}" />`));
162185
}
163186

164-
source = replaceXML(source, 'rule', '<rule from="^http:" to="https:" />');
187+
source = replaceXML(source, 'rule', ['<rule from="^http:" to="https:" />']);
165188

166189
return writeFile(`${rulesDir}/${name}`, source);
167190

0 commit comments

Comments
 (0)