Skip to content

Commit df19ebd

Browse files
authored
br_table optimizations (WebAssembly#1502)
Inspired by WebAssembly#1501 * remove unneeded appearances of the default switch target (at the front or back of the list of targets) * optimize a switch with 0, 1 or 2 targets into an if or if-chain * optimize a br_if br pair when they have the same target Makes e.g. fastcomp libc++ 2% smaller. Noticeable improvements on other things like box2d etc.
1 parent 2751770 commit df19ebd

20 files changed

Lines changed: 682 additions & 123 deletions

src/mixed_arena.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ class ArenaVectorBase {
208208
usedElements++;
209209
}
210210

211+
T& front() const {
212+
assert(usedElements > 0);
213+
return data[0];
214+
}
215+
211216
void erase(Iterator start_it, Iterator end_it) {
212217
assert(start_it.parent == end_it.parent && start_it.parent == this);
213218
assert(start_it.index <= end_it.index && end_it.index <= usedElements);

src/passes/RemoveUnusedBrs.cpp

Lines changed: 110 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
137137
self->stopValueFlow();
138138
} else if (curr->is<Loop>()) {
139139
// do nothing - it's ok for values to flow out
140+
} else if (auto* sw = curr->dynCast<Switch>()) {
141+
self->stopFlow();
142+
self->optimizeSwitch(sw);
140143
} else {
141144
// anything else stops the flow
142145
self->stopFlow();
@@ -171,6 +174,94 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
171174
loops.push_back(curr);
172175
}
173176

177+
void optimizeSwitch(Switch* curr) {
178+
// if the final element is the default, we don't need it
179+
while (!curr->targets.empty() && curr->targets.back() == curr->default_) {
180+
curr->targets.pop_back();
181+
}
182+
// if the first element is the default, we can remove it by shifting everything (which
183+
// does add a subtraction of a constant, but often that is worth it as the constant can
184+
// be folded away and/or we remove multiple elements here)
185+
Index removable = 0;
186+
while (removable < curr->targets.size() && curr->targets[removable] == curr->default_) {
187+
removable++;
188+
}
189+
if (removable > 0) {
190+
for (Index i = removable; i < curr->targets.size(); i++) {
191+
curr->targets[i - removable] = curr->targets[i];
192+
}
193+
curr->targets.resize(curr->targets.size() - removable);
194+
Builder builder(*getModule());
195+
curr->condition = builder.makeBinary(SubInt32,
196+
curr->condition,
197+
builder.makeConst(Literal(int32_t(removable)))
198+
);
199+
}
200+
// when there isn't a value, we can do some trivial optimizations without worrying about
201+
// the value being executed before the condition
202+
if (curr->value) return;
203+
if (curr->targets.size() == 0) {
204+
// a switch with just a default always goes there
205+
Builder builder(*getModule());
206+
replaceCurrent(builder.makeSequence(
207+
builder.makeDrop(curr->condition),
208+
builder.makeBreak(curr->default_)
209+
));
210+
} else if (curr->targets.size() == 1) {
211+
// a switch with two options is basically an if
212+
Builder builder(*getModule());
213+
replaceCurrent(builder.makeIf(
214+
curr->condition,
215+
builder.makeBreak(curr->default_),
216+
builder.makeBreak(curr->targets.front())
217+
));
218+
} else {
219+
// there are also some other cases where we want to convert a switch into ifs,
220+
// especially if the switch is large and we are focusing on size.
221+
// an especially egregious case is a switch like this:
222+
// [a b b [..] b b c] with default b
223+
// (which may be arrived at after we trim away excess default values on both
224+
// sides). in this case, we really have 3 values in a simple form, so it is the
225+
// next logical case after handling 1 and 2 values right above here.
226+
// to optimize this, we must add a local + a bunch of nodes (if*2, tee, eq,
227+
// get, const, break*3), so the table must be big enough for it to make sense
228+
229+
// How many targets we need when shrinking. This is literally the size at which
230+
// the transformation begins to be smaller.
231+
const uint32_t MIN_SHRINK = 13;
232+
// How many targets we need when not shrinking, in which case, 2 ifs may be slower,
233+
// so we do this when the table is ridiculously large for one with just 3 values
234+
// in it.
235+
const uint32_t MIN_GENERAL = 128;
236+
237+
auto shrink = getPassRunner()->options.shrinkLevel > 0;
238+
if ((curr->targets.size() >= MIN_SHRINK && shrink) ||
239+
(curr->targets.size() >= MIN_GENERAL && !shrink)) {
240+
for (Index i = 1; i < curr->targets.size() - 1; i++) {
241+
if (curr->targets[i] != curr->default_) {
242+
return;
243+
}
244+
}
245+
// great, we are in that case, optimize
246+
Builder builder(*getModule());
247+
auto temp = builder.addVar(getFunction(), i32);
248+
Expression* z;
249+
replaceCurrent(z = builder.makeIf(
250+
builder.makeTeeLocal(temp, curr->condition),
251+
builder.makeIf(
252+
builder.makeBinary(EqInt32,
253+
builder.makeGetLocal(temp, i32),
254+
builder.makeConst(Literal(int32_t(curr->targets.size() - 1)))
255+
),
256+
builder.makeBreak(curr->targets.back()),
257+
builder.makeBreak(curr->default_)
258+
),
259+
builder.makeBreak(curr->targets.front())
260+
));
261+
}
262+
}
263+
}
264+
174265
void visitIf(If* curr) {
175266
if (!curr->ifFalse) {
176267
// if without an else. try to reduce if (condition) br => br_if (condition)
@@ -482,25 +573,32 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
482573
}
483574
}
484575
if (list.size() >= 2) {
485-
// Join adjacent br_ifs to the same target, making one br_if with
486-
// a "selectified" condition that executes both.
487-
if (shrink) {
488-
for (Index i = 0; i < list.size() - 1; i++) {
489-
auto* br1 = list[i]->dynCast<Break>();
490-
// avoid unreachable brs, as they are dead code anyhow, and after merging
491-
// them the outer scope could need type changes
492-
if (!br1 || !br1->condition || br1->type == unreachable) continue;
493-
auto* br2 = list[i + 1]->dynCast<Break>();
494-
if (!br2 || !br2->condition || br2->type == unreachable) continue;
495-
if (br1->name == br2->name) {
496-
assert(!br1->value && !br2->value);
576+
// combine/optimize adjacent br_ifs + a br (maybe _if) right after it
577+
for (Index i = 0; i < list.size() - 1; i++) {
578+
auto* br1 = list[i]->dynCast<Break>();
579+
// avoid unreachable brs, as they are dead code anyhow, and after merging
580+
// them the outer scope could need type changes
581+
if (!br1 || !br1->condition || br1->type == unreachable) continue;
582+
assert(!br1->value);
583+
auto* br2 = list[i + 1]->dynCast<Break>();
584+
if (!br2 || br1->name != br2->name) continue;
585+
assert(!br2->value); // same target as previous, which has no value
586+
// a br_if and then a br[_if] with the same target right after it
587+
if (br2->condition) {
588+
if (shrink && br2->type != unreachable) {
589+
// Join adjacent br_ifs to the same target, making one br_if with
590+
// a "selectified" condition that executes both.
497591
if (!EffectAnalyzer(passOptions, br2->condition).hasSideEffects()) {
498592
// it's ok to execute them both, do it
499593
Builder builder(*getModule());
500594
br1->condition = builder.makeBinary(OrInt32, br1->condition, br2->condition);
501595
ExpressionManipulator::nop(br2);
502596
}
503597
}
598+
} else {
599+
// merge, we go there anyhow
600+
Builder builder(*getModule());
601+
list[i] = builder.makeDrop(br1->condition);
504602
}
505603
}
506604
// combine adjacent br_ifs that test the same value into a br_table,

src/passes/pass.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ static void dumpWast(Name name, Module* wasm) {
197197
// write out the wast
198198
static int counter = 0;
199199
auto fullName = std::string("byn-") + std::to_string(counter++) + "-" + name.str + ".wasm";
200+
Colors::disable();
200201
ModuleWriter writer;
201202
writer.setBinary(false); // TODO: add an option for binary
202203
writer.write(*wasm, fullName);

test/debugInfo.fromasm

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -138,30 +138,24 @@
138138
(i32.const 1369188723)
139139
)
140140
(block $switch (result i32)
141-
(block $switch-default
142-
(block $switch-case
143-
(br_table $switch-case $switch-default
144-
(i32.sub
145-
(get_local $1)
146-
(i32.const -1108210269)
147-
)
141+
(br_if $__rjti$0
142+
(i32.eqz
143+
(i32.sub
144+
(get_local $1)
145+
(i32.const -1108210269)
148146
)
149147
)
150-
(br $__rjti$0)
151148
)
152149
(i32.const 0)
153150
)
154151
(block $switch0 (result i32)
155-
(block $switch-default2
156-
(block $switch-case1
157-
(br_table $switch-case1 $switch-default2
158-
(i32.sub
159-
(get_local $1)
160-
(i32.const 1369188723)
161-
)
152+
(br_if $__rjti$0
153+
(i32.eqz
154+
(i32.sub
155+
(get_local $1)
156+
(i32.const 1369188723)
162157
)
163158
)
164-
(br $__rjti$0)
165159
)
166160
(i32.const 0)
167161
)

test/debugInfo.fromasm.clamp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -138,30 +138,24 @@
138138
(i32.const 1369188723)
139139
)
140140
(block $switch (result i32)
141-
(block $switch-default
142-
(block $switch-case
143-
(br_table $switch-case $switch-default
144-
(i32.sub
145-
(get_local $1)
146-
(i32.const -1108210269)
147-
)
141+
(br_if $__rjti$0
142+
(i32.eqz
143+
(i32.sub
144+
(get_local $1)
145+
(i32.const -1108210269)
148146
)
149147
)
150-
(br $__rjti$0)
151148
)
152149
(i32.const 0)
153150
)
154151
(block $switch0 (result i32)
155-
(block $switch-default2
156-
(block $switch-case1
157-
(br_table $switch-case1 $switch-default2
158-
(i32.sub
159-
(get_local $1)
160-
(i32.const 1369188723)
161-
)
152+
(br_if $__rjti$0
153+
(i32.eqz
154+
(i32.sub
155+
(get_local $1)
156+
(i32.const 1369188723)
162157
)
163158
)
164-
(br $__rjti$0)
165159
)
166160
(i32.const 0)
167161
)

test/debugInfo.fromasm.clamp.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/debugInfo.fromasm.imprecise

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -125,30 +125,24 @@
125125
(i32.const 1369188723)
126126
)
127127
(block $switch (result i32)
128-
(block $switch-default
129-
(block $switch-case
130-
(br_table $switch-case $switch-default
131-
(i32.sub
132-
(get_local $1)
133-
(i32.const -1108210269)
134-
)
128+
(br_if $__rjti$0
129+
(i32.eqz
130+
(i32.sub
131+
(get_local $1)
132+
(i32.const -1108210269)
135133
)
136134
)
137-
(br $__rjti$0)
138135
)
139136
(i32.const 0)
140137
)
141138
(block $switch0 (result i32)
142-
(block $switch-default2
143-
(block $switch-case1
144-
(br_table $switch-case1 $switch-default2
145-
(i32.sub
146-
(get_local $1)
147-
(i32.const 1369188723)
148-
)
139+
(br_if $__rjti$0
140+
(i32.eqz
141+
(i32.sub
142+
(get_local $1)
143+
(i32.const 1369188723)
149144
)
150145
)
151-
(br $__rjti$0)
152146
)
153147
(i32.const 0)
154148
)

test/debugInfo.fromasm.imprecise.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/debugInfo.fromasm.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/emcc_hello_world.fromasm

Lines changed: 29 additions & 15 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)