Skip to content

Commit 29f96b6

Browse files
committed
Add a new pattern to instruction selector to utilize UMULL supported by ARM64
https://bugs.webkit.org/show_bug.cgi?id=228721 Reviewed by Saam Barati. Unsigned Multiply Long (UMULL) multiplies two 32-bit register values, and writes the result to the 64-bit destination register. This instruction is an alias of the UMADDL instruction with XZR as the addend. umull xd wn wm The equivalent pattern is: d = ZExt32(n) * ZExt32(m) Given B3 IR: Int @0 = ArgumentReg(%x0) Int @1 = Trunc(@0) Int @2 = ArgumentReg(%x1) Int @3 = Trunc(@2) Int @4 = ZExt32(@1) Int @5 = ZExt32(@3) Int @6 = Mul(@4, @5) Void @7 = Return(@6, Terminal) // Old optimized AIR Move %x0, %x0, @4 Move %x1, %x1, @5 Mul %x0, %x1, %x0, @6 Ret %x0, @7 // New optimized AIR MultiplyZeroExtend32 %x0, %x1, %x0, @6 Ret %x0, @7 * assembler/MacroAssemblerARM64.h: (JSC::MacroAssemblerARM64::multiplyZeroExtend32): * assembler/testmasm.cpp: (JSC::testMultiplyZeroExtend32): * b3/B3LowerToAir.cpp: * b3/air/AirOpcode.opcodes: * b3/testb3.h: * b3/testb3_2.cpp: (testMulArgs32SignExtend): (testMulArgs32ZeroExtend): * b3/testb3_3.cpp: (addArgTests): Canonical link: https://commits.webkit.org/240205@main git-svn-id: https://svn.webkit.org/repository/webkit/trunk@280583 268f45cc-cd09-0410-ab3c-d52691b4dbfc
1 parent 97f3fdf commit 29f96b6

8 files changed

Lines changed: 158 additions & 46 deletions

File tree

Source/JavaScriptCore/ChangeLog

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,50 @@
1+
2021-08-02 Yijia Huang <yijia_huang@apple.com>
2+
3+
Add a new pattern to instruction selector to utilize UMULL supported by ARM64
4+
https://bugs.webkit.org/show_bug.cgi?id=228721
5+
6+
Reviewed by Saam Barati.
7+
8+
Unsigned Multiply Long (UMULL) multiplies two 32-bit register values, and writes the
9+
result to the 64-bit destination register. This instruction is an alias of the UMADDL instruction with XZR as the addend.
10+
11+
umull xd wn wm
12+
13+
The equivalent pattern is: d = ZExt32(n) * ZExt32(m)
14+
15+
Given B3 IR:
16+
Int @0 = ArgumentReg(%x0)
17+
Int @1 = Trunc(@0)
18+
Int @2 = ArgumentReg(%x1)
19+
Int @3 = Trunc(@2)
20+
Int @4 = ZExt32(@1)
21+
Int @5 = ZExt32(@3)
22+
Int @6 = Mul(@4, @5)
23+
Void @7 = Return(@6, Terminal)
24+
25+
// Old optimized AIR
26+
Move %x0, %x0, @4
27+
Move %x1, %x1, @5
28+
Mul %x0, %x1, %x0, @6
29+
Ret %x0, @7
30+
31+
// New optimized AIR
32+
MultiplyZeroExtend32 %x0, %x1, %x0, @6
33+
Ret %x0, @7
34+
35+
* assembler/MacroAssemblerARM64.h:
36+
(JSC::MacroAssemblerARM64::multiplyZeroExtend32):
37+
* assembler/testmasm.cpp:
38+
(JSC::testMultiplyZeroExtend32):
39+
* b3/B3LowerToAir.cpp:
40+
* b3/air/AirOpcode.opcodes:
41+
* b3/testb3.h:
42+
* b3/testb3_2.cpp:
43+
(testMulArgs32SignExtend):
44+
(testMulArgs32ZeroExtend):
45+
* b3/testb3_3.cpp:
46+
(addArgTests):
47+
148
2021-08-02 Yijia Huang <yijia_huang@apple.com>
249

350
Add new patterns to instruction selector to utilize AND/EOR/ORR-with-shift supported by ARM64

Source/JavaScriptCore/assembler/MacroAssemblerARM64.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,11 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
953953
m_assembler.smull(dest, left, right);
954954
}
955955

956+
void multiplyZeroExtend32(RegisterID left, RegisterID right, RegisterID dest)
957+
{
958+
m_assembler.umull(dest, left, right);
959+
}
960+
956961
void div32(RegisterID dividend, RegisterID divisor, RegisterID dest)
957962
{
958963
m_assembler.sdiv<32>(dest, dividend, divisor);

Source/JavaScriptCore/assembler/testmasm.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,26 @@ void testMultiplySignExtend32()
921921
}
922922
}
923923

924+
void testMultiplyZeroExtend32()
925+
{
926+
for (auto nOperand : int32Operands()) {
927+
auto mul = compile([=] (CCallHelpers& jit) {
928+
emitFunctionPrologue(jit);
929+
930+
jit.multiplyZeroExtend32(GPRInfo::argumentGPR0, GPRInfo::argumentGPR1, GPRInfo::returnValueGPR);
931+
932+
emitFunctionEpilogue(jit);
933+
jit.ret();
934+
});
935+
936+
for (auto mOperand : int32Operands()) {
937+
uint32_t n = nOperand;
938+
uint32_t m = mOperand;
939+
CHECK_EQ(invoke<uint64_t>(mul, n, m), static_cast<uint64_t>(n) * static_cast<uint64_t>(m));
940+
}
941+
}
942+
}
943+
924944
void testMultiplyAddSignExtend32()
925945
{
926946
// d = SExt32(n) * SExt32(m) + a
@@ -5649,6 +5669,7 @@ void run(const char* filter) WTF_IGNORES_THREAD_SAFETY_ANALYSIS
56495669
RUN(testLoadStorePair64Int64());
56505670
RUN(testLoadStorePair64Double());
56515671
RUN(testMultiplySignExtend32());
5672+
RUN(testMultiplyZeroExtend32());
56525673

56535674
RUN(testSub32Args());
56545675
RUN(testSub32Imm());

Source/JavaScriptCore/b3/B3LowerToAir.cpp

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ class LowerToAir {
499499
return true;
500500
}
501501

502-
bool isMergeableValue(Value* v, B3::Opcode b3Opcode, bool checkCanBeInternal)
502+
bool isMergeableValue(Value* v, B3::Opcode b3Opcode, bool checkCanBeInternal = false)
503503
{
504504
if (v->opcode() != b3Opcode)
505505
return false;
@@ -516,9 +516,9 @@ class LowerToAir {
516516
#if CPU(ARM64)
517517
// Maybe, the ideal approach is to introduce a decorator (Index@EXT) to the Air operand
518518
// to provide an extension opportunity for the specific form under the Air opcode.
519-
if (isMergeableValue(index, ZExt32, false))
519+
if (isMergeableValue(index, ZExt32))
520520
return Arg::index(base, tmp(index->child(0)), scale, offset, MacroAssembler::Extend::ZExt32);
521-
if (isMergeableValue(index, SExt32, false))
521+
if (isMergeableValue(index, SExt32))
522522
return Arg::index(base, tmp(index->child(0)), scale, offset, MacroAssembler::Extend::SExt32);
523523
#endif
524524
return Arg::index(base, tmp(index), scale, offset);
@@ -2689,19 +2689,17 @@ class LowerToAir {
26892689
if (airOpcode != MultiplyAdd64)
26902690
return Air::Oops;
26912691
// SMADDL: d = SExt32(n) * SExt32(m) + a
2692-
if (isMergeableValue(multiplyLeft, SExt32, true) && isMergeableValue(multiplyRight, SExt32, true))
2692+
if (isMergeableValue(multiplyLeft, SExt32) && isMergeableValue(multiplyRight, SExt32))
26932693
return MultiplyAddSignExtend32;
26942694
// UMADDL: d = ZExt32(n) * ZExt32(m) + a
2695-
if (isMergeableValue(multiplyLeft, ZExt32, true) && isMergeableValue(multiplyRight, ZExt32, true))
2695+
if (isMergeableValue(multiplyLeft, ZExt32) && isMergeableValue(multiplyRight, ZExt32))
26962696
return MultiplyAddZeroExtend32;
26972697
return Air::Oops;
26982698
};
26992699

27002700
Air::Opcode newAirOpcode = tryNewAirOpcode();
27012701
if (isValidForm(newAirOpcode, Arg::Tmp, Arg::Tmp, Arg::Tmp, Arg::Tmp)) {
27022702
append(newAirOpcode, tmp(multiplyLeft->child(0)), tmp(multiplyRight->child(0)), tmp(right), tmp(m_value));
2703-
commitInternal(multiplyLeft);
2704-
commitInternal(multiplyRight);
27052703
commitInternal(left);
27062704
return true;
27072705
}
@@ -2755,19 +2753,17 @@ class LowerToAir {
27552753
if (airOpcode != MultiplySub64)
27562754
return Air::Oops;
27572755
// SMSUBL: d = a - SExt32(n) * SExt32(m)
2758-
if (isMergeableValue(multiplyLeft, SExt32, true) && isMergeableValue(multiplyRight, SExt32, true))
2756+
if (isMergeableValue(multiplyLeft, SExt32) && isMergeableValue(multiplyRight, SExt32))
27592757
return MultiplySubSignExtend32;
27602758
// UMSUBL: d = a - ZExt32(n) * ZExt32(m)
2761-
if (isMergeableValue(multiplyLeft, ZExt32, true) && isMergeableValue(multiplyRight, ZExt32, true))
2759+
if (isMergeableValue(multiplyLeft, ZExt32) && isMergeableValue(multiplyRight, ZExt32))
27622760
return MultiplySubZeroExtend32;
27632761
return Air::Oops;
27642762
};
27652763

27662764
Air::Opcode newAirOpcode = tryNewAirOpcode();
27672765
if (isValidForm(newAirOpcode, Arg::Tmp, Arg::Tmp, Arg::Tmp, Arg::Tmp)) {
27682766
append(newAirOpcode, tmp(multiplyLeft->child(0)), tmp(multiplyRight->child(0)), tmp(left), tmp(m_value));
2769-
commitInternal(multiplyLeft);
2770-
commitInternal(multiplyRight);
27712767
commitInternal(right);
27722768
return true;
27732769
}
@@ -2808,19 +2804,17 @@ class LowerToAir {
28082804
if (airOpcode != MultiplyNeg64)
28092805
return Air::Oops;
28102806
// SMNEGL: d = -(SExt32(n) * SExt32(m))
2811-
if (isMergeableValue(multiplyLeft, SExt32, true) && isMergeableValue(multiplyRight, SExt32, true))
2807+
if (isMergeableValue(multiplyLeft, SExt32) && isMergeableValue(multiplyRight, SExt32))
28122808
return MultiplyNegSignExtend32;
28132809
// UMNEGL: d = -(ZExt32(n) * ZExt32(m))
2814-
if (isMergeableValue(multiplyLeft, ZExt32, true) && isMergeableValue(multiplyRight, ZExt32, true))
2810+
if (isMergeableValue(multiplyLeft, ZExt32) && isMergeableValue(multiplyRight, ZExt32))
28152811
return MultiplyNegZeroExtend32;
28162812
return Air::Oops;
28172813
};
28182814

28192815
Air::Opcode newAirOpcode = tryNewAirOpcode();
28202816
if (isValidForm(newAirOpcode, Arg::Tmp, Arg::Tmp, Arg::Tmp)) {
28212817
append(newAirOpcode, tmp(multiplyLeft->child(0)), tmp(multiplyRight->child(0)), tmp(m_value));
2822-
commitInternal(multiplyLeft);
2823-
commitInternal(multiplyRight);
28242818
commitInternal(m_value->child(0));
28252819
return true;
28262820
}
@@ -2842,29 +2836,34 @@ class LowerToAir {
28422836
}
28432837

28442838
case Mul: {
2845-
if (m_value->type() == Int64
2846-
&& isValidForm(MultiplySignExtend32, Arg::Tmp, Arg::Tmp, Arg::Tmp)
2847-
&& m_value->child(0)->opcode() == SExt32
2848-
&& !m_locked.contains(m_value->child(0))) {
2849-
Value* opLeft = m_value->child(0);
2850-
Value* left = opLeft->child(0);
2851-
Value* opRight = m_value->child(1);
2852-
Value* right = nullptr;
2853-
2854-
if (opRight->opcode() == SExt32 && !m_locked.contains(opRight->child(0))) {
2855-
right = opRight->child(0);
2856-
} else if (m_value->child(1)->isRepresentableAs<int32_t>() && !m_locked.contains(m_value->child(1))) {
2857-
// We just use the 64-bit const int as a 32 bit const int directly
2858-
right = opRight;
2859-
}
2839+
Value* left = m_value->child(0);
2840+
Value* right = m_value->child(1);
28602841

2861-
if (right) {
2862-
append(MultiplySignExtend32, tmp(left), tmp(right), tmp(m_value));
2863-
return;
2842+
auto tryAppendMultiplyWithExtend = [&] () -> bool {
2843+
auto tryAirOpcode = [&] () -> Air::Opcode {
2844+
if (m_value->type() != Int64)
2845+
return Air::Oops;
2846+
// SMULL: d = SExt32(n) * SExt32(m)
2847+
if (isMergeableValue(left, SExt32) && isMergeableValue(right, SExt32))
2848+
return MultiplySignExtend32;
2849+
// UMULL: d = ZExt32(n) * ZExt32(m)
2850+
if (isMergeableValue(left, ZExt32) && isMergeableValue(right, ZExt32))
2851+
return MultiplyZeroExtend32;
2852+
return Air::Oops;
2853+
};
2854+
2855+
Air::Opcode opcode = tryAirOpcode();
2856+
if (isValidForm(opcode, Arg::Tmp, Arg::Tmp, Arg::Tmp)) {
2857+
append(opcode, tmp(left->child(0)), tmp(right->child(0)), tmp(m_value));
2858+
return true;
28642859
}
2865-
}
2866-
appendBinOp<Mul32, Mul64, MulDouble, MulFloat, Commutative>(
2867-
m_value->child(0), m_value->child(1));
2860+
return false;
2861+
};
2862+
2863+
if (tryAppendMultiplyWithExtend())
2864+
return;
2865+
2866+
appendBinOp<Mul32, Mul64, MulDouble, MulFloat, Commutative>(left, right);
28682867
return;
28692868
}
28702869

@@ -3002,7 +3001,7 @@ class LowerToAir {
30023001
// mask = (1 << lowWidth) - 1
30033002
auto tryAppendEXTR = [&] (Value* left, Value* right) -> bool {
30043003
Air::Opcode opcode = opcodeForType(ExtractRegister32, ExtractRegister64, m_value->type());
3005-
if (!isValidForm(opcode, Arg::Tmp, Arg::Tmp, Arg::Imm, Arg::Tmp))
3004+
if (!isValidForm(opcode, Arg::Tmp, Arg::Tmp, Arg::Imm, Arg::Tmp))
30063005
return false;
30073006
if (left->opcode() != Shl || left->child(0)->opcode() != BitAnd || right->opcode() != ZShr)
30083007
return false;
@@ -3208,7 +3207,7 @@ class LowerToAir {
32083207
XorNotLeftShift32, XorNotLeftShift64,
32093208
XorNotRightShift32, XorNotRightShift64,
32103209
XorNotUnsignedRightShift32, XorNotUnsignedRightShift64);
3211-
if (!isValidForm(opcode, Arg::Tmp, Arg::Tmp, Arg::Imm, Arg::Tmp))
3210+
if (!isValidForm(opcode, Arg::Tmp, Arg::Tmp, Arg::Imm, Arg::Tmp))
32123211
return false;
32133212
Value* mValue = shiftValue->child(0);
32143213
Value* amountValue = shiftValue->child(1);

Source/JavaScriptCore/b3/air/AirOpcode.opcodes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ arm64: MultiplyNegZeroExtend32 U:G:32, U:G:32, D:G:64
284284
arm64: MultiplySignExtend32 U:G:32, U:G:32, D:G:64
285285
Tmp, Tmp, Tmp
286286

287+
arm64: MultiplyZeroExtend32 U:G:32, U:G:32, D:G:64
288+
Tmp, Tmp, Tmp
289+
287290
arm64: Div32 U:G:32, U:G:32, ZD:G:32
288291
Tmp, Tmp, Tmp
289292

Source/JavaScriptCore/b3/testb3.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,8 @@ void testMulNegArgArg(int, int);
939939
void testMulArgImm(int64_t, int64_t);
940940
void testMulImmArg(int, int);
941941
void testMulArgs32(int, int);
942-
void testMulArgs32SignExtend(int, int);
942+
void testMulArgs32SignExtend();
943+
void testMulArgs32ZeroExtend();
943944
void testMulImm32SignExtend(const int, int);
944945
void testMulLoadTwice();
945946
void testMulAddArgsLeft();

Source/JavaScriptCore/b3/testb3_2.cpp

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,10 +1228,10 @@ void testMulArgs32(int a, int b)
12281228
CHECK(compileAndRun<int>(proc, a, b) == a * b);
12291229
}
12301230

1231-
void testMulArgs32SignExtend(int a, int b)
1231+
void testMulArgs32SignExtend()
12321232
{
12331233
Procedure proc;
1234-
if (proc.optLevel() < 1)
1234+
if (proc.optLevel() < 2)
12351235
return;
12361236
BasicBlock* root = proc.addBlock();
12371237
Value* arg1 = root->appendNew<Value>(
@@ -1246,8 +1246,46 @@ void testMulArgs32SignExtend(int a, int b)
12461246
root->appendNewControlValue(proc, Return, Origin(), mul);
12471247

12481248
auto code = compileProc(proc);
1249+
if (isARM64())
1250+
checkUsesInstruction(*code, "smull");
1251+
1252+
for (auto nOperand : int32Operands()) {
1253+
for (auto mOperand : int32Operands()) {
1254+
int32_t n = nOperand.value;
1255+
int32_t m = mOperand.value;
1256+
CHECK_EQ(invoke<int64_t>(*code, n, m), static_cast<int64_t>(n) * static_cast<int64_t>(m));
1257+
}
1258+
}
1259+
}
1260+
1261+
void testMulArgs32ZeroExtend()
1262+
{
1263+
Procedure proc;
1264+
if (proc.optLevel() < 2)
1265+
return;
1266+
BasicBlock* root = proc.addBlock();
1267+
Value* arg1 = root->appendNew<Value>(
1268+
proc, Trunc, Origin(),
1269+
root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
1270+
Value* arg2 = root->appendNew<Value>(
1271+
proc, Trunc, Origin(),
1272+
root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
1273+
Value* left = root->appendNew<Value>(proc, ZExt32, Origin(), arg1);
1274+
Value* right = root->appendNew<Value>(proc, ZExt32, Origin(), arg2);
1275+
Value* mul = root->appendNew<Value>(proc, Mul, Origin(), left, right);
1276+
root->appendNewControlValue(proc, Return, Origin(), mul);
1277+
1278+
auto code = compileProc(proc);
1279+
if (isARM64())
1280+
checkUsesInstruction(*code, "umull");
12491281

1250-
CHECK(invoke<long int>(*code, a, b) == ((long int) a) * ((long int) b));
1282+
for (auto nOperand : int32Operands()) {
1283+
for (auto mOperand : int32Operands()) {
1284+
uint32_t n = nOperand.value;
1285+
uint32_t m = mOperand.value;
1286+
CHECK_EQ(invoke<uint64_t>(*code, n, m), static_cast<uint64_t>(n) * static_cast<uint64_t>(m));
1287+
}
1288+
}
12511289
}
12521290

12531291
void testMulImm32SignExtend(const int a, int b)

Source/JavaScriptCore/b3/testb3_3.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3730,10 +3730,8 @@ void addArgTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>& tasks)
37303730
RUN(testMulArgs32(1, 2));
37313731
RUN(testMulArgs32(0xFFFFFFFF, 0xFFFFFFFF));
37323732
RUN(testMulArgs32(0xFFFFFFFE, 0xFFFFFFFF));
3733-
RUN(testMulArgs32SignExtend(1, 1));
3734-
RUN(testMulArgs32SignExtend(1, 2));
3735-
RUN(testMulArgs32SignExtend(0xFFFFFFFF, 0xFFFFFFFF));
3736-
RUN(testMulArgs32SignExtend(0xFFFFFFFE, 0xFFFFFFFF));
3733+
RUN(testMulArgs32SignExtend());
3734+
RUN(testMulArgs32ZeroExtend());
37373735
RUN(testMulLoadTwice());
37383736
RUN(testMulAddArgsLeft());
37393737
RUN(testMulAddArgsRight());

0 commit comments

Comments
 (0)