Skip to content

Commit 09cb144

Browse files
Enable inlining on ARM64
The biggest thing that needed fixing was the loading of inlinee call info data. I implemented approximately the same thing that we do on ARM, except that it is done a little earlier (when encoding, instead of in a relocation step). The most notable change is that LabelInstr offsets had to grow from uint32 to uintptr_t; this does not take any more memory, because the offset is already unioned with a byte pointer, but it does have impact outside of ARM64. The other part is the definition of two new instructions that perform a move with a known shift amount (MOVZ_SHIFT and MOVK_SHIFT); these make such fixed-data moves easier to define, and may be useful elsewhere.
1 parent cc14530 commit 09cb144

9 files changed

Lines changed: 111 additions & 25 deletions

File tree

lib/Backend/IR.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ class LabelInstr : public Instr
679679
union labelLocation
680680
{
681681
BYTE * pc; // Used by encoder and is the real pc offset
682-
uint32 offset; // Used by preEncoder and is an estimation pc offset, not accurate
682+
uintptr_t offset; // Used by preEncoder and is an estimation pc offset, not accurate
683683
} m_pc;
684684

685685
BasicBlock * m_block;
@@ -689,9 +689,9 @@ class LabelInstr : public Instr
689689

690690
inline void SetPC(BYTE * pc);
691691
inline BYTE * GetPC(void) const;
692-
inline void SetOffset(uint32 offset);
693-
inline void ResetOffset(uint32 offset);
694-
inline uint32 GetOffset(void) const;
692+
inline void SetOffset(uintptr_t offset);
693+
inline void ResetOffset(uintptr_t offset);
694+
inline uintptr_t GetOffset(void) const;
695695
inline void SetBasicBlock(BasicBlock * block);
696696
inline BasicBlock * GetBasicBlock(void) const;
697697
inline void SetLoop(Loop *loop);

lib/Backend/IR.inl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ LabelInstr::GetPC(void) const
637637
///----------------------------------------------------------------------------
638638

639639
inline void
640-
LabelInstr::ResetOffset(uint32 offset)
640+
LabelInstr::ResetOffset(uintptr_t offset)
641641
{
642642
AssertMsg(this->isInlineeEntryInstr, "As of now only InlineeEntryInstr overwrites the offset at encoder stage");
643643
this->m_pc.offset = offset;
@@ -650,7 +650,7 @@ LabelInstr::ResetOffset(uint32 offset)
650650
///----------------------------------------------------------------------------
651651

652652
inline void
653-
LabelInstr::SetOffset(uint32 offset)
653+
LabelInstr::SetOffset(uintptr_t offset)
654654
{
655655
AssertMsg(this->m_pc.offset == 0, "Overwriting existing byte offset");
656656
this->m_pc.offset = offset;
@@ -662,7 +662,7 @@ LabelInstr::SetOffset(uint32 offset)
662662
///
663663
///----------------------------------------------------------------------------
664664

665-
inline uint32
665+
inline uintptr_t
666666
LabelInstr::GetOffset(void) const
667667
{
668668

lib/Backend/InliningDecider.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,6 @@ uint InliningDecider::InlinePolymorphicCallSite(Js::FunctionBody *const inliner,
184184
Js::FunctionInfo *InliningDecider::Inline(Js::FunctionBody *const inliner, Js::FunctionInfo* functionInfo,
185185
bool isConstructorCall, bool isPolymorphicCall, uint16 constantArgInfo, Js::ProfileId callSiteId, uint recursiveInlineDepth, bool allowRecursiveInlining)
186186
{
187-
#if defined(_M_ARM64)
188-
INLINE_TESTTRACE(_u("INLINING: Inline disabled for ARM64"));
189-
return nullptr;
190-
#else // #if defined(_M_ARM64)
191187
#if defined(DBG_DUMP) || defined(ENABLE_DEBUG_CONFIG_OPTIONS)
192188
char16 debugStringBuffer[MAX_FUNCTION_BODY_DEBUG_STRING_SIZE];
193189
char16 debugStringBuffer2[MAX_FUNCTION_BODY_DEBUG_STRING_SIZE];
@@ -305,7 +301,6 @@ Js::FunctionInfo *InliningDecider::Inline(Js::FunctionBody *const inliner, Js::F
305301

306302
// Note: for built-ins at this time we don't have enough data (the instr) to decide whether it's going to be inlined.
307303
return functionInfo;
308-
#endif // #if defined(_M_ARM64)
309304
}
310305

311306

lib/Backend/arm64/EncoderMD.cpp

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,31 @@ int EncoderMD::EmitMovConstant(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emi
507507
}
508508
}
509509

510+
// Emits a move of a 16-bit immediate into a register at a caller-supplied shift.
// Callers in this file pass the MOVZ/MOVK emitter pairs.
//   Emitter   - code emitter forwarded unchanged to the chosen callback.
//   instr     - dst must be a 4- or 8-byte register operand; src1 must be an
//               immediate operand whose value fits in 16 bits.
//   emitter   - callback used when dst is 4 bytes wide.
//   emitter64 - callback used when dst is 8 bytes wide.
//   shift     - bit position of the immediate: 0 or 16, plus 32 or 48 for 8-byte dst.
// Returns whatever the selected emitter callback returns.
template<typename _Emitter, typename _Emitter64>
511+
int EncoderMD::EmitMovConstantKnownShift(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64, uint32 shift)
512+
{
513+
IR::Opnd* dst = instr->GetDst();
514+
IR::Opnd* src1 = instr->GetSrc1();
515+
Assert(dst->IsRegOpnd());
516+
Assert(src1->IsImmediateOpnd());
517+
518+
int size = dst->GetSize();
519+
Assert(size == 4 || size == 8);
520+
521+
IntConstType immediate = src1->GetImmediateValue(instr->m_func);
522+
Assert((immediate & 0xFFFF) == immediate); // caller must already have reduced the value to a single 16-bit chunk
523+
Assert(shift == 0 || shift == 16 || (size == 8 && (shift == 32 || shift == 48))); // shifts of 32 and 48 are only accepted for 8-byte destinations
524+
525+
if (size == 8)
526+
{
527+
return emitter64(Emitter, this->GetRegEncode(dst->AsRegOpnd()), ULONG(immediate), shift);
528+
}
529+
else
530+
{
531+
return emitter(Emitter, this->GetRegEncode(dst->AsRegOpnd()), ULONG(immediate), shift);
532+
}
533+
}
534+
510535
template<typename _Emitter, typename _Emitter64>
511536
int EncoderMD::EmitBitfield(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64)
512537
{
@@ -725,6 +750,7 @@ EncoderMD::GenerateEncoding(IR::Instr* instr, BYTE *pc)
725750
Assert(src1->IsLabelOpnd());
726751

727752
Assert(dst->GetSize() == 8);
753+
Assert(!src1->AsLabelOpnd()->GetLabel()->isInlineeEntryInstr);
728754
EncodeReloc::New(&m_relocList, RelocTypeLabelAdr, m_pc, src1->AsLabelOpnd()->GetLabel(), m_encoder->m_tempAlloc);
729755
bytes = EmitAdr(Emitter, this->GetRegEncode(dst->AsRegOpnd()), 0);
730756
break;
@@ -939,6 +965,38 @@ EncoderMD::GenerateEncoding(IR::Instr* instr, BYTE *pc)
939965
bytes = this->EmitMovConstant(Emitter, instr, EmitMovz, EmitMovz64);
940966
break;
941967

968+
case Js::OpCode::MOVK_SHIFT:
969+
{
970+
Assert(instr->GetSrc1()->IsLabelOpnd());
971+
Assert(instr->GetSrc2()->IsIntConstOpnd());
972+
IR::LabelInstr* labelInstr = instr->GetSrc1()->AsLabelOpnd()->GetLabel();
973+
uint32 shift = instr->GetSrc2()->AsIntConstOpnd()->AsUint32();
974+
975+
// MOVK of one 16-bit chunk of the label's offset value: drop src2 (the shift), replace src1 with the chunk of the offset found at bit position 'shift' (so we don't even need to go into relocation), and emit it.
976+
// Shift the full-width value down before masking: a mask built as (0xffff << shift) is computed as a 32-bit int, which is undefined for shift >= 32 and sign-extends for shift == 16, yielding more than 16 bits.
977+
instr->UnlinkSrc2();
978+
uintptr_t fullvalue = labelInstr->GetOffset();
979+
instr->ReplaceSrc1(IR::IntConstOpnd::New((fullvalue >> shift) & 0xffff, IRType::TyUint16, instr->m_func, true));
980+
bytes = this->EmitMovConstantKnownShift(Emitter, instr, EmitMovk, EmitMovk64, shift);
981+
}
982+
break;
983+
984+
case Js::OpCode::MOVZ_SHIFT:
985+
{
986+
Assert(instr->GetSrc1()->IsLabelOpnd());
987+
Assert(instr->GetSrc2()->IsIntConstOpnd());
988+
IR::LabelInstr* labelInstr = instr->GetSrc1()->AsLabelOpnd()->GetLabel();
989+
uint32 shift = instr->GetSrc2()->AsIntConstOpnd()->AsUint32();
990+
991+
// MOVZ of one 16-bit chunk of the label's offset value: drop src2 (the shift), replace src1 with the chunk of the offset found at bit position 'shift' (so we don't even need to go into relocation), and emit it.
992+
// Shift the full-width value down before masking: a mask built as (0xffff << shift) is computed as a 32-bit int, which is undefined for shift >= 32 and sign-extends for shift == 16, yielding more than 16 bits.
993+
instr->UnlinkSrc2();
994+
uintptr_t fullvalue = labelInstr->GetOffset();
995+
instr->ReplaceSrc1(IR::IntConstOpnd::New((fullvalue >> shift) & 0xffff, IRType::TyUint16, instr->m_func, true));
996+
bytes = this->EmitMovConstantKnownShift(Emitter, instr, EmitMovz, EmitMovz64, shift);
997+
}
998+
break;
999+
9421000
case Js::OpCode::MRS_FPCR:
9431001
dst = instr->GetDst();
9441002
Assert(dst->IsRegOpnd());
@@ -1303,7 +1361,7 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
13031361
Assert(encodeResult);
13041362
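//We are re-using offset to save the inlineeCallInfo; on ARM64 it is read back when the MOVZ_SHIFT\MOVK_SHIFT instructions that reference this label are encoded, not patched in ApplyRelocs
13051363
//This is a cleaner way to load the right inlineeCallInfo than patching a move pair (MOVW\MOVT on ARM) after the fact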
1306-
instr->AsLabelInstr()->ResetOffset((uint32)inlineeCallInfo);
1364+
instr->AsLabelInstr()->ResetOffset((uintptr_t)inlineeCallInfo);
13071365
}
13081366
else
13091367
{
@@ -1459,6 +1517,7 @@ EncoderMD::ApplyRelocs(size_t codeBufferAddress, size_t codeSize, uint* bufferCR
14591517
break;
14601518

14611519
case RelocTypeLabelAdr:
1520+
Assert(!reloc->m_relocInstr->isInlineeEntryInstr);
14621521
immediate = ULONG_PTR(targetAddress) - ULONG_PTR(relocAddress);
14631522
Assert(IS_CONST_INT21(immediate));
14641523
*relocAddress = (*relocAddress & ~(3 << 29)) | ULONG((immediate & 3) << 29);

lib/Backend/arm64/EncoderMD.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ class EncoderMD
243243

244244
// Misc operations
245245
template<typename _Emitter, typename _Emitter64> int EmitMovConstant(Arm64CodeEmitter &Emitter, IR::Instr* instr, _Emitter emitter, _Emitter64 emitter64);
246+
template<typename _Emitter, typename _Emitter64> int EmitMovConstantKnownShift(Arm64CodeEmitter &Emitter, IR::Instr* instr, _Emitter emitter, _Emitter64 emitter64, uint32 shift);
246247
template<typename _Emitter, typename _Emitter64> int EmitBitfield(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64);
247248
template<typename _Emitter, typename _Emitter64> int EmitConditionalSelect(Arm64CodeEmitter &Emitter, IR::Instr *instr, int condition, _Emitter emitter, _Emitter64 emitter64);
248249

lib/Backend/arm64/LegalizeMD.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -630,24 +630,42 @@ void LegalizeMD::LegalizeLDIMM(IR::Instr * instr, IntConstType immed)
630630
}
631631
else
632632
{
633-
// ARM64_WORKITEM: This needs to be understood better
633+
// Since we don't know the value yet, we defer materializing it until it is known.
634+
// This is done by having the load be from a label operand, whose offset is later
635+
// changed to the actual value that the LDIMM should load
636+
637+
// The assembly generated becomes something like
638+
// Label (offset:fake)
639+
// MOVZ DST, Label
640+
// MOVK DST, Label
641+
// MOVK DST, Label
642+
// MOVK DST, Label <- was the LDIMM
643+
634644
Assert(Security::DontEncode(instr->GetSrc1()));
635-
Assert(false);
636-
/* IR::LabelInstr *label = IR::LabelInstr::New(Js::OpCode::Label, instr->m_func, false);
645+
646+
// The label with the special offset value, used for reloc
647+
IR::LabelInstr *label = IR::LabelInstr::New(Js::OpCode::Label, instr->m_func, false);
637648
instr->InsertBefore(label);
638649
Assert((immed & 0x0000000F) == immed);
639-
label->SetOffset(immed);
650+
label->SetOffset((uint32)immed);
651+
label->isInlineeEntryInstr = true;
640652

641653
IR::LabelOpnd *target = IR::LabelOpnd::New(label, instr->m_func);
642654

643-
IR::Instr * instrMov = IR::Instr::New(Js::OpCode::MOVZ, instr->GetDst(), target, instr->m_func);
644-
instr->InsertBefore(instrMov);
655+
// We'll handle splitting this up to properly load the immediates now
656+
// Typically (and worst case) we'll need to load 64 bits.
657+
IR::Instr* bits48_63 = IR::Instr::New(Js::OpCode::MOVZ_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(48, IRType::TyUint8, instr->m_func, true), instr->m_func);
658+
instr->InsertBefore(bits48_63);
659+
IR::Instr* bits32_47 = IR::Instr::New(Js::OpCode::MOVK_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(32, IRType::TyUint8, instr->m_func, true), instr->m_func);
660+
instr->InsertBefore(bits32_47);
661+
IR::Instr* bits16_31 = IR::Instr::New(Js::OpCode::MOVK_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(16, IRType::TyUint8, instr->m_func, true), instr->m_func);
662+
instr->InsertBefore(bits16_31);
645663

646664
instr->ReplaceSrc1(target);
647-
instr->m_opcode = Js::OpCode::MOVK64;
665+
instr->SetSrc2(IR::IntConstOpnd::New(0, IRType::TyUint8, instr->m_func, true));
666+
instr->m_opcode = Js::OpCode::MOVK_SHIFT;
648667

649-
label->isInlineeEntryInstr = true;
650-
instr->isInlineeEntryInstr = false;*/
668+
instr->isInlineeEntryInstr = false;
651669
}
652670
}
653671

@@ -740,14 +758,22 @@ void LegalizeMD::LegalizeLdLabel(IR::Instr * instr, IR::Opnd * opnd)
740758
Assert(instr->m_opcode == Js::OpCode::LDIMM);
741759
Assert(opnd->IsLabelOpnd());
742760

743-
instr->m_opcode = Js::OpCode::ADR;
761+
if (opnd->AsLabelOpnd()->GetLabel()->isInlineeEntryInstr)
762+
{
763+
// We want to leave it as LDIMMs so that we can easily disambiguate later
764+
return;
765+
}
766+
else
767+
{
768+
instr->m_opcode = Js::OpCode::ADR;
769+
}
744770
}
745771

746772
bool LegalizeMD::LegalizeDirectBranch(IR::BranchInstr *branchInstr, uint32 branchOffset)
747773
{
748774
Assert(branchInstr->IsBranchInstr());
749775

750-
uint32 labelOffset = branchInstr->GetTarget()->GetOffset();
776+
uint32 labelOffset = (uint32)branchInstr->GetTarget()->GetOffset();
751777
Assert(labelOffset); //Label offset must be set.
752778

753779
int32 offset = labelOffset - branchOffset;

lib/Backend/arm64/LegalizeMD.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct LegalInstrForms
5656
#define LEGAL_CBZ { L_None, { L_Reg } }
5757
#define LEGAL_LABEL { L_Reg, { L_Label } }
5858
#define LEGAL_LDIMM { L_Reg, { L_Imm, L_None } }
59+
#define LEGAL_LDIMM_S { L_Reg, { L_Imm, L_ImmU6 } }
5960
#define LEGAL_LOAD { L_Reg, { (LegalForms)(L_IndirSU12I9 | L_SymSU12I9), L_None } }
6061
#define LEGAL_LOADP { L_Reg, { (LegalForms)(L_IndirSI7 | L_SymSI7), L_Reg } }
6162
#define LEGAL_PLD { L_None, { (LegalForms)(L_IndirSU12I9 | L_SymSU12I9), L_None } }

lib/Backend/arm64/LowerMD.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7613,7 +7613,7 @@ LowererMD::FinalLower()
76137613

76147614
if (branchInstr->GetTarget() && !LowererMD::IsUnconditionalBranch(branchInstr)) //Ignore BX register based branches & B
76157615
{
7616-
uint32 targetOffset = branchInstr->GetTarget()->GetOffset();
7616+
uint32 targetOffset = (uint32)branchInstr->GetTarget()->GetOffset();
76177617

76187618
if (targetOffset != 0)
76197619
{

lib/Backend/arm64/MdOpCodes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,12 @@ MACRO(MOV, Reg2, 0, UNUSED, LEGAL_REG2, UNUSED,
6969
// Alias of MOV that won't get optimized out when src and dst are the same.
7070
MACRO(MOV_TRUNC, Reg2, 0, UNUSED, LEGAL_REG2, UNUSED, DM__)
7171
MACRO(MOVK, Reg2, 0, UNUSED, LEGAL_LDIMM, UNUSED, DM__)
72+
// Alias of MOVK where we know the shift, but don't know the value yet
73+
MACRO(MOVK_SHIFT, Reg2, 0, UNUSED, LEGAL_LDIMM_S, UNUSED, DM__)
7274
MACRO(MOVN, Reg2, 0, UNUSED, LEGAL_LDIMM, UNUSED, DM__)
7375
MACRO(MOVZ, Reg2, 0, UNUSED, LEGAL_LDIMM, UNUSED, DM__)
76+
// Alias of MOVZ where we know the shift, but don't know the value yet
77+
MACRO(MOVZ_SHIFT, Reg2, 0, UNUSED, LEGAL_LDIMM_S, UNUSED, DM__)
7478
MACRO(MRS_FPCR, Reg1, 0, UNUSED, LEGAL_REG1, UNUSED, D___)
7579
MACRO(MRS_FPSR, Reg1, 0, UNUSED, LEGAL_REG1, UNUSED, D___)
7680
MACRO(MSR_FPCR, Reg2, 0, UNUSED, LEGAL_REG2_ND, UNUSED, D___)

0 commit comments

Comments
 (0)