Skip to content

Commit 7dcab65

Browse files
Adding push2/pop2 + ppx
1 parent d3bcd23 commit 7dcab65

File tree

8 files changed

+319
-9
lines changed

8 files changed

+319
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ class CodeGen final : public CodeGenInterface
560560

561561
#if defined(TARGET_XARCH)
562562
unsigned genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs);
563+
#ifdef TARGET_AMD64
564+
unsigned genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs);
565+
#endif // TARGET_AMD64
563566
#endif // defined(TARGET_XARCH)
564567

565568
#endif // !defined(TARGET_ARM64)
@@ -1600,6 +1603,12 @@ class CodeGen final : public CodeGenInterface
16001603
insFlags flags = INS_FLAGS_DONT_CARE DEBUGARG(size_t targetHandle = 0)
16011604
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
16021605

1606+
#if defined(TARGET_AMD64)
1607+
void instGen_Push2Pop2Ppx(instruction ins,
1608+
regNumber reg1,
1609+
regNumber reg2);
1610+
#endif // defined(TARGET_AMD64)
1611+
16031612
#ifdef TARGET_XARCH
16041613
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
16051614
#endif // TARGET_XARCH

src/coreclr/jit/codegenxarch.cpp

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9388,6 +9388,15 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
93889388
theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
93899389
theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
93909390

9391+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9392+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9393+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9394+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9395+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9396+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9397+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9398+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9399+
93919400
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false);
93929401
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false);
93939402
theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false);
@@ -10243,7 +10252,6 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
1024310252
osrAdditionalIntCalleeSaves &= ~regBit;
1024410253
}
1024510254
}
10246-
1024710255
#endif // TARGET_AMD64
1024810256

1024910257
//------------------------------------------------------------------------
@@ -10293,6 +10301,69 @@ void CodeGen::genPushCalleeSavedRegisters()
1029310301
}
1029410302
#endif // DEBUG
1029510303

10304+
#ifdef TARGET_AMD64
10305+
if(compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10306+
{
10307+
// This is not a funclet or an On-Stack Replacement.
10308+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10309+
// PUSH2 doesn't work for ESP.
10310+
assert((rsPushRegs & RBM_SPBASE) == 0);
10311+
// We need to align the stack to 16 bytes to use push2/pop2.
10312+
// The ABI requirement is that the stack must be 16B aligned at the point of a function call.
10313+
// As soon as the CALL is executed, the stack is no longer 16B aligned.
10314+
// To use PUSH2/POP2, the stack needs to be pre-aligned.
10315+
// If isFramePointerUsed() is true, we have already pushed the frame pointer and stack is aligned.
10316+
// Otherwise, we need to issue a single push to align the stack.
10317+
if(!isFramePointerUsed() && (rsPushRegs != RBM_NONE))
10318+
{
10319+
if ((rsPushRegs & RBM_FPBASE) != 0)
10320+
{
10321+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_EBP, INS_OPTS_APX_ppx);
10322+
compiler->unwindPush(REG_EBP);
10323+
rsPushRegs &= ~RBM_FPBASE;
10324+
}
10325+
else
10326+
{
10327+
regNumber alignReg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10328+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10329+
compiler->unwindPush(alignReg);
10330+
}
10331+
}
10332+
10333+
// Push backwards so we match the order we will pop them in the epilog
10334+
// and all the other code that expects it to be in this order.
10335+
// All registers to be saved are pushed onto an ArrayStack
10336+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10337+
while (rsPushRegs != RBM_NONE)
10338+
{
10339+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10340+
regStack.Push(reg);
10341+
}
10342+
10343+
// We need to push the registers in pairs.
10344+
// In cases where we have an odd number of registers, we need to push the last one
10345+
// separately at the end to maintain alignment for push2.
10346+
while (regStack.Height() > 1)
10347+
{
10348+
regNumber reg1 = regStack.Pop();
10349+
regNumber reg2 = regStack.Pop();
10350+
10351+
GetEmitter()->emitIns_R_R(INS_push2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10352+
compiler->unwindPush(reg1);
10353+
compiler->unwindPush(reg2);
10354+
}
10355+
10356+
if (regStack.Height() == 1)
10357+
{
10358+
regNumber reg = regStack.Pop();
10359+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10360+
compiler->unwindPush(reg);
10361+
}
10362+
assert(regStack.Height() == 0);
10363+
return;
10364+
}
10365+
#endif // TARGET_AMD64
10366+
1029610367
// Push backwards so we match the order we will pop them in the epilog
1029710368
// and all the other code that expects it to be in this order.
1029810369
for (regNumber reg = get_REG_INT_LAST(); rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
@@ -10341,6 +10412,14 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
1034110412
return;
1034210413
}
1034310414

10415+
if(compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10416+
{
10417+
regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
10418+
const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX(rsPopRegs);
10419+
noway_assert(compiler->compCalleeRegsPushed == popCount);
10420+
return;
10421+
}
10422+
1034410423
#endif // TARGET_AMD64
1034510424

1034610425
// Registers saved by a normal prolog
@@ -10350,6 +10429,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
1035010429
noway_assert(compiler->compCalleeRegsPushed == popCount);
1035110430
}
1035210431

10432+
1035310433
//------------------------------------------------------------------------
1035410434
// genPopCalleeSavedRegistersFromMask: pop specified set of callee saves
1035510435
// in the "standard" order
@@ -10416,6 +10496,79 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
1041610496
return popCount;
1041710497
}
1041810498

10499+
#if defined(TARGET_AMD64)
10500+
//------------------------------------------------------------------------
10501+
// genPopCalleeSavedRegistersFromMaskAPX: pop specified set of callee saves
10502+
// in the "standard" order using Pop2 when possible
10503+
//
10504+
// Arguments:
10505+
// rsPopRegs - register mask of registers to pop
10506+
//
10507+
// Return Value:
10508+
// The number of registers popped.
10509+
//
10510+
unsigned CodeGen::genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs)
10511+
{
10512+
// This is not a funclet or an On-Stack Replacement.
10513+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10514+
unsigned popCount = 0;
10515+
// POP2 doesn't work for ESP.
10516+
assert((rsPopRegs & RBM_SPBASE) == 0);
10517+
regNumber alignReg = REG_NA;
10518+
// We need to align the stack to 16 bytes to use push2/pop2.
10519+
// If isFramePointerUsed() is true, we will pop the frame pointer and stack will be aligned.
10520+
// Else, We need to issue a single pop after the last pop2 to align the stack.
10521+
if(!isFramePointerUsed() && (rsPopRegs != RBM_NONE))
10522+
{
10523+
if ((rsPopRegs & RBM_FPBASE) != 0)
10524+
{
10525+
alignReg = REG_EBP;
10526+
rsPopRegs &= ~RBM_FPBASE;
10527+
}
10528+
else
10529+
{
10530+
alignReg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10531+
}
10532+
}
10533+
10534+
10535+
// All registers to be restored as pushed to an ArrayStack
10536+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10537+
while (rsPopRegs != RBM_NONE)
10538+
{
10539+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10540+
regStack.Push(reg);
10541+
}
10542+
10543+
int index = 0;
10544+
if(regStack.Height() % 2 == 1)
10545+
{
10546+
// We have an odd number of registers to pop, so we need to pop the last one
10547+
// separately..
10548+
regNumber reg = regStack.Bottom(index++);
10549+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10550+
popCount++;
10551+
}
10552+
10553+
while (index < (regStack.Height() - 1))
10554+
{
10555+
regNumber reg1 = regStack.Bottom(index++);
10556+
regNumber reg2 = regStack.Bottom(index++);
10557+
GetEmitter()->emitIns_R_R(INS_pop2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10558+
popCount += 2;
10559+
}
10560+
assert(regStack.Height() == index);
10561+
10562+
if (alignReg != REG_NA)
10563+
{
10564+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10565+
popCount++;
10566+
}
10567+
10568+
return popCount;
10569+
}
10570+
#endif // defined(TARGET_AMD64)
10571+
1041910572
/*****************************************************************************
1042010573
*
1042110574
* Generates code for a function epilog.

src/coreclr/jit/emit.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,8 @@ class emitter
848848
*/
849849
#define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */
850850
#define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */
851+
// We repurpose _idCustom6 for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 since these instructions have no NF.
852+
#define _idApxPpxContext _idCustom6 /* bits used for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 */
851853

852854
// We repurpose 4 bits for the default flag value bits for ccmp instructions.
853855
#define _idEvexDFV (_idCustom4 << 3) | (_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1
@@ -1799,6 +1801,17 @@ class emitter
17991801
_idEvexNfContext = 1;
18001802
}
18011803

1804+
bool idIsApxPpxContextSet() const
1805+
{
1806+
return _idApxPpxContext != 0;
1807+
}
1808+
1809+
void idSetApxPpxContext()
1810+
{
1811+
assert(!idIsApxPpxContextSet());
1812+
_idApxPpxContext = 1;
1813+
}
1814+
18021815
bool idIsNoApxEvexPromotion() const
18031816
{
18041817
return _idNoApxEvexXPromotion != 0;

0 commit comments

Comments
 (0)