Skip to content

Commit f25f57b

Browse files
Adding push2/pop2 + ppx.
1 parent 27347c4 commit f25f57b

File tree

8 files changed

+313
-9
lines changed

8 files changed

+313
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ class CodeGen final : public CodeGenInterface
560560

561561
#if defined(TARGET_XARCH)
562562
unsigned genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs);
563+
#ifdef TARGET_AMD64
564+
unsigned genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs);
565+
#endif // TARGET_AMD64
563566
#endif // !defined(TARGET_XARCH)
564567

565568
#endif // !defined(TARGET_ARM64)
@@ -1600,6 +1603,10 @@ class CodeGen final : public CodeGenInterface
16001603
insFlags flags = INS_FLAGS_DONT_CARE DEBUGARG(size_t targetHandle = 0)
16011604
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
16021605

1606+
#if defined(TARGET_AMD64)
1607+
void instGen_Push2Pop2Ppx(instruction ins, regNumber reg1, regNumber reg2);
1608+
#endif // defined(TARGET_AMD64)
1609+
16031610
#ifdef TARGET_XARCH
16041611
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
16051612
#endif // TARGET_XARCH

src/coreclr/jit/codegenxarch.cpp

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9429,6 +9429,15 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
94299429
theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
94309430
theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
94319431

9432+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9433+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9434+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9435+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9436+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9437+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9438+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9439+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9440+
94329441
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false);
94339442
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false);
94349443
theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false);
@@ -10284,7 +10293,6 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
1028410293
osrAdditionalIntCalleeSaves &= ~regBit;
1028510294
}
1028610295
}
10287-
1028810296
#endif // TARGET_AMD64
1028910297

1029010298
//------------------------------------------------------------------------
@@ -10334,6 +10342,70 @@ void CodeGen::genPushCalleeSavedRegisters()
1033410342
}
1033510343
#endif // DEBUG
1033610344

10345+
#ifdef TARGET_AMD64
10346+
if (compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10347+
{
10348+
// This is not a funclet or an On-Stack Replacement.
10349+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10350+
// PUSH2 doesn't work for ESP.
10351+
assert((rsPushRegs & RBM_SPBASE) == 0);
10352+
// We need to align the stack to 16 bytes to use push2/pop2.
10353+
// The ABI requirement is that the stack must be 16B aligned at the point of a function call.
10354+
// As soon as the CALL is executed, the stack is no longer 16B aligned.
10355+
// To use push2/pop2, the stack needs to be pre-aligned.
10356+
// If isFramePointerUsed() is true, we have already pushed the frame pointer and stack is aligned.
10357+
// Otherwise, we need to issue a single push to align the stack.
10358+
if (!isFramePointerUsed() && (rsPushRegs != RBM_NONE))
10359+
{
10360+
if ((rsPushRegs & RBM_FPBASE) != 0)
10361+
{
10362+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_EBP, INS_OPTS_APX_ppx);
10363+
compiler->unwindPush(REG_EBP);
10364+
rsPushRegs &= ~RBM_FPBASE;
10365+
}
10366+
else
10367+
{
10368+
regNumber alignReg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10369+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10370+
compiler->unwindPush(alignReg);
10371+
}
10372+
}
10373+
10374+
// Push backwards so we match the order we will pop them in the epilog
10375+
// and all the other code that expects it to be in this order.
10376+
// All registers to be saved are pushed to an ArrayStack.
10377+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10378+
while (rsPushRegs != RBM_NONE)
10379+
{
10380+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10381+
regStack.Push(reg);
10382+
}
10383+
10384+
// We need to push the registers in pairs.
10385+
// In cases where we have an odd number of registers, we need to push the last one
10386+
// separately at the end to maintain alignment for push2.
10387+
while (regStack.Height() > 1)
10388+
{
10389+
regNumber reg1 = regStack.Pop();
10390+
regNumber reg2 = regStack.Pop();
10391+
10392+
GetEmitter()->emitIns_R_R(INS_push2, EA_PTRSIZE, reg1, reg2,
10393+
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10394+
compiler->unwindPush(reg1);
10395+
compiler->unwindPush(reg2);
10396+
}
10397+
10398+
if (regStack.Height() == 1)
10399+
{
10400+
regNumber reg = regStack.Pop();
10401+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10402+
compiler->unwindPush(reg);
10403+
}
10404+
assert(regStack.Height() == 0);
10405+
return;
10406+
}
10407+
#endif // TARGET_AMD64
10408+
1033710409
// Push backwards so we match the order we will pop them in the epilog
1033810410
// and all the other code that expects it to be in this order.
1033910411
for (regNumber reg = get_REG_INT_LAST(); rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
@@ -10382,6 +10454,14 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
1038210454
return;
1038310455
}
1038410456

10457+
if (compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10458+
{
10459+
regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
10460+
const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX(rsPopRegs);
10461+
noway_assert(compiler->compCalleeRegsPushed == popCount);
10462+
return;
10463+
}
10464+
1038510465
#endif // TARGET_AMD64
1038610466

1038710467
// Registers saved by a normal prolog
@@ -10457,6 +10537,78 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
1045710537
return popCount;
1045810538
}
1045910539

10540+
#if defined(TARGET_AMD64)
10541+
//------------------------------------------------------------------------
10542+
// genPopCalleeSavedRegistersFromMaskAPX: pop the specified set of callee saves
10543+
// in the "standard" order, using POP2 (with the PPX hint) whenever possible.
10544+
//
10545+
// Arguments:
10546+
// rsPopRegs - register mask of registers to pop; must not contain the stack pointer.
10547+
//
10548+
// Return Value:
10549+
// The number of registers popped (each POP2 counts as two).
10550+
//
// Notes:
// The emission order is: an optional single POP (when the register count left after
// choosing the alignment register is odd), then POP2 pairs, then an optional final
// single POP of the alignment register. This is the reverse of the APX path in
// genPushCalleeSavedRegisters, which pushes the alignment register first and the
// odd register last.
//
10551+
unsigned CodeGen::genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs)
10552+
{
10553+
// This path is only valid for the root function: not a funclet and not an On-Stack Replacement method.
10554+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10555+
unsigned popCount = 0;
10556+
// POP2 doesn't work for ESP.
10557+
assert((rsPopRegs & RBM_SPBASE) == 0);
10558+
// Register restored by the trailing single POP; stays REG_NA when no such POP is needed.
regNumber alignReg = REG_NA;
10559+
// We need to align the stack to 16 bytes to use push2/pop2.
10560+
// If isFramePointerUsed() is true, we will pop the frame pointer and the stack will be aligned.
10561+
// Otherwise, we need to issue a single pop after the last pop2 to align the stack.
10562+
if (!isFramePointerUsed() && (rsPopRegs != RBM_NONE))
10563+
{
10564+
// Prefer the frame pointer register as the alignment register when it is in the mask;
// otherwise take the first register the mask yields. Either way the chosen register
// is removed from rsPopRegs so it is not paired below.
if ((rsPopRegs & RBM_FPBASE) != 0)
10565+
{
10566+
alignReg = REG_EBP;
10567+
rsPopRegs &= ~RBM_FPBASE;
10568+
}
10569+
else
10570+
{
10571+
alignReg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10572+
}
10573+
}
10574+
10575+
// All remaining registers to be restored are pushed to an ArrayStack, in the order
// genFirstRegNumFromMaskAndToggle returns them.
10576+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10577+
while (rsPopRegs != RBM_NONE)
10578+
{
10579+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10580+
regStack.Push(reg);
10581+
}
10582+
10583+
// regStack is never popped here; it is walked from the bottom with `index`.
int index = 0;
10584+
if (regStack.Height() % 2 == 1)
10585+
{
10586+
// We have an odd number of registers to pop, so one of them has to be popped on its own.
10587+
// It is popped first because the prolog pushes its unpaired register last.
10588+
regNumber reg = regStack.Bottom(index++);
10589+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10590+
popCount++;
10591+
}
10592+
10593+
// The remaining count is even: restore the registers two at a time with POP2.
while (index < (regStack.Height() - 1))
10594+
{
10595+
regNumber reg1 = regStack.Bottom(index++);
10596+
regNumber reg2 = regStack.Bottom(index++);
10597+
GetEmitter()->emitIns_R_R(INS_pop2, EA_PTRSIZE, reg1, reg2, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10598+
popCount += 2;
10599+
}
10600+
// Every collected register must have been consumed by the pops above.
assert(regStack.Height() == index);
10601+
10602+
// Finally restore the alignment register, mirroring the single push emitted first in the prolog.
if (alignReg != REG_NA)
10603+
{
10604+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10605+
popCount++;
10606+
}
10607+
10608+
return popCount;
10609+
}
10610+
#endif // defined(TARGET_AMD64)
10611+
1046010612
/*****************************************************************************
1046110613
*
1046210614
* Generates code for a function epilog.

src/coreclr/jit/emit.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,9 @@ class emitter
848848
*/
849849
#define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */
850850
#define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */
851+
// We repurpose _idCustom6 for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 since these instructions have no
852+
// NF.
853+
#define _idApxPpxContext _idCustom6 /* bits used for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 */
851854

852855
// We repurpose 4 bits for the default flag value bits for ccmp instructions.
853856
// Packs the four custom bits into one 4-bit value: _idCustom4 is bit 3 and _idCustom1 is bit 0.
// The replacement list is fully parenthesized so the macro expands as a single operand
// regardless of the operators that surround it at the use site.
#define _idEvexDFV ((_idCustom4 << 3) | (_idCustom3 << 2) | (_idCustom2 << 1) | (_idCustom1))
@@ -1799,6 +1802,17 @@ class emitter
17991802
_idEvexNfContext = 1;
18001803
}
18011804

1805+
// Returns true when the APX-EVEX.ppx context bit is set on this instruction descriptor.
// _idApxPpxContext is an alias of _idCustom6, the same bit _idEvexNfContext uses.
bool idIsApxPpxContextSet() const
1806+
{
1807+
return _idApxPpxContext != 0;
1808+
}
1809+
1810+
// Sets the APX-EVEX.ppx context bit on this instruction descriptor.
// Asserts that the bit is not already set. Because _idApxPpxContext aliases _idCustom6,
// the assert also fires if that bit was previously set as the EVEX.nf context.
void idSetApxPpxContext()
1811+
{
1812+
assert(!idIsApxPpxContextSet());
1813+
_idApxPpxContext = 1;
1814+
}
1815+
18021816
bool idIsNoApxEvexPromotion() const
18031817
{
18041818
return _idNoApxEvexXPromotion != 0;

0 commit comments

Comments
 (0)