Skip to content

Commit b960c6e

Browse files
Adding push2/pop2 + ppx
# Conflicts: # src/coreclr/jit/emitxarch.cpp
1 parent 27347c4 commit b960c6e

File tree

8 files changed

+319
-9
lines changed

8 files changed

+319
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ class CodeGen final : public CodeGenInterface
560560

561561
#if defined(TARGET_XARCH)
562562
unsigned genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs);
563+
#ifdef TARGET_AMD64
564+
unsigned genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs);
565+
#endif // TARGET_AMD64
563566
#endif // !defined(TARGET_XARCH)
564567

565568
#endif // !defined(TARGET_ARM64)
@@ -1600,6 +1603,12 @@ class CodeGen final : public CodeGenInterface
16001603
insFlags flags = INS_FLAGS_DONT_CARE DEBUGARG(size_t targetHandle = 0)
16011604
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
16021605

1606+
#if defined(TARGET_AMD64)
1607+
void instGen_Push2Pop2Ppx(instruction ins,
1608+
regNumber reg1,
1609+
regNumber reg2);
1610+
#endif // defined(TARGET_AMD64)
1611+
16031612
#ifdef TARGET_XARCH
16041613
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
16051614
#endif // TARGET_XARCH

src/coreclr/jit/codegenxarch.cpp

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9429,6 +9429,15 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
94299429
theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
94309430
theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
94319431

9432+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9433+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9434+
theEmitter->emitIns_R_R(INS_push2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9435+
theEmitter->emitIns_R_R(INS_pop2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9436+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9437+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9438+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9439+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9440+
94329441
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false);
94339442
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false);
94349443
theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false);
@@ -10284,7 +10293,6 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
1028410293
osrAdditionalIntCalleeSaves &= ~regBit;
1028510294
}
1028610295
}
10287-
1028810296
#endif // TARGET_AMD64
1028910297

1029010298
//------------------------------------------------------------------------
@@ -10334,6 +10342,69 @@ void CodeGen::genPushCalleeSavedRegisters()
1033410342
}
1033510343
#endif // DEBUG
1033610344

10345+
#ifdef TARGET_AMD64
10346+
if(compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10347+
{
10348+
// This is not a funclet or an On-Stack Replacement.
10349+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10350+
// PUSH2 doesn't work for ESP.
10351+
assert((rsPushRegs & RBM_SPBASE) == 0);
10352+
// We need to align the stack to 16 bytes to use push2/pop2.
10353+
// The ABI requirement is that the stack must be 16B aligned at the point of a function call.
10354+
// As soon as the CALL is executed, the stack is no longer 16B aligned.
10355+
// To use push2/pop2, the stack needs to be pre-aligned.
10356+
// If isFramePointerUsed() is true, we have already pushed the frame pointer and stack is aligned.
10357+
// Otherwise, we need to issue a single push to align the stack.
10358+
if(!isFramePointerUsed() && (rsPushRegs != RBM_NONE))
10359+
{
10360+
if ((rsPushRegs & RBM_FPBASE) != 0)
10361+
{
10362+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_EBP, INS_OPTS_APX_ppx);
10363+
compiler->unwindPush(REG_EBP);
10364+
rsPushRegs &= ~RBM_FPBASE;
10365+
}
10366+
else
10367+
{
10368+
regNumber alignReg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10369+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10370+
compiler->unwindPush(alignReg);
10371+
}
10372+
}
10373+
10374+
// Push backwards so we match the order we will pop them in the epilog
10375+
// and all the other code that expects it to be in this order.
10376+
// All registers to be saved are pushed onto an ArrayStack.
10377+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10378+
while (rsPushRegs != RBM_NONE)
10379+
{
10380+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPushRegs);
10381+
regStack.Push(reg);
10382+
}
10383+
10384+
// We need to push the registers in pairs.
10385+
// In cases where we have an odd number of registers, we need to push the last one
10386+
// separately at the end to maintain alignment for push2.
10387+
while (regStack.Height() > 1)
10388+
{
10389+
regNumber reg1 = regStack.Pop();
10390+
regNumber reg2 = regStack.Pop();
10391+
10392+
GetEmitter()->emitIns_R_R(INS_push2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10393+
compiler->unwindPush(reg1);
10394+
compiler->unwindPush(reg2);
10395+
}
10396+
10397+
if (regStack.Height() == 1)
10398+
{
10399+
regNumber reg = regStack.Pop();
10400+
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10401+
compiler->unwindPush(reg);
10402+
}
10403+
assert(regStack.Height() == 0);
10404+
return;
10405+
}
10406+
#endif // TARGET_AMD64
10407+
1033710408
// Push backwards so we match the order we will pop them in the epilog
1033810409
// and all the other code that expects it to be in this order.
1033910410
for (regNumber reg = get_REG_INT_LAST(); rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
@@ -10382,6 +10453,14 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
1038210453
return;
1038310454
}
1038410455

10456+
if(compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
10457+
{
10458+
regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
10459+
const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX(rsPopRegs);
10460+
noway_assert(compiler->compCalleeRegsPushed == popCount);
10461+
return;
10462+
}
10463+
1038510464
#endif // TARGET_AMD64
1038610465

1038710466
// Registers saved by a normal prolog
@@ -10391,6 +10470,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
1039110470
noway_assert(compiler->compCalleeRegsPushed == popCount);
1039210471
}
1039310472

10473+
1039410474
//------------------------------------------------------------------------
1039510475
// genPopCalleeSavedRegistersFromMask: pop specified set of callee saves
1039610476
// in the "standard" order
@@ -10457,6 +10537,79 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
1045710537
return popCount;
1045810538
}
1045910539

10540+
#if defined(TARGET_AMD64)
10541+
//------------------------------------------------------------------------
10542+
// genPopCalleeSavedRegistersFromMaskAPX: pop specified set of callee saves
10543+
// in the "standard" order using Pop2 when possible
10544+
//
10545+
// Arguments:
10546+
// rsPopRegs - register mask of registers to pop
10547+
//
10548+
// Return Value:
10549+
// The number of registers popped.
10550+
//
10551+
unsigned CodeGen::genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs)
10552+
{
10553+
// This is not a funclet or an On-Stack Replacement.
10554+
assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
10555+
unsigned popCount = 0;
10556+
// POP2 doesn't work for ESP.
10557+
assert((rsPopRegs & RBM_SPBASE) == 0);
10558+
regNumber alignReg = REG_NA;
10559+
// We need to align the stack to 16 bytes to use push2/pop2.
10560+
// If isFramePointerUsed() is true, we will pop the frame pointer and stack will be aligned.
10561+
// Else, We need to issue a single pop after the last pop2 to align the stack.
10562+
if(!isFramePointerUsed() && (rsPopRegs != RBM_NONE))
10563+
{
10564+
if ((rsPopRegs & RBM_FPBASE) != 0)
10565+
{
10566+
alignReg = REG_EBP;
10567+
rsPopRegs &= ~RBM_FPBASE;
10568+
}
10569+
else
10570+
{
10571+
alignReg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10572+
}
10573+
}
10574+
10575+
10576+
// All registers to be restored as pushed to an ArrayStack
10577+
ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
10578+
while (rsPopRegs != RBM_NONE)
10579+
{
10580+
regNumber reg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
10581+
regStack.Push(reg);
10582+
}
10583+
10584+
int index = 0;
10585+
if(regStack.Height() % 2 == 1)
10586+
{
10587+
// We have an odd number of registers to pop, so we need to pop the last one
10588+
// separately..
10589+
regNumber reg = regStack.Bottom(index++);
10590+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10591+
popCount++;
10592+
}
10593+
10594+
while (index < (regStack.Height() - 1))
10595+
{
10596+
regNumber reg1 = regStack.Bottom(index++);
10597+
regNumber reg2 = regStack.Bottom(index++);
10598+
GetEmitter()->emitIns_R_R(INS_pop2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10599+
popCount += 2;
10600+
}
10601+
assert(regStack.Height() == index);
10602+
10603+
if (alignReg != REG_NA)
10604+
{
10605+
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10606+
popCount++;
10607+
}
10608+
10609+
return popCount;
10610+
}
10611+
#endif // defined(TARGET_AMD64)
10612+
1046010613
/*****************************************************************************
1046110614
*
1046210615
* Generates code for a function epilog.

src/coreclr/jit/emit.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,8 @@ class emitter
848848
*/
849849
#define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */
850850
#define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */
851+
// We repurpose _idCustom6 for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 since these instructions have no NF.
852+
#define _idApxPpxContext _idCustom6 /* bits used for the APX-EVEX.ppx context for Push/Pop/Push2/Pop2 */
851853

852854
// We repurpose 4 bits for the default flag value bits for ccmp instructions.
853855
#define _idEvexDFV (_idCustom4 << 3) | (_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1
@@ -1799,6 +1801,17 @@ class emitter
17991801
_idEvexNfContext = 1;
18001802
}
18011803

1804+
bool idIsApxPpxContextSet() const
1805+
{
1806+
return _idApxPpxContext != 0;
1807+
}
1808+
1809+
void idSetApxPpxContext()
1810+
{
1811+
assert(!idIsApxPpxContextSet());
1812+
_idApxPpxContext = 1;
1813+
}
1814+
18021815
bool idIsNoApxEvexPromotion() const
18031816
{
18041817
return _idNoApxEvexXPromotion != 0;

0 commit comments

Comments
 (0)