@@ -9388,6 +9388,15 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
9388
9388
theEmitter->emitIns_R_R_R (INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
9389
9389
theEmitter->emitIns_R_R_R (INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
9390
9390
9391
+ theEmitter->emitIns_R_R (INS_push2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9392
+ theEmitter->emitIns_R_R (INS_pop2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9393
+ theEmitter->emitIns_R_R (INS_push2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9394
+ theEmitter->emitIns_R_R (INS_pop2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9395
+ theEmitter->emitIns_R (INS_push, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9396
+ theEmitter->emitIns_R (INS_pop, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9397
+ theEmitter->emitIns_R (INS_push, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9398
+ theEmitter->emitIns_R (INS_pop, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9399
+
9391
9400
theEmitter->emitIns_Mov (INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false );
9392
9401
theEmitter->emitIns_Mov (INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false );
9393
9402
theEmitter->emitIns_Mov (INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false );
@@ -10243,7 +10252,6 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
10243
10252
osrAdditionalIntCalleeSaves &= ~regBit;
10244
10253
}
10245
10254
}
10246
-
10247
10255
#endif // TARGET_AMD64
10248
10256
10249
10257
// ------------------------------------------------------------------------
@@ -10293,6 +10301,69 @@ void CodeGen::genPushCalleeSavedRegisters()
10293
10301
}
10294
10302
#endif // DEBUG
10295
10303
10304
+ #ifdef TARGET_AMD64
10305
+ if (compiler->canUseApxEncoding () && compiler->canUseEvexEncoding () && JitConfig.EnableApxPPX ())
10306
+ {
10307
+ // This is not a funclet or an On-Stack Replacement.
10308
+ assert ((compiler->funCurrentFunc ()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts .IsOSR ());
10309
+ // PUSH2 doesn't work for ESP.
10310
+ assert ((rsPushRegs & RBM_SPBASE) == 0 );
10311
+ // We need to align the stack to 16 bytes to use push2/pop2.
10312
+ // The ABI requirement is that the stack must be 16B aligned at the point of a function call.
10313
+ // As soon as the CALL is executed, the stack is no longer 16B aligned.
10314
+ // To use push2/pop2, the stack needs to be pre-aligned.
10315
+ // If isFramePointerUsed() is true, we have already pushed the frame pointer and stack is aligned.
10316
+ // Else, We need to issue a single push to align the stack.
10317
+ if (!isFramePointerUsed () && (rsPushRegs != RBM_NONE))
10318
+ {
10319
+ if ((rsPushRegs & RBM_FPBASE) != 0 )
10320
+ {
10321
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, REG_EBP, INS_OPTS_APX_ppx);
10322
+ compiler->unwindPush (REG_EBP);
10323
+ rsPushRegs &= ~RBM_FPBASE;
10324
+ }
10325
+ else
10326
+ {
10327
+ regNumber alignReg = genFirstRegNumFromMaskAndToggle (rsPushRegs);
10328
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10329
+ compiler->unwindPush (alignReg);
10330
+ }
10331
+ }
10332
+
10333
+ // Push backwards so we match the order we will pop them in the epilog
10334
+ // and all the other code that expects it to be in this order.
10335
+ // All registers to be saved are pushed to an ArrayStack
10336
+ ArrayStack<regNumber> regStack (compiler->getAllocator (CMK_Codegen));
10337
+ while (rsPushRegs != RBM_NONE)
10338
+ {
10339
+ regNumber reg = genFirstRegNumFromMaskAndToggle (rsPushRegs);
10340
+ regStack.Push (reg);
10341
+ }
10342
+
10343
+ // We need to push the registers in pairs.
10344
+ // In cases where we have an odd number of registers, we need to push the last one
10345
+ // separately at the end to maintain alignment for push2.
10346
+ while (regStack.Height () > 1 )
10347
+ {
10348
+ regNumber reg1 = regStack.Pop ();
10349
+ regNumber reg2 = regStack.Pop ();
10350
+
10351
+ GetEmitter ()->emitIns_R_R (INS_push2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10352
+ compiler->unwindPush (reg1);
10353
+ compiler->unwindPush (reg2);
10354
+ }
10355
+
10356
+ if (regStack.Height () == 1 )
10357
+ {
10358
+ regNumber reg = regStack.Pop ();
10359
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10360
+ compiler->unwindPush (reg);
10361
+ }
10362
+ assert (regStack.Height () == 0 );
10363
+ return ;
10364
+ }
10365
+ #endif // TARGET_AMD64
10366
+
10296
10367
// Push backwards so we match the order we will pop them in the epilog
10297
10368
// and all the other code that expects it to be in this order.
10298
10369
for (regNumber reg = get_REG_INT_LAST (); rsPushRegs != RBM_NONE; reg = REG_PREV (reg))
@@ -10341,6 +10412,14 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
10341
10412
return ;
10342
10413
}
10343
10414
10415
+ if (compiler->canUseApxEncoding () && compiler->canUseEvexEncoding () && JitConfig.EnableApxPPX ())
10416
+ {
10417
+ regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask ();
10418
+ const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX (rsPopRegs);
10419
+ noway_assert (compiler->compCalleeRegsPushed == popCount);
10420
+ return ;
10421
+ }
10422
+
10344
10423
#endif // TARGET_AMD64
10345
10424
10346
10425
// Registers saved by a normal prolog
@@ -10350,6 +10429,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
10350
10429
noway_assert (compiler->compCalleeRegsPushed == popCount);
10351
10430
}
10352
10431
10432
+
10353
10433
// ------------------------------------------------------------------------
10354
10434
// genPopCalleeSavedRegistersFromMask: pop specified set of callee saves
10355
10435
// in the "standard" order
@@ -10416,6 +10496,79 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
10416
10496
return popCount;
10417
10497
}
10418
10498
10499
#if defined(TARGET_AMD64)
// ------------------------------------------------------------------------
// genPopCalleeSavedRegistersFromMaskAPX: restore a set of callee-saved
//    registers in the "standard" order, emitting pop2 for register pairs
//    and a single pop for any register that cannot be paired.
//
// Arguments:
//    rsPopRegs - register mask of registers to pop
//
// Return Value:
//    The number of registers popped.
//
// Notes:
//    push2/pop2 require the stack to be 16-byte aligned. When no frame
//    pointer is in use and the mask is non-empty, one register is held back
//    and restored last with a plain pop: RBP if it is in the mask, otherwise
//    the first register handed out by genFirstRegNumFromMaskAndToggle. This
//    mirrors the APX path of genPushCalleeSavedRegisters, which pushes that
//    same register first.
//
//    The remaining registers are collected in mask order. If their count is
//    odd, the first collected register is popped on its own before the pairs;
//    the rest are popped two at a time, walking the collection from the
//    bottom up. Every pop carries the PPX hint.
//
unsigned CodeGen::genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs)
{
    // This path is only for the root function of a non-OSR method.
    assert(compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT);
    assert(!compiler->opts.IsOSR());

    // POP2 doesn't work for ESP.
    assert((rsPopRegs & RBM_SPBASE) == 0);

    // Register that restores the stack alignment; it is popped after all of the pairs.
    regNumber alignReg = REG_NA;

    const bool needsAlignmentPop = !isFramePointerUsed() && (rsPopRegs != RBM_NONE);
    if (needsAlignmentPop)
    {
        if ((rsPopRegs & RBM_FPBASE) == 0)
        {
            alignReg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
        }
        else
        {
            alignReg = REG_EBP;
            rsPopRegs &= ~RBM_FPBASE;
        }
    }

    // Drain what is left of the mask into an ArrayStack so it can be walked by index.
    ArrayStack<regNumber> regStack(compiler->getAllocator(CMK_Codegen));
    while (rsPopRegs != RBM_NONE)
    {
        regStack.Push(genFirstRegNumFromMaskAndToggle(rsPopRegs));
    }

    const insOpts pop2Opts = static_cast<insOpts>(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx);

    unsigned popCount = 0;
    int      next     = 0;

    if ((regStack.Height() % 2) == 1)
    {
        // Odd number of registers: the unpaired one is restored first with a single pop.
        const regNumber unpairedReg = regStack.Bottom(next);
        next++;

        GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, unpairedReg, INS_OPTS_APX_ppx);
        popCount++;
    }

    // Everything that remains comes in pairs.
    for (; next < (regStack.Height() - 1); next += 2)
    {
        const regNumber firstReg  = regStack.Bottom(next);
        const regNumber secondReg = regStack.Bottom(next + 1);

        GetEmitter()->emitIns_R_R(INS_pop2, EA_PTRSIZE, firstReg, secondReg, pop2Opts);
        popCount += 2;
    }
    assert(regStack.Height() == next);

    if (alignReg != REG_NA)
    {
        // Finally restore the register that was used to align the stack.
        GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
        popCount++;
    }

    return popCount;
}
#endif // defined(TARGET_AMD64)
10571
+
10419
10572
/* ****************************************************************************
10420
10573
*
10421
10574
* Generates code for a function epilog.
0 commit comments