@@ -9429,6 +9429,15 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
9429
9429
theEmitter->emitIns_R_R_R (INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
9430
9430
theEmitter->emitIns_R_R_R (INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
9431
9431
9432
+ theEmitter->emitIns_R_R (INS_push2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9433
+ theEmitter->emitIns_R_R (INS_pop2, EA_PTRSIZE, REG_R16, REG_R17, INS_OPTS_EVEX_nd);
9434
+ theEmitter->emitIns_R_R (INS_push2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9435
+ theEmitter->emitIns_R_R (INS_pop2, EA_PTRSIZE, REG_R17, REG_R18, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
9436
+ theEmitter->emitIns_R (INS_push, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9437
+ theEmitter->emitIns_R (INS_pop, EA_PTRSIZE, REG_R11, INS_OPTS_APX_ppx);
9438
+ theEmitter->emitIns_R (INS_push, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9439
+ theEmitter->emitIns_R (INS_pop, EA_PTRSIZE, REG_R17, INS_OPTS_APX_ppx);
9440
+
9432
9441
theEmitter->emitIns_Mov (INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false );
9433
9442
theEmitter->emitIns_Mov (INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false );
9434
9443
theEmitter->emitIns_Mov (INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false );
@@ -10284,7 +10293,6 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
10284
10293
osrAdditionalIntCalleeSaves &= ~regBit;
10285
10294
}
10286
10295
}
10287
-
10288
10296
#endif // TARGET_AMD64
10289
10297
10290
10298
// ------------------------------------------------------------------------
@@ -10334,6 +10342,69 @@ void CodeGen::genPushCalleeSavedRegisters()
10334
10342
}
10335
10343
#endif // DEBUG
10336
10344
10345
+ #ifdef TARGET_AMD64
10346
+ if (compiler->canUseApxEncoding () && compiler->canUseEvexEncoding () && JitConfig.EnableApxPPX ())
10347
+ {
10348
+ // This is not a funclet or an On-Stack Replacement.
10349
+ assert ((compiler->funCurrentFunc ()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts .IsOSR ());
10350
+ // PUSH2 doesn't work for ESP.
10351
+ assert ((rsPushRegs & RBM_SPBASE) == 0 );
10352
+ // We need to align the stack to 16 bytes to use push2/pop2.
10353
+ // The ABI requirement is that the stack must be 16B aligned at the point of a function call.
10354
+ // As soon as the CALL is executed, the stack is no longer 16B aligned.
10355
+ // To use PP2, the stack needs to be pre-aligned
10356
+ // If isFramePointerUsed() is true, we have already pushed the frame pointer and stack is aligned.
10357
+ // Else, We need to issue a single push to align the stack.
10358
+ if (!isFramePointerUsed () && (rsPushRegs != RBM_NONE))
10359
+ {
10360
+ if ((rsPushRegs & RBM_FPBASE) != 0 )
10361
+ {
10362
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, REG_EBP, INS_OPTS_APX_ppx);
10363
+ compiler->unwindPush (REG_EBP);
10364
+ rsPushRegs &= ~RBM_FPBASE;
10365
+ }
10366
+ else
10367
+ {
10368
+ regNumber alignReg = genFirstRegNumFromMaskAndToggle (rsPushRegs);
10369
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, alignReg, INS_OPTS_APX_ppx);
10370
+ compiler->unwindPush (alignReg);
10371
+ }
10372
+ }
10373
+
10374
+ // Push backwards so we match the order we will pop them in the epilog
10375
+ // and all the other code that expects it to be in this order.
10376
+ // All registers to be saved are pushed to an ArrayStack
10377
+ ArrayStack<regNumber> regStack (compiler->getAllocator (CMK_Codegen));
10378
+ while (rsPushRegs != RBM_NONE)
10379
+ {
10380
+ regNumber reg = genFirstRegNumFromMaskAndToggle (rsPushRegs);
10381
+ regStack.Push (reg);
10382
+ }
10383
+
10384
+ // We need to push the registers in pairs.
10385
+ // In cases where we have an odd number of registers, we need to push the last one
10386
+ // separately at the end to maintain alignment for push2.
10387
+ while (regStack.Height () > 1 )
10388
+ {
10389
+ regNumber reg1 = regStack.Pop ();
10390
+ regNumber reg2 = regStack.Pop ();
10391
+
10392
+ GetEmitter ()->emitIns_R_R (INS_push2, EA_PTRSIZE, reg1, reg2, (insOpts) (INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx));
10393
+ compiler->unwindPush (reg1);
10394
+ compiler->unwindPush (reg2);
10395
+ }
10396
+
10397
+ if (regStack.Height () == 1 )
10398
+ {
10399
+ regNumber reg = regStack.Pop ();
10400
+ GetEmitter ()->emitIns_R (INS_push, EA_PTRSIZE, reg, INS_OPTS_APX_ppx);
10401
+ compiler->unwindPush (reg);
10402
+ }
10403
+ assert (regStack.Height () == 0 );
10404
+ return ;
10405
+ }
10406
+ #endif // TARGET_AMD64
10407
+
10337
10408
// Push backwards so we match the order we will pop them in the epilog
10338
10409
// and all the other code that expects it to be in this order.
10339
10410
for (regNumber reg = get_REG_INT_LAST (); rsPushRegs != RBM_NONE; reg = REG_PREV (reg))
@@ -10382,6 +10453,14 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
10382
10453
return ;
10383
10454
}
10384
10455
10456
+ if (compiler->canUseApxEncoding () && compiler->canUseEvexEncoding () && JitConfig.EnableApxPPX ())
10457
+ {
10458
+ regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask ();
10459
+ const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX (rsPopRegs);
10460
+ noway_assert (compiler->compCalleeRegsPushed == popCount);
10461
+ return ;
10462
+ }
10463
+
10385
10464
#endif // TARGET_AMD64
10386
10465
10387
10466
// Registers saved by a normal prolog
@@ -10391,6 +10470,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
10391
10470
noway_assert (compiler->compCalleeRegsPushed == popCount);
10392
10471
}
10393
10472
10473
+
10394
10474
// ------------------------------------------------------------------------
10395
10475
// genPopCalleeSavedRegistersFromMask: pop specified set of callee saves
10396
10476
// in the "standard" order
@@ -10457,6 +10537,79 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs)
10457
10537
return popCount;
10458
10538
}
10459
10539
10540
#if defined(TARGET_AMD64)
// ------------------------------------------------------------------------
// genPopCalleeSavedRegistersFromMaskAPX: restore the given set of callee
//    saves in the "standard" order, using APX POP2 for pairs of registers
//    wherever a pair is available.
//
// Arguments:
//    rsPopRegs - register mask of registers to pop
//
// Return Value:
//    The number of registers popped.
//
// Notes:
//    Instructions are emitted in this order:
//      1. a single POP for the unpaired register, if the number of
//         registers left after choosing the alignment register is odd;
//      2. one POP2 per remaining pair of registers;
//      3. a single POP for the alignment register, if one was chosen.
//
unsigned CodeGen::genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs)
{
    // Only the root function of a non-OSR method is expected here (no funclets, no OSR).
    assert((compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT) && !compiler->opts.IsOSR());
    // POP2 doesn't work for ESP, so the stack pointer must not be in the mask.
    assert((rsPopRegs & RBM_SPBASE) == 0);

    unsigned  numPopped   = 0;
    regNumber trailingReg = REG_NA;

    // push2/pop2 need the stack to be 16-byte aligned. When a frame pointer is in use, popping
    // it takes care of that. Otherwise one register is set aside here and restored with a
    // plain pop after the last pop2.
    const bool needsTrailingPop = !isFramePointerUsed() && (rsPopRegs != RBM_NONE);
    if (needsTrailingPop)
    {
        if ((rsPopRegs & RBM_FPBASE) == 0)
        {
            // The frame pointer register is not being restored: take the first register of the mask.
            trailingReg = genFirstRegNumFromMaskAndToggle(rsPopRegs);
        }
        else
        {
            // Prefer the frame pointer register as the alignment register.
            trailingReg = REG_EBP;
            rsPopRegs &= ~RBM_FPBASE;
        }
    }

    // Collect every remaining register, in the order genFirstRegNumFromMaskAndToggle yields them.
    ArrayStack<regNumber> pending(compiler->getAllocator(CMK_Codegen));
    while (rsPopRegs != RBM_NONE)
    {
        pending.Push(genFirstRegNumFromMaskAndToggle(rsPopRegs));
    }

    const int pendingCount = pending.Height();
    int       next         = 0;

    // With an odd number of registers, the bottom-most one has no partner and is popped on its own.
    if (pendingCount % 2 == 1)
    {
        const regNumber unpaired = pending.Bottom(next);
        next++;
        GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, unpaired, INS_OPTS_APX_ppx);
        numPopped++;
    }

    // Everything that is left is restored two registers at a time, walking up from the bottom.
    const insOpts pairOpts = static_cast<insOpts>(INS_OPTS_EVEX_nd | INS_OPTS_APX_ppx);
    for (; (next + 1) < pendingCount; next += 2)
    {
        const regNumber first  = pending.Bottom(next);
        const regNumber second = pending.Bottom(next + 1);
        GetEmitter()->emitIns_R_R(INS_pop2, EA_PTRSIZE, first, second, pairOpts);
        numPopped += 2;
    }
    assert(pendingCount == next);

    // Finally restore the register that was set aside for alignment, if any.
    if (trailingReg != REG_NA)
    {
        GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, trailingReg, INS_OPTS_APX_ppx);
        numPopped++;
    }

    return numPopped;
}
#endif // defined(TARGET_AMD64)
10612
+
10460
10613
/* ****************************************************************************
10461
10614
*
10462
10615
* Generates code for a function epilog.
0 commit comments