@@ -604,6 +604,47 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
   llvm_unreachable("Invalid TargetStackID::Value");
 }
 
+// Activate all lanes, returns saved exec.
+static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+                                     MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     bool IsProlog) {
+  Register ScratchExecCopy;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL;
+
+  if (LiveRegs.empty()) {
+    if (IsProlog) {
+      LiveRegs.init(TRI);
+      LiveRegs.addLiveIns(MBB);
+      if (FuncInfo->SGPRForFPSaveRestoreCopy)
+        LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+    } else {
+      // In epilog.
+      LiveRegs.init(*ST.getRegisterInfo());
+      LiveRegs.addLiveOuts(MBB);
+      LiveRegs.stepBackward(*MBBI);
+    }
+  }
+
+  ScratchExecCopy = findScratchNonCalleeSaveRegister(
+      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+
+  if (!IsProlog)
+    LiveRegs.removeReg(ScratchExecCopy);
+
+  const unsigned OrSaveExec =
+      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+
+  return ScratchExecCopy;
+}
+
 void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL) const {
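
The helper above factors out a pattern that previously appeared inline in both emitPrologue and emitEpilogue: pick a scratch wave-mask register, save the current EXEC mask into it while forcing all lanes on (S_OR_SAVEEXEC with -1), and let the caller restore EXEC once the whole-wave spill or reload is done. The standalone sketch below is not LLVM code; the Wave type and function names are invented purely to illustrate that save/enable/restore contract.

#include <cassert>
#include <cstdint>

// Toy model of a wave's execution mask (64 lanes, as in wave64).
struct Wave {
  uint64_t exec = 0x00000000000000FFULL; // only some lanes active
};

// Mirrors the contract of buildScratchExecCopy: force all lanes on and
// hand the previous mask back to the caller, who must restore it.
uint64_t saveAndEnableAllLanes(Wave &W) {
  uint64_t Saved = W.exec;
  W.exec = ~0ULL; // -1: every lane active, so a whole-wave spill touches all lanes
  return Saved;
}

int main() {
  Wave W;
  uint64_t Saved = saveAndEnableAllLanes(W);
  // ... a whole-wave spill or reload would happen here ...
  W.exec = Saved; // caller restores EXEC, as emitPrologue/emitEpilogue do
  assert(W.exec == 0x00000000000000FFULL);
  return 0;
}
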
@@ -684,11 +725,24 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
   emitPrologueEntryCFI(MBB, MBBI, DL);
 
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+  // Otherwise we are spilling the FP to memory.
+  if (HasFPSaveIndex) {
+    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+                      TargetStackID::SGPRSpill;
+  }
+
   // Emit the copy if we need an FP, and are using a free SGPR to save it.
   if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
       .addReg(FramePtrReg)
       .setMIFlag(MachineInstr::FrameSetup);
+    // Make the register live throughout the function.
+    for (MachineBasicBlock &MBB : MF)
+      MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
+
     buildCFI(
         MBB, MBBI, DL,
         MCCFIInstruction::createRegister(
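
The new HasFPSaveIndex / SpillFPToMemory flags key entirely off the stack ID of the FP save slot: a TargetStackID::SGPRSpill slot means the FP will live in a VGPR lane, any other ID means it goes out to scratch memory. A minimal sketch of that decision, with the enum and helper invented for illustration:

#include <cassert>
#include <optional>

enum class StackID { Default, SGPRSpill };

// Mirrors the prologue/epilogue check: only a non-SGPRSpill save slot
// forces the frame pointer out to scratch memory.
bool spillFPToMemory(std::optional<int> FPSaveIndex, StackID IDOfSlot) {
  if (!FPSaveIndex)
    return false;               // no save slot at all
  return IDOfSlot != StackID::SGPRSpill;
}

int main() {
  assert(!spillFPToMemory(std::nullopt, StackID::Default));
  assert(!spillFPToMemory(0, StackID::SGPRSpill)); // FP kept in a VGPR lane
  assert(spillFPToMemory(0, StackID::Default));    // FP spilled to memory
  return 0;
}
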
@@ -701,25 +755,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     if (!Reg.FI.hasValue())
       continue;
 
-    if (ScratchExecCopy == AMDGPU::NoRegister) {
-      if (LiveRegs.empty()) {
-        LiveRegs.init(TRI);
-        LiveRegs.addLiveIns(MBB);
-        if (FuncInfo->SGPRForFPSaveRestoreCopy)
-          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-      }
-
-      ScratchExecCopy
-        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
-                                           *TRI.getWaveMaskRegClass());
-      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
-
-      const unsigned OrSaveExec = ST.isWave32() ?
-        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
-              ScratchExecCopy)
-        .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
 
     int FI = Reg.FI.getValue();
 
@@ -733,6 +770,29 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
                       MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
   }
 
+  if (HasFPSaveIndex && SpillFPToMemory) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+        .addReg(FramePtrReg);
+
+    buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
+                     FuncInfo->FramePointerSaveIndex.getValue());
+
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createOffset(
+                 nullptr, MCRI->getDwarfRegNum(FramePtrReg, false),
+                 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
+  }
+
   if (ScratchExecCopy) {
     // FIXME: Split block and make terminator.
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
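
In the memory path above, the FP (an SGPR) is first copied into a temporary VGPR with V_MOV_B32, which writes the scalar into every active lane, and the usual per-lane scratch store then writes it out; the CFI record afterwards points at the memory slot rather than at a register. A toy model of that broadcast step, with all names invented for illustration:

#include <array>
#include <cassert>
#include <cstdint>

constexpr int NumLanes = 64;
using VGPR = std::array<uint32_t, NumLanes>; // one 32-bit value per lane

// V_MOV_B32 with a scalar source: every active lane receives the SGPR value.
VGPR broadcastSGPR(uint32_t SGPRValue) {
  VGPR V;
  V.fill(SGPRValue);
  return V;
}

int main() {
  uint32_t FramePtr = 0x1234;
  VGPR Tmp = broadcastSGPR(FramePtr);  // models v_mov_b32 tmp, fp
  // A per-lane scratch store would now write Tmp out to the FP save slot.
  assert(Tmp[0] == FramePtr && Tmp[NumLanes - 1] == FramePtr);
  return 0;
}
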
@@ -780,12 +840,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, AMDGPU::EXEC, EXECSpill);
   }
 
-  if (FuncInfo->FramePointerSaveIndex) {
+  // In this case, spill the FP to a reserved VGPR.
+  if (HasFPSaveIndex && !SpillFPToMemory) {
     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-    assert(!MFI.isDeadObjectIndex(FI) &&
-           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+        FuncInfo->getSGPRToVGPRSpills(FI);
     assert(Spill.size() == 1);
 
     // Save FP before setting it up.
@@ -880,8 +942,14 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
   const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
 
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  if (HasFPSaveIndex) {
+    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+                      TargetStackID::SGPRSpill;
+  }
+
   if (RoundedSize != 0 && hasFP(MF)) {
-    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
       .addReg(StackPtrReg)
       .addImm(RoundedSize * ST.getWavefrontSize())
@@ -894,19 +962,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (FuncInfo->FramePointerSaveIndex) {
+  Register ScratchExecCopy;
+  if (HasFPSaveIndex) {
     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-
-    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
-           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
-
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
-    assert(Spill.size() == 1);
-    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-            FramePtrReg)
-      .addReg(Spill[0].VGPR)
-      .addImm(Spill[0].Lane);
+    assert(!MFI.isDeadObjectIndex(FI));
+    if (SpillFPToMemory) {
+      if (!ScratchExecCopy)
+        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+      buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+                        FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
+          .addReg(TempVGPR, RegState::Kill);
+    } else {
+      // Reload from VGPR spill.
+      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+          FuncInfo->getSGPRToVGPRSpills(FI);
+      assert(Spill.size() == 1);
+      BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              FramePtrReg)
+          .addReg(Spill[0].VGPR)
+          .addImm(Spill[0].Lane);
+    }
   }
 
   if (hasFP(MF)) {
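
The two restore paths above differ only in how the scalar comes back out of a VGPR: the memory path reloads into a temporary and uses V_READFIRSTLANE_B32 (take the value from the first active lane), while the reserved-lane path uses V_READLANE_B32 with the recorded lane index. A toy model, with the lane array and helpers invented for illustration:

#include <array>
#include <cassert>
#include <cstdint>

constexpr int NumLanes = 64;
using VGPR = std::array<uint32_t, NumLanes>;

// V_READFIRSTLANE_B32: scalar = the first active lane (lane 0 here, since
// the epilogue has just forced all lanes on).
uint32_t readFirstLane(const VGPR &V) { return V[0]; }

// V_READLANE_B32: scalar = the specific lane the FP was parked in.
uint32_t readLane(const VGPR &V, int Lane) { return V[Lane]; }

int main() {
  VGPR Reloaded{};            // pretend the scratch reload filled this
  Reloaded.fill(0x1234);      // memory path: every lane holds the saved FP
  assert(readFirstLane(Reloaded) == 0x1234);

  VGPR SpillVGPR{};           // lane-spill path: only one lane holds the FP
  SpillVGPR[5] = 0x1234;
  assert(readLane(SpillVGPR, 5) == 0x1234);
  return 0;
}
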
@@ -915,31 +995,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                  nullptr, MCRI->getDwarfRegNum(StackPtrReg, false)));
   }
 
-  unsigned ScratchExecCopy = AMDGPU::NoRegister;
   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
          : FuncInfo->getSGPRSpillVGPRs()) {
     if (!Reg.FI.hasValue())
       continue;
 
-    const SIRegisterInfo &TRI = TII->getRegisterInfo();
-    if (!ScratchExecCopy) {
-      // See emitPrologue
-      if (LiveRegs.empty()) {
-        LiveRegs.init(*ST.getRegisterInfo());
-        LiveRegs.addLiveOuts(MBB);
-        LiveRegs.stepBackward(*MBBI);
-      }
-
-      ScratchExecCopy = findScratchNonCalleeSaveRegister(
-          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
-      LiveRegs.removeReg(ScratchExecCopy);
-
-      const unsigned OrSaveExec =
-          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
-          .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
 
     buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                       FuncInfo->getScratchRSrcReg(),
@@ -1045,7 +1107,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MFI->isEntryFunction())
     return;
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
@@ -1080,12 +1142,14 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (!HasFP)
     return;
 
+  // We need to save and restore the current FP.
+
+  // 1: If there is already a VGPR with free lanes, use it. We
+  // may already have to pay the penalty for spilling a CSR VGPR.
   if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                     TargetStackID::SGPRSpill);
 
-    // If there is already a VGPR with free lanes, use it. We may already have
-    // to pay the penalty for spilling a CSR VGPR.
     if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
       llvm_unreachable("allocate SGPR spill should have worked");
 
@@ -1098,16 +1162,22 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     return;
   }
 
+  // 2: Next, try to save the FP in an unused SGPR.
   MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
 
   if (!MFI->SGPRForFPSaveRestoreCopy) {
-    // There's no free lane to spill, and no free register to save FP, so we're
-    // forced to spill another VGPR to use for the spill.
     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                     TargetStackID::SGPRSpill);
-    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
-      llvm_unreachable("allocate SGPR spill should have worked");
-    MFI->FramePointerSaveIndex = NewFI;
+
+    if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
+      // 3: There's no free lane to spill, and no free register to save FP, so
+      // we're forced to spill another VGPR to use for the spill.
+      MFI->FramePointerSaveIndex = NewFI;
+    } else {
+      // 4: If all else fails, spill the FP to memory.
+      MFI->FramePointerSaveIndex =
+          FrameInfo.CreateSpillStackObject(4, Align(4));
+    }
 
     LLVM_DEBUG(
       auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
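
Taken together, determineCalleeSaves now picks the FP save location in four steps: a free lane in an already-spilled VGPR, then a free non-callee-saved SGPR, then a lane in a newly spilled VGPR, and finally, new in this change, a plain scratch-memory slot. A compact sketch of that fallback chain, with the enum and predicates invented to stand in for the real SIMachineFunctionInfo queries:

#include <cassert>
#include <optional>

enum class FPSaveKind { FreeVGPRLane, FreeSGPR, NewVGPRLane, Memory };

// Mirrors the priority order in determineCalleeSaves; each argument
// stands in for the corresponding query on SIMachineFunctionInfo.
FPSaveKind chooseFPSave(bool HaveFreeVGPRLane,
                        std::optional<unsigned> FreeSGPR,
                        bool CanSpillNewVGPR) {
  if (HaveFreeVGPRLane)
    return FPSaveKind::FreeVGPRLane;   // 1: reuse an existing spill VGPR
  if (FreeSGPR)
    return FPSaveKind::FreeSGPR;       // 2: copy FP into an unused SGPR
  if (CanSpillNewVGPR)
    return FPSaveKind::NewVGPRLane;    // 3: pay for one more spilled VGPR
  return FPSaveKind::Memory;           // 4: last resort, spill FP to scratch
}

int main() {
  assert(chooseFPSave(true, std::nullopt, false) == FPSaveKind::FreeVGPRLane);
  assert(chooseFPSave(false, 42u, false) == FPSaveKind::FreeSGPR);
  assert(chooseFPSave(false, std::nullopt, true) == FPSaveKind::NewVGPRLane);
  assert(chooseFPSave(false, std::nullopt, false) == FPSaveKind::Memory);
  return 0;
}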