Skip to content

Commit 7a57f6d

Browse files
authored
Update handling of limited register during consecutive registers allocation (#84588)
* Fix issue to handle candidates that don't have assignedInterval in spillcost/prevReg * Do not let limitRegs reduce the number of candidates If we find out that there are no candidates free/busy for refPositions that need consecutive registers, have at least one range of registers in the candidates such that allocation is possible. * getConsecutiveCandidates update if freeCandidate=RBM_NONE Intially, we were just returning RBM_NONE if we don't find any freeCandidates, but instead should try if we can find out if there are any busy candidates that we should try them out. * Relax limitStressRegs for refpositions live at consecutive register position If consecutive registers are being allocated, other refpositions that are live at the same location might not have enough registers left to be assigned because all registers are busy. As such, introduce a way to track if we are assigning at the location of consecutive registers, and if yes, do not take jitstressregs limit into account. * Update minRegCount for registerAssignment For consecutive register, also include the register count needed for "minimum register requirement" when limiting the registers. * Remove LsraLimitFPSetForConsecutive With other conditions in place, no need to have LsraLimitFPSetForConsecutive. * Added an assert * misc changes * jit format * Use BitOperations::BitScanForward() * review feedback
1 parent a6a5dfb commit 7a57f6d

File tree

4 files changed

+201
-125
lines changed

4 files changed

+201
-125
lines changed

src/coreclr/jit/lsra.cpp

Lines changed: 133 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -496,13 +496,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
496496
{
497497
mask |= refPosition->registerAssignment;
498498
}
499-
500-
#ifdef TARGET_ARM64
501-
if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters())
502-
{
503-
mask |= LsraLimitFPSetForConsecutive;
504-
}
505-
#endif
506499
}
507500

508501
return mask;
@@ -662,7 +655,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
662655
firstColdLoc = MaxLocation;
663656

664657
#ifdef DEBUG
665-
maxNodeLocation = 0;
658+
maxNodeLocation = 0;
659+
consecutiveRegistersLocation = 0;
660+
666661
activeRefPosition = nullptr;
667662
currBuildNode = nullptr;
668663

@@ -4901,6 +4896,24 @@ void LinearScan::allocateRegisters()
49014896
}
49024897
}
49034898
prevLocation = currentLocation;
4899+
#ifdef TARGET_ARM64
4900+
4901+
#ifdef DEBUG
4902+
if (hasConsecutiveRegister)
4903+
{
4904+
if (currentRefPosition.needsConsecutive)
4905+
{
4906+
// track all the refpositions around the location that is also
4907+
// allocating consecutive registers.
4908+
consecutiveRegistersLocation = currentLocation;
4909+
}
4910+
else if (consecutiveRegistersLocation < currentLocation)
4911+
{
4912+
consecutiveRegistersLocation = MinLocation;
4913+
}
4914+
}
4915+
#endif // DEBUG
4916+
#endif // TARGET_ARM64
49044917

49054918
// get previous refposition, then current refpos is the new previous
49064919
if (currentReferent != nullptr)
@@ -11683,49 +11696,53 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
1168311696
Interval* assignedInterval = spillCandidateRegRecord->assignedInterval;
1168411697
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;
1168511698

11686-
// Can and should the interval in this register be spilled for this one,
11687-
// if we don't find a better alternative?
11699+
// Can and should the interval in this register be spilled for this one,
11700+
// if we don't find a better alternative?
1168811701

11702+
weight_t currentSpillWeight = 0;
1168911703
#ifdef TARGET_ARM64
11690-
if (assignedInterval == nullptr)
11691-
{
11692-
// Ideally we should not be seeing this candidate because it is not assigned to
11693-
// any interval. But based on that, we cannot determine if it is a good spill
11694-
// candidate or not. Skip processing it.
11695-
continue;
11696-
}
11697-
1169811704
if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) &&
1169911705
(recentRefPosition->needsConsecutive))
1170011706
{
1170111707
continue;
1170211708
}
11703-
#endif // TARGET_ARM64
11704-
11705-
if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
11706-
!assignedInterval->getNextRefPosition()->RegOptional())
11707-
{
11708-
continue;
11709-
}
11710-
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
11709+
else if (assignedInterval != nullptr)
11710+
#endif
1171111711
{
11712-
continue;
11713-
}
11712+
if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
11713+
!assignedInterval->getNextRefPosition()->RegOptional())
11714+
{
11715+
continue;
11716+
}
11717+
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
11718+
{
11719+
continue;
11720+
}
1171411721

11715-
weight_t currentSpillWeight = 0;
11716-
if ((recentRefPosition != nullptr) &&
11717-
(recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
11718-
{
11719-
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
11720-
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
11721-
// For such cases, consider the spill cost of next refposition.
11722-
// See notes in "spillInterval()".
11723-
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
11724-
if (reloadRefPosition != nullptr)
11722+
if ((recentRefPosition != nullptr) && (recentRefPosition->RegOptional() &&
11723+
!(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
1172511724
{
11726-
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
11725+
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
11726+
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
11727+
// For such cases, consider the spill cost of next refposition.
11728+
// See notes in "spillInterval()".
11729+
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
11730+
if (reloadRefPosition != nullptr)
11731+
{
11732+
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
11733+
}
1172711734
}
1172811735
}
11736+
#ifdef TARGET_ARM64
11737+
else
11738+
{
11739+
// Ideally we should not be seeing this candidate because it is not assigned to
11740+
// any interval. But it is possible for certain scenarios. One of them is that
11741+
// `refPosition` needs consecutive registers and we decided to pick a mix of free+busy
11742+
// registers. This candidate is part of that set and is free and hence is not assigned
11743+
// to any interval.
11744+
}
11745+
#endif // TARGET_ARM64
1172911746

1173011747
// Only consider spillCost if we were not able to calculate weight of reloadRefPosition.
1173111748
if (currentSpillWeight == 0)
@@ -11875,7 +11892,16 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
1187511892
#ifdef DEBUG
1187611893
// The assigned should be non-null, and should have a recentRefPosition, however since
1187711894
// this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert).
11878-
if (!hasAssignedInterval)
11895+
if (!hasAssignedInterval
11896+
#ifdef TARGET_ARM64
11897+
// We could see a candidate that doesn't have assignedInterval because allocation is
11898+
// happening for `refPosition` that needs consecutive registers and we decided to pick
11899+
// a mix of free+busy registers. This candidate is part of that set and is free and hence
11900+
// is not assigned to any interval.
11901+
11902+
&& !refPosition->needsConsecutive
11903+
#endif
11904+
)
1187911905
{
1188011906
assert(!"Spill candidate has no assignedInterval recentRefPosition");
1188111907
}
@@ -11988,6 +12014,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1198812014
*registerScore = NONE;
1198912015
#endif
1199012016

12017+
#ifdef TARGET_ARM64
12018+
assert(!needsConsecutiveRegisters || refPosition->needsConsecutive);
12019+
#endif
12020+
1199112021
reset(currentInterval, refPosition);
1199212022

1199312023
// process data-structures
@@ -12036,7 +12066,19 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1203612066
}
1203712067

1203812068
#ifdef DEBUG
12039-
candidates = linearScan->stressLimitRegs(refPosition, candidates);
12069+
#ifdef TARGET_ARM64
12070+
if (!refPosition->needsConsecutive && (linearScan->consecutiveRegistersLocation == refPosition->nodeLocation))
12071+
{
12072+
// If a method has consecutive registers and we are assigning to refPositions that are not part
12073+
// of consecutive registers, but are live at same location, skip the limit stress for them, because
12074+
// there are high chances that many registers are busy for consecutive requirements and we don't
12075+
// have enough remaining for other refpositions (like operands).
12076+
}
12077+
else
12078+
#endif
12079+
{
12080+
candidates = linearScan->stressLimitRegs(refPosition, candidates);
12081+
}
1204012082
#endif
1204112083
assert(candidates != RBM_NONE);
1204212084

@@ -12186,6 +12228,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1218612228
}
1218712229
}
1218812230

12231+
#ifdef DEBUG
12232+
regMaskTP inUseOrBusyRegsMask = RBM_NONE;
12233+
#endif
12234+
1218912235
// Eliminate candidates that are in-use or busy.
1219012236
if (!found)
1219112237
{
@@ -12195,6 +12241,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1219512241
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
1219612242
candidates &= ~busyRegs;
1219712243

12244+
#ifdef DEBUG
12245+
inUseOrBusyRegsMask |= busyRegs;
12246+
#endif
12247+
1219812248
// Also eliminate as busy any register with a conflicting fixed reference at this or
1219912249
// the next location.
1220012250
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
@@ -12210,6 +12260,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1221012260
(refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1))))
1221112261
{
1221212262
candidates &= ~checkConflictBit;
12263+
#ifdef DEBUG
12264+
inUseOrBusyRegsMask |= checkConflictBit;
12265+
#endif
1221312266
}
1221412267
}
1221512268
candidates |= fixedRegMask;
@@ -12226,12 +12279,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1222612279
prevRegBit = genRegMask(prevRegRec->regNum);
1222712280
if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE))
1222812281
{
12229-
#ifdef TARGET_ARM64
12230-
// If this is allocating for consecutive register, we need to make sure that
12231-
// we allocate register, whose consecutive registers are also free.
1223212282
if (!needsConsecutiveRegisters)
12233-
#endif
1223412283
{
12284+
// If this is allocating for consecutive register, we need to make sure that
12285+
// we allocate register, whose consecutive registers are also free.
1223512286
candidates = prevRegBit;
1223612287
found = true;
1223712288
#ifdef DEBUG
@@ -12245,13 +12296,6 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1224512296
prevRegBit = RBM_NONE;
1224612297
}
1224712298

12248-
if (!found && (candidates == RBM_NONE))
12249-
{
12250-
assert(refPosition->RegOptional());
12251-
currentInterval->assignedReg = nullptr;
12252-
return RBM_NONE;
12253-
}
12254-
1225512299
// TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics.
1225612300
// It needs to be re-engineered with this refactoring.
1225712301
// In non-debug builds, this will simply get optimized away
@@ -12260,9 +12304,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1226012304
reverseSelect = linearScan->doReverseSelect();
1226112305
#endif // DEBUG
1226212306

12263-
#ifdef TARGET_ARM64
1226412307
if (needsConsecutiveRegisters)
1226512308
{
12309+
#ifdef TARGET_ARM64
1226612310
regMaskTP busyConsecutiveCandidates = RBM_NONE;
1226712311
if (refPosition->isFirstRefPositionOfConsecutiveRegisters())
1226812312
{
@@ -12287,12 +12331,46 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
1228712331

1228812332
if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
1228912333
{
12290-
noway_assert(!"Not sufficient consecutive registers available.");
12334+
#ifdef DEBUG
12335+
// Need to make sure that candidates has N consecutive registers to assign
12336+
if (linearScan->getStressLimitRegs() != LSRA_LIMIT_NONE)
12337+
{
12338+
// If the refPosition needs consecutive registers, then we want to make sure that
12339+
// the candidates have atleast one range of N registers that are consecutive, where N
12340+
// is the number of consecutive registers needed.
12341+
// Remove the `inUseOrBusyRegsMask` from the original candidates list and find one
12342+
// such range that is consecutive. Next, append that range to the `candidates`.
12343+
//
12344+
regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask;
12345+
regMaskTP overallLimitCandidates;
12346+
regMaskTP limitConsecutiveResult =
12347+
linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount,
12348+
&overallLimitCandidates);
12349+
assert(limitConsecutiveResult != RBM_NONE);
12350+
12351+
unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult);
12352+
12353+
regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1;
12354+
candidates |= (registersNeededMask << startRegister);
12355+
}
12356+
12357+
if (candidates == RBM_NONE)
12358+
#endif // DEBUG
12359+
{
12360+
noway_assert(!"Not sufficient consecutive registers available.");
12361+
}
1229112362
}
12363+
#endif // TARGET_ARM64
1229212364
}
1229312365
else
12294-
#endif // TARGET_ARM64
1229512366
{
12367+
if (!found && (candidates == RBM_NONE))
12368+
{
12369+
assert(refPosition->RegOptional());
12370+
currentInterval->assignedReg = nullptr;
12371+
return RBM_NONE;
12372+
}
12373+
1229612374
freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
1229712375
}
1229812376

src/coreclr/jit/lsra.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -792,9 +792,6 @@ class LinearScan : public LinearScanInterface
792792
#elif defined(TARGET_ARM64)
793793
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
794794
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
795-
// LsraLimitFPSetForConsecutive is used for stress mode and gives few extra registers to satisfy
796-
// the requirements for allocating consecutive registers.
797-
static const regMaskTP LsraLimitFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7);
798795
#elif defined(TARGET_X86)
799796
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
800797
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
@@ -2006,9 +2003,13 @@ class LinearScan : public LinearScanInterface
20062003
int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
20072004
#ifdef TARGET_ARM64
20082005
int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
2009-
#endif
2006+
#endif // TARGET_ARM64
20102007
#endif // FEATURE_HW_INTRINSICS
20112008

2009+
#ifdef DEBUG
2010+
LsraLocation consecutiveRegistersLocation;
2011+
#endif // DEBUG
2012+
20122013
int BuildPutArgStk(GenTreePutArgStk* argNode);
20132014
#if FEATURE_ARG_SPLIT
20142015
int BuildPutArgSplit(GenTreePutArgSplit* tree);

0 commit comments

Comments
 (0)