@@ -496,13 +496,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
496
496
{
497
497
mask |= refPosition->registerAssignment ;
498
498
}
499
-
500
- #ifdef TARGET_ARM64
501
- if ((refPosition != nullptr ) && refPosition->isFirstRefPositionOfConsecutiveRegisters ())
502
- {
503
- mask |= LsraLimitFPSetForConsecutive;
504
- }
505
- #endif
506
499
}
507
500
508
501
return mask;
@@ -662,7 +655,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
662
655
firstColdLoc = MaxLocation;
663
656
664
657
#ifdef DEBUG
665
- maxNodeLocation = 0 ;
658
+ maxNodeLocation = 0 ;
659
+ consecutiveRegistersLocation = 0 ;
660
+
666
661
activeRefPosition = nullptr ;
667
662
currBuildNode = nullptr ;
668
663
@@ -4901,6 +4896,24 @@ void LinearScan::allocateRegisters()
4901
4896
}
4902
4897
}
4903
4898
prevLocation = currentLocation;
4899
+ #ifdef TARGET_ARM64
4900
+
4901
+ #ifdef DEBUG
4902
+ if (hasConsecutiveRegister)
4903
+ {
4904
+ if (currentRefPosition.needsConsecutive )
4905
+ {
4906
+ // Remember the location at which consecutive registers are being allocated, so that
4907
+ // other refpositions at that same location can be identified.
4908
+ consecutiveRegistersLocation = currentLocation;
4909
+ }
4910
+ else if (consecutiveRegistersLocation < currentLocation)
4911
+ {
4912
+ consecutiveRegistersLocation = MinLocation;
4913
+ }
4914
+ }
4915
+ #endif // DEBUG
4916
+ #endif // TARGET_ARM64
4904
4917
4905
4918
// get previous refposition, then current refpos is the new previous
4906
4919
if (currentReferent != nullptr )
@@ -11683,49 +11696,53 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
11683
11696
Interval* assignedInterval = spillCandidateRegRecord->assignedInterval ;
11684
11697
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr ;
11685
11698
11686
- // Can and should the interval in this register be spilled for this one,
11687
- // if we don't find a better alternative?
11699
+ // Can and should the interval in this register be spilled for this one,
11700
+ // if we don't find a better alternative?
11688
11701
11702
+ weight_t currentSpillWeight = 0 ;
11689
11703
#ifdef TARGET_ARM64
11690
- if (assignedInterval == nullptr )
11691
- {
11692
- // Ideally we should not be seeing this candidate because it is not assigned to
11693
- // any interval. But based on that, we cannot determine if it is a good spill
11694
- // candidate or not. Skip processing it.
11695
- continue ;
11696
- }
11697
-
11698
11704
if ((recentRefPosition != nullptr ) && linearScan->isRefPositionActive (recentRefPosition, thisLocation) &&
11699
11705
(recentRefPosition->needsConsecutive ))
11700
11706
{
11701
11707
continue ;
11702
11708
}
11703
- #endif // TARGET_ARM64
11704
-
11705
- if ((linearScan->getNextIntervalRef (spillCandidateRegNum, regType) == thisLocation) &&
11706
- !assignedInterval->getNextRefPosition ()->RegOptional ())
11707
- {
11708
- continue ;
11709
- }
11710
- if (!linearScan->isSpillCandidate (currentInterval, refPosition, spillCandidateRegRecord))
11709
+ else if (assignedInterval != nullptr )
11710
+ #endif
11711
11711
{
11712
- continue ;
11713
- }
11712
+ if ((linearScan->getNextIntervalRef (spillCandidateRegNum, regType) == thisLocation) &&
11713
+ !assignedInterval->getNextRefPosition ()->RegOptional ())
11714
+ {
11715
+ continue ;
11716
+ }
11717
+ if (!linearScan->isSpillCandidate (currentInterval, refPosition, spillCandidateRegRecord))
11718
+ {
11719
+ continue ;
11720
+ }
11714
11721
11715
- weight_t currentSpillWeight = 0 ;
11716
- if ((recentRefPosition != nullptr ) &&
11717
- (recentRefPosition->RegOptional () && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef ())))
11718
- {
11719
- // We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
11720
- // is not an actual ref. In those cases, we will reload in future (next) refPosition.
11721
- // For such cases, consider the spill cost of next refposition.
11722
- // See notes in "spillInterval()".
11723
- RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition ();
11724
- if (reloadRefPosition != nullptr )
11722
+ if ((recentRefPosition != nullptr ) && (recentRefPosition->RegOptional () &&
11723
+ !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef ())))
11725
11724
{
11726
- currentSpillWeight = linearScan->getWeight (reloadRefPosition);
11725
+ // We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
11726
+ // is not an actual ref. In those cases, we will reload in future (next) refPosition.
11727
+ // For such cases, consider the spill cost of next refposition.
11728
+ // See notes in "spillInterval()".
11729
+ RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition ();
11730
+ if (reloadRefPosition != nullptr )
11731
+ {
11732
+ currentSpillWeight = linearScan->getWeight (reloadRefPosition);
11733
+ }
11727
11734
}
11728
11735
}
11736
+ #ifdef TARGET_ARM64
11737
+ else
11738
+ {
11739
+ // Ideally we should not be seeing this candidate because it is not assigned to
11740
+ // any interval. But it is possible for certain scenarios. One of them is that
11741
+ // `refPosition` needs consecutive registers and we decided to pick a mix of free+busy
11742
+ // registers. This candidate is part of that set and is free and hence is not assigned
11743
+ // to any interval.
11744
+ }
11745
+ #endif // TARGET_ARM64
11729
11746
11730
11747
// Only consider spillCost if we were not able to calculate weight of reloadRefPosition.
11731
11748
if (currentSpillWeight == 0 )
@@ -11875,7 +11892,16 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
11875
11892
#ifdef DEBUG
11876
11893
// The assigned should be non-null, and should have a recentRefPosition, however since
11877
11894
// this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert).
11878
- if (!hasAssignedInterval)
11895
+ if (!hasAssignedInterval
11896
+ #ifdef TARGET_ARM64
11897
+ // We could see a candidate that doesn't have assignedInterval because allocation is
11898
+ // happening for `refPosition` that needs consecutive registers and we decided to pick
11899
+ // a mix of free+busy registers. This candidate is part of that set and is free and hence
11900
+ // is not assigned to any interval.
11901
+
11902
+ && !refPosition->needsConsecutive
11903
+ #endif
11904
+ )
11879
11905
{
11880
11906
assert (!" Spill candidate has no assignedInterval recentRefPosition" );
11881
11907
}
@@ -11988,6 +12014,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
11988
12014
*registerScore = NONE;
11989
12015
#endif
11990
12016
12017
+ #ifdef TARGET_ARM64
12018
+ assert (!needsConsecutiveRegisters || refPosition->needsConsecutive );
12019
+ #endif
12020
+
11991
12021
reset (currentInterval, refPosition);
11992
12022
11993
12023
// process data-structures
@@ -12036,7 +12066,19 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12036
12066
}
12037
12067
12038
12068
#ifdef DEBUG
12039
- candidates = linearScan->stressLimitRegs (refPosition, candidates);
12069
+ #ifdef TARGET_ARM64
12070
+ if (!refPosition->needsConsecutive && (linearScan->consecutiveRegistersLocation == refPosition->nodeLocation ))
12071
+ {
12072
+ // If a method has consecutive registers and we are assigning to refPositions that are not part
12073
+ // of consecutive registers, but are live at same location, skip the limit stress for them, because
12074
+ // there are high chances that many registers are busy for consecutive requirements and we don't
12075
+ // have enough remaining for other refpositions (like operands).
12076
+ }
12077
+ else
12078
+ #endif
12079
+ {
12080
+ candidates = linearScan->stressLimitRegs (refPosition, candidates);
12081
+ }
12040
12082
#endif
12041
12083
assert (candidates != RBM_NONE);
12042
12084
@@ -12186,6 +12228,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12186
12228
}
12187
12229
}
12188
12230
12231
+ #ifdef DEBUG
12232
+ regMaskTP inUseOrBusyRegsMask = RBM_NONE;
12233
+ #endif
12234
+
12189
12235
// Eliminate candidates that are in-use or busy.
12190
12236
if (!found)
12191
12237
{
@@ -12195,6 +12241,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12195
12241
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation ;
12196
12242
candidates &= ~busyRegs;
12197
12243
12244
+ #ifdef DEBUG
12245
+ inUseOrBusyRegsMask |= busyRegs;
12246
+ #endif
12247
+
12198
12248
// Also eliminate as busy any register with a conflicting fixed reference at this or
12199
12249
// the next location.
12200
12250
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
@@ -12210,6 +12260,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12210
12260
(refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1 ))))
12211
12261
{
12212
12262
candidates &= ~checkConflictBit;
12263
+ #ifdef DEBUG
12264
+ inUseOrBusyRegsMask |= checkConflictBit;
12265
+ #endif
12213
12266
}
12214
12267
}
12215
12268
candidates |= fixedRegMask;
@@ -12226,12 +12279,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12226
12279
prevRegBit = genRegMask (prevRegRec->regNum );
12227
12280
if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE))
12228
12281
{
12229
- #ifdef TARGET_ARM64
12230
- // If this is allocating for consecutive register, we need to make sure that
12231
- // we allocate register, whose consecutive registers are also free.
12232
12282
if (!needsConsecutiveRegisters)
12233
- #endif
12234
12283
{
12284
+ // Reuse the previous register only when consecutive registers are not needed. When allocating
12285
+ // consecutive registers, we must pick a register whose following registers are also free.
12235
12286
candidates = prevRegBit;
12236
12287
found = true ;
12237
12288
#ifdef DEBUG
@@ -12245,13 +12296,6 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12245
12296
prevRegBit = RBM_NONE;
12246
12297
}
12247
12298
12248
- if (!found && (candidates == RBM_NONE))
12249
- {
12250
- assert (refPosition->RegOptional ());
12251
- currentInterval->assignedReg = nullptr ;
12252
- return RBM_NONE;
12253
- }
12254
-
12255
12299
// TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics.
12256
12300
// It needs to be re-engineered with this refactoring.
12257
12301
// In non-debug builds, this will simply get optimized away
@@ -12260,9 +12304,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12260
12304
reverseSelect = linearScan->doReverseSelect ();
12261
12305
#endif // DEBUG
12262
12306
12263
- #ifdef TARGET_ARM64
12264
12307
if (needsConsecutiveRegisters)
12265
12308
{
12309
+ #ifdef TARGET_ARM64
12266
12310
regMaskTP busyConsecutiveCandidates = RBM_NONE;
12267
12311
if (refPosition->isFirstRefPositionOfConsecutiveRegisters ())
12268
12312
{
@@ -12287,12 +12331,46 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
12287
12331
12288
12332
if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
12289
12333
{
12290
- noway_assert (!" Not sufficient consecutive registers available." );
12334
+ #ifdef DEBUG
12335
+ // Need to make sure that candidates has N consecutive registers to assign
12336
+ if (linearScan->getStressLimitRegs () != LSRA_LIMIT_NONE)
12337
+ {
12338
+ // If the refPosition needs consecutive registers, then we want to make sure that
12339
+ // the candidates have at least one range of N registers that are consecutive, where N
12340
+ // is the number of consecutive registers needed.
12341
+ // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one
12342
+ // such range that is consecutive. Next, append that range to the `candidates`.
12343
+ //
12344
+ regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask;
12345
+ regMaskTP overallLimitCandidates;
12346
+ regMaskTP limitConsecutiveResult =
12347
+ linearScan->filterConsecutiveCandidates (limitCandidatesForConsecutive, refPosition->regCount ,
12348
+ &overallLimitCandidates);
12349
+ assert (limitConsecutiveResult != RBM_NONE);
12350
+
12351
+ unsigned startRegister = BitOperations::BitScanForward (limitConsecutiveResult);
12352
+
12353
+ regMaskTP registersNeededMask = (1ULL << refPosition->regCount ) - 1 ;
12354
+ candidates |= (registersNeededMask << startRegister);
12355
+ }
12356
+
12357
+ if (candidates == RBM_NONE)
12358
+ #endif // DEBUG
12359
+ {
12360
+ noway_assert (!" Not sufficient consecutive registers available." );
12361
+ }
12291
12362
}
12363
+ #endif // TARGET_ARM64
12292
12364
}
12293
12365
else
12294
- #endif // TARGET_ARM64
12295
12366
{
12367
+ if (!found && (candidates == RBM_NONE))
12368
+ {
12369
+ assert (refPosition->RegOptional ());
12370
+ currentInterval->assignedReg = nullptr ;
12371
+ return RBM_NONE;
12372
+ }
12373
+
12296
12374
freeCandidates = linearScan->getFreeCandidates (candidates ARM_ARG (regType));
12297
12375
}
12298
12376
0 commit comments