Skip to content

Commit ddda6b9

Browse files
amanasifkhalid and eduardo-vp
authored and committed
JIT: Do greedy 4-opt for backward jumps in 3-opt layout (dotnet#110277)
Part of dotnet#107749. Follow-up to dotnet#103450. Greedy 3-opt (i.e. an implementation that requires each move to be profitable on its own) is not well-suited for discovering profitable moves for backward jumps, as such movement requires an unrelated move to first place the source block lexically behind the destination block. Thus, the 3-opt implementation added in dotnet#103450 incorporates a 4-opt move for backward jumps, where we partition 1) before the destination block, 2) before the source block, and 3) directly after the source block. This 4-opt implementation can be expanded to search for the best cut point between the destination and source blocks to maximize its efficacy.
1 parent df6db4a commit ddda6b9

File tree

2 files changed

+102
-43
lines changed

2 files changed

+102
-43
lines changed

src/coreclr/jit/compiler.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -6233,7 +6233,8 @@ class Compiler
62336233
#endif // DEBUG
62346234

62356235
weight_t GetCost(BasicBlock* block, BasicBlock* next);
6236-
bool TrySwappingPartitions(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);
6236+
weight_t GetPartitionCostDelta(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);
6237+
void SwapPartitions(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);
62376238

62386239
void ConsiderEdge(FlowEdge* edge);
62396240
void AddNonFallthroughSuccs(unsigned blockPos);

src/coreclr/jit/fgopt.cpp

+100-42
Original file line number | Diff line number | Diff line change
@@ -4945,6 +4945,9 @@ weight_t Compiler::ThreeOptLayout::GetLayoutCost(unsigned startPos, unsigned end
49454945
// block - The block to consider creating fallthrough from
49464946
// next - The block to consider creating fallthrough into
49474947
//
4948+
// Returns:
4949+
// The cost of placing 'next' directly after 'block' in the layout (lower is better)
4950+
//
49484951
weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
49494952
{
49504953
assert(block != nullptr);
@@ -4964,8 +4967,8 @@ weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
49644967
}
49654968

49664969
//-----------------------------------------------------------------------------
4967-
// Compiler::ThreeOptLayout::TrySwappingPartitions: Evaluates the cost of swapping the given partitions.
4968-
// If it is profitable, write the swapped partitions back to 'blockOrder'.
4970+
// Compiler::ThreeOptLayout::GetPartitionCostDelta: Computes the current cost of the given partitions,
4971+
// and the cost of swapping S2 and S3, returning the difference between them.
49694972
//
49704973
// Parameters:
49714974
// s1Start - The starting position of the first partition
@@ -4975,24 +4978,10 @@ weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
49754978
// s4End - The ending position (inclusive) of the fourth partition
49764979
//
49774980
// Returns:
4978-
// True if the swap was performed, false otherwise
4979-
//
4980-
// Notes:
4981-
// Here is the proposed partition:
4982-
// S1: s1Start ~ s2Start-1
4983-
// S2: s2Start ~ s3Start-1
4984-
// S3: s3Start ~ s3End
4985-
// S4: remaining blocks
4986-
//
4987-
// After the swap:
4988-
// S1: s1Start ~ s2Start-1
4989-
// S3: s3Start ~ s3End
4990-
// S2: s2Start ~ s3Start-1
4991-
// S4: remaining blocks
4981+
// The cost of the proposed layout minus the cost of the current layout.
4982+
// A negative delta indicates the proposed layout is an improvement.
49924983
//
4993-
// If 's3End' and 's4End' are the same, the fourth partition doesn't exist.
4994-
//
4995-
bool Compiler::ThreeOptLayout::TrySwappingPartitions(
4984+
weight_t Compiler::ThreeOptLayout::GetPartitionCostDelta(
49964985
unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
49974986
{
49984987
BasicBlock* const s2Block = blockOrder[s2Start];
@@ -5019,16 +5008,38 @@ bool Compiler::ThreeOptLayout::TrySwappingPartitions(
50195008
newCost += s3BlockPrev->bbWeight;
50205009
}
50215010

5022-
// Check if the swap is profitable
5023-
if ((newCost >= currCost) || Compiler::fgProfileWeightsEqual(newCost, currCost, 0.001))
5024-
{
5025-
return false;
5026-
}
5011+
return newCost - currCost;
5012+
}
50275013

5028-
// We've found a profitable cut point. Continue with the swap.
5029-
JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB
5030-
"] (current partition cost = %f, new partition cost = %f)\n",
5031-
s2Block->bbNum, s3BlockPrev->bbNum, s3Block->bbNum, lastBlock->bbNum, currCost, newCost);
5014+
//-----------------------------------------------------------------------------
5015+
// Compiler::ThreeOptLayout::SwapPartitions: Swap the specified partitions.
5016+
// It is assumed (and asserted) that the swap is profitable.
5017+
//
5018+
// Parameters:
5019+
// s1Start - The starting position of the first partition
5020+
// s2Start - The starting position of the second partition
5021+
// s3Start - The starting position of the third partition
5022+
// s3End - The ending position (inclusive) of the third partition
5023+
// s4End - The ending position (inclusive) of the fourth partition
5024+
//
5025+
// Notes:
5026+
// Here is the proposed partition:
5027+
// S1: s1Start ~ s2Start-1
5028+
// S2: s2Start ~ s3Start-1
5029+
// S3: s3Start ~ s3End
5030+
// S4: remaining blocks
5031+
//
5032+
// After the swap:
5033+
// S1: s1Start ~ s2Start-1
5034+
// S3: s3Start ~ s3End
5035+
// S2: s2Start ~ s3Start-1
5036+
// S4: remaining blocks
5037+
//
5038+
// If 's3End' and 's4End' are the same, the fourth partition doesn't exist.
5039+
//
5040+
void Compiler::ThreeOptLayout::SwapPartitions(
5041+
unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
5042+
{
50325043
INDEBUG(const weight_t currLayoutCost = GetLayoutCost(s1Start, s4End));
50335044

50345045
// Swap the partitions
@@ -5062,8 +5073,6 @@ bool Compiler::ThreeOptLayout::TrySwappingPartitions(
50625073
Compiler::fgProfileWeightsEqual(newLayoutCost, currLayoutCost, 0.001));
50635074
}
50645075
#endif // DEBUG
5065-
5066-
return true;
50675076
}
50685077

50695078
//-----------------------------------------------------------------------------
@@ -5364,6 +5373,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
53645373

53655374
const bool isForwardJump = (srcPos < dstPos);
53665375
unsigned s2Start, s3Start, s3End;
5376+
weight_t costChange;
53675377

53685378
if (isForwardJump)
53695379
{
@@ -5378,35 +5388,83 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
53785388
// S3: dstPos ~ endPos
53795389
// S2: srcPos+1 ~ dstPos-1
53805390
// S4: remaining blocks
5381-
s2Start = srcPos + 1;
5382-
s3Start = dstPos;
5383-
s3End = endPos;
5391+
s2Start = srcPos + 1;
5392+
s3Start = dstPos;
5393+
s3End = endPos;
5394+
costChange = GetPartitionCostDelta(startPos, s2Start, s3Start, s3End, endPos);
53845395
}
53855396
else
53865397
{
5387-
5398+
// For backward jumps, we will employ a greedy 4-opt approach to find the ideal cut point
5399+
// between the destination and source blocks.
53885400
// Here is the proposed partition:
53895401
// S1: startPos ~ dstPos-1
5390-
// S2: dstPos ~ srcPos-1
5391-
// S3: srcPos
5402+
// S2: dstPos ~ s3Start-1
5403+
// S3: s3Start ~ srcPos
53925404
// S4: srcPos+1 ~ endPos
53935405
//
53945406
// After the swap:
53955407
// S1: startPos ~ dstPos-1
5396-
// S3: srcPos
5397-
// S2: dstPos ~ srcPos-1
5408+
// S3: s3Start ~ srcPos
5409+
// S2: dstPos ~ s3Start-1
53985410
// S4: srcPos+1 ~ endPos
5399-
s2Start = dstPos;
5400-
s3Start = srcPos;
5401-
s3End = srcPos;
5411+
s2Start = dstPos;
5412+
s3Start = srcPos;
5413+
s3End = srcPos;
5414+
costChange = BB_ZERO_WEIGHT;
5415+
5416+
// The cut points before S2 and after S3 are fixed.
5417+
// We will search for the optimal cut point before S3.
5418+
BasicBlock* const s2Block = blockOrder[s2Start];
5419+
BasicBlock* const s2BlockPrev = blockOrder[s2Start - 1];
5420+
BasicBlock* const lastBlock = blockOrder[s3End];
5421+
5422+
// Because the above cut points are fixed, don't waste time re-computing their costs.
5423+
// Instead, pre-compute them here.
5424+
const weight_t currCostBase =
5425+
GetCost(s2BlockPrev, s2Block) +
5426+
((s3End < endPos) ? GetCost(lastBlock, blockOrder[s3End + 1]) : lastBlock->bbWeight);
5427+
const weight_t newCostBase = GetCost(lastBlock, s2Block);
5428+
5429+
// Search for the ideal start to S3
5430+
for (unsigned position = s2Start + 1; position <= s3End; position++)
5431+
{
5432+
BasicBlock* const s3Block = blockOrder[position];
5433+
BasicBlock* const s3BlockPrev = blockOrder[position - 1];
5434+
5435+
// Don't consider any cut points that would break up call-finally pairs
5436+
if (s3Block->KindIs(BBJ_CALLFINALLYRET))
5437+
{
5438+
continue;
5439+
}
5440+
5441+
// Compute the cost delta of this partition
5442+
const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block);
5443+
const weight_t newCost =
5444+
newCostBase + GetCost(s2BlockPrev, s3Block) +
5445+
((s3End < endPos) ? GetCost(s3BlockPrev, blockOrder[s3End + 1]) : s3BlockPrev->bbWeight);
5446+
const weight_t delta = newCost - currCost;
5447+
5448+
if (delta < costChange)
5449+
{
5450+
costChange = delta;
5451+
s3Start = position;
5452+
}
5453+
}
54025454
}
54035455

54045456
// Continue evaluating partitions if this one isn't profitable
5405-
if (!TrySwappingPartitions(startPos, s2Start, s3Start, s3End, endPos))
5457+
if ((costChange >= BB_ZERO_WEIGHT) || Compiler::fgProfileWeightsEqual(costChange, BB_ZERO_WEIGHT, 0.001))
54065458
{
54075459
continue;
54085460
}
54095461

5462+
JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB "] (cost change = %f)\n",
5463+
blockOrder[s2Start]->bbNum, blockOrder[s3Start - 1]->bbNum, blockOrder[s3Start]->bbNum,
5464+
blockOrder[s3End]->bbNum, costChange);
5465+
5466+
SwapPartitions(startPos, s2Start, s3Start, s3End, endPos);
5467+
54105468
// Update the ordinals for the blocks we moved
54115469
for (unsigned i = s2Start; i <= endPos; i++)
54125470
{

0 commit comments

Comments
 (0)