Skip to content

Commit e4c4aac

Browse files
committed
[SimplifyCFG] Improve range reducing for switches
1 parent 4346aaf commit e4c4aac

File tree

2 files changed

+210
-3
lines changed

2 files changed

+210
-3
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include <cstddef>
8181
#include <cstdint>
8282
#include <iterator>
83+
#include <limits>
8384
#include <map>
8485
#include <optional>
8586
#include <set>
@@ -6748,6 +6749,71 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
67486749
return true;
67496750
}
67506751

6752+
/// Try to reduce the range of cases with an unreachable default.
6753+
static bool
6754+
ReduceSwitchRangeWithUnreachableDefault(SwitchInst *SI,
6755+
const SmallVectorImpl<int64_t> &Values,
6756+
uint64_t Base, IRBuilder<> &Builder) {
6757+
bool HasDefault =
6758+
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6759+
if (HasDefault)
6760+
return false;
6761+
6762+
// Try reducing the range to (idx + offset) & mask
6763+
// Mask out common high bits
6764+
uint64_t CommonOnes = std::numeric_limits<uint64_t>::max();
6765+
uint64_t CommonZeros = std::numeric_limits<uint64_t>::max();
6766+
for (auto &V : Values) {
6767+
CommonOnes &= (uint64_t)V;
6768+
CommonZeros &= ~(uint64_t)V;
6769+
}
6770+
uint64_t CommonBits = countl_one(CommonOnes | CommonZeros);
6771+
unsigned LowBits = 64 - CommonBits;
6772+
uint64_t Mask = (1ULL << LowBits) - 1;
6773+
if (Mask == std::numeric_limits<uint64_t>::max())
6774+
return false;
6775+
// Now we have some case values in the additive group Z/(2**k)Z.
6776+
// Find the largest hole in the group and move it to back.
6777+
uint64_t MaxHole = 0;
6778+
uint64_t BestOffset = 0;
6779+
for (unsigned I = 0; I < Values.size(); ++I) {
6780+
uint64_t Hole = ((uint64_t)Values[I] -
6781+
(uint64_t)(I == 0 ? Values.back() : Values[I - 1])) &
6782+
Mask;
6783+
if (Hole > MaxHole) {
6784+
MaxHole = Hole;
6785+
BestOffset = Mask - (uint64_t)Values[I] + 1;
6786+
}
6787+
}
6788+
6789+
SmallVector<int64_t, 4> NewValues;
6790+
for (auto &V : Values)
6791+
NewValues.push_back(
6792+
(((int64_t)(((uint64_t)V + BestOffset) & Mask)) << CommonBits) >>
6793+
CommonBits);
6794+
6795+
llvm::sort(NewValues);
6796+
if (!isSwitchDense(NewValues))
6797+
// Transform didn't create a dense switch.
6798+
return false;
6799+
6800+
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6801+
APInt Offset(Ty->getBitWidth(), BestOffset - Base);
6802+
auto *Index = Builder.CreateAnd(
6803+
Builder.CreateAdd(SI->getCondition(), ConstantInt::get(Ty, Offset)),
6804+
Mask);
6805+
SI->replaceUsesOfWith(SI->getCondition(), Index);
6806+
6807+
for (auto Case : SI->cases()) {
6808+
auto *Orig = Case.getCaseValue();
6809+
auto CaseVal =
6810+
(Orig->getValue() + Offset).trunc(LowBits).sext(Ty->getBitWidth());
6811+
Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, CaseVal)));
6812+
}
6813+
6814+
return true;
6815+
}
6816+
67516817
/// Try to transform a switch that has "holes" in it to a contiguous sequence
67526818
/// of cases.
67536819
///
@@ -6763,9 +6829,8 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
67636829
if (CondTy->getIntegerBitWidth() > 64 ||
67646830
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
67656831
return false;
6766-
// Only bother with this optimization if there are more than 3 switch cases;
6767-
// SDAG will only bother creating jump tables for 4 or more cases.
6768-
if (SI->getNumCases() < 4)
6832+
// Ignore switches with fewer than three cases.
6833+
if (SI->getNumCases() < 3)
67696834
return false;
67706835

67716836
// This transform is agnostic to the signedness of the input or case values. We
@@ -6786,6 +6851,9 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
67866851
for (auto &V : Values)
67876852
V -= (uint64_t)(Base);
67886853

6854+
if (ReduceSwitchRangeWithUnreachableDefault(SI, Values, Base, Builder))
6855+
return true;
6856+
67896857
// Now we have signed numbers that have been shifted so that, given enough
67906858
// precision, there are no negative values. Since the rest of the transform
67916859
// is bitwise only, we switch now to an unsigned representation.

llvm/test/Transforms/SimplifyCFG/rangereduce.ll

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,142 @@ three:
315315
ret i32 99783
316316
}
317317

318+
; Switch on an i32 with cases 0, 1 and 255 whose default block (%bb2) is
; unreachable. The CHECK lines expect the switch to disappear: the condition
; is rewritten as (%0 + 1) & 255, truncated to i8 and adjusted by -1.
define i8 @pr67842(i32 %0) {
319+
; CHECK-LABEL: @pr67842(
320+
; CHECK-NEXT: start:
321+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0:%.*]], 1
322+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 255
323+
; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP2]] to i8
324+
; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i8 [[SWITCH_IDX_CAST]], -1
325+
; CHECK-NEXT: ret i8 [[SWITCH_OFFSET]]
326+
;
327+
start:
328+
switch i32 %0, label %bb2 [
329+
i32 0, label %bb5
330+
i32 1, label %bb4
331+
i32 255, label %bb1
332+
]
333+
334+
bb2: ; preds = %start
335+
unreachable
336+
337+
bb4: ; preds = %start
338+
br label %bb5
339+
340+
bb1: ; preds = %start
341+
br label %bb5
342+
343+
bb5: ; preds = %start, %bb1, %bb4
344+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
345+
ret i8 %.0
346+
}
347+
348+
; Switch on an i32 with cases 128, 129 and 255 whose default block (%bb2) is
; unreachable. The CHECK lines expect the condition to be rewritten as
; (%0 + -127) & 127 and the switch to be replaced by a trunc plus an add of -1.
define i8 @reduce_masked_common_high_bits(i32 %0) {
349+
; CHECK-LABEL: @reduce_masked_common_high_bits(
350+
; CHECK-NEXT: start:
351+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0:%.*]], -127
352+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 127
353+
; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP2]] to i8
354+
; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i8 [[SWITCH_IDX_CAST]], -1
355+
; CHECK-NEXT: ret i8 [[SWITCH_OFFSET]]
356+
;
357+
start:
358+
switch i32 %0, label %bb2 [
359+
i32 128, label %bb5
360+
i32 129, label %bb4
361+
i32 255, label %bb1
362+
]
363+
364+
bb2: ; preds = %start
365+
unreachable
366+
367+
bb4: ; preds = %start
368+
br label %bb5
369+
370+
bb1: ; preds = %start
371+
br label %bb5
372+
373+
bb5: ; preds = %start, %bb1, %bb4
374+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
375+
ret i8 %.0
376+
}
377+
378+
; Negative test: switch on an i32 with cases 128, 129 and 511 and an
; unreachable default block (%bb2). The CHECK lines expect the switch and all
; of its case values to be left unchanged.
define i8 @reduce_masked_common_high_bits_fail(i32 %0) {
379+
; CHECK-LABEL: @reduce_masked_common_high_bits_fail(
380+
; CHECK-NEXT: start:
381+
; CHECK-NEXT: switch i32 [[TMP0:%.*]], label [[BB2:%.*]] [
382+
; CHECK-NEXT: i32 128, label [[BB5:%.*]]
383+
; CHECK-NEXT: i32 129, label [[BB4:%.*]]
384+
; CHECK-NEXT: i32 511, label [[BB1:%.*]]
385+
; CHECK-NEXT: ]
386+
; CHECK: bb2:
387+
; CHECK-NEXT: unreachable
388+
; CHECK: bb4:
389+
; CHECK-NEXT: br label [[BB5]]
390+
; CHECK: bb1:
391+
; CHECK-NEXT: br label [[BB5]]
392+
; CHECK: bb5:
393+
; CHECK-NEXT: [[DOT0:%.*]] = phi i8 [ -1, [[BB1]] ], [ 1, [[BB4]] ], [ 0, [[START:%.*]] ]
394+
; CHECK-NEXT: ret i8 [[DOT0]]
395+
;
396+
start:
397+
switch i32 %0, label %bb2 [
398+
i32 128, label %bb5
399+
i32 129, label %bb4
400+
i32 511, label %bb1
401+
]
402+
403+
bb2: ; preds = %start
404+
unreachable
405+
406+
bb4: ; preds = %start
407+
br label %bb5
408+
409+
bb1: ; preds = %start
410+
br label %bb5
411+
412+
bb5: ; preds = %start, %bb1, %bb4
413+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
414+
ret i8 %.0
415+
}
416+
417+
; Negative test: the optimization must not trigger because the default block
; (%bb2, which returns 24) is reachable. The CHECK lines expect the case values
; 0, 1 and 255 to be kept as they are.
417+
418+
define i8 @reduce_masked_default_reachable(i32 %0) {
419+
; CHECK-LABEL: @reduce_masked_default_reachable(
420+
; CHECK-NEXT: start:
421+
; CHECK-NEXT: switch i32 [[TMP0:%.*]], label [[COMMON_RET:%.*]] [
422+
; CHECK-NEXT: i32 0, label [[BB5:%.*]]
423+
; CHECK-NEXT: i32 1, label [[BB4:%.*]]
424+
; CHECK-NEXT: i32 255, label [[BB1:%.*]]
425+
; CHECK-NEXT: ]
426+
; CHECK: common.ret:
427+
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i8 [ [[DOT0:%.*]], [[BB5]] ], [ 24, [[START:%.*]] ]
428+
; CHECK-NEXT: ret i8 [[COMMON_RET_OP]]
429+
; CHECK: bb4:
430+
; CHECK-NEXT: br label [[BB5]]
431+
; CHECK: bb1:
432+
; CHECK-NEXT: br label [[BB5]]
433+
; CHECK: bb5:
434+
; CHECK-NEXT: [[DOT0]] = phi i8 [ -1, [[BB1]] ], [ 1, [[BB4]] ], [ 0, [[START]] ]
435+
; CHECK-NEXT: br label [[COMMON_RET]]
436+
;
437+
start:
438+
switch i32 %0, label %bb2 [
439+
i32 0, label %bb5
440+
i32 1, label %bb4
441+
i32 255, label %bb1
442+
]
443+
444+
bb2: ; preds = %start
445+
ret i8 24
446+
447+
bb4: ; preds = %start
448+
br label %bb5
449+
450+
bb1: ; preds = %start
451+
br label %bb5
452+
453+
bb5: ; preds = %start, %bb1, %bb4
454+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
455+
ret i8 %.0
456+
}

0 commit comments

Comments
 (0)