Skip to content

Commit 83c8a85

Browse files
fix X86 DAGCombiner hang
1 parent 2236180 commit 83c8a85

File tree

4 files changed

+26
-11
lines changed

4 files changed

+26
-11
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45540,6 +45540,10 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
4554045540
if (!sd_match(Op, m_OneUse(m_BitwiseLogic(m_Value(LHS), m_Value(RHS)))))
4554145541
return SDValue();
4554245542

45543+
// WIP: Fixes one of the failures but triggers more.
45544+
//if (isBitwiseNot(Op))
45545+
// return SDValue();
45546+
4554345547
// If either operand was bitcast from DstVT, then perform logic with DstVT (at
4554445548
// least one of the getBitcast() will fold away).
4554545549
if (sd_match(LHS, m_OneUse(m_BitCast(m_SpecificVT(DstVT)))) ||
@@ -48138,8 +48142,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4813848142
// Check if the first operand is all zeros and Cond type is vXi1.
4813948143
// If this is an avx512 target we can improve the use of zero masking by
4814048144
// swapping the operands and inverting the condition.
48141-
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
48142-
Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
48145+
if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
48146+
Cond.hasOneUse() && Subtarget.hasAVX512() &&
48147+
CondVT.getVectorElementType() == MVT::i1 &&
4814348148
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
4814448149
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
4814548150
// Invert the cond to not(cond) : xor(op,allones)=not(op)

llvm/test/CodeGen/X86/avx512-select.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
743743
; X86-AVX512F-LABEL: julia_issue36955:
744744
; X86-AVX512F: # %bb.0:
745745
; X86-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
746-
; X86-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0
746+
; X86-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
747+
; X86-AVX512F-NEXT: knotw %k0, %k0
747748
; X86-AVX512F-NEXT: kmovw %k0, %eax
748749
; X86-AVX512F-NEXT: # kill: def $al killed $al killed $eax
749750
; X86-AVX512F-NEXT: vzeroupper
@@ -752,7 +753,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
752753
; X64-AVX512F-LABEL: julia_issue36955:
753754
; X64-AVX512F: # %bb.0:
754755
; X64-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
755-
; X64-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0
756+
; X64-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
757+
; X64-AVX512F-NEXT: knotw %k0, %k0
756758
; X64-AVX512F-NEXT: kmovw %k0, %eax
757759
; X64-AVX512F-NEXT: # kill: def $al killed $al killed $eax
758760
; X64-AVX512F-NEXT: vzeroupper
@@ -761,7 +763,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
761763
; X86-AVX512BW-LABEL: julia_issue36955:
762764
; X86-AVX512BW: # %bb.0:
763765
; X86-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0
764-
; X86-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0
766+
; X86-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
767+
; X86-AVX512BW-NEXT: knotw %k0, %k0
765768
; X86-AVX512BW-NEXT: kmovd %k0, %eax
766769
; X86-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
767770
; X86-AVX512BW-NEXT: vzeroupper
@@ -770,7 +773,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
770773
; X64-AVX512BW-LABEL: julia_issue36955:
771774
; X64-AVX512BW: # %bb.0:
772775
; X64-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0
773-
; X64-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0
776+
; X64-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
777+
; X64-AVX512BW-NEXT: knotw %k0, %k0
774778
; X64-AVX512BW-NEXT: kmovd %k0, %eax
775779
; X64-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
776780
; X64-AVX512BW-NEXT: vzeroupper

llvm/test/CodeGen/X86/vselect-zero.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) {
5656
;
5757
; AVX512-LABEL: test2:
5858
; AVX512: # %bb.0:
59-
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1
59+
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
60+
; AVX512-NEXT: knotw %k0, %k1
6061
; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
6162
; AVX512-NEXT: retq
6263
%cond = fcmp oeq <4 x float> %a, %b

llvm/test/CodeGen/X86/x86-interleaved-access.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind {
478478
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
479479
; AVX1-NEXT: vpcmpeqb %xmm0, %xmm5, %xmm0
480480
; AVX1-NEXT: vpxor %xmm0, %xmm2, %xmm0
481-
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
481+
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
482+
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
482483
; AVX1-NEXT: retq
483484
;
484485
; AVX2-LABEL: interleaved_load_vf16_i8_stride4:
@@ -517,7 +518,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind {
517518
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
518519
; AVX2-NEXT: vpcmpeqb %xmm0, %xmm3, %xmm0
519520
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
520-
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
521+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
522+
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
521523
; AVX2-NEXT: vzeroupper
522524
; AVX2-NEXT: retq
523525
;
@@ -633,7 +635,9 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind {
633635
; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm8, %ymm2
634636
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
635637
; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0
636-
; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
638+
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
639+
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
640+
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
637641
; AVX1-NEXT: retq
638642
;
639643
; AVX2-LABEL: interleaved_load_vf32_i8_stride4:
@@ -698,7 +702,8 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind {
698702
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7]
699703
; AVX2-NEXT: vpcmpeqb %ymm0, %ymm6, %ymm0
700704
; AVX2-NEXT: vpxor %ymm0, %ymm5, %ymm0
701-
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
705+
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
706+
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
702707
; AVX2-NEXT: retq
703708
;
704709
; AVX512-LABEL: interleaved_load_vf32_i8_stride4:

0 commit comments

Comments
 (0)