Skip to content

Commit 9fe86ca

Browse files
authored
Arm64 SVE: Optimise zero/allbits vectors the same as masks (dotnet#115566)
* Arm64 SVE: Better optimise zero/allbits vectors Fixes dotnet#114443 * IsVectorZero() should allow for all zero vectors and false masks that have been converted to vectors. * IsVectorAllBitsSet() should allow for all bits set vectors and true masks that have been converted to vectors. * IsMaskZero() should allow for false masks and all zero vectors that have been converted to masks. * IsMaskAllBitsSet() should allow for true masks and all bits set vectors that have been converted to masks. In addition: * Fix up all the errors caused by these changes. * Add a bunch of asmcheck tests * Remove all jit changes * Import constant vector 0 for createfalsemask * fix up tests * Only allow zero op3 contains for embedded ops * fix up tests * fix formatting * Import constant vector all bits set for createtruemask * Fix up tests * fix type of true mask variants * Allow common code to create the convert for CreateTrueMask* * Fix x86 build * unique test names in templates * simpler lowering * Don't remove embedded ops that may throw * Clear embOp when clearing contained * Import masks as gtNewVconNode * Remove pRetType * Add nullptr check * Add AOT TODO * Add codegen support for CNS_MASK * Add const vector folding for Arm64 * Add mask patterns * Move tests to SVE * Add isTrueMask() Change-Id: I456498f06d454e6ed57ce935e195b721e2c6d225 * fix tests Change-Id: I3d74a7292e2c880fcaba215dc1fb58369e4ad141 * fix formatting * Add EvaluateSimdPatternToMask * import vectors not masks * rename to EvaluateSimdMaskToPattern * Add unreached * formatting * fix IsTrueMask * remove emb op fix * fix morphing errors * Remove NI_Sve_CreateFalseMaskAll * rename TrueMaskAll to ConversionTrueMask and only use as such * remove gtNewSimdCnsVecTrueMaskPattern * Switch gtNewSimdAllTrueMaskNode to create constant mask * fix tests * FEATURE_HW_INTRINSICS checks * formatting * fix gtFoldExprConvertVecCnsToMask call * move gtFoldExprConvertVecCnsToMask call * Allow for masks being input to mask nodes * use 
IsFalseMask everywhere * Add simdSize to GenTreeMskCon * Use simdSize in GenTreeMskCon * cndsel op3 is a vector * use unsigned instead of unsigned char * Fix HasDisasmCheck * Hardcode mask simd size to 16 * formatting * remove TODO * Use simdBaseType for IsTrueMask arg * Add asserts to gtFoldExprHWIntrinsic * Simplify IsFalseMask * inline IsTrueMask/IsFalseMask * Use LABELEDDISPTREERANGE * Add header to gtFoldExprConvertVecCnsToMask * Remove FEATURE_HW_INTRINSICS around IsTrueMask/IsFalseMask * turn off fgMorphTryUseAllMaskVariant
1 parent 5951ad2 commit 9fe86ca

18 files changed

+1216
-172
lines changed

src/coreclr/jit/codegenarm64.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,6 +2338,50 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
23382338

23392339
break;
23402340
}
2341+
2342+
case GT_CNS_MSK:
2343+
{
2344+
GenTreeMskCon* mask = tree->AsMskCon();
2345+
emitter* emit = GetEmitter();
2346+
2347+
// Try every type until a match is found
2348+
2349+
if (mask->IsZero())
2350+
{
2351+
emit->emitInsSve_R(INS_sve_pfalse, EA_SCALABLE, targetReg, INS_OPTS_SCALABLE_B);
2352+
break;
2353+
}
2354+
2355+
insOpts opt = INS_OPTS_SCALABLE_B;
2356+
SveMaskPattern pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_BYTE, mask->gtSimdMaskVal);
2357+
2358+
if (pat == SveMaskPatternNone)
2359+
{
2360+
opt = INS_OPTS_SCALABLE_H;
2361+
pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_SHORT, mask->gtSimdMaskVal);
2362+
}
2363+
2364+
if (pat == SveMaskPatternNone)
2365+
{
2366+
opt = INS_OPTS_SCALABLE_S;
2367+
pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_INT, mask->gtSimdMaskVal);
2368+
}
2369+
2370+
if (pat == SveMaskPatternNone)
2371+
{
2372+
opt = INS_OPTS_SCALABLE_D;
2373+
pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_LONG, mask->gtSimdMaskVal);
2374+
}
2375+
2376+
// Should only ever create constant masks for valid patterns.
2377+
if (pat == SveMaskPatternNone)
2378+
{
2379+
unreached();
2380+
}
2381+
2382+
emit->emitIns_R_PATTERN(INS_sve_ptrue, EA_SCALABLE, targetReg, opt, (insSvePattern)pat);
2383+
break;
2384+
}
23412385
#endif // FEATURE_SIMD
23422386

23432387
default:

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3146,8 +3146,8 @@ class Compiler
31463146
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
31473147

31483148
#if defined(TARGET_ARM64)
3149-
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
3150-
GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize);
3149+
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType);
3150+
GenTree* gtNewSimdFalseMaskByteNode();
31513151
#endif
31523152

31533153
GenTree* gtNewSimdBinOpNode(genTreeOps op,
@@ -3715,6 +3715,7 @@ class Compiler
37153715

37163716
#if defined(FEATURE_HW_INTRINSICS)
37173717
GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree);
3718+
GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon);
37183719
#endif // FEATURE_HW_INTRINSICS
37193720

37203721
// Options to control behavior of gtTryRemoveBoxUpstreamEffects

src/coreclr/jit/gentree.cpp

Lines changed: 91 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -21959,8 +21959,8 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type,
2195921959
#if defined(TARGET_XARCH)
2196021960
return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512_ConvertVectorToMask, simdBaseJitType, simdSize);
2196121961
#elif defined(TARGET_ARM64)
21962-
// We use cmpne which requires an embedded mask.
21963-
GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
21962+
// ConvertVectorToMask uses cmpne which requires an embedded mask.
21963+
GenTree* trueMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_ConversionTrueMask, simdBaseJitType, simdSize);
2196421964
return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, op1, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize);
2196521965
#else
2196621966
#error Unsupported platform
@@ -31683,6 +31683,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3168331683
}
3168431684

3168531685
#if defined(FEATURE_MASKED_HW_INTRINSICS)
31686+
// Fold ConvertMaskToVector(ConvertVectorToMask(vec)) to vec
3168631687
if (tree->OperIsConvertMaskToVector())
3168731688
{
3168831689
GenTree* op = op1;
@@ -31715,6 +31716,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3171531716
}
3171631717
}
3171731718

31719+
// Fold ConvertVectorToMask(ConvertMaskToVector(mask)) to mask
3171831720
if (tree->OperIsConvertVectorToMask())
3171931721
{
3172031722
GenTree* op = op1;
@@ -31723,11 +31725,9 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3172331725
#if defined(TARGET_XARCH)
3172431726
tryHandle = op->OperIsHWIntrinsic();
3172531727
#elif defined(TARGET_ARM64)
31726-
if (op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll))
31727-
{
31728-
op = op2;
31729-
tryHandle = op->OperIsHWIntrinsic();
31730-
}
31728+
assert(op->OperIsHWIntrinsic(NI_Sve_ConversionTrueMask));
31729+
op = op2;
31730+
tryHandle = op->OperIsHWIntrinsic();
3173131731
#endif // TARGET_ARM64
3173231732

3173331733
if (tryHandle)
@@ -31813,53 +31813,12 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3181331813

3181431814
resultNode = gtNewVconNode(retType, &simdVal);
3181531815
}
31816+
#if defined(TARGET_XARCH)
3181631817
else if (tree->OperIsConvertVectorToMask())
3181731818
{
31818-
GenTreeVecCon* vecCon = cnsNode->AsVecCon();
31819-
GenTreeMskCon* mskCon = gtNewMskConNode(retType);
31820-
31821-
switch (vecCon->TypeGet())
31822-
{
31823-
case TYP_SIMD8:
31824-
{
31825-
EvaluateSimdCvtVectorToMask<simd8_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val);
31826-
break;
31827-
}
31828-
31829-
case TYP_SIMD12:
31830-
{
31831-
EvaluateSimdCvtVectorToMask<simd12_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val);
31832-
break;
31833-
}
31834-
31835-
case TYP_SIMD16:
31836-
{
31837-
EvaluateSimdCvtVectorToMask<simd16_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val);
31838-
break;
31839-
}
31840-
31841-
#if defined(TARGET_XARCH)
31842-
case TYP_SIMD32:
31843-
{
31844-
EvaluateSimdCvtVectorToMask<simd32_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val);
31845-
break;
31846-
}
31847-
31848-
case TYP_SIMD64:
31849-
{
31850-
EvaluateSimdCvtVectorToMask<simd64_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val);
31851-
break;
31852-
}
31853-
#endif // TARGET_XARCH
31854-
31855-
default:
31856-
{
31857-
unreached();
31858-
}
31859-
}
31860-
31861-
resultNode = mskCon;
31819+
resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon());
3186231820
}
31821+
#endif // TARGET_XARCH
3186331822
#endif // FEATURE_MASKED_HW_INTRINSICS
3186431823
else
3186531824
{
@@ -32702,6 +32661,10 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3270232661
switch (ni)
3270332662
{
3270432663
#ifdef TARGET_ARM64
32664+
case NI_Sve_ConvertVectorToMask:
32665+
resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon());
32666+
break;
32667+
3270532668
case NI_AdvSimd_MultiplyByScalar:
3270632669
case NI_AdvSimd_Arm64_MultiplyByScalar:
3270732670
{
@@ -32843,7 +32806,18 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3284332806
break;
3284432807
}
3284532808

32846-
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
32809+
#if defined(TARGET_ARM64)
32810+
if (ni == NI_Sve_ConditionalSelect)
32811+
{
32812+
assert(!op1->IsVectorAllBitsSet() && !op1->IsVectorZero());
32813+
}
32814+
else
32815+
{
32816+
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsFalseMask());
32817+
}
32818+
#endif
32819+
32820+
if (op1->IsVectorAllBitsSet() || op1->IsTrueMask(simdBaseType))
3284732821
{
3284832822
if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0)
3284932823
{
@@ -32857,7 +32831,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3285732831
return op2;
3285832832
}
3285932833

32860-
if (op1->IsVectorZero())
32834+
if (op1->IsVectorZero() || op1->IsFalseMask())
3286132835
{
3286232836
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
3286332837
}
@@ -32909,6 +32883,70 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3290932883
}
3291032884
return resultNode;
3291132885
}
32886+
32887+
//------------------------------------------------------------------------------
32888+
// gtFoldExprConvertVecCnsToMask: Folds a constant vector plus conversion to
32889+
// mask into a constant mask.
32890+
//
32891+
// Arguments:
32892+
// tree - The convert vector to mask node
32893+
// vecCon - The vector constant converted by the convert
32894+
//
32895+
// Return Value:
32896+
// Returns a constant mask
32897+
//
32898+
GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon)
32899+
{
32900+
assert(tree->OperIsConvertVectorToMask());
32901+
assert(vecCon == tree->Op(1) || vecCon == tree->Op(2));
32902+
32903+
var_types retType = tree->TypeGet();
32904+
var_types simdBaseType = tree->GetSimdBaseType();
32905+
GenTreeMskCon* mskCon = gtNewMskConNode(retType);
32906+
32907+
switch (vecCon->TypeGet())
32908+
{
32909+
case TYP_SIMD8:
32910+
{
32911+
EvaluateSimdCvtVectorToMask<simd8_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val);
32912+
break;
32913+
}
32914+
32915+
case TYP_SIMD12:
32916+
{
32917+
EvaluateSimdCvtVectorToMask<simd12_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val);
32918+
break;
32919+
}
32920+
32921+
case TYP_SIMD16:
32922+
{
32923+
EvaluateSimdCvtVectorToMask<simd16_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val);
32924+
break;
32925+
}
32926+
32927+
#if defined(TARGET_XARCH)
32928+
case TYP_SIMD32:
32929+
{
32930+
EvaluateSimdCvtVectorToMask<simd32_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val);
32931+
break;
32932+
}
32933+
32934+
case TYP_SIMD64:
32935+
{
32936+
EvaluateSimdCvtVectorToMask<simd64_t>(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val);
32937+
break;
32938+
}
32939+
#endif // TARGET_XARCH
32940+
32941+
default:
32942+
{
32943+
unreached();
32944+
}
32945+
}
32946+
32947+
return mskCon;
32948+
}
32949+
3291232950
#endif // FEATURE_HW_INTRINSICS
3291332951

3291432952
//------------------------------------------------------------------------

src/coreclr/jit/gentree.h

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,8 +1802,8 @@ struct GenTree
18021802
inline bool IsVectorCreate() const;
18031803
inline bool IsVectorAllBitsSet() const;
18041804
inline bool IsVectorBroadcast(var_types simdBaseType) const;
1805-
inline bool IsMaskAllBitsSet() const;
1806-
inline bool IsMaskZero() const;
1805+
inline bool IsTrueMask(var_types simdBaseType) const;
1806+
inline bool IsFalseMask() const;
18071807

18081808
inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);
18091809

@@ -9550,54 +9550,46 @@ inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const
95509550
return false;
95519551
}
95529552

9553-
inline bool GenTree::IsMaskAllBitsSet() const
9553+
//------------------------------------------------------------------------
9554+
// IsTrueMask: Is the given node a true mask
9555+
//
9556+
// Arguments:
9557+
// simdBaseType - the base type of the mask
9558+
//
9559+
// Returns true if the node is a true mask for the given simdBaseType.
9560+
//
9561+
// Note that a byte true mask (1111...) is different to an int true mask
9562+
// (10001000...), therefore the simdBaseType of the mask needs to be
9563+
// taken into account.
9564+
//
9565+
inline bool GenTree::IsTrueMask(var_types simdBaseType) const
95549566
{
95559567
#ifdef TARGET_ARM64
9556-
static_assert_no_msg(AreContiguous(NI_Sve_CreateTrueMaskByte, NI_Sve_CreateTrueMaskDouble,
9557-
NI_Sve_CreateTrueMaskInt16, NI_Sve_CreateTrueMaskInt32,
9558-
NI_Sve_CreateTrueMaskInt64, NI_Sve_CreateTrueMaskSByte,
9559-
NI_Sve_CreateTrueMaskSingle, NI_Sve_CreateTrueMaskUInt16,
9560-
NI_Sve_CreateTrueMaskUInt32, NI_Sve_CreateTrueMaskUInt64));
9568+
// TODO-SVE: For agnostic VL, vector type may not be simd16_t
95619569

9562-
if (OperIsHWIntrinsic())
9570+
if (IsCnsMsk())
95639571
{
9564-
NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId();
9565-
if (id == NI_Sve_ConvertMaskToVector)
9566-
{
9567-
GenTree* op1 = AsHWIntrinsic()->Op(1);
9568-
assert(op1->OperIsHWIntrinsic());
9569-
id = op1->AsHWIntrinsic()->GetHWIntrinsicId();
9570-
}
9571-
return ((id == NI_Sve_CreateTrueMaskAll) ||
9572-
((id >= NI_Sve_CreateTrueMaskByte) && (id <= NI_Sve_CreateTrueMaskUInt64)));
9572+
return SveMaskPatternAll == EvaluateSimdMaskToPattern<simd16_t>(simdBaseType, AsMskCon()->gtSimdMaskVal);
95739573
}
9574-
95759574
#endif
9575+
95769576
return false;
95779577
}
95789578

9579-
inline bool GenTree::IsMaskZero() const
9579+
//------------------------------------------------------------------------
9580+
// IsFalseMask: Is the given node a false mask
9581+
//
9582+
// Returns true if the node is a false mask, i.e. all zeros
9583+
//
9584+
inline bool GenTree::IsFalseMask() const
95809585
{
95819586
#ifdef TARGET_ARM64
9582-
static_assert_no_msg(AreContiguous(NI_Sve_CreateFalseMaskByte, NI_Sve_CreateFalseMaskDouble,
9583-
NI_Sve_CreateFalseMaskInt16, NI_Sve_CreateFalseMaskInt32,
9584-
NI_Sve_CreateFalseMaskInt64, NI_Sve_CreateFalseMaskSByte,
9585-
NI_Sve_CreateFalseMaskSingle, NI_Sve_CreateFalseMaskUInt16,
9586-
NI_Sve_CreateFalseMaskUInt32, NI_Sve_CreateFalseMaskUInt64));
9587-
9588-
if (OperIsHWIntrinsic())
9587+
if (IsCnsMsk())
95899588
{
9590-
NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId();
9591-
if (id == NI_Sve_ConvertMaskToVector)
9592-
{
9593-
GenTree* op1 = AsHWIntrinsic()->Op(1);
9594-
assert(op1->OperIsHWIntrinsic());
9595-
id = op1->AsHWIntrinsic()->GetHWIntrinsicId();
9596-
}
9597-
return ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64));
9589+
return AsMskCon()->IsZero();
95989590
}
9599-
96009591
#endif
9592+
96019593
return false;
96029594
}
96039595

0 commit comments

Comments
 (0)