Skip to content

Commit ddb2472

Browse files
committed
Add pattern matching for SVE intrinsics that operate on mask operands
Introduces `fgMorphTryUseAllMaskVariant` for ARM64, which looks for various named intrinsics whose operands look 'mask-like'. For example, source operands originating from Sve.CreateTrueMask* may be recognized as masks, causing the JIT to prefer the predicated version of the instruction when generating code for the intrinsic. It also inspects ConditionalSelect intrinsic nodes to match instructions with governing predicates. The transform runs during morph.

After this patch it is possible to emit the following instructions:
* ZIP{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.ZipLow, Sve.ZipHigh)
* UZP{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.UnzipEven, Sve.UnzipOdd)
* TRN{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.TransposeEven, Sve.TransposeOdd)
* REV <Pd>.<T>, <Pn>.<T> (Sve.ReverseElement)
* AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.And)
* BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.BitwiseClear)
* EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.Xor)
* ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.Or)
* SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B (Sve.ConditionalSelect)

Contributes towards #101970
1 parent 472c3b0 commit ddb2472

File tree

7 files changed

+260
-2
lines changed

7 files changed

+260
-2
lines changed

src/coreclr/jit/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ set( JIT_ARM64_SOURCES
244244
unwindarm64.cpp
245245
hwintrinsicarm64.cpp
246246
hwintrinsiccodegenarm64.cpp
247+
morpharm64.cpp
247248
)
248249

249250
set( JIT_ARMV6_SOURCES

src/coreclr/jit/compiler.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3127,6 +3127,7 @@ class Compiler
31273127

31283128
#if defined(TARGET_ARM64)
31293129
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
3130+
GenTree* gtNewSimdAllFalseMaskNode(unsigned simdSize);
31303131
#endif
31313132

31323133
GenTree* gtNewSimdBinOpNode(genTreeOps op,
@@ -6682,6 +6683,12 @@ class Compiler
66826683
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
66836684
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
66846685
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
6686+
#ifdef TARGET_ARM64
6687+
bool canMorphVectorOperandToMask(GenTree* node);
6688+
bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node);
6689+
GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent);
6690+
GenTree* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node);
6691+
#endif // TARGET_ARM64
66856692
#endif // FEATURE_HW_INTRINSICS
66866693
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
66876694
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);

src/coreclr/jit/gentree.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20589,7 +20589,24 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
2058920589
}
2059020590
}
2059120591
#elif defined(TARGET_ARM64)
20592-
return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
20592+
NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId();
20593+
switch (id)
20594+
{
20595+
case NI_Sve_And:
20596+
case NI_Sve_BitwiseClear:
20597+
case NI_Sve_Xor:
20598+
case NI_Sve_Or:
20599+
// Mask variant is not RMW, but the vector variant is.
20600+
if (varTypeIsMask(this))
20601+
{
20602+
assert(AsHWIntrinsic()->GetOperandCount() == 3);
20603+
return false;
20604+
}
20605+
break;
20606+
default:
20607+
break;
20608+
}
20609+
return HWIntrinsicInfo::HasRMWSemantics(id);
2059320610
#else
2059420611
return false;
2059520612
#endif

src/coreclr/jit/gentree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6617,6 +6617,10 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
66176617
}
66186618
}
66196619

6620+
#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS)
6621+
bool HasAllMaskVariant();
6622+
#endif
6623+
66206624
private:
66216625
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
66226626

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3248,7 +3248,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
32483248
}
32493249

32503250
//------------------------------------------------------------------------
3251-
// gtNewSimdEmbeddedMaskNode: Create an embedded mask
3251+
// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true
32523252
//
32533253
// Arguments:
32543254
// simdBaseJitType -- the base jit type of the nodes being masked
@@ -3262,4 +3262,18 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne
32623262
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
32633263
}
32643264

3265+
//------------------------------------------------------------------------
3266+
// gtNewSimdAllFalseMaskNode: Create an embedded mask with all bits set to false
3267+
//
3268+
// Arguments:
3269+
// simdSize -- the simd size of the nodes being masked
3270+
//
3271+
// Return Value:
3272+
// The mask
3273+
//
3274+
GenTree* Compiler::gtNewSimdAllFalseMaskNode(unsigned simdSize)
3275+
{
3276+
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_BYTE, simdSize);
3277+
}
3278+
32653279
#endif // FEATURE_HW_INTRINSICS

src/coreclr/jit/morph.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9219,6 +9219,15 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
92199219
}
92209220
}
92219221

9222+
#ifdef TARGET_ARM64
9223+
optimizedTree = fgMorphTryUseAllMaskVariant(node);
9224+
if (optimizedTree != nullptr)
9225+
{
9226+
optimizedTree->SetMorphed(this);
9227+
return optimizedTree;
9228+
}
9229+
#endif
9230+
92229231
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
92239232
var_types retType = node->TypeGet();
92249233
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();

src/coreclr/jit/morpharm64.cpp

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5+
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6+
XX XX
7+
XX Arm64 Specific Morph XX
8+
XX XX
9+
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10+
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11+
*/
12+
13+
#include "jitpch.h"
14+
#ifdef _MSC_VER
15+
#pragma hdrstop
16+
#endif
17+
18+
#ifdef FEATURE_MASKED_HW_INTRINSICS
19+
20+
//------------------------------------------------------------------------
21+
// HasAllMaskVariant: Does this intrinsic have a variant where all of it's operands
22+
// are mask types?
23+
//
24+
// Return Value:
25+
// true if an all-mask variant exists for the intrinsic, else false.
26+
//
27+
bool GenTreeHWIntrinsic::HasAllMaskVariant()
28+
{
29+
switch (GetHWIntrinsicId())
30+
{
31+
// ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
32+
// ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
33+
// UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
34+
// UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
35+
// TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
36+
// TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
37+
// REV <Pd>.<T>, <Pn>.<T>
38+
case NI_Sve_ZipHigh:
39+
case NI_Sve_ZipLow:
40+
case NI_Sve_UnzipOdd:
41+
case NI_Sve_UnzipEven:
42+
case NI_Sve_TransposeEven:
43+
case NI_Sve_TransposeOdd:
44+
case NI_Sve_ReverseElement:
45+
return true;
46+
47+
default:
48+
return false;
49+
}
50+
}
51+
52+
//------------------------------------------------------------------------
53+
// canMorphVectorOperandToMask: Can this vector operand be converted to a
54+
// node with type TYP_MASK easily?
55+
//
56+
bool Compiler::canMorphVectorOperandToMask(GenTree* node)
57+
{
58+
return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero();
59+
}
60+
61+
//------------------------------------------------------------------------
62+
// canMorphAllVectorOperandsToMasks: Can all vector operands to this node
63+
// be converted to a node with type
64+
// TYP_MASK easily?
65+
//
66+
bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node)
67+
{
68+
bool allMaskConversions = true;
69+
for (size_t i = 1; i <= node->GetOperandCount() && allMaskConversions; i++)
70+
{
71+
allMaskConversions &= canMorphVectorOperandToMask(node->Op(i));
72+
}
73+
74+
return allMaskConversions;
75+
}
76+
77+
//------------------------------------------------------------------------
78+
// doMorphVectorOperandToMask: Morph a vector node that is close to a mask
79+
// node into a mask node.
80+
//
81+
// Return value:
82+
// The morphed tree, or nullptr if the transform is not applicable.
83+
//
84+
GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent)
85+
{
86+
if (varTypeIsMask(node))
87+
{
88+
// Already a mask, nothing to do.
89+
return node;
90+
}
91+
else if (node->OperIsConvertMaskToVector())
92+
{
93+
// Replace node with op1.
94+
return node->AsHWIntrinsic()->Op(1);
95+
}
96+
else if (node->IsVectorZero())
97+
{
98+
// Morph the vector of zeroes into mask of zeroes.
99+
GenTree* mask = gtNewSimdAllFalseMaskNode(parent->GetSimdSize());
100+
mask->SetMorphed(this);
101+
return mask;
102+
}
103+
104+
return nullptr;
105+
}
106+
107+
//-----------------------------------------------------------------------------------------------------
108+
// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are
109+
// mask nodes. If all operands to this node are 'suggesting' that they
110+
// originate closely from a mask, but are of vector types, then morph the
111+
// operands as appropriate to use mask types instead. 'Suggesting'
112+
// is defined by the canMorphVectorOperandToMask function.
113+
//
114+
// Arguments:
115+
// tree - The HWIntrinsic to try and optimize.
116+
//
117+
// Return Value:
118+
// The fully morphed tree if a change was made, else nullptr.
119+
//
120+
GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node)
121+
{
122+
if (node->HasAllMaskVariant() && canMorphAllVectorOperandsToMasks(node))
123+
{
124+
for (size_t i = 1; i <= node->GetOperandCount(); i++)
125+
{
126+
node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node);
127+
}
128+
129+
node->gtType = TYP_MASK;
130+
return node;
131+
}
132+
133+
if (node->OperIsHWIntrinsic(NI_Sve_ConditionalSelect))
134+
{
135+
GenTree* mask = node->Op(1);
136+
GenTree* left = node->Op(2);
137+
GenTree* right = node->Op(3);
138+
139+
if (left->OperIsHWIntrinsic())
140+
{
141+
assert(canMorphVectorOperandToMask(mask));
142+
143+
if (canMorphAllVectorOperandsToMasks(left->AsHWIntrinsic()))
144+
{
145+
// At this point we know the 'left' node is a HWINTRINSIC node and all of its operands look like
146+
// mask nodes.
147+
//
148+
// The ConditionalSelect could be substituted for the named intrinsic in it's 'left' operand and
149+
// transformed to a mask-type operation for some named intrinsics. Doing so will encourage codegen
150+
// to emit predicate variants of instructions rather than vector variants, and we can lose some
151+
// unnecessary mask->vector conversion nodes.
152+
GenTreeHWIntrinsic* actualOp = left->AsHWIntrinsic();
153+
154+
switch (actualOp->GetHWIntrinsicId())
155+
{
156+
// AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
157+
// BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
158+
// EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
159+
// ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
160+
case NI_Sve_And:
161+
case NI_Sve_BitwiseClear:
162+
case NI_Sve_Xor:
163+
case NI_Sve_Or:
164+
if (right->IsVectorZero())
165+
{
166+
// The operation is equivalent for all lane arrangements, because it is a bitwise operation.
167+
// It's safe to bash the type to 8-bit required to assemble the instruction.
168+
actualOp->SetSimdBaseJitType(CORINFO_TYPE_BYTE);
169+
170+
actualOp->ResetHWIntrinsicId(actualOp->GetHWIntrinsicId(), this,
171+
doMorphVectorOperandToMask(mask, actualOp),
172+
doMorphVectorOperandToMask(actualOp->Op(1), actualOp),
173+
doMorphVectorOperandToMask(actualOp->Op(2), actualOp));
174+
actualOp->gtType = TYP_MASK;
175+
return actualOp;
176+
}
177+
break;
178+
default:
179+
break;
180+
}
181+
}
182+
}
183+
184+
// If we got this far, then there was no match on any predicated operation.
185+
// ConditionalSelect itself can be a mask operation for 8-bit lane types, using
186+
// SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B
187+
if (canMorphAllVectorOperandsToMasks(node))
188+
{
189+
for (size_t i = 1; i <= node->GetOperandCount(); i++)
190+
{
191+
node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node);
192+
}
193+
194+
// Again this operation is bitwise, so the lane arrangement doesn't matter.
195+
// We can bash the type to 8-bit.
196+
node->SetSimdBaseJitType(CORINFO_TYPE_BYTE);
197+
198+
node->gtType = TYP_MASK;
199+
return node;
200+
}
201+
}
202+
203+
return nullptr;
204+
}
205+
206+
#endif // FEATURE_MASKED_HW_INTRINSICS

0 commit comments

Comments
 (0)