Skip to content

Commit 5415b73

Browse files
Additional cleanup and simplification of hwintrinsic instruction sets for xarch (#116406)
* Merge the instruction sets for SSE3, SSSE3, SSE4.1, and POPCNT into SSE4.2
* Merge the instruction sets for BMI1, BMI2, FMA, and LZCNT into AVX2
* Additional instruction set merging and cleanup
* Grouping the xarch instructions based on the target instruction sets
* Various additional cleanup to ensure tests are all passing
* Adjust the NAOT smoke tests
* Ensure the JIT/EE version guid is updated
1 parent 9a6b6c8 commit 5415b73

35 files changed

+1569
-2767
lines changed

src/coreclr/inc/corinfoinstructionset.h

Lines changed: 172 additions & 426 deletions
Large diffs are not rendered by default.

src/coreclr/inc/jiteeversionguid.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737

3838
#include <minipal/guid.h>
3939

40-
constexpr GUID JITEEVersionIdentifier = { /* f22d9c39-8d24-4e4d-86aa-7b883aecf97f */
41-
0xf22d9c39,
42-
0x8d24,
43-
0x4e4d,
44-
{0x86, 0xaa, 0x7b, 0x88, 0x3a, 0xec, 0xf9, 0x7f}
40+
constexpr GUID JITEEVersionIdentifier = { /* 7a77e6d9-7280-439d-bb9d-9887b4516a86 */
41+
0x7a77e6d9,
42+
0x7280,
43+
0x439d,
44+
{0xbb, 0x9d, 0x98, 0x87, 0xb4, 0x51, 0x6a, 0x86}
4545
};
4646

4747
#endif // JIT_EE_VERSIONING_GUID_H

src/coreclr/jit/assertionprop.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -251,17 +251,17 @@ bool IntegralRange::Contains(int64_t value) const
251251
case NI_X86Base_CompareScalarUnorderedLessThan:
252252
case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual:
253253
case NI_X86Base_CompareScalarUnorderedGreaterThan:
254-
case NI_SSE41_TestC:
255-
case NI_SSE41_TestZ:
256-
case NI_SSE41_TestNotZAndNotC:
254+
case NI_SSE42_TestC:
255+
case NI_SSE42_TestZ:
256+
case NI_SSE42_TestNotZAndNotC:
257257
case NI_AVX_TestC:
258258
case NI_AVX_TestZ:
259259
case NI_AVX_TestNotZAndNotC:
260260
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::One};
261261

262262
case NI_X86Base_Extract:
263-
case NI_SSE41_Extract:
264-
case NI_SSE41_X64_Extract:
263+
case NI_SSE42_Extract:
264+
case NI_SSE42_X64_Extract:
265265
case NI_Vector128_ToScalar:
266266
case NI_Vector256_ToScalar:
267267
case NI_Vector512_ToScalar:
@@ -274,12 +274,12 @@ bool IntegralRange::Contains(int64_t value) const
274274
}
275275
break;
276276

277-
case NI_BMI1_TrailingZeroCount:
278-
case NI_BMI1_X64_TrailingZeroCount:
279-
case NI_LZCNT_LeadingZeroCount:
280-
case NI_LZCNT_X64_LeadingZeroCount:
281-
case NI_POPCNT_PopCount:
282-
case NI_POPCNT_X64_PopCount:
277+
case NI_AVX2_LeadingZeroCount:
278+
case NI_AVX2_TrailingZeroCount:
279+
case NI_AVX2_X64_LeadingZeroCount:
280+
case NI_AVX2_X64_TrailingZeroCount:
281+
case NI_SSE42_PopCount:
282+
case NI_SSE42_X64_PopCount:
283283
// Note: No advantage in using a precise range for IntegralRange.
284284
// Example: IntCns = 42 gives [0..127] with a non-precise range, [42,42] with a precise range.
285285
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ByteMax};

src/coreclr/jit/codegen.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ class CodeGen final : public CodeGenInterface
5050
// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
5151
void genSSE2BitwiseOp(GenTree* treeNode);
5252

53-
// Generates SSE41 code for the given tree as a round operation
54-
void genSSE41RoundOp(GenTreeOp* treeNode);
53+
// Generates SSE42 code for the given tree as a round operation
54+
void genSSE42RoundOp(GenTreeOp* treeNode);
5555

5656
instruction simdAlignedMovIns()
5757
{
@@ -937,14 +937,10 @@ class CodeGen final : public CodeGenInterface
937937

938938
void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
939939
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
940-
void genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
941-
void genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
940+
void genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
942941
void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
943-
void genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
944-
void genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
942+
void genFmaIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
945943
void genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions);
946-
void genLZCNTIntrinsic(GenTreeHWIntrinsic* node);
947-
void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node);
948944
void genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins);
949945
void genX86SerializeIntrinsic(GenTreeHWIntrinsic* node);
950946

src/coreclr/jit/codegenxarch.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc
14101410
inst_Mov(TYP_INT, reg0, opReg, /* canSkip */ false);
14111411

14121412
// reg1 = opReg[63:32]
1413-
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
1413+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
14141414
{
14151415
inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1, INS_OPTS_NONE);
14161416
}
@@ -2427,7 +2427,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
24272427

24282428
inst_Mov(TYP_FLOAT, targetReg, reg0, /* canSkip */ false);
24292429
const emitAttr size = emitTypeSize(TYP_SIMD8);
2430-
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
2430+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
24312431
{
24322432
GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1, INS_OPTS_NONE);
24332433
}
@@ -4855,7 +4855,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
48554855
// Only the non-RMW case here.
48564856
assert(tree->OperIsShiftOrRotate());
48574857
assert(tree->GetRegNum() != REG_NA);
4858-
assert(tree->AsOp()->gtOp1->isUsedFromReg() || compiler->compIsaSupportedDebugOnly(InstructionSet_BMI2));
4858+
assert(tree->AsOp()->gtOp1->isUsedFromReg() || compiler->compIsaSupportedDebugOnly(InstructionSet_AVX2));
48594859

48604860
genConsumeOperands(tree->AsOp());
48614861

@@ -4902,7 +4902,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
49024902
{
49034903
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
49044904

4905-
if (tree->OperIsRotate() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) &&
4905+
if (tree->OperIsRotate() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2) &&
49064906
!tree->gtSetFlags())
49074907
{
49084908
// If we have a contained source operand, we must emit rorx.
@@ -4930,7 +4930,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
49304930
return;
49314931
}
49324932
}
4933-
else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) &&
4933+
else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2) &&
49344934
!tree->gtSetFlags())
49354935
{
49364936
// Emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr.
@@ -5758,8 +5758,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57585758
}
57595759

57605760
case NI_X86Base_Extract:
5761-
case NI_SSE41_Extract:
5762-
case NI_SSE41_X64_Extract:
5761+
case NI_SSE42_Extract:
5762+
case NI_SSE42_X64_Extract:
57635763
case NI_AVX_ExtractVector128:
57645764
case NI_AVX2_ExtractVector128:
57655765
case NI_AVX512_ExtractVector128:
@@ -5771,8 +5771,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57715771

57725772
if (intrinsicId == NI_X86Base_Extract)
57735773
{
5774-
// The encoding that supports containment is SSE4.1 only
5775-
ins = INS_pextrw_sse41;
5774+
ins = INS_pextrw_sse42;
57765775
}
57775776

57785777
// The hardware intrinsics take unsigned bytes between [0, 255].
@@ -7742,7 +7741,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
77427741
}
77437742

77447743
//-----------------------------------------------------------------------------------------
7745-
// genSSE41RoundOp - generate SSE41 code for the given tree as a round operation
7744+
// genSSE42RoundOp - generate SSE42 code for the given tree as a round operation
77467745
//
77477746
// Arguments:
77487747
// treeNode - tree node
@@ -7751,16 +7750,16 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
77517750
// None
77527751
//
77537752
// Assumptions:
7754-
// i) SSE4.1 is supported by the underlying hardware
7753+
// i) SSE4.2 is supported by the underlying hardware
77557754
// ii) treeNode oper is a GT_INTRINSIC
77567755
// iii) treeNode type is a floating point type
77577756
// iv) treeNode is not used from memory
77587757
// v) tree oper is NI_System_Math{F}_Round, _Ceiling, _Floor, or _Truncate
77597758
// vi) caller of this routine needs to call genProduceReg()
7760-
void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode)
7759+
void CodeGen::genSSE42RoundOp(GenTreeOp* treeNode)
77617760
{
7762-
// i) SSE4.1 is supported by the underlying hardware
7763-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE41));
7761+
// i) SSE4.2 is supported by the underlying hardware
7762+
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE42));
77647763

77657764
// ii) treeNode oper is a GT_INTRINSIC
77667765
assert(treeNode->OperIs(GT_INTRINSIC));
@@ -7804,7 +7803,7 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode)
78047803

78057804
default:
78067805
ins = INS_invalid;
7807-
assert(!"genSSE41RoundOp: unsupported intrinsic");
7806+
assert(!"genSSE42RoundOp: unsupported intrinsic");
78087807
unreached();
78097808
}
78107809

@@ -7834,7 +7833,7 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
78347833
case NI_System_Math_Floor:
78357834
case NI_System_Math_Truncate:
78367835
case NI_System_Math_Round:
7837-
genSSE41RoundOp(treeNode->AsOp());
7836+
genSSE42RoundOp(treeNode->AsOp());
78387837
break;
78397838

78407839
case NI_System_Math_Sqrt:
@@ -9576,7 +9575,7 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP()
95769575
theEmitter->emitIns_R_R(INS_ccmpe, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
95779576

95789577
// Test all CC codes
9579-
for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++)
9578+
for (uint32_t ins = FIRST_CCMP_INSTRUCTION; ins <= LAST_CCMP_INSTRUCTION; ins++)
95809579
{
95819580
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
95829581
}
@@ -9598,7 +9597,7 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP()
95989597
theEmitter->emitIns_R_S(INS_ccmpe, EA_1BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
95999598

96009599
// Test all CC codes
9601-
for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++)
9600+
for (uint32_t ins = FIRST_CCMP_INSTRUCTION; ins <= LAST_CCMP_INSTRUCTION; ins++)
96029601
{
96039602
theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
96049603
}

src/coreclr/jit/compiler.cpp

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6042,11 +6042,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
60426042

60436043
if (JitConfig.EnableSSE42() != 0)
60446044
{
6045-
instructionSetFlags.AddInstructionSet(InstructionSet_SSE3);
6046-
instructionSetFlags.AddInstructionSet(InstructionSet_SSSE3);
6047-
instructionSetFlags.AddInstructionSet(InstructionSet_SSE41);
60486045
instructionSetFlags.AddInstructionSet(InstructionSet_SSE42);
6049-
instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT);
60506046
}
60516047

60526048
if (JitConfig.EnableAVX() != 0)
@@ -6057,11 +6053,6 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
60576053
if (JitConfig.EnableAVX2() != 0)
60586054
{
60596055
instructionSetFlags.AddInstructionSet(InstructionSet_AVX2);
6060-
instructionSetFlags.AddInstructionSet(InstructionSet_BMI1);
6061-
instructionSetFlags.AddInstructionSet(InstructionSet_BMI2);
6062-
instructionSetFlags.AddInstructionSet(InstructionSet_FMA);
6063-
instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT);
6064-
instructionSetFlags.AddInstructionSet(InstructionSet_MOVBE);
60656056
}
60666057

60676058
if (JitConfig.EnableAVX512() != 0)
@@ -6071,7 +6062,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
60716062

60726063
if (JitConfig.EnableAVX512v2() != 0)
60736064
{
6074-
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI);
6065+
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512v2);
60756066
}
60766067

60776068
if (JitConfig.EnableAVX512v3() != 0)
@@ -6097,7 +6088,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
60976088
if (JitConfig.EnableAES() != 0)
60986089
{
60996090
instructionSetFlags.AddInstructionSet(InstructionSet_AES);
6100-
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ);
6091+
6092+
if (JitConfig.EnableVAES() != 0)
6093+
{
6094+
instructionSetFlags.AddInstructionSet(InstructionSet_AES_V256);
6095+
instructionSetFlags.AddInstructionSet(InstructionSet_AES_V512);
6096+
}
61016097
}
61026098

61036099
if (JitConfig.EnableAVX512VP2INTERSECT() != 0)
@@ -6127,14 +6123,6 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
61276123
instructionSetFlags.AddInstructionSet(InstructionSet_SHA);
61286124
}
61296125

6130-
if (JitConfig.EnableVAES() != 0)
6131-
{
6132-
instructionSetFlags.AddInstructionSet(InstructionSet_AES_V256);
6133-
instructionSetFlags.AddInstructionSet(InstructionSet_AES_V512);
6134-
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256);
6135-
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512);
6136-
}
6137-
61386126
if (JitConfig.EnableWAITPKG() != 0)
61396127
{
61406128
instructionSetFlags.AddInstructionSet(InstructionSet_WAITPKG);

src/coreclr/jit/decomposelongs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1961,7 +1961,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn
19611961
Range().InsertAfter(loResult, simdTmpVar);
19621962

19631963
GenTree* hiResult;
1964-
if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
1964+
if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
19651965
{
19661966
GenTree* one = m_compiler->gtNewIconNode(1);
19671967
hiResult = m_compiler->gtNewSimdGetElementNode(TYP_INT, simdTmpVar, one, CORINFO_TYPE_INT, simdSize);

src/coreclr/jit/emit.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8265,7 +8265,7 @@ void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, reg
82658265

82668266
if ((dataSize == 16) && (constValue->u64[1] == constValue->u64[0]))
82678267
{
8268-
if (((cnsSize == 16) && emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) ||
8268+
if (((cnsSize == 16) && emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE42)) ||
82698269
emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX))
82708270
{
82718271
dataSize = 8;

0 commit comments

Comments
 (0)