Skip to content

Commit 4eb60ff

Browse files
committed
Ensure max is handled for simdSize 32 and 64
1 parent f2d2b9b commit 4eb60ff

File tree

1 file changed

+173
-147
lines changed

1 file changed

+173
-147
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 173 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -24303,190 +24303,216 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type,
2430324303

2430424304
retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
2430524305
}
24306-
else if (!isMagnitude && (cnsNode != nullptr))
24306+
else if ((cnsNode != nullptr) && !otherNode->OperIsConst())
2430724307
{
24308-
bool needsFixup = false;
24309-
bool canHandle = false;
24308+
bool isNaN = false;
2431024309

24311-
if (isMax)
24310+
if (isScalar)
2431224311
{
24313-
// xarch max return op2 if both inputs are 0 of either sign
24314-
// we require +0 to be greater than -0 we also require NaN to
24315-
// not be propagated for isNumber and to be propagated otherwise.
24316-
//
24317-
// This means for isNumber we want to do `max other, cns` and
24318-
// can only handle cns being -0 if Avx512F is supported. This is
24319-
// because if other was NaN, we want to return the non-NaN cns.
24320-
// But if cns was -0 and other was +0 we'd want to return +0 and
24321-
// so need to be able to fixup the result.
24322-
//
24323-
// For !isNumber we have the inverse and want `max cns, other` and
24324-
// can only handle cns being +0 if Avx512F is supported. This is
24325-
// because if other was NaN, we want to return other and if cns
24326-
// was +0 and other was -0 we'd want to return +0 and so need
24327-
// so need to be able to fixup the result.
24312+
isNaN = cnsNode->IsFloatNaN();
24313+
}
24314+
else
24315+
{
24316+
isNaN = cnsNode->IsVectorNaN(simdBaseType);
24317+
}
2432824318

24319+
if (isNaN)
24320+
{
2432924321
if (isNumber)
2433024322
{
24331-
if (isScalar)
24332-
{
24333-
needsFixup = cnsNode->IsFloatNegativeZero();
24334-
}
24335-
else
24336-
{
24337-
needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24338-
}
24339-
}
24340-
else if (isScalar)
24341-
{
24342-
needsFixup = cnsNode->IsFloatPositiveZero();
24323+
return otherNode;
2434324324
}
2434424325
else
2434524326
{
24346-
needsFixup = cnsNode->IsVectorZero();
24347-
}
24348-
24349-
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24350-
{
24351-
// Given the checks, op1 can safely be the cns and op2 the other node
24352-
24353-
intrinsic = isScalar ? NI_X86Base_MaxScalar : NI_X86Base_Max;
24354-
24355-
op1 = cnsNode;
24356-
op2 = otherNode;
24357-
24358-
canHandle = true;
24327+
return cnsNode;
2435924328
}
2436024329
}
24361-
else
24330+
24331+
if (!isMagnitude)
2436224332
{
24363-
// xarch min return op2 if both inputs are 0 of either sign
24364-
// we require -0 to be lesser than +0, we also require NaN to
24365-
// not be propagated for isNumber and to be propagated otherwise.
24366-
//
24367-
// This means for isNumber we want to do `min other, cns` and
24368-
// can only handle cns being +0 if Avx512F is supported. This is
24369-
// because if other was NaN, we want to return the non-NaN cns.
24370-
// But if cns was +0 and other was -0 we'd want to return -0 and
24371-
// so need to be able to fixup the result.
24372-
//
24373-
// For !isNumber we have the inverse and want `min cns, other` and
24374-
// can only handle cns being -0 if Avx512F is supported. This is
24375-
// because if other was NaN, we want to return other and if cns
24376-
// was -0 and other was +0 we'd want to return -0 and so need
24377-
// so need to be able to fixup the result.
24333+
bool needsFixup = false;
24334+
bool canHandle = false;
2437824335

24379-
if (isNumber)
24336+
if (isMax)
2438024337
{
24381-
if (isScalar)
24338+
// xarch max return op2 if both inputs are 0 of either sign
24339+
// we require +0 to be greater than -0 we also require NaN to
24340+
// not be propagated for isNumber and to be propagated otherwise.
24341+
//
24342+
// This means for isNumber we want to do `max other, cns` and
24343+
// can only handle cns being -0 if Avx512F is supported. This is
24344+
// because if other was NaN, we want to return the non-NaN cns.
24345+
// But if cns was -0 and other was +0 we'd want to return +0 and
24346+
// so need to be able to fixup the result.
24347+
//
24348+
// For !isNumber we have the inverse and want `max cns, other` and
24349+
// can only handle cns being +0 if Avx512F is supported. This is
24350+
// because if other was NaN, we want to return other and if cns
24351+
// was +0 and other was -0 we'd want to return +0 and so need
24352+
// so need to be able to fixup the result.
24353+
24354+
if (isNumber)
24355+
{
24356+
if (isScalar)
24357+
{
24358+
needsFixup = cnsNode->IsFloatNegativeZero();
24359+
}
24360+
else
24361+
{
24362+
needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24363+
}
24364+
}
24365+
else if (isScalar)
2438224366
{
2438324367
needsFixup = cnsNode->IsFloatPositiveZero();
2438424368
}
2438524369
else
2438624370
{
2438724371
needsFixup = cnsNode->IsVectorZero();
2438824372
}
24389-
}
24390-
else if (isScalar)
24391-
{
24392-
needsFixup = cnsNode->IsFloatNegativeZero();
24393-
}
24394-
else
24395-
{
24396-
needsFixup = cnsNode->IsVectorZero();
24397-
}
24398-
{
24399-
needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24400-
}
24401-
24402-
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24403-
{
24404-
// Given the checks, op1 can safely be the cns and op2 the other node
24405-
24406-
intrinsic = isScalar ? NI_X86Base_MinScalar : NI_X86Base_Min;
2440724373

24408-
op1 = cnsNode;
24409-
op2 = otherNode;
24374+
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24375+
{
24376+
// Given the checks, op1 can safely be the cns and op2 the other node
2441024377

24411-
canHandle = true;
24412-
}
24413-
}
24378+
intrinsic = isScalar ? NI_X86Base_MaxScalar : NI_X86Base_Max;
2441424379

24415-
if (canHandle)
24416-
{
24417-
assert(op1->OperIsConst() && !op2->OperIsConst());
24418-
24419-
if (isScalar)
24420-
{
24421-
GenTreeVecCon* vecCon = gtNewVconNode(type);
24422-
vecCon->EvaluateBroadcastInPlace(simdBaseType, cnsNode->AsDblCon()->DconValue());
24380+
op1 = cnsNode;
24381+
op2 = otherNode;
2442324382

24424-
op1 = vecCon;
24425-
op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseJitType, simdSize);
24383+
canHandle = true;
24384+
}
2442624385
}
24427-
24428-
retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
24429-
24430-
if (needsFixup)
24386+
else
2443124387
{
24432-
GenTree* op2Clone = fgMakeMultiUse(&op2);
24433-
retNode->AsHWIntrinsic()->Op(2) = op2;
24434-
24435-
GenTreeVecCon* tblVecCon = gtNewVconNode(type);
24436-
24437-
// FixupScalar(left, right, table, control) computes the input type of right
24438-
// adjusts it based on the table and then returns
24388+
// xarch min return op2 if both inputs are 0 of either sign
24389+
// we require -0 to be lesser than +0, we also require NaN to
24390+
// not be propagated for isNumber and to be propagated otherwise.
2443924391
//
24440-
// In our case, left is going to be the result of the RangeScalar operation
24441-
// and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN
24442-
// we want to preserve it instead. Otherwise we want to preserve the original
24443-
// result computed by RangeScalar.
24392+
// This means for isNumber we want to do `min other, cns` and
24393+
// can only handle cns being +0 if Avx512F is supported. This is
24394+
// because if other was NaN, we want to return the non-NaN cns.
24395+
// But if cns was +0 and other was -0 we'd want to return -0 and
24396+
// so need to be able to fixup the result.
2444424397
//
24445-
// If both inputs are NaN, then we'll end up taking op1 by virtue of it being
24446-
// the latter fixup.
24398+
// For !isNumber we have the inverse and want `min cns, other` and
24399+
// can only handle cns being -0 if Avx512F is supported. This is
24400+
// because if other was NaN, we want to return other and if cns
24401+
// was -0 and other was +0 we'd want to return -0 and so need
24402+
// so need to be able to fixup the result.
2444724403

24448-
if (isMax)
24404+
if (isNumber)
2444924405
{
24450-
// QNAN: 0b0000: Preserve left
24451-
// SNAN: 0b0000
24452-
// ZERO: 0b1000: +0
24453-
// +ONE: 0b0000
24454-
// -INF: 0b0000
24455-
// +INF: 0b0000
24456-
// -VAL: 0b0000
24457-
// +VAL: 0b0000
24458-
24459-
const int64_t tblValue = 0x00000800;
24460-
tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24406+
if (isScalar)
24407+
{
24408+
needsFixup = cnsNode->IsFloatPositiveZero();
24409+
}
24410+
else
24411+
{
24412+
needsFixup = cnsNode->IsVectorZero();
24413+
}
24414+
}
24415+
else if (isScalar)
24416+
{
24417+
needsFixup = cnsNode->IsFloatNegativeZero();
2446124418
}
2446224419
else
2446324420
{
24464-
// QNAN: 0b0000: Preserve left
24465-
// SNAN: 0b0000
24466-
// ZERO: 0b0111: -0
24467-
// +ONE: 0b0000
24468-
// -INF: 0b0000
24469-
// +INF: 0b0000
24470-
// -VAL: 0b0000
24471-
// +VAL: 0b0000
24472-
24473-
const int64_t tblValue = 0x00000700;
24474-
tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24421+
needsFixup = cnsNode->IsVectorZero();
24422+
}
24423+
{
24424+
needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
2447524425
}
2447624426

24477-
intrinsic = isScalar ? NI_AVX512_FixupScalar : NI_AVX512_Fixup;
24427+
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24428+
{
24429+
// Given the checks, op1 can safely be the cns and op2 the other node
2447824430

24479-
retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2Clone, tblVecCon, gtNewIconNode(0), intrinsic,
24480-
simdBaseJitType, simdSize);
24431+
intrinsic = isScalar ? NI_X86Base_MinScalar : NI_X86Base_Min;
24432+
24433+
op1 = cnsNode;
24434+
op2 = otherNode;
24435+
24436+
canHandle = true;
24437+
}
2448124438
}
2448224439

24483-
if (isNumber)
24440+
if (canHandle)
2448424441
{
24485-
// Swap the operands so that the cnsNode is op1, this prevents
24486-
// the unknown value (which could be NaN) from being selected.
24442+
assert(op1->OperIsConst() && !op2->OperIsConst());
24443+
24444+
if (isScalar)
24445+
{
24446+
GenTreeVecCon* vecCon = gtNewVconNode(type);
24447+
vecCon->EvaluateBroadcastInPlace(simdBaseType, cnsNode->AsDblCon()->DconValue());
24448+
24449+
op1 = vecCon;
24450+
op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseJitType, simdSize);
24451+
}
24452+
24453+
retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
24454+
24455+
if (needsFixup)
24456+
{
24457+
GenTree* op2Clone = fgMakeMultiUse(&op2);
24458+
retNode->AsHWIntrinsic()->Op(2) = op2;
24459+
24460+
GenTreeVecCon* tblVecCon = gtNewVconNode(type);
24461+
24462+
// FixupScalar(left, right, table, control) computes the input type of right
24463+
// adjusts it based on the table and then returns
24464+
//
24465+
// In our case, left is going to be the result of the RangeScalar operation
24466+
// and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN
24467+
// we want to preserve it instead. Otherwise we want to preserve the original
24468+
// result computed by RangeScalar.
24469+
//
24470+
// If both inputs are NaN, then we'll end up taking op1 by virtue of it being
24471+
// the latter fixup.
24472+
24473+
if (isMax)
24474+
{
24475+
// QNAN: 0b0000: Preserve left
24476+
// SNAN: 0b0000
24477+
// ZERO: 0b1000: +0
24478+
// +ONE: 0b0000
24479+
// -INF: 0b0000
24480+
// +INF: 0b0000
24481+
// -VAL: 0b0000
24482+
// +VAL: 0b0000
24483+
24484+
const int64_t tblValue = 0x00000800;
24485+
tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24486+
}
24487+
else
24488+
{
24489+
// QNAN: 0b0000: Preserve left
24490+
// SNAN: 0b0000
24491+
// ZERO: 0b0111: -0
24492+
// +ONE: 0b0000
24493+
// -INF: 0b0000
24494+
// +INF: 0b0000
24495+
// -VAL: 0b0000
24496+
// +VAL: 0b0000
24497+
24498+
const int64_t tblValue = 0x00000700;
24499+
tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24500+
}
24501+
24502+
intrinsic = isScalar ? NI_AVX512_FixupScalar : NI_AVX512_Fixup;
2448724503

24488-
retNode->AsHWIntrinsic()->Op(1) = op2;
24489-
retNode->AsHWIntrinsic()->Op(2) = op1;
24504+
retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2Clone, tblVecCon, gtNewIconNode(0), intrinsic,
24505+
simdBaseJitType, simdSize);
24506+
}
24507+
24508+
if (isNumber)
24509+
{
24510+
// Swap the operands so that the cnsNode is op1, this prevents
24511+
// the unknown value (which could be NaN) from being selected.
24512+
24513+
retNode->AsHWIntrinsic()->Op(1) = op2;
24514+
retNode->AsHWIntrinsic()->Op(2) = op1;
24515+
}
2449024516
}
2449124517
}
2449224518
}
@@ -24783,26 +24809,26 @@ GenTree* Compiler::gtNewSimdMinMaxNativeNode(
2478324809

2478424810
if (varTypeIsFloating(simdBaseType))
2478524811
{
24786-
intrinsic = NI_AVX_Min;
24812+
intrinsic = isMax ? NI_AVX_Max : NI_AVX_Min;
2478724813
}
2478824814
else
2478924815
{
2479024816
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
2479124817

2479224818
if (!varTypeIsLong(simdBaseType))
2479324819
{
24794-
intrinsic = NI_AVX2_Min;
24820+
intrinsic = isMax ? NI_AVX2_Max : NI_AVX2_Min;
2479524821
}
2479624822
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
2479724823
{
24798-
intrinsic = NI_AVX512_Min;
24824+
intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min;
2479924825
}
2480024826
}
2480124827
}
2480224828
else if (simdSize == 64)
2480324829
{
2480424830
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
24805-
intrinsic = NI_AVX512_Min;
24831+
intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min;
2480624832
}
2480724833
else
2480824834
{

0 commit comments

Comments
 (0)