@@ -24303,190 +24303,216 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type,
24303
24303
24304
24304
retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
24305
24305
}
24306
- else if (!isMagnitude && (cnsNode != nullptr))
24306
+ else if ((cnsNode != nullptr) && !otherNode->OperIsConst( ))
24307
24307
{
24308
- bool needsFixup = false;
24309
- bool canHandle = false;
24308
+ bool isNaN = false;
24310
24309
24311
- if (isMax )
24310
+ if (isScalar )
24312
24311
{
24313
- // xarch max return op2 if both inputs are 0 of either sign
24314
- // we require +0 to be greater than -0 we also require NaN to
24315
- // not be propagated for isNumber and to be propagated otherwise.
24316
- //
24317
- // This means for isNumber we want to do `max other, cns` and
24318
- // can only handle cns being -0 if Avx512F is supported. This is
24319
- // because if other was NaN, we want to return the non-NaN cns.
24320
- // But if cns was -0 and other was +0 we'd want to return +0 and
24321
- // so need to be able to fixup the result.
24322
- //
24323
- // For !isNumber we have the inverse and want `max cns, other` and
24324
- // can only handle cns being +0 if Avx512F is supported. This is
24325
- // because if other was NaN, we want to return other and if cns
24326
- // was +0 and other was -0 we'd want to return +0 and so need
24327
- // so need to be able to fixup the result.
24312
+ isNaN = cnsNode->IsFloatNaN();
24313
+ }
24314
+ else
24315
+ {
24316
+ isNaN = cnsNode->IsVectorNaN(simdBaseType);
24317
+ }
24328
24318
24319
+ if (isNaN)
24320
+ {
24329
24321
if (isNumber)
24330
24322
{
24331
- if (isScalar)
24332
- {
24333
- needsFixup = cnsNode->IsFloatNegativeZero();
24334
- }
24335
- else
24336
- {
24337
- needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24338
- }
24339
- }
24340
- else if (isScalar)
24341
- {
24342
- needsFixup = cnsNode->IsFloatPositiveZero();
24323
+ return otherNode;
24343
24324
}
24344
24325
else
24345
24326
{
24346
- needsFixup = cnsNode->IsVectorZero();
24347
- }
24348
-
24349
- if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24350
- {
24351
- // Given the checks, op1 can safely be the cns and op2 the other node
24352
-
24353
- intrinsic = isScalar ? NI_X86Base_MaxScalar : NI_X86Base_Max;
24354
-
24355
- op1 = cnsNode;
24356
- op2 = otherNode;
24357
-
24358
- canHandle = true;
24327
+ return cnsNode;
24359
24328
}
24360
24329
}
24361
- else
24330
+
24331
+ if (!isMagnitude)
24362
24332
{
24363
- // xarch min return op2 if both inputs are 0 of either sign
24364
- // we require -0 to be lesser than +0, we also require NaN to
24365
- // not be propagated for isNumber and to be propagated otherwise.
24366
- //
24367
- // This means for isNumber we want to do `min other, cns` and
24368
- // can only handle cns being +0 if Avx512F is supported. This is
24369
- // because if other was NaN, we want to return the non-NaN cns.
24370
- // But if cns was +0 and other was -0 we'd want to return -0 and
24371
- // so need to be able to fixup the result.
24372
- //
24373
- // For !isNumber we have the inverse and want `min cns, other` and
24374
- // can only handle cns being -0 if Avx512F is supported. This is
24375
- // because if other was NaN, we want to return other and if cns
24376
- // was -0 and other was +0 we'd want to return -0 and so need
24377
- // so need to be able to fixup the result.
24333
+ bool needsFixup = false;
24334
+ bool canHandle = false;
24378
24335
24379
- if (isNumber )
24336
+ if (isMax )
24380
24337
{
24381
- if (isScalar)
24338
+ // xarch max return op2 if both inputs are 0 of either sign
24339
+ // we require +0 to be greater than -0 we also require NaN to
24340
+ // not be propagated for isNumber and to be propagated otherwise.
24341
+ //
24342
+ // This means for isNumber we want to do `max other, cns` and
24343
+ // can only handle cns being -0 if Avx512F is supported. This is
24344
+ // because if other was NaN, we want to return the non-NaN cns.
24345
+ // But if cns was -0 and other was +0 we'd want to return +0 and
24346
+ // so need to be able to fixup the result.
24347
+ //
24348
+ // For !isNumber we have the inverse and want `max cns, other` and
24349
+ // can only handle cns being +0 if Avx512F is supported. This is
24350
+ // because if other was NaN, we want to return other and if cns
24351
+ // was +0 and other was -0 we'd want to return +0 and so need
24352
+ // so need to be able to fixup the result.
24353
+
24354
+ if (isNumber)
24355
+ {
24356
+ if (isScalar)
24357
+ {
24358
+ needsFixup = cnsNode->IsFloatNegativeZero();
24359
+ }
24360
+ else
24361
+ {
24362
+ needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24363
+ }
24364
+ }
24365
+ else if (isScalar)
24382
24366
{
24383
24367
needsFixup = cnsNode->IsFloatPositiveZero();
24384
24368
}
24385
24369
else
24386
24370
{
24387
24371
needsFixup = cnsNode->IsVectorZero();
24388
24372
}
24389
- }
24390
- else if (isScalar)
24391
- {
24392
- needsFixup = cnsNode->IsFloatNegativeZero();
24393
- }
24394
- else
24395
- {
24396
- needsFixup = cnsNode->IsVectorZero();
24397
- }
24398
- {
24399
- needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24400
- }
24401
-
24402
- if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24403
- {
24404
- // Given the checks, op1 can safely be the cns and op2 the other node
24405
-
24406
- intrinsic = isScalar ? NI_X86Base_MinScalar : NI_X86Base_Min;
24407
24373
24408
- op1 = cnsNode;
24409
- op2 = otherNode;
24374
+ if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24375
+ {
24376
+ // Given the checks, op1 can safely be the cns and op2 the other node
24410
24377
24411
- canHandle = true;
24412
- }
24413
- }
24378
+ intrinsic = isScalar ? NI_X86Base_MaxScalar : NI_X86Base_Max;
24414
24379
24415
- if (canHandle)
24416
- {
24417
- assert(op1->OperIsConst() && !op2->OperIsConst());
24418
-
24419
- if (isScalar)
24420
- {
24421
- GenTreeVecCon* vecCon = gtNewVconNode(type);
24422
- vecCon->EvaluateBroadcastInPlace(simdBaseType, cnsNode->AsDblCon()->DconValue());
24380
+ op1 = cnsNode;
24381
+ op2 = otherNode;
24423
24382
24424
- op1 = vecCon ;
24425
- op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseJitType, simdSize);
24383
+ canHandle = true ;
24384
+ }
24426
24385
}
24427
-
24428
- retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
24429
-
24430
- if (needsFixup)
24386
+ else
24431
24387
{
24432
- GenTree* op2Clone = fgMakeMultiUse(&op2);
24433
- retNode->AsHWIntrinsic()->Op(2) = op2;
24434
-
24435
- GenTreeVecCon* tblVecCon = gtNewVconNode(type);
24436
-
24437
- // FixupScalar(left, right, table, control) computes the input type of right
24438
- // adjusts it based on the table and then returns
24388
+ // xarch min return op2 if both inputs are 0 of either sign
24389
+ // we require -0 to be lesser than +0, we also require NaN to
24390
+ // not be propagated for isNumber and to be propagated otherwise.
24439
24391
//
24440
- // In our case, left is going to be the result of the RangeScalar operation
24441
- // and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN
24442
- // we want to preserve it instead. Otherwise we want to preserve the original
24443
- // result computed by RangeScalar.
24392
+ // This means for isNumber we want to do `min other, cns` and
24393
+ // can only handle cns being +0 if Avx512F is supported. This is
24394
+ // because if other was NaN, we want to return the non-NaN cns.
24395
+ // But if cns was +0 and other was -0 we'd want to return -0 and
24396
+ // so need to be able to fixup the result.
24444
24397
//
24445
- // If both inputs are NaN, then we'll end up taking op1 by virtue of it being
24446
- // the latter fixup.
24398
+ // For !isNumber we have the inverse and want `min cns, other` and
24399
+ // can only handle cns being -0 if Avx512F is supported. This is
24400
+ // because if other was NaN, we want to return other and if cns
24401
+ // was -0 and other was +0 we'd want to return -0 and so need
24402
+ // so need to be able to fixup the result.
24447
24403
24448
- if (isMax )
24404
+ if (isNumber )
24449
24405
{
24450
- // QNAN: 0b0000: Preserve left
24451
- // SNAN: 0b0000
24452
- // ZERO: 0b1000: +0
24453
- // +ONE: 0b0000
24454
- // -INF: 0b0000
24455
- // +INF: 0b0000
24456
- // -VAL: 0b0000
24457
- // +VAL: 0b0000
24458
-
24459
- const int64_t tblValue = 0x00000800;
24460
- tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24406
+ if (isScalar)
24407
+ {
24408
+ needsFixup = cnsNode->IsFloatPositiveZero();
24409
+ }
24410
+ else
24411
+ {
24412
+ needsFixup = cnsNode->IsVectorZero();
24413
+ }
24414
+ }
24415
+ else if (isScalar)
24416
+ {
24417
+ needsFixup = cnsNode->IsFloatNegativeZero();
24461
24418
}
24462
24419
else
24463
24420
{
24464
- // QNAN: 0b0000: Preserve left
24465
- // SNAN: 0b0000
24466
- // ZERO: 0b0111: -0
24467
- // +ONE: 0b0000
24468
- // -INF: 0b0000
24469
- // +INF: 0b0000
24470
- // -VAL: 0b0000
24471
- // +VAL: 0b0000
24472
-
24473
- const int64_t tblValue = 0x00000700;
24474
- tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24421
+ needsFixup = cnsNode->IsVectorZero();
24422
+ }
24423
+ {
24424
+ needsFixup = cnsNode->IsVectorNegativeZero(simdBaseType);
24475
24425
}
24476
24426
24477
- intrinsic = isScalar ? NI_AVX512_FixupScalar : NI_AVX512_Fixup;
24427
+ if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512))
24428
+ {
24429
+ // Given the checks, op1 can safely be the cns and op2 the other node
24478
24430
24479
- retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2Clone, tblVecCon, gtNewIconNode(0), intrinsic,
24480
- simdBaseJitType, simdSize);
24431
+ intrinsic = isScalar ? NI_X86Base_MinScalar : NI_X86Base_Min;
24432
+
24433
+ op1 = cnsNode;
24434
+ op2 = otherNode;
24435
+
24436
+ canHandle = true;
24437
+ }
24481
24438
}
24482
24439
24483
- if (isNumber )
24440
+ if (canHandle )
24484
24441
{
24485
- // Swap the operands so that the cnsNode is op1, this prevents
24486
- // the unknown value (which could be NaN) from being selected.
24442
+ assert(op1->OperIsConst() && !op2->OperIsConst());
24443
+
24444
+ if (isScalar)
24445
+ {
24446
+ GenTreeVecCon* vecCon = gtNewVconNode(type);
24447
+ vecCon->EvaluateBroadcastInPlace(simdBaseType, cnsNode->AsDblCon()->DconValue());
24448
+
24449
+ op1 = vecCon;
24450
+ op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseJitType, simdSize);
24451
+ }
24452
+
24453
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
24454
+
24455
+ if (needsFixup)
24456
+ {
24457
+ GenTree* op2Clone = fgMakeMultiUse(&op2);
24458
+ retNode->AsHWIntrinsic()->Op(2) = op2;
24459
+
24460
+ GenTreeVecCon* tblVecCon = gtNewVconNode(type);
24461
+
24462
+ // FixupScalar(left, right, table, control) computes the input type of right
24463
+ // adjusts it based on the table and then returns
24464
+ //
24465
+ // In our case, left is going to be the result of the RangeScalar operation
24466
+ // and right is going to be op1 or op2. In the case op1/op2 is QNaN or SNaN
24467
+ // we want to preserve it instead. Otherwise we want to preserve the original
24468
+ // result computed by RangeScalar.
24469
+ //
24470
+ // If both inputs are NaN, then we'll end up taking op1 by virtue of it being
24471
+ // the latter fixup.
24472
+
24473
+ if (isMax)
24474
+ {
24475
+ // QNAN: 0b0000: Preserve left
24476
+ // SNAN: 0b0000
24477
+ // ZERO: 0b1000: +0
24478
+ // +ONE: 0b0000
24479
+ // -INF: 0b0000
24480
+ // +INF: 0b0000
24481
+ // -VAL: 0b0000
24482
+ // +VAL: 0b0000
24483
+
24484
+ const int64_t tblValue = 0x00000800;
24485
+ tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24486
+ }
24487
+ else
24488
+ {
24489
+ // QNAN: 0b0000: Preserve left
24490
+ // SNAN: 0b0000
24491
+ // ZERO: 0b0111: -0
24492
+ // +ONE: 0b0000
24493
+ // -INF: 0b0000
24494
+ // +INF: 0b0000
24495
+ // -VAL: 0b0000
24496
+ // +VAL: 0b0000
24497
+
24498
+ const int64_t tblValue = 0x00000700;
24499
+ tblVecCon->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
24500
+ }
24501
+
24502
+ intrinsic = isScalar ? NI_AVX512_FixupScalar : NI_AVX512_Fixup;
24487
24503
24488
- retNode->AsHWIntrinsic()->Op(1) = op2;
24489
- retNode->AsHWIntrinsic()->Op(2) = op1;
24504
+ retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2Clone, tblVecCon, gtNewIconNode(0), intrinsic,
24505
+ simdBaseJitType, simdSize);
24506
+ }
24507
+
24508
+ if (isNumber)
24509
+ {
24510
+ // Swap the operands so that the cnsNode is op1, this prevents
24511
+ // the unknown value (which could be NaN) from being selected.
24512
+
24513
+ retNode->AsHWIntrinsic()->Op(1) = op2;
24514
+ retNode->AsHWIntrinsic()->Op(2) = op1;
24515
+ }
24490
24516
}
24491
24517
}
24492
24518
}
@@ -24783,26 +24809,26 @@ GenTree* Compiler::gtNewSimdMinMaxNativeNode(
24783
24809
24784
24810
if (varTypeIsFloating(simdBaseType))
24785
24811
{
24786
- intrinsic = NI_AVX_Min;
24812
+ intrinsic = isMax ? NI_AVX_Max : NI_AVX_Min;
24787
24813
}
24788
24814
else
24789
24815
{
24790
24816
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
24791
24817
24792
24818
if (!varTypeIsLong(simdBaseType))
24793
24819
{
24794
- intrinsic = NI_AVX2_Min;
24820
+ intrinsic = isMax ? NI_AVX2_Max : NI_AVX2_Min;
24795
24821
}
24796
24822
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
24797
24823
{
24798
- intrinsic = NI_AVX512_Min;
24824
+ intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min;
24799
24825
}
24800
24826
}
24801
24827
}
24802
24828
else if (simdSize == 64)
24803
24829
{
24804
24830
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
24805
- intrinsic = NI_AVX512_Min;
24831
+ intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min;
24806
24832
}
24807
24833
else
24808
24834
{
0 commit comments