Skip to content

Commit 2dd517e

Browse files
authored
ARM64-SVE: Delay free all ops within conditional select (#107036)
* ARM64-SVE: Delay free all ops within conditional select
* Fix comment
* Add test header
* don't delay prefUseOpNum
* Fix FMA
* Add assert checks for delay free
* Merge embedded op build code
* fix formatting
* simplify assert
* simplify FMA code
* Add tests for 106867
* ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic
  TEST_IMG: ubuntu/dotnet-build
  TEST_CMD: safe ./projects/dotnet/build-runtime.sh
  Jira: ENTLLT-7634
  Change-Id: I337a291be6661f104fe90c7cdc27150eede43647
* Add Sve.IsSupported to tests
* Add Sve.IsSupported to test
* fix formatting
* Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic"
* Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic"
* Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic"
1 parent 0961328 commit 2dd517e

File tree

7 files changed

+191
-46
lines changed

7 files changed

+191
-46
lines changed

src/coreclr/jit/lsraarm64.cpp

Lines changed: 28 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,7 +1944,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19441944
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
19451945
size_t numArgs = embOp2Node->GetOperandCount();
19461946
const HWIntrinsic intrinEmb(embOp2Node);
1947-
numArgs = embOp2Node->GetOperandCount();
1947+
numArgs = embOp2Node->GetOperandCount();
1948+
GenTree* prefUseNode = nullptr;
19481949

19491950
if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id))
19501951
{
@@ -1961,44 +1962,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19611962
unsigned resultOpNum =
19621963
embOp2Node->GetResultOpNumForRmwIntrinsic(user, intrinEmb.op1, intrinEmb.op2, intrinEmb.op3);
19631964

1964-
GenTree* emitOp1 = intrinEmb.op1;
1965-
GenTree* emitOp2 = intrinEmb.op2;
1966-
GenTree* emitOp3 = intrinEmb.op3;
1967-
1968-
if (resultOpNum == 2)
1969-
{
1970-
// op2 = op1 + (op2 * op3)
1971-
std::swap(emitOp1, emitOp3);
1972-
std::swap(emitOp1, emitOp2);
1973-
// op1 = (op1 * op2) + op3
1974-
}
1975-
else if (resultOpNum == 3)
1965+
if (resultOpNum == 0)
19761966
{
1977-
// op3 = op1 + (op2 * op3)
1978-
std::swap(emitOp1, emitOp3);
1979-
// op1 = (op1 * op2) + op3
1967+
prefUseNode = embOp2Node->Op(1);
19801968
}
19811969
else
19821970
{
1983-
// op1 = op1 + (op2 * op3)
1984-
// Nothing needs to be done
1971+
assert(resultOpNum >= 1 && resultOpNum <= 3);
1972+
prefUseNode = embOp2Node->Op(resultOpNum);
19851973
}
1986-
1987-
GenTree* ops[] = {intrinEmb.op1, intrinEmb.op2, intrinEmb.op3};
1988-
for (GenTree* op : ops)
1989-
{
1990-
if (op == emitOp1)
1991-
{
1992-
tgtPrefUse = BuildUse(op);
1993-
srcCount++;
1994-
}
1995-
else if (op == emitOp2 || op == emitOp3)
1996-
{
1997-
srcCount += BuildDelayFreeUses(op, emitOp1);
1998-
}
1999-
}
2000-
2001-
srcCount += BuildDelayFreeUses(intrin.op3, emitOp1);
20021974
}
20031975
else
20041976
{
@@ -2045,22 +2017,32 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
20452017
{
20462018
prefUseOpNum = 2;
20472019
}
2048-
GenTree* prefUseNode = embOp2Node->Op(prefUseOpNum);
2049-
for (size_t argNum = 1; argNum <= numArgs; argNum++)
2020+
prefUseNode = embOp2Node->Op(prefUseOpNum);
2021+
}
2022+
2023+
for (size_t argNum = 1; argNum <= numArgs; argNum++)
2024+
{
2025+
GenTree* node = embOp2Node->Op(argNum);
2026+
2027+
if (node == prefUseNode)
20502028
{
2051-
if (argNum == prefUseOpNum)
2052-
{
2053-
tgtPrefUse = BuildUse(prefUseNode);
2054-
srcCount += 1;
2055-
}
2056-
else
2057-
{
2058-
srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), prefUseNode);
2059-
}
2029+
tgtPrefUse = BuildUse(node);
2030+
srcCount++;
20602031
}
2032+
else
2033+
{
2034+
RefPosition* useRefPosition = nullptr;
20612035

2062-
srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode);
2036+
int uses = BuildDelayFreeUses(node, nullptr, RBM_NONE, &useRefPosition);
2037+
srcCount += uses;
2038+
2039+
// It is a hard requirement that these are not allocated to the same register as the destination,
2040+
// so verify no optimizations kicked in to skip setting the delay-free.
2041+
assert((useRefPosition != nullptr && useRefPosition->delayRegFree) || (uses == 0));
2042+
}
20632043
}
2044+
2045+
srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode);
20642046
}
20652047
else if (intrin.op2 != nullptr)
20662048
{
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Xunit;
5+
6+
// Generated by Fuzzlyn v2.3 on 2024-08-23 09:12:06
7+
// Run on Arm64 Windows
8+
// Seed: 9639718980642677114-vectort,vector64,vector128,armsve
9+
// Reduced from 52.6 KiB to 0.4 KiB in 00:00:26
10+
// Hits JIT assert in Release:
11+
// Assertion failed 'targetReg != embMaskOp2Reg' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Generate code' (IL size 32; hash 0xade6b36b; FullOpts)
12+
//
13+
// File: C:\dev\dotnet\runtime2\src\coreclr\jit\hwintrinsiccodegenarm64.cpp Line: 818
14+
//
15+
using System;
16+
using System.Numerics;
17+
using System.Runtime.Intrinsics;
18+
using System.Runtime.Intrinsics.Arm;
19+
20+
// Holder type with a single public Vector<short> field; the Runtime_106864 test
// reads and writes that field through a static instance.
public class C1
21+
{
22+
public Vector<short> F1;
23+
}
24+
25+
// Regression test reduced by Fuzzlyn (see the file header for the seed and the
// JIT assert it originally hit: 'targetReg != embMaskOp2Reg').
public class Runtime_106864
26+
{
27+
public static C1 s_2 = new C1(); // shared instance whose F1 field is the only data the test touches
28+
29+
[Fact]
30+
public static void TestEntryPoint()
31+
{
32+
if (Sve.IsSupported) // body only executes when SVE is reported as supported
33+
{
34+
C1 vr2 = s_2;
35+
var vr3 = vr2.F1;
36+
var vr4 = vr2.F1; // second read of the same field, so both Max operands carry the same value
37+
vr2.F1 = Sve.Max(vr3, vr4); // result is stored back into the field it was read from
38+
}
39+
}
40+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
<NoWarn>$(NoWarn),SYSLIB5003</NoWarn>
5+
</PropertyGroup>
6+
<ItemGroup>
7+
<Compile Include="$(MSBuildProjectName).cs" />
8+
</ItemGroup>
9+
</Project>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Xunit;
5+
6+
// Generated by Fuzzlyn v2.3 on 2024-08-23 10:10:06
7+
// Run on Arm64 Windows
8+
// Seed: 13584223539078280353-vectort,vector64,vector128,armsve
9+
// Reduced from 87.4 KiB to 0.8 KiB in 00:00:52
10+
// Hits JIT assert in Release:
11+
// Assertion failed 'secondId->idReg1() != secondId->idReg4()' in 'S0:M6(ubyte,double):this' during 'Emit code' (IL size 81; hash 0x596acd7c; FullOpts)
12+
//
13+
// File: C:\dev\dotnet\runtime2\src\coreclr\jit\emitarm64sve.cpp Line: 18601
14+
//
15+
using System;
16+
using System.Numerics;
17+
using System.Runtime.Intrinsics;
18+
using System.Runtime.Intrinsics.Arm;
19+
20+
// Struct hosting the method named in the file header's assert message
// ('secondId->idReg1() != secondId->idReg4()' in 'S0:M6(ubyte,double):this').
public struct S0
21+
{
22+
// Neither arg0 nor arg1 is referenced in the body.
// NOTE(review): presumably they are leftovers of the fuzzer reduction - confirm before removing.
public void M6(byte arg0, double arg1)
23+
{
24+
var vr0 = Vector128.CreateScalar(119.12962f).AsVector();
25+
var vr3 = Runtime_106867.s_2;
26+
var vr4 = Vector128.CreateScalar(1f).AsVector();
27+
var vr5 = Runtime_106867.s_2; // same static as vr3, so operands 1 and 3 below hold the same value
28+
var vr2 = Sve.FusedMultiplySubtractNegated(vr3, vr4, vr5);
29+
if ((Sve.ConditionalExtractLastActiveElement(vr0, 0, vr2) < 0)) // consumes the fused result computed above
30+
{
31+
this = this; // self-assignment; NOTE(review): presumably kept only to give the branch a body
32+
}
33+
}
34+
}
35+
36+
// Test entry class for the S0.M6 reproduction; it owns the statics that M6 and
// the entry point read.
public class Runtime_106867
37+
{
38+
public static Vector<float> s_2; // never assigned in this file; read twice by S0.M6
39+
public static double[] s_5 = new double[]
40+
{
41+
0
42+
};
43+
public static byte s_16; // never assigned in this file; passed to M6 as arg0
44+
45+
[Fact]
46+
public static void TestEntryPoint()
47+
{
48+
if (Sve.IsSupported) // body only executes when SVE is reported as supported
49+
{
50+
var vr6 = s_5[0];
51+
new S0().M6(s_16, vr6); // invokes the method named in the header's assert message
52+
}
53+
}
54+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
<NoWarn>$(NoWarn),SYSLIB5003</NoWarn>
5+
</PropertyGroup>
6+
<ItemGroup>
7+
<Compile Include="$(MSBuildProjectName).cs" />
8+
</ItemGroup>
9+
</Project>
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Xunit;
5+
using System.Runtime.CompilerServices;
6+
7+
// Generated by Fuzzlyn v2.3 on 2024-08-23 10:12:51
8+
// Run on Arm64 Windows
9+
// Seed: 4576767951799510057-vectort,vector64,vector128,armsve
10+
// Reduced from 32.2 KiB to 0.5 KiB in 00:00:25
11+
// Hits JIT assert in Release:
12+
// Assertion failed 'secondId->idReg1() != secondId->idReg3()' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Emit code' (IL size 55; hash 0xade6b36b; FullOpts)
13+
//
14+
// File: C:\dev\dotnet\runtime2\src\coreclr\jit\emitarm64sve.cpp Line: 18600
15+
//
16+
using System;
17+
using System.Numerics;
18+
using System.Runtime.Intrinsics;
19+
using System.Runtime.Intrinsics.Arm;
20+
21+
// Regression test reduced by Fuzzlyn (see the file header for the seed and the
// JIT assert it originally hit: 'secondId->idReg1() != secondId->idReg3()').
public class Runtime_106866_2
22+
{
23+
[Fact]
24+
public static void TestEntryPoint()
25+
{
26+
if (Sve.IsSupported) // body only executes when SVE is reported as supported
27+
{
28+
Vector<short> vr4 = default(Vector<short>);
29+
vr4 = Sve.MultiplyAdd(vr4, vr4, vr4); // the same local is all three operands and also the destination
30+
var vr5 = (short)0;
31+
var vr6 = Vector128.CreateScalar(vr5).AsVector();
32+
var vr7 = (short)0;
33+
var vr8 = Sve.ConditionalExtractLastActiveElement(vr6, vr7, vr4); // consumes the MultiplyAdd result
34+
Consume(vr8);
35+
}
36+
}
37+
38+
// Empty, non-inlined sink that takes vr8 as an argument.
// NOTE(review): presumably this exists so the computation above stays live - confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
39+
static void Consume<T>(T val)
40+
{
41+
}
42+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
<NoWarn>$(NoWarn),SYSLIB5003</NoWarn>
5+
</PropertyGroup>
6+
<ItemGroup>
7+
<Compile Include="$(MSBuildProjectName).cs" />
8+
</ItemGroup>
9+
</Project>

0 commit comments

Comments
 (0)