-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[ARM] Handle roundeven for MVE. #142557
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ARM] Handle roundeven for MVE. #142557
Conversation
Now that llvm#141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-arm Author: David Green (davemgreen) Changes: Now that #141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work. Full diff: https://github.com/llvm/llvm-project/pull/142557.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index e33c065059c44..412ef9abac1bc 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -710,7 +710,7 @@ defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">;
defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">;
-defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">;
+defm vrndnq: vrnd<IRIntBase<"roundeven", [Vector]>, "n">;
multiclass compare_with_pred<string condname, dag arguments,
dag cmp, string suffix> {
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
index 3e625c739bde9..4888bc8c5e98f 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -148,7 +148,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a)
// CHECK-LABEL: @test_vrndnq_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vrndnq_f16(float16x8_t a)
@@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a)
// CHECK-LABEL: @test_vrndnq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vrndnq_f32(float32x4_t a)
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9b7dd8099368d..3ee69b72cc5cd 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in {
[llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;
}
-def int_arm_mve_vrintn: DefaultAttrsIntrinsic<
- [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vcls: DefaultAttrsIntrinsic<
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7ba6d411bc7b5..814c00c669cb3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
return false; // Not 'arm.mve.vctp64'.
}
+ if (Name.starts_with("vrintn.v")) {
+ NewFn = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
+ return true;
+ }
+
// These too are changed to accept a v2i1 instead of the old v4i1.
if (Name.consume_back(".v4i1")) {
// 'arm.mve.*.v4i1'.
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 6dd8a374a92af..9dffd945d5baa 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
}
multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
- defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
index 1d7dcc8bf8440..6946ec37ddf33 100644
--- a/llvm/test/CodeGen/Thumb2/mve-frint.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -424,21 +424,74 @@ entry:
ret <2 x double> %0
}
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-declare <4 x float> @llvm.rint.v4f32(<4 x float>)
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
-declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
-declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
-declare <8 x half> @llvm.rint.v8f16(<8 x half>)
-declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
-declare <8 x half> @llvm.floor.v8f16(<8 x half>)
-declare <8 x half> @llvm.round.v8f16(<8 x half>)
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-declare <2 x double> @llvm.round.v2f64(<2 x double>)
+define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: froundeven_float32_t:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vrintn.f32 s3, s3
+; CHECK-MVE-NEXT: vrintn.f32 s2, s2
+; CHECK-MVE-NEXT: vrintn.f32 s1, s1
+; CHECK-MVE-NEXT: vrintn.f32 s0, s0
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float32_t:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: froundeven_float16_t:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vmovx.f16 s4, s0
+; CHECK-MVE-NEXT: vrintn.f16 s0, s0
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vins.f16 s0, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s1
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s1, s1
+; CHECK-MVE-NEXT: vins.f16 s1, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s2
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s2, s2
+; CHECK-MVE-NEXT: vins.f16 s2, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s3
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s3, s3
+; CHECK-MVE-NEXT: vins.f16 s3, s4
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float16_t:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) {
+; CHECK-LABEL: froundeven_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
index a70975e1e7318..b30bb2e3ad3ff 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+; The llvm.arm.mve.vrintn should auto-upgrade to llvm.roundeven, which are selected to vrintn.
+
define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK: @ %bb.0: @ %entry
|
@@ -424,21 +424,74 @@ entry: | |||
ret <2 x double> %0 | |||
} | |||
|
|||
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These removed declarations don't seem related to the rest of the patch. Were they already redundant and you just noticed it in passing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep - they are not necessary any more for intrinsics, since https://discourse.llvm.org/t/recent-improvements-to-the-ir-parser/77366.
It felt cleaner to remove the definitions, as opposed to adding the new roundeven calls or leaving them inconsistent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great!
@@ -424,21 +424,74 @@ entry: | |||
ret <2 x double> %0 | |||
} | |||
|
|||
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great!
Now that llvm#141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work.
Now that llvm#141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work.
Now that llvm#141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work.
Now that #141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work.