-
Notifications
You must be signed in to change notification settings - Fork 14k
[flang][acc] Generate acc.copyout for the reduction clause on compute constructs #144623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-openacc Author: None (khaki3) ChangesFor the reduction clause on combined constructs (compute + loop), we emit both This MR supplies the missing Patch is 25.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144623.diff 3 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 69e9c53baa740..63a9d1d5616a9 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2676,7 +2676,8 @@ static Op createComputeOp(
llvm::SmallVector<mlir::Value> waitOperands, attachEntryOperands,
copyEntryOperands, copyinEntryOperands, copyoutEntryOperands,
createEntryOperands, nocreateEntryOperands, presentEntryOperands,
- dataClauseOperands, numGangs, numWorkers, vectorLength, async;
+ reductionEntryOperands, dataClauseOperands, numGangs, numWorkers,
+ vectorLength, async;
llvm::SmallVector<mlir::Attribute> numGangsDeviceTypes, numWorkersDeviceTypes,
vectorLengthDeviceTypes, asyncDeviceTypes, asyncOnlyDeviceTypes,
waitOperandsDeviceTypes, waitOnlyDeviceTypes;
@@ -2912,9 +2913,12 @@ static Op createComputeOp(
// combined construct implies a copy clause so issue an implicit copy
// instead.
if (!combinedConstructs) {
+ auto crtDataStart = reductionOperands.size();
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
asyncDeviceTypes, asyncOnlyDeviceTypes);
+ reductionEntryOperands.append(reductionOperands.begin() + crtDataStart,
+ reductionOperands.end());
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
@@ -3038,6 +3042,8 @@ static Op createComputeOp(
builder, nocreateEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::PresentOp, mlir::acc::DeleteOp>(
builder, presentEntryOperands, /*structured=*/true);
+ genDataExitOperations<mlir::acc::ReductionOp, mlir::acc::CopyoutOp>(
+ builder, reductionEntryOperands, /*structured=*/true);
builder.restoreInsertionPoint(insPt);
return computeOp;
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..bb76122aaffac 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -1001,6 +1001,7 @@ subroutine acc_reduction_iand()
! CHECK-LABEL: func.func @_QPacc_reduction_iand()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ior()
integer :: i
@@ -1011,6 +1012,7 @@ subroutine acc_reduction_ior()
! CHECK-LABEL: func.func @_QPacc_reduction_ior()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ieor()
integer :: i
@@ -1021,6 +1023,7 @@ subroutine acc_reduction_ieor()
! CHECK-LABEL: func.func @_QPacc_reduction_ieor()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_and()
logical :: l
@@ -1033,6 +1036,7 @@ subroutine acc_reduction_and()
! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_or()
logical :: l
@@ -1043,6 +1047,7 @@ subroutine acc_reduction_or()
! CHECK-LABEL: func.func @_QPacc_reduction_or()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_eqv()
logical :: l
@@ -1053,6 +1058,7 @@ subroutine acc_reduction_eqv()
! CHECK-LABEL: func.func @_QPacc_reduction_eqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_neqv()
logical :: l
@@ -1063,6 +1069,7 @@ subroutine acc_reduction_neqv()
! CHECK-LABEL: func.func @_QPacc_reduction_neqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_add_cmplx()
complex :: c
@@ -1073,6 +1080,7 @@ subroutine acc_reduction_add_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_mul_cmplx()
complex :: c
@@ -1083,6 +1091,7 @@ subroutine acc_reduction_mul_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_add_alloc()
integer, allocatable :: i
@@ -1098,6 +1107,7 @@ subroutine acc_reduction_add_alloc()
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<i32>) -> !fir.heap<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_heap_i32 -> %[[RED]] : !fir.heap<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap<i32>) to varPtr(%[[BOX_ADDR]] : !fir.heap<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_pointer(i)
integer, pointer :: i
@@ -1112,6 +1122,7 @@ subroutine acc_reduction_add_pointer(i)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr<i32>) -> !fir.ptr<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ptr_i32 -> %[[RED]] : !fir.ptr<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr<i32>) to varPtr(%[[BOX_ADDR]] : !fir.ptr<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_static_slice(a)
integer :: a(100)
@@ -1129,6 +1140,7 @@ subroutine acc_reduction_add_static_slice(a)
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref<!fir.array<100xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) to varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a(11:20)"}
subroutine acc_reduction_add_dynamic_extent_add(a)
integer :: a(:)
@@ -1141,6 +1153,7 @@ subroutine acc_reduction_add_dynamic_extent_add(a)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.array<?xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_add_box_Uxi32 -> %[[RED:.*]] : !fir.ref<!fir.array<?xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xi32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_assumed_shape_max(a)
real :: a(:)
@@ -1153,6 +1166,7 @@ subroutine acc_reduction_add_assumed_shape_max(a)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_Uxf32 -> %[[RED]] : !fir.ref<!fir.array<?xf32>>) {
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xf32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_dynamic_extent_add_with_section(a)
integer :: a(:)
@@ -1167,6 +1181,7 @@ subroutine acc_reduction_add_dynamic_extent_add_with_section(a)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[DECL]]#0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ref<!fir.array<?xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<?xi32>> {name = "a(2:4)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb1.ub3_box_Uxi32 -> %[[RED]] : !fir.ref<!fir.array<?xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xi32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ref<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a(2:4)"}
subroutine acc_reduction_add_allocatable(a)
real, allocatable :: a(:)
@@ -1180,8 +1195,9 @@ subroutine acc_reduction_add_allocatable(a)
! CHECK: %[[BOX:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}}#1 : index) stride(%{{.*}}#2 : index) startIdx(%{{.*}}#0 : index) {strideInBytes = true}
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
-! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) bounds(%{{[0-9]+}}) -> !fir.heap<!fir.array<?xf32>> {name = "a"}
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) bounds(%[[BOUND]]) -> !fir.heap<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_heap_Uxf32 -> %[[RED]] : !fir.heap<!fir.array<?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap<!fir.array<?xf32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_pointer_array(a)
real, pointer :: a(:)
@@ -1197,6 +1213,7 @@ subroutine acc_reduction_add_pointer_array(a)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> !fir.ptr<!fir.array<?xf32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr<!fir.array<?xf32>>) bounds(%[[BOUND]]) -> !fir.ptr<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_ptr_Uxf32 -> %[[RED]] : !fir.ptr<!fir.array<?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr<!fir.array<?xf32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ptr<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_max_dynamic_extent_max(a, n)
integer :: n
@@ -1211,3 +1228,4 @@ subroutine acc_reduction_max_dynamic_extent_max(a, n)
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[DECL_A]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[ADDR]] : !fir.ref<!fir.array<?x?xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<?x?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_UxUxf32 -> %[[RED]] : !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?x?xf32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 20b5ad28f78a1..22a52739171b1 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -1042,6 +1042,7 @@ subroutine acc_reduction_iand()
! CHECK-LABEL: func.func @_QPacc_reduction_iand()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ior()
integer :: i
@@ -1052,6 +1053,7 @@ subroutine acc_reduction_ior()
! CHECK-LABEL: func.func @_QPacc_reduction_ior()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ieor()
integer :: i
@@ -1062,6 +1064,7 @@ subroutine acc_reduction_ieor()
! CHECK-LABEL: func.func @_QPacc_reduction_ieor()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_and()
logical :: l
@@ -1074,6 +1077,7 @@ subroutine acc_reduction_and()
! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_or()
logical :: l
@@ -1084,6 +1088,7 @@ subroutine acc_reduction_or()
! CHECK-LABEL: func.func @_QPacc_reduction_or()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_eqv()
logical :: l
@@ -1094,6 +1099,7 @@ subroutine acc_reduction_eqv()
! CHECK-LABEL: func.func @_QPacc_reduction_eqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_neqv()
logical :: l
@@ -1104,6 +1110,7 @@ subroutine acc_reduction_neqv()
! CHECK-LABEL: func.func @_QPacc_reduction_neqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_add_cmplx()
complex :: c
@@ -1114,6 +1121,7 @@ subroutine acc_reduction_add_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_mul_cmplx()
complex :: c
@@ -1124,6 +1132,7 @@ subroutine acc_reduction_mul_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_add_alloc()
integer, allocatable :: i
@@ -1137,6 +1146,7 @@ subroutine acc_reduction_add_alloc()
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOCA]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<!fir.heap<i32>>> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ref_box_heap_i32 -> %[[RED]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.box<!fir.heap<i32>>>) to varPtr(%[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_pointer(i)
integer, pointer :: i
@@ -1149,6 +1159,7 @@ subroutine acc_reduction_add_pointer(i)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ref_box_ptr_i32 -> %[[RED]] : !fir.ref<!fir.box<!fir.ptr<i32>>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) to varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_static_slice(a)
integer :: a(100)
@@ -1166,6 +1177,7 @@ subroutine acc_reduction_add_static_slice(a)
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.re...
[truncated]
|
@llvm/pr-subscribers-flang-fir-hlfir Author: None (khaki3) ChangesFor the reduction clause on combined constructs (compute + loop), we emit both This MR supplies the missing Patch is 25.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144623.diff 3 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 69e9c53baa740..63a9d1d5616a9 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2676,7 +2676,8 @@ static Op createComputeOp(
llvm::SmallVector<mlir::Value> waitOperands, attachEntryOperands,
copyEntryOperands, copyinEntryOperands, copyoutEntryOperands,
createEntryOperands, nocreateEntryOperands, presentEntryOperands,
- dataClauseOperands, numGangs, numWorkers, vectorLength, async;
+ reductionEntryOperands, dataClauseOperands, numGangs, numWorkers,
+ vectorLength, async;
llvm::SmallVector<mlir::Attribute> numGangsDeviceTypes, numWorkersDeviceTypes,
vectorLengthDeviceTypes, asyncDeviceTypes, asyncOnlyDeviceTypes,
waitOperandsDeviceTypes, waitOnlyDeviceTypes;
@@ -2912,9 +2913,12 @@ static Op createComputeOp(
// combined construct implies a copy clause so issue an implicit copy
// instead.
if (!combinedConstructs) {
+ auto crtDataStart = reductionOperands.size();
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
asyncDeviceTypes, asyncOnlyDeviceTypes);
+ reductionEntryOperands.append(reductionOperands.begin() + crtDataStart,
+ reductionOperands.end());
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
@@ -3038,6 +3042,8 @@ static Op createComputeOp(
builder, nocreateEntryOperands, /*structured=*/true);
genDataExitOperations<mlir::acc::PresentOp, mlir::acc::DeleteOp>(
builder, presentEntryOperands, /*structured=*/true);
+ genDataExitOperations<mlir::acc::ReductionOp, mlir::acc::CopyoutOp>(
+ builder, reductionEntryOperands, /*structured=*/true);
builder.restoreInsertionPoint(insPt);
return computeOp;
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..bb76122aaffac 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -1001,6 +1001,7 @@ subroutine acc_reduction_iand()
! CHECK-LABEL: func.func @_QPacc_reduction_iand()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ior()
integer :: i
@@ -1011,6 +1012,7 @@ subroutine acc_reduction_ior()
! CHECK-LABEL: func.func @_QPacc_reduction_ior()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ieor()
integer :: i
@@ -1021,6 +1023,7 @@ subroutine acc_reduction_ieor()
! CHECK-LABEL: func.func @_QPacc_reduction_ieor()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_and()
logical :: l
@@ -1033,6 +1036,7 @@ subroutine acc_reduction_and()
! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_or()
logical :: l
@@ -1043,6 +1047,7 @@ subroutine acc_reduction_or()
! CHECK-LABEL: func.func @_QPacc_reduction_or()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_eqv()
logical :: l
@@ -1053,6 +1058,7 @@ subroutine acc_reduction_eqv()
! CHECK-LABEL: func.func @_QPacc_reduction_eqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_neqv()
logical :: l
@@ -1063,6 +1069,7 @@ subroutine acc_reduction_neqv()
! CHECK-LABEL: func.func @_QPacc_reduction_neqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_add_cmplx()
complex :: c
@@ -1073,6 +1080,7 @@ subroutine acc_reduction_add_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_mul_cmplx()
complex :: c
@@ -1083,6 +1091,7 @@ subroutine acc_reduction_mul_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_add_alloc()
integer, allocatable :: i
@@ -1098,6 +1107,7 @@ subroutine acc_reduction_add_alloc()
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<i32>) -> !fir.heap<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_heap_i32 -> %[[RED]] : !fir.heap<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap<i32>) to varPtr(%[[BOX_ADDR]] : !fir.heap<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_pointer(i)
integer, pointer :: i
@@ -1112,6 +1122,7 @@ subroutine acc_reduction_add_pointer(i)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr<i32>) -> !fir.ptr<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ptr_i32 -> %[[RED]] : !fir.ptr<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr<i32>) to varPtr(%[[BOX_ADDR]] : !fir.ptr<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_static_slice(a)
integer :: a(100)
@@ -1129,6 +1140,7 @@ subroutine acc_reduction_add_static_slice(a)
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref<!fir.array<100xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) to varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a(11:20)"}
subroutine acc_reduction_add_dynamic_extent_add(a)
integer :: a(:)
@@ -1141,6 +1153,7 @@ subroutine acc_reduction_add_dynamic_extent_add(a)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.array<?xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_add_box_Uxi32 -> %[[RED:.*]] : !fir.ref<!fir.array<?xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xi32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_assumed_shape_max(a)
real :: a(:)
@@ -1153,6 +1166,7 @@ subroutine acc_reduction_add_assumed_shape_max(a)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_Uxf32 -> %[[RED]] : !fir.ref<!fir.array<?xf32>>) {
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xf32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_dynamic_extent_add_with_section(a)
integer :: a(:)
@@ -1167,6 +1181,7 @@ subroutine acc_reduction_add_dynamic_extent_add_with_section(a)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[DECL]]#0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ref<!fir.array<?xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<?xi32>> {name = "a(2:4)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb1.ub3_box_Uxi32 -> %[[RED]] : !fir.ref<!fir.array<?xi32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?xi32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ref<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a(2:4)"}
subroutine acc_reduction_add_allocatable(a)
real, allocatable :: a(:)
@@ -1180,8 +1195,9 @@ subroutine acc_reduction_add_allocatable(a)
! CHECK: %[[BOX:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}}#1 : index) stride(%{{.*}}#2 : index) startIdx(%{{.*}}#0 : index) {strideInBytes = true}
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
-! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) bounds(%{{[0-9]+}}) -> !fir.heap<!fir.array<?xf32>> {name = "a"}
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) bounds(%[[BOUND]]) -> !fir.heap<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_heap_Uxf32 -> %[[RED]] : !fir.heap<!fir.array<?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap<!fir.array<?xf32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_add_pointer_array(a)
real, pointer :: a(:)
@@ -1197,6 +1213,7 @@ subroutine acc_reduction_add_pointer_array(a)
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> !fir.ptr<!fir.array<?xf32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr<!fir.array<?xf32>>) bounds(%[[BOUND]]) -> !fir.ptr<!fir.array<?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_ptr_Uxf32 -> %[[RED]] : !fir.ptr<!fir.array<?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr<!fir.array<?xf32>>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ptr<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
subroutine acc_reduction_max_dynamic_extent_max(a, n)
integer :: n
@@ -1211,3 +1228,4 @@ subroutine acc_reduction_max_dynamic_extent_max(a, n)
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[DECL_A]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[ADDR]] : !fir.ref<!fir.array<?x?xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<?x?xf32>> {name = "a"}
! CHECK: acc.parallel reduction(@reduction_max_box_UxUxf32 -> %[[RED]] : !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.array<?x?xf32>>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_reduction>, name = "a"}
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 20b5ad28f78a1..22a52739171b1 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -1042,6 +1042,7 @@ subroutine acc_reduction_iand()
! CHECK-LABEL: func.func @_QPacc_reduction_iand()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ior()
integer :: i
@@ -1052,6 +1053,7 @@ subroutine acc_reduction_ior()
! CHECK-LABEL: func.func @_QPacc_reduction_ior()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_ieor()
integer :: i
@@ -1062,6 +1064,7 @@ subroutine acc_reduction_ieor()
! CHECK-LABEL: func.func @_QPacc_reduction_ieor()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_and()
logical :: l
@@ -1074,6 +1077,7 @@ subroutine acc_reduction_and()
! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_or()
logical :: l
@@ -1084,6 +1088,7 @@ subroutine acc_reduction_or()
! CHECK-LABEL: func.func @_QPacc_reduction_or()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_eqv()
logical :: l
@@ -1094,6 +1099,7 @@ subroutine acc_reduction_eqv()
! CHECK-LABEL: func.func @_QPacc_reduction_eqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_neqv()
logical :: l
@@ -1104,6 +1110,7 @@ subroutine acc_reduction_neqv()
! CHECK-LABEL: func.func @_QPacc_reduction_neqv()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.logical<4>>) to varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) {dataClause = #acc<data_clause acc_reduction>, name = "l"}
subroutine acc_reduction_add_cmplx()
complex :: c
@@ -1114,6 +1121,7 @@ subroutine acc_reduction_add_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_mul_cmplx()
complex :: c
@@ -1124,6 +1132,7 @@ subroutine acc_reduction_mul_cmplx()
! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx()
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {name = "c"}
! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref<complex<f32>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<complex<f32>>) to varPtr(%{{.*}} : !fir.ref<complex<f32>>) {dataClause = #acc<data_clause acc_reduction>, name = "c"}
subroutine acc_reduction_add_alloc()
integer, allocatable :: i
@@ -1137,6 +1146,7 @@ subroutine acc_reduction_add_alloc()
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOCA]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<!fir.heap<i32>>> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ref_box_heap_i32 -> %[[RED]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.box<!fir.heap<i32>>>) to varPtr(%[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_pointer(i)
integer, pointer :: i
@@ -1149,6 +1159,7 @@ subroutine acc_reduction_add_pointer(i)
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "i"}
! CHECK: acc.parallel reduction(@reduction_add_ref_box_ptr_i32 -> %[[RED]] : !fir.ref<!fir.box<!fir.ptr<i32>>>)
+! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) to varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>) {dataClause = #acc<data_clause acc_reduction>, name = "i"}
subroutine acc_reduction_add_static_slice(a)
integer :: a(100)
@@ -1166,6 +1177,7 @@ subroutine acc_reduction_add_static_slice(a)
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.re...
[truncated]
|
Since we need a special handling for reduction, it is proper that the copyout should not be explicitly declared. Close this PR. |
For the reduction clause on combined constructs (compute + loop), we emit both
acc.copyin
andacc.copyout
, while deferring the generation of reduction to the process of the loop construct. Yet, with a single construct (such asparallel
andkernels
), we only createacc.reduction
. According to the spec, the reduction clause should be handled as if it is a copy clause in terms of data tranfers.This PR supplies the missing
acc.copyout
.Related: #122539 #126560