|
20 | 20 | * and Technology (RIST). All rights reserved.
|
21 | 21 | * Copyright (c) 2018 Triad National Security, LLC. All rights
|
22 | 22 | * reserved.
|
| 23 | + * Copyright (c) 2021 IBM Corporation. All rights reserved. |
23 | 24 | * $COPYRIGHT$
|
24 | 25 | *
|
25 | 26 | * Additional copyrights may follow
|
@@ -510,10 +511,41 @@ static inline bool ompi_op_is_valid(ompi_op_t * op, ompi_datatype_t * ddt,
|
510 | 511 | * is not defined to have that operation, it is likely to seg fault.
|
511 | 512 | */
|
512 | 513 | static inline void ompi_op_reduce(ompi_op_t * op, void *source,
|
513 |
| - void *target, int count, |
| 514 | + void *target, size_t count, |
514 | 515 | ompi_datatype_t * dtype)
|
515 | 516 | {
|
516 | 517 | MPI_Fint f_dtype, f_count;
|
| 518 | + int int_count = count; |
| 519 | + |
| 520 | + /* |
| 521 | + * If the count is > INT_MAX then we need to call the reduction op |
| 522 | + * in iterations of counts <= INT_MAX since it has an `int *len` |
| 523 | + * parameter. |
| 524 | + * |
| 525 | + * Note: Once BigCount support is added, we can distinguish between |
| 526 | + * a reduction operation taking `int *len` and one taking `MPI_Count *len`, |
| 527 | + * at which point this loop can be avoided. |
| 528 | + */ |
| 529 | + if( OPAL_UNLIKELY(count > INT_MAX) ) { |
| 530 | + size_t done_count = 0, shift; |
| 531 | + int iter_count; |
| 532 | + ptrdiff_t ext, lb; |
| 533 | + |
| 534 | + ompi_datatype_get_extent(dtype, &lb, &ext); |
| 535 | + |
| 536 | + while(done_count < count) { |
| 537 | + if(done_count + INT_MAX > count) { |
| 538 | + iter_count = count - done_count; |
| 539 | + } else { |
| 540 | + iter_count = INT_MAX; |
| 541 | + } |
| 542 | + shift = done_count * ext; |
| 543 | + // Recurse one level, passing a chunk whose count fits in an 'int' |
| 544 | + ompi_op_reduce(op, (char*)source + shift, (char*)target + shift, iter_count, dtype); |
| 545 | + done_count += iter_count; |
| 546 | + } |
| 547 | + return; |
| 548 | + } |
517 | 549 |
|
518 | 550 | /*
|
519 | 551 | * Call the reduction function. Two dimensions: a) if both the op
|
@@ -548,25 +580,25 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source,
|
548 | 580 | dtype_id = ompi_op_ddt_map[dtype->id];
|
549 | 581 | }
|
550 | 582 | op->o_func.intrinsic.fns[dtype_id](source, target,
|
551 |
| - &count, &dtype, |
| 583 | + &int_count, &dtype, |
552 | 584 | op->o_func.intrinsic.modules[dtype_id]);
|
553 | 585 | return;
|
554 | 586 | }
|
555 | 587 |
|
556 | 588 | /* User-defined function */
|
557 | 589 | if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
|
558 | 590 | f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
|
559 |
| - f_count = OMPI_INT_2_FINT(count); |
| 591 | + f_count = OMPI_INT_2_FINT(int_count); |
560 | 592 | op->o_func.fort_fn(source, target, &f_count, &f_dtype);
|
561 | 593 | return;
|
562 | 594 | } else if (0 != (op->o_flags & OMPI_OP_FLAGS_JAVA_FUNC)) {
|
563 |
| - op->o_func.java_data.intercept_fn(source, target, &count, &dtype, |
| 595 | + op->o_func.java_data.intercept_fn(source, target, &int_count, &dtype, |
564 | 596 | op->o_func.java_data.baseType,
|
565 | 597 | op->o_func.java_data.jnienv,
|
566 | 598 | op->o_func.java_data.object);
|
567 | 599 | return;
|
568 | 600 | }
|
569 |
| - op->o_func.c_fn(source, target, &count, &dtype); |
| 601 | + op->o_func.c_fn(source, target, &int_count, &dtype); |
570 | 602 | return;
|
571 | 603 | }
|
572 | 604 |
|
|
0 commit comments