Skip to content

Commit 285f6b1

Browse files
authored
Merge pull request #13010 from hppritcha/some_pmix_group_use_improvs
comm: add pmix timeout knob to group ops
2 parents 3907c2c + 9b71f99 commit 285f6b1

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

ompi/communicator/comm_cid.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
2525
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
2626
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
27-
* Copyright (c) 2020-2024 Triad National Security, LLC. All rights
27+
* Copyright (c) 2020-2025 Triad National Security, LLC. All rights
2828
* reserved.
2929
* $COPYRIGHT$
3030
*
@@ -320,6 +320,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
320320
pmix_proc_t *procs = NULL;
321321
void *grpinfo = NULL, *list = NULL;
322322
pmix_data_array_t darray;
323+
pmix_info_t tinfo;
323324

324325
switch (mode) {
325326
case OMPI_COMM_CID_GROUP_NEW:
@@ -349,6 +350,13 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
349350
goto fn_exit;
350351
}
351352

353+
rc = PMIx_Info_list_add(grpinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
354+
if (PMIX_SUCCESS != rc) {
355+
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Info_list_add failed %s %d", PMIx_Error_string(rc), __LINE__));
356+
rc = OMPI_ERR_OUT_OF_RESOURCE;
357+
goto fn_exit;
358+
}
359+
352360
list = PMIx_Info_list_start();
353361

354362
size_t c_index = (size_t)newcomm->c_index;
@@ -450,7 +458,10 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
450458
tag, tproc_count, ninfo, cid_base));
451459

452460
/* destruct the group */
453-
rc = PMIx_Group_destruct (tag, NULL, 0);
461+
PMIX_INFO_CONSTRUCT(&tinfo);
462+
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
463+
rc = PMIx_Group_destruct (tag, &tinfo, 0);
464+
PMIX_INFO_DESTRUCT(&tinfo);
454465
if(PMIX_SUCCESS != rc) {
455466
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Group_destruct failed %s", PMIx_Error_string(rc)));
456467
rc = opal_pmix_convert_status(rc);

ompi/runtime/ompi_mpi_params.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* All rights reserved.
2121
* Copyright (c) 2016-2021 Research Organization for Information Science
2222
* and Technology (RIST). All rights reserved.
23-
* Copyright (c) 2018-2024 Triad National Security, LLC. All rights
23+
* Copyright (c) 2018-2025 Triad National Security, LLC. All rights
2424
* reserved.
2525
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2626
* Copyright (c) 2022 IBM Corporation. All rights reserved.
@@ -391,7 +391,7 @@ int ompi_mpi_register_params(void)
391391

392392
ompi_pmix_connect_timeout = 0; /* infinite timeout - see PMIx standard */
393393
(void) mca_base_var_register ("ompi", "mpi", NULL, "pmix_connect_timeout",
394-
"Timeout(secs) for calls to PMIx_Connect. Default is no timeout.",
394+
"Timeout(secs) for calls to PMIx_Connect and PMIx_Group_construct/destruct. Default is no timeout.",
395395
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
396396
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
397397
&ompi_pmix_connect_timeout);

0 commit comments

Comments
 (0)