Skip to content

Commit a03199c

Browse files
committed
btl/ofi: fixes for multi mpi init/fini scenarios
The OFI BTL was caching a context - in the case of normal OFI EPs - which was not being reset across multiple session init/finalize cycles, resulting in segfaults. Also, some symbols were being exported which should not have been. Related to #13019 Signed-off-by: Howard Pritchard <[email protected]>
1 parent 0bccfcd commit a03199c

File tree

2 files changed

+7
-8
lines changed

2 files changed

+7
-8
lines changed

opal/mca/btl/ofi/btl_ofi_context.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
/*
33
* $COPYRIGHT$
44
* Copyright (c) 2018 Intel Inc. All rights reserved
5+
* Copyright (c) 2025 Triad National Security, LLC. All rights
6+
* reserved.
57
* $COPYRIGHT$
68
*
79
* Additional copyrights may follow
@@ -14,10 +16,10 @@
1416
#include "btl_ofi_rdma.h"
1517

1618
#if OPAL_HAVE_THREAD_LOCAL
17-
opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
19+
static opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
1820
#endif /* OPAL_HAVE_THREAD_LOCAL */
1921

20-
int init_context_freelists(mca_btl_ofi_context_t *context)
22+
static int init_context_freelists(mca_btl_ofi_context_t *context)
2123
{
2224
int rc;
2325
OBJ_CONSTRUCT(&context->rdma_comp_list, opal_free_list_t);
@@ -113,6 +115,7 @@ mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
113115
context->tx_ctx = ep;
114116
context->rx_ctx = ep;
115117
context->context_id = 0;
118+
my_context = NULL;
116119

117120
return context;
118121

opal/mca/btl/ofi/btl_ofi_endpoint.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
* Copyright (c) 2018 Intel, Inc, All rights reserved
1616
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates.
1717
* All Rights reserved.
18+
* Copyright (c) 2025 Triad National Security, LLC. All rights
19+
* reserved.
1820
* $COPYRIGHT$
1921
*
2022
* Additional copyrights may follow
@@ -32,10 +34,6 @@
3234

3335
BEGIN_C_DECLS
3436

35-
#if OPAL_HAVE_THREAD_LOCAL
36-
extern opal_thread_local mca_btl_ofi_context_t *my_context;
37-
#endif /* OPAL_HAVE_THREAD_LOCAL */
38-
3937
struct mca_btl_base_endpoint_t {
4038
opal_list_item_t super;
4139

@@ -53,8 +51,6 @@ typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
5351
typedef mca_btl_base_endpoint_t mca_btl_ofi_endpoint_t;
5452
OBJ_CLASS_DECLARATION(mca_btl_ofi_endpoint_t);
5553

56-
int init_context_freelists(mca_btl_ofi_context_t *context);
57-
5854
mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create(opal_proc_t *proc, struct fid_ep *ep);
5955

6056
/* contexts */

0 commit comments

Comments
 (0)