Skip to content

btl/ofi: fixes for multi mpi init/fini scenarios #13020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions opal/mca/btl/ofi/btl_ofi_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
/*
* $COPYRIGHT$
* Copyright (c) 2018 Intel Inc. All rights reserved
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -14,10 +16,10 @@
#include "btl_ofi_rdma.h"

#if OPAL_HAVE_THREAD_LOCAL
opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
static opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
#endif /* OPAL_HAVE_THREAD_LOCAL */

int init_context_freelists(mca_btl_ofi_context_t *context)
static int init_context_freelists(mca_btl_ofi_context_t *context)
{
int rc;
OBJ_CONSTRUCT(&context->rdma_comp_list, opal_free_list_t);
Expand Down Expand Up @@ -113,6 +115,7 @@ mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
context->tx_ctx = ep;
context->rx_ctx = ep;
context->context_id = 0;
my_context = NULL;

return context;

Expand Down
8 changes: 2 additions & 6 deletions opal/mca/btl/ofi/btl_ofi_endpoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
* Copyright (c) 2018 Intel, Inc, All rights reserved
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates.
* All Rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -32,10 +34,6 @@

BEGIN_C_DECLS

#if OPAL_HAVE_THREAD_LOCAL
extern opal_thread_local mca_btl_ofi_context_t *my_context;
#endif /* OPAL_HAVE_THREAD_LOCAL */

struct mca_btl_base_endpoint_t {
opal_list_item_t super;

Expand All @@ -53,8 +51,6 @@ typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_ofi_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_ofi_endpoint_t);

int init_context_freelists(mca_btl_ofi_context_t *context);

mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create(opal_proc_t *proc, struct fid_ep *ep);

/* contexts */
Expand Down
5 changes: 4 additions & 1 deletion opal/mca/memory/patcher/memory_patcher_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2020 IBM Corporation. All rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -73,6 +74,7 @@ static int patcher_register(void);
static int patcher_query(int *);

static int mca_memory_patcher_priority;
static int was_executed_already = 0;

opal_memory_patcher_component_t mca_memory_patcher_component = {
.super =
Expand Down Expand Up @@ -585,7 +587,6 @@ static int patcher_query(int *priority)

static int patcher_open(void)
{
static int was_executed_already = 0;
int rc;

if (was_executed_already) {
Expand Down Expand Up @@ -678,6 +679,8 @@ static int patcher_close(void)
{
mca_base_framework_close(&opal_patcher_base_framework);

was_executed_already = 0;

/* Note that we don't need to unpatch any symbols here; the
patcher framework will take care of all of that for us. */
return OPAL_SUCCESS;
Expand Down
Loading