Skip to content

Commit 37a7896

Browse files
authored
Merge pull request #11418 from gkatev/coll_xhc
New collectives component: XPMEM-based Hierarchical Collectives (XHC)
2 parents 8b4237c + 7b9e74c commit 37a7896

17 files changed

+5966
-9
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
77
* Copyright (c) Amazon.com, Inc. or its affiliates.
88
* All rights reserved.
9+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
10+
* Laboratory, ICS Forth. All rights reserved.
911
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1012
* $COPYRIGHT$
1113
*
@@ -47,11 +49,11 @@
4749

4850
/*
4951
* Today:
50-
* . only 2 modules available for intranode (low) level
52+
* . 3 modules available for intranode (low) level
5153
* . only 2 modules available for internode (up) level
5254
*/
5355

54-
#define COLL_HAN_LOW_MODULES 2
56+
#define COLL_HAN_LOW_MODULES 3
5557
#define COLL_HAN_UP_MODULES 2
5658

5759
struct mca_coll_han_bcast_args_s {

ompi/mca/coll/han/coll_han_component.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
* reserved.
55
* Copyright (c) 2022 IBM Corporation. All rights reserved
66
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
7+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
8+
* Laboratory, ICS Forth. All rights reserved.
79
* $COPYRIGHT$
810
*
911
* Additional copyrights may follow
@@ -43,7 +45,8 @@ ompi_coll_han_components ompi_coll_han_available_components[COMPONENTS_COUNT] =
4345
{ TUNED, "tuned" },
4446
{ SM, "sm" }, /* this should not be used, the collective component is gone */
4547
{ ADAPT, "adapt" },
46-
{ HAN, "han" }
48+
{ HAN, "han" },
49+
{ XHC, "xhc" }
4750
};
4851

4952
/*
@@ -287,7 +290,7 @@ static int han_register(void)
287290

288291
cs->han_bcast_low_module = 0;
289292
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
290-
"low level module for bcast, currently only 0 for tuned",
293+
"low level module for bcast, 0 tuned, 2 xhc",
291294
OPAL_INFO_LVL_9,
292295
&cs->han_bcast_low_module,
293296
&cs->han_op_module_name.bcast.han_op_low_module_name);
@@ -307,7 +310,7 @@ static int han_register(void)
307310

308311
cs->han_reduce_low_module = 0;
309312
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
310-
"low level module for allreduce, currently only 0 tuned",
313+
"low level module for reduce, 0 tuned, 2 xhc",
311314
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
312315
&cs->han_op_module_name.reduce.han_op_low_module_name);
313316

@@ -326,7 +329,7 @@ static int han_register(void)
326329

327330
cs->han_allreduce_low_module = 0;
328331
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
329-
"low level module for allreduce, currently only 0 tuned",
332+
"low level module for allreduce, 0 tuned, 2 xhc",
330333
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
331334
&cs->han_op_module_name.allreduce.han_op_low_module_name);
332335

@@ -338,7 +341,7 @@ static int han_register(void)
338341

339342
cs->han_allgather_low_module = 0;
340343
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
341-
"low level module for allgather, currently only 0 tuned",
344+
"low level module for allgather, 0 tuned, 2 xhc",
342345
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
343346
&cs->han_op_module_name.allgather.han_op_low_module_name);
344347

@@ -350,7 +353,7 @@ static int han_register(void)
350353

351354
cs->han_gather_low_module = 0;
352355
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
353-
"low level module for gather, currently only 0 tuned",
356+
"low level module for gather, 0 tuned, 2 xhc",
354357
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
355358
&cs->han_op_module_name.gather.han_op_low_module_name);
356359

@@ -374,7 +377,7 @@ static int han_register(void)
374377

375378
cs->han_scatter_low_module = 0;
376379
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
377-
"low level module for scatter, currently only 0 tuned",
380+
"low level module for scatter, 0 tuned, 2 xhc",
378381
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
379382
&cs->han_op_module_name.scatter.han_op_low_module_name);
380383

ompi/mca/coll/han/coll_han_dynamic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
* reserved.
66
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
77
* Copyright (c) 2022 IBM Corporation. All rights reserved
8+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
9+
* Laboratory, ICS Forth. All rights reserved.
810
*
911
* $COPYRIGHT$
1012
*
@@ -105,6 +107,7 @@ typedef enum COMPONENTS {
105107
SM,
106108
ADAPT,
107109
HAN,
110+
XHC,
108111
COMPONENTS_COUNT
109112
} COMPONENT_T;
110113

ompi/mca/coll/han/coll_han_subcomms.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* of Tennessee Research Foundation. All rights
44
* reserved.
55
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
6+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
7+
* Laboratory, ICS Forth. All rights reserved.
68
*
79
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
810
* $COPYRIGHT$
@@ -314,6 +316,10 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
314316
&comm_info, &(low_comms[1]));
315317
assert(OMPI_COMM_IS_DISJOINT_SET(low_comms[1]) && !OMPI_COMM_IS_DISJOINT(low_comms[1]));
316318

319+
opal_info_set(&comm_info, "ompi_comm_coll_preference", "xhc,^han");
320+
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
321+
&comm_info, &(low_comms[2]));
322+
317323
/*
318324
* Upgrade libnbc module priority to set up up_comms[0] with libnbc module
319325
* This sub-communicator contains one process per node: processes with the

ompi/mca/coll/xhc/Makefile.am

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#
2+
# Copyright (c) 2021-2023 Computer Architecture and VLSI Systems (CARV)
3+
# Laboratory, ICS Forth. All rights reserved.
4+
# $COPYRIGHT$
5+
#
6+
# Additional copyrights may follow
7+
#
8+
# $HEADER$
9+
#
10+
11+
dist_opaldata_DATA = help-coll-xhc.txt
12+
13+
sources = \
14+
coll_xhc.h \
15+
coll_xhc_atomic.h \
16+
coll_xhc.c \
17+
coll_xhc_component.c \
18+
coll_xhc_module.c \
19+
coll_xhc_bcast.c \
20+
coll_xhc_barrier.c \
21+
coll_xhc_reduce.c \
22+
coll_xhc_allreduce.c
23+
24+
# Make the output library in this directory, and name it either
25+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
26+
# (for static builds).
27+
28+
component_noinst =
29+
component_install =
30+
if MCA_BUILD_ompi_coll_xhc_DSO
31+
component_install += mca_coll_xhc.la
32+
else
33+
component_noinst += libmca_coll_xhc.la
34+
endif
35+
36+
mcacomponentdir = $(ompilibdir)
37+
mcacomponent_LTLIBRARIES = $(component_install)
38+
mca_coll_xhc_la_SOURCES = $(sources)
39+
mca_coll_xhc_la_LDFLAGS = -module -avoid-version
40+
mca_coll_xhc_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la
41+
42+
noinst_LTLIBRARIES = $(component_noinst)
43+
libmca_coll_xhc_la_SOURCES = $(sources)
44+
libmca_coll_xhc_la_LDFLAGS = -module -avoid-version

0 commit comments

Comments
 (0)