
#include "coll_xhc.h"

-static int xhc_make_comms(ompi_communicator_t *ompi_comm,
+static int xhc_comms_make(ompi_communicator_t *ompi_comm,
    xhc_peer_info_t *peer_info, xhc_comm_t **comms_dst,
    int *comm_count_dst, xhc_loc_t *hierarchy, int hierarchy_len);
-static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count);
+static void xhc_comms_destroy(xhc_comm_t *comms, int comm_count);

-static void xhc_print_info(xhc_module_t *module,
+static int xhc_print_info(xhc_module_t *module,
    ompi_communicator_t *comm, xhc_data_t *data);

static void *xhc_shmem_create(opal_shmem_ds_t *seg_ds, size_t size,
    ompi_communicator_t *ompi_comm, const char *name_chr_s, int name_chr_i);
static void *xhc_shmem_attach(opal_shmem_ds_t *seg_ds);
static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info);

-int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
+// ------------------------------------------------
+
+int mca_coll_xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {

    int comm_size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);
@@ -118,7 +120,7 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
    /* An XHC communicator is created for each level of the hierarchy.
     * The hierarchy must be in order of most-specific to most-general. */

-    ret = xhc_make_comms(comm, peer_info, &data->comms, &data->comm_count,
+    ret = xhc_comms_make(comm, peer_info, &data->comms, &data->comm_count,
        module->hierarchy, module->hierarchy_len);
    if (ret != OMPI_SUCCESS) {
        RETURN_WITH_ERROR(return_code, ret, end);
@@ -144,7 +146,10 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
    // ----

    if (mca_coll_xhc_component.print_info) {
-        xhc_print_info(module, comm, data);
+        ret = xhc_print_info(module, comm, data);
+        if (ret != OMPI_SUCCESS) {
+            RETURN_WITH_ERROR(return_code, ret, end);
+        }
    }

    // ----
@@ -162,18 +167,18 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
        opal_show_help("help-coll-xhc.txt", "xhc-init-failed", true,
            return_code, errno, strerror(errno));

-        xhc_deinit(module);
+        xhc_fini(module);
    }

    return return_code;
}

-void xhc_deinit(mca_coll_xhc_module_t *module) {
+void mca_coll_xhc_fini(mca_coll_xhc_module_t *module) {
    if (module->data) {
        xhc_data_t *data = module->data;

        if (data->comm_count >= 0) {
-            xhc_destroy_comms(data->comms, data->comm_count);
+            xhc_comms_destroy(data->comms, data->comm_count);
        }

        free(data->comms);
@@ -198,7 +203,20 @@ void xhc_deinit(mca_coll_xhc_module_t *module) {
    }
}

-static int xhc_make_comms(ompi_communicator_t *ompi_comm,
+// ------------------------------------------------
+
+/* This is where the XHC hierarchy is constructed; the function receives
+ * the hierarchy specification (hierarchy param) and groups ranks together
+ * according to it. The process begins with the first locality in the list.
+ * All ranks that share this locality (determined via the relative
+ * peer-to-peer distances) become siblings. The one amongst them with the
+ * lowest rank number becomes the manager/leader of the group. The members
+ * don't need to keep track of the actual ranks of their siblings -- only
+ * the rank of the group's leader/manager, the size of the group, and their
+ * own member ID. The process continues with the next locality, except that
+ * now only the ranks that became leaders on the previous level are eligible
+ * (determined via comm_candidate, see inline comments). */
+static int xhc_comms_make(ompi_communicator_t *ompi_comm,
    xhc_peer_info_t *peer_info, xhc_comm_t **comms_dst,
    int *comm_count_dst, xhc_loc_t *hierarchy, int hierarchy_len) {

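A compilable, stand-alone illustration of the grouping rule described in the comment above. This is not the OMPI implementation: the xhc_loc_t / peer-distance checks are replaced by a hypothetical per-level "group key" table, and the candidacy Allgather by a local flag array; what it keeps is the rule that siblings share a locality, the lowest rank becomes the leader, members record only leader/size/member ID, and only leaders stay eligible for the next level.

#include <stdbool.h>
#include <stdio.h>

#define NRANKS  8
#define NLEVELS 2

/* key[level][rank]: ranks with equal keys on a level share that locality
 * (hypothetical stand-in for the real locality/distance machinery). */
static const int key[NLEVELS][NRANKS] = {
    {0, 0, 1, 1, 2, 2, 3, 3},   /* level 0: e.g. same NUMA node */
    {0, 0, 0, 0, 1, 1, 1, 1},   /* level 1: e.g. same host */
};

int main(void) {
    bool candidate[NRANKS];
    for (int r = 0; r < NRANKS; r++) candidate[r] = true;

    for (int lvl = 0; lvl < NLEVELS; lvl++) {
        bool next[NRANKS] = {false};

        for (int r = 0; r < NRANKS; r++) {
            if (!candidate[r]) continue;

            /* Group = all candidates sharing this rank's key on this level;
             * the lowest-ranked one among them is the manager/leader. */
            int leader = r, size = 0, member_id = -1;
            for (int p = 0; p < NRANKS; p++) {
                if (candidate[p] && key[lvl][p] == key[lvl][r]) {
                    if (p < leader) leader = p;
                    if (p == r) member_id = size;
                    size++;
                }
            }

            /* Like the real code, a member only records leader/size/member ID. */
            printf("level %d rank %d: leader=%d size=%d member_id=%d\n",
                lvl, r, leader, size, member_id);

            if (leader == r) next[r] = true;   /* only leaders move up a level */
        }

        for (int r = 0; r < NRANKS; r++) candidate[r] = next[r];
    }

    return 0;
}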
@@ -257,6 +275,10 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,

        // ----

+        /* Only ranks that were leaders on the previous level are candidates
+         * for this one. Every rank advertises whether others may consider
+         * it for inclusion via an Allgather. */
+
        bool is_candidate = (comm_count == 0
            || comms[comm_count - 1].manager_rank == ompi_rank);

@@ -305,14 +327,16 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,

        assert(xc->size > 0);

+        /* If there are no local peers with regard to this locality, no
+         * XHC comm is created for this process on this level. */
        if (xc->size == 1) {
            opal_output_verbose(MCA_BASE_VERBOSE_WARN,
                ompi_coll_base_framework.framework_output,
                "coll:xhc: Warning: Locality 0x%04x does not result "
                "in any new groupings; skipping it", xc->locality);

-            /* Must participate in this allgather, even if useless
-             * to this rank, since it's necessary for the rest */
+            /* All ranks must participate in the "control struct sharing"
+             * allgather, even if it is of no use to some of them */

            ret = ompi_comm->c_coll->coll_allgather(&xc->ctrl_ds,
                sizeof(opal_shmem_ds_t), MPI_BYTE, comm_ctrl_ds,
@@ -322,7 +346,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
                RETURN_WITH_ERROR(return_code, ret, comm_error);
            }

-            xhc_destroy_comms(xc, 1);
+            xhc_comms_destroy(xc, 1);
            continue;
        }

@@ -372,6 +396,12 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
            + sizeof(xhc_comm_ctrl_t) + smsc_reg_size);
        }

+        /* The comm's managers share the details of the communication structs
+         * with their children, so that they may attach to them. Because no
+         * MPI communicator is formed that includes (only) the members of the
+         * XHC comm, the sharing is achieved with a single Allgather, instead
+         * of a Broadcast inside each XHC comm. */
+
        ret = ompi_comm->c_coll->coll_allgather(&xc->ctrl_ds,
            sizeof(opal_shmem_ds_t), MPI_BYTE, comm_ctrl_ds,
            sizeof(opal_shmem_ds_t), MPI_BYTE, ompi_comm,
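The single-Allgather sharing described in the comment above can be pictured with plain MPI calls. The sketch below is an illustration of the pattern only, not the xhc code path: the descriptor struct stands in for opal_shmem_ds_t, attaching is mocked with a printf, and the manager rank is assumed rather than computed. Every rank contributes its own control-segment descriptor; a member then indexes the gathered array with its manager's rank to find the segment it would attach to.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for opal_shmem_ds_t: a fixed-size, byte-copyable descriptor. */
typedef struct { int owner; char name[56]; } seg_desc_t;

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Each rank describes the control segment it (hypothetically) created. */
    seg_desc_t mine = {.owner = rank};
    snprintf(mine.name, sizeof mine.name, "xhc_ctrl_seg.%d", rank);

    /* One Allgather over the parent communicator: afterwards every rank holds
     * every other rank's descriptor, so no broadcast inside each XHC comm is
     * needed even though no MPI communicator matches that group. */
    seg_desc_t *all = malloc(size * sizeof *all);
    MPI_Allgather(&mine, sizeof mine, MPI_BYTE,
        all, sizeof mine, MPI_BYTE, MPI_COMM_WORLD);

    /* A member looks up its manager's entry and would attach to it;
     * manager_rank = 0 is an assumption for the illustration. */
    int manager_rank = 0;
    printf("rank %d would attach to '%s' (owner %d)\n",
        rank, all[manager_rank].name, all[manager_rank].owner);

    free(all);
    MPI_Finalize();
    return 0;
}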
@@ -404,7 +434,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
        continue;

        comm_error: {
-            xhc_destroy_comms(comms, comm_count + 1);
+            xhc_comms_destroy(comms, comm_count + 1);
            comm_count = -1;

            goto end;
@@ -428,7 +458,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
    return return_code;
}

-static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count) {
+static void xhc_comms_destroy(xhc_comm_t *comms, int comm_count) {
    bool is_manager = true;

    for (int i = 0; i < comm_count; i++) {
@@ -458,16 +488,17 @@ static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count) {
    }
}

-static void xhc_print_info(xhc_module_t *module,
+static int xhc_print_info(xhc_module_t *module,
    ompi_communicator_t *comm, xhc_data_t *data) {

    int rank = ompi_comm_rank(comm);
-
-    char *drval_str = NULL;
-    char *lb_rla_str = NULL;
-    char *un_min_str = NULL;
+    int ret;

    if (rank == 0) {
+        char *drval_str;
+        char *lb_rla_str;
+        char *un_min_str;
+
        switch (mca_coll_xhc_component.dynamic_reduce) {
            case OMPI_XHC_DYNAMIC_REDUCE_DISABLED:
                drval_str = "OFF"; break;
@@ -492,8 +523,11 @@ static void xhc_print_info(xhc_module_t *module,
                lb_rla_str = "???";
        }

-        opal_asprintf(&un_min_str, " (min '%zu' bytes)",
+        ret = opal_asprintf(&un_min_str, " (min '%zu' bytes)",
            mca_coll_xhc_component.uniform_chunks_min);
+        if (ret < 0) {
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }

        printf("------------------------------------------------\n"
            "OMPI coll/xhc @ %s, priority %d\n"
@@ -508,39 +542,50 @@ static void xhc_print_info(xhc_module_t *module,
            (mca_coll_xhc_component.uniform_chunks ? "ON" : "OFF"),
            (mca_coll_xhc_component.uniform_chunks ? un_min_str : ""),
            mca_coll_xhc_component.cico_max);
+
+        free(un_min_str);
    }

-    // TODO convert to opal_asprintf?
    for (int i = 0; i < data->comm_count; i++) {
-        char buf[BUFSIZ] = {0};
-        size_t buf_idx = 0;
+        char *mlist = NULL;
+        char *tmp;

-        buf_idx += snprintf(buf + buf_idx, sizeof(buf) - buf_idx,
-            "%d", data->comms[i].manager_rank);
+        ret = opal_asprintf(&mlist, "%d", data->comms[i].manager_rank);
+        if (ret < 0) {
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }

-        for (int j = 1; j < data->comms[i].size; j++) {
-            if (j == data->comms[i].member_id) {
+        for (int m = 1; m < data->comms[i].size; m++) {
+            if (m == data->comms[i].member_id) {
                if (i == 0 || data->comms[i - 1].manager_rank == rank) {
-                    buf_idx += snprintf(buf + buf_idx,
-                        sizeof(buf) - buf_idx, " %d", rank);
+                    ret = opal_asprintf(&tmp, "%s %d", mlist, rank);
                } else {
-                    buf_idx += snprintf(buf + buf_idx,
-                        sizeof(buf) - buf_idx, " _");
+                    ret = opal_asprintf(&tmp, "%s _", mlist);
                }
            } else {
-                buf_idx += snprintf(buf + buf_idx,
-                    sizeof(buf) - buf_idx, " x");
+                ret = opal_asprintf(&tmp, "%s x", mlist);
+            }
+
+            free(mlist);
+            mlist = tmp;
+
+            if (ret < 0) {
+                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }

        printf("XHC comm loc=0x%08x chunk_size=%zu with %d members [%s]\n",
            data->comms[i].locality, data->comms[i].chunk_size,
-            data->comms[i].size, buf);
+            data->comms[i].size, mlist);
+
+        free(mlist);
    }

-    free(un_min_str);
+    return OMPI_SUCCESS;
}

+// ------------------------------------------------
+
static void *xhc_shmem_create(opal_shmem_ds_t *seg_ds, size_t size,
    ompi_communicator_t *ompi_comm, const char *name_chr_s, int name_chr_i) {

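For reference, the member-list loop introduced in xhc_print_info above follows the usual grow-by-asprintf idiom: each iteration builds a new string from the previous one and frees the old. A tiny stand-alone rendition, where glibc's asprintf stands in for opal_asprintf and the manager/size/member values are made up:

#define _GNU_SOURCE   /* for asprintf */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    /* Made-up values: manager rank 4, group of 4, we are member 2, rank 9. */
    const int manager = 4, size = 4, member_id = 2, my_rank = 9;

    char *list = NULL, *tmp = NULL;
    if (asprintf(&list, "%d", manager) < 0) return 1;

    for (int m = 1; m < size; m++) {
        /* Our own slot gets the rank, every other slot a placeholder 'x';
         * the previous string is freed after each reallocation. */
        int ret = (m == member_id) ? asprintf(&tmp, "%s %d", list, my_rank)
                                   : asprintf(&tmp, "%s x", list);
        free(list);
        list = tmp;
        if (ret < 0) return 1;
    }

    printf("members [%s]\n", list);   /* prints: members [4 x 9 x] */
    free(list);
    return 0;
}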
@@ -594,18 +639,6 @@ static void *xhc_shmem_attach(opal_shmem_ds_t *seg_ds) {
    return addr;
}

-void *xhc_get_cico(xhc_peer_info_t *peer_info, int rank) {
-    if (OMPI_XHC_CICO_MAX == 0) {
-        return NULL;
-    }
-
-    if (peer_info[rank].cico_buffer == NULL) {
-        peer_info[rank].cico_buffer = xhc_shmem_attach(&peer_info[rank].cico_ds);
-    }
-
-    return peer_info[rank].cico_buffer;
-}
-
static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info) {
    if (!peer_info->smsc_ep) {
        peer_info->smsc_ep = MCA_SMSC_CALL(get_endpoint, &peer_info->proc->super);
@@ -622,7 +655,21 @@ static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info) {
    return peer_info->smsc_ep;
}

-int xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data) {
+// ------------------------------------------------
+
+void *mca_coll_xhc_get_cico(xhc_peer_info_t *peer_info, int rank) {
+    if (OMPI_XHC_CICO_MAX == 0) {
+        return NULL;
+    }
+
+    if (peer_info[rank].cico_buffer == NULL) {
+        peer_info[rank].cico_buffer = xhc_shmem_attach(&peer_info[rank].cico_ds);
+    }
+
+    return peer_info[rank].cico_buffer;
+}
+
+int mca_coll_xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data) {
    if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION)) {
        void *data = MCA_SMSC_CALL(register_region, base, len);

@@ -640,11 +687,11 @@ int xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data
    return 0;
}

-void xhc_copy_region_post(void *dst, xhc_copy_data_t *region_data) {
+void mca_coll_xhc_copy_region_post(void *dst, xhc_copy_data_t *region_data) {
    memcpy(dst, region_data, mca_smsc_base_registration_data_size());
}

-int xhc_copy_from(xhc_peer_info_t *peer_info,
+int mca_coll_xhc_copy_from(xhc_peer_info_t *peer_info,
    void *dst, void *src, size_t size, void *access_token) {

    mca_smsc_endpoint_t *smsc_ep = xhc_smsc_ep(peer_info);
@@ -659,12 +706,12 @@ int xhc_copy_from(xhc_peer_info_t *peer_info,
    return (status == OPAL_SUCCESS ? 0 : -1);
}

-void xhc_copy_close_region(xhc_copy_data_t *region_data) {
+void mca_coll_xhc_copy_close_region(xhc_copy_data_t *region_data) {
    if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION))
        MCA_SMSC_CALL(deregister_region, region_data);
}

-void *xhc_get_registration(xhc_peer_info_t *peer_info,
+void *mca_coll_xhc_get_registration(xhc_peer_info_t *peer_info,
    void *peer_vaddr, size_t size, xhc_reg_t **reg) {

    mca_smsc_endpoint_t *smsc_ep = xhc_smsc_ep(peer_info);
@@ -695,6 +742,6 @@ void *xhc_get_registration(xhc_peer_info_t *peer_info,

/* Won't actually unmap/detach, since we've set
 * the "persist" flag while creating the mapping */
-void xhc_return_registration(xhc_reg_t *reg) {
+void mca_coll_xhc_return_registration(xhc_reg_t *reg) {
    MCA_SMSC_CALL(unmap_peer_region, reg);
}