12
12
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
13
13
* Copyright (c) 2015 Research Organization for Information Science
14
14
* and Technology (RIST). All rights reserved.
15
- * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
15
+ * Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
16
16
* $COPYRIGHT$
17
17
*
18
18
* Additional copyrights may follow
61
61
#include "orte/util/show_help.h"
62
62
63
63
#include "orte/mca/errmgr/errmgr.h"
64
+ #include "orte/mca/ras/base/base.h"
64
65
#include "orte/runtime/runtime.h"
65
66
#include "orte/runtime/orte_globals.h"
66
67
@@ -370,6 +371,16 @@ int orte_session_dir(bool create, orte_process_name_t *proc)
370
371
int
371
372
orte_session_dir_cleanup (orte_jobid_t jobid )
372
373
{
374
+ /* special case - if a daemon is colocated with mpirun,
375
+ * then we let mpirun do the rest to avoid a race
376
+ * condition. This scenario always results in the rank=1
377
+ * daemon colocated with mpirun */
378
+ if (orte_ras_base .launch_orted_on_hn &&
379
+ ORTE_PROC_IS_DAEMON &&
380
+ 1 == ORTE_PROC_MY_NAME -> vpid ) {
381
+ return ORTE_SUCCESS ;
382
+ }
383
+
373
384
if (!orte_create_session_dirs || orte_process_info .rm_session_dirs ) {
374
385
/* we haven't created them, or the RM will clean them up for us */
375
386
return ORTE_SUCCESS ;
@@ -386,6 +397,7 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
386
397
return ORTE_ERR_NOT_INITIALIZED ;
387
398
}
388
399
400
+
389
401
/* recursively blow the whole session away for our job family,
390
402
* saving only output files
391
403
*/
@@ -461,20 +473,6 @@ orte_session_dir_finalize(orte_process_name_t *proc)
461
473
462
474
opal_os_dirpath_destroy (orte_process_info .proc_session_dir ,
463
475
false, orte_dir_check_file );
464
- opal_os_dirpath_destroy (orte_process_info .job_session_dir ,
465
- false, orte_dir_check_file );
466
- /* only remove the jobfam session dir if we are the
467
- * local daemon and we are finalizing our own session dir */
468
- if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON ) &&
469
- (ORTE_PROC_MY_NAME == proc )) {
470
- opal_os_dirpath_destroy (orte_process_info .jobfam_session_dir ,
471
- false, orte_dir_check_file );
472
- }
473
-
474
- if ( NULL != orte_process_info .top_session_dir ){
475
- opal_os_dirpath_destroy (orte_process_info .top_session_dir ,
476
- false, orte_dir_check_file );
477
- }
478
476
479
477
if (opal_os_dirpath_is_empty (orte_process_info .proc_session_dir )) {
480
478
if (orte_debug_flag ) {
@@ -492,6 +490,32 @@ orte_session_dir_finalize(orte_process_name_t *proc)
492
490
}
493
491
}
494
492
493
+ /* special case - if a daemon is colocated with mpirun,
494
+ * then we let mpirun do the rest to avoid a race
495
+ * condition. This scenario always results in the rank=1
496
+ * daemon colocated with mpirun */
497
+ if (orte_ras_base .launch_orted_on_hn &&
498
+ ORTE_PROC_IS_DAEMON &&
499
+ 1 == ORTE_PROC_MY_NAME -> vpid ) {
500
+ return ORTE_SUCCESS ;
501
+ }
502
+
503
+ opal_os_dirpath_destroy (orte_process_info .job_session_dir ,
504
+ false, orte_dir_check_file );
505
+
506
+ /* only remove the jobfam session dir if we are the HNP or a
507
+ * local daemon and we are finalizing our own session dir */
508
+ if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON ) &&
509
+ (ORTE_PROC_MY_NAME == proc )) {
510
+ opal_os_dirpath_destroy (orte_process_info .jobfam_session_dir ,
511
+ false, orte_dir_check_file );
512
+ }
513
+
514
+ if ( NULL != orte_process_info .top_session_dir ){
515
+ opal_os_dirpath_destroy (orte_process_info .top_session_dir ,
516
+ false, orte_dir_check_file );
517
+ }
518
+
495
519
if (opal_os_dirpath_is_empty (orte_process_info .job_session_dir )) {
496
520
if (orte_debug_flag ) {
497
521
opal_output (0 , "sess_dir_finalize: found job session dir empty - deleting" );
0 commit comments