Skip to content

Commit db52da4

Browse files
authored
Merge pull request #7162 from rhc54/topic/iof
Update IOF redirection options
2 parents edcd6d8 + b0a487a commit db52da4

File tree

12 files changed

+193
-18
lines changed

12 files changed

+193
-18
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,8 @@ opal/mca/pmix/pmix*/openpmix/test/run_tests12.pl
418418
opal/mca/pmix/pmix*/openpmix/test/run_tests13.pl
419419
opal/mca/pmix/pmix*/openpmix/test/run_tests14.pl
420420
opal/mca/pmix/pmix*/openpmix/test/run_tests15.pl
421+
opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmix.pc
422+
opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt
421423

422424

423425
opal/mca/pmix/ext4x/ext4x.c

orte/mca/iof/base/Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@
1010
# Copyright (c) 2004-2005 The Regents of the University of California.
1111
# All rights reserved.
1212
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
13+
# Copyright (c) 2019 Intel, Inc. All rights reserved.
1314
# $COPYRIGHT$
1415
#
1516
# Additional copyrights may follow
1617
#
1718
# $HEADER$
1819
#
1920

21+
dist_ortedata_DATA = base/help-iof-base.txt
22+
2023
headers += \
2124
base/base.h \
2225
base/iof_base_setup.h

orte/mca/iof/base/help-iof-base.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- text -*-
2+
#
3+
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4+
# University Research and Technology
5+
# Corporation. All rights reserved.
6+
# Copyright (c) 2004-2005 The University of Tennessee and The University
7+
# of Tennessee Research Foundation. All rights
8+
# reserved.
9+
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10+
# University of Stuttgart. All rights reserved.
11+
# Copyright (c) 2004-2005 The Regents of the University of California.
12+
# All rights reserved.
13+
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
14+
# Copyright (c) 2017 IBM Corporation. All rights reserved.
15+
# Copyright (c) 2018 Research Organization for Information Science
16+
# and Technology (RIST). All rights reserved.
17+
# $COPYRIGHT$
18+
#
19+
# Additional copyrights may follow
20+
#
21+
# $HEADER$
22+
#
23+
#
24+
[unrecognized-directive]
25+
The following directive was given to "--%s":
26+
27+
Directive: %s
28+
29+
This directive is not recognized. Please check your spelling
30+
and/or use the "--help" option to find the supported values.
31+
#

orte/mca/iof/base/iof_base_setup.c

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1515
* Copyright (c) 2017 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
@@ -63,11 +63,13 @@
6363
#include "opal/util/opal_environ.h"
6464
#include "opal/util/os_dirpath.h"
6565
#include "opal/util/output.h"
66+
#include "opal/util/basename.h"
6667
#include "opal/util/argv.h"
6768
#include "opal/util/printf.h"
6869

6970
#include "orte/mca/errmgr/errmgr.h"
7071
#include "orte/util/name_fns.h"
72+
#include "orte/util/show_help.h"
7173
#include "orte/runtime/orte_globals.h"
7274

7375
#include "orte/mca/iof/iof.h"
@@ -291,9 +293,9 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
291293
char *p, **s;
292294
bool usejobid = true;
293295

294-
/* see if we are to output to a file */
296+
/* see if we are to output to a directory */
295297
dirname = NULL;
296-
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) &&
298+
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, (void**)&dirname, OPAL_STRING) &&
297299
NULL != dirname) {
298300
np = jobdat->num_procs / 10;
299301
/* determine the number of digits required for max vpid */
@@ -313,6 +315,12 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
313315
usejobid = false;
314316
} else if (0 == strcasecmp(s[i], "nocopy")) {
315317
proct->copy = false;
318+
} else {
319+
orte_show_help("help-iof-base",
320+
"unrecognized-directive",
321+
true, "output-directory", s[i]);
322+
opal_argv_free(s);
323+
return ORTE_ERROR;
316324
}
317325
}
318326
}
@@ -379,7 +387,80 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
379387
proct->revstddiag->sink = proct->revstderr->sink;
380388
}
381389
#endif
390+
return ORTE_SUCCESS;
382391
}
383392

393+
/* see if we are to output to a file */
394+
dirname = NULL;
395+
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) &&
396+
NULL != dirname) {
397+
np = jobdat->num_procs / 10;
398+
/* determine the number of digits required for max vpid */
399+
numdigs = 1;
400+
while (np > 0) {
401+
numdigs++;
402+
np = np / 10;
403+
}
404+
/* check for a conditional in the filename */
405+
if (NULL != (p = strchr(dirname, ':'))) {
406+
*p = '\0';
407+
++p;
408+
/* could be more than one directive */
409+
s = opal_argv_split(p, ',');
410+
for (i=0; NULL != s[i]; i++) {
411+
if (0 == strcasecmp(s[i], "nocopy")) {
412+
proct->copy = false;
413+
} else {
414+
orte_show_help("help-iof-base",
415+
"unrecognized-directive",
416+
true, "output-filename", s[i]);
417+
opal_argv_free(s);
418+
return ORTE_ERROR;
419+
}
420+
}
421+
}
422+
423+
/* construct the directory where the output files will go */
424+
outdir = opal_dirname(dirname);
425+
426+
/* ensure the directory exists */
427+
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(outdir, S_IRWXU|S_IRGRP|S_IXGRP))) {
428+
ORTE_ERROR_LOG(rc);
429+
free(outdir);
430+
return rc;
431+
}
432+
if (NULL != proct->revstdout && NULL == proct->revstdout->sink) {
433+
/* setup the stdout sink */
434+
opal_asprintf(&outfile, "%s.%d.%0*lu", dirname,
435+
(int)ORTE_LOCAL_JOBID(proct->name.jobid),
436+
numdigs, (unsigned long)proct->name.vpid);
437+
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
438+
free(outfile);
439+
if (fdout < 0) {
440+
/* couldn't be opened */
441+
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
442+
return ORTE_ERR_FILE_OPEN_FAILURE;
443+
}
444+
/* define a sink to that file descriptor */
445+
ORTE_IOF_SINK_DEFINE(&proct->revstdout->sink, dst_name,
446+
fdout, ORTE_IOF_STDOUTALL,
447+
orte_iof_base_write_handler);
448+
}
449+
450+
if (NULL != proct->revstderr && NULL == proct->revstderr->sink) {
451+
/* we only create one file - all output goes there */
452+
OBJ_RETAIN(proct->revstdout->sink);
453+
proct->revstdout->sink->tag = ORTE_IOF_STDMERGE; // show that it is merged
454+
proct->revstderr->sink = proct->revstdout->sink;
455+
}
456+
#if OPAL_PMIX_V1
457+
if (NULL != proct->revstddiag && NULL == proct->revstddiag->sink) {
458+
/* always tie the sink for stddiag to stderr */
459+
OBJ_RETAIN(proct->revstderr->sink);
460+
proct->revstddiag->sink = proct->revstderr->sink;
461+
}
462+
#endif
463+
return ORTE_SUCCESS;
464+
}
384465
return ORTE_SUCCESS;
385466
}

orte/mca/schizo/ompi/schizo_ompi.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
1515
* Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2017 UT-Battelle, LLC. All rights reserved.
17-
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
1818
* Copyright (c) 2015-2019 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
2020
* Copyright (c) 2018 IBM Corporation. All rights reserved.
@@ -138,9 +138,13 @@ static opal_cmd_line_init_t cmd_line_init[] = {
138138
{ "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0,
139139
&orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL,
140140
"Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT },
141+
{ "orte_output_directory", '\0', "output-directory", "output-directory", 1,
142+
&orte_cmd_options.output_directory, OPAL_CMD_LINE_TYPE_STRING,
143+
"Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. Supported directives currently include NOJOBID (do not include a job-id directory level) and NOCOPY (do not copy the output to the stdout/err streams)",
144+
OPAL_CMD_LINE_OTYPE_OUTPUT },
141145
{ "orte_output_filename", '\0', "output-filename", "output-filename", 1,
142146
&orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING,
143-
"Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path",
147+
"Redirect output from application processes into filename.rank. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. Supported directives currently include NOCOPY (do not copy the output to the stdout/err streams)",
144148
OPAL_CMD_LINE_OTYPE_OUTPUT },
145149
{ NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0,
146150
&orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL,

orte/orted/help-orted.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13-
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
13+
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1414
# $COPYRIGHT$
1515
#
1616
# Additional copyrights may follow
@@ -107,3 +107,12 @@ mechanisms.
107107
You may wish to contact your debugger vendor to inquire about support
108108
for PMIx-based debugger attachment mechanisms. Meantime, you can
109109
disable this warning by setting the OMPI_MPIR_DO_NOT_WARN envar to 1.
110+
#
111+
[both-file-and-dir-set]
112+
Both the "output-directory" and "output-filename" options have been
113+
set:
114+
115+
Directory: %s
116+
Filename: %s
117+
118+
Only one of these can be set - please fix the options and try again.

orte/orted/orted_main.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
209209
&orted_globals.singleton_died_pipe, OPAL_CMD_LINE_TYPE_INT,
210210
"Watch on indicated pipe for singleton termination"},
211211

212+
{ "orte_output_directory", '\0', "output-directory", "output-directory", 1,
213+
NULL, OPAL_CMD_LINE_TYPE_STRING,
214+
"Redirect output from application processes into filename/job/rank/std[out,err,diag]." },
215+
212216
{ "orte_output_filename", '\0', "output-filename", "output-filename", 1,
213217
NULL, OPAL_CMD_LINE_TYPE_STRING,
214218
"Redirect output from application processes into filename.rank" },

orte/orted/orted_submit.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata);
176176

177177
ORTE_DECLSPEC void __opal_attribute_optnone__ MPIR_Breakpoint(void);
178178

179-
/*
179+
/*
180180
* Attempt to prevent the compiler from optimizing out
181181
* MPIR_Breakpoint().
182182
*
@@ -199,7 +199,7 @@ volatile void* volatile orte_noop_mpir_breakpoint_ptr = NULL;
199199
*/
200200
void MPIR_Breakpoint(void)
201201
{
202-
/*
202+
/*
203203
* Actually do something with this pointer to make
204204
* sure the compiler does not optimize out this function.
205205
* The compiler should be forced to keep this
@@ -852,9 +852,16 @@ int orte_submit_job(char *argv[], int *index,
852852
if (orte_cmd_options.timestamp_output) {
853853
orte_set_attribute(&jdata->attributes, ORTE_JOB_TIMESTAMP_OUTPUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
854854
}
855-
/* if we were asked to output to files, pass it along */
856-
if (NULL != orte_cmd_options.output_filename) {
857-
/* if the given filename isn't an absolute path, then
855+
/* cannot have both files and directory set for output */
856+
if (NULL != orte_cmd_options.output_filename &&
857+
NULL != orte_cmd_options.output_directory) {
858+
orte_show_help("help-orted.txt", "both-file-and-dir-set", true,
859+
orte_cmd_options.output_directory,
860+
orte_cmd_options.output_filename);
861+
return ORTE_ERR_FATAL;
862+
} else if (NULL != orte_cmd_options.output_filename) {
863+
/* if we were asked to output to files, pass it along.
864+
* If the given filename isn't an absolute path, then
858865
* convert it to one so the name will be relative to
859866
* the directory where prun was given as that is what
860867
* the user will have seen */
@@ -867,6 +874,21 @@ int orte_submit_job(char *argv[], int *index,
867874
} else {
868875
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, ORTE_ATTR_GLOBAL, orte_cmd_options.output_filename, OPAL_STRING);
869876
}
877+
} else if (NULL != orte_cmd_options.output_directory) {
878+
/* if we were asked to output to a directory, pass it along.
879+
* If the given directory isn't an absolute path, then
880+
* convert it to one so the name will be relative to
881+
* the directory where prun was given as that is what
882+
* the user will have seen */
883+
if (!opal_path_is_absolute(orte_cmd_options.output_directory)) {
884+
char cwd[OPAL_PATH_MAX], *path;
885+
getcwd(cwd, sizeof(cwd));
886+
path = opal_os_path(false, cwd, orte_cmd_options.output_directory, NULL);
887+
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, path, OPAL_STRING);
888+
free(path);
889+
} else {
890+
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, orte_cmd_options.output_directory, OPAL_STRING);
891+
}
870892
}
871893
/* if we were asked to merge stderr to stdout, mark it so */
872894
if (orte_cmd_options.merge) {

orte/orted/orted_submit.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
2+
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
33
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
44
* Copyright (c) 2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
@@ -90,6 +90,7 @@ struct orte_cmd_options_t {
9090
bool debug;
9191
bool tag_output;
9292
bool timestamp_output;
93+
char *output_directory;
9394
char *output_filename;
9495
bool merge;
9596
bool continuous;

orte/tools/orterun/orterun.1in

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.\" -*- nroff -*-
22
.\" Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved.
33
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
4-
.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
4+
.\" Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
55
.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights
66
.\" reserved.
77
.\" $COPYRIGHT$
@@ -418,10 +418,25 @@ Redirect the stdout, stderr, and stddiag of all processes to a process-unique ve
418418
the specified filename. Any directories in the filename will automatically be created.
419419
Each output file will consist of filename.id, where the id will be the
420420
process's rank in MPI_COMM_WORLD, left-filled with
421-
zero's for correct ordering in listings. A relative path value will be converted to an
421+
zeros for correct ordering in listings. Both stdout and stderr will be redirected to the file.
422+
A relative path value will be converted to an
422423
absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work
423424
on environments where the file system on compute nodes differs from that where mpirun
424-
is executed.
425+
is executed. This option accepts one case-insensitive directive, specified after a colon: NOCOPY
426+
indicates that the output is not to be echoed to the terminal.
427+
.
428+
.
429+
.TP
430+
.B -output-directory\fR,\fP --output-directory \fR<path>\fP
431+
Redirect the stdout, stderr, and stddiag of all processes to a process-unique location
432+
consisting of "<path>/<jobid>/rank.id/stdout[err]", where the id will be the
433+
process's rank in MPI_COMM_WORLD, left-filled with
434+
zeros for correct ordering in listings. Any directories in the path will automatically be created.
435+
A relative path value will be converted to an
436+
absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work
437+
on environments where the file system on compute nodes differs from that where mpirun
438+
is executed. This option also supports two case-insensitive directives, specified in comma-delimited form after a colon: NOJOBID (omits the jobid directory layer) and NOCOPY (do
439+
not copy the output to the terminal).
425440
.
426441
.
427442
.TP

orte/util/attr.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
2+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
33
* Copyright (c) 2014-2017 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
@@ -384,6 +384,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
384384
return "ORTE_APP_ADD_ENVAR";
385385
case ORTE_JOB_APP_SETUP_DATA:
386386
return "ORTE_JOB_APP_SETUP_DATA";
387+
case ORTE_JOB_OUTPUT_TO_DIRECTORY:
388+
return "ORTE_JOB_OUTPUT_TO_DIRECTORY";
387389

388390
case ORTE_PROC_NOBARRIER:
389391
return "PROC-NOBARRIER";

orte/util/attr.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
2+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
33
* Copyright (c) 2016 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* $COPYRIGHT$
@@ -142,7 +142,7 @@ typedef uint16_t orte_job_flags_t;
142142
#define ORTE_JOB_FIXED_DVM (ORTE_JOB_START_KEY + 42) // bool - do not change the size of the DVM for this job
143143
#define ORTE_JOB_DVM_JOB (ORTE_JOB_START_KEY + 43) // bool - job is using a DVM
144144
#define ORTE_JOB_CANCELLED (ORTE_JOB_START_KEY + 44) // bool - job was cancelled
145-
#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - name of directory to which stdout/err is to be directed
145+
#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - path to use as basename of files to which stdout/err is to be directed
146146
#define ORTE_JOB_MERGE_STDERR_STDOUT (ORTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream
147147
#define ORTE_JOB_TAG_OUTPUT (ORTE_JOB_START_KEY + 47) // bool - tag stdout/stderr
148148
#define ORTE_JOB_TIMESTAMP_OUTPUT (ORTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr
@@ -159,6 +159,7 @@ typedef uint16_t orte_job_flags_t;
159159
#define ORTE_JOB_APPEND_ENVAR (ORTE_JOB_START_KEY + 58) // opal_envar_t - append the specified value to the given envar
160160
#define ORTE_JOB_ADD_ENVAR (ORTE_JOB_START_KEY + 59) // opal_envar_t - add envar, do not override pre-existing one
161161
#define ORTE_JOB_APP_SETUP_DATA (ORTE_JOB_START_KEY + 60) // opal_byte_object_t - blob containing app setup data
162+
#define ORTE_JOB_OUTPUT_TO_DIRECTORY (ORTE_JOB_START_KEY + 61) // string - path of directory to which stdout/err is to be directed
162163

163164
#define ORTE_JOB_MAX_KEY 300
164165

0 commit comments

Comments
 (0)