25
25
#ifdef HAVE_UNISTD_H
26
26
#include <unistd.h>
27
27
#endif
28
+ #ifdef HAVE_SYS_TYPES_H
29
+ #include <sys/types.h>
30
+ #endif
31
+ #ifdef HAVE_SYS_STAT_H
32
+ #include <sys/stat.h>
33
+ #endif
34
+ #ifdef HAVE_SYS_FCNTL_H
35
+ #include <fcntl.h>
36
+ #endif
28
37
29
38
#include <string.h>
30
39
#include <signal.h>
35
44
#include "opal/util/output.h"
36
45
#include "opal/util/show_help.h"
37
46
#include "opal/util/argv.h"
47
+ #include "opal/util/proc.h"
38
48
#include "opal/runtime/opal_params.h"
39
49
40
50
#ifndef _NSIG
43
53
44
54
#define HOSTFORMAT "[%s:%05d] "
45
55
56
/* File descriptor the stack trace is written to; initialised to -1, and a
 * negative value is treated by the code below as "no descriptor selected". */
+ int opal_stacktrace_output_fileno = -1 ;
57
/* Base name that set_stacktrace_filename() extends with ".PID" or ".VPID.PID". */
+ static char * opal_stacktrace_output_filename_base = NULL ;
58
/* Size handed to snprintf() when building the output filename; 0 means no
 * per-process output file has been configured. */
+ static size_t opal_stacktrace_output_filename_max_len = 0 ;
46
59
/* Host name substituted into the HOSTFORMAT prefix of each message. */
static char stacktrace_hostname [OPAL_MAXHOSTNAMELEN ];
47
60
/* Fallback text written when the stack trace or footer cannot be produced. */
static char * unable_to_print_msg = "Unable to print stack trace!\n" ;
48
61
62
+ /*
63
+ * Set the stacktrace filename:
64
+ * stacktrace.PID
65
+ * -or, if VPID is available-
66
+ * stacktrace.VPID.PID
67
+ */
68
+ static void set_stacktrace_filename (void ) {
69
+ opal_proc_t * my_proc = opal_proc_local_get ();
70
+
71
+ if ( NULL == my_proc ) {
72
+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
73
+ "%s.%lu" ,
74
+ opal_stacktrace_output_filename_base , (unsigned long )getpid ());
75
+ }
76
+ else {
77
+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
78
+ "%s.%lu.%lu" ,
79
+ opal_stacktrace_output_filename_base , (unsigned long )my_proc -> proc_name .vpid , (unsigned long )getpid ());
80
+ }
81
+
82
+ return ;
83
+ }
84
+
49
85
/**
50
86
* This function is being called as a signal-handler in response
51
87
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
@@ -69,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
69
105
int ret ;
70
106
char * si_code_str = "" ;
71
107
108
+ /* Do not print the stack trace */
109
+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
110
+ /* Raise the signal again, so we don't accidentally mask critical signals.
111
+ * For critical signals, it is preferred that we call 'raise' instead of
112
+ * 'exit' or 'abort' so that the return status is set properly for this
113
+ * process.
114
+ */
115
+ signal (signo , SIG_DFL );
116
+ raise (signo );
117
+
118
+ return ;
119
+ }
120
+
121
+ /* Update the file name with the RANK, if available */
122
+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
123
+ set_stacktrace_filename ();
124
+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
125
+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
126
+ if ( 0 > opal_stacktrace_output_fileno ) {
127
+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
128
+ opal_stacktrace_output_filename , strerror (errno ));
129
+ opal_stacktrace_output_fileno = fileno (stderr );
130
+ }
131
+ }
132
+
72
133
/* write out the footer information */
73
134
memset (print_buffer , 0 , sizeof (print_buffer ));
74
135
ret = snprintf (print_buffer , sizeof (print_buffer ),
75
136
HOSTFORMAT "*** Process received signal ***\n" ,
76
137
stacktrace_hostname , getpid ());
77
- write (fileno ( stderr ) , print_buffer , ret );
138
+ write (opal_stacktrace_output_fileno , print_buffer , ret );
78
139
79
140
80
141
memset (print_buffer , 0 , sizeof (print_buffer ));
@@ -324,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
324
385
}
325
386
326
387
/* write out the signal information generated above */
327
- write (fileno ( stderr ) , print_buffer , sizeof (print_buffer )- size );
388
+ write (opal_stacktrace_output_fileno , print_buffer , sizeof (print_buffer )- size );
328
389
329
390
/* print out the stack trace */
330
391
snprintf (print_buffer , sizeof (print_buffer ), HOSTFORMAT ,
331
392
stacktrace_hostname , getpid ());
332
- ret = opal_backtrace_print (stderr , print_buffer , 2 );
393
+ ret = opal_backtrace_print (NULL , print_buffer , 2 );
333
394
if (OPAL_SUCCESS != ret ) {
334
- write (fileno ( stderr ) , unable_to_print_msg , strlen (unable_to_print_msg ));
395
+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
335
396
}
336
397
337
398
/* write out the footer information */
@@ -340,9 +401,15 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
340
401
HOSTFORMAT "*** End of error message ***\n" ,
341
402
stacktrace_hostname , getpid ());
342
403
if (ret > 0 ) {
343
- write (fileno ( stderr ) , print_buffer , ret );
404
+ write (opal_stacktrace_output_fileno , print_buffer , ret );
344
405
} else {
345
- write (fileno (stderr ), unable_to_print_msg , strlen (unable_to_print_msg ));
406
+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
407
+ }
408
+
409
+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
410
+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
411
+ close (opal_stacktrace_output_fileno );
412
+ opal_stacktrace_output_fileno = -1 ;
346
413
}
347
414
348
415
/* Raise the signal again, so we don't accidentally mask critical signals.
@@ -372,7 +439,30 @@ void opal_stackframe_output(int stream)
372
439
opal_output (stream , "%s" , traces [i ]);
373
440
}
374
441
} else {
375
- opal_backtrace_print (stderr , NULL , 2 );
442
+ /* Do not print the stack trace */
443
+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
444
+ return ;
445
+ }
446
+
447
+ /* Update the file name with the RANK, if available */
448
+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
449
+ set_stacktrace_filename ();
450
+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
451
+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
452
+ if ( 0 > opal_stacktrace_output_fileno ) {
453
+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
454
+ opal_stacktrace_output_filename , strerror (errno ));
455
+ opal_stacktrace_output_fileno = fileno (stderr );
456
+ }
457
+ }
458
+
459
+ opal_backtrace_print (NULL , NULL , 2 );
460
+
461
+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
462
+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
463
+ close (opal_stacktrace_output_fileno );
464
+ opal_stacktrace_output_fileno = -1 ;
465
+ }
376
466
}
377
467
}
378
468
@@ -443,6 +533,50 @@ int opal_util_register_stackhandlers (void)
443
533
}
444
534
}
445
535
536
+ /* Setup the output stream to use */
537
+ if ( NULL == opal_stacktrace_output_filename ||
538
+ 0 == strcasecmp (opal_stacktrace_output_filename , "none" ) ) {
539
+ opal_stacktrace_output_fileno = -1 ;
540
+ }
541
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stdout" ) ) {
542
+ opal_stacktrace_output_fileno = fileno (stdout );
543
+ }
544
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stderr" ) ) {
545
+ opal_stacktrace_output_fileno = fileno (stdout );
546
+ }
547
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "file" ) ||
548
+ 0 == strcasecmp (opal_stacktrace_output_filename , "file:" ) ) {
549
+ opal_stacktrace_output_filename_base = strdup ("stacktrace" );
550
+
551
+ free (opal_stacktrace_output_filename );
552
+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
553
+ opal_stacktrace_output_filename_max_len = strlen ("stacktrace" ) + 8 + 8 ;
554
+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
555
+ set_stacktrace_filename ();
556
+ opal_stacktrace_output_fileno = -1 ;
557
+ }
558
+ else if ( 0 == strncasecmp (opal_stacktrace_output_filename , "file:" , 5 ) ) {
559
+ char * filename_cpy = NULL ;
560
+ next = strchr (opal_stacktrace_output_filename , ':' );
561
+ next ++ ; // move past the ':' to the filename specified
562
+
563
+ opal_stacktrace_output_filename_base = strdup (next );
564
+
565
+ free (opal_stacktrace_output_filename );
566
+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
567
+ opal_stacktrace_output_filename_max_len = strlen (opal_stacktrace_output_filename_base ) + 8 + 8 ;
568
+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
569
+ set_stacktrace_filename ();
570
+ opal_stacktrace_output_fileno = -1 ;
571
+
572
+ free (filename_cpy );
573
+ }
574
+ else {
575
+ opal_stacktrace_output_fileno = fileno (stderr );
576
+ }
577
+
578
+
579
+ /* Setup the signals to catch */
446
580
memset (& act , 0 , sizeof (act ));
447
581
act .sa_sigaction = show_stackframe ;
448
582
act .sa_flags = SA_SIGINFO ;
0 commit comments