11
11
* All rights reserved.
12
12
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
13
13
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
14
+ * Copyright (c) 2017 IBM Corporation. All rights reserved.
14
15
* $COPYRIGHT$
15
16
*
16
17
* Additional copyrights may follow
24
25
#ifdef HAVE_UNISTD_H
25
26
#include <unistd.h>
26
27
#endif
28
+ #ifdef HAVE_SYS_TYPES_H
29
+ #include <sys/types.h>
30
+ #endif
31
+ #ifdef HAVE_SYS_STAT_H
32
+ #include <sys/stat.h>
33
+ #endif
34
+ #ifdef HAVE_SYS_FCNTL_H
35
+ #include <fcntl.h>
36
+ #endif
27
37
28
38
#include <string.h>
29
39
#include <signal.h>
34
44
#include "opal/util/output.h"
35
45
#include "opal/util/show_help.h"
36
46
#include "opal/util/argv.h"
47
+ #include "opal/util/proc.h"
37
48
#include "opal/runtime/opal_params.h"
38
49
39
50
#ifndef _NSIG
42
53
43
54
#define HOSTFORMAT "[%s:%05d] "
44
55
56
+ int opal_stacktrace_output_fileno = -1 ;
57
+ static char * opal_stacktrace_output_filename_base = NULL ;
58
+ static size_t opal_stacktrace_output_filename_max_len = 0 ;
45
59
static char stacktrace_hostname [OPAL_MAXHOSTNAMELEN ];
46
60
static char * unable_to_print_msg = "Unable to print stack trace!\n" ;
47
61
62
+ /*
63
+ * Set the stacktrace filename:
64
+ * stacktrace.PID
65
+ * -or, if VPID is available-
66
+ * stacktrace.VPID.PID
67
+ */
68
+ static void set_stacktrace_filename (void ) {
69
+ opal_proc_t * my_proc = opal_proc_local_get ();
70
+
71
+ if ( NULL == my_proc ) {
72
+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
73
+ "%s.%lu" ,
74
+ opal_stacktrace_output_filename_base , (unsigned long )getpid ());
75
+ }
76
+ else {
77
+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
78
+ "%s.%lu.%lu" ,
79
+ opal_stacktrace_output_filename_base , (unsigned long )my_proc -> proc_name .vpid , (unsigned long )getpid ());
80
+ }
81
+
82
+ return ;
83
+ }
84
+
48
85
/**
49
86
* This function is being called as a signal-handler in response
50
87
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
@@ -68,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
68
105
int ret ;
69
106
char * si_code_str = "" ;
70
107
108
+ /* Do not print the stack trace */
109
+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
110
+ /* Raise the signal again, so we don't accidentally mask critical signals.
111
+ * For critical signals, it is preferred that we call 'raise' instead of
112
+ * 'exit' or 'abort' so that the return status is set properly for this
113
+ * process.
114
+ */
115
+ signal (signo , SIG_DFL );
116
+ raise (signo );
117
+
118
+ return ;
119
+ }
120
+
121
+ /* Update the file name with the RANK, if available */
122
+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
123
+ set_stacktrace_filename ();
124
+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
125
+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
126
+ if ( 0 > opal_stacktrace_output_fileno ) {
127
+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
128
+ opal_stacktrace_output_filename , strerror (errno ));
129
+ opal_stacktrace_output_fileno = fileno (stderr );
130
+ }
131
+ }
132
+
71
133
/* write out the footer information */
72
134
memset (print_buffer , 0 , sizeof (print_buffer ));
73
135
ret = snprintf (print_buffer , sizeof (print_buffer ),
74
136
HOSTFORMAT "*** Process received signal ***\n" ,
75
137
stacktrace_hostname , getpid ());
76
- write (fileno ( stderr ) , print_buffer , ret );
138
+ write (opal_stacktrace_output_fileno , print_buffer , ret );
77
139
78
140
79
141
memset (print_buffer , 0 , sizeof (print_buffer ));
@@ -323,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
323
385
}
324
386
325
387
/* write out the signal information generated above */
326
- write (fileno ( stderr ) , print_buffer , sizeof (print_buffer )- size );
388
+ write (opal_stacktrace_output_fileno , print_buffer , sizeof (print_buffer )- size );
327
389
328
390
/* print out the stack trace */
329
391
snprintf (print_buffer , sizeof (print_buffer ), HOSTFORMAT ,
330
392
stacktrace_hostname , getpid ());
331
- ret = opal_backtrace_print (stderr , print_buffer , 2 );
393
+ ret = opal_backtrace_print (NULL , print_buffer , 2 );
332
394
if (OPAL_SUCCESS != ret ) {
333
- write (fileno ( stderr ) , unable_to_print_msg , strlen (unable_to_print_msg ));
395
+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
334
396
}
335
397
336
398
/* write out the footer information */
@@ -339,10 +401,24 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
339
401
HOSTFORMAT "*** End of error message ***\n" ,
340
402
stacktrace_hostname , getpid ());
341
403
if (ret > 0 ) {
342
- write (fileno ( stderr ) , print_buffer , ret );
404
+ write (opal_stacktrace_output_fileno , print_buffer , ret );
343
405
} else {
344
- write (fileno (stderr ), unable_to_print_msg , strlen (unable_to_print_msg ));
406
+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
407
+ }
408
+
409
+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
410
+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
411
+ close (opal_stacktrace_output_fileno );
412
+ opal_stacktrace_output_fileno = -1 ;
345
413
}
414
+
415
+ /* Raise the signal again, so we don't accidentally mask critical signals.
416
+ * For critical signals, it is preferred that we call 'raise' instead of
417
+ * 'exit' or 'abort' so that the return status is set properly for this
418
+ * process.
419
+ */
420
+ signal (signo , SIG_DFL );
421
+ raise (signo );
346
422
}
347
423
348
424
#endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
@@ -364,7 +440,30 @@ void opal_stackframe_output(int stream)
364
440
opal_output (stream , "%s" , traces [i ]);
365
441
}
366
442
} else {
367
- opal_backtrace_print (stderr , NULL , 2 );
443
+ /* Do not print the stack trace */
444
+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
445
+ return ;
446
+ }
447
+
448
+ /* Update the file name with the RANK, if available */
449
+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
450
+ set_stacktrace_filename ();
451
+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
452
+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
453
+ if ( 0 > opal_stacktrace_output_fileno ) {
454
+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
455
+ opal_stacktrace_output_filename , strerror (errno ));
456
+ opal_stacktrace_output_fileno = fileno (stderr );
457
+ }
458
+ }
459
+
460
+ opal_backtrace_print (NULL , NULL , 2 );
461
+
462
+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
463
+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
464
+ close (opal_stacktrace_output_fileno );
465
+ opal_stacktrace_output_fileno = -1 ;
466
+ }
368
467
}
369
468
}
370
469
@@ -435,6 +534,50 @@ int opal_util_register_stackhandlers (void)
435
534
}
436
535
}
437
536
537
+ /* Setup the output stream to use */
538
+ if ( NULL == opal_stacktrace_output_filename ||
539
+ 0 == strcasecmp (opal_stacktrace_output_filename , "none" ) ) {
540
+ opal_stacktrace_output_fileno = -1 ;
541
+ }
542
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stdout" ) ) {
543
+ opal_stacktrace_output_fileno = fileno (stdout );
544
+ }
545
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stderr" ) ) {
546
+ opal_stacktrace_output_fileno = fileno (stdout );
547
+ }
548
+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "file" ) ||
549
+ 0 == strcasecmp (opal_stacktrace_output_filename , "file:" ) ) {
550
+ opal_stacktrace_output_filename_base = strdup ("stacktrace" );
551
+
552
+ free (opal_stacktrace_output_filename );
553
+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
554
+ opal_stacktrace_output_filename_max_len = strlen ("stacktrace" ) + 8 + 8 ;
555
+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
556
+ set_stacktrace_filename ();
557
+ opal_stacktrace_output_fileno = -1 ;
558
+ }
559
+ else if ( 0 == strncasecmp (opal_stacktrace_output_filename , "file:" , 5 ) ) {
560
+ char * filename_cpy = NULL ;
561
+ next = strchr (opal_stacktrace_output_filename , ':' );
562
+ next ++ ; // move past the ':' to the filename specified
563
+
564
+ opal_stacktrace_output_filename_base = strdup (next );
565
+
566
+ free (opal_stacktrace_output_filename );
567
+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
568
+ opal_stacktrace_output_filename_max_len = strlen (opal_stacktrace_output_filename_base ) + 8 + 8 ;
569
+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
570
+ set_stacktrace_filename ();
571
+ opal_stacktrace_output_fileno = -1 ;
572
+
573
+ free (filename_cpy );
574
+ }
575
+ else {
576
+ opal_stacktrace_output_fileno = fileno (stderr );
577
+ }
578
+
579
+
580
+ /* Setup the signals to catch */
438
581
memset (& act , 0 , sizeof (act ));
439
582
act .sa_sigaction = show_stackframe ;
440
583
act .sa_flags = SA_SIGINFO ;
0 commit comments