Skip to content

Commit ad607a2

Browse files
authored
handle some cases of null Task in segv handling (#42836)
Also ensure that the shuffled round-robin thread sampling order is used only for profiling, and not when printing a critical error.
1 parent b55fb5f commit ad607a2

File tree

6 files changed

+79
-76
lines changed

6 files changed

+79
-76
lines changed

src/gf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
18291829
jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
18301830
jl_ptls_t ptls = jl_current_task->ptls;
18311831
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
1832-
jl_critical_error(0, NULL);
1832+
jl_critical_error(0, NULL, jl_current_task);
18331833
abort();
18341834
}
18351835
// not reached

src/julia_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,7 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t
10131013
size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
10141014
#endif
10151015
JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
1016-
void jl_critical_error(int sig, bt_context_t *context);
1016+
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct);
10171017
JL_DLLEXPORT void jl_raise_debugger(void);
10181018
int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
10191019
JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;

src/signal-handling.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -241,19 +241,20 @@ void jl_show_sigill(void *_ctx)
241241
}
242242

243243
// what to do on a critical error on a thread
244-
void jl_critical_error(int sig, bt_context_t *context)
244+
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
245245
{
246246

247-
jl_task_t *ct = jl_current_task;
248-
jl_bt_element_t *bt_data = ct->ptls->bt_data;
249-
size_t *bt_size = &ct->ptls->bt_size;
250-
size_t i, n = *bt_size;
247+
jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
248+
size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
249+
size_t i, n = ct ? *bt_size : 0;
251250
if (sig) {
252251
// kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
253252
jl_set_safe_restore(NULL);
254-
ct->gcstack = NULL;
255-
ct->eh = NULL;
256-
ct->excstack = NULL;
253+
if (ct) {
254+
ct->gcstack = NULL;
255+
ct->eh = NULL;
256+
ct->excstack = NULL;
257+
}
257258
#ifndef _OS_WINDOWS_
258259
sigset_t sset;
259260
sigemptyset(&sset);
@@ -277,7 +278,7 @@ void jl_critical_error(int sig, bt_context_t *context)
277278
jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
278279
}
279280
jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
280-
if (context) {
281+
if (context && ct) {
281282
// Must avoid extended backtrace frames here unless we're sure bt_data
282283
// is properly rooted.
283284
*bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);

src/signals-mach.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ static void jl_try_deliver_sigint(void)
402402
static void JL_NORETURN jl_exit_thread0_cb(int exitstate)
403403
{
404404
CFI_NORETURN
405-
jl_critical_error(exitstate - 128, NULL);
405+
jl_critical_error(exitstate - 128, NULL, jl_current_task);
406406
jl_exit(exitstate);
407407
}
408408

src/signals-unix.c

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
231231
uv_tty_reset_mode();
232232
if (sig == SIGILL)
233233
jl_show_sigill(context);
234-
jl_critical_error(sig, jl_to_bt_context(context));
234+
jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task());
235235
if (sig != SIGSEGV &&
236236
sig != SIGBUS &&
237237
sig != SIGILL) {
@@ -410,7 +410,7 @@ CFI_NORETURN
410410
// (unavoidable due to its async nature).
411411
// Try harder to exit each time if we get multiple exit requests.
412412
if (thread0_exit_count <= 1) {
413-
jl_critical_error(thread0_exit_state - 128, NULL);
413+
jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task);
414414
jl_exit(thread0_exit_state);
415415
}
416416
else if (thread0_exit_count == 2) {
@@ -747,71 +747,72 @@ static void *signal_listener(void *arg)
747747
unw_context_t *signal_context;
748748
// sample each thread, round-robin style in reverse order
749749
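// (so that thread zero gets notified last; this holds on the critical path,
// whereas profiling visits threads in the shuffled round-robin order)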
750-
if (critical || profile)
750+
if (critical || profile) {
751751
jl_lock_profile();
752-
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
753-
for (int idx = jl_n_threads; idx-- > 0; ) {
754-
// Stop the threads in the random round-robin order.
755-
int i = profile_round_robin_thread_order[idx];
756-
// notify thread to stop
757-
jl_thread_suspend_and_get_state(i, &signal_context);
758-
759-
// do backtrace on thread contexts for critical signals
760-
// this part must be signal-handler safe
761-
if (critical) {
762-
bt_size += rec_backtrace_ctx(bt_data + bt_size,
763-
JL_MAX_BT_SIZE / jl_n_threads - 1,
764-
signal_context, NULL);
765-
bt_data[bt_size++].uintptr = 0;
766-
}
767-
768-
// do backtrace for profiler
769-
if (profile && running) {
770-
if (jl_profile_is_buffer_full()) {
771-
// Buffer full: Delete the timer
772-
jl_profile_stop_timer();
752+
if (!critical)
753+
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
754+
for (int idx = jl_n_threads; idx-- > 0; ) {
755+
// Stop the threads in reverse index order for a critical error, or in the
// shuffled round-robin order when only profiling.
756+
int i = critical ? idx : profile_round_robin_thread_order[idx];
757+
// notify thread to stop
758+
jl_thread_suspend_and_get_state(i, &signal_context);
759+
760+
// do backtrace on thread contexts for critical signals
761+
// this part must be signal-handler safe
762+
if (critical) {
763+
bt_size += rec_backtrace_ctx(bt_data + bt_size,
764+
JL_MAX_BT_SIZE / jl_n_threads - 1,
765+
signal_context, NULL);
766+
bt_data[bt_size++].uintptr = 0;
773767
}
774-
else {
775-
// unwinding can fail, so keep track of the current state
776-
// and restore from the SEGV handler if anything happens.
777-
jl_jmp_buf *old_buf = jl_get_safe_restore();
778-
jl_jmp_buf buf;
779-
780-
jl_set_safe_restore(&buf);
781-
if (jl_setjmp(buf, 0)) {
782-
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
783-
} else {
784-
// Get backtrace data
785-
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
786-
bt_size_max - bt_size_cur - 1, signal_context, NULL);
787-
}
788-
jl_set_safe_restore(old_buf);
789-
790-
jl_ptls_t ptls = jl_all_tls_states[i];
791-
792-
// store threadid but add 1 as 0 is preserved to indicate end of block
793-
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
794-
795-
// store task id
796-
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
797-
798-
// store cpu cycle clock
799-
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
800768

801-
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
802-
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
803-
804-
// Mark the end of this block with two 0's
805-
bt_data_prof[bt_size_cur++].uintptr = 0;
806-
bt_data_prof[bt_size_cur++].uintptr = 0;
769+
// do backtrace for profiler
770+
if (profile && running) {
771+
if (jl_profile_is_buffer_full()) {
772+
// Buffer full: Delete the timer
773+
jl_profile_stop_timer();
774+
}
775+
else {
776+
// unwinding can fail, so keep track of the current state
777+
// and restore from the SEGV handler if anything happens.
778+
jl_jmp_buf *old_buf = jl_get_safe_restore();
779+
jl_jmp_buf buf;
780+
781+
jl_set_safe_restore(&buf);
782+
if (jl_setjmp(buf, 0)) {
783+
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
784+
} else {
785+
// Get backtrace data
786+
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
787+
bt_size_max - bt_size_cur - 1, signal_context, NULL);
788+
}
789+
jl_set_safe_restore(old_buf);
790+
791+
jl_ptls_t ptls = jl_all_tls_states[i];
792+
793+
// store threadid but add 1 as 0 is preserved to indicate end of block
794+
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
795+
796+
// store task id
797+
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
798+
799+
// store cpu cycle clock
800+
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
801+
802+
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
803+
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
804+
805+
// Mark the end of this block with two 0's
806+
bt_data_prof[bt_size_cur++].uintptr = 0;
807+
bt_data_prof[bt_size_cur++].uintptr = 0;
808+
}
807809
}
808-
}
809810

810-
// notify thread to resume
811-
jl_thread_resume(i, sig);
812-
}
813-
if (critical || profile)
811+
// notify thread to resume
812+
jl_thread_resume(i, sig);
813+
}
814814
jl_unlock_profile();
815+
}
815816
#ifndef HAVE_MACH
816817
if (profile && running) {
817818
#if defined(HAVE_TIMER)

src/signals-win.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ void __cdecl crt_sig_handler(int sig, int num)
9191
RtlCaptureContext(&Context);
9292
if (sig == SIGILL)
9393
jl_show_sigill(&Context);
94-
jl_critical_error(sig, &Context);
94+
jl_critical_error(sig, &Context, jl_get_current_task());
9595
raise(sig);
9696
}
9797
}
@@ -225,7 +225,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara
225225

226226
LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
227227
{
228-
jl_ptls_t ptls = jl_current_task->ptls;
228+
jl_task_t *ct = jl_current_task;
229+
jl_ptls_t ptls = ct->ptls;
229230
if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
230231
switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
231232
case EXCEPTION_INT_DIVIDE_BY_ZERO:
@@ -312,7 +313,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
312313
jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
313314
jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
314315

315-
jl_critical_error(0, ExceptionInfo->ContextRecord);
316+
jl_critical_error(0, ExceptionInfo->ContextRecord, ct);
316317
static int recursion = 0;
317318
if (recursion++)
318319
exit(1);

0 commit comments

Comments
 (0)