Skip to content

Commit 8eb23b9

Browse files
Peter Zijlstra authored and Ingo Molnar committed
sched: Debug nested sleeps
Validate that might_sleep() is called with the task state set to TASK_RUNNING, which catches places where we nest blocking primitives, e.g. mutex usage in a wait loop. Since all blocking is arranged through task_struct::state, nesting blocking primitives will cause the inner primitive to set TASK_RUNNING, and the outer one will thus not block. Another observed problem is calling a blocking function from schedule()->sched_submit_work()->blk_schedule_flush_plug(), which will then destroy the task state for the actual __schedule() call that comes after it. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Cc: [email protected] Cc: [email protected] Cc: [email protected] Cc: [email protected] Cc: Linus Torvalds <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 26cabd3 commit 8eb23b9

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

include/linux/sched.h

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
243243
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
244244
(task->flags & PF_FROZEN) == 0)
245245

246+
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
247+
248+
#define __set_task_state(tsk, state_value) \
249+
do { \
250+
(tsk)->task_state_change = _THIS_IP_; \
251+
(tsk)->state = (state_value); \
252+
} while (0)
253+
#define set_task_state(tsk, state_value) \
254+
do { \
255+
(tsk)->task_state_change = _THIS_IP_; \
256+
set_mb((tsk)->state, (state_value)); \
257+
} while (0)
258+
259+
/*
260+
* set_current_state() includes a barrier so that the write of current->state
261+
* is correctly serialised wrt the caller's subsequent test of whether to
262+
* actually sleep:
263+
*
264+
* set_current_state(TASK_UNINTERRUPTIBLE);
265+
* if (do_i_need_to_sleep())
266+
* schedule();
267+
*
268+
* If the caller does not need such serialisation then use __set_current_state()
269+
*/
270+
#define __set_current_state(state_value) \
271+
do { \
272+
current->task_state_change = _THIS_IP_; \
273+
current->state = (state_value); \
274+
} while (0)
275+
#define set_current_state(state_value) \
276+
do { \
277+
current->task_state_change = _THIS_IP_; \
278+
set_mb(current->state, (state_value)); \
279+
} while (0)
280+
281+
#else
282+
246283
#define __set_task_state(tsk, state_value) \
247284
do { (tsk)->state = (state_value); } while (0)
248285
#define set_task_state(tsk, state_value) \
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
259296
*
260297
* If the caller does not need such serialisation then use __set_current_state()
261298
*/
262-
#define __set_current_state(state_value) \
299+
#define __set_current_state(state_value) \
263300
do { current->state = (state_value); } while (0)
264-
#define set_current_state(state_value) \
301+
#define set_current_state(state_value) \
265302
set_mb(current->state, (state_value))
266303

304+
#endif
305+
267306
/* Task command name length */
268307
#define TASK_COMM_LEN 16
269308

@@ -1661,6 +1700,9 @@ struct task_struct {
16611700
unsigned int sequential_io;
16621701
unsigned int sequential_io_avg;
16631702
#endif
1703+
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1704+
unsigned long task_state_change;
1705+
#endif
16641706
};
16651707

16661708
/* Future-safe accessor for struct task_struct's cpus_allowed. */

kernel/sched/core.c

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7298,6 +7298,19 @@ void __might_sleep(const char *file, int line, int preempt_offset)
72987298
{
72997299
static unsigned long prev_jiffy; /* ratelimiting */
73007300

7301+
/*
7302+
* Blocking primitives will set (and therefore destroy) current->state,
7303+
* since we will exit with TASK_RUNNING make sure we enter with it,
7304+
* otherwise we will destroy state.
7305+
*/
7306+
if (WARN(current->state != TASK_RUNNING,
7307+
"do not call blocking ops when !TASK_RUNNING; "
7308+
"state=%lx set at [<%p>] %pS\n",
7309+
current->state,
7310+
(void *)current->task_state_change,
7311+
(void *)current->task_state_change))
7312+
__set_current_state(TASK_RUNNING);
7313+
73017314
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
73027315
if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
73037316
!is_idle_task(current)) ||

0 commit comments

Comments
 (0)