Skip to content

Commit 194a9b9

Browse files
ioworker0 authored and akpm00 committed
hung_task: show the blocker task if the task is hung on semaphore
Inspired by mutex blocker tracking[1], this patch makes a trade-off to balance the overhead and utility of the hung task detector.

Unlike mutexes, semaphores lack explicit ownership tracking, making it challenging to identify the root cause of hangs. To address this, we introduce a last_holder field to the semaphore structure, which is updated when a task successfully calls down() and cleared during up().

The assumption is that if a task is blocked on a semaphore, the holders must not have released it. While this does not guarantee that the last holder is one of the current blockers, it likely provides a practical hint for diagnosing semaphore-related stalls.

With this change, the hung task detector can now show blocker task's info like below:

[Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked for more than 120 seconds.
[Tue Apr 8 12:19:07 2025] Tainted: G E 6.14.0-rc6+ CachyOS#1
[Tue Apr 8 12:19:07 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[Tue Apr 8 12:19:07 2025] task:cat state:D stack:0 pid:945 tgid:945 ppid:828 task_flags:0x400000 flags:0x00000000
[Tue Apr 8 12:19:07 2025] Call Trace:
[Tue Apr 8 12:19:07 2025] <TASK>
[Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0
[Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0
[Tue Apr 8 12:19:07 2025] schedule_timeout+0xe3/0xf0
[Tue Apr 8 12:19:07 2025] ? __folio_mod_stat+0x2a/0x80
[Tue Apr 8 12:19:07 2025] ? set_ptes.constprop.0+0x27/0x90
[Tue Apr 8 12:19:07 2025] __down_common+0x155/0x280
[Tue Apr 8 12:19:07 2025] down+0x53/0x70
[Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x23/0x60
[Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0
[Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350
[Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140
[Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr 8 12:19:07 2025] ? handle_mm_fault+0x180/0x260
[Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0
[Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120
[Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f419478f46e
[Tue Apr 8 12:19:07 2025] RSP: 002b:00007fff1c4d2668 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f419478f46e
[Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f4194683000 RDI: 0000000000000003
[Tue Apr 8 12:19:07 2025] RBP: 00007f4194683000 R08: 00007f4194682010 R09: 0000000000000000
[Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr 8 12:19:07 2025] </TASK>
[Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked on a semaphore likely last held by task cat:938
[Tue Apr 8 12:19:07 2025] task:cat state:S stack:0 pid:938 tgid:938 ppid:584 task_flags:0x400000 flags:0x00000000
[Tue Apr 8 12:19:07 2025] Call Trace:
[Tue Apr 8 12:19:07 2025] <TASK>
[Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0
[Tue Apr 8 12:19:07 2025] ? _raw_spin_unlock_irqrestore+0xe/0x40
[Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0
[Tue Apr 8 12:19:07 2025] schedule_timeout+0x77/0xf0
[Tue Apr 8 12:19:07 2025] ? __pfx_process_timeout+0x10/0x10
[Tue Apr 8 12:19:07 2025] msleep_interruptible+0x49/0x60
[Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x2d/0x60
[Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0
[Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350
[Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140
[Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr 8 12:19:07 2025] ? handle_mm_fault+0x180/0x260
[Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0
[Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120
[Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f7c584a646e
[Tue Apr 8 12:19:07 2025] RSP: 002b:00007ffdba8ce158 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f7c584a646e
[Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f7c5839a000 RDI: 0000000000000003
[Tue Apr 8 12:19:07 2025] RBP: 00007f7c5839a000 R08: 00007f7c58399010 R09: 0000000000000000
[Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr 8 12:19:07 2025] </TASK>

[1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Mingzhe Yang <[email protected]>
Signed-off-by: Lance Yang <[email protected]>
Suggested-by: Andrew Morton <[email protected]>
Suggested-by: Masami Hiramatsu (Google) <[email protected]>
Reviewed-by: Masami Hiramatsu (Google) <[email protected]>
Cc: Anna Schumaker <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Joel Granados <[email protected]>
Cc: John Stultz <[email protected]>
Cc: Kent Overstreet <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tomasz Figa <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Yongliang Gao <[email protected]>
Cc: Zi Li <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent e711faa commit 194a9b9

File tree

3 files changed

+106
-18
lines changed

3 files changed

+106
-18
lines changed

include/linux/semaphore.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,25 @@ struct semaphore {
1616
raw_spinlock_t lock;
1717
unsigned int count;
1818
struct list_head wait_list;
19+
20+
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
21+
unsigned long last_holder;
22+
#endif
1923
};
2024

25+
/*
 * Optional designated initializer for the last_holder member.
 *
 * Expands to ", .last_holder = 0UL" when CONFIG_DETECT_HUNG_TASK_BLOCKER is
 * defined and to nothing otherwise.  The expansion carries its own leading
 * comma, so the initializer written immediately before it must not end with
 * one.
 */
#ifndef CONFIG_DETECT_HUNG_TASK_BLOCKER
#define __LAST_HOLDER_SEMAPHORE_INITIALIZER
#else
#define __LAST_HOLDER_SEMAPHORE_INITIALIZER , .last_holder = 0UL
#endif
31+
2132
#define __SEMAPHORE_INITIALIZER(name, n) \
2233
{ \
2334
.lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \
2435
.count = n, \
25-
.wait_list = LIST_HEAD_INIT((name).wait_list), \
36+
.wait_list = LIST_HEAD_INIT((name).wait_list) \
37+
__LAST_HOLDER_SEMAPHORE_INITIALIZER \
2638
}
2739

2840
/*
@@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem);
4759
extern int __must_check down_trylock(struct semaphore *sem);
4860
extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
4961
extern void up(struct semaphore *sem);
62+
extern unsigned long sem_last_holder(struct semaphore *sem);
5063

5164
#endif /* __LINUX_SEMAPHORE_H */

kernel/hung_task.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,32 +99,62 @@ static struct notifier_block panic_block = {
9999
static void debug_show_blocker(struct task_struct *task)
100100
{
101101
struct task_struct *g, *t;
102-
unsigned long owner, blocker;
102+
unsigned long owner, blocker, blocker_type;
103103

104104
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");
105105

106106
blocker = READ_ONCE(task->blocker);
107-
if (!blocker ||
108-
hung_task_get_blocker_type(blocker) != BLOCKER_TYPE_MUTEX)
107+
if (!blocker)
109108
return;
110109

111-
owner = mutex_get_owner(
112-
(struct mutex *)hung_task_blocker_to_lock(blocker));
110+
blocker_type = hung_task_get_blocker_type(blocker);
111+
112+
switch (blocker_type) {
113+
case BLOCKER_TYPE_MUTEX:
114+
owner = mutex_get_owner(
115+
(struct mutex *)hung_task_blocker_to_lock(blocker));
116+
break;
117+
case BLOCKER_TYPE_SEM:
118+
owner = sem_last_holder(
119+
(struct semaphore *)hung_task_blocker_to_lock(blocker));
120+
break;
121+
default:
122+
WARN_ON_ONCE(1);
123+
return;
124+
}
125+
113126

114127
if (unlikely(!owner)) {
115-
pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
116-
task->comm, task->pid);
128+
switch (blocker_type) {
129+
case BLOCKER_TYPE_MUTEX:
130+
pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
131+
task->comm, task->pid);
132+
break;
133+
case BLOCKER_TYPE_SEM:
134+
pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
135+
task->comm, task->pid);
136+
break;
137+
}
117138
return;
118139
}
119140

120141
/* Ensure the owner information is correct. */
121142
for_each_process_thread(g, t) {
122-
if ((unsigned long)t == owner) {
143+
if ((unsigned long)t != owner)
144+
continue;
145+
146+
switch (blocker_type) {
147+
case BLOCKER_TYPE_MUTEX:
123148
pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
124-
task->comm, task->pid, t->comm, t->pid);
125-
sched_show_task(t);
126-
return;
149+
task->comm, task->pid, t->comm, t->pid);
150+
break;
151+
case BLOCKER_TYPE_SEM:
152+
pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n",
153+
task->comm, task->pid, t->comm, t->pid);
154+
break;
127155
}
156+
sched_show_task(t);
157+
return;
128158
}
129159
}
130160
#else

kernel/locking/semaphore.c

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,49 @@
3434
#include <linux/spinlock.h>
3535
#include <linux/ftrace.h>
3636
#include <trace/events/lock.h>
37+
#include <linux/hung_task.h>
3738

3839
static noinline void __down(struct semaphore *sem);
3940
static noinline int __down_interruptible(struct semaphore *sem);
4041
static noinline int __down_killable(struct semaphore *sem);
4142
static noinline int __down_timeout(struct semaphore *sem, long timeout);
4243
static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q);
4344

45+
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
/* Record the calling task (current) as the most recent holder of @sem. */
static inline void hung_task_sem_set_holder(struct semaphore *sem)
{
	unsigned long self = (unsigned long)current;

	WRITE_ONCE(sem->last_holder, self);
}

/*
 * Reset the recorded holder of @sem to 0, but only when the recorded value
 * is the calling task; a value left by any other task is kept.
 */
static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
{
	unsigned long self = (unsigned long)current;

	if (READ_ONCE(sem->last_holder) != self)
		return;

	WRITE_ONCE(sem->last_holder, 0UL);
}

/*
 * Return the value stored in @sem's last_holder field: a task pointer cast
 * to unsigned long, or 0 when no holder is recorded.
 */
unsigned long sem_last_holder(struct semaphore *sem)
{
	unsigned long holder = READ_ONCE(sem->last_holder);

	return holder;
}
#else
/*
 * Without CONFIG_DETECT_HUNG_TASK_BLOCKER the two hooks below are empty and
 * sem_last_holder() always reports 0.
 */
static inline void hung_task_sem_set_holder(struct semaphore *sem)
{
}

static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
{
}

unsigned long sem_last_holder(struct semaphore *sem)
{
	return 0UL;
}
#endif
73+
74+
static inline void __sem_acquire(struct semaphore *sem)
75+
{
76+
sem->count--;
77+
hung_task_sem_set_holder(sem);
78+
}
79+
4480
/**
4581
* down - acquire the semaphore
4682
* @sem: the semaphore to be acquired
@@ -59,7 +95,7 @@ void __sched down(struct semaphore *sem)
5995
might_sleep();
6096
raw_spin_lock_irqsave(&sem->lock, flags);
6197
if (likely(sem->count > 0))
62-
sem->count--;
98+
__sem_acquire(sem);
6399
else
64100
__down(sem);
65101
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -83,7 +119,7 @@ int __sched down_interruptible(struct semaphore *sem)
83119
might_sleep();
84120
raw_spin_lock_irqsave(&sem->lock, flags);
85121
if (likely(sem->count > 0))
86-
sem->count--;
122+
__sem_acquire(sem);
87123
else
88124
result = __down_interruptible(sem);
89125
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -110,7 +146,7 @@ int __sched down_killable(struct semaphore *sem)
110146
might_sleep();
111147
raw_spin_lock_irqsave(&sem->lock, flags);
112148
if (likely(sem->count > 0))
113-
sem->count--;
149+
__sem_acquire(sem);
114150
else
115151
result = __down_killable(sem);
116152
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -140,7 +176,7 @@ int __sched down_trylock(struct semaphore *sem)
140176
raw_spin_lock_irqsave(&sem->lock, flags);
141177
count = sem->count - 1;
142178
if (likely(count >= 0))
143-
sem->count = count;
179+
__sem_acquire(sem);
144180
raw_spin_unlock_irqrestore(&sem->lock, flags);
145181

146182
return (count < 0);
@@ -165,7 +201,7 @@ int __sched down_timeout(struct semaphore *sem, long timeout)
165201
might_sleep();
166202
raw_spin_lock_irqsave(&sem->lock, flags);
167203
if (likely(sem->count > 0))
168-
sem->count--;
204+
__sem_acquire(sem);
169205
else
170206
result = __down_timeout(sem, timeout);
171207
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -187,6 +223,9 @@ void __sched up(struct semaphore *sem)
187223
DEFINE_WAKE_Q(wake_q);
188224

189225
raw_spin_lock_irqsave(&sem->lock, flags);
226+
227+
hung_task_sem_clear_if_holder(sem);
228+
190229
if (likely(list_empty(&sem->wait_list)))
191230
sem->count++;
192231
else
@@ -228,8 +267,10 @@ static inline int __sched ___down_common(struct semaphore *sem, long state,
228267
raw_spin_unlock_irq(&sem->lock);
229268
timeout = schedule_timeout(timeout);
230269
raw_spin_lock_irq(&sem->lock);
231-
if (waiter.up)
270+
if (waiter.up) {
271+
hung_task_sem_set_holder(sem);
232272
return 0;
273+
}
233274
}
234275

235276
timed_out:
@@ -246,10 +287,14 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
246287
{
247288
int ret;
248289

290+
hung_task_set_blocker(sem, BLOCKER_TYPE_SEM);
291+
249292
trace_contention_begin(sem, 0);
250293
ret = ___down_common(sem, state, timeout);
251294
trace_contention_end(sem, ret);
252295

296+
hung_task_clear_blocker();
297+
253298
return ret;
254299
}
255300

0 commit comments

Comments
 (0)