Skip to content

Commit ea0970c

Browse files
xiaochenshenkelmously
authored andcommitted
x86/resctrl: Fix a deadlock due to inaccurate reference
BugLink: https://bugs.launchpad.net/bugs/1866403 commit 334b0f4 upstream. There is a race condition which results in a deadlock when rmdir and mkdir execute concurrently: $ ls /sys/fs/resctrl/c1/mon_groups/m1/ cpus cpus_list mon_data tasks Thread 1: rmdir /sys/fs/resctrl/c1 Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1 3 locks held by mkdir/48649: #0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50 #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170 #2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70 4 locks held by rmdir/48652: #0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50 #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0 #2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120 #3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70 Thread 1 is deleting control group "c1". Holding rdtgroup_mutex, kernfs_remove() removes all kernfs nodes under directory "c1" recursively, then waits for sub kernfs node "mon_groups" to drop active reference. Thread 2 is trying to create a subdirectory "m1" in the "mon_groups" directory. The wrapper kernfs_iop_mkdir() takes an active reference to the "mon_groups" directory but the code drops the active reference to the parent directory "c1" instead. As a result, Thread 1 is blocked on waiting for active reference to drop and never release rdtgroup_mutex, while Thread 2 is also blocked on trying to get rdtgroup_mutex. Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir) (rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1) ------------------------- ------------------------- kernfs_iop_mkdir /* * kn: "m1", parent_kn: "mon_groups", * prgrp_kn: parent_kn->parent: "c1", * * "mon_groups", parent_kn->active++: 1 */ kernfs_get_active(parent_kn) kernfs_iop_rmdir /* "c1", kn->active++ */ kernfs_get_active(kn) rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) /* "c1", kn->active-- */ kernfs_break_active_protection(kn) mutex_lock rdtgroup_rmdir_ctrl free_all_child_rdtgrp sentry->flags = RDT_DELETED rdtgroup_ctrl_remove rdtgrp->flags = RDT_DELETED kernfs_get(kn) kernfs_remove(rdtgrp->kn) __kernfs_remove /* "mon_groups", sub_kn */ atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active) kernfs_drain(sub_kn) /* * sub_kn->active == KN_DEACTIVATED_BIAS + 1, * waiting on sub_kn->active to drop, but it * never drops in Thread 2 which is blocked * on getting rdtgroup_mutex. */ Thread 1 hangs here ----> wait_event(sub_kn->active == KN_DEACTIVATED_BIAS) ... rdtgroup_mkdir rdtgroup_mkdir_mon(parent_kn, prgrp_kn) mkdir_rdt_prepare(parent_kn, prgrp_kn) rdtgroup_kn_lock_live(prgrp_kn) atomic_inc(&rdtgrp->waitcount) /* * "c1", prgrp_kn->active-- * * The active reference on "c1" is * dropped, but not matching the * actual active reference taken * on "mon_groups", thus causing * Thread 1 to wait forever while * holding rdtgroup_mutex. */ kernfs_break_active_protection( prgrp_kn) /* * Trying to get rdtgroup_mutex * which is held by Thread 1. */ Thread 2 hangs here ----> mutex_lock ... The problem is that the creation of a subdirectory in the "mon_groups" directory incorrectly releases the active protection of its parent directory instead of itself before it starts waiting for rdtgroup_mutex. This is triggered by the rdtgroup_mkdir() flow calling rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the parent control group ("c1") as argument. It should be called with kernfs node "mon_groups" instead. What is currently missing is that the kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp. Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() instead. And then it operates on the same rdtgroup structure but handles the active reference of kernfs node "mon_groups" to prevent deadlock. The same changes are also made to the "mon_data" directories. This results in some unused function parameters that will be cleaned up in follow-up patch as the focus here is on the fix only in support of backporting efforts. Backporting notes: Since upstream commit fa7d949 ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c for older stable trees. Fixes: c7d9aac ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Suggested-by: Reinette Chatre <[email protected]> Signed-off-by: Xiaochen Shen <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Reviewed-by: Reinette Chatre <[email protected]> Reviewed-by: Tony Luck <[email protected]> Acked-by: Thomas Gleixner <[email protected]> Cc: [email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Sasha Levin <[email protected]> Signed-off-by: Kamal Mostafa <[email protected]> Signed-off-by: Khalid Elmously <[email protected]>
1 parent f7ee9b9 commit ea0970c

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

arch/x86/kernel/cpu/resctrl/rdtgroup.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,7 +1970,7 @@ static int rdt_get_tree(struct fs_context *fc)
19701970

19711971
if (rdt_mon_capable) {
19721972
ret = mongroup_create_dir(rdtgroup_default.kn,
1973-
NULL, "mon_groups",
1973+
&rdtgroup_default, "mon_groups",
19741974
&kn_mongrp);
19751975
if (ret < 0)
19761976
goto out_info;
@@ -2454,7 +2454,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
24542454
/*
24552455
* Create the mon_data directory first.
24562456
*/
2457-
ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
2457+
ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
24582458
if (ret)
24592459
return ret;
24602460

@@ -2653,7 +2653,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
26532653
uint files = 0;
26542654
int ret;
26552655

2656-
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
2656+
prdtgrp = rdtgroup_kn_lock_live(parent_kn);
26572657
if (!prdtgrp) {
26582658
ret = -ENODEV;
26592659
goto out_unlock;
@@ -2726,7 +2726,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
27262726
kernfs_activate(kn);
27272727

27282728
/*
2729-
* The caller unlocks the prgrp_kn upon success.
2729+
* The caller unlocks the parent_kn upon success.
27302730
*/
27312731
return 0;
27322732

@@ -2737,7 +2737,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
27372737
out_free_rgrp:
27382738
kfree(rdtgrp);
27392739
out_unlock:
2740-
rdtgroup_kn_unlock(prgrp_kn);
2740+
rdtgroup_kn_unlock(parent_kn);
27412741
return ret;
27422742
}
27432743

@@ -2775,7 +2775,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
27752775
*/
27762776
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
27772777

2778-
rdtgroup_kn_unlock(prgrp_kn);
2778+
rdtgroup_kn_unlock(parent_kn);
27792779
return ret;
27802780
}
27812781

@@ -2818,7 +2818,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
28182818
* Create an empty mon_groups directory to hold the subset
28192819
* of tasks and cpus to monitor.
28202820
*/
2821-
ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
2821+
ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
28222822
if (ret) {
28232823
rdt_last_cmd_puts("kernfs subdir error\n");
28242824
goto out_del_list;
@@ -2834,7 +2834,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
28342834
out_common_fail:
28352835
mkdir_rdt_prepare_clean(rdtgrp);
28362836
out_unlock:
2837-
rdtgroup_kn_unlock(prgrp_kn);
2837+
rdtgroup_kn_unlock(parent_kn);
28382838
return ret;
28392839
}
28402840

0 commit comments

Comments
 (0)