Skip to content

Commit eab0da3

Browse files
mosheshemesh2Saeed Mahameed
authored andcommitted
net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work
Due to the cited patch, devlink health commands take devlink lock and this may result in deadlock for mlx5e_tx_reporter as it takes local state_lock before calling devlink health report and on the other hand devlink health commands such as diagnose for same reporter take local state_lock after taking devlink lock (see kernel log below). To fix it, remove local state_lock from mlx5e_tx_timeout_work() before calling devlink_health_report() and take care to cancel the work before any call to close channels, which may free the SQs that should be handled by the work. Before cancel_work_sync(), use current_work() to check we are not calling it from within the work, as mlx5e_tx_timeout_work() itself may close the channels and reopen as part of recovery flow. While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock() and a flag to avoid deadlock by calling cancel_work_sync() before closing the channels while holding rtnl_lock too. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted ------------------------------------------------------ kworker/u16:2/65 is trying to acquire lock: ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0 but task is already holding lock: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x12c/0x14b0 mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core] devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50 genl_family_rcv_msg_doit+0x1e9/0x2f0 genl_rcv_msg+0x2e9/0x530 netlink_rcv_skb+0x11d/0x340 genl_rcv+0x24/0x40 netlink_unicast+0x438/0x710 netlink_sendmsg+0x788/0xc40 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x1c1/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #0 (&devlink->lock_key#2){+.+.}-{3:3}: __lock_acquire+0x2c8a/0x6200 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock(&devlink->lock_key#2); lock(&priv->state_lock); lock(&devlink->lock_key#2); *** DEADLOCK *** 4 locks held by kworker/u16:2/65: #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888101de7db8 ((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core] #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] stack backtrace: CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] Call Trace: <TASK> dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? find_held_lock+0x2d/0x110 ? __stack_depot_save+0x24c/0x520 ? alloc_chain_hlocks+0x228/0x700 __lock_acquire+0x2c8a/0x6200 ? register_lock_class+0x1860/0x1860 ? kasan_save_stack+0x1e/0x40 ? kasan_set_free_info+0x20/0x30 ? ____kasan_slab_free+0x11d/0x1b0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? devlink_health_report+0x3d5/0x7e0 lock_acquire+0x1c1/0x550 ? devlink_health_report+0x2f1/0x7e0 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? find_held_lock+0x2d/0x110 __mutex_lock+0x12c/0x14b0 ? devlink_health_report+0x2f1/0x7e0 ? devlink_health_report+0x2f1/0x7e0 ? mutex_lock_io_nested+0x1320/0x1320 ? trace_hardirqs_on+0x2d/0x100 ? bit_wait_io_timeout+0x170/0x170 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core] ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core] ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] ? mutex_lock_io_nested+0x1320/0x1320 ? process_one_work+0x70f/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? lock_downgrade+0x6e0/0x6e0 mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 </TASK> Fixes: c90005b ("devlink: Hold the instance lock in health callbacks") Signed-off-by: Moshe Shemesh <[email protected]> Reviewed-by: Tariq Toukan <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 762a55a commit eab0da3

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ enum {
826826
MLX5E_STATE_DESTROYING,
827827
MLX5E_STATE_XDP_TX_ENABLED,
828828
MLX5E_STATE_XDP_ACTIVE,
829+
MLX5E_STATE_CHANNELS_ACTIVE,
829830
};
830831

831832
struct mlx5e_modify_sq_param {

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2731,6 +2731,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
27312731
{
27322732
int i;
27332733

2734+
ASSERT_RTNL();
27342735
if (chs->ptp) {
27352736
mlx5e_ptp_close(chs->ptp);
27362737
chs->ptp = NULL;
@@ -3012,17 +3013,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
30123013
if (mlx5e_is_vport_rep(priv))
30133014
mlx5e_rep_activate_channels(priv);
30143015

3016+
set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
3017+
30153018
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
30163019

30173020
if (priv->rx_res)
30183021
mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
30193022
}
30203023

3024+
static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv)
3025+
{
3026+
WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state));
3027+
if (current_work() != &priv->tx_timeout_work)
3028+
cancel_work_sync(&priv->tx_timeout_work);
3029+
}
3030+
30213031
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
30223032
{
30233033
if (priv->rx_res)
30243034
mlx5e_rx_res_channels_deactivate(priv->rx_res);
30253035

3036+
clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
3037+
mlx5e_cancel_tx_timeout_work(priv);
3038+
30263039
if (mlx5e_is_vport_rep(priv))
30273040
mlx5e_rep_deactivate_channels(priv);
30283041

@@ -4801,8 +4814,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
48014814
struct net_device *netdev = priv->netdev;
48024815
int i;
48034816

4804-
rtnl_lock();
4805-
mutex_lock(&priv->state_lock);
4817+
/* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues
4818+
* through this flow. However, channel closing flows have to wait for
4819+
* this work to finish while holding rtnl lock too. So either get the
4820+
* lock or find that channels are being closed for other reason and
4821+
* this work is not relevant anymore.
4822+
*/
4823+
while (!rtnl_trylock()) {
4824+
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
4825+
return;
4826+
msleep(20);
4827+
}
48064828

48074829
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
48084830
goto unlock;
@@ -4821,7 +4843,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
48214843
}
48224844

48234845
unlock:
4824-
mutex_unlock(&priv->state_lock);
48254846
rtnl_unlock();
48264847
}
48274848

0 commit comments

Comments
 (0)