Skip to content

Commit c69453e

Browse files
yishaih authored and davem330 committed
net/mlx4_core: Manage interface state for Reset flow cases
We need to manage the interface state to synchronize the reset flow with other related cases such as remove_one. This is required to prevent certain races. For example, if the software stack is already down as a result of an unload call, remove_one should skip the unload phase. This commit implements the remove_one case; handling of AER and other cases comes next. The interface can be up or down; upon remove_one, the state also includes an extra bit indicating that the device is being cleaned up, forcing other tasks to finish before the final cleanup. Signed-off-by: Yishai Hadas <[email protected]> Signed-off-by: Or Gerlitz <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent f5aef5a commit c69453e

File tree

4 files changed

+32
-3
lines changed

4 files changed

+32
-3
lines changed

drivers/net/ethernet/mellanox/mlx4/catas.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,14 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
122122
int err = 0;
123123

124124
mlx4_enter_error_state(persist);
125-
err = mlx4_restart_one(persist->pdev);
126-
mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err);
125+
mutex_lock(&persist->interface_state_mutex);
126+
if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
127+
!(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
128+
err = mlx4_restart_one(persist->pdev);
129+
mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
130+
err);
131+
}
132+
mutex_unlock(&persist->interface_state_mutex);
127133
}
128134

129135
static void dump_err_buf(struct mlx4_dev *dev)
@@ -211,6 +217,9 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
211217
iounmap(priv->catas_err.map);
212218
priv->catas_err.map = NULL;
213219
}
220+
221+
if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
222+
flush_workqueue(dev->persist->catas_wq);
214223
}
215224

216225
int mlx4_catas_init(struct mlx4_dev *dev)

drivers/net/ethernet/mellanox/mlx4/intf.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ int mlx4_register_device(struct mlx4_dev *dev)
138138

139139
mutex_lock(&intf_mutex);
140140

141+
dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
141142
list_add_tail(&priv->dev_list, &dev_list);
142143
list_for_each_entry(intf, &intf_list, list)
143144
mlx4_add_device(intf, priv);
@@ -162,6 +163,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
162163
mlx4_remove_device(intf, priv);
163164

164165
list_del(&priv->dev_list);
166+
dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
165167

166168
mutex_unlock(&intf_mutex);
167169
}

drivers/net/ethernet/mellanox/mlx4/main.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3114,6 +3114,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
31143114
pci_set_drvdata(pdev, dev->persist);
31153115
priv->pci_dev_data = id->driver_data;
31163116
mutex_init(&dev->persist->device_state_mutex);
3117+
mutex_init(&dev->persist->interface_state_mutex);
31173118

31183119
ret = __mlx4_init_one(pdev, id->driver_data, priv);
31193120
if (ret) {
@@ -3232,7 +3233,17 @@ static void mlx4_remove_one(struct pci_dev *pdev)
32323233
struct mlx4_dev *dev = persist->dev;
32333234
struct mlx4_priv *priv = mlx4_priv(dev);
32343235

3235-
mlx4_unload_one(pdev);
3236+
mutex_lock(&persist->interface_state_mutex);
3237+
persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
3238+
mutex_unlock(&persist->interface_state_mutex);
3239+
3240+
/* device marked to be under deletion running now without the lock
3241+
* letting other tasks to be terminated
3242+
*/
3243+
if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3244+
mlx4_unload_one(pdev);
3245+
else
3246+
mlx4_info(dev, "%s: interface is down\n", __func__);
32363247
mlx4_catas_end(dev);
32373248
pci_release_regions(pdev);
32383249
pci_disable_device(pdev);

include/linux/mlx4/device.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,11 @@ enum {
416416
MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1,
417417
};
418418

419+
enum {
420+
MLX4_INTERFACE_STATE_UP = 1 << 0,
421+
MLX4_INTERFACE_STATE_DELETION = 1 << 1,
422+
};
423+
419424
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
420425
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
421426

@@ -760,6 +765,8 @@ struct mlx4_dev_persistent {
760765
struct workqueue_struct *catas_wq;
761766
struct mutex device_state_mutex; /* protect HW state */
762767
u8 state;
768+
struct mutex interface_state_mutex; /* protect SW state */
769+
u8 interface_state;
763770
};
764771

765772
struct mlx4_dev {

0 commit comments

Comments
 (0)