summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/health.c
diff options
context:
space:
mode:
authorShay Drory <shayd@nvidia.com>2022-11-24 13:34:12 +0200
committerSaeed Mahameed <saeedm@nvidia.com>2022-12-28 11:38:50 -0800
commit9078e843efec530f279a155f262793c58b0746bd (patch)
tree95d8d46bcd6219302dba8e480f18736ffc5e71b2 /drivers/net/ethernet/mellanox/mlx5/core/health.c
parent44aee8ea15ac205490a41b00cbafcccbf9f7f82b (diff)
net/mlx5: Avoid recovery in probe flows
Currently, recovery is done without considering whether the device is still in probe flow. This may lead to recovery before device have finished probed successfully. e.g.: while mlx5_init_one() is running. Recovery flow is using functionality that is loaded only by mlx5_init_one(), and there is no point in running recovery without mlx5_init_one() finished successfully. Fix it by waiting for probe flow to finish and checking whether the device is probed before trying to perform recovery. Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") Signed-off-by: Shay Drory <shayd@nvidia.com> Reviewed-by: Moshe Shemesh <moshe@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/health.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c6
1 files changed, 6 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 86ed87d704f7..96417c5feed7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
dev = container_of(priv, struct mlx5_core_dev, priv);
devlink = priv_to_devlink(dev);
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
+ mlx5_core_err(dev, "health works are not permitted at this stage\n");
+ return;
+ }
+ mutex_unlock(&dev->intf_state_mutex);
enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
devl_lock(devlink);