summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
diff options
context:
space:
mode:
authorMaxim Mikityanskiy <maximmi@nvidia.com>2022-04-15 16:19:15 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2022-05-31 13:40:54 -0700
commit2e642afb61b24401a7ec819d27ddcd69c7c29784 (patch)
treedde8c0e66212cd5b1f8bf4909b432632249ae701 /drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
parent1f2856cde64baa78475e6d3c601fb7b7f693a161 (diff)
net/mlx5e: Disable softirq in mlx5e_activate_rq to avoid race condition
When the driver activates the channels, it assumes NAPI isn't running yet. mlx5e_activate_rq posts a NOP WQE to ICOSQ to trigger a hardware interrupt and start NAPI, which will run mlx5e_alloc_rx_mpwqe and post UMR WQEs to ICOSQ to be able to receive packets with striding RQ. Unfortunately, a race condition is possible if NAPI is triggered by something else (for example, TX) at a bad timing, before mlx5e_activate_rq finishes. In this case, mlx5e_alloc_rx_mpwqe may post UMR WQEs to ICOSQ, and with the bad timing, the wqe_info of the first UMR may be overwritten by the wqe_info of the NOP posted by mlx5e_activate_rq. The consequence is that icosq->db.wqe_info[0].num_wqebbs will be changed from MLX5E_UMR_WQEBBS to 1, disrupting the integrity of the array-based linked list in wqe_info[]. mlx5e_poll_ico_cq will hang in an infinite loop after processing wqe_info[0], because after the corruption, the next item to be processed will be wqe_info[1], which is filled with zeros, and `sqcc += wi->num_wqebbs` will never move further. This commit fixes this race condition by using async_icosq to post the NOP and trigger the interrupt. async_icosq is always protected with a spinlock, eliminating the race condition. Fixes: bc77b240b3c5 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE") Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com> Reported-by: Karsten Nielsen <karsten@foo-bar.dk> Reviewed-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Gal Pressman <gal@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c6
1 files changed, 6 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 2684e9da9f41..fc366e66d0b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -123,6 +123,8 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
xskrq->stats->recover++;
}
+ mlx5e_trigger_napi_icosq(icosq->channel);
+
mutex_unlock(&icosq->channel->icosq_recovery_lock);
return 0;
@@ -166,6 +168,10 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
mlx5e_activate_rq(rq);
rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
return 0;
out:
clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);