summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
diff options
context:
space:
mode:
authorMaxim Mikityanskiy <maximmi@mellanox.com>2020-06-11 14:25:19 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2020-09-21 17:22:21 -0700
commit9c25a22dfb00270372224721fed646965420323a (patch)
tree7aa5cd4729b74d75ce128a227504ddffbbd7752e /drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
parentfe45386a208277cae4648106133c08246eecd012 (diff)
net/mlx5e: Use synchronize_rcu to sync with NAPI
As described in the previous commit, napi_synchronize doesn't quite fit the purpose when we just need to wait until the currently running NAPI quits. Its implementation waits until NAPI is not running by polling and waiting for 1ms in between. In cases where we need to deactivate one queue (e.g., recovery flows) or where we deactivate them one-by-one (deactivate channel flow), we may get stuck in napi_synchronize forever if other queues keep NAPI active, causing a soft lockup. Depending on kernel configuration (CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC), it may result in a kernel panic. To fix the issue, use synchronize_rcu to wait for NAPI to quit, and wrap the whole NAPI in rcu_read_lock. Fixes: acc6c5953af1 ("net/mlx5e: Split open/close channels to stages") Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com> Reviewed-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c17
1 files changed, 13 insertions, 4 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index de10b06bade5..d5868670f8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -121,13 +121,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_xdpsq *xsksq = &c->xsksq;
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
- bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
int work_done = 0;
+ bool xsk_open;
int i;
+ rcu_read_lock();
+
+ xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
ch_stats->poll++;
for (i = 0; i < c->num_tc; i++)
@@ -167,8 +171,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
busy |= busy_xsk;
if (busy) {
- if (likely(mlx5e_channel_no_affinity_change(c)))
- return budget;
+ if (likely(mlx5e_channel_no_affinity_change(c))) {
+ work_done = budget;
+ goto out;
+ }
ch_stats->aff_change++;
aff_change = true;
if (budget && work_done == budget)
@@ -176,7 +182,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
ch_stats->arm++;
@@ -203,6 +209,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
ch_stats->force_irq++;
}
+out:
+ rcu_read_unlock();
+
return work_done;
}