diff options
author | Paolo Abeni <pabeni@redhat.com> | 2025-01-09 15:33:10 +0100 |
---|---|---|
committer | Paolo Abeni <pabeni@redhat.com> | 2025-01-09 15:33:11 +0100 |
commit | 11c668db098507207d55c5d6e04cc34a55288636 (patch) | |
tree | 9eb6e497555e24d5ae38f2a4b0c4871e9384b900 /net/core/dev.c | |
parent | a3b3d2dc389568a77d0e25da17203e3616218e93 (diff) | |
parent | eb721f117e7d43b561e81dd878c4acfa2de13ee2 (diff) |
Merge branch 'net-make-sure-we-retain-napi-ordering-on-netdev-napi_list'
Jakub Kicinski says:
====================
net: make sure we retain NAPI ordering on netdev->napi_list
I promised Eric to remove the rtnl protection of the NAPI list.
When I sat down to implement it over the break, I realized that
the recently added NAPI ID retention will break the list ordering
assumption we have in netlink dump. The ordering used to happen
"naturally", because we'd always add NAPIs at the head of the
list, and assign a new monotonically increasing ID.
Before the first patch of this series we'd still only add at
the head of the list but now the newly added NAPI may inherit
from its config an ID lower than something else already on the list.
The fix is in the first patch, the rest is netdevsim churn to test it.
I'm posting this for net-next, because AFAICT the problem can't
be triggered in net, given the very limited queue API adoption.
v2:
- [patch 2] allocate the array with kcalloc() instead of kvcalloc()
- [patch 2] set GFP_KERNEL_ACCOUNT when allocating queues
- [patch 6] don't null-check page pool before page_pool_destroy()
- [patch 6] controled -> controlled
- [patch 7] change mode to 0200
- [patch 7] reorder removal to be inverse of add
- [patch 7] fix the spaces vs tabs
v1: https://lore.kernel.org/20250103185954.1236510-1-kuba@kernel.org
====================
Link: https://patch.msgid.link/20250107160846.2223263-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 42 |
1 files changed, 36 insertions, 6 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 983c24927316..26f0c2fbb8aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6736,13 +6736,14 @@ static void napi_restore_config(struct napi_struct *n)
 	n->gro_flush_timeout = n->config->gro_flush_timeout;
 	n->irq_suspend_timeout = n->config->irq_suspend_timeout;
 	/* a NAPI ID might be stored in the config, if so use it. if not, use
-	 * napi_hash_add to generate one for us. It will be saved to the config
-	 * in napi_disable.
+	 * napi_hash_add to generate one for us.
 	 */
-	if (n->config->napi_id)
+	if (n->config->napi_id) {
 		napi_hash_add_with_id(n, n->config->napi_id);
-	else
+	} else {
 		napi_hash_add(n);
+		n->config->napi_id = n->napi_id;
+	}
 }
 
 static void napi_save_config(struct napi_struct *n)
@@ -6750,10 +6751,39 @@ static void napi_save_config(struct napi_struct *n)
 	n->config->defer_hard_irqs = n->defer_hard_irqs;
 	n->config->gro_flush_timeout = n->gro_flush_timeout;
 	n->config->irq_suspend_timeout = n->irq_suspend_timeout;
-	n->config->napi_id = n->napi_id;
 	napi_hash_del(n);
 }
 
+/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will
+ * inherit an existing ID try to insert it at the right position.
+ */
+static void
+netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+{
+	unsigned int new_id, pos_id;
+	struct list_head *higher;
+	struct napi_struct *pos;
+
+	new_id = UINT_MAX;
+	if (napi->config && napi->config->napi_id)
+		new_id = napi->config->napi_id;
+
+	higher = &dev->napi_list;
+	list_for_each_entry(pos, &dev->napi_list, dev_list) {
+		if (pos->napi_id >= MIN_NAPI_ID)
+			pos_id = pos->napi_id;
+		else if (pos->config)
+			pos_id = pos->config->napi_id;
+		else
+			pos_id = UINT_MAX;
+
+		if (pos_id <= new_id)
+			break;
+		higher = &pos->dev_list;
+	}
+	list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+}
+
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 			   int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -6780,7 +6810,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 	napi->list_owner = -1;
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
-	list_add_rcu(&napi->dev_list, &dev->napi_list);
+	netif_napi_dev_list_add(dev, napi);
 	/* default settings from sysfs are applied to all NAPIs. any per-NAPI
 	 * configuration will be loaded in napi_enable |