-rw-r--r--  include/linux/netdevice.h |  6
-rw-r--r--  include/net/hotdata.h     |  7
-rw-r--r--  net/core/dev.c            | 41
-rw-r--r--  net/core/dev.h            |  2
-rw-r--r--  net/core/skbuff.c         | 24
5 files changed, 43 insertions, 37 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1b85454116f6..d1a687444b27 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3536,11 +3536,7 @@ struct softnet_data {
 
 	struct numa_drop_counters drop_counters;
 
-	/* Another possibly contended cache line */
-	spinlock_t		defer_lock ____cacheline_aligned_in_smp;
-	int			defer_count;
-	int			defer_ipi_scheduled;
-	struct sk_buff		*defer_list;
+	int			defer_ipi_scheduled ____cacheline_aligned_in_smp;
 	call_single_data_t	defer_csd;
 };
 
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index fda94b2647ff..4acec191c54a 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -2,10 +2,16 @@
 #ifndef _NET_HOTDATA_H
 #define _NET_HOTDATA_H
 
+#include <linux/llist.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 
+struct skb_defer_node {
+	struct llist_head	defer_list;
+	atomic_long_t		defer_count;
+} ____cacheline_aligned_in_smp;
+
 /* Read mostly data used in network fast paths. */
 struct net_hotdata {
 #if IS_ENABLED(CONFIG_INET)
@@ -30,6 +36,7 @@ struct net_hotdata {
 	struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 	u32			rps_cpu_mask;
 #endif
+	struct skb_defer_node __percpu *skb_defer_nodes;
 	int			gro_normal_batch;
 	int			netdev_budget;
 	int			netdev_budget_usecs;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8b54fdf0289a..a64cef2c537e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5180,8 +5180,9 @@ static void napi_schedule_rps(struct softnet_data *sd)
 	__napi_schedule_irqoff(&mysd->backlog);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
+void kick_defer_list_purge(unsigned int cpu)
 {
+	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
 	unsigned long flags;
 
 	if (use_backlog_threads()) {
@@ -6715,24 +6716,24 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 }
 EXPORT_SYMBOL(napi_complete_done);
 
-static void skb_defer_free_flush(struct softnet_data *sd)
+static void skb_defer_free_flush(void)
 {
+	struct llist_node *free_list;
 	struct sk_buff *skb, *next;
+	struct skb_defer_node *sdn;
+	int node;
 
-	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
-	if (!READ_ONCE(sd->defer_list))
-		return;
+	for_each_node(node) {
+		sdn = this_cpu_ptr(net_hotdata.skb_defer_nodes) + node;
 
-	spin_lock(&sd->defer_lock);
-	skb = sd->defer_list;
-	sd->defer_list = NULL;
-	sd->defer_count = 0;
-	spin_unlock(&sd->defer_lock);
+		if (llist_empty(&sdn->defer_list))
+			continue;
+		atomic_long_set(&sdn->defer_count, 0);
+		free_list = llist_del_all(&sdn->defer_list);
 
-	while (skb != NULL) {
-		next = skb->next;
-		napi_consume_skb(skb, 1);
-		skb = next;
+		llist_for_each_entry_safe(skb, next, free_list, ll_node) {
+			napi_consume_skb(skb, 1);
+		}
 	}
 }
 
@@ -6860,7 +6861,7 @@ count:
 	if (work > 0)
 		__NET_ADD_STATS(dev_net(napi->dev),
 				LINUX_MIB_BUSYPOLLRXPACKETS, work);
-	skb_defer_free_flush(this_cpu_ptr(&softnet_data));
+	skb_defer_free_flush();
 	bpf_net_ctx_clear(bpf_net_ctx);
 	local_bh_enable();
 
@@ -7719,7 +7720,7 @@ static void napi_threaded_poll_loop(struct napi_struct *napi)
 			local_irq_disable();
 			net_rps_action_and_irq_enable(sd);
 		}
-		skb_defer_free_flush(sd);
+		skb_defer_free_flush();
 		bpf_net_ctx_clear(bpf_net_ctx);
 		local_bh_enable();
 
@@ -7761,7 +7762,7 @@ start:
 	for (;;) {
 		struct napi_struct *n;
 
-		skb_defer_free_flush(sd);
+		skb_defer_free_flush();
 
 		if (list_empty(&list)) {
 			if (list_empty(&repoll)) {
@@ -12995,7 +12996,6 @@ static int __init net_dev_init(void)
 		sd->cpu = i;
 #endif
 		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
-		spin_lock_init(&sd->defer_lock);
 
 		gro_init(&sd->backlog.gro);
 		sd->backlog.poll = process_backlog;
@@ -13005,6 +13005,11 @@ static int __init net_dev_init(void)
 		if (net_page_pool_create(i))
 			goto out;
 	}
+	net_hotdata.skb_defer_nodes =
+		__alloc_percpu(sizeof(struct skb_defer_node) * nr_node_ids,
+			       __alignof__(struct skb_defer_node));
+	if (!net_hotdata.skb_defer_nodes)
+		goto out;
 
 	if (use_backlog_threads())
 		smpboot_register_percpu_thread(&backlog_threads);
diff --git a/net/core/dev.h b/net/core/dev.h
index d6b08d435479..900880e8b5b4 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -357,7 +357,7 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi)
 	WARN_ON(READ_ONCE(napi->list_owner) != -1);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+void kick_defer_list_purge(unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT	8
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 618afd59afff..bc12790017b0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7185,8 +7185,9 @@ static void kfree_skb_napi_cache(struct sk_buff *skb)
  */
 void skb_attempt_defer_free(struct sk_buff *skb)
 {
+	struct skb_defer_node *sdn;
+	unsigned long defer_count;
 	int cpu = skb->alloc_cpu;
-	struct softnet_data *sd;
 	unsigned int defer_max;
 	bool kick;
 
@@ -7200,27 +7201,24 @@ nodefer:	kfree_skb_napi_cache(skb);
 	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
 	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
 
-	sd = &per_cpu(softnet_data, cpu);
+	sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
+
 	defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
-	if (READ_ONCE(sd->defer_count) >= defer_max)
+	defer_count = atomic_long_inc_return(&sdn->defer_count);
+
+	if (defer_count >= defer_max)
 		goto nodefer;
 
-	spin_lock_bh(&sd->defer_lock);
-	/* Send an IPI every time queue reaches half capacity. */
-	kick = sd->defer_count == (defer_max >> 1);
-	/* Paired with the READ_ONCE() few lines above */
-	WRITE_ONCE(sd->defer_count, sd->defer_count + 1);
+	llist_add(&skb->ll_node, &sdn->defer_list);
 
-	skb->next = sd->defer_list;
-	/* Paired with READ_ONCE() in skb_defer_free_flush() */
-	WRITE_ONCE(sd->defer_list, skb);
-	spin_unlock_bh(&sd->defer_lock);
+	/* Send an IPI every time queue reaches half capacity. */
+	kick = (defer_count - 1) == (defer_max >> 1);
 
 	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
 	 * if we are unlucky enough (this seems very unlikely).
 	 */
 	if (unlikely(kick))
-		kick_defer_list_purge(sd, cpu);
+		kick_defer_list_purge(cpu);
 }
 
 static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
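For context on the conversion: the patch replaces the spinlock-protected sd->defer_list with the kernel's lock-free <linux/llist.h>. Producers on any CPU push with llist_add(), a cmpxchg loop on the list head, and the flushing CPU detaches the whole chain with a single atomic exchange via llist_del_all(), so neither side ever takes a lock. Below is a minimal userspace sketch of that pattern in C11 atomics; lnode, lhead, lpush and lpop_all are made-up names for illustration, not the kernel API.

#include <stdatomic.h>
#include <stdio.h>

/* Singly linked lock-free list node, analogous to struct llist_node. */
struct lnode {
	struct lnode *next;
};

struct lhead {
	_Atomic(struct lnode *) first;
};

/* Analogue of llist_add(): push one node with a release cmpxchg loop. */
static void lpush(struct lnode *n, struct lhead *h)
{
	struct lnode *first = atomic_load_explicit(&h->first, memory_order_relaxed);

	do {
		n->next = first;	/* cmpxchg refreshes 'first' on failure */
	} while (!atomic_compare_exchange_weak_explicit(&h->first, &first, n,
							memory_order_release,
							memory_order_relaxed));
}

/* Analogue of llist_del_all(): detach the entire chain in one exchange. */
static struct lnode *lpop_all(struct lhead *h)
{
	return atomic_exchange_explicit(&h->first, NULL, memory_order_acquire);
}

int main(void)
{
	struct lhead head = { NULL };
	struct lnode nodes[3];

	for (int i = 0; i < 3; i++)
		lpush(&nodes[i], &head);

	/* Walks in LIFO order: nodes[2], nodes[1], nodes[0]. */
	for (struct lnode *n = lpop_all(&head); n; n = n->next)
		printf("node %td\n", n - nodes);
	return 0;
}

lpop_all() hands back the nodes in LIFO order, which is fine here: skb_defer_free_flush() only needs to walk the detached chain once with llist_for_each_entry_safe() while new skbs keep landing on the now-empty head.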

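One detail of the new layout is easy to miss: net_hotdata.skb_defer_nodes is a per-CPU allocation of nr_node_ids consecutive entries, i.e. one skb_defer_node per (cpu, node) pair. The freeing CPU queues onto the block owned by the skb's alloc CPU, but into the slot for the freeing CPU's own NUMA node, so cmpxchg contention on any single llist head stays node-local. A hypothetical helper (not in the patch) spelling out the indexing both call sites do by hand:

/*
 * Hypothetical helper, not part of the patch: resolve the defer slot
 * for (cpu, node).  Each CPU owns a contiguous array of nr_node_ids
 * skb_defer_node entries inside the per-CPU block that net_dev_init()
 * allocates with __alloc_percpu().
 */
static inline struct skb_defer_node *skb_defer_node_of(int cpu, int node)
{
	return per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + node;
}

In these terms, skb_attempt_defer_free() uses skb_defer_node_of(skb->alloc_cpu, numa_node_id()), while skb_defer_free_flush() walks the local CPU's slots as this_cpu_ptr(net_hotdata.skb_defer_nodes) + node for each node. Note also the kick condition: atomic_long_inc_return() yields the post-increment value, so comparing defer_count - 1 against defer_max >> 1 preserves the old behavior of sending the IPI exactly when the queue crosses half capacity.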