diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 144 |
1 files changed, 91 insertions, 53 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 1488f700bf81..735096d42c1d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -160,8 +160,6 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static int netif_rx_internal(struct sk_buff *skb); -static int call_netdevice_notifiers_info(unsigned long val, - struct netdev_notifier_info *info); static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack); @@ -1919,8 +1917,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev, * are as for raw_notifier_call_chain(). */ -static int call_netdevice_notifiers_info(unsigned long val, - struct netdev_notifier_info *info) +int call_netdevice_notifiers_info(unsigned long val, + struct netdev_notifier_info *info) { struct net *net = dev_net(info->dev); int ret; @@ -2535,6 +2533,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, struct xps_map *map, *new_map; unsigned int nr_ids; + WARN_ON_ONCE(index >= dev->num_tx_queues); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; @@ -3075,7 +3075,7 @@ void __netif_schedule(struct Qdisc *q) EXPORT_SYMBOL(__netif_schedule); struct dev_kfree_skb_cb { - enum skb_free_reason reason; + enum skb_drop_reason reason; }; static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) @@ -3108,7 +3108,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue) } EXPORT_SYMBOL(netif_tx_wake_queue); -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) { unsigned long flags; @@ -3128,18 +3128,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } -EXPORT_SYMBOL(__dev_kfree_skb_irq); +EXPORT_SYMBOL(dev_kfree_skb_irq_reason); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (in_hardirq() || irqs_disabled()) - __dev_kfree_skb_irq(skb, reason); - else if (unlikely(reason == SKB_REASON_DROPPED)) - kfree_skb(skb); + dev_kfree_skb_irq_reason(skb, reason); else - consume_skb(skb); + kfree_skb_reason(skb, reason); } -EXPORT_SYMBOL(__dev_kfree_skb_any); +EXPORT_SYMBOL(dev_kfree_skb_any_reason); /** @@ -3317,8 +3315,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb) skb->len - start, ~(__u32)0, crc32c_csum_stub)); *(__le32 *)(skb->data + offset) = crc32c_csum; - skb->ip_summed = CHECKSUM_NONE; - skb->csum_not_inet = 0; + skb_reset_csum_not_inet(skb); out: return ret; } @@ -3736,25 +3733,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) * we add to pkt_len the headers size of all segments */ if (shinfo->gso_size && skb_transport_header_was_set(skb)) { - unsigned int hdr_len; u16 gso_segs = shinfo->gso_segs; + unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, skb_transport_offset(skb), + th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; - if (skb_header_pointer(skb, skb_transport_offset(skb), + if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } @@ -4361,7 +4358,12 @@ static inline void ____napi_schedule(struct softnet_data *sd, } list_add_tail(&napi->poll_list, &sd->poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + WRITE_ONCE(napi->list_owner, smp_processor_id()); + /* If not called from net_rx_action() + * we have to raise NET_RX_SOFTIRQ. + */ + if (!sd->in_net_rx_action) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -4583,11 +4585,16 @@ static void trigger_rx_softirq(void *data) } /* - * Check if this softnet_data structure is another cpu one - * If yes, queue it to our IPI list and return 1 - * If no, return 0 + * After we queued a packet into sd->input_pkt_queue, + * we need to make sure this queue is serviced soon. + * + * - If this is another cpu queue, link it to our rps_ipi_list, + * and make sure we will process rps_ipi_list from net_rx_action(). + * + * - If this is our own queue, NAPI schedule our backlog. + * Note that this also raises NET_RX_SOFTIRQ. */ -static int napi_schedule_rps(struct softnet_data *sd) +static void napi_schedule_rps(struct softnet_data *sd) { struct softnet_data *mysd = this_cpu_ptr(&softnet_data); @@ -4596,12 +4603,15 @@ static int napi_schedule_rps(struct softnet_data *sd) sd->rps_ipi_next = mysd->rps_ipi_list; mysd->rps_ipi_list = sd; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - return 1; + /* If not called from net_rx_action() or napi_threaded_poll() + * we have to raise NET_RX_SOFTIRQ. + */ + if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + return; } #endif /* CONFIG_RPS */ __napi_schedule_irqoff(&mysd->backlog); - return 0; } #ifdef CONFIG_NET_FLOW_LIMIT @@ -5021,16 +5031,17 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(refcount_read(&skb->users)); - if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED)) trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, - SKB_DROP_REASON_NOT_SPECIFIED); + get_kfree_skb_cb(skb)->reason); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); else - __kfree_skb_defer(skb); + __napi_kfree_skb(skb, + get_kfree_skb_cb(skb)->reason); } } @@ -6059,6 +6070,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); local_irq_restore(flags); } + WRITE_ONCE(n->list_owner, -1); val = READ_ONCE(n->state); do { @@ -6374,6 +6386,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif + napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); @@ -6585,9 +6598,31 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } +static void skb_defer_free_flush(struct softnet_data *sd) +{ + struct sk_buff *skb, *next; + + /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ + if (!READ_ONCE(sd->defer_list)) + return; + + spin_lock(&sd->defer_lock); + skb = sd->defer_list; + sd->defer_list = NULL; + sd->defer_count = 0; + spin_unlock(&sd->defer_lock); + + while (skb != NULL) { + next = skb->next; + napi_consume_skb(skb, 1); + skb = next; + } +} + static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; + struct softnet_data *sd; void *have; while (!napi_thread_wait(napi)) { @@ -6595,11 +6630,21 @@ static int napi_threaded_poll(void *data) bool repoll = false; local_bh_disable(); + sd = this_cpu_ptr(&softnet_data); + sd->in_napi_threaded_poll = true; have = netpoll_poll_lock(napi); __napi_poll(napi, &repoll); netpoll_poll_unlock(have); + sd->in_napi_threaded_poll = false; + barrier(); + + if (sd_has_rps_ipi_waiting(sd)) { + local_irq_disable(); + net_rps_action_and_irq_enable(sd); + } + skb_defer_free_flush(sd); local_bh_enable(); if (!repoll) @@ -6611,27 +6656,6 @@ static int napi_threaded_poll(void *data) return 0; } -static void skb_defer_free_flush(struct softnet_data *sd) -{ - struct sk_buff *skb, *next; - - /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ - if (!READ_ONCE(sd->defer_list)) - return; - - spin_lock_irq(&sd->defer_lock); - skb = sd->defer_list; - sd->defer_list = NULL; - sd->defer_count = 0; - spin_unlock_irq(&sd->defer_lock); - - while (skb != NULL) { - next = skb->next; - napi_consume_skb(skb, 1); - skb = next; - } -} - static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -6641,6 +6665,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) LIST_HEAD(list); LIST_HEAD(repoll); +start: + sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); @@ -6651,8 +6677,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) skb_defer_free_flush(sd); if (list_empty(&list)) { - if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) - goto end; + if (list_empty(&repoll)) { + sd->in_net_rx_action = false; + barrier(); + /* We need to check if ____napi_schedule() + * had refilled poll_list while + * sd->in_net_rx_action was true. + */ + if (!list_empty(&sd->poll_list)) + goto start; + if (!sd_has_rps_ipi_waiting(sd)) + goto end; + } break; } @@ -6677,6 +6713,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); + else + sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); end:; |