diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 999 |
1 files changed, 364 insertions, 635 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 3721db716350..3430b1ed12e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,7 +131,6 @@ #include <linux/static_key.h> #include <linux/hashtable.h> #include <linux/vmalloc.h> -#include <linux/if_macvlan.h> #include "net-sysfs.h" @@ -147,8 +146,6 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; -static int netif_rx_internal(struct sk_buff *skb); - /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -482,7 +479,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -static void __dev_remove_offload(struct packet_offload *po) +void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -500,6 +497,7 @@ static void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } +EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1119,8 +1117,6 @@ rollback: write_seqcount_end(&devnet_rename_seq); - netdev_adjacent_rename_links(dev, oldname); - write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1140,7 +1136,6 @@ rollback: err = ret; write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); - memcpy(oldname, newname, IFNAMSIZ); goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -1208,7 +1203,7 @@ void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { call_netdevice_notifiers(NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } EXPORT_SYMBOL(netdev_state_change); @@ -1298,7 +1293,7 @@ int dev_open(struct net_device *dev) if (ret < 0) return ret; - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_UP, dev); return ret; @@ -1312,7 +1307,7 @@ static int __dev_close_many(struct list_head *head) ASSERT_RTNL(); might_sleep(); - list_for_each_entry(dev, head, close_list) { + list_for_each_entry(dev, head, unreg_list) { call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1328,7 +1323,7 @@ static int __dev_close_many(struct list_head *head) dev_deactivate_many(head); - list_for_each_entry(dev, head, close_list) { + list_for_each_entry(dev, head, unreg_list) { const struct net_device_ops *ops = dev->netdev_ops; /* @@ -1356,7 +1351,7 @@ static int __dev_close(struct net_device *dev) /* Temporarily disable netpoll until the interface is down */ netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); @@ -1367,20 +1362,21 @@ static int __dev_close(struct net_device *dev) static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; + LIST_HEAD(tmp_list); - /* Remove the devices that don't need to be closed */ - list_for_each_entry_safe(dev, tmp, head, close_list) + list_for_each_entry_safe(dev, tmp, head, unreg_list) if (!(dev->flags & IFF_UP)) - list_del_init(&dev->close_list); + list_move(&dev->unreg_list, &tmp_list); __dev_close_many(head); - list_for_each_entry_safe(dev, tmp, head, close_list) { - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); + list_for_each_entry(dev, head, unreg_list) { + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); } + /* rollback_registered_many needs the complete original list */ + list_splice(&tmp_list, head); return 0; } @@ -1401,7 +1397,7 @@ int dev_close(struct net_device *dev) /* Block netpoll rx while the interface is going down */ netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); @@ -1429,10 +1425,6 @@ void dev_disable_lro(struct net_device *dev) if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); - /* the same for macvlan devices */ - if (netif_is_macvlan(dev)) - dev = macvlan_dev_real_dev(dev); - dev->wanted_features &= ~NETIF_F_LRO; netdev_update_features(dev); @@ -1570,14 +1562,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). */ -static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, - struct netdev_notifier_info *info) +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } +EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -1699,11 +1691,15 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + skb->protocol = eth_type_trans(skb, dev); + /* eth_type_trans() can set pkt_type. + * call skb_scrub_packet() after it to clear pkt_type _after_ calling + * eth_type_trans(). + */ skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - return netif_rx_internal(skb); + return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2083,7 +2079,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -2149,42 +2145,30 @@ void __netif_schedule(struct Qdisc *q) } EXPORT_SYMBOL(__netif_schedule); -struct dev_kfree_skb_cb { - enum skb_free_reason reason; -}; - -static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) +void dev_kfree_skb_irq(struct sk_buff *skb) { - return (struct dev_kfree_skb_cb *)skb->cb; -} - -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) -{ - unsigned long flags; + if (atomic_dec_and_test(&skb->users)) { + struct softnet_data *sd; + unsigned long flags; - if (likely(atomic_read(&skb->users) == 1)) { - smp_rmb(); - atomic_set(&skb->users, 0); - } else if (likely(!atomic_dec_and_test(&skb->users))) { - return; + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + skb->next = sd->completion_queue; + sd->completion_queue = skb; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); } - get_kfree_skb_cb(skb)->reason = reason; - local_irq_save(flags); - skb->next = __this_cpu_read(softnet_data.completion_queue); - __this_cpu_write(softnet_data.completion_queue, skb); - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); } -EXPORT_SYMBOL(__dev_kfree_skb_irq); +EXPORT_SYMBOL(dev_kfree_skb_irq); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_any(struct sk_buff *skb) { if (in_irq() || irqs_disabled()) - __dev_kfree_skb_irq(skb, reason); + dev_kfree_skb_irq(skb); else dev_kfree_skb(skb); } -EXPORT_SYMBOL(__dev_kfree_skb_any); +EXPORT_SYMBOL(dev_kfree_skb_any); /** @@ -2394,8 +2378,6 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, } SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); - SKB_GSO_CB(skb)->encap_level = 0; - skb_reset_mac_header(skb); skb_reset_mac_len(skb); @@ -2458,8 +2440,13 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - kfree_skb_list(skb->next); - skb->next = NULL; + do { + struct sk_buff *nskb = skb->next; + + skb->next = nskb->next; + nskb->next = NULL; + kfree_skb(nskb); + } while (skb->next); cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -2534,6 +2521,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + netdev_features_t features) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && + !(features & NETIF_F_SG))); +} + int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { @@ -2601,7 +2603,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - trace_net_dev_start_xmit(skb, dev); rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) @@ -2620,7 +2621,6 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; - trace_net_dev_start_xmit(nskb, dev); rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { @@ -2646,7 +2646,6 @@ out_kfree_skb: out: return rc; } -EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2741,7 +2740,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); @@ -2778,9 +2777,8 @@ int dev_loopback_xmit(struct sk_buff *skb) EXPORT_SYMBOL(dev_loopback_xmit); /** - * __dev_queue_xmit - transmit a buffer + * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit - * @accel_priv: private data used for L2 forwarding offload * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling @@ -2803,7 +2801,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) +int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -2819,7 +2817,7 @@ int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_update_prio(skb); - txq = netdev_pick_tx(dev, skb, accel_priv); + txq = netdev_pick_tx(dev, skb); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT @@ -2884,19 +2882,8 @@ out: rcu_read_unlock_bh(); return rc; } - -int dev_queue_xmit(struct sk_buff *skb) -{ - return __dev_queue_xmit(skb, NULL); -} EXPORT_SYMBOL(dev_queue_xmit); -int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) -{ - return __dev_queue_xmit(skb, accel_priv); -} -EXPORT_SYMBOL(dev_queue_xmit_accel); - /*======================================================================= Receiver routines @@ -3012,7 +2999,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + if (!skb_get_rxhash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3157,7 +3144,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; @@ -3225,7 +3212,22 @@ enqueue: return NET_RX_DROP; } -static int netif_rx_internal(struct sk_buff *skb) +/** + * netif_rx - post buffer to the network code + * @skb: buffer to post + * + * This function receives a packet from a device driver and queues it for + * the upper (protocol) levels to process. It always succeeds. The buffer + * may be dropped during processing for congestion control or by the + * protocol layers. + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + */ + +int netif_rx(struct sk_buff *skb) { int ret; @@ -3261,38 +3263,14 @@ static int netif_rx_internal(struct sk_buff *skb) } return ret; } - -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. - * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) -{ - trace_netif_rx_entry(skb); - - return netif_rx_internal(skb); -} EXPORT_SYMBOL(netif_rx); int netif_rx_ni(struct sk_buff *skb) { int err; - trace_netif_rx_ni_entry(skb); - preempt_disable(); - err = netif_rx_internal(skb); + err = netif_rx(skb); if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -3318,10 +3296,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); - if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) - trace_consume_skb(skb); - else - trace_kfree_skb(skb, net_tx_action); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -3677,7 +3652,22 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static int netif_receive_skb_internal(struct sk_buff *skb) +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) { net_timestamp_check(netdev_tstamp_prequeue, skb); @@ -3703,28 +3693,6 @@ static int netif_receive_skb_internal(struct sk_buff *skb) #endif return __netif_receive_skb(skb); } - -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) -{ - trace_netif_receive_skb_entry(skb); - - return netif_receive_skb_internal(skb); -} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending @@ -3774,7 +3742,7 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb, 0); + err = ptype->callbacks.gro_complete(skb); break; } rcu_read_unlock(); @@ -3786,7 +3754,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb_internal(skb); + return netif_receive_skb(skb); } /* napi->gro_list contains packets ordered by age. @@ -3822,18 +3790,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; unsigned int maclen = skb->dev->hard_header_len; - u32 hash = skb_get_hash_raw(skb); for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; - NAPI_GRO_CB(p)->flush = 0; - - if (hash != skb_get_hash_raw(p)) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) @@ -3844,23 +3804,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) skb_gro_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; - } -} - -static void skb_gro_reset_offset(struct sk_buff *skb) -{ - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; - - NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(p)->flush = 0; } } @@ -3879,9 +3823,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; - skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3893,7 +3835,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -3918,23 +3859,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush) + if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) goto normal; - if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { - struct sk_buff *nskb = napi->gro_list; - - /* locate the end of the list to select the 'oldest' flow */ - while (nskb->next) { - pp = &nskb->next; - nskb = *pp; - } - *pp = NULL; - nskb->next = NULL; - napi_gro_complete(nskb); - } else { - napi->gro_count++; - } + napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); @@ -3972,39 +3900,12 @@ normal: goto pull; } -struct packet_offload *gro_find_receive_by_type(__be16 type) -{ - struct list_head *offload_head = &offload_base; - struct packet_offload *ptype; - - list_for_each_entry_rcu(ptype, offload_head, list) { - if (ptype->type != type || !ptype->callbacks.gro_receive) - continue; - return ptype; - } - return NULL; -} -EXPORT_SYMBOL(gro_find_receive_by_type); - -struct packet_offload *gro_find_complete_by_type(__be16 type) -{ - struct list_head *offload_head = &offload_base; - struct packet_offload *ptype; - - list_for_each_entry_rcu(ptype, offload_head, list) { - if (ptype->type != type || !ptype->callbacks.gro_complete) - continue; - return ptype; - } - return NULL; -} -EXPORT_SYMBOL(gro_find_complete_by_type); static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) + if (netif_receive_skb(skb)) ret = GRO_DROP; break; @@ -4027,9 +3928,26 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } +static void skb_gro_reset_offset(struct sk_buff *skb) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb_mac_header(skb) == skb_tail_pointer(skb) && + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + } +} + gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - trace_napi_gro_receive_entry(skb); + skb_gro_reset_offset(skb); return napi_skb_finish(dev_gro_receive(napi, skb), skb); } @@ -4053,7 +3971,8 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); - napi->skb = skb; + if (skb) + napi->skb = skb; } return skb; } @@ -4064,7 +3983,12 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) + case GRO_HELD: + skb->protocol = eth_type_trans(skb, skb->dev); + + if (ret == GRO_HELD) + skb_gro_pull(skb, -ETH_HLEN); + else if (netif_receive_skb(skb)) ret = GRO_DROP; break; @@ -4073,7 +3997,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; - case GRO_HELD: case GRO_MERGED: break; } @@ -4084,15 +4007,36 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + unsigned int hlen; + unsigned int off; napi->skb = NULL; - if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { - napi_reuse_skb(napi, skb); - return NULL; + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + off = skb_gro_offset(skb); + hlen = off + sizeof(*eth); + eth = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + eth = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } } - skb->protocol = eth_type_trans(skb, skb->dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; + +out: return skb; } @@ -4103,14 +4047,12 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; - trace_napi_gro_frags_entry(skb); - return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action_and_irq_enable sends any pending IPI's for rps. + * net_rps_action sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) @@ -4315,10 +4257,17 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { + struct sk_buff *skb, *next; + list_del_init(&napi->dev_list); napi_free_frags(napi); - kfree_skb_list(napi->gro_list); + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + kfree_skb(skb); + } + napi->gro_list = NULL; napi->gro_count = 0; } @@ -4425,29 +4374,44 @@ struct netdev_adjacent { /* upper master flag, there can only be one master device per list */ bool master; + /* indicates that this dev is our first-level lower/upper device */ + bool neighbour; + /* counter for the number of times this device was added to us */ u16 ref_nr; - /* private field for the users */ - void *private; - struct list_head list; struct rcu_head rcu; }; static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, - struct list_head *adj_list) + bool upper) { struct netdev_adjacent *adj; + struct list_head *dev_list; + + dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; - list_for_each_entry(adj, adj_list, list) { + list_for_each_entry(adj, dev_list, list) { if (adj->dev == adj_dev) return adj; } return NULL; } +static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, + struct net_device *udev) +{ + return __netdev_find_adj(dev, udev, true); +} + +static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, + struct net_device *ldev) +{ + return __netdev_find_adj(dev, ldev, false); +} + /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device @@ -4462,7 +4426,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); + return __netdev_find_upper(dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -4473,12 +4437,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -static bool netdev_has_any_upper_dev(struct net_device *dev) +bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->all_adj_list.upper); + return !list_empty(&dev->upper_dev_list); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4493,10 +4458,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) ASSERT_RTNL(); - if (list_empty(&dev->adj_list.upper)) + if (list_empty(&dev->upper_dev_list)) return NULL; - upper = list_first_entry(&dev->adj_list.upper, + upper = list_first_entry(&dev->upper_dev_list, struct netdev_adjacent, list); if (likely(upper->master)) return upper->dev; @@ -4504,119 +4469,30 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); -void *netdev_adjacent_get_private(struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - adj = list_entry(adj_list, struct netdev_adjacent, list); - - return adj->private; -} -EXPORT_SYMBOL(netdev_adjacent_get_private); - -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list +/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position * * Gets the next device from the dev's upper list, starting from iter * position. The caller must hold RCU read lock. */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + WARN_ON_ONCE(!rcu_read_lock_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->all_adj_list.upper) + if (&upper->list == &dev->upper_dev_list) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); - -/** - * netdev_lower_get_next_private - Get the next ->private from the - * lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent->private from the dev's lower neighbour - * list, starting from iter position. The caller must hold either hold the - * RTNL lock or its own locking that guarantees that the neighbour lower - * list will remain unchainged. - */ -void *netdev_lower_get_next_private(struct net_device *dev, - struct list_head **iter) -{ - struct netdev_adjacent *lower; - - lower = list_entry(*iter, struct netdev_adjacent, list); - - if (&lower->list == &dev->adj_list.lower) - return NULL; - - if (iter) - *iter = lower->list.next; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_get_next_private); - -/** - * netdev_lower_get_next_private_rcu - Get the next ->private from the - * lower neighbour list, RCU - * variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent->private from the dev's lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. - */ -void *netdev_lower_get_next_private_rcu(struct net_device *dev, - struct list_head **iter) -{ - struct netdev_adjacent *lower; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - - if (&lower->list == &dev->adj_list.lower) - return NULL; - - if (iter) - *iter = &lower->list; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); - -/** - * netdev_lower_get_first_private_rcu - Get the first ->private from the - * lower neighbour list, RCU - * variant - * @dev: device - * - * Gets the first netdev_adjacent->private from the dev's lower neighbour - * list. The caller must hold RCU read lock. - */ -void *netdev_lower_get_first_private_rcu(struct net_device *dev) -{ - struct netdev_adjacent *lower; - - lower = list_first_or_null_rcu(&dev->adj_list.lower, - struct netdev_adjacent, list); - if (lower) - return lower->private; - return NULL; -} -EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); /** * netdev_master_upper_dev_get_rcu - Get master upper device @@ -4629,7 +4505,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { struct netdev_adjacent *upper; - upper = list_first_or_null_rcu(&dev->adj_list.upper, + upper = list_first_or_null_rcu(&dev->upper_dev_list, struct netdev_adjacent, list); if (upper && likely(upper->master)) return upper->dev; @@ -4637,41 +4513,17 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); -int netdev_adjacent_sysfs_add(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) -{ - char linkname[IFNAMSIZ+7]; - sprintf(linkname, dev_list == &dev->adj_list.upper ? - "upper_%s" : "lower_%s", adj_dev->name); - return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), - linkname); -} -void netdev_adjacent_sysfs_del(struct net_device *dev, - char *name, - struct list_head *dev_list) -{ - char linkname[IFNAMSIZ+7]; - sprintf(linkname, dev_list == &dev->adj_list.upper ? - "upper_%s" : "lower_%s", name); - sysfs_remove_link(&(dev->dev.kobj), linkname); -} - -#define netdev_adjacent_is_neigh_list(dev, dev_list) \ - (dev_list == &dev->adj_list.upper || \ - dev_list == &dev->adj_list.lower) - static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, - struct list_head *dev_list, - void *private, bool master) + bool neighbour, bool master, + bool upper) { struct netdev_adjacent *adj; - int ret; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(dev, adj_dev, upper); if (adj) { + BUG_ON(neighbour); adj->ref_nr++; return 0; } @@ -4682,159 +4534,124 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; + adj->neighbour = neighbour; adj->ref_nr = 1; - adj->private = private; - dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of link added from %s to %s\n", - adj_dev->name, dev->name, adj_dev->name); + dev_hold(adj_dev); + pr_debug("dev_hold for %s, because of %s link added from %s to %s\n", + adj_dev->name, upper ? "upper" : "lower", dev->name, + adj_dev->name); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) { - ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); - if (ret) - goto free_adj; + if (!upper) { + list_add_tail_rcu(&adj->list, &dev->lower_dev_list); + return 0; } - /* Ensure that master link is always the first item in list. */ - if (master) { - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), "master"); - if (ret) - goto remove_symlinks; - - list_add_rcu(&adj->list, dev_list); - } else { - list_add_tail_rcu(&adj->list, dev_list); - } + /* Ensure that master upper link is always the first item in list. */ + if (master) + list_add_rcu(&adj->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&adj->list, &dev->upper_dev_list); return 0; +} -remove_symlinks: - if (netdev_adjacent_is_neigh_list(dev, dev_list)) - netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); -free_adj: - kfree(adj); - dev_put(adj_dev); +static inline int __netdev_upper_dev_insert(struct net_device *dev, + struct net_device *udev, + bool master, bool neighbour) +{ + return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, + true); +} - return ret; +static inline int __netdev_lower_dev_insert(struct net_device *dev, + struct net_device *ldev, + bool neighbour) +{ + return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, + false); } -static void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, bool upper) { struct netdev_adjacent *adj; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + if (upper) + adj = __netdev_find_upper(dev, adj_dev); + else + adj = __netdev_find_lower(dev, adj_dev); - if (!adj) { - pr_err("tried to remove device %s from %s\n", - dev->name, adj_dev->name); + if (!adj) BUG(); - } if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); adj->ref_nr--; return; } - if (adj->master) - sysfs_remove_link(&(dev->dev.kobj), "master"); - - if (netdev_adjacent_is_neigh_list(dev, dev_list)) - netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); - list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because link removed from %s to %s\n", - adj_dev->name, dev->name, adj_dev->name); + pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", + adj_dev->name, upper ? "upper" : "lower", dev->name, + adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); } -static int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static inline void __netdev_upper_dev_remove(struct net_device *dev, + struct net_device *udev) +{ + return __netdev_adjacent_dev_remove(dev, udev, true); +} + +static inline void __netdev_lower_dev_remove(struct net_device *dev, + struct net_device *ldev) +{ + return __netdev_adjacent_dev_remove(dev, ldev, false); +} + +int __netdev_adjacent_dev_insert_link(struct net_device *dev, + struct net_device *upper_dev, + bool master, bool neighbour) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_upper_dev_remove(dev, upper_dev); return ret; } return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower, - NULL, false); -} - -static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static inline int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *udev) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + return __netdev_adjacent_dev_insert_link(dev, udev, false, false); } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *udev, + bool master) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); + return __netdev_adjacent_dev_insert_link(dev, udev, master, true); } -static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - return ret; - } - - return 0; + __netdev_upper_dev_remove(dev, upper_dev); + __netdev_lower_dev_remove(upper_dev, dev); } -static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) -{ - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, - &dev->adj_list.upper, - &upper_dev->adj_list.lower); -} static int __netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev, bool master, - void *private) + struct net_device *upper_dev, bool master) { struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -4845,29 +4662,26 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) + if (__netdev_find_upper(upper_dev, dev)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) + if (__netdev_find_upper(dev, upper_dev)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, - master); + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); if (ret) return ret; /* Now that we linked these devs, make all the upper_dev's - * all_adj_list.upper visible to every dev's all_adj_list.lower an + * upper_dev_list visible to every dev's lower_dev_list and vice * versa, and don't forget the devices itself. All of these * links are non-neighbours. */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - pr_debug("Interlinking %s with %s, non-neighbour\n", - i->dev->name, j->dev->name); + list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(j, &upper_dev->upper_dev_list, list) { ret = __netdev_adjacent_dev_link(i->dev, j->dev); if (ret) goto rollback_mesh; @@ -4875,18 +4689,14 @@ static int __netdev_upper_dev_link(struct net_device *dev, } /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - pr_debug("linking %s's upper device %s with %s\n", - upper_dev->name, i->dev->name, dev->name); + list_for_each_entry(i, &upper_dev->upper_dev_list, list) { ret = __netdev_adjacent_dev_link(dev, i->dev); if (ret) goto rollback_upper_mesh; } /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - pr_debug("linking %s's lower device %s with %s\n", dev->name, - i->dev->name, upper_dev->name); + list_for_each_entry(i, &dev->lower_dev_list, list) { ret = __netdev_adjacent_dev_link(i->dev, upper_dev); if (ret) goto rollback_lower_mesh; @@ -4897,7 +4707,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, rollback_lower_mesh: to_i = i; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { + list_for_each_entry(i, &dev->lower_dev_list, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(i->dev, upper_dev); @@ -4907,7 +4717,7 @@ rollback_lower_mesh: rollback_upper_mesh: to_i = i; - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { + list_for_each_entry(i, &upper_dev->upper_dev_list, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(dev, i->dev); @@ -4918,8 +4728,8 @@ rollback_upper_mesh: rollback_mesh: to_i = i; to_j = j; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { + list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(j, &upper_dev->upper_dev_list, list) { if (i == to_i && j == to_j) break; __netdev_adjacent_dev_unlink(i->dev, j->dev); @@ -4928,7 +4738,7 @@ rollback_mesh: break; } - __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev); return ret; } @@ -4946,7 +4756,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -4964,18 +4774,10 @@ EXPORT_SYMBOL(netdev_upper_dev_link); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device @@ -4990,72 +4792,38 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct netdev_adjacent *i, *j; ASSERT_RTNL(); - __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower * devices from all upper_dev's upper devices and vice * versa, to maintain the graph relationship. */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) + list_for_each_entry(i, &dev->lower_dev_list, list) + list_for_each_entry(j, &upper_dev->upper_dev_list, list) __netdev_adjacent_dev_unlink(i->dev, j->dev); /* remove also the devices itself from lower/upper device * list */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) + list_for_each_entry(i, &dev->lower_dev_list, list) __netdev_adjacent_dev_unlink(i->dev, upper_dev); - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) + list_for_each_entry(i, &upper_dev->upper_dev_list, list) __netdev_adjacent_dev_unlink(dev, i->dev); call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) -{ - struct netdev_adjacent *iter; - - list_for_each_entry(iter, &dev->adj_list.upper, list) { - netdev_adjacent_sysfs_del(iter->dev, oldname, - &iter->dev->adj_list.lower); - netdev_adjacent_sysfs_add(iter->dev, dev, - &iter->dev->adj_list.lower); - } - - list_for_each_entry(iter, &dev->adj_list.lower, list) { - netdev_adjacent_sysfs_del(iter->dev, oldname, - &iter->dev->adj_list.upper); - netdev_adjacent_sysfs_add(iter->dev, dev, - &iter->dev->adj_list.upper); - } -} - -void *netdev_lower_dev_get_private(struct net_device *dev, - struct net_device *lower_dev) -{ - struct netdev_adjacent *lower; - - if (!lower_dev) - return NULL; - lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_dev_get_private); - static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_change_rx_flags) + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } -static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; kuid_t uid; @@ -5098,8 +4866,6 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev_change_rx_flags(dev, IFF_PROMISC); } - if (notify) - __dev_notify_flags(dev, old_flags, IFF_PROMISC); return 0; } @@ -5119,7 +4885,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) unsigned int old_flags = dev->flags; int err; - err = __dev_set_promiscuity(dev, inc, true); + err = __dev_set_promiscuity(dev, inc); if (err < 0) return err; if (dev->flags != old_flags) @@ -5128,9 +4894,22 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) +/** + * dev_set_allmulti - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. + */ + +int dev_set_allmulti(struct net_device *dev, int inc) { - unsigned int old_flags = dev->flags, old_gflags = dev->gflags; + unsigned int old_flags = dev->flags; ASSERT_RTNL(); @@ -5153,30 +4932,9 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) if (dev->flags ^ old_flags) { dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); - if (notify) - __dev_notify_flags(dev, old_flags, - dev->gflags ^ old_gflags); } return 0; } - -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - return __dev_set_allmulti(dev, inc, true); -} EXPORT_SYMBOL(dev_set_allmulti); /* @@ -5201,10 +4959,10 @@ void __dev_set_rx_mode(struct net_device *dev) * therefore calling __dev_set_promiscuity here is safe. */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1, false); + __dev_set_promiscuity(dev, 1); dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1, false); + __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } } @@ -5293,13 +5051,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; - unsigned int old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; - - if (__dev_set_promiscuity(dev, inc, false) >= 0) - if (dev->flags != old_flags) - dev_set_rx_mode(dev); + dev_set_promiscuity(dev, inc); } /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI @@ -5310,20 +5064,16 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - __dev_set_allmulti(dev, inc, false); + dev_set_allmulti(dev, inc); } return ret; } -void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, - unsigned int gchanges) +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) { unsigned int changes = dev->flags ^ old_flags; - if (gchanges) - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); - if (changes & IFF_UP) { if (dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_UP, dev); @@ -5352,29 +5102,21 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, int dev_change_flags(struct net_device *dev, unsigned int flags) { int ret; - unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; + unsigned int changes, old_flags = dev->flags; ret = __dev_change_flags(dev, flags); if (ret < 0) return ret; - changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); - __dev_notify_flags(dev, old_flags, changes); + changes = old_flags ^ dev->flags; + if (changes) + rtmsg_ifinfo(RTM_NEWLINK, dev, changes); + + __dev_notify_flags(dev, old_flags); return ret; } EXPORT_SYMBOL(dev_change_flags); -static int __dev_set_mtu(struct net_device *dev, int new_mtu) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if (ops->ndo_change_mtu) - return ops->ndo_change_mtu(dev, new_mtu); - - dev->mtu = new_mtu; - return 0; -} - /** * dev_set_mtu - Change maximum transfer unit * @dev: device @@ -5384,7 +5126,8 @@ static int __dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { - int err, orig_mtu; + const struct net_device_ops *ops = dev->netdev_ops; + int err; if (new_mtu == dev->mtu) return 0; @@ -5396,25 +5139,14 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; - err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); - err = notifier_to_errno(err); - if (err) - return err; - - orig_mtu = dev->mtu; - err = __dev_set_mtu(dev, new_mtu); + err = 0; + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); + else + dev->mtu = new_mtu; - if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); - err = notifier_to_errno(err); - if (err) { - /* setting mtu back and notifying everyone again, - * so that they have a chance to revert changes. - */ - __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); - } - } + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } EXPORT_SYMBOL(dev_set_mtu); @@ -5527,7 +5259,6 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { struct net_device *dev, *tmp; - LIST_HEAD(close_head); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -5550,9 +5281,7 @@ static void rollback_registered_many(struct list_head *head) } /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) - list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(head); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -5575,7 +5304,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains @@ -5768,7 +5497,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; @@ -5907,8 +5636,13 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; + /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; + if (dev->features & NETIF_F_ALL_CSUM) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. @@ -5969,7 +5703,7 @@ int register_netdevice(struct net_device *dev) */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); out: return ret; @@ -6276,16 +6010,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); -void netdev_freemem(struct net_device *dev) -{ - char *addr = (char *)dev - dev->padded; - - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} - /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6295,7 +6019,7 @@ void netdev_freemem(struct net_device *dev) * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subqueue structs + * and performs basic initialization. Also allocates subquue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, @@ -6313,7 +6037,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; @@ -6329,9 +6053,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!p) - p = vzalloc(alloc_size); + p = kzalloc(alloc_size, GFP_KERNEL); if (!p) return NULL; @@ -6340,7 +6062,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_dev; + goto free_p; if (dev_addr_init(dev)) goto free_pcpu; @@ -6355,12 +6077,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); - INIT_LIST_HEAD(&dev->close_list); INIT_LIST_HEAD(&dev->link_watch_list); - INIT_LIST_HEAD(&dev->adj_list.upper); - INIT_LIST_HEAD(&dev->adj_list.lower); - INIT_LIST_HEAD(&dev->all_adj_list.upper); - INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->upper_dev_list); + INIT_LIST_HEAD(&dev->lower_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); @@ -6369,7 +6088,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) @@ -6389,12 +6108,12 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS kfree(dev->_rx); #endif -free_dev: - netdev_freemem(dev); +free_p: + kfree(p); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6414,7 +6133,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS +#ifdef CONFIG_RPS kfree(dev->_rx); #endif @@ -6431,7 +6150,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - netdev_freemem(dev); + kfree((char *)dev - dev->padded); return; } @@ -6593,7 +6312,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains @@ -6632,7 +6351,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); synchronize_net(); err = 0; @@ -6684,11 +6403,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx_internal(skb); + netif_rx(skb); input_queue_head_incr(oldsd); } while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx_internal(skb); + netif_rx(skb); input_queue_head_incr(oldsd); } @@ -7001,18 +6720,28 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); + memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); + sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); + sd->output_queue = NULL; sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; + sd->csd.flags = 0; sd->cpu = i; #endif sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; + sd->backlog.gro_list = NULL; + sd->backlog.gro_count = 0; + +#ifdef CONFIG_NET_FLOW_LIMIT + sd->flow_limit = NULL; +#endif } dev_boot_phase = 0; |