diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 171 | ||||
-rw-r--r-- | net/core/dev.c | 154 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 4 | ||||
-rw-r--r-- | net/core/dev_ioctl.c | 9 | ||||
-rw-r--r-- | net/core/ethtool.c | 12 | ||||
-rw-r--r-- | net/core/iovec.c | 47 | ||||
-rw-r--r-- | net/core/link_watch.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 279 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 18 | ||||
-rw-r--r-- | net/core/netpoll.c | 4 | ||||
-rw-r--r-- | net/core/pktgen.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 121 | ||||
-rw-r--r-- | net/core/sock.c | 7 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 21 | ||||
-rw-r--r-- | net/core/utils.c | 3 |
15 files changed, 488 insertions, 367 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index fdbc9a81d4c2..b6e303b0f01f 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -49,6 +49,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/uio.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -393,34 +394,30 @@ fault: EXPORT_SYMBOL(skb_copy_datagram_iovec); /** - * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. + * skb_copy_datagram_iter - Copy a datagram to an iovec iterator. * @skb: buffer to copy * @offset: offset in the buffer to start copying from - * @to: io vector to copy to - * @to_offset: offset in the io vector to start copying to + * @to: iovec iterator to copy to * @len: amount of data to copy from buffer to iovec - * - * Returns 0 or -EFAULT. - * Note: the iovec is not modified during the copy. */ -int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, - const struct iovec *to, int to_offset, - int len) +int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; + trace_skb_copy_datagram_iovec(skb, len); + /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; - if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) - goto fault; + if (copy_to_iter(skb->data + offset, copy, to) != copy) + goto short_copy; if ((len -= copy) == 0) return 0; offset += copy; - to_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -432,22 +429,15 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - struct page *page = skb_frag_page(frag); - if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_toiovecend(to, vaddr + frag->page_offset + - offset - start, to_offset, copy); - kunmap(page); - if (err) - goto fault; + if (copy_page_to_iter(skb_frag_page(frag), + frag->page_offset + offset - + start, copy, to) != copy) + goto short_copy; if (!(len -= copy)) return 0; offset += copy; - to_offset += copy; } start = end; } @@ -461,39 +451,45 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (skb_copy_datagram_const_iovec(frag_iter, - offset - start, - to, to_offset, - copy)) + if (skb_copy_datagram_iter(frag_iter, offset - start, + to, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - to_offset += copy; } start = end; } if (!len) return 0; + /* This is not really a user copy fault, but rather someone + * gave us a bogus length on the skb. We should probably + * print a warning here as it may indicate a kernel bug. + */ + fault: return -EFAULT; + +short_copy: + if (iov_iter_count(to)) + goto fault; + + return 0; } -EXPORT_SYMBOL(skb_copy_datagram_const_iovec); +EXPORT_SYMBOL(skb_copy_datagram_iter); /** - * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. + * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter. * @skb: buffer to copy * @offset: offset in the buffer to start copying to - * @from: io vector to copy to - * @from_offset: offset in the io vector to start copying from + * @from: the copy source * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. - * Note: the iovec is not modified during the copy. */ -int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - const struct iovec *from, int from_offset, +int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, + struct iov_iter *from, int len) { int start = skb_headlen(skb); @@ -504,13 +500,11 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (memcpy_fromiovecend(skb->data + offset, from, from_offset, - copy)) + if (copy_from_iter(skb->data + offset, copy, from) != copy) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - from_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -522,24 +516,19 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - struct page *page = skb_frag_page(frag); + size_t copied; if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_fromiovecend(vaddr + frag->page_offset + - offset - start, - from, from_offset, copy); - kunmap(page); - if (err) + copied = copy_page_from_iter(skb_frag_page(frag), + frag->page_offset + offset - start, + copy, from); + if (copied != copy) goto fault; if (!(len -= copy)) return 0; offset += copy; - from_offset += copy; } start = end; } @@ -553,16 +542,13 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (skb_copy_datagram_from_iovec(frag_iter, - offset - start, - from, - from_offset, - copy)) + if (skb_copy_datagram_from_iter(frag_iter, + offset - start, + from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - from_offset += copy; } start = end; } @@ -572,78 +558,61 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, fault: return -EFAULT; } -EXPORT_SYMBOL(skb_copy_datagram_from_iovec); +EXPORT_SYMBOL(skb_copy_datagram_from_iter); /** - * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec + * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter * @skb: buffer to copy - * @from: io vector to copy from - * @offset: offset in the io vector to start copying from - * @count: amount of vectors to copy to buffer from + * @from: the source to copy from * * The function will first copy up to headlen, and then pin the userspace * pages and build frags through them. * * Returns 0, -EFAULT or -EMSGSIZE. - * Note: the iovec is not modified during the copy */ -int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, - int offset, size_t count) +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) { - int len = iov_length(from, count) - offset; + int len = iov_iter_count(from); int copy = min_t(int, skb_headlen(skb), len); - int size; - int i = 0; + int frag = 0; /* copy up to skb headlen */ - if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy)) + if (skb_copy_datagram_from_iter(skb, 0, from, copy)) return -EFAULT; - if (len == copy) - return 0; - - offset += copy; - while (count--) { - struct page *page[MAX_SKB_FRAGS]; - int num_pages; - unsigned long base; + while (iov_iter_count(from)) { + struct page *pages[MAX_SKB_FRAGS]; + size_t start; + ssize_t copied; unsigned long truesize; + int n = 0; - /* Skip over from offset and copied */ - if (offset >= from->iov_len) { - offset -= from->iov_len; - ++from; - continue; - } - len = from->iov_len - offset; - base = (unsigned long)from->iov_base + offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - if (i + size > MAX_SKB_FRAGS) + if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - num_pages = get_user_pages_fast(base, size, 0, &page[i]); - if (num_pages != size) { - release_pages(&page[i], num_pages, 0); + + copied = iov_iter_get_pages(from, pages, ~0U, + MAX_SKB_FRAGS - frag, &start); + if (copied < 0) return -EFAULT; - } - truesize = size * PAGE_SIZE; - skb->data_len += len; - skb->len += len; + + iov_iter_advance(from, copied); + + truesize = PAGE_ALIGN(copied + start); + skb->data_len += copied; + skb->len += copied; skb->truesize += truesize; atomic_add(truesize, &skb->sk->sk_wmem_alloc); - while (len) { - int off = base & ~PAGE_MASK; - int size = min_t(int, len, PAGE_SIZE - off); - skb_fill_page_desc(skb, i, page[i], off, size); - base += size; - len -= size; - i++; + while (copied) { + int size = min_t(int, copied, PAGE_SIZE - start); + skb_fill_page_desc(skb, frag++, pages[n], start, size); + start = 0; + copied -= size; + n++; } - offset = 0; - ++from; } return 0; } -EXPORT_SYMBOL(zerocopy_sg_from_iovec); +EXPORT_SYMBOL(zerocopy_sg_from_iter); static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 __user *to, int len, diff --git a/net/core/dev.c b/net/core/dev.c index 945bbd001359..ac4836241a96 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -118,6 +118,7 @@ #include <linux/if_vlan.h> #include <linux/ip.h> #include <net/ip.h> +#include <net/mpls.h> #include <linux/ipv6.h> #include <linux/in.h> #include <linux/jhash.h> @@ -133,6 +134,7 @@ #include <linux/vmalloc.h> #include <linux/if_macvlan.h> #include <linux/errqueue.h> +#include <linux/hrtimer.h> #include "net-sysfs.h" @@ -1435,22 +1437,17 @@ EXPORT_SYMBOL(dev_close); */ void dev_disable_lro(struct net_device *dev) { - /* - * If we're trying to disable lro on a vlan device - * use the underlying physical device instead - */ - if (is_vlan_dev(dev)) - dev = vlan_dev_real_dev(dev); - - /* the same for macvlan devices */ - if (netif_is_macvlan(dev)) - dev = macvlan_dev_real_dev(dev); + struct net_device *lower_dev; + struct list_head *iter; dev->wanted_features &= ~NETIF_F_LRO; netdev_update_features(dev); if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); + + netdev_for_each_lower_dev(dev, lower_dev, iter) + dev_disable_lro(lower_dev); } EXPORT_SYMBOL(dev_disable_lro); @@ -2530,7 +2527,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { - if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; @@ -2647,12 +2644,8 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (vlan_tx_tag_present(skb) && - !vlan_hw_offload_capable(features, skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); - if (skb) - skb->vlan_tci = 0; - } + !vlan_hw_offload_capable(features, skb->vlan_proto)) + skb = __vlan_hwaccel_push_inside(skb); return skb; } @@ -4316,20 +4309,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) local_irq_enable(); } +static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + return sd->rps_ipi_list != NULL; +#else + return false; +#endif +} + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); -#ifdef CONFIG_RPS /* Check if we have pending ipi, its better to send them now, * not waiting net_rx_action() end. */ - if (sd->rps_ipi_list) { + if (sd_has_rps_ipi_waiting(sd)) { local_irq_disable(); net_rps_action_and_irq_enable(sd); } -#endif + napi->weight = weight_p; local_irq_disable(); while (1) { @@ -4356,7 +4357,6 @@ static int process_backlog(struct napi_struct *napi, int quota) * We can use a plain write instead of clear_bit(), * and we dont need an smp_mb() memory barrier. */ - list_del(&napi->poll_list); napi->state = 0; rps_unlock(sd); @@ -4376,7 +4376,8 @@ static int process_backlog(struct napi_struct *napi, int quota) * __napi_schedule - schedule for receive * @n: entry to schedule * - * The entry's receive function will be scheduled to run + * The entry's receive function will be scheduled to run. + * Consider using __napi_schedule_irqoff() if hard irqs are masked. */ void __napi_schedule(struct napi_struct *n) { @@ -4388,18 +4389,29 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +/** + * __napi_schedule_irqoff - schedule for receive + * @n: entry to schedule + * + * Variant of __napi_schedule() assuming hard irqs are masked + */ +void __napi_schedule_irqoff(struct napi_struct *n) +{ + ____napi_schedule(this_cpu_ptr(&softnet_data), n); +} +EXPORT_SYMBOL(__napi_schedule_irqoff); + void __napi_complete(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - BUG_ON(n->gro_list); - list_del(&n->poll_list); + list_del_init(&n->poll_list); smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); } EXPORT_SYMBOL(__napi_complete); -void napi_complete(struct napi_struct *n) +void napi_complete_done(struct napi_struct *n, int work_done) { unsigned long flags; @@ -4410,12 +4422,28 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n, false); - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); + if (n->gro_list) { + unsigned long timeout = 0; + + if (work_done) + timeout = n->dev->gro_flush_timeout; + + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + else + napi_gro_flush(n, false); + } + if (likely(list_empty(&n->poll_list))) { + WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); + } else { + /* If n->poll_list is not empty, we need to mask irqs */ + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(napi_complete); +EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ struct napi_struct *napi_by_id(unsigned int napi_id) @@ -4469,10 +4497,23 @@ void napi_hash_del(struct napi_struct *napi) } EXPORT_SYMBOL_GPL(napi_hash_del); +static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) +{ + struct napi_struct *napi; + + napi = container_of(timer, struct napi_struct, timer); + if (napi->gro_list) + napi_schedule(napi); + + return HRTIMER_NORESTART; +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + napi->timer.function = napi_watchdog; napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; @@ -4491,6 +4532,20 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, } EXPORT_SYMBOL(netif_napi_add); +void napi_disable(struct napi_struct *n) +{ + might_sleep(); + set_bit(NAPI_STATE_DISABLE, &n->state); + + while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) + msleep(1); + + hrtimer_cancel(&n->timer); + + clear_bit(NAPI_STATE_DISABLE, &n->state); +} +EXPORT_SYMBOL(napi_disable); + void netif_napi_del(struct napi_struct *napi) { list_del_init(&napi->dev_list); @@ -4507,29 +4562,28 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; + LIST_HEAD(list); + LIST_HEAD(repoll); void *have; local_irq_disable(); + list_splice_init(&sd->poll_list, &list); + local_irq_enable(); - while (!list_empty(&sd->poll_list)) { + while (!list_empty(&list)) { struct napi_struct *n; int work, weight; - /* If softirq window is exhuasted then punt. + /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; - local_irq_enable(); - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); + n = list_first_entry(&list, struct napi_struct, poll_list); + list_del_init(&n->poll_list); have = netpoll_poll_lock(n); @@ -4551,8 +4605,6 @@ static void net_rx_action(struct softirq_action *h) budget -= work; - local_irq_disable(); - /* Drivers must not modify the NAPI state if they * consume the entire weight. In such cases this code * still "owns" the NAPI instance and therefore can @@ -4560,32 +4612,40 @@ static void net_rx_action(struct softirq_action *h) */ if (unlikely(work == weight)) { if (unlikely(napi_disable_pending(n))) { - local_irq_enable(); napi_complete(n); - local_irq_disable(); } else { if (n->gro_list) { /* flush too old packets * If HZ < 1000, flush all packets. */ - local_irq_enable(); napi_gro_flush(n, HZ >= 1000); - local_irq_disable(); } - list_move_tail(&n->poll_list, &sd->poll_list); + list_add_tail(&n->poll_list, &repoll); } } netpoll_poll_unlock(have); } + + if (!sd_has_rps_ipi_waiting(sd) && + list_empty(&list) && + list_empty(&repoll)) + return; out: + local_irq_disable(); + + list_splice_tail_init(&sd->poll_list, &list); + list_splice_tail(&repoll, &list); + list_splice(&list, &sd->poll_list); + if (!list_empty(&sd->poll_list)) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + net_rps_action_and_irq_enable(sd); return; softnet_break: sd->time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index b6b230600b97..c0548d268e1a 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -278,8 +278,8 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, EXPORT_SYMBOL(__hw_addr_sync_dev); /** - * __hw_addr_unsync_dev - Remove synchonized addresses from device - * @list: address list to remove syncronized addresses from + * __hw_addr_unsync_dev - Remove synchronized addresses from device + * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 72e899a3efda..b94b1d293506 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -142,10 +142,12 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm case SIOCGIFHWADDR: if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + memset(ifr->ifr_hwaddr.sa_data, 0, + sizeof(ifr->ifr_hwaddr.sa_data)); else memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + min(sizeof(ifr->ifr_hwaddr.sa_data), + (size_t)dev->addr_len)); ifr->ifr_hwaddr.sa_family = dev->type; return 0; @@ -265,7 +267,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + min(sizeof(ifr->ifr_hwaddr.sa_data), + (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 06dfb293e5aa..715f51f321e9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/sched.h> +#include <linux/net.h> /* * Some useful ethtool_ops methods that're device independent. @@ -84,7 +85,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", - [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -574,6 +574,16 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, return 0; } +u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; + +void netdev_rss_key_fill(void *buffer, size_t len) +{ + BUG_ON(len > sizeof(netdev_rss_key)); + net_get_random_once(netdev_rss_key, sizeof(netdev_rss_key)); + memcpy(buffer, netdev_rss_key, len); +} +EXPORT_SYMBOL(netdev_rss_key_fill); + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { diff --git a/net/core/iovec.c b/net/core/iovec.c index e1ec45ab1e63..dcbe98b3726a 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -28,53 +28,6 @@ #include <net/sock.h> /* - * Verify iovec. The caller must ensure that the iovec is big enough - * to hold the message iovec. - * - * Save time not doing access_ok. copy_*_user will make this work - * in any case. - */ - -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode) -{ - int size, ct, err; - - if (m->msg_name && m->msg_namelen) { - if (mode == VERIFY_READ) { - void __user *namep; - namep = (void __user __force *) m->msg_name; - err = move_addr_to_kernel(namep, m->msg_namelen, - address); - if (err < 0) - return err; - } - m->msg_name = address; - } else { - m->msg_name = NULL; - m->msg_namelen = 0; - } - - size = m->msg_iovlen * sizeof(struct iovec); - if (copy_from_user(iov, (void __user __force *) m->msg_iov, size)) - return -EFAULT; - - m->msg_iov = iov; - err = 0; - - for (ct = 0; ct < m->msg_iovlen; ct++) { - size_t len = iov[ct].iov_len; - - if (len > INT_MAX - err) { - len = INT_MAX - err; - iov[ct].iov_len = len; - } - err += len; - } - - return err; -} - -/* * And now for the all-in-one: copy and checksum from a user iovec * directly to a datagram * Calls to csum_partial but the last must be in 32 bit chunks diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bd0767e6b2b3..49a9e3e06c08 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -21,7 +21,7 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> -#include <asm/types.h> +#include <linux/types.h> enum lw_bits { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ef31fef25e5a..8e38f17288d3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -56,7 +56,6 @@ static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); -static struct neigh_table *neigh_tables; #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; #endif @@ -87,13 +86,8 @@ static const struct file_operations neigh_stat_seq_fops; the most complicated procedure, which we allow is dev->hard_header. It is supposed, that dev->hard_header is simplistic and does not make callbacks to neighbour tables. - - The last lock is neigh_tbl_lock. It is pure SMP lock, protecting - list of neighbour tables. This list is used only in process context, */ -static DEFINE_RWLOCK(neigh_tbl_lock); - static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); @@ -773,7 +767,7 @@ static void neigh_periodic_work(struct work_struct *work) if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; tbl->last_rand = jiffies; - for (p = &tbl->parms; p; p = p->next) + list_for_each_entry(p, &tbl->parms_list, list) p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } @@ -1446,7 +1440,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, { struct neigh_parms *p; - for (p = &tbl->parms; p; p = p->next) { + list_for_each_entry(p, &tbl->parms_list, list) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || (!p->dev && !ifindex && net_eq(net, &init_net))) return p; @@ -1481,8 +1475,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, } write_lock_bh(&tbl->lock); - p->next = tbl->parms.next; - tbl->parms.next = p; + list_add(&p->list, &tbl->parms.list); write_unlock_bh(&tbl->lock); neigh_parms_data_state_cleanall(p); @@ -1501,24 +1494,15 @@ static void neigh_rcu_free_parms(struct rcu_head *head) void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { - struct neigh_parms **p; - if (!parms || parms == &tbl->parms) return; write_lock_bh(&tbl->lock); - for (p = &tbl->parms.next; *p; p = &(*p)->next) { - if (*p == parms) { - *p = parms->next; - parms->dead = 1; - write_unlock_bh(&tbl->lock); - if (parms->dev) - dev_put(parms->dev); - call_rcu(&parms->rcu_head, neigh_rcu_free_parms); - return; - } - } + list_del(&parms->list); + parms->dead = 1; write_unlock_bh(&tbl->lock); - neigh_dbg(1, "%s: not found\n", __func__); + if (parms->dev) + dev_put(parms->dev); + call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -1530,11 +1514,15 @@ static void neigh_parms_destroy(struct neigh_parms *parms) static struct lock_class_key neigh_table_proxy_queue_class; -static void neigh_table_init_no_netlink(struct neigh_table *tbl) +static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; + +void neigh_table_init(int index, struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; + INIT_LIST_HEAD(&tbl->parms_list); + list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = @@ -1574,34 +1562,14 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; -} - -void neigh_table_init(struct neigh_table *tbl) -{ - struct neigh_table *tmp; - - neigh_table_init_no_netlink(tbl); - write_lock(&neigh_tbl_lock); - for (tmp = neigh_tables; tmp; tmp = tmp->next) { - if (tmp->family == tbl->family) - break; - } - tbl->next = neigh_tables; - neigh_tables = tbl; - write_unlock(&neigh_tbl_lock); - if (unlikely(tmp)) { - pr_err("Registering multiple tables for family %d\n", - tbl->family); - dump_stack(); - } + neigh_tables[index] = tbl; } EXPORT_SYMBOL(neigh_table_init); -int neigh_table_clear(struct neigh_table *tbl) +int neigh_table_clear(int index, struct neigh_table *tbl) { - struct neigh_table **tp; - + neigh_tables[index] = NULL; /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); @@ -1609,14 +1577,6 @@ int neigh_table_clear(struct neigh_table *tbl) neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); - write_lock(&neigh_tbl_lock); - for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { - if (*tp == tbl) { - *tp = tbl->next; - break; - } - } - write_unlock(&neigh_tbl_lock); call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, neigh_hash_free_rcu); @@ -1634,12 +1594,32 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); +static struct neigh_table *neigh_find_table(int family) +{ + struct neigh_table *tbl = NULL; + + switch (family) { + case AF_INET: + tbl = neigh_tables[NEIGH_ARP_TABLE]; + break; + case AF_INET6: + tbl = neigh_tables[NEIGH_ND_TABLE]; + break; + case AF_DECnet: + tbl = neigh_tables[NEIGH_DN_TABLE]; + break; + } + + return tbl; +} + static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; + struct neighbour *neigh; struct net_device *dev = NULL; int err = -EINVAL; @@ -1660,39 +1640,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) } } - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct neighbour *neigh; + tbl = neigh_find_table(ndm->ndm_family); + if (tbl == NULL) + return -EAFNOSUPPORT; - if (tbl->family != ndm->ndm_family) - continue; - read_unlock(&neigh_tbl_lock); - - if (nla_len(dst_attr) < tbl->key_len) - goto out; - - if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); - goto out; - } + if (nla_len(dst_attr) < tbl->key_len) + goto out; - if (dev == NULL) - goto out; + if (ndm->ndm_flags & NTF_PROXY) { + err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); + goto out; + } - neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); - if (neigh == NULL) { - err = -ENOENT; - goto out; - } + if (dev == NULL) + goto out; - err = neigh_update(neigh, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); - neigh_release(neigh); + neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; goto out; } - read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); out: return err; @@ -1700,11 +1672,14 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; + struct neighbour *neigh; + void *dst, *lladdr; int err; ASSERT_RTNL(); @@ -1728,70 +1703,60 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { - int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; - struct neighbour *neigh; - void *dst, *lladdr; + tbl = neigh_find_table(ndm->ndm_family); + if (tbl == NULL) + return -EAFNOSUPPORT; - if (tbl->family != ndm->ndm_family) - continue; - read_unlock(&neigh_tbl_lock); + if (nla_len(tb[NDA_DST]) < tbl->key_len) + goto out; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; - if (nla_len(tb[NDA_DST]) < tbl->key_len) - goto out; - dst = nla_data(tb[NDA_DST]); - lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; + if (ndm->ndm_flags & NTF_PROXY) { + struct pneigh_entry *pn; - if (ndm->ndm_flags & NTF_PROXY) { - struct pneigh_entry *pn; + err = -ENOBUFS; + pn = pneigh_lookup(tbl, net, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; + err = 0; + } + goto out; + } - err = -ENOBUFS; - pn = pneigh_lookup(tbl, net, dst, dev, 1); - if (pn) { - pn->flags = ndm->ndm_flags; - err = 0; - } + if (dev == NULL) + goto out; + + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto out; } - if (dev == NULL) + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); + goto out; + } + } else { + if (nlh->nlmsg_flags & NLM_F_EXCL) { + err = -EEXIST; + neigh_release(neigh); goto out; - - neigh = neigh_lookup(tbl, dst, dev); - if (neigh == NULL) { - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - err = -ENOENT; - goto out; - } - - neigh = __neigh_lookup_errno(tbl, dst, dev); - if (IS_ERR(neigh)) { - err = PTR_ERR(neigh); - goto out; - } - } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) { - err = -EEXIST; - neigh_release(neigh); - goto out; - } - - if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) - flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - if (ndm->ndm_flags & NTF_USE) { - neigh_event_send(neigh, NULL); - err = 0; - } else - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); - neigh_release(neigh); - goto out; + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) + flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + neigh_release(neigh); + out: return err; } @@ -1990,7 +1955,8 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; - int err; + bool found = false; + int err, tidx; err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy); @@ -2003,19 +1969,21 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) } ndtmsg = nlmsg_data(nlh); - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { + + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { + tbl = neigh_tables[tidx]; + if (!tbl) + continue; if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; - - if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { + found = true; break; + } } - if (tbl == NULL) { - err = -ENOENT; - goto errout_locked; - } + if (!found) + return -ENOENT; /* * We acquire tbl->lock to be nice to the periodic timers and @@ -2126,8 +2094,6 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) errout_tbl_lock: write_unlock_bh(&tbl->lock); -errout_locked: - read_unlock(&neigh_tbl_lock); errout: return err; } @@ -2142,10 +2108,13 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; + tbl = neigh_tables[tidx]; + if (!tbl) + continue; + if (tidx < tbl_skip || (family && tbl->family != family)) continue; @@ -2154,7 +2123,9 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NLM_F_MULTI) <= 0) break; - for (nidx = 0, p = tbl->parms.next; p; p = p->next) { + nidx = 0; + p = list_next_entry(&tbl->parms, list); + list_for_each_entry_from(p, &tbl->parms_list, list) { if (!net_eq(neigh_parms_net(p), net)) continue; @@ -2174,7 +2145,6 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) neigh_skip = 0; } out: - read_unlock(&neigh_tbl_lock); cb->args[0] = tidx; cb->args[1] = nidx; @@ -2357,7 +2327,6 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int proxy = 0; int err; - read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; /* check for full ndmsg structure presence, family member is @@ -2369,8 +2338,11 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl; - tbl = tbl->next, t++) { + for (t = 0; t < NEIGH_NR_TABLES; t++) { + tbl = neigh_tables[t]; + + if (!tbl) + continue; if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) @@ -2383,7 +2355,6 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) break; } - read_unlock(&neigh_tbl_lock); cb->args[0] = t; return skb->len; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9dd06699b09c..1a24602cd54e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -325,6 +325,23 @@ static ssize_t tx_queue_len_store(struct device *dev, } NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong); +static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) +{ + dev->gro_flush_timeout = val; + return 0; +} + +static ssize_t gro_flush_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + return netdev_store(dev, attr, buf, len, change_gro_flush_timeout); +} +NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); + static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -422,6 +439,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_mtu.attr, &dev_attr_flags.attr, &dev_attr_tx_queue_len.attr, + &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, NULL, }; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e6645b4f330a..e0ad5d16c9c5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -79,8 +79,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, if (vlan_tx_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { /* This is actually a packet drop, but we * don't want the code that calls this @@ -88,7 +87,6 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, */ goto out; } - skb->vlan_tci = 0; } status = netdev_start_xmit(skb, dev, txq, false); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 443256bdcddc..da934fc3faa8 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3728,8 +3728,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Remove proc before if_list entry, because add_device uses * list to determine if interface already exist, avoid race * with proc_create_data() */ - if (pkt_dev->entry) - proc_remove(pkt_dev->entry); + proc_remove(pkt_dev->entry); /* And update the thread if_list */ _rem_dev_from_if_list(t, pkt_dev); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 32e31c299631..92116dfe827c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3002,7 +3002,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (nskb->len == len + doffset) goto perform_csum_check; - if (!sg) { + if (!sg && !nskb->remcsum_offload) { nskb->ip_summed = CHECKSUM_NONE; nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), @@ -3074,7 +3074,7 @@ skip_fraglist: nskb->truesize += nskb->data_len; perform_csum_check: - if (!csum) { + if (!csum && !nskb->remcsum_offload) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; @@ -3088,6 +3088,16 @@ perform_csum_check: * (see validate_xmit_skb_list() for example) */ segs->prev = tail; + + /* Following permits correct backpressure, for protocols + * using skb_set_owner_w(). + * Idea is to tranfert ownership from head_skb to last segment. + */ + if (head_skb->destructor == sock_wfree) { + swap(tail->truesize, head_skb->truesize); + swap(tail->destructor, head_skb->destructor); + swap(tail->sk, head_skb->sk); + } return segs; err: @@ -4130,6 +4140,113 @@ err_free: } EXPORT_SYMBOL(skb_vlan_untag); +int skb_ensure_writable(struct sk_buff *skb, int write_len) +{ + if (!pskb_may_pull(skb, write_len)) + return -ENOMEM; + + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) + return 0; + + return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} +EXPORT_SYMBOL(skb_ensure_writable); + +/* remove VLAN header from packet and update csum accordingly. */ +static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) +{ + struct vlan_hdr *vhdr; + unsigned int offset = skb->data - skb_mac_header(skb); + int err; + + __skb_push(skb, offset); + err = skb_ensure_writable(skb, VLAN_ETH_HLEN); + if (unlikely(err)) + goto pull; + + skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + + vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); + *vlan_tci = ntohs(vhdr->h_vlan_TCI); + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); + __skb_pull(skb, VLAN_HLEN); + + vlan_set_encap_proto(skb, vhdr); + skb->mac_header += VLAN_HLEN; + + if (skb_network_offset(skb) < ETH_HLEN) + skb_set_network_header(skb, ETH_HLEN); + + skb_reset_mac_len(skb); +pull: + __skb_pull(skb, offset); + + return err; +} + +int skb_vlan_pop(struct sk_buff *skb) +{ + u16 vlan_tci; + __be16 vlan_proto; + int err; + + if (likely(vlan_tx_tag_present(skb))) { + skb->vlan_tci = 0; + } else { + if (unlikely((skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD)) || + skb->len < VLAN_ETH_HLEN)) + return 0; + + err = __skb_vlan_pop(skb, &vlan_tci); + if (err) + return err; + } + /* move next vlan tag to hw accel tag */ + if (likely((skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD)) || + skb->len < VLAN_ETH_HLEN)) + return 0; + + vlan_proto = skb->protocol; + err = __skb_vlan_pop(skb, &vlan_tci); + if (unlikely(err)) + return err; + + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + return 0; +} +EXPORT_SYMBOL(skb_vlan_pop); + +int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) +{ + if (vlan_tx_tag_present(skb)) { + unsigned int offset = skb->data - skb_mac_header(skb); + int err; + + /* __vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + __skb_push(skb, offset); + err = __vlan_insert_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (err) + return err; + skb->protocol = skb->vlan_proto; + skb->mac_len += VLAN_HLEN; + __skb_pull(skb, offset); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(skb->data + + (2 * ETH_ALEN), VLAN_HLEN, 0)); + } + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + return 0; +} +EXPORT_SYMBOL(skb_vlan_push); + /** * alloc_skb_with_frags - allocate skb with page frags * diff --git a/net/core/sock.c b/net/core/sock.c index 15e0c67b1069..0725cf0cb685 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_max_pacing_rate; break; + case SO_INCOMING_CPU: + v.val = sk->sk_incoming_cpu; + break; + default: return -ENOPROTOOPT; } @@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -2457,7 +2462,7 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cf9cd13509a7..31baba2a71ce 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,8 @@ static int zero = 0; static int one = 1; static int ushort_max = USHRT_MAX; +static int net_msg_warn; /* Unused, but still a sysctl */ + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -215,6 +217,18 @@ static int set_default_qdisc(struct ctl_table *table, int write, } #endif +static int proc_do_rss_key(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + char buf[NETDEV_RSS_KEY_LEN * 3]; + + snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key); + fake_table.data = buf; + fake_table.maxlen = sizeof(buf); + return proc_dostring(&fake_table, write, buffer, lenp, ppos); +} + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -263,6 +277,13 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "netdev_rss_key", + .data = &netdev_rss_key, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_do_rss_key, + }, #ifdef CONFIG_BPF_JIT { .procname = "bpf_jit_enable", diff --git a/net/core/utils.c b/net/core/utils.c index efc76dd9dcd1..7b803884c162 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -33,9 +33,6 @@ #include <asm/byteorder.h> #include <asm/uaccess.h> -int net_msg_warn __read_mostly = 1; -EXPORT_SYMBOL(net_msg_warn); - DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. |