summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-04 17:31:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-04 17:31:39 -0800
commit8d70eeb84ab277377c017af6a21d0a337025dede (patch)
treed6e8a80e1d5e953ab37eef0c7468c77e7735779b /net/core
parent2d62e0768d3c28536d4cfe4c40ba1e5e8e442a93 (diff)
parentf78ef7cd9a0686b979679d0de061c6dbfd8d649e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Fix double-free in batman-adv, from Sven Eckelmann. 2) Fix packet stats for fast-RX path, from Joannes Berg. 3) Netfilter's ip_route_me_harder() doesn't handle request sockets properly, fix from Florian Westphal. 4) Fix sendmsg deadlock in rxrpc, from David Howells. 5) Add missing RCU locking to transport hashtable scan, from Xin Long. 6) Fix potential packet loss in mlxsw driver, from Ido Schimmel. 7) Fix race in NAPI handling between poll handlers and busy polling, from Eric Dumazet. 8) TX path in vxlan and geneve need proper RCU locking, from Jakub Kicinski. 9) SYN processing in DCCP and TCP need to disable BH, from Eric Dumazet. 10) Properly handle net_enable_timestamp() being invoked from IRQ context, also from Eric Dumazet. 11) Fix crash on device-tree systems in xgene driver, from Alban Bedel. 12) Do not call sk_free() on a locked socket, from Arnaldo Carvalho de Melo. 13) Fix use-after-free in netvsc driver, from Dexuan Cui. 14) Fix max MTU setting in bonding driver, from WANG Cong. 15) xen-netback hash table can be allocated from softirq context, so use GFP_ATOMIC. From Anoob Soman. 16) Fix MAC address change bug in bgmac driver, from Hari Vyas. 17) strparser needs to destroy strp_wq on module exit, from WANG Cong. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (69 commits) strparser: destroy workqueue on module exit sfc: fix IPID endianness in TSOv2 sfc: avoid max() in array size rds: remove unnecessary returned value check rxrpc: Fix potential NULL-pointer exception nfp: correct DMA direction in XDP DMA sync nfp: don't tell FW about the reserved buffer space net: ethernet: bgmac: mac address change bug net: ethernet: bgmac: init sequence bug xen-netback: don't vfree() queues under spinlock xen-netback: keep a local pointer for vif in backend_disconnect() netfilter: nf_tables: don't call nfnetlink_set_err() if nfnetlink_send() fails netfilter: nft_set_rbtree: incorrect assumption on lower interval lookups netfilter: nf_conntrack_sip: fix wrong memory initialisation can: flexcan: fix typo in comment can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer can: gs_usb: fix coding style can: gs_usb: Don't use stack memory for USB transfers ixgbe: Limit use of 2K buffers on architectures with 256B or larger cache lines ixgbe: update the rss key on h/w, when ethtool ask for it ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c111
-rw-r--r--net/core/sock.c16
2 files changed, 114 insertions, 13 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 304f2deae5f9..8637b2b71f3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
+static atomic_t netstamp_wanted;
static void netstamp_clear(struct work_struct *work)
{
int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+ int wanted;
- while (deferred--)
- static_key_slow_dec(&netstamp_needed);
+ wanted = atomic_add_return(deferred, &netstamp_wanted);
+ if (wanted > 0)
+ static_key_enable(&netstamp_needed);
+ else
+ static_key_disable(&netstamp_needed);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
void net_enable_timestamp(void)
{
+#ifdef HAVE_JUMP_LABEL
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 0)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
+ return;
+ }
+ atomic_inc(&netstamp_needed_deferred);
+ schedule_work(&netstamp_work);
+#else
static_key_slow_inc(&netstamp_needed);
+#endif
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
- /* net_disable_timestamp() can be called from non process context */
- atomic_inc(&netstamp_needed_deferred);
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 1)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
+ return;
+ }
+ atomic_dec(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
static_key_slow_dec(&netstamp_needed);
@@ -4884,6 +4911,39 @@ void __napi_schedule(struct napi_struct *n)
EXPORT_SYMBOL(__napi_schedule);
/**
+ * napi_schedule_prep - check if napi can be scheduled
+ * @n: napi context
+ *
+ * Test if NAPI routine is already running, and if not mark
+ * it as running. This is used as a condition variable
+ * insure only one NAPI poll instance runs. We also make
+ * sure there is no pending NAPI disable.
+ */
+bool napi_schedule_prep(struct napi_struct *n)
+{
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ if (unlikely(val & NAPIF_STATE_DISABLE))
+ return false;
+ new = val | NAPIF_STATE_SCHED;
+
+ /* Sets STATE_MISSED bit if STATE_SCHED was already set
+ * This was suggested by Alexander Duyck, as compiler
+ * emits better code than :
+ * if (val & NAPIF_STATE_SCHED)
+ * new |= NAPIF_STATE_MISSED;
+ */
+ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
+ NAPIF_STATE_MISSED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ return !(val & NAPIF_STATE_SCHED);
+}
+EXPORT_SYMBOL(napi_schedule_prep);
+
+/**
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
@@ -4897,7 +4957,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
bool napi_complete_done(struct napi_struct *n, int work_done)
{
- unsigned long flags;
+ unsigned long flags, val, new;
/*
* 1) Don't let napi dequeue from the cpu poll list
@@ -4927,7 +4987,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
list_del_init(&n->poll_list);
local_irq_restore(flags);
}
- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+
+ do {
+ val = READ_ONCE(n->state);
+
+ WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
+
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+
+ /* If STATE_MISSED was set, leave STATE_SCHED set,
+ * because we will call napi->poll() one more time.
+ * This C code was suggested by Alexander Duyck to help gcc.
+ */
+ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
+ NAPIF_STATE_SCHED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ if (unlikely(val & NAPIF_STATE_MISSED)) {
+ __napi_schedule(n);
+ return false;
+ }
+
return true;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -4953,6 +5033,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
{
int rc;
+ /* Busy polling means there is a high chance device driver hard irq
+ * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
+ * set in napi_schedule_prep().
+ * Since we are about to call napi->poll() once more, we can safely
+ * clear NAPI_STATE_MISSED.
+ *
+ * Note: x86 could use a single "lock and ..." instruction
+ * to perform these two clear_bit()
+ */
+ clear_bit(NAPI_STATE_MISSED, &napi->state);
clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
local_bh_disable();
@@ -5088,8 +5178,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
struct napi_struct *napi;
napi = container_of(timer, struct napi_struct, timer);
- if (napi->gro_list)
- napi_schedule_irqoff(napi);
+
+ /* Note : we use a relaxed variant of napi_schedule_prep() not setting
+ * NAPI_STATE_MISSED, since we do not react to a device IRQ.
+ */
+ if (napi->gro_list && !napi_disable_pending(napi) &&
+ !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+ __napi_schedule_irqoff(napi);
return HRTIMER_NORESTART;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index e7d74940e863..f6fd79f33097 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- bh_unlock_sock(newsk);
- sk_free(newsk);
+ sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
@@ -1592,6 +1588,16 @@ out:
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
+void sk_free_unlock_clone(struct sock *sk)
+{
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ sk->sk_destruct = NULL;
+ bh_unlock_sock(sk);
+ sk_free(sk);
+}
+EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;