From 3aa569c3fedbd0d16041d08bf6e89b8c43aee650 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 18 Jul 2012 12:42:58 +0200
Subject: mac80211: fix scan_sdata assignment

We need to use RCU to assign scan_sdata.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 169da0742c81..7644181cb6b4 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -304,7 +304,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
 	if (local->scan_req != local->int_scan_req)
 		cfg80211_scan_done(local->scan_req, aborted);
 	local->scan_req = NULL;
-	local->scan_sdata = NULL;
+	rcu_assign_pointer(local->scan_sdata, NULL);
 
 	local->scanning = 0;
 	local->scan_channel = NULL;
-- 
cgit 


From 5e31fc0815a4e2c72b1b495fe7a0d8f9bfb9e4b4 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 24 Jul 2012 08:35:39 +0200
Subject: wireless: reg: restore previous behaviour of chan->max_power
 calculations

commit eccc068e8e84c8fe997115629925e0422a98e4de
Author: Hong Wu <Hong.Wu@dspg.com>
Date:   Wed Jan 11 20:33:39 2012 +0200

    wireless: Save original maximum regulatory transmission power for the calucation of the local maximum transmit pow

changed the way we calculate chan->max_power as min(chan->max_power,
chan->max_reg_power). That broke rt2x00 (and perhaps some other
drivers) that do not set chan->max_power. It is not so easy to fix this
problem correctly in rt2x00.

According to commit eccc068e8 changelog, change claim only to save
maximum regulatory power - changing setting of chan->max_power was side
effect. This patch restore previous calculations of chan->max_power and
do not touch chan->max_reg_power.

Cc: stable@vger.kernel.org # 3.4+
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index baf5704740ee..460af03d8149 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -891,7 +891,21 @@ static void handle_channel(struct wiphy *wiphy,
 	chan->max_antenna_gain = min(chan->orig_mag,
 		(int) MBI_TO_DBI(power_rule->max_antenna_gain));
 	chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp);
-	chan->max_power = min(chan->max_power, chan->max_reg_power);
+	if (chan->orig_mpwr) {
+		/*
+		 * Devices that have their own custom regulatory domain
+		 * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the
+		 * passed country IE power settings.
+		 */
+		if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+		    wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY &&
+		    wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY)
+			chan->max_power = chan->max_reg_power;
+		else
+			chan->max_power = min(chan->orig_mpwr,
+					      chan->max_reg_power);
+	} else
+		chan->max_power = chan->max_reg_power;
 }
 
 static void handle_band(struct wiphy *wiphy,
-- 
cgit 


From ba846a502c6b3c0ff047861c891fd1afeed6e435 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Sun, 29 Jul 2012 16:25:10 +0300
Subject: mac80211: don't clear sched_scan_sdata on sched scan stop request

ieee80211_request_sched_scan_stop() cleared
local->sched_scan_sdata. However, sched_scan_sdata
should be cleared only after the driver calls
ieee80211_sched_scan_stopped() (like with normal hw scan).

Clearing sched_scan_sdata too early caused
ieee80211_sched_scan_stopped_work to exit prematurely
without properly cleaning all the sched scan resources
and without calling cfg80211_sched_scan_stopped (so
userspace wasn't notified about sched scan completion).

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/scan.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index df36280ed78f..839dd9737989 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -984,7 +984,6 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
 			kfree(local->sched_scan_ies.ie[i]);
 
 		drv_sched_scan_stop(local, sdata);
-		rcu_assign_pointer(local->sched_scan_sdata, NULL);
 	}
 out:
 	mutex_unlock(&local->mtx);
-- 
cgit 


From 2d9957cce674308f744f37f68b6bc3261bfdbbf4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Aug 2012 20:54:52 +0200
Subject: mac80211: clear timer bits when disconnecting

There's a corner case that can happen when we
suspend with a timer running, then resume and
disconnect. If we connect again, suspend and
resume we might start timers that shouldn't be
running. Reset the timer flags to avoid this.

This affects both mesh and managed modes.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.c | 2 ++
 net/mac80211/mlme.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6fac18c0423f..d60b3d362e00 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -634,6 +634,8 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	local->fif_other_bss--;
 	atomic_dec(&local->iff_allmultis);
 	ieee80211_configure_filter(local);
+
+	sdata->u.mesh.timers_running = 0;
 }
 
 static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cef0c9e79aba..a4a5acdbaa4d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1430,6 +1430,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
 	del_timer_sync(&sdata->u.mgd.timer);
 	del_timer_sync(&sdata->u.mgd.chswitch_timer);
+
+	sdata->u.mgd.timers_running = 0;
 }
 
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
-- 
cgit 


From dd4c9260e7f23f2e951cbfb2726e468c6d30306c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Aug 2012 21:03:21 +0200
Subject: mac80211: cancel mesh path timer

The mesh path timer needs to be canceled when
leaving the mesh as otherwise it could fire
after the interface has been removed already.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d60b3d362e00..85572353a7e3 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -622,6 +622,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
+	del_timer_sync(&sdata->u.mesh.mesh_path_timer);
 	/*
 	 * If the timer fired while we waited for it, it will have
 	 * requeued the work. Now the work will be running again
-- 
cgit 


From 30b678d844af3305cda5953467005cebb5d7b687 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 30 Jul 2012 15:57:00 +0000
Subject: net: Allow driver to limit number of GSO segments per skb

A peer (or local user) may cause TCP to use a nominal MSS of as little
as 88 (actual MSS of 76 with timestamps).  Given that we have a
sufficiently prodigious local sender and the peer ACKs quickly enough,
it is nevertheless possible to grow the window for such a connection
to the point that we will try to send just under 64K at once.  This
results in a single skb that expands to 861 segments.

In some drivers with TSO support, such an skb will require hundreds of
DMA descriptors; a substantial fraction of a TX ring or even more than
a full ring.  The TX queue selected for the skb may stall and trigger
the TX watchdog repeatedly (since the problem skb will be retried
after the TX reset).  This particularly affects sfc, for which the
issue is designated as CVE-2012-3412.

Therefore:
1. Add the field net_device::gso_max_segs holding the device-specific
   limit.
2. In netif_skb_features(), if the number of segments is too high then
   mask out GSO features to force fall back to software GSO.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0cb3fe8d8e72..f91abf800161 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2134,6 +2134,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	__be16 protocol = skb->protocol;
 	netdev_features_t features = skb->dev->features;
 
+	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+		features &= ~NETIF_F_GSO_MASK;
+
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
@@ -5986,6 +5989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev_net_set(dev, &init_net);
 
 	dev->gso_max_size = GSO_MAX_SIZE;
+	dev->gso_max_segs = GSO_MAX_SEGS;
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
-- 
cgit 


From 1485348d2424e1131ea42efc033cbd9366462b01 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 30 Jul 2012 16:11:42 +0000
Subject: tcp: Apply device TSO segment limit earlier

Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to
limit the size of TSO skbs.  This avoids the need to fall back to
software GSO for local TCP senders.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c       |  1 +
 net/ipv4/tcp.c        |  4 +++-
 net/ipv4/tcp_cong.c   |  3 ++-
 net/ipv4/tcp_output.c | 21 ++++++++++++---------
 4 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 6b654b3ddfda..8f67ced8d6a8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1458,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 		} else {
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 			sk->sk_gso_max_size = dst->dev->gso_max_size;
+			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
 		}
 	}
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7e6eeae49c0..2109ff4a1daf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 			   old_size_goal + mss_now > xmit_size_goal)) {
 			xmit_size_goal = old_size_goal;
 		} else {
-			tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+			tp->xmit_size_goal_segs =
+				min_t(u16, xmit_size_goal / mss_now,
+				      sk->sk_gso_max_segs);
 			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
 		}
 	}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16e336e..1432cdb0644c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
 	left = tp->snd_cwnd - in_flight;
 	if (sk_can_gso(sk) &&
 	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left * tp->mss_cache < sk->sk_gso_max_size)
+	    left * tp->mss_cache < sk->sk_gso_max_size &&
+	    left < sk->sk_gso_max_segs)
 		return true;
 	return left <= tcp_max_tso_deferred_mss(tp);
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f1bcff0b10b..a7b3ec9b6c3e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1522,21 +1522,21 @@ static void tcp_cwnd_validate(struct sock *sk)
  * when we would be allowed to send the split-due-to-Nagle skb fully.
  */
 static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-					unsigned int mss_now, unsigned int cwnd)
+					unsigned int mss_now, unsigned int max_segs)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 needed, window, cwnd_len;
+	u32 needed, window, max_len;
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
-	cwnd_len = mss_now * cwnd;
+	max_len = mss_now * max_segs;
 
-	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
-		return cwnd_len;
+	if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+		return max_len;
 
 	needed = min(skb->len, window);
 
-	if (cwnd_len <= needed)
-		return cwnd_len;
+	if (max_len <= needed)
+		return max_len;
 
 	return needed - needed % mss_now;
 }
@@ -1765,7 +1765,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	limit = min(send_win, cong_win);
 
 	/* If a full-sized TSO skb can be sent, do it. */
-	if (limit >= sk->sk_gso_max_size)
+	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+			   sk->sk_gso_max_segs * tp->mss_cache))
 		goto send_now;
 
 	/* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2000,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    cwnd_quota);
+						    min_t(unsigned int,
+							  cwnd_quota,
+							  sk->sk_gso_max_segs));
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
-- 
cgit 


From e3c0d04750751389d5116267f8cf4687444d9a50 Mon Sep 17 00:00:00 2001
From: Fan Du <fdu@windriver.com>
Date: Mon, 30 Jul 2012 21:43:54 +0000
Subject: Fix unexpected SA hard expiration after changing date

After SA is setup, one timer is armed to detect soft/hard expiration,
however the timer handler uses xtime to do the math. This makes hard
expiration occurs first before soft expiration after setting new date
with big interval. As a result new child SA is deleted before rekeying
the new one.

Signed-off-by: Fan Du <fdu@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5b228f97d4b3..87cd0e4d4282 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
 	if (x->lft.hard_add_expires_seconds) {
 		long tmo = x->lft.hard_add_expires_seconds +
 			x->curlft.add_time - now;
-		if (tmo <= 0)
-			goto expired;
+		if (tmo <= 0) {
+			if (x->xflags & XFRM_SOFT_EXPIRE) {
+				/* enter hard expire without soft expire first?!
+				 * setting a new date could trigger this.
+				 * workarbound: fix x->curflt.add_time by below:
+				 */
+				x->curlft.add_time = now - x->saved_tmo - 1;
+				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
+			} else
+				goto expired;
+		}
 		if (tmo < next)
 			next = tmo;
 	}
@@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
 	if (x->lft.soft_add_expires_seconds) {
 		long tmo = x->lft.soft_add_expires_seconds +
 			x->curlft.add_time - now;
-		if (tmo <= 0)
+		if (tmo <= 0) {
 			warn = 1;
-		else if (tmo < next)
+			x->xflags &= ~XFRM_SOFT_EXPIRE;
+		} else if (tmo < next) {
 			next = tmo;
+			x->xflags |= XFRM_SOFT_EXPIRE;
+			x->saved_tmo = tmo;
+		}
 	}
 	if (x->lft.soft_use_expires_seconds) {
 		long tmo = x->lft.soft_use_expires_seconds +
-- 
cgit 


From e33cdac014d50dd9753e1399ae8b0b5cd98d7aa0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 1 Aug 2012 23:23:40 +0000
Subject: ipv4: route.c cleanup

Remove unused includes after IP cache removal

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c035251beb07..e4ba974f143c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -80,7 +79,6 @@
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
-#include <linux/workqueue.h>
 #include <linux/skbuff.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
@@ -88,11 +86,9 @@
 #include <linux/mroute.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
-#include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
-- 
cgit 


From 03f6b0843ad6512f27bc2e48f04c21065311e03e Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 1 Aug 2012 15:58:42 -0500
Subject: cfg80211: add channel flag to prohibit OFDM operation

Currently the only way for wireless drivers to tell whether or not OFDM
is allowed on the current channel is to check the regulatory
information. However, this requires hodling cfg80211_mutex, which is not
visible to the drivers.

Other regulatory restrictions are provided as flags in the channel
definition, so let's do similarly with OFDM. This patch adds a new flag,
IEEE80211_CHAN_NO_OFDM, to tell drivers that OFDM on a channel is not
allowed. This flag is set on any channels for which regulatory indicates
that OFDM is prohibited.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Tested-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index a9175fedeb59..cbf30de79c69 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags)
 		channel_flags |= IEEE80211_CHAN_NO_IBSS;
 	if (rd_flags & NL80211_RRF_DFS)
 		channel_flags |= IEEE80211_CHAN_RADAR;
+	if (rd_flags & NL80211_RRF_NO_OFDM)
+		channel_flags |= IEEE80211_CHAN_NO_OFDM;
 	return channel_flags;
 }
 
-- 
cgit 


From 899852af60c212bfe9a2fb71d4d9082d2622df5c Mon Sep 17 00:00:00 2001
From: Paul Stewart <pstew@chromium.org>
Date: Wed, 1 Aug 2012 16:54:42 -0700
Subject: cfg80211: Clear "beacon_found" on regulatory restore

Restore the default state to the "beacon_found" flag when
the channel flags are restored.  Otherwise, we can end up
with a channel that we can no longer transmit on even when
we can see beacons on that channel.

Signed-off-by: Paul Stewart <pstew@chromium.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index cbf30de79c69..2ded3c7fad06 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1901,6 +1901,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy)
 			chan->flags = chan->orig_flags;
 			chan->max_antenna_gain = chan->orig_mag;
 			chan->max_power = chan->orig_mpwr;
+			chan->beacon_found = false;
 		}
 	}
 }
-- 
cgit 


From 696ecdc10622d86541f2e35cc16e15b6b3b1b67e Mon Sep 17 00:00:00 2001
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Fri, 3 Aug 2012 19:57:52 +0900
Subject: net_sched: gact: Fix potential panic in tcf_gact().

gact_rand array is accessed by gact->tcfg_ptype whose value
is assumed to less than MAX_RAND, but any range checks are
not performed.

So add a check in tcf_gact_init(). And in tcf_gact(), we can
reduce a branch.

Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_gact.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f10fb8256442..05d60859d8e3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	struct tcf_common *pc;
 	int ret = 0;
 	int err;
+#ifdef CONFIG_GACT_PROB
+	struct tc_gact_p *p_parm = NULL;
+#endif
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 #ifndef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB] != NULL)
 		return -EOPNOTSUPP;
+#else
+	if (tb[TCA_GACT_PROB]) {
+		p_parm = nla_data(tb[TCA_GACT_PROB]);
+		if (p_parm->ptype >= MAX_RAND)
+			return -EINVAL;
+	}
 #endif
 
 	pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	spin_lock_bh(&gact->tcf_lock);
 	gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
-	if (tb[TCA_GACT_PROB] != NULL) {
-		struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+	if (p_parm) {
 		gact->tcfg_paction = p_parm->paction;
 		gact->tcfg_pval    = p_parm->pval;
 		gact->tcfg_ptype   = p_parm->ptype;
@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
 
 	spin_lock(&gact->tcf_lock);
 #ifdef CONFIG_GACT_PROB
-	if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+	if (gact->tcfg_ptype)
 		action = gact_rand[gact->tcfg_ptype](gact);
 	else
 		action = gact->tcf_action;
-- 
cgit 


From 269c4845d5b3627b95b1934107251bacbe99bb68 Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Date: Fri, 15 Jun 2012 02:30:20 -0300
Subject: Bluetooth: Fix possible deadlock in SCO code

sco_chan_del() only has conn != NULL when called from sco_conn_del() so
just move the code from it that deal with conn to sco_conn_del().

[  120.765529]
[  120.765529] ======================================================
[  120.766529] [ INFO: possible circular locking dependency detected ]
[  120.766529] 3.5.0-rc1-10292-g3701f94-dirty #70 Tainted: G        W
[  120.766529] -------------------------------------------------------
[  120.766529] kworker/u:3/1497 is trying to acquire lock:
[  120.766529]  (&(&conn->lock)->rlock#2){+.+...}, at:
[<ffffffffa00b7ecc>] sco_chan_del+0x4c/0x170 [bluetooth]
[  120.766529]
[  120.766529] but task is already holding lock:
[  120.766529]  (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}, at:
[<ffffffffa00b8401>] sco_conn_del+0x61/0xe0 [bluetooth]
[  120.766529]
[  120.766529] which lock already depends on the new lock.
[  120.766529]
[  120.766529]
[  120.766529] the existing dependency chain (in reverse order) is:
[  120.766529]
[  120.766529] -> #1 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}:
[  120.766529]        [<ffffffff8107980e>] lock_acquire+0x8e/0xb0
[  120.766529]        [<ffffffff813c19e0>] _raw_spin_lock+0x40/0x80
[  120.766529]        [<ffffffffa00b85e9>] sco_connect_cfm+0x79/0x300
[bluetooth]
[  120.766529]        [<ffffffffa0094b13>]
hci_sync_conn_complete_evt.isra.90+0x343/0x400 [bluetooth]
[  120.766529]        [<ffffffffa009d447>] hci_event_packet+0x317/0xfb0
[bluetooth]
[  120.766529]        [<ffffffffa008aa68>] hci_rx_work+0x2c8/0x890
[bluetooth]
[  120.766529]        [<ffffffff81047db7>] process_one_work+0x197/0x460
[  120.766529]        [<ffffffff810489d6>] worker_thread+0x126/0x2d0
[  120.766529]        [<ffffffff8104ee4d>] kthread+0x9d/0xb0
[  120.766529]        [<ffffffff813c4294>] kernel_thread_helper+0x4/0x10
[  120.766529]
[  120.766529] -> #0 (&(&conn->lock)->rlock#2){+.+...}:
[  120.766529]        [<ffffffff81078a8a>] __lock_acquire+0x154a/0x1d30
[  120.766529]        [<ffffffff8107980e>] lock_acquire+0x8e/0xb0
[  120.766529]        [<ffffffff813c19e0>] _raw_spin_lock+0x40/0x80
[  120.766529]        [<ffffffffa00b7ecc>] sco_chan_del+0x4c/0x170
[bluetooth]
[  120.766529]        [<ffffffffa00b8414>] sco_conn_del+0x74/0xe0
[bluetooth]
[  120.766529]        [<ffffffffa00b88a2>] sco_disconn_cfm+0x32/0x60
[bluetooth]
[  120.766529]        [<ffffffffa0093a82>]
hci_disconn_complete_evt.isra.53+0x242/0x390 [bluetooth]
[  120.766529]        [<ffffffffa009d747>] hci_event_packet+0x617/0xfb0
[bluetooth]
[  120.766529]        [<ffffffffa008aa68>] hci_rx_work+0x2c8/0x890
[bluetooth]
[  120.766529]        [<ffffffff81047db7>] process_one_work+0x197/0x460
[  120.766529]        [<ffffffff810489d6>] worker_thread+0x126/0x2d0
[  120.766529]        [<ffffffff8104ee4d>] kthread+0x9d/0xb0
[  120.766529]        [<ffffffff813c4294>] kernel_thread_helper+0x4/0x10
[  120.766529]
[  120.766529] other info that might help us debug this:
[  120.766529]
[  120.766529]  Possible unsafe locking scenario:
[  120.766529]
[  120.766529]        CPU0                    CPU1
[  120.766529]        ----                    ----
[  120.766529]   lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
[  120.766529]
lock(&(&conn->lock)->rlock#2);
[  120.766529]
lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
[  120.766529]   lock(&(&conn->lock)->rlock#2);
[  120.766529]
[  120.766529]  *** DEADLOCK ***

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/sco.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 40bbe25dcff7..3589e21edb09 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
 		bh_unlock_sock(sk);
+
+		sco_conn_lock(conn);
+		conn->sk = NULL;
+		sco_pi(sk)->conn = NULL;
+		sco_conn_unlock(conn);
+
+		if (conn->hcon)
+			hci_conn_put(conn->hcon);
+
 		sco_sock_kill(sk);
 	}
 
@@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err)
 
 	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
 
-	if (conn) {
-		sco_conn_lock(conn);
-		conn->sk = NULL;
-		sco_pi(sk)->conn = NULL;
-		sco_conn_unlock(conn);
-
-		if (conn->hcon)
-			hci_conn_put(conn->hcon);
-	}
-
 	sk->sk_state = BT_CLOSED;
 	sk->sk_err   = err;
 	sk->sk_state_change(sk);
-- 
cgit 


From a9ea3ed9b71cc3271dd59e76f65748adcaa76422 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@tieto.com>
Date: Thu, 19 Jul 2012 14:46:08 +0200
Subject: Bluetooth: Fix legacy pairing with some devices

Some devices e.g. some Android based phones don't do SDP search before
pairing and cancel legacy pairing when ACL is disconnected.

PIN Code Request event which changes ACL timeout to HCI_PAIRING_TIMEOUT
is only received after remote user entered PIN.

In that case no L2CAP is connected so default HCI_DISCONN_TIMEOUT
(2 seconds) is being used to timeout ACL connection. This results in
problems with legacy pairing as remote user has only few seconds to
enter PIN before ACL is disconnected.

Increase disconnect timeout for incomming connection to
HCI_PAIRING_TIMEOUT if SSP is disabled and no linkey exists.

To avoid keeping ACL alive for too long after SDP search set ACL
timeout back to HCI_DISCONN_TIMEOUT when L2CAP is connected.

2012-07-19 13:24:43.413521 < HCI Command: Create Connection (0x01|0x0005) plen 13
    bdaddr 00:02:72:D6:6A:3F ptype 0xcc18 rswitch 0x01 clkoffset 0x0000
    Packet type: DM1 DM3 DM5 DH1 DH3 DH5
2012-07-19 13:24:43.425224 > HCI Event: Command Status (0x0f) plen 4
    Create Connection (0x01|0x0005) status 0x00 ncmd 1
2012-07-19 13:24:43.885222 > HCI Event: Role Change (0x12) plen 8
    status 0x00 bdaddr 00:02:72:D6:6A:3F role 0x01
    Role: Slave
2012-07-19 13:24:44.054221 > HCI Event: Connect Complete (0x03) plen 11
    status 0x00 handle 42 bdaddr 00:02:72:D6:6A:3F type ACL encrypt 0x00
2012-07-19 13:24:44.054313 < HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2
    handle 42
2012-07-19 13:24:44.055176 > HCI Event: Page Scan Repetition Mode Change (0x20) plen 7
    bdaddr 00:02:72:D6:6A:3F mode 0
2012-07-19 13:24:44.056217 > HCI Event: Max Slots Change (0x1b) plen 3
    handle 42 slots 5
2012-07-19 13:24:44.059218 > HCI Event: Command Status (0x0f) plen 4
    Read Remote Supported Features (0x01|0x001b) status 0x00 ncmd 0
2012-07-19 13:24:44.062192 > HCI Event: Command Status (0x0f) plen 4
    Unknown (0x00|0x0000) status 0x00 ncmd 1
2012-07-19 13:24:44.067219 > HCI Event: Read Remote Supported Features (0x0b) plen 11
    status 0x00 handle 42
    Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87
2012-07-19 13:24:44.067248 < HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3
    handle 42 page 1
2012-07-19 13:24:44.071217 > HCI Event: Command Status (0x0f) plen 4
    Read Remote Extended Features (0x01|0x001c) status 0x00 ncmd 1
2012-07-19 13:24:44.076218 > HCI Event: Read Remote Extended Features (0x23) plen 13
    status 0x00 handle 42 page 1 max 1
    Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00
2012-07-19 13:24:44.076249 < HCI Command: Remote Name Request (0x01|0x0019) plen 10
    bdaddr 00:02:72:D6:6A:3F mode 2 clkoffset 0x0000
2012-07-19 13:24:44.081218 > HCI Event: Command Status (0x0f) plen 4
    Remote Name Request (0x01|0x0019) status 0x00 ncmd 1
2012-07-19 13:24:44.105214 > HCI Event: Remote Name Req Complete (0x07) plen 255
    status 0x00 bdaddr 00:02:72:D6:6A:3F name 'uw000951-0'
2012-07-19 13:24:44.105284 < HCI Command: Authentication Requested (0x01|0x0011) plen 2
    handle 42
2012-07-19 13:24:44.111207 > HCI Event: Command Status (0x0f) plen 4
    Authentication Requested (0x01|0x0011) status 0x00 ncmd 1
2012-07-19 13:24:44.112220 > HCI Event: Link Key Request (0x17) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.112249 < HCI Command: Link Key Request Negative Reply (0x01|0x000c) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.115215 > HCI Event: Command Complete (0x0e) plen 10
    Link Key Request Negative Reply (0x01|0x000c) ncmd 1
    status 0x00 bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.116215 > HCI Event: PIN Code Request (0x16) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:48.099184 > HCI Event: Auth Complete (0x06) plen 3
    status 0x13 handle 42
    Error: Remote User Terminated Connection
2012-07-19 13:24:48.179182 > HCI Event: Disconn Complete (0x05) plen 4
    status 0x00 handle 42 reason 0x13
    Reason: Remote User Terminated Connection

Cc: stable@vger.kernel.org
Signed-off-by: Szymon Janc <szymon.janc@tieto.com>
Acked-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c  | 7 ++++++-
 net/bluetooth/l2cap_core.c | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 41ff978a33f9..248632cec11d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1762,7 +1762,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		if (conn->type == ACL_LINK) {
 			conn->state = BT_CONFIG;
 			hci_conn_hold(conn);
-			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+
+			if (!conn->out && !hci_conn_ssp_enabled(conn) &&
+			    !hci_find_link_key(hdev, &ev->bdaddr))
+				conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+			else
+				conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 		} else
 			conn->state = BT_CONNECTED;
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a8964db04bfb..daa149b7003c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1181,6 +1181,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 	sk = chan->sk;
 
 	hci_conn_hold(conn->hcon);
+	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
 
 	bacpy(&bt_sk(sk)->src, conn->src);
 	bacpy(&bt_sk(sk)->dst, conn->dst);
-- 
cgit 


From c810089c27e48b816181b454fcc493d19fdbc2ba Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:09 +0300
Subject: Bluetooth: Fix using NULL inquiry entry

If entry wasn't found in the hci_inquiry_cache_lookup_resolve do not
resolve the name.This will fix a kernel crash when trying to use NULL
pointer.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 248632cec11d..b64cfa213bd6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev)
 		return false;
 
 	e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED);
+	if (!e)
+		return false;
+
 	if (hci_resolve_name(hdev, e) == 0) {
 		e->name_state = NAME_PENDING;
 		return true;
-- 
cgit 


From 7cc8380eb10347016d95bf6f9d842c2ae6d12932 Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:10 +0300
Subject: Bluetooth: Fix using a NULL inquiry cache entry

If the device was not found in a list of found devices names of which
are pending.This may happen in a case when HCI Remote Name Request
was sent as a part of incoming connection establishment procedure.
Hence there is no need to continue resolving a next name as it will
be done upon receiving another Remote Name Request Complete Event.
This will fix a kernel crash when trying to use this entry to resolve
the next name.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b64cfa213bd6..fe9a3d6d30b0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1396,12 +1396,18 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
 		return;
 
 	e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING);
-	if (e) {
+	/* If the device was not found in a list of found devices names of which
+	 * are pending. there is no need to continue resolving a next name as it
+	 * will be done upon receiving another Remote Name Request Complete
+	 * Event */
+	if (!e)
+		return;
+
+	list_del(&e->list);
+	if (name) {
 		e->name_state = NAME_KNOWN;
-		list_del(&e->list);
-		if (name)
-			mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
-					 e->data.rssi, name, name_len);
+		mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
+				 e->data.rssi, name, name_len);
 	}
 
 	if (hci_resolve_next_name(hdev))
-- 
cgit 


From c3e7c0d90b14a3e7ac091d24cef09efb516d587b Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:11 +0300
Subject: Bluetooth: Set name_state to unknown when entry name is empty

When the name of the given entry is empty , the state needs to be
updated accordingly.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fe9a3d6d30b0..715d7e33fba0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1408,6 +1408,8 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
 		e->name_state = NAME_KNOWN;
 		mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
 				 e->data.rssi, name, name_len);
+	} else {
+		e->name_state = NAME_NOT_KNOWN;
 	}
 
 	if (hci_resolve_next_name(hdev))
-- 
cgit 


From d08fd0e712a834d4abb869c0215a702e290bc51e Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:43 +0300
Subject: Bluetooth: smp: Fix possible NULL dereference

smp_chan_create might return NULL so we need to check before
dereferencing smp.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/smp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 16ef0dc85a0a..901a616c8083 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
 		smp = smp_chan_create(conn);
+	else
+		smp = conn->smp_chan;
 
-	smp = conn->smp_chan;
+	if (!smp)
+		return SMP_UNSPECIFIED;
 
 	smp->preq[0] = SMP_CMD_PAIRING_REQ;
 	memcpy(&smp->preq[1], req, sizeof(*req));
-- 
cgit 


From 49dfbb9129c4edb318578de35cc45c555df37884 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k@samsung.com>
Date: Thu, 19 Jul 2012 12:54:04 +0530
Subject: Bluetooth: Fix socket not getting freed if l2cap channel create fails

If l2cap_chan_create() fails then it will return from l2cap_sock_kill
since zapped flag of sk is reset.

Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/l2cap_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a4bb27e8427e..b94abd30e6f9 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1174,7 +1174,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
 
 	chan = l2cap_chan_create();
 	if (!chan) {
-		l2cap_sock_kill(sk);
+		sk_free(sk);
 		return NULL;
 	}
 
-- 
cgit 


From 1f6fc43e621167492ed4b7f3b4269c584c3d6ccc Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@laptop.org>
Date: Thu, 2 Aug 2012 18:41:48 +0100
Subject: cfg80211: process pending events when unregistering net device

libertas currently calls cfg80211_disconnected() when it is being
brought down. This causes an event to be allocated, but since the
wdev is already removed from the rdev by the time that the event
processing work executes, the event is never processed or freed.
http://article.gmane.org/gmane.linux.kernel.wireless.general/95666

Fix this leak, and other possible situations, by processing the event
queue when a device is being unregistered. Thanks to Johannes Berg for
the suggestion.

Signed-off-by: Daniel Drake <dsd@laptop.org>
Cc: stable@vger.kernel.org
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c | 5 +++++
 net/wireless/core.h | 1 +
 net/wireless/util.c | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 31b40cc4a9c3..dcd64d5b07aa 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -952,6 +952,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		 */
 		synchronize_rcu();
 		INIT_LIST_HEAD(&wdev->list);
+		/*
+		 * Ensure that all events have been processed and
+		 * freed.
+		 */
+		cfg80211_process_wdev_events(wdev);
 		break;
 	case NETDEV_PRE_UP:
 		if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5206c6844fd7..bc7430b54771 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev, enum nl80211_iftype ntype,
 			  u32 *flags, struct vif_params *params);
 void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wdev_events(struct wireless_dev *wdev);
 
 int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 				 struct wireless_dev *wdev,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 26f8cd30f712..994e2f0cc7a8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -735,7 +735,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 	wdev->connect_keys = NULL;
 }
 
-static void cfg80211_process_wdev_events(struct wireless_dev *wdev)
+void cfg80211_process_wdev_events(struct wireless_dev *wdev)
 {
 	struct cfg80211_event *ev;
 	unsigned long flags;
-- 
cgit 


From caa0bf648cd20a2efbb6558531711e9ce2c6e948 Mon Sep 17 00:00:00 2001
From: Marek Lindner <lindner_marek@yahoo.de>
Date: Sat, 4 Aug 2012 04:13:26 +0000
Subject: batman-adv: select an internet gateway if none was chosen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a regression introduced by: 2265c141086474bbae55a5bb3afa1ebb78ccaa7c
("batman-adv: gateway election code refactoring")

Reported-by: Nicolás Echániz <nicoechaniz@codigosur.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/gateway_client.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b421cc49d2cd..fc866f2e4528 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -200,11 +200,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
 	if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
 		goto out;
 
-	if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect))
-		goto out;
-
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
+	if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect) && curr_gw)
+		goto out;
+
 	next_gw = batadv_gw_get_best_gw_node(bat_priv);
 
 	if (curr_gw == next_gw)
-- 
cgit 


From 8e7dfbc8d1ea9ca9058aa641a8fe795ebca320e2 Mon Sep 17 00:00:00 2001
From: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>
Date: Sat, 4 Aug 2012 09:31:29 +0000
Subject: tcp_output: fix sparse warning for tcp_wfree

Fix sparse warning:
	* symbol 'tcp_wfree' was not declared. Should it be static?

Signed-off-by: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a7b3ec9b6c3e..20dfd892c86f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -940,7 +940,7 @@ void __init tcp_tasklet_init(void)
  * We cant xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
-void tcp_wfree(struct sk_buff *skb)
+static void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
-- 
cgit 


From 91d27a8650d5359a7a320daeb35b88cdea15e3a8 Mon Sep 17 00:00:00 2001
From: Sorin Dumitru <sdumitru@ixiacom.com>
Date: Mon, 6 Aug 2012 02:35:58 +0000
Subject: llc: free the right skb

We are freeing skb instead of nskb, resulting in a double
free on skb and a leak from nskb.

Signed-off-by: Sorin Dumitru <sdumitru@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_station.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 39a8d8924b9c..6828e39ec2ec 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -268,7 +268,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb)
 out:
 	return rc;
 free:
-	kfree_skb(skb);
+	kfree_skb(nskb);
 	goto out;
 }
 
@@ -293,7 +293,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb)
 out:
 	return rc;
 free:
-	kfree_skb(skb);
+	kfree_skb(nskb);
 	goto out;
 }
 
@@ -322,7 +322,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
 out:
 	return rc;
 free:
-	kfree_skb(skb);
+	kfree_skb(nskb);
 	goto out;
 }
 
-- 
cgit 


From 9871f1ad677d95ffeca80e2c21b70af9bfc9cc91 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Mon, 6 Aug 2012 03:55:29 +0000
Subject: ip: fix error handling in ip_finish_output2()

__neigh_create() returns either a pointer to struct neighbour or PTR_ERR().
But the caller expects it to return either a pointer or NULL.  Replace
the NULL check with IS_ERR() check.

The bug was introduced in a263b3093641fb1ec377582c90986a7fd0625184
("ipv4: Make neigh lookups directly in output packet path.").

Signed-off-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52d18c1..76dde25fb9a0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
 	if (unlikely(!neigh))
 		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
-	if (neigh) {
+	if (!IS_ERR(neigh)) {
 		int res = dst_neigh_output(dst, neigh, skb);
 
 		rcu_read_unlock_bh();
-- 
cgit 


From 47fd92f5a76eb3f5b407773766e7d7fa1a179419 Mon Sep 17 00:00:00 2001
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Mon, 6 Aug 2012 05:45:48 +0000
Subject: net_sched: act: Delete estimator in error path.

Some action modules free struct tcf_common in their error path
while estimator is still active. This results in est_timer()
dereference freed memory.
Add gen_kill_estimator() in ipt, pedit and simple action.

Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c    | 7 ++++++-
 net/sched/act_pedit.c  | 5 ++++-
 net/sched/act_simple.c | 5 ++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60e281ad0f07..58fb3c7aab9e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -185,7 +185,12 @@ err3:
 err2:
 	kfree(tname);
 err1:
-	kfree(pc);
+	if (ret == ACT_P_CREATED) {
+		if (est)
+			gen_kill_estimator(&pc->tcfc_bstats,
+					   &pc->tcfc_rate_est);
+		kfree_rcu(pc, tcfc_rcu);
+	}
 	return err;
 }
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 26aa2f6ce257..45c53ab067a6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			kfree(pc);
+			if (est)
+				gen_kill_estimator(&pc->tcfc_bstats,
+						   &pc->tcfc_rate_est);
+			kfree_rcu(pc, tcfc_rcu);
 			return -ENOMEM;
 		}
 		ret = ACT_P_CREATED;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3922f2a2821b..3714f60f0b3c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 		d = to_defact(pc);
 		ret = alloc_defdata(d, defdata);
 		if (ret < 0) {
-			kfree(pc);
+			if (est)
+				gen_kill_estimator(&pc->tcfc_bstats,
+						   &pc->tcfc_rate_est);
+			kfree_rcu(pc, tcfc_rcu);
 			return ret;
 		}
 		d->tcf_action = parm->action;
-- 
cgit 


From 5d299f3d3c8a2fbc732b1bf03af36333ccec3130 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Aug 2012 05:09:33 +0000
Subject: net: ipv6: fix TCP early demux

IPv6 needs a cookie in dst_check() call.

We need to add rx_dst_cookie and provide a family independent
sk_rx_dst_set(sk, skb) method to properly support IPv6 TCP early demux.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c     |  4 +++-
 net/ipv4/tcp_ipv4.c      | 13 ++++++++++---
 net/ipv4/tcp_minisocks.c |  2 +-
 net/ipv6/tcp_ipv6.c      | 27 +++++++++++++++++++++++++--
 4 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fd2bc9e3c64..85308b90df80 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5392,6 +5392,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	if (unlikely(sk->sk_rx_dst == NULL))
+		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 	/*
 	 *	Header prediction.
 	 *	The code loosely follows the one in the famous
@@ -5605,7 +5607,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	tcp_set_state(sk, TCP_ESTABLISHED);
 
 	if (skb != NULL) {
-		inet_sk_rx_dst_set(sk, skb);
+		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 42b2a6a73092..272241f16fcb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1627,9 +1627,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		if (unlikely(sk->sk_rx_dst == NULL))
-			inet_sk_rx_dst_set(sk, skb);
-
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
@@ -1872,10 +1869,20 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
+static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+
 const struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
 	.conn_request	   = tcp_v4_conn_request,
 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
 	.net_header_len	   = sizeof(struct iphdr),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 232a90c3ec86..d9c9dcef2de3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,7 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		struct tcp_sock *oldtp = tcp_sk(sk);
 		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
 
-		inet_sk_rx_dst_set(newsk, skb);
+		newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb);
 
 		/* TCP Cookie Transactions require space for the cookie pair,
 		 * as it differs for each connection.  There is no need to
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c66b90f71c9b..5a439e9a4c01 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1447,7 +1447,17 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		struct dst_entry *dst = sk->sk_rx_dst;
+
 		sock_rps_save_rxhash(sk, skb);
+		if (dst) {
+			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+				dst_release(dst);
+				sk->sk_rx_dst = NULL;
+			}
+		}
+
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
 		if (opt_skb)
@@ -1705,9 +1715,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
 			struct dst_entry *dst = sk->sk_rx_dst;
 			struct inet_sock *icsk = inet_sk(sk);
 			if (dst)
-				dst = dst_check(dst, 0);
+				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
 			if (dst &&
-			    icsk->rx_dst_ifindex == inet6_iif(skb))
+			    icsk->rx_dst_ifindex == skb->skb_iif)
 				skb_dst_set_noref(skb, dst);
 		}
 	}
@@ -1719,10 +1729,23 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	if (rt->rt6i_node)
+		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
 static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
 	.rebuild_header	   = inet6_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
 	.conn_request	   = tcp_v6_conn_request,
 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
 	.net_header_len	   = sizeof(struct ipv6hdr),
-- 
cgit 


From 99aa3473e672ca610905838997fa018b95cd643f Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Mon, 6 Aug 2012 16:27:10 +0000
Subject: af_packet: Quiet sparse noise about using plain integer as NULL
 pointer

Quiets the sparse warning:
warning: Using plain integer as NULL pointer

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ceaca7c134a0..f016f6634a79 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1079,7 +1079,7 @@ static void *packet_current_rx_frame(struct packet_sock *po,
 	default:
 		WARN(1, "TPACKET version not supported\n");
 		BUG();
-		return 0;
+		return NULL;
 	}
 }
 
-- 
cgit 


From 0c03eca3d995e73d691edea8c787e25929ec156d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Aug 2012 00:47:11 +0000
Subject: net: fib: fix incorrect call_rcu_bh()

After IP route cache removal, I believe rcu_bh() has very little use and
we should remove this RCU variant, since it adds some cycles in fast
path.

Anyway, the call_rcu_bh() use in fib_true is obviously wrong, since
some users only assert rcu_read_lock().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f0cdb30921c0..57bd978483e1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -367,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
 
 static inline void free_leaf(struct leaf *l)
 {
-	call_rcu_bh(&l->rcu, __leaf_free_rcu);
+	call_rcu(&l->rcu, __leaf_free_rcu);
 }
 
 static inline void free_leaf_info(struct leaf_info *leaf)
-- 
cgit 


From a37e6e344910a43b9ebc2bbf29a029f5ea942598 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Aug 2012 10:55:45 +0000
Subject: net: force dst_default_metrics to const section

While investigating on network performance problems, I found this little
gem :

$ nm -v vmlinux | grep -1 dst_default_metrics
ffffffff82736540 b busy.46605
ffffffff82736560 B dst_default_metrics
ffffffff82736598 b dst_busy_list

Apparently, declaring a const array without initializer put it in
(writeable) bss section, in middle of possibly often dirtied cache
lines.

Since we really want dst_default_metrics be const to avoid any possible
false sharing and catch any buggy writes, I force a null initializer.

ffffffff818a4c20 R dst_default_metrics

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 069d51d29414..56d63612e1e4 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
-const u32 dst_default_metrics[RTAX_MAX];
+const u32 dst_default_metrics[RTAX_MAX + 1] = {
+	/* This initializer is needed to force linker to place this variable
+	 * into const section. Otherwise it might end into bss section.
+	 * We really want to avoid false sharing on this variable, and catch
+	 * any writes on it.
+	 */
+	[RTAX_MAX] = 0xdeadbeef,
+};
+
 
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 		int initial_ref, int initial_obsolete, unsigned short flags)
-- 
cgit 


From be72f63b4cd5d6778cf7eb0d0a86f18f96b51c0c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 7 Aug 2012 07:27:25 +0000
Subject: sched: add missing group change to qfq_change_class

[Resending again, as the text was corrupted by the email client]

To speed up operations, QFQ internally divides classes into
groups. Which group a class belongs to depends on the ratio between
the maximum packet length and the weight of the class. Unfortunately
the function qfq_change_class lacks the steps for changing the group
of a class when the ratio max_pkt_len/weight of the class changes.

For example, when the last of the following three commands is
executed, the group of class 1:1 is not correctly changed:

tc disc add dev XXX root handle 1: qfq
tc class add dev XXX parent 1: qfq classid 1:1 weight 1
tc class change dev XXX parent 1: classid 1:1 qfq weight 4

Not changing the group of a class does not affect the long-term
bandwidth guaranteed to the class, as the latter is independent of the
maximum packet length, and correctly changes (only) if the weight of
the class changes. In contrast, if the group of the class is not
updated, the class is still guaranteed the short-term bandwidth and
packet delay related to its old group, instead of the guarantees that
it should receive according to its new weight and/or maximum packet
length. This may also break service guarantees for other classes.
This patch adds the missing operations.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_qfq.c | 95 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9af01f3df18c..e4723d31fdd5 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -203,6 +203,34 @@ out:
 	return index;
 }
 
+/* Length of the next packet (0 if the queue is empty). */
+static unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+
+	skb = sch->ops->peek(sch);
+	return skb ? qdisc_pkt_len(skb) : 0;
+}
+
+static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *);
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+			       unsigned int len);
+
+static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
+				    u32 lmax, u32 inv_w, int delta_w)
+{
+	int i;
+
+	/* update qfq-specific data */
+	cl->lmax = lmax;
+	cl->inv_w = inv_w;
+	i = qfq_calc_index(cl->inv_w, cl->lmax);
+
+	cl->grp = &q->groups[i];
+
+	q->wsum += delta_w;
+}
+
 static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			    struct nlattr **tca, unsigned long *arg)
 {
@@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		lmax = 1UL << QFQ_MTU_SHIFT;
 
 	if (cl != NULL) {
+		bool need_reactivation = false;
+
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
 						    qdisc_root_sleeping_lock(sch),
@@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 				return err;
 		}
 
-		if (inv_w != cl->inv_w) {
-			sch_tree_lock(sch);
-			q->wsum += delta_w;
-			cl->inv_w = inv_w;
-			sch_tree_unlock(sch);
+		if (lmax == cl->lmax && inv_w == cl->inv_w)
+			return 0; /* nothing to update */
+
+		i = qfq_calc_index(inv_w, lmax);
+		sch_tree_lock(sch);
+		if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+			/*
+			 * shift cl->F back, to not charge the
+			 * class for the not-yet-served head
+			 * packet
+			 */
+			cl->F = cl->S;
+			/* remove class from its slot in the old group */
+			qfq_deactivate_class(q, cl);
+			need_reactivation = true;
 		}
+
+		qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+		if (need_reactivation) /* activate in new group */
+			qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+		sch_tree_unlock(sch);
+
 		return 0;
 	}
 
@@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	cl->refcnt = 1;
 	cl->common.classid = classid;
-	cl->lmax = lmax;
-	cl->inv_w = inv_w;
-	i = qfq_calc_index(cl->inv_w, cl->lmax);
 
-	cl->grp = &q->groups[i];
+	qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
 
 	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
 				      &pfifo_qdisc_ops, classid);
@@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			return err;
 		}
 	}
-	q->wsum += weight;
 
 	sch_tree_lock(sch);
 	qdisc_class_hash_insert(&q->clhash, &cl->common);
@@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
 	}
 }
 
-/* What is length of next packet in queue (0 if queue is empty) */
-static unsigned int qdisc_peek_len(struct Qdisc *sch)
-{
-	struct sk_buff *skb;
-
-	skb = sch->ops->peek(sch);
-	return skb ? qdisc_pkt_len(skb) : 0;
-}
-
 /*
  * Updates the class, returns true if also the group needs to be updated.
  */
@@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
 static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
-	struct qfq_group *grp;
 	struct qfq_class *cl;
 	int err;
-	u64 roundedS;
-	int s;
 
 	cl = qfq_classify(skb, sch, &err);
 	if (cl == NULL) {
@@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 
 	/* If reach this point, queue q was idle */
-	grp = cl->grp;
+	qfq_activate_class(q, cl, qdisc_pkt_len(skb));
+
+	return err;
+}
+
+/*
+ * Handle class switch from idle to backlogged.
+ */
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+			       unsigned int pkt_len)
+{
+	struct qfq_group *grp = cl->grp;
+	u64 roundedS;
+	int s;
+
 	qfq_update_start(q, cl);
 
 	/* compute new finish time and rounded start. */
-	cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
+	cl->F = cl->S + (u64)pkt_len * cl->inv_w;
 	roundedS = qfq_round_down(cl->S, grp->slot_shift);
 
 	/*
@@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 skip_update:
 	qfq_slot_insert(grp, cl, roundedS);
-
-	return err;
 }
 
 
-- 
cgit 


From 155e4e12b9f49c2dc817bb4c44e9416c46833c3d Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Tue, 7 Aug 2012 08:32:34 +0000
Subject: batman-adv: Fix mem leak in the batadv_tt_local_event() function

Memory is allocated for 'tt_change_node' with kmalloc().
'tt_change_node' may go out of scope really being used for anything
(except have a few members initialized) if we hit the 'del:' label.
This patch makes sure we free the memory in that case.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/translation-table.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a438f4b582fc..99dd8f75b3ff 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -197,6 +197,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
 del:
 		list_del(&entry->list);
 		kfree(entry);
+		kfree(tt_change_node);
 		event_removed = true;
 		goto unlock;
 	}
-- 
cgit 


From 7364e445f62825758fa61195d237a5b8ecdd06ec Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Wed, 8 Aug 2012 00:33:25 +0000
Subject: net/core: Fix potential memory leak in dev_set_alias()

Do not leak memory by updating pointer with potentially NULL realloc return value.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index f91abf800161..a39354ee1432 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1055,6 +1055,8 @@ rollback:
  */
 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 {
+	char *new_ifalias;
+
 	ASSERT_RTNL();
 
 	if (len >= IFALIASZ)
@@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 		return 0;
 	}
 
-	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-	if (!dev->ifalias)
+	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
+	if (!new_ifalias)
 		return -ENOMEM;
+	dev->ifalias = new_ifalias;
 
 	strlcpy(dev->ifalias, alias, len+1);
 	return len;
-- 
cgit 


From 36471012e2ae28ca3178f84d4687a2d88a36593e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Aug 2012 11:19:13 +0200
Subject: tcp: must free metrics at net dismantle

We currently leak all tcp metrics at struct net dismantle time.

tcp_net_metrics_exit() frees the hash table, we must first
iterate it to free all metrics.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_metrics.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a6399e1e..0abe67bb4d3a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
 
 static void __net_exit tcp_net_metrics_exit(struct net *net)
 {
+	unsigned int i;
+
+	for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
+		struct tcp_metrics_block *tm, *next;
+
+		tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
+		while (tm) {
+			next = rcu_dereference_protected(tm->tcpm_next, 1);
+			kfree(tm);
+			tm = next;
+		}
+	}
 	kfree(net->ipv4.tcp_metrics_hash);
 }
 
-- 
cgit 


From 3a7c384ffd57ef5fbd95f48edaa2ca4eb3d9f2ee Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Aug 2012 13:56:06 +0000
Subject: ipv4: tcp: unicast_sock should not land outside of TCP stack

commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
selinux regression, reported and bisected by John Stultz

selinux_ip_postroute_compat() expect to find a valid sk->sk_security
pointer, but this field is NULL for unicast_sock

It turns out that unicast_sock are really temporary stuff to be able
to reuse  part of IP stack (ip_append_data()/ip_push_pending_frames())

Fact is that frames sent by ip_send_unicast_reply() should be orphaned
to not fool LSM.

Note IPv6 never had this problem, as tcp_v6_send_response() doesnt use a
fake socket at all. I'll probably implement tcp_v4_send_response() to
remove these unicast_sock in linux-3.7

Reported-by: John Stultz <johnstul@us.ibm.com>
Bisected-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25fb9a0..ec410e08b4b9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
 								arg->csum));
 		nskb->ip_summed = CHECKSUM_NONE;
+		skb_orphan(nskb);
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}
-- 
cgit 


From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Aug 2012 14:11:00 +0000
Subject: net: tcp: ipv6_mapped needs sk_rx_dst_set method

commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a
regression for ipv6_mapped case.

[   67.422369] SELinux: initialized (dev autofs, type autofs), uses
genfs_contexts
[   67.449678] SELinux: initialized (dev autofs, type autofs), uses
genfs_contexts
[   92.631060] BUG: unable to handle kernel NULL pointer dereference at
(null)
[   92.631435] IP: [<          (null)>]           (null)
[   92.631645] PGD 0
[   92.631846] Oops: 0010 [#1] SMP
[   92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror
dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp
parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event
snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm
snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore
snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd
uhci_hcd
[   92.634294] CPU 0
[   92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3
[   92.634294] RIP: 0010:[<0000000000000000>]  [<          (null)>]
(null)
[   92.634294] RSP: 0018:ffff880245fc7cb0  EFLAGS: 00010282
[   92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX:
0000000000000000
[   92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI:
ffff88024827ad00
[   92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09:
0000000000000000
[   92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12:
ffff880254735380
[   92.634294] R13: ffff880254735380 R14: 0000000000000000 R15:
7fffffffffff0218
[   92.634294] FS:  00007f4516ccd6f0(0000) GS:ffff880256600000(0000)
knlGS:0000000000000000
[   92.634294] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4:
00000000000007f0
[   92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
0000000000000400
[   92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000,
task ffff880254b8cac0)
[   92.634294] Stack:
[   92.634294]  ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8
ffff880245fc7d68
[   92.634294]  ffffffff81385083 00000000001d2680 ffff8802547353a8
ffff880245fc7d18
[   92.634294]  ffffffff8105903a ffff88024827ad60 0000000000000002
00000000000000ff
[   92.634294] Call Trace:
[   92.634294]  [<ffffffff813837a7>] ? tcp_finish_connect+0x2c/0xfa
[   92.634294]  [<ffffffff81385083>] tcp_rcv_state_process+0x2b6/0x9c6
[   92.634294]  [<ffffffff8105903a>] ? sched_clock_cpu+0xc3/0xd1
[   92.634294]  [<ffffffff81059073>] ? local_clock+0x2b/0x3c
[   92.634294]  [<ffffffff8138caf3>] tcp_v4_do_rcv+0x63a/0x670
[   92.634294]  [<ffffffff8133278e>] release_sock+0x128/0x1bd
[   92.634294]  [<ffffffff8139f060>] __inet_stream_connect+0x1b1/0x352
[   92.634294]  [<ffffffff813325f5>] ? lock_sock_nested+0x74/0x7f
[   92.634294]  [<ffffffff8104b333>] ? wake_up_bit+0x25/0x25
[   92.634294]  [<ffffffff813325f5>] ? lock_sock_nested+0x74/0x7f
[   92.634294]  [<ffffffff8139f223>] ? inet_stream_connect+0x22/0x4b
[   92.634294]  [<ffffffff8139f234>] inet_stream_connect+0x33/0x4b
[   92.634294]  [<ffffffff8132e8cf>] sys_connect+0x78/0x9e
[   92.634294]  [<ffffffff813fd407>] ? sysret_check+0x1b/0x56
[   92.634294]  [<ffffffff81088503>] ? __audit_syscall_entry+0x195/0x1c8
[   92.634294]  [<ffffffff811cc26e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   92.634294]  [<ffffffff813fd3e2>] system_call_fastpath+0x16/0x1b
[   92.634294] Code:  Bad RIP value.
[   92.634294] RIP  [<          (null)>]           (null)
[   92.634294]  RSP <ffff880245fc7cb0>
[   92.634294] CR2: 0000000000000000
[   92.648982] ---[ end trace 24e2bed94314c8d9 ]---
[   92.649146] Kernel panic - not syncing: Fatal exception in interrupt

Fix this using inet_sk_rx_dst_set(), and export this function in case
IPv6 is modular.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 3 ++-
 net/ipv6/tcp_ipv6.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 272241f16fcb..767823764016 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
-static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 
@@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	sk->sk_rx_dst = dst;
 	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 }
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
 
 const struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	   = ip_queue_xmit,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5a439e9a4c01..bb9ce2b2f377 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
 	.conn_request	   = tcp_v6_conn_request,
 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
 	.net_header_len	   = sizeof(struct iphdr),
-- 
cgit 


From e9324b2ce656e1910d2385b9b47a2f926456dbe3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:45 +0000
Subject: netfilter: nf_ct_sip: fix helper name

Commit 3a8fc53a (netfilter: nf_ct_helper: allocate 16 bytes for the helper
and policy names) introduced a bug in the SIP helper, the helper name is
sprinted to the sip_names array instead of instead of into the helper
structure. This breaks the helper match and the /proc/net/nf_conntrack_expect
output.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_sip.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 758a1bacc126..2fb666920cc9 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1515,7 +1515,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
 }
 
 static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
-static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
 
 static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
 	[SIP_EXPECT_SIGNALLING] = {
@@ -1585,9 +1584,9 @@ static int __init nf_conntrack_sip_init(void)
 			sip[i][j].me = THIS_MODULE;
 
 			if (ports[i] == SIP_PORT)
-				sprintf(sip_names[i][j], "sip");
+				sprintf(sip[i][j].name, "sip");
 			else
-				sprintf(sip_names[i][j], "sip-%u", i);
+				sprintf(sip[i][j].name, "sip-%u", i);
 
 			pr_debug("port #%u: %u\n", i, ports[i]);
 
-- 
cgit 


From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:46 +0000
Subject: netfilter: nf_ct_sip: fix IPv6 address parsing

Within SIP messages IPv6 addresses are enclosed in square brackets in most
cases, with the exception of the "received=" header parameter. Currently
the helper fails to parse enclosed addresses.

This patch:

- changes the SIP address parsing function to enforce square brackets
  when required, and accept them when not required but present, as
  recommended by RFC 5118.

- adds a new SDP address parsing function that never accepts square
  brackets since SDP doesn't use them.

With these changes, the SIP helper correctly parses all test messages
from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages
for Internet Protocol Version 6 (IPv6)).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_sip.c  |  4 +-
 net/netfilter/nf_conntrack_sip.c | 87 ++++++++++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a23813d26..eef8f29e8bf8 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -173,7 +173,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * the reply. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "maddr=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, true) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
@@ -187,7 +187,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * from which the server received the request. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "received=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, false) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2fb666920cc9..5c0a112aeee6 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr,
 	return len + digits_len(ct, dptr, limit, shift);
 }
 
-static int parse_addr(const struct nf_conn *ct, const char *cp,
-                      const char **endp, union nf_inet_addr *addr,
-                      const char *limit)
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit, bool delim)
 {
 	const char *end;
-	int ret = 0;
+	int ret;
 
 	if (!ct)
 		return 0;
@@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
 	switch (nf_ct_l3num(ct)) {
 	case AF_INET:
 		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		if (ret == 0)
+			return 0;
 		break;
 	case AF_INET6:
+		if (cp < limit && *cp == '[')
+			cp++;
+		else if (delim)
+			return 0;
+
 		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		if (ret == 0)
+			return 0;
+
+		if (end < limit && *end == ']')
+			end++;
+		else if (delim)
+			return 0;
 		break;
 	default:
 		BUG();
 	}
 
-	if (ret == 0 || end == cp)
-		return 0;
 	if (endp)
 		*endp = end;
 	return 1;
@@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr,
 	union nf_inet_addr addr;
 	const char *aux = dptr;
 
-	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+	if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
 		pr_debug("ip: %s parse failed.!\n", dptr);
 		return 0;
 	}
@@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct,
 		return 0;
 	dptr += shift;
 
-	if (!parse_addr(ct, dptr, &end, addr, limit))
+	if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
 		return -1;
 	if (end < limit && *end == ':') {
 		end++;
@@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
 	if (ret == 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+	if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
 		return -1;
 	if (*c == ':') {
 		c++;
@@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 			       unsigned int dataoff, unsigned int datalen,
 			       const char *name,
 			       unsigned int *matchoff, unsigned int *matchlen,
-			       union nf_inet_addr *addr)
+			       union nf_inet_addr *addr, bool delim)
 {
 	const char *limit = dptr + datalen;
 	const char *start, *end;
@@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 		return 0;
 
 	start += strlen(name);
-	if (!parse_addr(ct, start, &end, addr, limit))
+	if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
 		return 0;
 	*matchoff = start - dptr;
 	*matchlen = end - start;
@@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
 	return 1;
 }
 
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit)
+{
+	const char *end;
+	int ret;
+
+	memset(addr, 0, sizeof(*addr));
+	switch (nf_ct_l3num(ct)) {
+	case AF_INET:
+		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		break;
+	case AF_INET6:
+		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		break;
+	default:
+		BUG();
+	}
+
+	if (ret == 0)
+		return 0;
+	if (endp)
+		*endp = end;
+	return 1;
+}
+
+/* skip ip address. returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+			const char *limit, int *shift)
+{
+	union nf_inet_addr addr;
+	const char *aux = dptr;
+
+	if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+		pr_debug("ip: %s parse failed.!\n", dptr);
+		return 0;
+	}
+
+	return dptr - aux;
+}
+
 /* SDP header parsing: a SDP session description contains an ordered set of
  * headers, starting with a section containing general session parameters,
  * optionally followed by multiple media descriptions.
@@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
  */
 static const struct sip_header ct_sdp_hdrs[] = {
 	[SDP_HDR_VERSION]		= SDP_HDR("v=", NULL, digits_len),
-	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", epaddr_len),
+	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
 	[SDP_HDR_MEDIA]			= SDP_HDR("m=", NULL, media_len),
 };
 
@@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
 	if (ret <= 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, NULL, addr,
-			dptr + *matchoff + *matchlen))
+	if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+			    dptr + *matchoff + *matchlen))
 		return -1;
 	return 1;
 }
-- 
cgit 


From f22eb25cf5b1157b29ef88c793b71972efc47143 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:47 +0000
Subject: netfilter: nf_nat_sip: fix via header translation with multiple
 parameters

Via-headers are parsed beginning at the first character after the Via-address.
When the address is translated first and its length decreases, the offset to
start parsing at is incorrect and header parameters might be missed.

Update the offset after translating the Via-address to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_sip.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index eef8f29e8bf8..4ad9cf173992 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
 				    hdr, NULL, &matchoff, &matchlen,
 				    &addr, &port) > 0) {
-		unsigned int matchend, poff, plen, buflen, n;
+		unsigned int olen, matchend, poff, plen, buflen, n;
 		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
 
 		/* We're only interested in headers related to this
@@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 				goto next;
 		}
 
+		olen = *datalen;
 		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 
-		matchend = matchoff + matchlen;
+		matchend = matchoff + matchlen + *datalen - olen;
 
 		/* The maddr= parameter (RFC 2361) specifies where to send
 		 * the reply. */
-- 
cgit 


From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Aug 2012 02:22:47 +0000
Subject: ipv4: fix ip_send_skb()

ip_send_skb() can send orphaned skb, so we must pass the net pointer to
avoid possible NULL dereference in error path.

Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not
land outside of TCP stack)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 5 ++---
 net/ipv4/udp.c       | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ec410e08b4b9..147ccc3e93db 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1366,9 +1366,8 @@ out:
 	return skb;
 }
 
-int ip_send_skb(struct sk_buff *skb)
+int ip_send_skb(struct net *net, struct sk_buff *skb)
 {
-	struct net *net = sock_net(skb->sk);
 	int err;
 
 	err = ip_local_out(skb);
@@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
 		return 0;
 
 	/* Netfilter gets whole the not fragmented skb. */
-	return ip_send_skb(skb);
+	return ip_send_skb(sock_net(sk), skb);
 }
 
 /*
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4c3582a991f..6f6d1aca3c3d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 		uh->check = CSUM_MANGLED_0;
 
 send:
-	err = ip_send_skb(skb);
+	err = ip_send_skb(sock_net(sk), skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet->recverr) {
 			UDP_INC_STATS_USER(sock_net(sk),
-- 
cgit 


From 7f5c3e3a80e6654cf48dfba7cf94f88c6b505467 Mon Sep 17 00:00:00 2001
From: "danborkmann@iogearbox.net" <danborkmann@iogearbox.net>
Date: Fri, 10 Aug 2012 22:48:54 +0000
Subject: af_packet: remove BUG statement in tpacket_destruct_skb

Here's a quote of the comment about the BUG macro from asm-generic/bug.h:

 Don't use BUG() or BUG_ON() unless there's really no way out; one
 example might be detecting data structure corruption in the middle
 of an operation that can't be backed out of.  If the (sub)system
 can somehow continue operating, perhaps with reduced functionality,
 it's probably not BUG-worthy.

 If you're tempted to BUG(), think again:  is completely giving up
 really the *only* solution?  There are usually better options, where
 users don't need to reboot ASAP and can mostly shut down cleanly.

In our case, the status flag of a ring buffer slot is managed from both sides,
the kernel space and the user space. This means that even though the kernel
side might work as expected, the user space screws up and changes this flag
right between the send(2) is triggered when the flag is changed to
TP_STATUS_SENDING and a given skb is destructed after some time. Then, this
will hit the BUG macro. As David suggested, the best solution is to simply
remove this statement since it cannot be used for kernel side internal
consistency checks. I've tested it and the system still behaves /stable/ in
this case, so in accordance with the above comment, we should rather remove it.

Signed-off-by: Daniel Borkmann <daniel.borkmann@tik.ee.ethz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f016f6634a79..8ac890a1a4c0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1936,7 +1936,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 
 	if (likely(po->tx_ring.pg_vec)) {
 		ph = skb_shinfo(skb)->destructor_arg;
-		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
-- 
cgit 


From 68e035c950dbceaf660144bf74054dfdfb6aad15 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 14 Aug 2012 12:47:37 +0200
Subject: netfilter: ctnetlink: fix missing locking while changing conntrack
 from nfqueue

Since 9cb017665 netfilter: add glue code to integrate nfnetlink_queue and
ctnetlink, we can modify the conntrack entry via nfnl_queue. However, the
change of the conntrack entry via nfnetlink_queue requires appropriate
locking to avoid concurrent updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 14f67a2cbcb5..da4fc37a8578 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1896,10 +1896,15 @@ static int
 ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 {
 	struct nlattr *cda[CTA_MAX+1];
+	int ret;
 
 	nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
 
-	return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+	spin_lock_bh(&nf_conntrack_lock);
+	ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	return ret;
 }
 
 static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
-- 
cgit 


From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:37 +0000
Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and
 __netpoll_setup()

slave_enable_netpoll() and __netpoll_setup() may be called
with read_lock() held, so should use GFP_ATOMIC to allocate
memory. Eric suggested to pass gfp flags to __netpoll_setup().

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c    |  7 ++++---
 net/bridge/br_device.c  | 12 ++++++------
 net/bridge/br_if.c      |  2 +-
 net/bridge/br_private.h |  4 ++--
 net/core/netpoll.c      |  8 ++++----
 5 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 73a2a83ee2da..ee4ae0944cef 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -669,19 +669,20 @@ static void vlan_dev_poll_controller(struct net_device *dev)
 	return;
 }
 
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
+				  gfp_t gfp)
 {
 	struct vlan_dev_priv *info = vlan_dev_priv(dev);
 	struct net_device *real_dev = info->real_dev;
 	struct netpoll *netpoll;
 	int err = 0;
 
-	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+	netpoll = kzalloc(sizeof(*netpoll), gfp);
 	err = -ENOMEM;
 	if (!netpoll)
 		goto out;
 
-	err = __netpoll_setup(netpoll, real_dev);
+	err = __netpoll_setup(netpoll, real_dev, gfp);
 	if (err) {
 		kfree(netpoll);
 		goto out;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 333484537600..ed0e0f9dc788 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -213,7 +213,8 @@ static void br_netpoll_cleanup(struct net_device *dev)
 	}
 }
 
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+			    gfp_t gfp)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p, *n;
@@ -222,8 +223,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		if (!p->dev)
 			continue;
-
-		err = br_netpoll_enable(p);
+		err = br_netpoll_enable(p, gfp);
 		if (err)
 			goto fail;
 	}
@@ -236,17 +236,17 @@ fail:
 	goto out;
 }
 
-int br_netpoll_enable(struct net_bridge_port *p)
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	struct netpoll *np;
 	int err = 0;
 
-	np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+	np = kzalloc(sizeof(*p->np), gfp);
 	err = -ENOMEM;
 	if (!np)
 		goto out;
 
-	err = __netpoll_setup(np, p->dev);
+	err = __netpoll_setup(np, p->dev, gfp);
 	if (err) {
 		kfree(np);
 		goto out;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e1144e1617be..171fd6b9bfe6 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err2;
 
-	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
 		goto err3;
 
 	err = netdev_set_master(dev, br->dev);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a768b2408edf..f507d2af9646 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 		netpoll_send_skb(np, skb);
 }
 
-extern int br_netpoll_enable(struct net_bridge_port *p);
+extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
 extern void br_netpoll_disable(struct net_bridge_port *p);
 #else
 static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
@@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 {
 }
 
-static inline int br_netpoll_enable(struct net_bridge_port *p)
+static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	return 0;
 }
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e42b443..37cc854774a4 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -715,7 +715,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 }
 EXPORT_SYMBOL(netpoll_parse_options);
 
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 {
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
@@ -734,7 +734,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 	}
 
 	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		npinfo = kmalloc(sizeof(*npinfo), gfp);
 		if (!npinfo) {
 			err = -ENOMEM;
 			goto out;
@@ -752,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 
 		ops = np->dev->netdev_ops;
 		if (ops->ndo_netpoll_setup) {
-			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
 			if (err)
 				goto free_npinfo;
 		}
@@ -857,7 +857,7 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	rtnl_lock();
-	err = __netpoll_setup(np, ndev);
+	err = __netpoll_setup(np, ndev, GFP_KERNEL);
 	rtnl_unlock();
 
 	if (err)
-- 
cgit 


From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:38 +0000
Subject: netpoll: make __netpoll_cleanup non-block

Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup()
may be called with read_lock() held too, so we should make them
non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c   |  6 +-----
 net/bridge/br_device.c |  6 +-----
 net/core/netpoll.c     | 42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 34 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ee4ae0944cef..b65623f90660 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -704,11 +704,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	info->netpoll = NULL;
 
-        /* Wait for transmitting packets to finish before freeing. */
-        synchronize_rcu_bh();
-
-        __netpoll_cleanup(netpoll);
-        kfree(netpoll);
+	__netpoll_free_rcu(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ed0e0f9dc788..f41ba4048c9a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	/* Wait for transmitting packets to finish before freeing. */
-	synchronize_rcu_bh();
-
-	__netpoll_cleanup(np);
-	kfree(np);
+	__netpoll_free_rcu(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37cc854774a4..dc17f1db1479 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -878,6 +878,24 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+	struct netpoll_info *npinfo =
+			container_of(rcu_head, struct netpoll_info, rcu);
+
+	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->txq);
+
+	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
+	cancel_delayed_work(&npinfo->tx_work);
+
+	/* clean after last, unfinished work */
+	__skb_queue_purge(&npinfo->txq);
+	/* now cancel it again */
+	cancel_delayed_work(&npinfo->tx_work);
+	kfree(npinfo);
+}
+
 void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
@@ -903,20 +921,24 @@ void __netpoll_cleanup(struct netpoll *np)
 			ops->ndo_netpoll_cleanup(np->dev);
 
 		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-		/* avoid racing with NAPI reading npinfo */
-		synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
 
-		skb_queue_purge(&npinfo->arp_tx);
-		skb_queue_purge(&npinfo->txq);
-		cancel_delayed_work_sync(&npinfo->tx_work);
+	__netpoll_cleanup(np);
+	kfree(np);
+}
 
-		/* clean after last, unfinished work */
-		__skb_queue_purge(&npinfo->txq);
-		kfree(npinfo);
-	}
+void __netpoll_free_rcu(struct netpoll *np)
+{
+	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
 }
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-- 
cgit 


From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:40 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx()

In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(),
this patch fixes it by using the 'npinfo' passed from netpoll_rx()
where it is already dereferenced with rcu_dereference_bh().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dc17f1db1479..d055bb01328b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -543,13 +543,12 @@ static void arp_reply(struct sk_buff *skb)
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
 	int proto, len, ulen;
 	int hits = 0;
 	const struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct netpoll *np, *tmp;
 
 	if (list_empty(&npinfo->rx_np))
-- 
cgit 


From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:42 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()

This patch fixes several problems in the call path of
netpoll_send_skb_on_dev():

1. Disable IRQ's before calling netpoll_send_skb_on_dev().

2. All the callees of netpoll_send_skb_on_dev() should use
   rcu_dereference_bh() to dereference ->npinfo.

3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d055bb01328b..174346ac15a0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -54,7 +54,7 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -170,7 +170,8 @@ static void poll_napi(struct net_device *dev)
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			budget = poll_one_napi(dev->npinfo, napi, budget);
+			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+					       napi, budget);
 			spin_unlock(&napi->poll_lock);
 
 			if (!budget)
@@ -185,13 +186,14 @@ static void service_arp_queue(struct netpoll_info *npi)
 		struct sk_buff *skb;
 
 		while ((skb = skb_dequeue(&npi->arp_tx)))
-			arp_reply(skb);
+			netpoll_arp_reply(skb, npi);
 	}
 }
 
 static void netpoll_poll_dev(struct net_device *dev)
 {
 	const struct net_device_ops *ops;
+	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
 	if (!dev || !netif_running(dev))
 		return;
@@ -206,17 +208,18 @@ static void netpoll_poll_dev(struct net_device *dev)
 	poll_napi(dev);
 
 	if (dev->flags & IFF_SLAVE) {
-		if (dev->npinfo) {
+		if (ni) {
 			struct net_device *bond_dev = dev->master;
 			struct sk_buff *skb;
-			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+			struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+			while ((skb = skb_dequeue(&ni->arp_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+				skb_queue_tail(&bond_ni->arp_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(dev->npinfo);
+	service_arp_queue(ni);
 
 	zap_completion_queue();
 }
@@ -302,6 +305,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
+/* call with IRQ disabled */
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev)
 {
@@ -309,8 +313,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	unsigned long tries;
 	const struct net_device_ops *ops = dev->netdev_ops;
 	/* It is up to the caller to keep npinfo alive. */
-	struct netpoll_info *npinfo = np->dev->npinfo;
+	struct netpoll_info *npinfo;
+
+	WARN_ON_ONCE(!irqs_disabled());
 
+	npinfo = rcu_dereference_bh(np->dev->npinfo);
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
 		__kfree_skb(skb);
 		return;
@@ -319,11 +326,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	/* don't get messages out of order, and no recursion */
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
-		unsigned long flags;
 
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
@@ -347,10 +352,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		}
 
 		WARN_ONCE(!irqs_disabled(),
-			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
 			dev->name, ops->ndo_start_xmit);
 
-		local_irq_restore(flags);
 	}
 
 	if (status != NETDEV_TX_OK) {
@@ -423,9 +427,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
-- 
cgit 


From d30362c0712eb567334b3b66de7c40d4372f2c6f Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:43 +0000
Subject: bridge: add some comments for NETDEV_RELEASE

Add comments on why we don't notify NETDEV_RELEASE.

Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 171fd6b9bfe6..1c8fdc3558cd 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	if (!p || p->br != br)
 		return -EINVAL;
 
+	/* Since more than one interface can be attached to a bridge,
+	 * there still maybe an alternate path for netconsole to use;
+	 * therefore there is no reason for a NETDEV_RELEASE event.
+	 */
 	del_nbp(p);
 
 	spin_lock_bh(&br->lock);
-- 
cgit 


From 4e3828c4bfd90b00a951cad7c8da27d1966beefe Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:44 +0000
Subject: bridge: use list_for_each_entry() in netpoll functions

We don't delete 'p' from the list in the loop,
so we can just use list_for_each_entry().

Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f41ba4048c9a..32211fa5b506 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -206,21 +206,20 @@ static void br_poll_controller(struct net_device *br_dev)
 static void br_netpoll_cleanup(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *p, *n;
+	struct net_bridge_port *p;
 
-	list_for_each_entry_safe(p, n, &br->port_list, list) {
+	list_for_each_entry(p, &br->port_list, list)
 		br_netpoll_disable(p);
-	}
 }
 
 static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
 			    gfp_t gfp)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *p, *n;
+	struct net_bridge_port *p;
 	int err = 0;
 
-	list_for_each_entry_safe(p, n, &br->port_list, list) {
+	list_for_each_entry(p, &br->port_list, list) {
 		if (!p->dev)
 			continue;
 		err = br_netpoll_enable(p, gfp);
-- 
cgit 


From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:45 +0000
Subject: netpoll: check netpoll tx status on the right device

Although this doesn't matter actually, because netpoll_tx_running()
doesn't use the parameter, the code will be more readable.

For team_dev_queue_xmit() we have to move it down to avoid
compile errors.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e9466d412707..02015a505d2a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	skb->dev = to->dev;
 
-	if (unlikely(netpoll_tx_running(to->dev))) {
+	if (unlikely(netpoll_tx_running(to->br->dev))) {
 		if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 			kfree_skb(skb);
 		else {
-- 
cgit 


From f3da38932b46d1bf171634cc7aae3da405eaf7a6 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:47 +0000
Subject: vlan: clean up some variable names

To be consistent, s/info/vlan/.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b65623f90660..0347d48aa7c9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -672,8 +672,8 @@ static void vlan_dev_poll_controller(struct net_device *dev)
 static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
 				  gfp_t gfp)
 {
-	struct vlan_dev_priv *info = vlan_dev_priv(dev);
-	struct net_device *real_dev = info->real_dev;
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct net_device *real_dev = vlan->real_dev;
 	struct netpoll *netpoll;
 	int err = 0;
 
@@ -688,7 +688,7 @@ static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *n
 		goto out;
 	}
 
-	info->netpoll = netpoll;
+	vlan->netpoll = netpoll;
 
 out:
 	return err;
@@ -696,13 +696,13 @@ out:
 
 static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 {
-	struct vlan_dev_priv *info = vlan_dev_priv(dev);
-	struct netpoll *netpoll = info->netpoll;
+	struct vlan_dev_priv *vlan= vlan_dev_priv(dev);
+	struct netpoll *netpoll = vlan->netpoll;
 
 	if (!netpoll)
 		return;
 
-	info->netpoll = NULL;
+	vlan->netpoll = NULL;
 
 	__netpoll_free_rcu(netpoll);
 }
-- 
cgit 


From 6eacf8ad8d01c49b95b994b0bf379db2b5b29460 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:48 +0000
Subject: vlan: clean up vlan_dev_hard_start_xmit()

Clean up vlan_dev_hard_start_xmit() function.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 0347d48aa7c9..402442402af7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return rc;
 }
 
+static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)
+		netpoll_send_skb(vlan->netpoll, skb);
+#else
+	BUG();
+#endif
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
-		vlan_tci = vlan_dev_priv(dev)->vlan_id;
+		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-	skb->dev = vlan_dev_priv(dev)->real_dev;
+	skb->dev = vlan->real_dev;
 	len = skb->len;
-	if (netpoll_tx_running(dev))
-		return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+	if (unlikely(netpoll_tx_running(dev)))
+		return vlan_netpoll_send_skb(vlan, skb);
+
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct vlan_pcpu_stats *stats;
 
-		stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
+		stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
 		u64_stats_update_end(&stats->syncp);
 	} else {
-		this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
+		this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
 	}
 
 	return ret;
-- 
cgit 


From 689971b44613883ee74ae9c1b31a864aaa3a8e17 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:49 +0000
Subject: netpoll: handle vlan tags in netpoll tx and rx path

Without this patch, I can't get netconsole logs remotely over
vlan. The reason is probably we don't handle vlan tags in either
netpoll tx or rx path.

I am not sure if I use these vlan functions correctly, at
least this patch works.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 174346ac15a0..e4ba3e70c174 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,6 +26,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/if_vlan.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <asm/unaligned.h>
@@ -334,6 +335,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_xmit_stopped(txq)) {
+					if (vlan_tx_tag_present(skb) &&
+					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
+						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+						if (unlikely(!skb))
+							break;
+						skb->vlan_tci = 0;
+					}
+
 					status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
@@ -567,6 +576,12 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		return 1;
 	}
 
+	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+		skb = vlan_untag(skb);
+		if (unlikely(!skb))
+			goto out;
+	}
+
 	proto = ntohs(eth_hdr(skb)->h_proto);
 	if (proto != ETH_P_IP)
 		goto out;
-- 
cgit 


From 6bdb7fe31046ac50b47e83c35cd6c6b6160a475d Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:50 +0000
Subject: netpoll: re-enable irq in poll_napi()

napi->poll() needs IRQ enabled, so we have to re-enable IRQ before
calling it.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e4ba3e70c174..346b1eb83a1f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -168,16 +168,24 @@ static void poll_napi(struct net_device *dev)
 	struct napi_struct *napi;
 	int budget = 16;
 
+	WARN_ON_ONCE(!irqs_disabled());
+
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		local_irq_enable();
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
+			rcu_read_lock_bh();
 			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
 					       napi, budget);
+			rcu_read_unlock_bh();
 			spin_unlock(&napi->poll_lock);
 
-			if (!budget)
+			if (!budget) {
+				local_irq_disable();
 				break;
+			}
 		}
+		local_irq_disable();
 	}
 }
 
-- 
cgit 


From 7bd86cc282a458b66c41e3f6676de6656c99b8db Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 12 Aug 2012 20:09:59 +0000
Subject: ipv4: Cache local output routes

Commit caacf05e5ad1abf causes big drop of UDP loop back performance.
The cause of the regression is that we do not cache the local output
routes. Each time we send a datagram from unconnected UDP socket,
the kernel allocates a dst_entry and adds it to the rt_uncached_list.
It creates lock contention on the rt_uncached_lock.

Reported-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4ba974f143c..fd9ecb52c66b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2028,7 +2028,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 		}
 		dev_out = net->loopback_dev;
 		fl4->flowi4_oif = dev_out->ifindex;
-		res.fi = NULL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
-- 
cgit 


From 4855d6f3116e891b66198838b683dce3dcf6e874 Mon Sep 17 00:00:00 2001
From: Igor Maravic <igorm@etf.rs>
Date: Sun, 12 Aug 2012 22:31:58 +0000
Subject: net: ipv6: proc: Fix error handling

Fix error handling in case making of dir dev_snmp6 failes

Signed-off-by: Igor Maravic <igorm@etf.rs>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index da2e92d05c15..745a32042950 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
 		goto proc_dev_snmp6_fail;
 	return 0;
 
+proc_dev_snmp6_fail:
+	proc_net_remove(net, "snmp6");
 proc_snmp6_fail:
 	proc_net_remove(net, "sockstat6");
-proc_dev_snmp6_fail:
-	proc_net_remove(net, "dev_snmp6");
 	return -ENOMEM;
 }
 
-- 
cgit 


From 6024935f5ff5f1646bce8404416318e5fd4a0c4a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:49:59 +0000
Subject: llc2: Fix silent failure of llc_station_init()

llc_station_init() creates and processes an event skb with no effect
other than to change the state from DOWN to UP.  Allocation failure is
reported, but then ignored by its caller, llc2_init().  Remove this
possibility by simply initialising the state as UP.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_station.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 6828e39ec2ec..45ddbb93c5d0 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb)
 	llc_station_state_process(skb);
 }
 
-int __init llc_station_init(void)
+void __init llc_station_init(void)
 {
-	int rc = -ENOBUFS;
-	struct sk_buff *skb;
-	struct llc_station_state_ev *ev;
-
 	skb_queue_head_init(&llc_main_station.mac_pdu_q);
 	skb_queue_head_init(&llc_main_station.ev_q.list);
 	spin_lock_init(&llc_main_station.ev_q.lock);
@@ -700,20 +696,9 @@ int __init llc_station_init(void)
 			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
-	skb = alloc_skb(0, GFP_ATOMIC);
-	if (!skb)
-		goto out;
-	rc = 0;
 	llc_set_station_handler(llc_station_rcv);
-	ev = llc_station_ev(skb);
-	memset(ev, 0, sizeof(*ev));
 	llc_main_station.maximum_retry	= 1;
-	llc_main_station.state		= LLC_STATION_STATE_DOWN;
-	ev->type	= LLC_STATION_EV_TYPE_SIMPLE;
-	ev->prim_type	= LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
-	rc = llc_station_next_state(skb);
-out:
-	return rc;
+	llc_main_station.state		= LLC_STATION_STATE_UP;
 }
 
 void __exit llc_station_exit(void)
-- 
cgit 


From f4f8720febf0d785a054fc09bde5e3ad09728a58 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:50:43 +0000
Subject: llc2: Call llc_station_exit() on llc2_init() failure path

Otherwise the station packet handler will remain registered even though
the module is unloaded.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c      | 5 +++--
 net/llc/llc_station.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index f6fe4d400502..8c2919ca36f6 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1206,7 +1206,7 @@ static int __init llc2_init(void)
 	rc = llc_proc_init();
 	if (rc != 0) {
 		printk(llc_proc_err_msg);
-		goto out_unregister_llc_proto;
+		goto out_station;
 	}
 	rc = llc_sysctl_init();
 	if (rc) {
@@ -1226,7 +1226,8 @@ out_sysctl:
 	llc_sysctl_exit();
 out_proc:
 	llc_proc_exit();
-out_unregister_llc_proto:
+out_station:
+	llc_station_exit();
 	proto_unregister(&llc_proto);
 	goto out;
 }
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 45ddbb93c5d0..bba5184fafd7 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -701,7 +701,7 @@ void __init llc_station_init(void)
 	llc_main_station.state		= LLC_STATION_STATE_UP;
 }
 
-void __exit llc_station_exit(void)
+void llc_station_exit(void)
 {
 	llc_set_station_handler(NULL);
 }
-- 
cgit 


From aadf31de16a7b2878af00a02e6557df84efa784b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:50:55 +0000
Subject: llc: Fix races between llc2 handler use and (un)registration

When registering the handlers, any state they rely on must be
completely initialised first.  When unregistering, we must wait until
they are definitely no longer running.  llc_rcv() must also avoid
reading the handler pointers again after checking for NULL.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_input.c   | 21 +++++++++++++++++----
 net/llc/llc_station.c |  2 +-
 2 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index e32cab44ea95..dd3e83328ad5 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap,
 void llc_add_pack(int type, void (*handler)(struct llc_sap *sap,
 					    struct sk_buff *skb))
 {
+	smp_wmb(); /* ensure initialisation is complete before it's called */
 	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
 		llc_type_handlers[type - 1] = handler;
 }
@@ -50,11 +51,19 @@ void llc_remove_pack(int type)
 {
 	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
 		llc_type_handlers[type - 1] = NULL;
+	synchronize_net();
 }
 
 void llc_set_station_handler(void (*handler)(struct sk_buff *skb))
 {
+	/* Ensure initialisation is complete before it's called */
+	if (handler)
+		smp_wmb();
+
 	llc_station_handler = handler;
+
+	if (!handler)
+		synchronize_net();
 }
 
 /**
@@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	int dest;
 	int (*rcv)(struct sk_buff *, struct net_device *,
 		   struct packet_type *, struct net_device *);
+	void (*sta_handler)(struct sk_buff *skb);
+	void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
@@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	rcv = rcu_dereference(sap->rcv_func);
 	dest = llc_pdu_type(skb);
-	if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
+	sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+	if (unlikely(!sap_handler)) {
 		if (rcv)
 			rcv(skb, dev, pt, orig_dev);
 		else
@@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 			if (cskb)
 				rcv(cskb, dev, pt, orig_dev);
 		}
-		llc_type_handlers[dest - 1](sap, skb);
+		sap_handler(sap, skb);
 	}
 	llc_sap_put(sap);
 out:
@@ -202,9 +214,10 @@ drop:
 	kfree_skb(skb);
 	goto out;
 handle_station:
-	if (!llc_station_handler)
+	sta_handler = ACCESS_ONCE(llc_station_handler);
+	if (!sta_handler)
 		goto drop;
-	llc_station_handler(skb);
+	sta_handler(skb);
 	goto out;
 }
 
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index bba5184fafd7..b2f2bac2c2a2 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -696,9 +696,9 @@ void __init llc_station_init(void)
 			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
-	llc_set_station_handler(llc_station_rcv);
 	llc_main_station.maximum_retry	= 1;
 	llc_main_station.state		= LLC_STATION_STATE_UP;
+	llc_set_station_handler(llc_station_rcv);
 }
 
 void llc_station_exit(void)
-- 
cgit 


From 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 14 Aug 2012 08:54:51 +0000
Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side
 lock

Cong Wang reports that lockdep detected suspicious RCU usage while
enabling IPV6 forwarding:

 [ 1123.310275] ===============================
 [ 1123.442202] [ INFO: suspicious RCU usage. ]
 [ 1123.558207] 3.6.0-rc1+ #109 Not tainted
 [ 1123.665204] -------------------------------
 [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section!
 [ 1123.992320]
 [ 1123.992320] other info that might help us debug this:
 [ 1123.992320]
 [ 1124.307382]
 [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0
 [ 1124.522220] 2 locks held by sysctl/5710:
 [ 1124.648364]  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff81768498>] rtnl_trylock+0x15/0x17
 [ 1124.882211]  #1:  (rcu_read_lock){.+.+.+}, at: [<ffffffff81871df8>] rcu_lock_acquire+0x0/0x29
 [ 1125.085209]
 [ 1125.085209] stack backtrace:
 [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109
 [ 1125.441291] Call Trace:
 [ 1125.545281]  [<ffffffff8109d915>] lockdep_rcu_suspicious+0x109/0x112
 [ 1125.667212]  [<ffffffff8107c240>] rcu_preempt_sleep_check+0x45/0x47
 [ 1125.781838]  [<ffffffff8107c260>] __might_sleep+0x1e/0x19b
[...]
 [ 1127.445223]  [<ffffffff81757ac5>] call_netdevice_notifiers+0x4a/0x4f
[...]
 [ 1127.772188]  [<ffffffff8175e125>] dev_disable_lro+0x32/0x6b
 [ 1127.885174]  [<ffffffff81872d26>] dev_forward_change+0x30/0xcb
 [ 1128.013214]  [<ffffffff818738c4>] addrconf_forward_change+0x85/0xc5
[...]

addrconf_forward_change() uses RCU iteration over the netdev list,
which is unnecessary since it already holds the RTNL lock.  We also
cannot reasonably require netdevice notifier functions not to sleep.

Reported-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79181819a24f..6bc85f7c31e3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	rcu_read_lock();
-	for_each_netdev_rcu(net, dev) {
+	for_each_netdev(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 				dev_forward_change(idev);
 		}
 	}
-	rcu_read_unlock();
 }
 
 static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
-- 
cgit 


From c03307eab68d583ea6db917681afa14ed1fb3b84 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 14 Aug 2012 08:19:33 -0700
Subject: bridge: fix rcu dereference outside of rcu_read_lock

Alternative solution for problem found by Linux Driver Verification
project (linuxtesting.org).

As it noted in the comment before the br_handle_frame_finish
function, this function should be called under rcu_read_lock.

The problem callgraph:
br_dev_xmit -> br_nf_pre_routing_finish_bridge_slow ->
 -> br_handle_frame_finish -> br_port_get_rcu -> rcu_dereference

And in this case there is no read-lock section.

Reported-by: Denis Efremov <yefremov.denis@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32211fa5b506..070e8a68cfc6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+	rcu_read_lock();
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
 		br_nf_pre_routing_finish_bridge_slow(skb);
+		rcu_read_unlock();
 		return NETDEV_TX_OK;
 	}
 #endif
@@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
-	rcu_read_lock();
 	if (is_broadcast_ether_addr(dest))
 		br_flood_deliver(br, skb);
 	else if (is_multicast_ether_addr(dest)) {
-- 
cgit 


From e862f1a9b7df4e8196ebec45ac62295138aa3fc2 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:44 +0000
Subject: atm: fix info leak in getsockopt(SO_ATMPVC)

The ATM code fails to initialize the two padding bytes of struct
sockaddr_atmpvc inserted for alignment. Add an explicit memset(0)
before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/common.c b/net/atm/common.c
index b4b44dbed645..0c0ad930a632 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 
 		if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
 			return -ENOTCONN;
+		memset(&pvc, 0, sizeof(pvc));
 		pvc.sap_family = AF_ATMPVC;
 		pvc.sap_addr.itf = vcc->dev->number;
 		pvc.sap_addr.vpi = vcc->vpi;
-- 
cgit 


From 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:45 +0000
Subject: atm: fix info leak via getsockname()

The ATM code fails to initialize the two padding bytes of struct
sockaddr_atmpvc inserted for alignment. Add an explicit memset(0)
before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/pvc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 3a734919c36c..ae0324021407 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
 		return -ENOTCONN;
 	*sockaddr_len = sizeof(struct sockaddr_atmpvc);
 	addr = (struct sockaddr_atmpvc *)sockaddr;
+	memset(addr, 0, sizeof(*addr));
 	addr->sap_family = AF_ATMPVC;
 	addr->sap_addr.itf = vcc->dev->number;
 	addr->sap_addr.vpi = vcc->vpi;
-- 
cgit 


From e15ca9a0ef9a86f0477530b0f44a725d67f889ee Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:46 +0000
Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER)

The HCI code fails to initialize the two padding bytes of struct
hci_ufilter before copying it to userland -- that for leaking two
bytes kernel stack. Add an explicit memset(0) before filling the
structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a7f04de03d79..a27bbc3cd4b7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1009,6 +1009,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
 		{
 			struct hci_filter *f = &hci_pi(sk)->filter;
 
+			memset(&uf, 0, sizeof(uf));
 			uf.type_mask = f->type_mask;
 			uf.opcode    = f->opcode;
 			uf.event_mask[0] = *((u32 *) f->event_mask + 0);
-- 
cgit 


From 3f68ba07b1da811bf383b4b701b129bfcb2e4988 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:47 +0000
Subject: Bluetooth: HCI - Fix info leak via getsockname()

The HCI code fails to initialize the hci_channel member of struct
sockaddr_hci and that for leaks two bytes kernel stack via the
getsockname() syscall. Initialize hci_channel with 0 to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a27bbc3cd4b7..19fdac78e555 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 	*addr_len = sizeof(*haddr);
 	haddr->hci_family = AF_BLUETOOTH;
 	haddr->hci_dev    = hdev->id;
+	haddr->hci_channel= 0;
 
 	release_sock(sk);
 	return 0;
-- 
cgit 


From 9ad2de43f1aee7e7274a4e0d41465489299e344b Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:48 +0000
Subject: Bluetooth: RFCOMM - Fix info leak in getsockopt(BT_SECURITY)

The RFCOMM code fails to initialize the key_size member of struct
bt_security before copying it to userland -- that for leaking one
byte kernel stack. Initialize key_size with 0 to avoid the info
leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7e1e59645c05..64f55ca61472 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -822,6 +822,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
 		}
 
 		sec.level = rfcomm_pi(sk)->sec_level;
+		sec.key_size = 0;
 
 		len = min_t(unsigned int, len, sizeof(sec));
 		if (copy_to_user(optval, (char *) &sec, len))
-- 
cgit 


From f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:49 +0000
Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST)

The RFCOMM code fails to initialize the two padding bytes of struct
rfcomm_dev_list_req inserted for alignment before copying it to
userland. Additionally there are two padding bytes in each instance of
struct rfcomm_dev_info. The ioctl() that for disclosures two bytes plus
dev_num times two bytes uninitialized kernel heap memory.

Allocate the memory using kzalloc() to fix this issue.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/tty.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cb960773c002..56f182393c4c 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg)
 
 	size = sizeof(*dl) + dev_num * sizeof(*di);
 
-	dl = kmalloc(size, GFP_KERNEL);
+	dl = kzalloc(size, GFP_KERNEL);
 	if (!dl)
 		return -ENOMEM;
 
-- 
cgit 


From 9344a972961d1a6d2c04d9008b13617bcb6ec2ef Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:50 +0000
Subject: Bluetooth: RFCOMM - Fix info leak via getsockname()

The RFCOMM code fails to initialize the trailing padding byte of struct
sockaddr_rc added for alignment. It that for leaks one byte kernel stack
via the getsockname() syscall. Add an explicit memset(0) before filling
the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 64f55ca61472..1a17850d093c 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
+	memset(sa, 0, sizeof(*sa));
 	sa->rc_family  = AF_BLUETOOTH;
 	sa->rc_channel = rfcomm_pi(sk)->channel;
 	if (peer)
-- 
cgit 


From 792039c73cf176c8e39a6e8beef2c94ff46522ed Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:51 +0000
Subject: Bluetooth: L2CAP - Fix info leak via getsockname()

The L2CAP code fails to initialize the l2_bdaddr_type member of struct
sockaddr_l2 and the padding byte added for alignment. It that for leaks
two bytes kernel stack via the getsockname() syscall. Add an explicit
memset(0) before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index b94abd30e6f9..1497edd191a2 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -245,6 +245,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
+	memset(la, 0, sizeof(struct sockaddr_l2));
 	addr->sa_family = AF_BLUETOOTH;
 	*len = sizeof(struct sockaddr_l2);
 
-- 
cgit 


From 04d4fbca1017c11381e7d82acea21dd741e748bc Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:52 +0000
Subject: l2tp: fix info leak via getsockname()

The L2TP code for IPv6 fails to initialize the l2tp_unused member of
struct sockaddr_l2tpip6 and that for leaks two bytes kernel stack via
the getsockname() syscall. Initialize l2tp_unused with 0 to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 35e1e4bde587..927547171bc7 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 	lsa->l2tp_family = AF_INET6;
 	lsa->l2tp_flowinfo = 0;
 	lsa->l2tp_scope_id = 0;
+	lsa->l2tp_unused = 0;
 	if (peer) {
 		if (!lsk->peer_conn_id)
 			return -ENOTCONN;
-- 
cgit 


From 3592aaeb80290bda0f2cf0b5456c97bfc638b192 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:53 +0000
Subject: llc: fix info leak via getsockname()

The LLC code wrongly returns 0, i.e. "success", when the socket is
zapped. Together with the uninitialized uaddrlen pointer argument from
sys_getsockname this leads to an arbitrary memory leak of up to 128
bytes kernel stack via the getsockname() syscall.

Return an error instead when the socket is zapped to prevent the info
leak. Also remove the unnecessary memset(0). We don't directly write to
the memory pointed by uaddr but memcpy() a local structure at the end of
the function that is properly initialized.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8c2919ca36f6..c2190005a114 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_llc sllc;
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	int rc = 0;
+	int rc = -EBADF;
 
 	memset(&sllc, 0, sizeof(sllc));
 	lock_sock(sk);
 	if (sock_flag(sk, SOCK_ZAPPED))
 		goto out;
 	*uaddrlen = sizeof(sllc);
-	memset(uaddr, 0, *uaddrlen);
 	if (peer) {
 		rc = -ENOTCONN;
 		if (sk->sk_state != TCP_ESTABLISHED)
-- 
cgit 


From 276bdb82dedb290511467a5a4fdbe9f0b52dce6f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:54 +0000
Subject: dccp: check ccid before dereferencing

ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt() might be called with
a NULL ccid pointer leading to a NULL pointer dereference. This could
lead to a privilege escalation if the attacker is able to map page 0 and
prepare it with a fake ccid_ops pointer.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c3582a7678..fb85d371a8de 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
 					u32 __user *optval, int __user *optlen)
 {
 	int rc = -ENOPROTOOPT;
-	if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+	if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
 		rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
 						 optval, optlen);
 	return rc;
@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
 					u32 __user *optval, int __user *optlen)
 {
 	int rc = -ENOPROTOOPT;
-	if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+	if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
 		rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
 						 optval, optlen);
 	return rc;
-- 
cgit 


From 7b07f8eb75aa3097cdfd4f6eac3da49db787381d Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:55 +0000
Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO)

The CCID3 code fails to initialize the trailing padding bytes of struct
tfrc_tx_info added for alignment on 64 bit architectures. It that for
potentially leaks four bytes kernel stack via the getsockopt() syscall.
Add an explicit memset(0) before filling the structure to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d65e98798eca..119c04317d48 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 	case DCCP_SOCKOPT_CCID_TX_INFO:
 		if (len < sizeof(tfrc))
 			return -EINVAL;
+		memset(&tfrc, 0, sizeof(tfrc));
 		tfrc.tfrctx_x	   = hc->tx_x;
 		tfrc.tfrctx_x_recv = hc->tx_x_recv;
 		tfrc.tfrctx_x_calc = hc->tx_x_calc;
-- 
cgit 


From 2d8a041b7bfe1097af21441cb77d6af95f4f4680 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:56 +0000
Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT)

If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is
not set, __ip_vs_get_timeouts() does not fully initialize the structure
that gets copied to userland and that for leaks up to 12 bytes of kernel
stack. Add an explicit memset(0) before passing the structure to
__ip_vs_get_timeouts() to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Wensong Zhang <wensong@linux-vs.org>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 84444dda194b..72bf32a84874 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2759,6 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;
 
+		memset(&t, 0, sizeof(t));
 		__ip_vs_get_timeouts(net, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
-- 
cgit 


From 43da5f2e0d0c69ded3d51907d9552310a6b545e8 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:57 +0000
Subject: net: fix info leak in compat dev_ifconf()

The implementation of dev_ifconf() for the compat ioctl interface uses
an intermediate ifc structure allocated in userland for the duration of
the syscall. Though, it fails to initialize the padding bytes inserted
for alignment and that for leaks four bytes of kernel stack. Add an
explicit memset(0) before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index dfe5b66c97e0..a5471f804d99 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 	if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
 		return -EFAULT;
 
+	memset(&ifc, 0, sizeof(ifc));
 	if (ifc32.ifcbuf == 0) {
 		ifc32.ifc_len = 0;
 		ifc.ifc_len = 0;
-- 
cgit 


From 2614f86490122bf51eb7c12ec73927f1900f4e7d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 16 Aug 2012 02:25:24 +0200
Subject: netfilter: nf_ct_expect: fix possible access to uninitialized timer

In __nf_ct_expect_check, the function refresh_timer returns 1
if a matching expectation is found and its timer is successfully
refreshed. This results in nf_ct_expect_related returning 0.
Note that at this point:

- the passed expectation is not inserted in the expectation table
  and its timer was not initialized, since we have refreshed one
  matching/existing expectation.

- nf_ct_expect_alloc uses kmem_cache_alloc, so the expectation
  timer is in some undefined state just after the allocation,
  until it is appropriately initialized.

This can be a problem for the SIP helper during the expectation
addition:

 ...
 if (nf_ct_expect_related(rtp_exp) == 0) {
         if (nf_ct_expect_related(rtcp_exp) != 0)
                 nf_ct_unexpect_related(rtp_exp);
 ...

Note that nf_ct_expect_related(rtp_exp) may return 0 for the timer refresh
case that is detailed above. Then, if nf_ct_unexpect_related(rtcp_exp)
returns != 0, nf_ct_unexpect_related(rtp_exp) is called, which does:

 spin_lock_bh(&nf_conntrack_lock);
 if (del_timer(&exp->timeout)) {
         nf_ct_unlink_expect(exp);
         nf_ct_expect_put(exp);
 }
 spin_unlock_bh(&nf_conntrack_lock);

Note that del_timer always returns false if the timer has been
initialized.  However, the timer was not initialized since setup_timer
was not called, therefore, the expectation timer remains in some
undefined state. If I'm not missing anything, this may lead to the
removal an unexistent expectation.

To fix this, the optimization that allows refreshing an expectation
is removed. Now nf_conntrack_expect_related looks more consistent
to me since it always add the expectation in case that it returns
success.

Thanks to Patrick McHardy for participating in the discussion of
this patch.

I think this may be the source of the problem described by:
http://marc.info/?l=netfilter-devel&m=134073514719421&w=2

Reported-by: Rafal Fitt <rafalf@aplusc.com.pl>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 45cf602a76bc..527651a53a45 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master,
 	}
 }
 
-static inline int refresh_timer(struct nf_conntrack_expect *i)
-{
-	struct nf_conn_help *master_help = nfct_help(i->master);
-	const struct nf_conntrack_expect_policy *p;
-
-	if (!del_timer(&i->timeout))
-		return 0;
-
-	p = &rcu_dereference_protected(
-		master_help->helper,
-		lockdep_is_held(&nf_conntrack_lock)
-		)->expect_policy[i->class];
-	i->timeout.expires = jiffies + p->timeout * HZ;
-	add_timer(&i->timeout);
-	return 1;
-}
-
 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 {
 	const struct nf_conntrack_expect_policy *p;
@@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct nf_conn_help *master_help = nfct_help(master);
 	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(expect);
-	struct hlist_node *n;
+	struct hlist_node *n, *next;
 	unsigned int h;
 	int ret = 1;
 
@@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+	hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
-			/* Refresh timer: if it's dying, ignore.. */
-			if (refresh_timer(i)) {
-				ret = 0;
-				goto out;
+			if (del_timer(&i->timeout)) {
+				nf_ct_unlink_expect(i);
+				nf_ct_expect_put(i);
+				break;
 			}
 		} else if (expect_clash(i, expect)) {
 			ret = -EBUSY;
-- 
cgit 


From 16c0b164bd24d44db137693a36b428ba28970c62 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 15 Aug 2012 20:44:27 +0000
Subject: act_mirred: do not drop packets when fails to mirror it

We drop packet unconditionally when we fail to mirror it. This is not intended
in some cases. Consdier for kvm guest, we may mirror the traffic of the bridge
to a tap device used by a VM. When kernel fails to mirror the packet in
conditions such as when qemu crashes or stop polling the tap, it's hard for the
management software to detect such condition and clean the the mirroring
before. This would lead all packets to the bridge to be dropped and break the
netowrk of other virtual machines.

To solve the issue, the patch does not drop packets when kernel fails to mirror
it, and only drop the redirected packets.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fe81cc18e9e0..9c0fd0c78814 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 out:
 	if (err) {
 		m->tcf_qstats.overlimits++;
-		/* should we be asking for packet to be dropped?
-		 * may make sense for redirect case only
-		 */
-		retval = TC_ACT_SHOT;
-	} else {
+		if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+			retval = TC_ACT_SHOT;
+		else
+			retval = m->tcf_action;
+	} else
 		retval = m->tcf_action;
-	}
 	spin_unlock(&m->tcf_lock);
 
 	return retval;
-- 
cgit 


From f796c20cf67aa54c9130d0dc41307c0025719b85 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:24 +0000
Subject: net: netprio: fix files lock and remove useless d_path bits

Add lock to prevent a race with a file closing and also remove
useless and ugly sscanf code. The extra code was never needed
and the case it supposedly protected against is in fact handled
correctly by sock_from_file as pointed out by Al Viro.

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index ed0c0431fcd8..f65dba3afd99 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -277,12 +277,6 @@ out_free_devname:
 void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
-	char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
-
-	if (!tmp) {
-		pr_warn("Unable to attach cgrp due to alloc failure!\n");
-		return;
-	}
 
 	cgroup_taskset_for_each(p, cgrp, tset) {
 		unsigned int fd;
@@ -296,32 +290,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 			continue;
 		}
 
-		rcu_read_lock();
+		spin_lock(&files->file_lock);
 		fdt = files_fdtable(files);
 		for (fd = 0; fd < fdt->max_fds; fd++) {
-			char *path;
 			struct file *file;
 			struct socket *sock;
-			unsigned long s;
-			int rv, err = 0;
+			int err;
 
 			file = fcheck_files(files, fd);
 			if (!file)
 				continue;
 
-			path = d_path(&file->f_path, tmp, PAGE_SIZE);
-			rv = sscanf(path, "socket:[%lu]", &s);
-			if (rv <= 0)
-				continue;
-
 			sock = sock_from_file(file, &err);
-			if (!err)
+			if (sock)
 				sock_update_netprioidx(sock->sk, p);
 		}
-		rcu_read_unlock();
+		spin_unlock(&files->file_lock);
 		task_unlock(p);
 	}
-	kfree(tmp);
 }
 
 static struct cftype ss_files[] = {
-- 
cgit 


From 48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:30 +0000
Subject: net: netprio: fd passed in SCM_RIGHTS datagram not set correctly

A socket fd passed in a SCM_RIGHTS datagram was not getting
updated with the new tasks cgrp prioidx. This leaves IO on
the socket tagged with the old tasks priority.

To fix this add a check in the scm recvmsg path to update the
sock cgrp prioidx with the new tasks value.

Thanks to Al Viro for catching this.

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/scm.c b/net/core/scm.c
index 8f6ccfd68ef4..040cebeed45b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
 	     i++, cmfptr++)
 	{
+		struct socket *sock;
 		int new_fd;
 		err = security_file_receive(fp[i]);
 		if (err)
@@ -281,6 +282,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		}
 		/* Bump the usage count and install the file. */
 		get_file(fp[i]);
+		sock = sock_from_file(fp[i], &err);
+		if (sock)
+			sock_update_netprioidx(sock->sk, current);
 		fd_install(new_fd, fp[i]);
 	}
 
-- 
cgit 


From 476ad154f3b41dd7d9a08a2f641e28388abc2fd1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:35 +0000
Subject: net: netprio: fix cgrp create and write priomap race

A race exists where creating cgroups and also updating the priomap
may result in losing a priomap update. This is because priomap
writers are not protected by rtnl_lock.

Move priority writer into rtnl_lock()/rtnl_unlock().

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f65dba3afd99..c75e3f9d060f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev)
 	u32 max_len;
 	struct netprio_map *map;
 
-	rtnl_lock();
 	max_len = atomic_read(&max_prioidx) + 1;
 	map = rtnl_dereference(dev->priomap);
 	if (!map || map->priomap_len < max_len)
 		ret = extend_netdev_table(dev, max_len);
-	rtnl_unlock();
 
 	return ret;
 }
@@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (!dev)
 		goto out_free_devname;
 
+	rtnl_lock();
 	ret = write_update_netdev_table(dev);
 	if (ret < 0)
 		goto out_put_dev;
 
-	rcu_read_lock();
-	map = rcu_dereference(dev->priomap);
+	map = rtnl_dereference(dev->priomap);
 	if (map)
 		map->priomap[prioidx] = priority;
-	rcu_read_unlock();
 
 out_put_dev:
+	rtnl_unlock();
 	dev_put(dev);
 
 out_free_devname:
-- 
cgit 


From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Thu, 16 Aug 2012 22:02:58 +0000
Subject: af_packet: don't emit packet on orig fanout group

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This result in a loop for software which
generate packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info account.

Reported-by: Aleksandr Kotov <a1k@mail.ru>
Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c         | 16 ++++++++++++++--
 net/packet/af_packet.c |  9 +++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a39354ee1432..debd9372472f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1642,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+	if (ptype->af_packet_priv == NULL)
+		return false;
+
+	if (ptype->id_match)
+		return ptype->id_match(ptype, skb->sk);
+	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+		return true;
+
+	return false;
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1659,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
 		if ((ptype->dev == dev || !ptype->dev) &&
-		    (ptype->af_packet_priv == NULL ||
-		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+		    (!skb_loop_sk(ptype, skb))) {
 			if (pt_prev) {
 				deliver_skb(skb2, pt_prev, skb->dev);
 				pt_prev = ptype;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ac890a1a4c0..aee7196aac36 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 	spin_unlock(&f->lock);
 }
 
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+		return true;
+
+	return false;
+}
+
 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 {
 	struct packet_sock *po = pkt_sk(sk);
@@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
+		match->prot_hook.id_match = match_fanout_group;
 		dev_add_pack(&match->prot_hook);
 		list_add(&match->list, &fanout_list);
 	}
-- 
cgit 


From d92c7f8aabae913de16eb855b19cd2002c341896 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Fri, 17 Aug 2012 10:33:12 +0000
Subject: caif: Do not dereference NULL in chnl_recv_cb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In net/caif/chnl_net.c::chnl_recv_cb() we call skb_header_pointer()
which may return NULL, but we do not check for a NULL pointer before
dereferencing it.
This patch adds such a NULL check and properly free's allocated memory
and return an error (-EINVAL) on failure - much better than crashing..

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Acked-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 69771c04ba8f..e597733affb8 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 
 	/* check the version of IP */
 	ip_version = skb_header_pointer(skb, 0, 1, &buf);
+	if (!ip_version) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 
 	switch (*ip_version >> 4) {
 	case 4:
-- 
cgit 


From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Aug 2012 02:56:56 -0700
Subject: net: ipv6: fix oops in inet_putpeer()

Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced
a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not
initialized, causing a false positive result from inetpeer_ptr_is_peer(),
which in turn causes a NULL pointer dereference in inet_putpeer().

Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X
EIP: 0060:[<c03abf93>] EFLAGS: 00010246 CPU: 0
EIP is at inet_putpeer+0xe/0x16
EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641
ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4
 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068
CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28
 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4
 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8
 [<c0423de1>] xfrm6_dst_destroy+0x42/0xdb
 [<c038d5f7>] dst_destroy+0x1d/0xa4
 [<c03ef48d>] xfrm_bundle_flo_delete+0x2b/0x36
 [<c0396870>] flow_cache_gc_task+0x85/0x9f
 [<c0142d2b>] process_one_work+0x122/0x441
 [<c043feb5>] ? apic_timer_interrupt+0x31/0x38
 [<c03967eb>] ? flow_cache_new_hashrnd+0x2b/0x2b
 [<c0143e2d>] worker_thread+0x113/0x3cc

Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to
properly initialize the dst's peer pointer.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 8 ++++++++
 net/xfrm/xfrm_policy.c  | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812107b1..f8c4c08ffb60 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
 	return 0;
 }
 
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+	struct rt6_info *rt = (struct rt6_info *)xdst;
+
+	rt6_init_peer(rt, net->ipv6.peers);
+}
+
 static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 			   int nfheader_len)
 {
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.get_saddr = 		xfrm6_get_saddr,
 	.decode_session =	_decode_session6,
 	.get_tos =		xfrm6_get_tos,
+	.init_dst =		xfrm6_init_dst,
 	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
 	.blackhole_route =	ip6_blackhole_route,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5a5165a5927..5a2aa17e4d3c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
 		xdst->flo.ops = &xfrm_bundle_fc_ops;
+		if (afinfo->init_dst)
+			afinfo->init_dst(net, xdst);
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
-- 
cgit 


From 3de7a37b02f607716753dc669c1dca4f71db08fc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 18 Aug 2012 14:36:44 +0000
Subject: net/core/dev.c: fix kernel-doc warning

Fix kernel-doc warning:

Warning(net/core/dev.c:5745): No description found for parameter 'dev'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	"David S. Miller" <davem@davemloft.net>
Cc:	netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index debd9372472f..83988362805e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5744,6 +5744,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
 
 /**
  * netdev_wait_allrefs - wait until all references are gone.
+ * @dev: target net_device
  *
  * This is called when unregistering network devices.
  *
-- 
cgit 


From fae6ef87faeb8853896920c68ee703d715799d28 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 19 Aug 2012 03:30:38 +0000
Subject: net: tcp: move sk_rx_dst_set call after tcp_create_openreq_child()

This commit removes the sk_rx_dst_set calls from
tcp_create_openreq_child(), because at that point the icsk_af_ops
field of ipv6_mapped TCP sockets has not been set to its proper final
value.

Instead, to make sure we get the right sk_rx_dst_set variant
appropriate for the address family of the new connection, we have
tcp_v{4,6}_syn_recv_sock() directly call the appropriate function
shortly after the call to tcp_create_openreq_child() returns.

This also moves inet6_sk_rx_dst_set() to avoid a forward declaration
with the new approach.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Reported-by: Artem Savkov <artem.savkov@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c      |  1 +
 net/ipv4/tcp_minisocks.c |  2 --
 net/ipv6/tcp_ipv6.c      | 25 +++++++++++++------------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 767823764016..5bf2040b25b1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1462,6 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto exit_nonewsk;
 
 	newsk->sk_gso_type = SKB_GSO_TCPV4;
+	inet_sk_rx_dst_set(newsk, skb);
 
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d9c9dcef2de3..6ff7f10dce9d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		struct tcp_sock *oldtp = tcp_sk(sk);
 		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
 
-		newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb);
-
 		/* TCP Cookie Transactions require space for the cookie pair,
 		 * as it differs for each connection.  There is no need to
 		 * copy any s_data_payload stored at the original socket.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb9ce2b2f377..a3e60cc04a8a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 }
 #endif
 
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	if (rt->rt6i_node)
+		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
 static void tcp_v6_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
@@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(newsk, dst, NULL, NULL);
+	inet6_sk_rx_dst_set(newsk, skb);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1729,18 +1742,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
-static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	const struct rt6_info *rt = (const struct rt6_info *)dst;
-
-	dst_hold(dst);
-	sk->sk_rx_dst = dst;
-	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
-	if (rt->rt6i_node)
-		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
-}
-
 static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
-- 
cgit 


From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Sun, 19 Aug 2012 12:29:16 -0700
Subject: libceph: delay debugfs initialization until we learn global_id

The debugfs directory includes the cluster fsid and our unique global_id.
We need to delay the initialization of the debug entry until we have
learned both the fsid and our global_id from the monitor or else the
second client can't create its debugfs entry and will fail (and multiple
client instances aren't properly reflected in debugfs).

Reported by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
---
 net/ceph/ceph_common.c |  1 -
 net/ceph/debugfs.c     |  4 ++++
 net/ceph/mon_client.c  | 51 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 69e38db28e5f..a8020293f342 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
 			return -1;
 		}
 	} else {
-		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
 		memcpy(&client->fsid, fsid, sizeof(*fsid));
 	}
 	return 0;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 54b531a01121..38b5dc1823d4 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
 		 client->monc.auth->global_id);
 
+	dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+	BUG_ON(client->debugfs_dir);
 	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
 	if (!client->debugfs_dir)
 		goto out;
@@ -234,6 +237,7 @@ out:
 
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
 {
+	dout("ceph_debugfs_client_cleanup %p\n", client);
 	debugfs_remove(client->debugfs_osdmap);
 	debugfs_remove(client->debugfs_monmap);
 	debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 105d533b55f3..900ea0f043fc 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -310,6 +310,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
 }
 EXPORT_SYMBOL(ceph_monc_open_session);
 
+/*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+	dout("have_debugfs_info fsid %d globalid %lld\n",
+	     (int)monc->client->have_fsid, monc->auth->global_id);
+	return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
 /*
  * The monitor responds with mount ack indicate mount success.  The
  * included client ticket allows the client to talk to MDSs and OSDs.
@@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 	struct ceph_client *client = monc->client;
 	struct ceph_monmap *monmap = NULL, *old = monc->monmap;
 	void *p, *end;
+	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
 
+	had_debugfs_info = have_debugfs_info(monc);
+
 	dout("handle_monmap\n");
 	p = msg->front.iov_base;
 	end = p + msg->front.iov_len;
@@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 
 	if (!client->have_fsid) {
 		client->have_fsid = true;
+		if (!had_debugfs_info && have_debugfs_info(monc)) {
+			pr_info("client%lld fsid %pU\n",
+				ceph_client_id(monc->client),
+				&monc->client->fsid);
+			init_debugfs = 1;
+		}
 		mutex_unlock(&monc->mutex);
-		/*
-		 * do debugfs initialization without mutex to avoid
-		 * creating a locking dependency
-		 */
-		ceph_debugfs_client_init(client);
+
+		if (init_debugfs) {
+			/*
+			 * do debugfs initialization without mutex to avoid
+			 * creating a locking dependency
+			 */
+			ceph_debugfs_client_init(monc->client);
+		}
+
 		goto out_unlocked;
 	}
 out:
@@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 {
 	int ret;
 	int was_auth = 0;
+	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
+	had_debugfs_info = have_debugfs_info(monc);
 	if (monc->auth->ops)
 		was_auth = monc->auth->ops->is_authenticated(monc->auth);
 	monc->pending_auth = 0;
@@ -889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 		__send_subscribe(monc);
 		__resend_generic_request(monc);
 	}
+
+	if (!had_debugfs_info && have_debugfs_info(monc)) {
+		pr_info("client%lld fsid %pU\n",
+			ceph_client_id(monc->client),
+			&monc->client->fsid);
+		init_debugfs = 1;
+	}
 	mutex_unlock(&monc->mutex);
+
+	if (init_debugfs) {
+		/*
+		 * do debugfs initialization without mutex to avoid
+		 * creating a locking dependency
+		 */
+		ceph_debugfs_client_init(monc->client);
+	}
 }
 
 static int __validate_auth(struct ceph_mon_client *monc)
-- 
cgit 


From 144d56e91044181ec0ef67aeca91e9a8b5718348 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Aug 2012 00:22:46 +0000
Subject: tcp: fix possible socket refcount problem

Commit 6f458dfb40 (tcp: improve latencies of timer triggered events)
added bug leading to following trace :

[ 2866.131281] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000
[ 2866.131726]
[ 2866.132188] =========================
[ 2866.132281] [ BUG: held lock freed! ]
[ 2866.132281] 3.6.0-rc1+ #622 Not tainted
[ 2866.132281] -------------------------
[ 2866.132281] kworker/0:1/652 is freeing memory ffff880019ec0000-ffff880019ec0a1f, with a lock still held there!
[ 2866.132281]  (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6
[ 2866.132281] 4 locks held by kworker/0:1/652:
[ 2866.132281]  #0:  (rpciod){.+.+.+}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f
[ 2866.132281]  #1:  ((&task->u.tk_work)){+.+.+.}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f
[ 2866.132281]  #2:  (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6
[ 2866.132281]  #3:  (&icsk->icsk_retransmit_timer){+.-...}, at: [<ffffffff81078017>] run_timer_softirq+0x1ad/0x35f
[ 2866.132281]
[ 2866.132281] stack backtrace:
[ 2866.132281] Pid: 652, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #622
[ 2866.132281] Call Trace:
[ 2866.132281]  <IRQ>  [<ffffffff810bc527>] debug_check_no_locks_freed+0x112/0x159
[ 2866.132281]  [<ffffffff818a0839>] ? __sk_free+0xfd/0x114
[ 2866.132281]  [<ffffffff811549fa>] kmem_cache_free+0x6b/0x13a
[ 2866.132281]  [<ffffffff818a0839>] __sk_free+0xfd/0x114
[ 2866.132281]  [<ffffffff818a08c0>] sk_free+0x1c/0x1e
[ 2866.132281]  [<ffffffff81911e1c>] tcp_write_timer+0x51/0x56
[ 2866.132281]  [<ffffffff81078082>] run_timer_softirq+0x218/0x35f
[ 2866.132281]  [<ffffffff81078017>] ? run_timer_softirq+0x1ad/0x35f
[ 2866.132281]  [<ffffffff810f5831>] ? rb_commit+0x58/0x85
[ 2866.132281]  [<ffffffff81911dcb>] ? tcp_write_timer_handler+0x148/0x148
[ 2866.132281]  [<ffffffff81070bd6>] __do_softirq+0xcb/0x1f9
[ 2866.132281]  [<ffffffff81a0a00c>] ? _raw_spin_unlock+0x29/0x2e
[ 2866.132281]  [<ffffffff81a1227c>] call_softirq+0x1c/0x30
[ 2866.132281]  [<ffffffff81039f38>] do_softirq+0x4a/0xa6
[ 2866.132281]  [<ffffffff81070f2b>] irq_exit+0x51/0xad
[ 2866.132281]  [<ffffffff81a129cd>] do_IRQ+0x9d/0xb4
[ 2866.132281]  [<ffffffff81a0a3ef>] common_interrupt+0x6f/0x6f
[ 2866.132281]  <EOI>  [<ffffffff8109d006>] ? sched_clock_cpu+0x58/0xd1
[ 2866.132281]  [<ffffffff81a0a172>] ? _raw_spin_unlock_irqrestore+0x4c/0x56
[ 2866.132281]  [<ffffffff81078692>] mod_timer+0x178/0x1a9
[ 2866.132281]  [<ffffffff818a00aa>] sk_reset_timer+0x19/0x26
[ 2866.132281]  [<ffffffff8190b2cc>] tcp_rearm_rto+0x99/0xa4
[ 2866.132281]  [<ffffffff8190dfba>] tcp_event_new_data_sent+0x6e/0x70
[ 2866.132281]  [<ffffffff8190f7ea>] tcp_write_xmit+0x7de/0x8e4
[ 2866.132281]  [<ffffffff818a565d>] ? __alloc_skb+0xa0/0x1a1
[ 2866.132281]  [<ffffffff8190f952>] __tcp_push_pending_frames+0x2e/0x8a
[ 2866.132281]  [<ffffffff81904122>] tcp_sendmsg+0xb32/0xcc6
[ 2866.132281]  [<ffffffff819229c2>] inet_sendmsg+0xaa/0xd5
[ 2866.132281]  [<ffffffff81922918>] ? inet_autobind+0x5f/0x5f
[ 2866.132281]  [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb
[ 2866.132281]  [<ffffffff8189adab>] sock_sendmsg+0xa3/0xc4
[ 2866.132281]  [<ffffffff810f5de6>] ? rb_reserve_next_event+0x26f/0x2d5
[ 2866.132281]  [<ffffffff8103e6a9>] ? native_sched_clock+0x29/0x6f
[ 2866.132281]  [<ffffffff8103e6f8>] ? sched_clock+0x9/0xd
[ 2866.132281]  [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb
[ 2866.132281]  [<ffffffff8189ae03>] kernel_sendmsg+0x37/0x43
[ 2866.132281]  [<ffffffff8199ce49>] xs_send_kvec+0x77/0x80
[ 2866.132281]  [<ffffffff8199cec1>] xs_sendpages+0x6f/0x1a0
[ 2866.132281]  [<ffffffff8107826d>] ? try_to_del_timer_sync+0x55/0x61
[ 2866.132281]  [<ffffffff8199d0d2>] xs_tcp_send_request+0x55/0xf1
[ 2866.132281]  [<ffffffff8199bb90>] xprt_transmit+0x89/0x1db
[ 2866.132281]  [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c
[ 2866.132281]  [<ffffffff81999d92>] call_transmit+0x1c5/0x20e
[ 2866.132281]  [<ffffffff819a0d55>] __rpc_execute+0x6f/0x225
[ 2866.132281]  [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c
[ 2866.132281]  [<ffffffff819a0f33>] rpc_async_schedule+0x28/0x34
[ 2866.132281]  [<ffffffff810835d6>] process_one_work+0x24d/0x47f
[ 2866.132281]  [<ffffffff81083567>] ? process_one_work+0x1de/0x47f
[ 2866.132281]  [<ffffffff819a0f0b>] ? __rpc_execute+0x225/0x225
[ 2866.132281]  [<ffffffff81083a6d>] worker_thread+0x236/0x317
[ 2866.132281]  [<ffffffff81083837>] ? process_scheduled_works+0x2f/0x2f
[ 2866.132281]  [<ffffffff8108b7b8>] kthread+0x9a/0xa2
[ 2866.132281]  [<ffffffff81a12184>] kernel_thread_helper+0x4/0x10
[ 2866.132281]  [<ffffffff81a0a4b0>] ? retint_restore_args+0x13/0x13
[ 2866.132281]  [<ffffffff8108b71e>] ? __init_kthread_worker+0x5a/0x5a
[ 2866.132281]  [<ffffffff81a12180>] ? gs_change+0x13/0x13
[ 2866.308506] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000
[ 2866.309689] =============================================================================
[ 2866.310254] BUG TCP (Not tainted): Object already free
[ 2866.310254] -----------------------------------------------------------------------------
[ 2866.310254]

The bug comes from the fact that timer set in sk_reset_timer() can run
before we actually do the sock_hold(). socket refcount reaches zero and
we free the socket too soon.

timer handler is not allowed to reduce socket refcnt if socket is owned
by the user, or we need to change sk_reset_timer() implementation.

We should take a reference on the socket in case TCP_DELACK_TIMER_DEFERRED
or TCP_DELACK_TIMER_DEFERRED bit are set in tsq_flags

Also fix a typo in tcp_delack_timer(), where TCP_WRITE_TIMER_DEFERRED
was used instead of TCP_DELACK_TIMER_DEFERRED.

For consistency, use same socket refcount change for TCP_MTU_REDUCED_DEFERRED,
even if not fired from a timer.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c   |  8 +++++---
 net/ipv4/tcp_output.c | 14 +++++++++-----
 net/ipv4/tcp_timer.c  |  6 ++++--
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5bf2040b25b1..00a748d14062 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 			tp->mtu_info = info;
-			if (!sock_owned_by_user(sk))
+			if (!sock_owned_by_user(sk)) {
 				tcp_v4_mtu_reduced(sk);
-			else
-				set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+			} else {
+				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+					sock_hold(sk);
+			}
 			goto out;
 		}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20dfd892c86f..d04632673a9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
 	if (flags & (1UL << TCP_TSQ_DEFERRED))
 		tcp_tsq_handler(sk);
 
-	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
 		tcp_write_timer_handler(sk);
-
-	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+		__sock_put(sk);
+	}
+	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
 		tcp_delack_timer_handler(sk);
-
-	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+		__sock_put(sk);
+	}
+	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
 		sk->sk_prot->mtu_reduced(sk);
+		__sock_put(sk);
+	}
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..b774a03bd1dc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
 		inet_csk(sk)->icsk_ack.blocked = 1;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
-		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
 		tcp_write_timer_handler(sk);
 	} else {
 		/* deleguate our work to tcp_release_cb() */
-		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
-- 
cgit 


From 1a7b27c97ce675b42eeb7bfaf6e15c34f35c8f95 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Mon, 20 Aug 2012 02:52:09 +0000
Subject: ipv4: Use newinet->inet_opt in inet_csk_route_child_sock()

Since 0e734419923bd ("ipv4: Use inet_csk_route_child_sock() in DCCP and
TCP."), inet_csk_route_child_sock() is called instead of
inet_csk_route_req().

However, after creating the child-sock in tcp/dccp_v4_syn_recv_sock(),
ireq->opt is set to NULL, before calling inet_csk_route_child_sock().
Thus, inside inet_csk_route_child_sock() opt is always NULL and the
SRR-options are not respected anymore.
Packets sent by the server won't have the correct destination-IP.

This patch fixes it by accessing newinet->inet_opt instead of ireq->opt
inside inet_csk_route_child_sock().

Reported-by: Luca Boccassi <luca.boccassi@gmail.com>
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17c00f7..7f75f21d7b83 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *newinet = inet_sk(newsk);
-	struct ip_options_rcu *opt = ireq->opt;
+	struct ip_options_rcu *opt;
 	struct net *net = sock_net(sk);
 	struct flowi4 *fl4;
 	struct rtable *rt;
 
 	fl4 = &newinet->cork.fl.u.ip4;
+
+	rcu_read_lock();
+	opt = rcu_dereference(newinet->inet_opt);
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 		goto no_route;
 	if (opt && opt->opt.is_strictroute && rt->rt_gateway)
 		goto route_err;
+	rcu_read_unlock();
 	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
 no_route:
+	rcu_read_unlock();
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
-- 
cgit 


From a9915a1b52df52ad87f3b33422da95cf25372f09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Aug 2012 07:26:45 +0000
Subject: ipv4: fix ip header ident selection in __ip_make_skb()

Christian Casteyde reported a kmemcheck 32-bit read from uninitialized
memory in __ip_select_ident().

It turns out that __ip_make_skb() called ip_select_ident() before
properly initializing iph->daddr.

This is a bug uncovered by commit 1d861aa4b3fb (inet: Minimize use of
cached route inetpeer.)

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=46131

Reported-by: Christian Casteyde <casteyde.christian@free.fr>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 147ccc3e93db..c196d749daf2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ihl = 5;
 	iph->tos = inet->tos;
 	iph->frag_off = df;
-	ip_select_ident(iph, &rt->dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
-- 
cgit 


From e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Aug 2012 06:21:17 +0000
Subject: af_netlink: force credentials passing [CVE-2012-3520]

Pablo Neira Ayuso discovered that avahi and
potentially NetworkManager accept spoofed Netlink messages because of a
kernel bug.  The kernel passes all-zero SCM_CREDENTIALS ancillary data
to the receiver if the sender did not provide such data, instead of not
including any such data at all or including the correct data from the
peer (as it is the case with AF_UNIX).

This bug was introduced in commit 16e572626961
(af_unix: dont send SCM_CREDENTIALS by default)

This patch forces passing credentials for netlink, as
before the regression.

Another fix would be to not add SCM_CREDENTIALS in
netlink messages if not provided by the sender, but it
might break some programs.

With help from Florian Weimer & Petr Matousek

This issue is designated as CVE-2012-3520

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 net/unix/af_unix.c       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5463969da45b..1445d73533ed 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &scm;
 
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, true);
 	if (err < 0)
 		return err;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e4768c180da2..c5ee4ff61364 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
@@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
-- 
cgit 


From 6d4221b53707486dfad3f5bfe568d2ce7f4c9863 Mon Sep 17 00:00:00 2001
From: Jim Schutt <jaschut@sandia.gov>
Date: Fri, 10 Aug 2012 10:37:38 -0700
Subject: libceph: avoid truncation due to racing banners

Because the Ceph client messenger uses a non-blocking connect, it is
possible for the sending of the client banner to race with the
arrival of the banner sent by the peer.

When ceph_sock_state_change() notices the connect has completed, it
schedules work to process the socket via con_work().  During this
time the peer is writing its banner, and arrival of the peer banner
races with con_work().

If con_work() calls try_read() before the peer banner arrives, there
is nothing for it to do, after which con_work() calls try_write() to
send the client's banner.  In this case Ceph's protocol negotiation
can complete succesfully.

The server-side messenger immediately sends its banner and addresses
after accepting a connect request, *before* actually attempting to
read or verify the banner from the client.  As a result, it is
possible for the banner from the server to arrive before con_work()
calls try_read().  If that happens, try_read() will read the banner
and prepare protocol negotiation info via prepare_write_connect().
prepare_write_connect() calls con_out_kvec_reset(), which discards
the as-yet-unsent client banner.  Next, con_work() calls
try_write(), which sends the protocol negotiation info rather than
the banner that the peer is expecting.

The result is that the peer sees an invalid banner, and the client
reports "negotiation failed".

Fix this by moving con_out_kvec_reset() out of
prepare_write_connect() to its callers at all locations except the
one where the banner might still need to be sent.

[elder@inktak.com: added note about server-side behavior]

Signed-off-by: Jim Schutt <jaschut@sandia.gov>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 net/ceph/messenger.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9796750034a..24c5eea8c45b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -915,7 +915,6 @@ static int prepare_write_connect(struct ceph_connection *con)
 	con->out_connect.authorizer_len = auth ?
 		cpu_to_le32(auth->authorizer_buf_len) : 0;
 
-	con_out_kvec_reset(con);
 	con_out_kvec_add(con, sizeof (con->out_connect),
 					&con->out_connect);
 	if (auth && auth->authorizer_buf_len)
@@ -1557,6 +1556,7 @@ static int process_connect(struct ceph_connection *con)
 			return -1;
 		}
 		con->auth_retry = 1;
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1577,6 +1577,7 @@ static int process_connect(struct ceph_connection *con)
 		       ENTITY_NAME(con->peer_name),
 		       ceph_pr_addr(&con->peer_addr.in_addr));
 		reset_connection(con);
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1601,6 +1602,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->out_connect.connect_seq),
 		     le32_to_cpu(con->in_reply.connect_seq));
 		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1617,6 +1619,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->in_reply.global_seq));
 		get_global_seq(con->msgr,
 			       le32_to_cpu(con->in_reply.global_seq));
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -2135,7 +2138,11 @@ more:
 		BUG_ON(con->state != CON_STATE_CONNECTING);
 		con->state = CON_STATE_NEGOTIATING;
 
-		/* Banner is good, exchange connection info */
+		/*
+		 * Received banner is good, exchange connection info.
+		 * Do not reset out_kvec, as sending our banner raced
+		 * with receiving peer banner after connect completed.
+		 */
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			goto out;
-- 
cgit