From 7a7d3e4c031b969c3b570787fc9fdb605abda03e Mon Sep 17 00:00:00 2001
From: Simon Dinkin <simondinkin@gmail.com>
Date: Thu, 31 Aug 2017 13:09:36 +0300
Subject: mac80211: fix incorrect assignment of reassoc value

this fix minor issue in the log message.
in ieee80211_rx_mgmt_assoc_resp function, when assigning the
reassoc value from the mgmt frame control:
ieee80211_is_reassoc_resp function need to be used, instead of
ieee80211_is_reassoc_req function.

Signed-off-by: Simon Dinkin <simon.dinkin@tandemg.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b588e593b0ec..3b8e2709d8de 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3155,7 +3155,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	if (len < 24 + 6)
 		return;
 
-	reassoc = ieee80211_is_reassoc_req(mgmt->frame_control);
+	reassoc = ieee80211_is_reassoc_resp(mgmt->frame_control);
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
 	status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
 	aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
-- 
cgit 


From 53168215909281a09d3afc6fb51a9d4f81f74d39 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 22 Jun 2017 12:20:30 +0200
Subject: mac80211: fix VLAN handling with TXQs

With TXQs, the AP_VLAN interfaces are resolved to their owner AP
interface when enqueuing the frame, which makes sense since the
frame really goes out on that as far as the driver is concerned.

However, this introduces a problem: frames to be encrypted with
a VLAN-specific GTK will now be encrypted with the AP GTK, since
the information about which virtual interface to use to select
the key is taken from the TXQ.

Fix this by preserving info->control.vif and using that in the
dequeue function. This now requires doing the driver-mapping
in the dequeue as well.

Since there's no way to filter the frames that are sitting on a
TXQ, drop all frames, which may affect other interfaces, when an
AP_VLAN is removed.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 15 ++-------------
 net/mac80211/iface.c   | 17 +++++++++++++++--
 net/mac80211/tx.c      | 36 +++++++++++++++++++++++++++++-------
 3 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f8149ca192b4..885690fa39c8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -919,21 +919,10 @@ struct ieee80211_tx_info {
 				unsigned long jiffies;
 			};
 			/* NB: vif can be NULL for injected frames */
-			union {
-				/* NB: vif can be NULL for injected frames */
-				struct ieee80211_vif *vif;
-
-				/* When packets are enqueued on txq it's easy
-				 * to re-construct the vif pointer. There's no
-				 * more space in tx_info so it can be used to
-				 * store the necessary enqueue time for packet
-				 * sojourn time computation.
-				 */
-				codel_time_t enqueue_time;
-			};
+			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			u32 flags;
-			/* 4 bytes free */
+			codel_time_t enqueue_time;
 		} control;
 		struct {
 			u64 cookie;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9228ac73c429..44399322f356 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -792,6 +792,7 @@ static int ieee80211_open(struct net_device *dev)
 static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 			      bool going_down)
 {
+	struct ieee80211_sub_if_data *txq_sdata = sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct fq *fq = &local->fq;
 	unsigned long flags;
@@ -937,6 +938,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
+		txq_sdata = container_of(sdata->bss,
+					 struct ieee80211_sub_if_data, u.ap);
+
 		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
 		mutex_unlock(&local->mtx);
@@ -1007,8 +1011,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
-	if (sdata->vif.txq) {
-		struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+	if (txq_sdata->vif.txq) {
+		struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq);
+
+		/*
+		 * FIXME FIXME
+		 *
+		 * We really shouldn't purge the *entire* txqi since that
+		 * contains frames for the other AP_VLANs (and possibly
+		 * the AP itself) as well, but there's no API in FQ now
+		 * to be able to filter.
+		 */
 
 		spin_lock_bh(&fq->lock);
 		ieee80211_txq_purge(local, txqi);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8858f4f185e9..94826680cf2b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1276,11 +1276,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
 	IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
 }
 
-static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
-{
-	IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
-}
-
 static u32 codel_skb_len_func(const struct sk_buff *skb)
 {
 	return skb->len;
@@ -3414,6 +3409,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
+	struct ieee80211_vif *vif;
 
 	spin_lock_bh(&fq->lock);
 
@@ -3430,8 +3426,6 @@ begin:
 	if (!skb)
 		goto out;
 
-	ieee80211_set_skb_vif(skb, txqi);
-
 	hdr = (struct ieee80211_hdr *)skb->data;
 	info = IEEE80211_SKB_CB(skb);
 
@@ -3488,6 +3482,34 @@ begin:
 		}
 	}
 
+	switch (tx.sdata->vif.type) {
+	case NL80211_IFTYPE_MONITOR:
+		if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+			vif = &tx.sdata->vif;
+			break;
+		}
+		tx.sdata = rcu_dereference(local->monitor_sdata);
+		if (tx.sdata) {
+			vif = &tx.sdata->vif;
+			info->hw_queue =
+				vif->hw_queue[skb_get_queue_mapping(skb)];
+		} else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
+			ieee80211_free_txskb(&local->hw, skb);
+			goto begin;
+		} else {
+			vif = NULL;
+		}
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		tx.sdata = container_of(tx.sdata->bss,
+					struct ieee80211_sub_if_data, u.ap);
+		/* fall through */
+	default:
+		vif = &tx.sdata->vif;
+		break;
+	}
+
+	IEEE80211_SKB_CB(skb)->control.vif = vif;
 out:
 	spin_unlock_bh(&fq->lock);
 
-- 
cgit 


From d81b0fd0e7b76b05873299ae2dd310127b13613b Mon Sep 17 00:00:00 2001
From: Sharon Dvir <sharon.dvir@intel.com>
Date: Sat, 5 Aug 2017 11:44:30 +0300
Subject: mac80211: shorten debug prints using ht_dbg() to avoid warning

Invoking ht_dbg() with too long of a string will print a warning.
Shorten the messages while retaining the printed patameters.

Signed-off-by: Sharon Dvir <sharon.dvir@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-tx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cbd48762256c..420486b5a1d9 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -436,7 +436,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
 	    test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
 		rcu_read_unlock();
 		ht_dbg(sta->sdata,
-		       "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n",
+		       "timer expired on %pM tid %d not expecting addBA response\n",
 		       sta->sta.addr, tid);
 		return;
 	}
@@ -639,7 +639,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 	    time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] +
 			HT_AGG_RETRIES_PERIOD)) {
 		ht_dbg(sdata,
-		       "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n",
+		       "BA request denied - %d failed requests on %pM tid %u\n",
 		       sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid);
 		ret = -EBUSY;
 		goto err_unlock_sta;
-- 
cgit 


From b44eebea181a36378bea8f27e7f9b4175bfad683 Mon Sep 17 00:00:00 2001
From: Liad Kaufman <liad.kaufman@intel.com>
Date: Sat, 5 Aug 2017 11:44:29 +0300
Subject: mac80211: add MESH IE in the correct order

VHT MESH support was added, but the order of the IEs
wasn't enforced. Fix that.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 259698de569f..6aef6793d052 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1436,7 +1436,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 			WLAN_EID_SSID_LIST,
 			WLAN_EID_CHANNEL_USAGE,
 			WLAN_EID_INTERWORKING,
-			/* mesh ID can't happen here */
+			WLAN_EID_MESH_ID,
 			/* 60 GHz can't happen here right now */
 		};
 		noffset = ieee80211_ie_split(ie, ie_len,
-- 
cgit 


From 89e9bfc4ee859ad2a477f10aa2d5c37377242296 Mon Sep 17 00:00:00 2001
From: Chunho Lee <handera@gmail.com>
Date: Fri, 7 Jul 2017 17:03:14 -0700
Subject: mac80211: Fix null pointer dereference with iTXQ support

This change adds null pointer check before dereferencing pointer dev on
netif_tx_start_all_queues() when an interface is added.
With iTXQ support, netif_tx_start_all_queues() is always called while
an interface is added. however, the netdev queues are not associated
and dev is null when the interface is either NL80211_IFTYPE_P2P_DEVICE
or NL80211_IFTYPE_NAN.

Signed-off-by: Chunho Lee <ch.lee@newracom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 44399322f356..f75029abf728 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -731,7 +731,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
 	    local->ops->wake_tx_queue) {
 		/* XXX: for AP_VLAN, actually track AP queues */
-		netif_tx_start_all_queues(dev);
+		if (dev)
+			netif_tx_start_all_queues(dev);
 	} else if (dev) {
 		unsigned long flags;
 		int n_acs = IEEE80211_NUM_ACS;
-- 
cgit 


From 9de981f507474f326e42117858dc9a9321331ae5 Mon Sep 17 00:00:00 2001
From: Beni Lev <beni.lev@intel.com>
Date: Tue, 25 Jul 2017 11:25:25 +0300
Subject: mac80211_hwsim: Use proper TX power

In struct ieee80211_tx_info, control.vif pointer and rate_driver_data[0]
falls on the same place, depending on the union usage.
During the whole TX process, the union is referred to as a control struct,
which holds the vif that is later used in the tx flow, especially in order
to derive the used tx power.
Referring direcly to rate_driver_data[0] and assigning a value to it,
overwrites the vif pointer, hence making all later references irrelevant.
Moreover, rate_driver_data[0] isn't used later in the flow in order to
retrieve the channel that it is pointing to.

Cc: stable@vger.kernel.org
Signed-off-by: Beni Lev <beni.lev@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index c8852acc1462..6467ffac9811 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1362,8 +1362,6 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
 				       txi->control.rates,
 				       ARRAY_SIZE(txi->control.rates));
 
-	txi->rate_driver_data[0] = channel;
-
 	if (skb->len >= 24 + 8 &&
 	    ieee80211_is_probe_resp(hdr->frame_control)) {
 		/* fake header transmission time */
-- 
cgit 


From 979e1f08042b83152dfe3d76df10db31eb7edf98 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 22 Jun 2017 12:20:28 +0200
Subject: mac80211: agg-tx: call drv_wake_tx_queue in proper context

Since drv_wake_tx_queue() is normally called in the TX path, which
is already in an RCU critical section, we should call it the same
way in the aggregation code path, so if the driver expects to be
able to use RCU, it'll already be protected without having to enter
a nested critical section.

Additionally, disable soft-IRQs, since not doing so could cause
issues in a driver that relies on them already being disabled like
in the other path.

Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-tx.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 420486b5a1d9..bef516ec47f9 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -226,7 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
 		clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
 
 	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+	local_bh_disable();
+	rcu_read_lock();
 	drv_wake_tx_queue(sta->sdata->local, txqi);
+	rcu_read_unlock();
+	local_bh_enable();
 }
 
 /*
-- 
cgit 


From 6e46d8ce894374fc135c96a8d1057c6af1fef237 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 18 Aug 2017 15:33:57 +0300
Subject: mac80211: flush hw_roc_start work before cancelling the ROC

When HW ROC is supported it is possible that after the HW notified
that the ROC has started, the ROC was cancelled and another ROC was
added while the hw_roc_start worker is waiting on the mutex (since
cancelling the ROC and adding another one also holds the same mutex).
As a result, the hw_roc_start worker will continue to run after the
new ROC is added but before it is actually started by the HW.
This may result in notifying userspace that the ROC has started before
it actually does, or in case of management tx ROC, in an attempt to
tx while not on the right channel.

In addition, when the driver will notify mac80211 that the second ROC
has started, mac80211 will warn that this ROC has already been
notified.

Fix this by flushing the hw_roc_start work before cancelling an ROC.

Cc: stable@vger.kernel.org
Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/offchannel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f8e7a8bbc618..faf4f6055000 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
 	if (!cookie)
 		return -ENOENT;
 
+	flush_work(&local->hw_roc_start);
+
 	mutex_lock(&local->mtx);
 	list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
 		if (!mgmt_tx && roc->cookie != cookie)
-- 
cgit 


From ba83bfb1e86501d21077b570e10e443f1605be64 Mon Sep 17 00:00:00 2001
From: Igor Mitsyanko <igor.mitsyanko.os@quantenna.com>
Date: Wed, 30 Aug 2017 13:52:25 -0700
Subject: nl80211: look for HT/VHT capabilities in beacon's tail

There are no HT/VHT capabilities in cfg80211_ap_settings::beacon_ies,
these should be looked for in beacon's tail instead.

Fixes: 66cd794e3c30 ("nl80211: add HT/VHT capabilities to AP parameters")
Signed-off-by: Igor Mitsyanko <igor.mitsyanko.os@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8ce85420ecb0..0df8023f480b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3791,8 +3791,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
 {
 	const struct cfg80211_beacon_data *bcn = &params->beacon;
-	size_t ies_len = bcn->beacon_ies_len;
-	const u8 *ies = bcn->beacon_ies;
+	size_t ies_len = bcn->tail_len;
+	const u8 *ies = bcn->tail;
 	const u8 *rates;
 	const u8 *cap;
 
-- 
cgit 


From 4e0854a74f08e6a9d847f2c2cfae7b07c931d125 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 6 Sep 2017 13:45:40 +0300
Subject: cfg80211: honor NL80211_RRF_NO_HT40{MINUS,PLUS}

Honor the NL80211_RRF_NO_HT40{MINUS,PLUS} flags in
reg_process_ht_flags_channel. Not doing so leads can lead
to a firmware assert in iwlwifi for example.

Fixes: b0d7aa59592b ("cfg80211: allow wiphy specific regdomain management")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5fae296a6a58..6e94f6934a0e 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4,6 +4,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright      2017  Intel Deutschland GmbH
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -1483,7 +1484,9 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
 {
 	struct ieee80211_supported_band *sband = wiphy->bands[channel->band];
 	struct ieee80211_channel *channel_before = NULL, *channel_after = NULL;
+	const struct ieee80211_regdomain *regd;
 	unsigned int i;
+	u32 flags;
 
 	if (!is_ht40_allowed(channel)) {
 		channel->flags |= IEEE80211_CHAN_NO_HT40;
@@ -1503,17 +1506,30 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
 			channel_after = c;
 	}
 
+	flags = 0;
+	regd = get_wiphy_regdom(wiphy);
+	if (regd) {
+		const struct ieee80211_reg_rule *reg_rule =
+			freq_reg_info_regd(MHZ_TO_KHZ(channel->center_freq),
+					   regd, MHZ_TO_KHZ(20));
+
+		if (!IS_ERR(reg_rule))
+			flags = reg_rule->flags;
+	}
+
 	/*
 	 * Please note that this assumes target bandwidth is 20 MHz,
 	 * if that ever changes we also need to change the below logic
 	 * to include that as well.
 	 */
-	if (!is_ht40_allowed(channel_before))
+	if (!is_ht40_allowed(channel_before) ||
+	    flags & NL80211_RRF_NO_HT40MINUS)
 		channel->flags |= IEEE80211_CHAN_NO_HT40MINUS;
 	else
 		channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
 
-	if (!is_ht40_allowed(channel_after))
+	if (!is_ht40_allowed(channel_after) ||
+	    flags & NL80211_RRF_NO_HT40PLUS)
 		channel->flags |= IEEE80211_CHAN_NO_HT40PLUS;
 	else
 		channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
-- 
cgit 


From 98e93e968e4947cd71c2eb69e323682daa453ee7 Mon Sep 17 00:00:00 2001
From: Ilan peer <ilan.peer@intel.com>
Date: Wed, 6 Sep 2017 17:32:40 +0300
Subject: mac80211: Complete ampdu work schedule during session tear down

Commit 7a7c0a6438b8 ("mac80211: fix TX aggregation start/stop callback race")
added a cancellation of the ampdu work after the loop that stopped the
Tx and Rx BA sessions. However, in some cases, e.g., during HW reconfig,
the low level driver might call mac80211 APIs to complete the stopping
of the BA sessions, which would queue the ampdu work to handle the actual
completion. This work needs to be performed as otherwise mac80211 data
structures would not be properly synced.

Fix this by checking if BA session STOP_CB bit is set after the BA session
cancellation and properly clean the session.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
[Johannes: the work isn't flushed because that could do other things we
 don't want, and the locking situation isn't clear]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ht.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c92df492e898..4cba7fca10d4 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -300,6 +300,24 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 
 	/* stopping might queue the work again - so cancel only afterwards */
 	cancel_work_sync(&sta->ampdu_mlme.work);
+
+	/*
+	 * In case the tear down is part of a reconfigure due to HW restart
+	 * request, it is possible that the low level driver requested to stop
+	 * the BA session, so handle it to properly clean tid_tx data.
+	 */
+	mutex_lock(&sta->ampdu_mlme.mtx);
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+		struct tid_ampdu_tx *tid_tx =
+			rcu_dereference_protected_tid_tx(sta, i);
+
+		if (!tid_tx)
+			continue;
+
+		if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+			ieee80211_stop_tx_ba_cb(sta, i, tid_tx);
+	}
+	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
 
 void ieee80211_ba_session_work(struct work_struct *work)
-- 
cgit 


From bde59c475e0883e4c4294bcd9b9c7e08ae18c828 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 6 Sep 2017 15:01:42 +0200
Subject: mac80211: fix deadlock in driver-managed RX BA session start

When an RX BA session is started by the driver, and it has to tell
mac80211 about it, the corresponding bit in tid_rx_manage_offl gets
set and the BA session work is scheduled. Upon testing this bit, it
will call __ieee80211_start_rx_ba_session(), thus deadlocking as it
already holds the ampdu_mlme.mtx, which that acquires again.

Fix this by adding ___ieee80211_start_rx_ba_session(), a version of
the function that requires the mutex already held.

Cc: stable@vger.kernel.org
Fixes: 699cb58c8a52 ("mac80211: manage RX BA session offload without SKB queue")
Reported-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-rx.c      | 32 +++++++++++++++++++++-----------
 net/mac80211/ht.c          |  6 +++---
 net/mac80211/ieee80211_i.h |  4 ++++
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2b36eff5d97e..2849a1fc41c5 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -245,10 +245,10 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	ieee80211_tx_skb(sdata, skb);
 }
 
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
-				     u8 dialog_token, u16 timeout,
-				     u16 start_seq_num, u16 ba_policy, u16 tid,
-				     u16 buf_size, bool tx, bool auto_seq)
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+				      u8 dialog_token, u16 timeout,
+				      u16 start_seq_num, u16 ba_policy, u16 tid,
+				      u16 buf_size, bool tx, bool auto_seq)
 {
 	struct ieee80211_local *local = sta->sdata->local;
 	struct tid_ampdu_rx *tid_agg_rx;
@@ -267,7 +267,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		ht_dbg(sta->sdata,
 		       "STA %pM requests BA session on unsupported tid %d\n",
 		       sta->sta.addr, tid);
-		goto end_no_lock;
+		goto end;
 	}
 
 	if (!sta->sta.ht_cap.ht_supported) {
@@ -275,14 +275,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
 		       sta->sta.addr, tid);
 		/* send a response anyway, it's an error case if we get here */
-		goto end_no_lock;
+		goto end;
 	}
 
 	if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) {
 		ht_dbg(sta->sdata,
 		       "Suspend in progress - Denying ADDBA request (%pM tid %d)\n",
 		       sta->sta.addr, tid);
-		goto end_no_lock;
+		goto end;
 	}
 
 	/* sanity check for incoming parameters:
@@ -296,7 +296,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
 				   sta->sta.addr, tid, ba_policy, buf_size);
-		goto end_no_lock;
+		goto end;
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
@@ -311,7 +311,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 	       buf_size, sta->sta.addr);
 
 	/* examine state machine */
-	mutex_lock(&sta->ampdu_mlme.mtx);
+	lockdep_assert_held(&sta->ampdu_mlme.mtx);
 
 	if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
 		if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
@@ -415,15 +415,25 @@ end:
 		__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
 		sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
 	}
-	mutex_unlock(&sta->ampdu_mlme.mtx);
 
-end_no_lock:
 	if (tx)
 		ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
 					  dialog_token, status, 1, buf_size,
 					  timeout);
 }
 
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+				     u8 dialog_token, u16 timeout,
+				     u16 start_seq_num, u16 ba_policy, u16 tid,
+				     u16 buf_size, bool tx, bool auto_seq)
+{
+	mutex_lock(&sta->ampdu_mlme.mtx);
+	___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
+					 start_seq_num, ba_policy, tid,
+					 buf_size, tx, auto_seq);
+	mutex_unlock(&sta->ampdu_mlme.mtx);
+}
+
 void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     struct sta_info *sta,
 				     struct ieee80211_mgmt *mgmt,
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 4cba7fca10d4..d6d0b4201e40 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -351,9 +351,9 @@ void ieee80211_ba_session_work(struct work_struct *work)
 
 		if (test_and_clear_bit(tid,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
-			__ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-							IEEE80211_MAX_AMPDU_BUF,
-							false, true);
+			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
+							 IEEE80211_MAX_AMPDU_BUF,
+							 false, true);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2197c62a0a6e..9675814f64db 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1760,6 +1760,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 				     u8 dialog_token, u16 timeout,
 				     u16 start_seq_num, u16 ba_policy, u16 tid,
 				     u16 buf_size, bool tx, bool auto_seq);
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+				      u8 dialog_token, u16 timeout,
+				      u16 start_seq_num, u16 ba_policy, u16 tid,
+				      u16 buf_size, bool tx, bool auto_seq);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					 enum ieee80211_agg_stop_reason reason);
 void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
-- 
cgit 


From 9a94b3a4bdfdb404f1846099e880c52e144f06c0 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 3 Sep 2017 17:32:16 +0300
Subject: dt-binding: phy: don't confuse with Ethernet phy properties

The generic PHY 'phys' property sometime appears in the same node with
the Ethernet PHY 'phy' or 'phy-handle' properties. Add a warning in
phy-bindings.txt to reduce confusion.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/phy/phy-bindings.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/phy/phy-bindings.txt b/Documentation/devicetree/bindings/phy/phy-bindings.txt
index 1293c321754c..a403b81d0679 100644
--- a/Documentation/devicetree/bindings/phy/phy-bindings.txt
+++ b/Documentation/devicetree/bindings/phy/phy-bindings.txt
@@ -34,7 +34,9 @@ PHY user node
 =============
 
 Required Properties:
-phys : the phandle for the PHY device (used by the PHY subsystem)
+phys : the phandle for the PHY device (used by the PHY subsystem; not to be
+       confused with the Ethernet specific 'phy' and 'phy-handle' properties,
+       see Documentation/devicetree/bindings/net/ethernet.txt for these)
 phy-names : the names of the PHY corresponding to the PHYs present in the
 	    *phys* phandle
 
-- 
cgit 


From 39ad1297a2084e0724da73d9eda2ceb9573a5d6c Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 4 Sep 2017 14:21:12 +0800
Subject: sched: Use __qdisc_drop instead of kfree_skb in sch_prio and sch_qfq

The commit 520ac30f4551 ("net_sched: drop packets after root qdisc lock
is released) made a big change of tc for performance. There are two points
left in sch_prio and sch_qfq which are not changed with that commit. Now
enhance them now with __qdisc_drop.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_prio.c | 2 +-
 net/sched/sch_qfq.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f31b28f788c0..2dd6c68ae91e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -80,7 +80,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 
 		if (ret & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return ret;
 	}
 #endif
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index cd661a7f81e6..6ddfd4991108 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1215,7 +1215,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (cl == NULL) {
 		if (err & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return err;
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
-- 
cgit 


From be82485fbcbb1cf11b13e7356231c72fdf21241c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 6 Sep 2017 11:47:12 +0800
Subject: netlink: fix an use-after-free issue for nlk groups

ChunYu found a netlink use-after-free issue by syzkaller:

[28448.842981] BUG: KASAN: use-after-free in __nla_put+0x37/0x40 at addr ffff8807185e2378
[28448.969918] Call Trace:
[...]
[28449.117207]  __nla_put+0x37/0x40
[28449.132027]  nla_put+0xf5/0x130
[28449.146261]  sk_diag_fill.isra.4.constprop.5+0x5a0/0x750 [netlink_diag]
[28449.176608]  __netlink_diag_dump+0x25a/0x700 [netlink_diag]
[28449.202215]  netlink_diag_dump+0x176/0x240 [netlink_diag]
[28449.226834]  netlink_dump+0x488/0xbb0
[28449.298014]  __netlink_dump_start+0x4e8/0x760
[28449.317924]  netlink_diag_handler_dump+0x261/0x340 [netlink_diag]
[28449.413414]  sock_diag_rcv_msg+0x207/0x390
[28449.432409]  netlink_rcv_skb+0x149/0x380
[28449.467647]  sock_diag_rcv+0x2d/0x40
[28449.484362]  netlink_unicast+0x562/0x7b0
[28449.564790]  netlink_sendmsg+0xaa8/0xe60
[28449.661510]  sock_sendmsg+0xcf/0x110
[28449.865631]  __sys_sendmsg+0xf3/0x240
[28450.000964]  SyS_sendmsg+0x32/0x50
[28450.016969]  do_syscall_64+0x25c/0x6c0
[28450.154439]  entry_SYSCALL64_slow_path+0x25/0x25

It was caused by no protection between nlk groups' free in netlink_release
and nlk groups' accessing in sk_diag_dump_groups. The similar issue also
exists in netlink_seq_show().

This patch is to defer nlk groups' free in deferred_put_nlk_sk.

Reported-by: ChunYu Wang <chunwang@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5acee49db90b..94a61e602ee7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -691,6 +691,9 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
 	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
 	struct sock *sk = &nlk->sk;
 
+	kfree(nlk->groups);
+	nlk->groups = NULL;
+
 	if (!refcount_dec_and_test(&sk->sk_refcnt))
 		return;
 
@@ -769,9 +772,6 @@ static int netlink_release(struct socket *sock)
 		netlink_table_ungrab();
 	}
 
-	kfree(nlk->groups);
-	nlk->groups = NULL;
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
 	local_bh_enable();
-- 
cgit 


From f773608026ee1e75e2256e3ad625c222ff6f9305 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 6 Sep 2017 11:53:29 +0800
Subject: netlink: access nlk groups safely in netlink bind and getname

Now there is no lock protecting nlk ngroups/groups' accessing in
netlink bind and getname. It's safe from nlk groups' setting in
netlink_release, but not from netlink_realloc_groups called by
netlink_setsockopt.

netlink_lock_table is needed in both netlink bind and getname when
accessing nlk groups.

Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 94a61e602ee7..327807731b44 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -955,7 +955,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	struct net *net = sock_net(sk);
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
-	int err;
+	int err = 0;
 	long unsigned int groups = nladdr->nl_groups;
 	bool bound;
 
@@ -983,6 +983,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			return -EINVAL;
 	}
 
+	netlink_lock_table();
 	if (nlk->netlink_bind && groups) {
 		int group;
 
@@ -993,7 +994,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			if (!err)
 				continue;
 			netlink_undo_bind(group, groups, sk);
-			return err;
+			goto unlock;
 		}
 	}
 
@@ -1006,12 +1007,13 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			netlink_autobind(sock);
 		if (err) {
 			netlink_undo_bind(nlk->ngroups, groups, sk);
-			return err;
+			goto unlock;
 		}
 	}
 
 	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
-		return 0;
+		goto unlock;
+	netlink_unlock_table();
 
 	netlink_table_grab();
 	netlink_update_subscriptions(sk, nlk->subscriptions +
@@ -1022,6 +1024,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	netlink_table_ungrab();
 
 	return 0;
+
+unlock:
+	netlink_unlock_table();
+	return err;
 }
 
 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
@@ -1079,7 +1085,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
 	} else {
 		nladdr->nl_pid = nlk->portid;
+		netlink_lock_table();
 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
+		netlink_unlock_table();
 	}
 	return 0;
 }
-- 
cgit 


From 8e0deed92406d93ae0365cb8a6134db5721e7aca Mon Sep 17 00:00:00 2001
From: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Date: Wed, 6 Sep 2017 11:08:06 +0200
Subject: tipc: remove unnecessary call to dev_net()

The net device is already stored in the 'net' variable, so no need to call
dev_net() again.

Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ac1d66d7e1fd..47ec121574ce 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -637,7 +637,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
 		break;
 	case NETDEV_UNREGISTER:
 	case NETDEV_CHANGENAME:
-		bearer_disable(dev_net(dev), b);
+		bearer_disable(net, b);
 		break;
 	}
 	return NOTIFY_OK;
-- 
cgit 


From a33fcba6ec01efcca33b1afad91057020f247f15 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Mon, 4 Sep 2017 12:51:33 -0500
Subject: rtlwifi: btcoexist: Fix breakage of ant_sel for rtl8723be

In commit bcd37f4a0831 ("rtlwifi: btcoex: 23b 2ant: let bt transmit when
hw initialisation done"), there is an additional error when the module
parameter ant_sel is used to select the auxilary antenna. The error is
that the antenna selection is not checked when writing the antenna
selection register.

Fixes: bcd37f4a0831 ("rtlwifi: btcoex: 23b 2ant: let bt transmit when hw initialisation done")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Ping-Ke Shih <pkshih@realtek.com>
Cc: Yan-Hsuan Chuang <yhchuang@realtek.com>
Cc: Birming Chiu <birming@realtek.com>
Cc: Shaofu <shaofu@realtek.com>
Cc: Steven Ting <steventing@realtek.com>
Cc: Stable <stable@vger.kernel.org> # 4.12+
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
index 31965f0ef69d..e8f07573aed9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
@@ -1183,7 +1183,10 @@ static void btc8723b2ant_set_ant_path(struct btc_coexist *btcoexist,
 		}
 
 		/* fixed internal switch S1->WiFi, S0->BT */
-		btcoexist->btc_write_4byte(btcoexist, 0x948, 0x0);
+		if (board_info->btdm_ant_pos == BTC_ANTENNA_AT_MAIN_PORT)
+			btcoexist->btc_write_2byte(btcoexist, 0x948, 0x0);
+		else
+			btcoexist->btc_write_2byte(btcoexist, 0x948, 0x280);
 
 		switch (antpos_type) {
 		case BTC_ANT_WIFI_AT_MAIN:
-- 
cgit 


From 6d622692836950b3c943776f84c4557ff6c02f3b Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Mon, 4 Sep 2017 12:51:34 -0500
Subject: rtlwifi: btcoexist: Fix antenna selection code

In commit 87d8a9f35202 ("rtlwifi: btcoex: call bind to setup btcoex"),
the code turns on a call to exhalbtc_bind_bt_coex_withadapter(). This
routine contains a bug that causes incorrect antenna selection for those
HP laptops with only one antenna and an incorrectly programmed EFUSE.
These boxes are the ones that need the ant_sel module parameter.

Fixes: 87d8a9f35202 ("rtlwifi: btcoex: call bind to setup btcoex")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Ping-Ke Shih <pkshih@realtek.com>
Cc: Yan-Hsuan Chuang <yhchuang@realtek.com>
Cc: Birming Chiu <birming@realtek.com>
Cc: Shaofu <shaofu@realtek.com>
Cc: Steven Ting <steventing@realtek.com>
Cc: Stable <stable@vger.kernel.org> # 4.13+
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c       | 23 +++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index c1eacd8352a2..b5e9877d935c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -173,6 +173,16 @@ static u8 halbtc_get_wifi_central_chnl(struct btc_coexist *btcoexist)
 
 u8 rtl_get_hwpg_single_ant_path(struct rtl_priv *rtlpriv)
 {
+	struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
+
+	/* override ant_num / ant_path */
+	if (mod_params->ant_sel) {
+		rtlpriv->btcoexist.btc_info.ant_num =
+			(mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
+
+		rtlpriv->btcoexist.btc_info.single_ant_path =
+			(mod_params->ant_sel == 1 ? 0 : 1);
+	}
 	return rtlpriv->btcoexist.btc_info.single_ant_path;
 }
 
@@ -183,6 +193,7 @@ u8 rtl_get_hwpg_bt_type(struct rtl_priv *rtlpriv)
 
 u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
 {
+	struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
 	u8 num;
 
 	if (rtlpriv->btcoexist.btc_info.ant_num == ANT_X2)
@@ -190,6 +201,10 @@ u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
 	else
 		num = 1;
 
+	/* override ant_num / ant_path */
+	if (mod_params->ant_sel)
+		num = (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1) + 1;
+
 	return num;
 }
 
@@ -876,7 +891,7 @@ bool exhalbtc_bind_bt_coex_withadapter(void *adapter)
 {
 	struct btc_coexist *btcoexist = &gl_bt_coexist;
 	struct rtl_priv *rtlpriv = adapter;
-	u8 ant_num = 2, chip_type, single_ant_path = 0;
+	u8 ant_num = 2, chip_type;
 
 	if (btcoexist->binded)
 		return false;
@@ -911,12 +926,6 @@ bool exhalbtc_bind_bt_coex_withadapter(void *adapter)
 	ant_num = rtl_get_hwpg_ant_num(rtlpriv);
 	exhalbtc_set_ant_num(rtlpriv, BT_COEX_ANT_TYPE_PG, ant_num);
 
-	/* set default antenna position to main  port */
-	btcoexist->board_info.btdm_ant_pos = BTC_ANTENNA_AT_MAIN_PORT;
-
-	single_ant_path = rtl_get_hwpg_single_ant_path(rtlpriv);
-	exhalbtc_set_single_ant_path(single_ant_path);
-
 	if (rtl_get_hwpg_package_type(rtlpriv) == 0)
 		btcoexist->board_info.tfbga_package = false;
 	else if (rtl_get_hwpg_package_type(rtlpriv) == 1)
-- 
cgit 


From 2eabc84d2f8e4f36d3719eeb6a330e10c46c8da7 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Thu, 7 Sep 2017 10:51:52 +0300
Subject: iwlwifi: mvm: only send LEDS_CMD when the FW supports it

The LEDS_CMD command is only supported in some newer FW versions
(e.g. iwlwifi-8000C-31.ucode), so we can't send it to older versions
(such as iwlwifi-8000C-27.ucode).

To fix this, check for a new bit in the FW capabilities TLV that tells
when the command is supported.

Note that the current version of -31.ucode in linux-firmware.git
(31.532993.0) does not have this capability bit set, so the LED won't
work, even though this version should support it.  But we will update
this firmware soon, so it won't be a problem anymore.

Fixes: 7089ae634c50 ("iwlwifi: mvm: use firmware LED command where applicable")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/intel/iwlwifi/fw/file.h | 1 +
 drivers/net/wireless/intel/iwlwifi/mvm/led.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 887f6d8fc8a7..279248cd9cfb 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -378,6 +378,7 @@ enum iwl_ucode_tlv_capa {
 	IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG	= (__force iwl_ucode_tlv_capa_t)80,
 	IWL_UCODE_TLV_CAPA_LQM_SUPPORT			= (__force iwl_ucode_tlv_capa_t)81,
 	IWL_UCODE_TLV_CAPA_TX_POWER_ACK			= (__force iwl_ucode_tlv_capa_t)84,
+	IWL_UCODE_TLV_CAPA_LED_CMD_SUPPORT		= (__force iwl_ucode_tlv_capa_t)86,
 	IWL_UCODE_TLV_CAPA_MLME_OFFLOAD			= (__force iwl_ucode_tlv_capa_t)96,
 
 	NUM_IWL_UCODE_TLV_CAPA
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/led.c b/drivers/net/wireless/intel/iwlwifi/mvm/led.c
index 005e2e7278a5..b27269504a62 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/led.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/led.c
@@ -92,7 +92,8 @@ static void iwl_mvm_send_led_fw_cmd(struct iwl_mvm *mvm, bool on)
 
 static void iwl_mvm_led_set(struct iwl_mvm *mvm, bool on)
 {
-	if (mvm->cfg->device_family >= IWL_DEVICE_FAMILY_8000) {
+	if (fw_has_capa(&mvm->fw->ucode_capa,
+			IWL_UCODE_TLV_CAPA_LED_CMD_SUPPORT)) {
 		iwl_mvm_send_led_fw_cmd(mvm, on);
 		return;
 	}
-- 
cgit 


From 80532384af4ccb6d6cc965fa9232aaa2c456362c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 6 Sep 2017 13:14:19 +0200
Subject: net: sched: fix memleak for chain zero

There's a memleak happening for chain 0. The thing is, chain 0 needs to
be always present, not created on demand. Therefore tcf_block_get upon
creation of block calls the tcf_chain_create function directly. The
chain is created with refcnt == 1, which is not correct in this case and
causes the memleak. So move the refcnt increment into tcf_chain_get
function even for the case when chain needs to be created.

Reported-by: Jakub Kicinski <kubakici@wp.pl>
Fixes: 5bc1701881e3 ("net: sched: introduce multichain support for filters")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ea6c65fd5fc5..c743f03cfebd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 	list_add_tail(&chain->list, &block->chain_list);
 	chain->block = block;
 	chain->index = chain_index;
-	chain->refcnt = 1;
+	chain->refcnt = 0;
 	return chain;
 }
 
@@ -217,15 +217,15 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 	struct tcf_chain *chain;
 
 	list_for_each_entry(chain, &block->chain_list, list) {
-		if (chain->index == chain_index) {
-			chain->refcnt++;
-			return chain;
-		}
+		if (chain->index == chain_index)
+			goto incref;
 	}
-	if (create)
-		return tcf_chain_create(block, chain_index);
-	else
-		return NULL;
+	chain = create ? tcf_chain_create(block, chain_index) : NULL;
+
+incref:
+	if (chain)
+		chain->refcnt++;
+	return chain;
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
-- 
cgit 


From 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 5 Sep 2017 17:26:33 +0800
Subject: ip6_gre: update mtu properly in ip6gre_err

Now when probessing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts the
offset of gre header from mtu info. The expected mtu of gre device
should also subtract gre header. Otherwise, the next packets still
can't be sent out.

Jianlin found this issue when using the topo:
  client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server

and reducing nic2's mtu, then both tcp and sctp's performance with
big size data became 0.

This patch is to fix it by also subtracting grehdr (tun->tun_hlen)
from mtu info when updating gre device's mtu in ip6gre_err(). It
also needs to subtract ETH_HLEN if gre dev'type is ARPHRD_ETHER.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 67ff2aaf5dcb..b7a72d409334 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -432,7 +432,9 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
-		mtu = be32_to_cpu(info) - offset;
+		mtu = be32_to_cpu(info) - offset - t->tun_hlen;
+		if (t->dev->type == ARPHRD_ETHER)
+			mtu -= ETH_HLEN;
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
-- 
cgit 


From ca2c1418efe9f7fe37aa1f355efdf4eb293673ce Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 6 Sep 2017 14:44:36 +0200
Subject: udp: drop head states only when all skb references are gone

After commit 0ddf3fb2c43d ("udp: preserve skb->dst if required
for IP options processing") we clear the skb head state as soon
as the skb carrying them is first processed.

Since the same skb can be processed several times when MSG_PEEK
is used, we can end up lacking the required head states, and
eventually oopsing.

Fix this clearing the skb head state only when processing the
last skb reference.

Reported-by: Eric Dumazet <edumazet@google.com>
Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 net/core/skbuff.c      | 9 +++------
 net/ipv4/udp.c         | 5 ++++-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f751f3b93039..72299ef00061 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -958,7 +958,7 @@ void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
 void consume_skb(struct sk_buff *skb);
-void consume_stateless_skb(struct sk_buff *skb);
+void __consume_stateless_skb(struct sk_buff *skb);
 void  __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 68065d7d383f..16982de649b9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -710,14 +710,11 @@ EXPORT_SYMBOL(consume_skb);
  *	consume_stateless_skb - free an skbuff, assuming it is stateless
  *	@skb: buffer to free
  *
- *	Works like consume_skb(), but this variant assumes that all the head
- *	states have been already dropped.
+ *	Alike consume_skb(), but this variant assumes that this is the last
+ *	skb reference and all the head states have been already dropped
  */
-void consume_stateless_skb(struct sk_buff *skb)
+void __consume_stateless_skb(struct sk_buff *skb)
 {
-	if (!skb_unref(skb))
-		return;
-
 	trace_consume_skb(skb);
 	skb_release_data(skb);
 	kfree_skbmem(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1c9e78c83c..ef29df8648e4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1397,12 +1397,15 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
+	if (!skb_unref(skb))
+		return;
+
 	/* In the more common cases we cleared the head states previously,
 	 * see __udp_queue_rcv_skb().
 	 */
 	if (unlikely(udp_skb_has_head_state(skb)))
 		skb_release_head_state(skb);
-	consume_stateless_skb(skb);
+	__consume_stateless_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
 
-- 
cgit 


From eef5a7cc2a571eb2176c8b9260d4ccfc9f6be127 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Sep 2017 15:38:58 +0200
Subject: isdn: isdnloop: fix logic error in isdnloop_sendbuf

gcc-7 found an ancient bug in the loop driver, leading to a condition that
is always false, meaning we ignore the contents of 'card->flags' here:

drivers/isdn/isdnloop/isdnloop.c:412:37: error: ?: using integer constants in boolean context, the expression will always evaluate to 'true' [-Werror=int-in-bool-context]

This changes the braces in the expression to ensure we actually
compare the flag bits, rather than comparing a constant. As Joe Perches
pointed out, an earlier patch of mine incorrectly assumed this was a
false-positive warning.

Cc: Joe Perches <joe@perches.com>
Link: https://patchwork.kernel.org/patch/9840289/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/isdnloop/isdnloop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index 6ffd13466b8c..e97232646ba1 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -409,7 +409,7 @@ isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card *card)
 		return -EINVAL;
 	}
 	if (len) {
-		if (!(card->flags & (channel) ? ISDNLOOP_FLAGS_B2ACTIVE : ISDNLOOP_FLAGS_B1ACTIVE))
+		if (!(card->flags & (channel ? ISDNLOOP_FLAGS_B2ACTIVE : ISDNLOOP_FLAGS_B1ACTIVE)))
 			return 0;
 		if (card->sndcount[channel] > ISDNLOOP_MAX_SQUEUE)
 			return 0;
-- 
cgit 


From 126f760ca94dae77425695f9f9238b731de86e32 Mon Sep 17 00:00:00 2001
From: Håkon Bugge <Haakon.Bugge@oracle.com>
Date: Wed, 6 Sep 2017 18:35:51 +0200
Subject: rds: Fix incorrect statistics counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In rds_send_xmit() there is logic to batch the sends. However, if
another thread has acquired the lock and has incremented the send_gen,
it is considered a race and we yield. The code incrementing the
s_send_lock_queue_raced statistics counter did not count this event
correctly.

This commit counts the race condition correctly.

Changes from v1:
- Removed check for *someone_on_xmit()*
- Fixed incorrect indentation

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Knut Omang <knut.omang@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index 058a40743041..b52cdc8ae428 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -428,14 +428,18 @@ over_batch:
 	 * some work and we will skip our goto
 	 */
 	if (ret == 0) {
+		bool raced;
+
 		smp_mb();
+		raced = send_gen != READ_ONCE(cp->cp_send_gen);
+
 		if ((test_bit(0, &conn->c_map_queued) ||
-		     !list_empty(&cp->cp_send_queue)) &&
-			send_gen == READ_ONCE(cp->cp_send_gen)) {
-			rds_stats_inc(s_send_lock_queue_raced);
+		    !list_empty(&cp->cp_send_queue)) && !raced) {
 			if (batch_count < send_batch_count)
 				goto restart;
 			queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		} else if (raced) {
+			rds_stats_inc(s_send_lock_queue_raced);
 		}
 	}
 out:
-- 
cgit 


From f957dd3c8db2781c8a334b166800788feb618625 Mon Sep 17 00:00:00 2001
From: Ian W MORRISON <ianwmorrison@gmail.com>
Date: Thu, 31 Aug 2017 08:51:03 +1000
Subject: brcmfmac: feature check for multi-scheduled scan fails on bcm4345
 devices

The firmware feature check introduced for multi-scheduled scan is also
failing for bcm4345 devices resulting in a firmware crash.
The reason for this crash has not yet been root cause so this patch avoids
the feature check for those device as a short-term fix.

Fixes: 9fe929aaace6 ("brcmfmac: add firmware feature detection for gscan feature")
Cc: <stable@vger.kernel.org> # v4.13
Signed-off-by: Ian W MORRISON <ianwmorrison@gmail.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index f1b60740e020..53ae30259989 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -159,7 +159,8 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
 
 	brcmf_feat_firmware_capabilities(ifp);
 	memset(&gscan_cfg, 0, sizeof(gscan_cfg));
-	if (drvr->bus_if->chip != BRCM_CC_43430_CHIP_ID)
+	if (drvr->bus_if->chip != BRCM_CC_43430_CHIP_ID &&
+	    drvr->bus_if->chip != BRCM_CC_4345_CHIP_ID)
 		brcmf_feat_iovar_data_set(ifp, BRCMF_FEAT_GSCAN,
 					  "pfn_gscan_cfg",
 					  &gscan_cfg, sizeof(gscan_cfg));
-- 
cgit 


From 1cc4a018669f2fb18c10010f1a7ab3f6fb688cef Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 20 Aug 2017 13:38:07 +0800
Subject: netfilter: ipvs: fix the issue that sctp_conn_schedule drops non-INIT
 packet

Commit 5e26b1b3abce ("ipvs: support scheduling inverse and icmp SCTP
packets") changed to check packet type early. It introduced a side
effect: if it's not a INIT packet, ports will be set as  NULL, and
the packet will be dropped later.

It caused that sctp couldn't create connection when ipvs module is
loaded and any scheduler is registered on server.

Li Shuang reproduced it by running the cmds on sctp server:
  # ipvsadm -A -t 1.1.1.1:80 -s rr
  # ipvsadm -D -t 1.1.1.1:80
then the server could't work any more.

This patch is to return 1 when it's not an INIT packet. It means ipvs
will accept it without creating a conn for it, just like what it does
for tcp.

Fixes: 5e26b1b3abce ("ipvs: support scheduling inverse and icmp SCTP packets")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index e1efa446b305..81f08198b125 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -24,9 +24,12 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		if (sh) {
 			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
 						 sizeof(_schunkh), &_schunkh);
-			if (sch && (sch->type == SCTP_CID_INIT ||
-				    sysctl_sloppy_sctp(ipvs)))
+			if (sch) {
+				if (!(sysctl_sloppy_sctp(ipvs) ||
+				      sch->type == SCTP_CID_INIT))
+					return 1;
 				ports = &sh->source;
+			}
 		}
 	} else {
 		ports = skb_header_pointer(
-- 
cgit 


From 68913a018f6082f8f90abb8ff9114435ef45dff7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 20 Aug 2017 13:38:08 +0800
Subject: netfilter: ipvs: do not create conn for ABORT packet in
 sctp_conn_schedule

There's no reason for ipvs to create a conn for an ABORT packet
even if sysctl_sloppy_sctp is set.

This patch is to accept it without creating a conn, just as ipvs
does for tcp's RST packet.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 81f08198b125..57c8ee66491e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -25,7 +25,8 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
 						 sizeof(_schunkh), &_schunkh);
 			if (sch) {
-				if (!(sysctl_sloppy_sctp(ipvs) ||
+				if (sch->type == SCTP_CID_ABORT ||
+				    !(sysctl_sloppy_sctp(ipvs) ||
 				      sch->type == SCTP_CID_INIT))
 					return 1;
 				ports = &sh->source;
-- 
cgit 


From ba1cc08d9488c94cb8d94f545305688b72a2a300 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 8 Sep 2017 10:26:19 +0200
Subject: ipv6: fix memory leak with multiple tables during netns destruction

fib6_net_exit only frees the main and local tables. If another table was
created with fib6_alloc_table, we leak it when the netns is destroyed.

Fix this in the same way ip_fib_net_exit cleans up tables, by walking
through the whole hashtable of fib6_table's. We can get rid of the
special cases for local and main, since they're also part of the
hashtable.

Reproducer:
    ip netns add x
    ip -net x -6 rule add from 6003:1::/64 table 100
    ip netns del x

Reported-by: Jianlin Shi <jishi@redhat.com>
Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index a3b5c163325f..8280172c806c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -191,6 +191,12 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 }
 EXPORT_SYMBOL_GPL(rt6_free_pcpu);
 
+static void fib6_free_table(struct fib6_table *table)
+{
+	inetpeer_invalidate_tree(&table->tb6_peers);
+	kfree(table);
+}
+
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
 {
 	unsigned int h;
@@ -2022,15 +2028,22 @@ out_timer:
 
 static void fib6_net_exit(struct net *net)
 {
+	unsigned int i;
+
 	rt6_ifdown(net, NULL);
 	del_timer_sync(&net->ipv6.ip6_fib_timer);
 
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
-	kfree(net->ipv6.fib6_local_tbl);
-#endif
-	inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
-	kfree(net->ipv6.fib6_main_tbl);
+	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+		struct hlist_node *tmp;
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
+			hlist_del(&tb->tb6_hlist);
+			fib6_free_table(tb);
+		}
+	}
+
 	kfree(net->ipv6.fib_table_hash);
 	kfree(net->ipv6.rt6_stats);
 	fib6_notifier_exit(net);
-- 
cgit 


From 75c2631468e8af554057246b2413e738dd96af3d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 31 Aug 2017 13:45:24 +0200
Subject: netfilter: nf_nat: don't bug when mapping already exists

It seems preferrable to limp along if we have a conflicting mapping,
its certainly better than a BUG().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 40573aa6c133..dc3519cc7209 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -416,7 +416,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
 		maniptype != NF_NAT_MANIP_DST);
-	BUG_ON(nf_nat_initialized(ct, maniptype));
+
+	if (WARN_ON(nf_nat_initialized(ct, maniptype)))
+		return NF_DROP;
 
 	/* What we've got will look like inverse of reply. Normally
 	 * this is what is in the conntrack, except for prior
-- 
cgit 


From a5d7a714569199f909cd60ff7074107bf15c7db4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 1 Sep 2017 22:41:03 +0200
Subject: netfilter: xtables: add scheduling opportunity in get_counters

There are reports about spurious softlockups during iptables-restore, a
backtrace i saw points at get_counters -- it uses a sequence lock and also
has unbounded restart loop.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 1 +
 net/ipv4/netfilter/ip_tables.c  | 1 +
 net/ipv6/netfilter/ip6_tables.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e04457198f93..9e2770fd00be 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -629,6 +629,7 @@ static void get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
+			cond_resched();
 		}
 	}
 }
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 576cba2b57e9..39286e543ee6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -776,6 +776,7 @@ get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
+			cond_resched();
 		}
 	}
 }
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 54b1e75eded1..01bd3ee5ebc6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -795,6 +795,7 @@ get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
+			cond_resched();
 		}
 	}
 }
-- 
cgit 


From e1bf1687740ce1a3598a1c5e452b852ff2190682 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Sep 2017 14:39:51 +0200
Subject: netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to
 rhashtable"

This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1.

It was not a good idea. The custom hash table was a much better
fit for this purpose.

A fast lookup is not essential, in fact for most cases there is no lookup
at all because original tuple is not taken and can be used as-is.
What needs to be fast is insertion and deletion.

rhlist removal however requires a rhlist walk.
We can have thousands of entries in such a list if source port/addresses
are reused for multiple flows, if this happens removal requests are so
expensive that deletions of a few thousand flows can take several
seconds(!).

The advantages that we got from rhashtable are:
1) table auto-sizing
2) multiple locks

1) would be nice to have, but it is not essential as we have at
most one lookup per new flow, so even a million flows in the bysource
table are not a problem compared to current deletion cost.
2) is easy to add to custom hash table.

I tried to add hlist_node to rhlist to speed up rhltable_remove but this
isn't doable without changing semantics.  rhltable_remove_fast will
check that the to-be-deleted object is part of the table and that
requires a list walk that we want to avoid.

Furthermore, using hlist_node increases size of struct rhlist_head, which
in turn increases nf_conn size.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821
Reported-by: Ivan Babrou <ibobrik@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |   3 +-
 include/net/netfilter/nf_nat.h       |   1 -
 net/netfilter/nf_nat_core.c          | 130 ++++++++++++++---------------------
 3 files changed, 54 insertions(+), 80 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index fdc9c64a1c94..8f3bd30511de 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,7 +17,6 @@
 #include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/atomic.h>
-#include <linux/rhashtable.h>
 
 #include <linux/netfilter/nf_conntrack_tcp.h>
 #include <linux/netfilter/nf_conntrack_dccp.h>
@@ -77,7 +76,7 @@ struct nf_conn {
 	possible_net_t ct_net;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
-	struct rhlist_head nat_bysource;
+	struct hlist_node	nat_bysource;
 #endif
 	/* all members below initialized via memset */
 	u8 __nfct_init_offset[0];
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 05c82a1a4267..b71701302e61 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,6 +1,5 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
-#include <linux/rhashtable.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index dc3519cc7209..f090419f5f97 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
+static DEFINE_SPINLOCK(nf_nat_lock);
+
 static DEFINE_MUTEX(nf_nat_proto_mutex);
 static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 
-struct nf_nat_conn_key {
-	const struct net *net;
-	const struct nf_conntrack_tuple *tuple;
-	const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
 
 inline const struct nf_nat_l3proto *
 __nf_nat_l3proto_find(u8 family)
@@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 EXPORT_SYMBOL(nf_xfrm_me_harder);
 #endif /* CONFIG_XFRM */
 
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_tuple *t;
-	const struct nf_conn *ct = data;
+	unsigned int hash;
+
+	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
 
-	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	/* Original src, to ensure we map it consistently if poss. */
+	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
 
-	seed ^= net_hash_mix(nf_ct_net(ct));
-	return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
-		      t->dst.protonum ^ seed);
+	return reciprocal_scale(hash, nf_nat_htable_size);
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct,
 		t->src.u.all == tuple->src.u.all);
 }
 
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
-			       const void *obj)
-{
-	const struct nf_nat_conn_key *key = arg->key;
-	const struct nf_conn *ct = obj;
-
-	if (!same_src(ct, key->tuple) ||
-	    !net_eq(nf_ct_net(ct), key->net) ||
-	    !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
-		return 1;
-
-	return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
-	.head_offset = offsetof(struct nf_conn, nat_bysource),
-	.obj_hashfn = nf_nat_bysource_hash,
-	.obj_cmpfn = nf_nat_bysource_cmp,
-	.nelem_hint = 256,
-	.min_size = 1024,
-};
-
 /* Only called for SRC manip */
 static int
 find_appropriate_src(struct net *net,
@@ -216,26 +194,22 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn *ct;
-	struct nf_nat_conn_key key = {
-		.net = net,
-		.tuple = tuple,
-		.zone = zone
-	};
-	struct rhlist_head *hl, *h;
-
-	hl = rhltable_lookup(&nf_nat_bysource_table, &key,
-			     nf_nat_bysource_params);
 
-	rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
-		nf_ct_invert_tuplepr(result,
-				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-		result->dst = tuple->dst;
-
-		if (in_range(l3proto, l4proto, result, range))
-			return 1;
+	hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+		if (same_src(ct, tuple) &&
+		    net_eq(net, nf_ct_net(ct)) &&
+		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+			/* Copy source part from reply tuple. */
+			nf_ct_invert_tuplepr(result,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+			result->dst = tuple->dst;
+
+			if (in_range(l3proto, l4proto, result, range))
+				return 1;
+		}
 	}
-
 	return 0;
 }
 
@@ -408,6 +382,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
 	/* Can't setup nat info for confirmed ct. */
@@ -449,19 +424,14 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
-		struct nf_nat_conn_key key = {
-			.net = nf_ct_net(ct),
-			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			.zone = nf_ct_zone(ct),
-		};
-		int err;
-
-		err = rhltable_insert_key(&nf_nat_bysource_table,
-					  &key,
-					  &ct->nat_bysource,
-					  nf_nat_bysource_params);
-		if (err)
-			return NF_DROP;
+		unsigned int srchash;
+
+		srchash = hash_by_src(net,
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		spin_lock_bh(&nf_nat_lock);
+		hlist_add_head_rcu(&ct->nat_bysource,
+				   &nf_nat_bysource[srchash]);
+		spin_unlock_bh(&nf_nat_lock);
 	}
 
 	/* It's done. */
@@ -570,8 +540,9 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	 * will delete entry from already-freed table.
 	 */
 	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
-	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-			nf_nat_bysource_params);
+	spin_lock_bh(&nf_nat_lock);
+	hlist_del_rcu(&ct->nat_bysource);
+	spin_unlock_bh(&nf_nat_lock);
 
 	/* don't delete conntrack.  Although that would make things a lot
 	 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -699,9 +670,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
-	if (ct->status & IPS_SRC_NAT_DONE)
-		rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-				nf_nat_bysource_params);
+	if (ct->status & IPS_SRC_NAT_DONE) {
+		spin_lock_bh(&nf_nat_lock);
+		hlist_del_rcu(&ct->nat_bysource);
+		spin_unlock_bh(&nf_nat_lock);
+	}
 }
 
 static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -825,13 +798,16 @@ static int __init nf_nat_init(void)
 {
 	int ret;
 
-	ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
-	if (ret)
-		return ret;
+	/* Leave them the same for the moment. */
+	nf_nat_htable_size = nf_conntrack_htable_size;
+
+	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+	if (!nf_nat_bysource)
+		return -ENOMEM;
 
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
-		rhltable_destroy(&nf_nat_bysource_table);
+		nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
 		return ret;
 	}
@@ -865,8 +841,8 @@ static void __exit nf_nat_cleanup(void)
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
-
-	rhltable_destroy(&nf_nat_bysource_table);
+	synchronize_net();
+	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 }
 
 MODULE_LICENSE("GPL");
-- 
cgit 


From 8073e960a03bf7b5d5ebfc5ff18ac475e1688f46 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Sep 2017 14:39:52 +0200
Subject: netfilter: nat: use keyed locks

no need to serialize on a single lock, we can partition the table and
add/delete in parallel to different slots.
This restores one of the advantages that got lost with the rhlist
revert.

Cc: Ivan Babrou <ibobrik@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f090419f5f97..f393a7086025 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,7 +30,7 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
-static DEFINE_SPINLOCK(nf_nat_lock);
+static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
 
 static DEFINE_MUTEX(nf_nat_proto_mutex);
 static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
@@ -425,13 +425,15 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
 		unsigned int srchash;
+		spinlock_t *lock;
 
 		srchash = hash_by_src(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		spin_lock_bh(&nf_nat_lock);
+		lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
+		spin_lock_bh(lock);
 		hlist_add_head_rcu(&ct->nat_bysource,
 				   &nf_nat_bysource[srchash]);
-		spin_unlock_bh(&nf_nat_lock);
+		spin_unlock_bh(lock);
 	}
 
 	/* It's done. */
@@ -525,6 +527,16 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
 	return i->status & IPS_NAT_MASK ? 1 : 0;
 }
 
+static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+	unsigned int h;
+
+	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+	hlist_del_rcu(&ct->nat_bysource);
+	spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+}
+
 static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 {
 	if (nf_nat_proto_remove(ct, data))
@@ -540,9 +552,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	 * will delete entry from already-freed table.
 	 */
 	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
-	spin_lock_bh(&nf_nat_lock);
-	hlist_del_rcu(&ct->nat_bysource);
-	spin_unlock_bh(&nf_nat_lock);
+	__nf_nat_cleanup_conntrack(ct);
 
 	/* don't delete conntrack.  Although that would make things a lot
 	 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -670,11 +680,8 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
-	if (ct->status & IPS_SRC_NAT_DONE) {
-		spin_lock_bh(&nf_nat_lock);
-		hlist_del_rcu(&ct->nat_bysource);
-		spin_unlock_bh(&nf_nat_lock);
-	}
+	if (ct->status & IPS_SRC_NAT_DONE)
+		__nf_nat_cleanup_conntrack(ct);
 }
 
 static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -796,10 +803,12 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
 
 static int __init nf_nat_init(void)
 {
-	int ret;
+	int ret, i;
 
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
+	if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
+		nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
 
 	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
 	if (!nf_nat_bysource)
@@ -812,6 +821,9 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
+	for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
+		spin_lock_init(&nf_nat_locks[i]);
+
 	nf_ct_helper_expectfn_register(&follow_master_nat);
 
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
-- 
cgit 


From 74585d4f84379528347630253c42518c5002d2f9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Sep 2017 14:47:57 +0200
Subject: netfilter: core: remove erroneous warn_on

kernel test robot reported:

WARNING: CPU: 0 PID: 1244 at net/netfilter/core.c:218 __nf_hook_entries_try_shrink+0x49/0xcd
[..]

After allowing batching in nf_unregister_net_hooks its possible that an earlier
call to __nf_hook_entries_try_shrink already compacted the list.
If this happens we don't need to do anything.

Fixes: d3ad2c17b4047 ("netfilter: core: batch nf_unregister_net_hooks synchronize_net calls")
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Aaron Conole <aconole@bytheb.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 04fe25abc5f6..52cd2901a097 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -215,7 +215,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
 	if (skip == hook_entries)
 		goto out_assign;
 
-	if (WARN_ON(skip == 0))
+	if (skip == 0)
 		return NULL;
 
 	hook_entries -= skip;
-- 
cgit 


From 05d0eae7c1cf6bf7b19c02b3a97ff457b3317323 Mon Sep 17 00:00:00 2001
From: Zhizhou Tian <zhizhou.tian@gmail.com>
Date: Fri, 8 Sep 2017 11:00:16 +0800
Subject: netfilter: xt_hashlimit: alloc hashtable with right size

struct xt_byteslimit_htable used hlist_head, but memory allocation is
done through sizeof(struct list_head).

Signed-off-by: Zhizhou Tian <zhizhou.tian@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_hashlimit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 10d48234f5f4..962ea4a63d9f 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -279,7 +279,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 		size = cfg->size;
 	} else {
 		size = (totalram_pages << PAGE_SHIFT) / 16384 /
-		       sizeof(struct list_head);
+		       sizeof(struct hlist_head);
 		if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
 			size = 8192;
 		if (size < 16)
@@ -287,7 +287,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 	}
 	/* FIXME: don't use vmalloc() here or anywhere else -HW */
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
-	                sizeof(struct list_head) * size);
+	                sizeof(struct hlist_head) * size);
 	if (hinfo == NULL)
 		return -ENOMEM;
 	*out_hinfo = hinfo;
-- 
cgit 


From 90c4ae4e2c1da9f1eaf846136861af43d4c1ff34 Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Fri, 8 Sep 2017 01:38:58 -0400
Subject: netfilter: xt_hashlimit: fix build error caused by 64bit division

64bit division causes build/link errors on 32bit architectures. It
prints out error messages like:

ERROR: "__aeabi_uldivmod" [net/netfilter/xt_hashlimit.ko] undefined!

The value of avg passed through by userspace in BYTE mode cannot exceed
U32_MAX. Which means 64bit division in user2rate_bytes is unnecessary.
To fix this I have changed the type of param 'user' to u32.

Since anything greater than U32_MAX is an invalid input we error out in
hashlimit_mt_check_common() when this is the case.

Changes in v2:
	Making return type as u32 would cause an overflow for small
	values of 'user' (for example 2, 3 etc). To avoid this I bumped up
	'r' to u64 again as well as the return type. This is OK since the
	variable that stores the result is u64. We still avoid 64bit
	division here since 'user' is u32.

Fixes: bea74641e378 ("netfilter: xt_hashlimit: add rate match mode")
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_hashlimit.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 962ea4a63d9f..5da8746f7b88 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -35,6 +35,7 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/xt_hashlimit.h>
 #include <linux/mutex.h>
+#include <linux/kernel.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -527,12 +528,12 @@ static u64 user2rate(u64 user)
 	}
 }
 
-static u64 user2rate_bytes(u64 user)
+static u64 user2rate_bytes(u32 user)
 {
 	u64 r;
 
-	r = user ? 0xFFFFFFFFULL / user : 0xFFFFFFFFULL;
-	r = (r - 1) << 4;
+	r = user ? U32_MAX / user : U32_MAX;
+	r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
 	return r;
 }
 
@@ -588,7 +589,8 @@ static void rateinfo_init(struct dsthash_ent *dh,
 		dh->rateinfo.prev_window = 0;
 		dh->rateinfo.current_rate = 0;
 		if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
-			dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+			dh->rateinfo.rate =
+				user2rate_bytes((u32)hinfo->cfg.avg);
 			if (hinfo->cfg.burst)
 				dh->rateinfo.burst =
 					hinfo->cfg.burst * dh->rateinfo.rate;
@@ -870,7 +872,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 
 	/* Check for overflow. */
 	if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
-		if (cfg->avg == 0) {
+		if (cfg->avg == 0 || cfg->avg > U32_MAX) {
 			pr_info("hashlimit invalid rate\n");
 			return -ERANGE;
 		}
-- 
cgit 


From 7906b00f5cd1cd484fced7fcda892176e3202c8a Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 8 Sep 2017 11:35:21 -0300
Subject: sctp: fix missing wake ups in some situations

Commit fb586f25300f ("sctp: delay calls to sk_data_ready() as much as
possible") minimized the number of wake ups that are triggered in case
the association receives a packet with multiple data chunks on it and/or
when io_events are enabled and then commit 0970f5b36659 ("sctp: signal
sk_data_ready earlier on data chunks reception") moved the wake up to as
soon as possible. It thus relies on the state machine running later to
clean the flag that the event was already generated.

The issue is that there are 2 call paths that calls
sctp_ulpq_tail_event() outside of the state machine, causing the flag to
linger and possibly omitting a needed wake up in the sequence.

One of the call paths is when enabling SCTP_SENDER_DRY_EVENTS via
setsockopt(SCTP_EVENTS), as noticed by Harald Welte. The other is when
partial reliability triggers removal of chunks from the send queue when
the application calls sendmsg().

This commit fixes it by not setting the flag in case the socket is not
owned by the user, as it won't be cleaned later. This works for
user-initiated calls and also for rx path processing.

Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible")
Reported-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ulpqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0225d62a869f..a71be33f3afe 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 		sctp_ulpq_clear_pd(ulpq);
 
 	if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
-		sp->data_ready_signalled = 1;
+		if (!sock_owned_by_user(sk))
+			sp->data_ready_signalled = 1;
 		sk->sk_data_ready(sk);
 	}
 	return 1;
-- 
cgit 


From 1f3b359f1004bd34b7b0bad70b93e3c7af92a37b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 8 Sep 2017 12:44:47 -0700
Subject: tcp: fix a request socket leak

While the cited commit fixed a possible deadlock, it added a leak
of the request socket, since reqsk_put() must be called if the BPF
filter decided the ACK packet must be dropped.

Fixes: d624d276d1dd ("tcp: fix possible deadlock in TCP stack vs BPF filter")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 6 +++---
 net/ipv6/tcp_ipv6.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a63486afa7a7..d9416b5162bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1669,9 +1669,9 @@ process:
 		 */
 		sock_hold(sk);
 		refcounted = true;
-		if (tcp_filter(sk, skb))
-			goto discard_and_relse;
-		nsk = tcp_check_req(sk, skb, req, false);
+		nsk = NULL;
+		if (!tcp_filter(sk, skb))
+			nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 38f76d8b231e..64d94afa427f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1460,9 +1460,9 @@ process:
 		}
 		sock_hold(sk);
 		refcounted = true;
-		if (tcp_filter(sk, skb))
-			goto discard_and_relse;
-		nsk = tcp_check_req(sk, skb, req, false);
+		nsk = NULL;
+		if (!tcp_filter(sk, skb))
+			nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
-- 
cgit 


From 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 8 Sep 2017 15:48:47 -0700
Subject: ipv6: fix typo in fib6_net_exit()

IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ.

Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8280172c806c..e5308d7cbd75 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2033,7 +2033,7 @@ static void fib6_net_exit(struct net *net)
 	rt6_ifdown(net, NULL);
 	del_timer_sync(&net->ipv6.ip6_fib_timer);
 
-	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
 		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
 		struct hlist_node *tmp;
 		struct fib6_table *tb;
-- 
cgit 


From 0f693f1995cf002432b70f43ce73f79bf8d0b6c9 Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Thu, 7 Sep 2017 14:08:34 +0800
Subject: ip_tunnel: fix setting ttl and tos value in collect_md mode

ttl and tos variables are declared and assigned, but are not used in
iptunnel_xmit() function.

Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
Cc: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 129d1a3616f8..e1856bfa753d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 		ip_rt_put(rt);
 		goto tx_dropped;
 	}
-	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
-		      key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+		      df, !net_eq(tunnel->net, dev_net(dev)));
 	return;
 tx_error:
 	dev->stats.tx_errors++;
-- 
cgit 


From 18e1173d5f3615c8c2680853ba87830ad8a0febc Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Thu, 7 Sep 2017 14:08:35 +0800
Subject: ip6_tunnel: fix setting hop_limit value for ipv6 tunnel

Similar to vxlan/geneve tunnel, if hop_limit is zero, it should fall
back to ip6_dst_hoplimt().

Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3a0ba2ae4b0f..10a693a19323 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1184,6 +1184,7 @@ route_lookup:
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_frag_opts(skb, &opt.ops, &proto);
 	}
+	hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
 
 	/* Calculate max headroom for all the headers and adjust
 	 * needed_headroom if necessary.
-- 
cgit 


From c43593d81ab59b8ad24ee5545e098e30f742d461 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 7 Sep 2017 11:09:59 +0300
Subject: dt-bindings: net: don't confuse with generic PHY property

This complements commit 9a94b3a4bd (dt-binding: phy: don't confuse with
Ethernet phy properties).

The generic PHY 'phys' property sometime appears in the same node with
the Ethernet PHY 'phy' or 'phy-handle' properties. Add a warning in
ethernet.txt to reduce confusion.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ethernet.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index 7da86f22a13b..2974e63ba311 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -1,5 +1,9 @@
 The following properties are common to the Ethernet controllers:
 
+NOTE: All 'phy*' properties documented below are Ethernet specific. For the
+generic PHY 'phys' property, see
+Documentation/devicetree/bindings/phy/phy-bindings.txt.
+
 - local-mac-address: array of 6 bytes, specifies the MAC address that was
   assigned to the network device;
 - mac-address: array of 6 bytes, specifies the MAC address that was last used by
-- 
cgit 


From 165da3585743076c2a9de6f12b0b31df846d7c01 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 7 Sep 2017 12:25:48 +0300
Subject: dt-bindings: add SFF vendor prefix

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 4e72012928b4..12b5808b82f2 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -289,6 +289,7 @@ schindler	Schindler
 seagate	Seagate Technology PLC
 semtech	Semtech Corporation
 sensirion	Sensirion AG
+sff	Small Form Factor Committee
 sgx	SGX Sensortech
 sharp	Sharp Corporation
 si-en	Si-En Technology Ltd.
-- 
cgit 


From 3ef3714000ad86e8b8fbdaa3abc4d468a8e98a79 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 7 Sep 2017 12:25:49 +0300
Subject: dt-binding: net: sfp binding documentation

Add device-tree binding documentation SFP transceivers. Support for SFP
transceivers has been recently introduced (drivers/net/phy/sfp.c).

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/sff,sfp.txt | 76 +++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/sff,sfp.txt

diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
new file mode 100644
index 000000000000..60e970ce10ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -0,0 +1,76 @@
+Small Form Factor (SFF) Committee Small Form-factor Pluggable (SFP)
+Transceiver
+
+Required properties:
+
+- compatible : must be "sff,sfp"
+
+Optional Properties:
+
+- i2c-bus : phandle of an I2C bus controller for the SFP two wire serial
+  interface
+
+- mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS)
+  module presence input gpio signal, active (module absent) high
+
+- los-gpios : GPIO phandle and a specifier of the Receiver Loss of Signal
+  Indication input gpio signal, active (signal lost) high
+
+- tx-fault-gpios : GPIO phandle and a specifier of the Module Transmitter
+  Fault input gpio signal, active (fault condition) high
+
+- tx-disable-gpios : GPIO phandle and a specifier of the Transmitter Disable
+  output gpio signal, active (Tx disable) high
+
+- rate-select0-gpios : GPIO phandle and a specifier of the Rx Signaling Rate
+  Select (AKA RS0) output gpio signal, low: low Rx rate, high: high Rx rate
+
+- rate-select1-gpios : GPIO phandle and a specifier of the Tx Signaling Rate
+  Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
+  high Tx rate
+
+Example #1: Direct serdes to SFP connection
+
+sfp_eth3: sfp-eth3 {
+	compatible = "sff,sfp";
+	i2c-bus = <&sfp_1g_i2c>;
+	los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
+	mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
+	tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
+	tx-fault-gpios = <&cpm_gpio2 19 GPIO_ACTIVE_HIGH>;
+};
+
+&cps_emac3 {
+	phy-names = "comphy";
+	phys = <&cps_comphy5 0>;
+	sfp = <&sfp_eth3>;
+};
+
+Example #2: Serdes to PHY to SFP connection
+
+sfp_eth0: sfp-eth0 {
+	compatible = "sff,sfp";
+	i2c-bus = <&sfpp0_i2c>;
+	los-gpios = <&cps_gpio1 28 GPIO_ACTIVE_HIGH>;
+	mod-def0-gpios = <&cps_gpio1 27 GPIO_ACTIVE_LOW>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&cps_sfpp0_pins>;
+	tx-disable-gpios = <&cps_gpio1 29 GPIO_ACTIVE_HIGH>;
+	tx-fault-gpios  = <&cps_gpio1 26 GPIO_ACTIVE_HIGH>;
+};
+
+p0_phy: ethernet-phy@0 {
+	compatible = "ethernet-phy-ieee802.3-c45";
+	pinctrl-names = "default";
+	pinctrl-0 = <&cpm_phy0_pins &cps_phy0_pins>;
+	reg = <0>;
+	interrupt = <&cpm_gpio2 18 IRQ_TYPE_EDGE_FALLING>;
+	sfp = <&sfp_eth0>;
+};
+
+&cpm_eth0 {
+	phy = <&p0_phy>;
+	phy-mode = "10gbase-kr";
+};
-- 
cgit 


From 25ee079371640573bbb22ab04ca5fe7a6e9842cf Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 7 Sep 2017 12:25:50 +0300
Subject: net: phy: sfp: rename dt properties to match the binding

Make the Rx rate select control gpio property name match the documented
binding. This would make the addition of 'rate-select1-gpios' for SFP+
support more natural.

Also, make the MOD-DEF0 gpio property name match the documentation.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index fb2cf4342f48..baee371bf767 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -58,11 +58,11 @@ enum {
 };
 
 static const char *gpio_of_names[] = {
-	"moddef0",
+	"mod-def0",
 	"los",
 	"tx-fault",
 	"tx-disable",
-	"rate-select",
+	"rate-select0",
 };
 
 static const enum gpiod_flags gpio_flags[] = {
-- 
cgit 


From 0fdbedc7ddfc22e5a6e9596dd01b0dd922bf142c Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 7 Sep 2017 13:24:20 +0200
Subject: davicom: Display proper debug level up to 6

This will make it explicit some messages are of the form:
dm9000_dbg(db, 5, ...

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 16fe776ddbe5..50222b7b81f3 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -65,7 +65,7 @@ MODULE_PARM_DESC(watchdog, "transmit timeout in milliseconds");
  */
 static int debug;
 module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "dm9000 debug level (0-4)");
+MODULE_PARM_DESC(debug, "dm9000 debug level (0-6)");
 
 /* DM9000 register address locking.
  *
-- 
cgit 


From e333ac1f1db09e37425c90bb6cf3dec721a6d71d Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Thu, 7 Sep 2017 18:32:30 +0300
Subject: net: ethernet: ti: netcp_core: no need in netif_napi_del

Don't remove rx_napi specifically just before free_netdev(),
it's supposed to be done in it and is confusing w/o tx_napi deletion.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index eb96a6913235..437d36289786 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2145,7 +2145,6 @@ static void netcp_delete_interface(struct netcp_device *netcp_device,
 
 	of_node_put(netcp->node_interface);
 	unregister_netdev(ndev);
-	netif_napi_del(&netcp->rx_napi);
 	free_netdev(ndev);
 }
 
-- 
cgit 


From 9a486c9dc515f7daa05a344621031ee09645c522 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 7 Sep 2017 12:35:14 -0700
Subject: net: tulip: Constify tulip_tbl

It looks like all users of tulip_tbl are reads, so mark this table
as read-only.

$ git grep tulip_tbl  # edited to avoid line-wraps...
interrupt.c: iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ...
interrupt.c: iowrite32(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ...
interrupt.c: iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ...
interrupt.c: iowrite32(tulip_tbl[tp->chip_id].valid_intrs | TimerInt,
pnic.c:      iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
tulip.h:     extern struct tulip_chip_table tulip_tbl[];
tulip_core.c:struct tulip_chip_table tulip_tbl[] = {
tulip_core.c:iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5);
tulip_core.c:iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
tulip_core.c:setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
tulip_core.c:const char *chip_name = tulip_tbl[chip_idx].chip_name;
tulip_core.c:if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size)
tulip_core.c:ioaddr =  pci_iomap(..., tulip_tbl[chip_idx].io_size);
tulip_core.c:tp->flags = tulip_tbl[chip_idx].flags;
tulip_core.c:setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
tulip_core.c:INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task);

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jarod Wilson <jarod@redhat.com>
Cc: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Cc: netdev@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/tulip.h      | 2 +-
 drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h
index 38431a155f09..06660dbc44b7 100644
--- a/drivers/net/ethernet/dec/tulip/tulip.h
+++ b/drivers/net/ethernet/dec/tulip/tulip.h
@@ -515,7 +515,7 @@ void comet_timer(unsigned long data);
 extern int tulip_debug;
 extern const char * const medianame[];
 extern const char tulip_media_cap[];
-extern struct tulip_chip_table tulip_tbl[];
+extern const struct tulip_chip_table tulip_tbl[];
 void oom_timer(unsigned long data);
 extern u8 t21040_csr13[];
 
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 84394b43c0a1..851b6d1f5a42 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -138,7 +138,7 @@ static void tulip_timer(unsigned long data)
  * It is indexed via the values in 'enum chips'
  */
 
-struct tulip_chip_table tulip_tbl[] = {
+const struct tulip_chip_table tulip_tbl[] = {
   { }, /* placeholder for array, slot unused currently */
   { }, /* placeholder for array, slot unused currently */
 
-- 
cgit 


From 109980b894e9dae66c37c3d804a415aa68b19c7e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 8 Sep 2017 00:14:51 +0200
Subject: bpf: don't select potentially stale ri->map from buggy xdp progs

We can potentially run into a couple of issues with the XDP
bpf_redirect_map() helper. The ri->map in the per CPU storage
can become stale in several ways, mostly due to misuse, where
we can then trigger a use after free on the map:

i) prog A is calling bpf_redirect_map(), returning XDP_REDIRECT
and running on a driver not supporting XDP_REDIRECT yet. The
ri->map on that CPU becomes stale when the XDP program is unloaded
on the driver, and a prog B loaded on a different driver which
supports XDP_REDIRECT return code. prog B would have to omit
calling to bpf_redirect_map() and just return XDP_REDIRECT, which
would then access the freed map in xdp_do_redirect() since not
cleared for that CPU.

ii) prog A is calling bpf_redirect_map(), returning a code other
than XDP_REDIRECT. prog A is then detached, which triggers release
of the map. prog B is attached which, similarly as in i), would
just return XDP_REDIRECT without having called bpf_redirect_map()
and thus be accessing the freed map in xdp_do_redirect() since
not cleared for that CPU.

iii) prog A is attached to generic XDP, calling the bpf_redirect_map()
helper and returning XDP_REDIRECT. xdp_do_generic_redirect() is
currently not handling ri->map (will be fixed by Jesper), so it's
not being reset. Later loading a e.g. native prog B which would,
say, call bpf_xdp_redirect() and then returns XDP_REDIRECT would
find in xdp_do_redirect() that a map was set and uses that causing
use after free on map access.

Fix thus needs to avoid accessing stale ri->map pointers, naive
way would be to call a BPF function from drivers that just resets
it to NULL for all XDP return codes but XDP_REDIRECT and including
XDP_REDIRECT for drivers not supporting it yet (and let ri->map
being handled in xdp_do_generic_redirect()). There is a less
intrusive way w/o letting drivers call a reset for each BPF run.

The verifier knows we're calling into bpf_xdp_redirect_map()
helper, so it can do a small insn rewrite transparent to the prog
itself in the sense that it fills R4 with a pointer to the own
bpf_prog. We have that pointer at verification time anyway and
R4 is allowed to be used as per calling convention we scratch
R0 to R5 anyway, so they become inaccessible and program cannot
read them prior to a write. Then, the helper would store the prog
pointer in the current CPUs struct redirect_info. Later in
xdp_do_*_redirect() we check whether the redirect_info's prog
pointer is the same as passed xdp_prog pointer, and if that's
the case then all good, since the prog holds a ref on the map
anyway, so it is always valid at that point in time and must
have a reference count of at least 1. If in the unlikely case
they are not equal, it means we got a stale pointer, so we clear
and bail out right there. Also do reset map and the owning prog
in bpf_xdp_redirect(), so that bpf_xdp_redirect_map() and
bpf_xdp_redirect() won't get mixed up, only the last call should
take precedence. A tc bpf_redirect() doesn't use map anywhere
yet, so no need to clear it there since never accessed in that
layer.

Note that in case the prog is released, and thus the map as
well we're still under RCU read critical section at that time
and have preemption disabled as well. Once we commit with the
__dev_map_insert_ctx() from xdp_do_redirect_map() and set the
map to ri->map_to_flush, we still wait for a xdp_do_flush_map()
to finish in devmap dismantle time once flush_needed bit is set,
so that is fine.

Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 16 ++++++++++++++++
 net/core/filter.c     | 21 +++++++++++++++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d690c7dd1f1a..477b6932c3c1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4203,6 +4203,22 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		if (insn->imm == BPF_FUNC_redirect_map) {
+			u64 addr = (unsigned long)prog;
+			struct bpf_insn r4_ld[] = {
+				BPF_LD_IMM64(BPF_REG_4, addr),
+				*insn,
+			};
+			cnt = ARRAY_SIZE(r4_ld);
+
+			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+		}
 patch_call_imm:
 		fn = prog->aux->ops->get_func_proto(insn->imm);
 		/* all functions that have prototype and verifier allowed
diff --git a/net/core/filter.c b/net/core/filter.c
index 5912c738a7b2..0848df2cd9bf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1794,6 +1794,7 @@ struct redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
+	const struct bpf_prog *map_owner;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1807,7 +1808,6 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = NULL;
 
 	return TC_ACT_REDIRECT;
 }
@@ -2504,6 +2504,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	const struct bpf_prog *map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
@@ -2511,6 +2512,15 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
+	ri->map_owner = NULL;
+
+	/* This is really only caused by a deliberately crappy
+	 * BPF program, normally we would never hit that case,
+	 * so no need to inform someone via tracepoints either,
+	 * just bail out.
+	 */
+	if (unlikely(map_owner != xdp_prog))
+		return -EINVAL;
 
 	fwd = __dev_map_lookup_elem(map, index);
 	if (!fwd) {
@@ -2607,6 +2617,8 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->map = NULL;
+	ri->map_owner = NULL;
 
 	return XDP_REDIRECT;
 }
@@ -2619,7 +2631,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags)
+BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
+	   const struct bpf_prog *, map_owner)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 
@@ -2629,10 +2642,14 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	ri->map = map;
+	ri->map_owner = map_owner;
 
 	return XDP_REDIRECT;
 }
 
+/* Note, arg4 is hidden from users and populated by the verifier
+ * with the right pointer.
+ */
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.func           = bpf_xdp_redirect_map,
 	.gpl_only       = false,
-- 
cgit 


From bbbe211c295ffb309247adb7b871dda60d92d2d5 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 8 Sep 2017 14:00:30 -0700
Subject: net: rcu lock and preempt disable missing around generic xdp

do_xdp_generic must be called inside rcu critical section with preempt
disabled to ensure BPF programs are valid and per-cpu variables used
for redirect operations are consistent. This patch ensures this is true
and fixes the splat below.

The netif_receive_skb_internal() code path is now broken into two rcu
critical sections. I decided it was better to limit the preempt_enable/disable
block to just the xdp static key portion and the fallout is more
rcu_read_lock/unlock calls. Seems like the best option to me.

[  607.596901] =============================
[  607.596906] WARNING: suspicious RCU usage
[  607.596912] 4.13.0-rc4+ #570 Not tainted
[  607.596917] -----------------------------
[  607.596923] net/core/dev.c:3948 suspicious rcu_dereference_check() usage!
[  607.596927]
[  607.596927] other info that might help us debug this:
[  607.596927]
[  607.596933]
[  607.596933] rcu_scheduler_active = 2, debug_locks = 1
[  607.596938] 2 locks held by pool/14624:
[  607.596943]  #0:  (rcu_read_lock_bh){......}, at: [<ffffffff95445ffd>] ip_finish_output2+0x14d/0x890
[  607.596973]  #1:  (rcu_read_lock_bh){......}, at: [<ffffffff953c8e3a>] __dev_queue_xmit+0x14a/0xfd0
[  607.597000]
[  607.597000] stack backtrace:
[  607.597006] CPU: 5 PID: 14624 Comm: pool Not tainted 4.13.0-rc4+ #570
[  607.597011] Hardware name: Dell Inc. Precision Tower 5810/0HHV7N, BIOS A17 03/01/2017
[  607.597016] Call Trace:
[  607.597027]  dump_stack+0x67/0x92
[  607.597040]  lockdep_rcu_suspicious+0xdd/0x110
[  607.597054]  do_xdp_generic+0x313/0xa50
[  607.597068]  ? time_hardirqs_on+0x5b/0x150
[  607.597076]  ? mark_held_locks+0x6b/0xc0
[  607.597088]  ? netdev_pick_tx+0x150/0x150
[  607.597117]  netif_rx_internal+0x205/0x3f0
[  607.597127]  ? do_xdp_generic+0xa50/0xa50
[  607.597144]  ? lock_downgrade+0x2b0/0x2b0
[  607.597158]  ? __lock_is_held+0x93/0x100
[  607.597187]  netif_rx+0x119/0x190
[  607.597202]  loopback_xmit+0xfd/0x1b0
[  607.597214]  dev_hard_start_xmit+0x127/0x4e0

Fixes: d445516966dc ("net: xdp: support xdp generic on virtual devices")
Fixes: b5cdae3291f7 ("net: Generic XDP")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 6f845e4fec17..fb766d906148 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3981,8 +3981,13 @@ static int netif_rx_internal(struct sk_buff *skb)
 	trace_netif_rx(skb);
 
 	if (static_key_false(&generic_xdp_needed)) {
-		int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
-					 skb);
+		int ret;
+
+		preempt_disable();
+		rcu_read_lock();
+		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		rcu_read_unlock();
+		preempt_enable();
 
 		/* Consider XDP consuming the packet a success from
 		 * the netdev point of view we do not want to count
@@ -4500,18 +4505,20 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
-	rcu_read_lock();
-
 	if (static_key_false(&generic_xdp_needed)) {
-		int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
-					 skb);
+		int ret;
 
-		if (ret != XDP_PASS) {
-			rcu_read_unlock();
+		preempt_disable();
+		rcu_read_lock();
+		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		rcu_read_unlock();
+		preempt_enable();
+
+		if (ret != XDP_PASS)
 			return NET_RX_DROP;
-		}
 	}
 
+	rcu_read_lock();
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
-- 
cgit 


From 5a67da2a71c64daeb456f6f3e87b5c7cecdc5ffa Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 8 Sep 2017 14:00:49 -0700
Subject: bpf: add support for sockmap detach programs

The bpf map sockmap supports adding programs via attach commands. This
patch adds the detach command to keep the API symmetric and allow
users to remove previously added programs. Otherwise the user would
have to delete the map and re-add it to get in this state.

This also adds a series of additional tests to capture detach operation
and also attaching/detaching invalid prog types.

API note: socks will run (or not run) programs depending on the state
of the map at the time the sock is added. We do not for example walk
the map and remove programs from previously attached socks.

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h                     |  8 +++---
 kernel/bpf/sockmap.c                    |  2 +-
 kernel/bpf/syscall.c                    | 27 ++++++++++-------
 tools/testing/selftests/bpf/test_maps.c | 51 ++++++++++++++++++++++++++++++++-
 4 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c2cb1b5c094e..8390859e79e7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -385,16 +385,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
 
 #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
-int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	return NULL;
 }
 
-static inline int sock_map_attach_prog(struct bpf_map *map,
-				       struct bpf_prog *prog,
-				       u32 type)
+static inline int sock_map_prog(struct bpf_map *map,
+				struct bpf_prog *prog,
+				u32 type)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index f6ffde9c6a68..6424ce0e4969 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -792,7 +792,7 @@ out_progs:
 	return err;
 }
 
-int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
+int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 {
 	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
 	struct bpf_prog *orig;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 70ad8e220343..cb17e1cd1d43 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1096,10 +1096,10 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr)
+static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
 {
+	struct bpf_prog *prog = NULL;
 	int ufd = attr->target_fd;
-	struct bpf_prog *prog;
 	struct bpf_map *map;
 	struct fd f;
 	int err;
@@ -1109,16 +1109,20 @@ static int sockmap_get_from_fd(const union bpf_attr *attr)
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, BPF_PROG_TYPE_SK_SKB);
-	if (IS_ERR(prog)) {
-		fdput(f);
-		return PTR_ERR(prog);
+	if (attach) {
+		prog = bpf_prog_get_type(attr->attach_bpf_fd,
+					 BPF_PROG_TYPE_SK_SKB);
+		if (IS_ERR(prog)) {
+			fdput(f);
+			return PTR_ERR(prog);
+		}
 	}
 
-	err = sock_map_attach_prog(map, prog, attr->attach_type);
+	err = sock_map_prog(map, prog, attr->attach_type);
 	if (err) {
 		fdput(f);
-		bpf_prog_put(prog);
+		if (prog)
+			bpf_prog_put(prog);
 		return err;
 	}
 
@@ -1155,7 +1159,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		break;
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr);
+		return sockmap_get_from_fd(attr, true);
 	default:
 		return -EINVAL;
 	}
@@ -1204,7 +1208,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
 		cgroup_put(cgrp);
 		break;
-
+	case BPF_SK_SKB_STREAM_PARSER:
+	case BPF_SK_SKB_STREAM_VERDICT:
+		ret = sockmap_get_from_fd(attr, false);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 4acc772a28c0..fe3a443a1102 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -558,7 +558,7 @@ static void test_sockmap(int tasks, void *data)
 		}
 	}
 
-	/* Test attaching bad fds */
+	/* Test attaching/detaching bad fds */
 	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
 	if (!err) {
 		printf("Failed invalid parser prog attach\n");
@@ -571,6 +571,30 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Failed unknown prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed empty parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed empty verdict prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detach invalid prog successful\n");
+		goto out_sockmap;
+	}
+
 	/* Load SK_SKB program and Attach */
 	err = bpf_prog_load(SOCKMAP_PARSE_PROG,
 			    BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
@@ -643,6 +667,13 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_attach(verdict_prog, map_fd_rx,
+			      __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Attached unknown bpf prog\n");
+		goto out_sockmap;
+	}
+
 	/* Test map update elem afterwards fd lives in fd and map_fd */
 	for (i = 0; i < 6; i++) {
 		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
@@ -809,6 +840,24 @@ static void test_sockmap(int tasks, void *data)
 		assert(status == 0);
 	}
 
+	err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detached an invalid prog type.\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
 	/* Test map close sockets */
 	for (i = 0; i < 6; i++)
 		close(sfd[i]);
-- 
cgit 


From 374fb014fc5b15e420faa00af036868a635eadd3 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 8 Sep 2017 14:01:10 -0700
Subject: bpf: devmap, use cond_resched instead of cpu_relax

Be a bit more friendly about waiting for flush bits to complete.
Replace the cpu_relax() with a cond_resched().

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ecf9f99ecc57..959c9a07f318 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -159,7 +159,7 @@ static void dev_map_free(struct bpf_map *map)
 		unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
 
 		while (!bitmap_empty(bitmap, dtab->map.max_entries))
-			cpu_relax();
+			cond_resched();
 	}
 
 	for (i = 0; i < dtab->map.max_entries; i++) {
-- 
cgit 


From a010a2f6540ecc39f8701c6f7d35e22992c03fb6 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 8 Sep 2017 15:38:07 -0700
Subject: Revert "mdio_bus: Remove unneeded gpiod NULL check"

This reverts commit 95b80bf3db03c2bf572a357cf74b9a6aefef0a4a ("mdio_bus:
Remove unneeded gpiod NULL check"), this commit assumed that GPIOLIB
checks for NULL descriptors, so it's safe to drop them, but it is not
when CONFIG_GPIOLIB is disabled in the kernel. If we do call
gpiod_set_value_cansleep() on a GPIO descriptor we will issue warnings
coming from the inline stubs declared in include/linux/gpio/consumer.h.

Fixes: 95b80bf3db03 ("mdio_bus: Remove unneeded gpiod NULL check")
Reported-by: Woojung Huh <Woojung.Huh@microchip.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index b6f9fa670168..2df7b62c1a36 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -399,7 +399,8 @@ error:
 	}
 
 	/* Put PHYs in RESET to save power */
-	gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+	if (bus->reset_gpiod)
+		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 	return err;
@@ -424,7 +425,8 @@ void mdiobus_unregister(struct mii_bus *bus)
 	}
 
 	/* Put PHYs in RESET to save power */
-	gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+	if (bus->reset_gpiod)
+		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 }
-- 
cgit 


From 9beb8bedb05c5f3a353dba62b8fa7cbbb9ec685e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 9 Sep 2017 01:40:35 +0200
Subject: bpf: make error reporting in bpf_warn_invalid_xdp_action more clear

Differ between illegal XDP action code and just driver
unsupported one to provide better feedback when we throw
a one-time warning here. Reason is that with 814abfabef3c
("xdp: add bpf_redirect helper function") not all drivers
support the new XDP return code yet and thus they will
fall into their 'default' case when checking for return
codes after program return, which then triggers a
bpf_warn_invalid_xdp_action() stating that the return
code is illegal, but from XDP perspective it's not.

I decided not to place something like a XDP_ACT_MAX define
into uapi i) given we don't have this either for all other
program types, ii) future action codes could have further
encoding there, which would render such define unsuitable
and we wouldn't be able to rip it out again, and iii) we
rarely add new action codes.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 4 ++--
 net/core/filter.c        | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba848b761cfb..43ab5c402f98 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -766,8 +766,8 @@ struct bpf_sock {
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
diff --git a/net/core/filter.c b/net/core/filter.c
index 0848df2cd9bf..3a50a9b021e2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3609,7 +3609,11 @@ static bool xdp_is_valid_access(int off, int size,
 
 void bpf_warn_invalid_xdp_action(u32 act)
 {
-	WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+	const u32 act_max = XDP_REDIRECT;
+
+	WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
+		  act > act_max ? "Illegal" : "Driver unsupported",
+		  act);
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
-- 
cgit