From 46c0ef6e1eb95f619d9f62da4332749153db92f7 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 16 Mar 2018 11:35:51 +0900
Subject: xfrm: fix rcu_read_unlock usage in xfrm_local_error

In the xfrm_local_error, rcu_read_unlock should be called when afinfo
is not NULL. because xfrm_state_get_afinfo calls rcu_read_unlock
if afinfo is NULL.

Fixes: af5d27c4e12b ("xfrm: remove xfrm_state_put_afinfo")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_output.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 23468672a767..89b178a78dc7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -285,8 +285,9 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
 		return;
 
 	afinfo = xfrm_state_get_afinfo(proto);
-	if (afinfo)
+	if (afinfo) {
 		afinfo->local_error(skb, mtu);
-	rcu_read_unlock();
+		rcu_read_unlock();
+	}
 }
 EXPORT_SYMBOL_GPL(xfrm_local_error);
-- 
cgit 


From 334167decf98f01a66c91ea84180c278bc4ad290 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 15 Feb 2018 15:48:09 +0200
Subject: iwlwifi: mvm: set the correct tid when we flush the MCAST sta

The tid being used for the queue (cab_queue) for the MCAST
station has been changed recently to be 0 (for BE).
The flush path still flushed only the special tid (15)
which means that the firmware wasn't flushing the right
queue and we could get a firmware crash upon remove
station if we had an MCAST packet on the ring.

The current code that flushes queues for a station only
differentiates between internal stations (stations that
aren't instantiated in mac80211, like the MCAST station)
and the non-internal ones.
Internal stations can be either: BCAST (beacons), MCAST
(for cab_queue), GENERAL_PURPOSE (p2p dev, and sniffer
injection). The internal stations can use different tids.

To make the code simpler, just flush all the tids always
and add the special internal tid (15) for internal
stations. The firmware will know how to handle this even
if we hadn't any queue mapped that that tid.

Fixes: e340c1a6ef4b ("iwlwifi: mvm: Correctly set the tid for mcast queue")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index af6dfceab6b8..7dfe4cde55e3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1894,14 +1894,12 @@ int iwl_mvm_flush_sta(struct iwl_mvm *mvm, void *sta, bool internal, u32 flags)
 	struct iwl_mvm_int_sta *int_sta = sta;
 	struct iwl_mvm_sta *mvm_sta = sta;
 
-	if (iwl_mvm_has_new_tx_api(mvm)) {
-		if (internal)
-			return iwl_mvm_flush_sta_tids(mvm, int_sta->sta_id,
-						      BIT(IWL_MGMT_TID), flags);
+	BUILD_BUG_ON(offsetof(struct iwl_mvm_int_sta, sta_id) !=
+		     offsetof(struct iwl_mvm_sta, sta_id));
 
+	if (iwl_mvm_has_new_tx_api(mvm))
 		return iwl_mvm_flush_sta_tids(mvm, mvm_sta->sta_id,
-					      0xFF, flags);
-	}
+					      0xff | BIT(IWL_MGMT_TID), flags);
 
 	if (internal)
 		return iwl_mvm_flush_tx_path(mvm, int_sta->tfd_queue_msk,
-- 
cgit 


From e829b17caf96c2da34620e335fb777592990906c Mon Sep 17 00:00:00 2001
From: Beni Lev <beni.lev@intel.com>
Date: Tue, 20 Feb 2018 13:41:54 +0200
Subject: iwlwifi: mvm: Correctly set IGTK for AP

Currently when an IGTK is set for an AP, it is set as a regular key.
Since the cipher is set to CMAC, the STA_KEY_FLG_EXT flag is added to
the host command, which causes assert 0x253D on NICs that do not support
this.

Fixes: 85aeb58cec1a ("iwlwifi: mvm: Enable security on new TX API")
Signed-off-by: Beni Lev <beni.lev@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 630e23cb0ffb..5be4bae5b70d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -3233,17 +3233,9 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
 		}
 		sta_id = mvm_sta->sta_id;
 
-		if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
-		    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
-		    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) {
-			ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id,
-						    false);
-			goto end;
-		}
-
 		/*
 		 * It is possible that the 'sta' parameter is NULL, and thus
-		 * there is a need to retrieve  the sta from the local station
+		 * there is a need to retrieve the sta from the local station
 		 * table.
 		 */
 		if (!sta) {
@@ -3258,6 +3250,17 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
 
 		if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif))
 			return -EINVAL;
+	} else {
+		struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+		sta_id = mvmvif->mcast_sta.sta_id;
+	}
+
+	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) {
+		ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, false);
+		goto end;
 	}
 
 	/* If the key_offset is not pre-assigned, we need to find a
-- 
cgit 


From 75fd4fec3e4c43b131c7c4958adb3ab9f1665513 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 22 Feb 2018 13:51:21 +0100
Subject: iwlwifi: mvm: fix error checking for multi/broadcast sta

The earlier patch called the station add functions but didn't
assign their return value to the ret variable, so that the
checks for it were meaningless. Fix that.

Found by smatch:

.../mac80211.c:2560 iwl_mvm_start_ap_ibss() warn: we tested 'ret' before and it was 'false'
.../mac80211.c:2563 iwl_mvm_start_ap_ibss() warn: we tested 'ret' before and it was 'false'

Fixes: 3a89411cd31c ("iwlwifi: mvm: fix assert 0x2B00 on older FWs")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index ebf511150f4d..d09afb4acaeb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2132,10 +2132,10 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
 		 * Send the bcast station. At this stage the TBTT and DTIM time
 		 * events are added and applied to the scheduler
 		 */
-		iwl_mvm_send_add_bcast_sta(mvm, vif);
+		ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
 		if (ret)
 			goto out_unbind;
-		iwl_mvm_add_mcast_sta(mvm, vif);
+		ret = iwl_mvm_add_mcast_sta(mvm, vif);
 		if (ret) {
 			iwl_mvm_send_rm_bcast_sta(mvm, vif);
 			goto out_unbind;
-- 
cgit 


From 86a2b2043af79deff5cf000c5a08847faa4f2ee0 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Wed, 21 Feb 2018 22:18:59 +0200
Subject: iwlwifi: add shared clock PHY config flag for some devices

Some devices use a shared clock which is very sensitive to variations
and cause trouble in some situations.  We need to set a bit in the phy
configuration to indicate that to the FW.  To make this generic, add a
extra_phy_config_flags element to the device configuration and OR it
into the phy_cfg before sending it to the firmware.  And also create a
set of configurations for devices that use shared clocks and need this
extra bit to be set.

Fixes: c62446d2b028 ("iwlwifi: add new 9460 series PCI IDs")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/9000.c   | 62 +++++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/file.h    |  1 +
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  5 ++
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c     |  4 ++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 38 +++++++--------
 5 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index 90a1d14cf7d2..ab1469473e4f 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -53,6 +53,7 @@
 #include <linux/stringify.h>
 #include "iwl-config.h"
 #include "iwl-agn-hw.h"
+#include "fw/file.h"
 
 /* Highest firmware API version supported */
 #define IWL9000_UCODE_API_MAX	36
@@ -265,6 +266,67 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = {
 	.integrated = true,
 	.soc_latency = 5000,
 };
+
+const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = {
+	.name = "Intel(R) Dual Band Wireless AC 9460",
+	.fw_name_pre = IWL9000A_FW_PRE,
+	.fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
+	.fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+	.soc_latency = 5000,
+	.extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
+};
+
+const struct iwl_cfg iwl9461_2ac_cfg_shared_clk = {
+	.name = "Intel(R) Dual Band Wireless AC 9461",
+	.fw_name_pre = IWL9000A_FW_PRE,
+	.fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
+	.fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+	.soc_latency = 5000,
+	.extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
+};
+
+const struct iwl_cfg iwl9462_2ac_cfg_shared_clk = {
+	.name = "Intel(R) Dual Band Wireless AC 9462",
+	.fw_name_pre = IWL9000A_FW_PRE,
+	.fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
+	.fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+	.soc_latency = 5000,
+	.extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
+};
+
+const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = {
+	.name = "Intel(R) Dual Band Wireless AC 9560",
+	.fw_name_pre = IWL9000A_FW_PRE,
+	.fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
+	.fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+	.soc_latency = 5000,
+	.extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
+};
+
 MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 1a05d506ac9a..2cb303c5c42e 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -441,6 +441,7 @@ enum iwl_fw_phy_cfg {
 	FW_PHY_CFG_TX_CHAIN = 0xf << FW_PHY_CFG_TX_CHAIN_POS,
 	FW_PHY_CFG_RX_CHAIN_POS = 20,
 	FW_PHY_CFG_RX_CHAIN = 0xf << FW_PHY_CFG_RX_CHAIN_POS,
+	FW_PHY_CFG_SHARED_CLK = BIT(31),
 };
 
 #define IWL_UCODE_MAX_CS		1
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 258d439bb0a9..f0f5636dd3ea 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -398,6 +398,7 @@ struct iwl_cfg {
 	u8 ucode_api_max;
 	u8 ucode_api_min;
 	u32 min_umac_error_event_table;
+	u32 extra_phy_cfg_flags;
 };
 
 /*
@@ -477,6 +478,10 @@ extern const struct iwl_cfg iwl9460_2ac_cfg_soc;
 extern const struct iwl_cfg iwl9461_2ac_cfg_soc;
 extern const struct iwl_cfg iwl9462_2ac_cfg_soc;
 extern const struct iwl_cfg iwl9560_2ac_cfg_soc;
+extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk;
+extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk;
+extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk;
+extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk;
 extern const struct iwl_cfg iwl22000_2ac_cfg_hr;
 extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb;
 extern const struct iwl_cfg iwl22000_2ac_cfg_jf;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 0920be637b57..3c59109bea20 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -433,6 +433,10 @@ static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 
 	/* Set parameters */
 	phy_cfg_cmd.phy_cfg = cpu_to_le32(iwl_mvm_get_phy_config(mvm));
+
+	/* set flags extra PHY configuration flags from the device's cfg */
+	phy_cfg_cmd.phy_cfg |= cpu_to_le32(mvm->cfg->extra_phy_cfg_flags);
+
 	phy_cfg_cmd.calib_control.event_trigger =
 		mvm->fw->default_calib[ucode_type].event_trigger;
 	phy_cfg_cmd.calib_control.flow_trigger =
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 56fc28750a41..e323d3abb6ac 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -579,25 +579,25 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x003C, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0064, iwl9461_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x00A0, iwl9462_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x00A4, iwl9462_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0230, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0234, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0238, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x023C, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0260, iwl9461_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x0264, iwl9461_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x02A0, iwl9462_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x02A4, iwl9462_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x4034, iwl9560_2ac_cfg_soc)},
-	{IWL_PCI_DEVICE(0x31DC, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x003C, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0064, iwl9461_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x00A0, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x00A4, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0230, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0234, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0238, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x023C, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0260, iwl9461_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0264, iwl9461_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x02A0, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x02A4, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x4034, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x40A4, iwl9462_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x34F0, 0x0030, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x34F0, 0x0034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x34F0, 0x02A4, iwl9462_2ac_cfg_soc)},
-- 
cgit 


From 1259055170287a350cad453e9eac139c81609860 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 15 Mar 2018 08:29:09 +0100
Subject: brcmfmac: drop Inter-Access Point Protocol packets by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Testing brcmfmac with more recent firmwares resulted in AP interfaces
not working in some specific setups. Debugging resulted in discovering
support for IAPP in Broadcom's firmwares.

Older firmwares were only generating 802.11f frames. Newer ones like:
1) 10.10 (TOB) (r663589)
2) 10.10.122.20 (r683106)
for 4366b1 and 4366c0 respectively seem to also /respect/ 802.11f frames
in the Tx path by performing a STA disassociation.

This obsoleted standard and its implementation is something that:
1) Most people don't need / want to use
2) Can allow local DoS attacks
3) Breaks AP interfaces in some specific bridge setups

To solve issues it can cause this commit modifies brcmfmac to drop IAPP
packets. If affects:
1) Rx path: driver won't be sending these unwanted packets up.
2) Tx path: driver will reject packets that would trigger STA
   disassociation perfromed by a firmware (possible local DoS attack).

It appears there are some Broadcom's clients/users who care about this
feature despite the drawbacks. They can switch it on using a new module
param.

This change results in only two more comparisons (check for module param
and check for Ethernet packet length) for 99.9% of packets. Its overhead
should be very minimal.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../wireless/broadcom/brcm80211/brcmfmac/common.c  |  5 ++
 .../wireless/broadcom/brcm80211/brcmfmac/common.h  |  1 +
 .../wireless/broadcom/brcm80211/brcmfmac/core.c    | 57 ++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 9be0b051066a..ebe35e6a40e7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -75,6 +75,10 @@ static int brcmf_roamoff;
 module_param_named(roamoff, brcmf_roamoff, int, S_IRUSR);
 MODULE_PARM_DESC(roamoff, "Do not use internal roaming engine");
 
+static int brcmf_iapp_enable;
+module_param_named(iapp, brcmf_iapp_enable, int, 0);
+MODULE_PARM_DESC(iapp, "Enable partial support for the obsoleted Inter-Access Point Protocol");
+
 #ifdef DEBUG
 /* always succeed brcmf_bus_started() */
 static int brcmf_ignore_probe_fail;
@@ -441,6 +445,7 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev,
 	settings->feature_disable = brcmf_feature_disable;
 	settings->fcmode = brcmf_fcmode;
 	settings->roamoff = !!brcmf_roamoff;
+	settings->iapp = !!brcmf_iapp_enable;
 #ifdef DEBUG
 	settings->ignore_probe_fail = !!brcmf_ignore_probe_fail;
 #endif
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
index a62f8e70b320..ef914619e8e1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
@@ -58,6 +58,7 @@ struct brcmf_mp_device {
 	unsigned int	feature_disable;
 	int		fcmode;
 	bool		roamoff;
+	bool		iapp;
 	bool		ignore_probe_fail;
 	struct brcmfmac_pd_cc *country_codes;
 	union {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 930e423f83a8..44b7774ebd02 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -230,6 +230,37 @@ static void brcmf_netdev_set_multicast_list(struct net_device *ndev)
 	schedule_work(&ifp->multicast_work);
 }
 
+/**
+ * brcmf_skb_is_iapp - checks if skb is an IAPP packet
+ *
+ * @skb: skb to check
+ */
+static bool brcmf_skb_is_iapp(struct sk_buff *skb)
+{
+	static const u8 iapp_l2_update_packet[6] __aligned(2) = {
+		0x00, 0x01, 0xaf, 0x81, 0x01, 0x00,
+	};
+	unsigned char *eth_data;
+#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	const u16 *a, *b;
+#endif
+
+	if (skb->len - skb->mac_len != 6 ||
+	    !is_multicast_ether_addr(eth_hdr(skb)->h_dest))
+		return false;
+
+	eth_data = skb_mac_header(skb) + ETH_HLEN;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	return !(((*(const u32 *)eth_data) ^ (*(const u32 *)iapp_l2_update_packet)) |
+		 ((*(const u16 *)(eth_data + 4)) ^ (*(const u16 *)(iapp_l2_update_packet + 4))));
+#else
+	a = (const u16 *)eth_data;
+	b = (const u16 *)iapp_l2_update_packet;
+
+	return !((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]));
+#endif
+}
+
 static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb,
 					   struct net_device *ndev)
 {
@@ -250,6 +281,23 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb,
 		goto done;
 	}
 
+	/* Some recent Broadcom's firmwares disassociate STA when they receive
+	 * an 802.11f ADD frame. This behavior can lead to a local DoS security
+	 * issue. Attacker may trigger disassociation of any STA by sending a
+	 * proper Ethernet frame to the wireless interface.
+	 *
+	 * Moreover this feature may break AP interfaces in some specific
+	 * setups. This applies e.g. to the bridge with hairpin mode enabled and
+	 * IFLA_BRPORT_MCAST_TO_UCAST set. IAPP packet generated by a firmware
+	 * will get passed back to the wireless interface and cause immediate
+	 * disassociation of a just-connected STA.
+	 */
+	if (!drvr->settings->iapp && brcmf_skb_is_iapp(skb)) {
+		dev_kfree_skb(skb);
+		ret = -EINVAL;
+		goto done;
+	}
+
 	/* Make sure there's enough writeable headroom */
 	if (skb_headroom(skb) < drvr->hdrlen || skb_header_cloned(skb)) {
 		head_delta = max_t(int, drvr->hdrlen - skb_headroom(skb), 0);
@@ -325,6 +373,15 @@ void brcmf_txflowblock_if(struct brcmf_if *ifp,
 
 void brcmf_netif_rx(struct brcmf_if *ifp, struct sk_buff *skb)
 {
+	/* Most of Broadcom's firmwares send 802.11f ADD frame every time a new
+	 * STA connects to the AP interface. This is an obsoleted standard most
+	 * users don't use, so don't pass these frames up unless requested.
+	 */
+	if (!ifp->drvr->settings->iapp && brcmf_skb_is_iapp(skb)) {
+		brcmu_pkt_buf_free_skb(skb);
+		return;
+	}
+
 	if (skb->pkt_type == PACKET_MULTICAST)
 		ifp->ndev->stats.multicast++;
 
-- 
cgit 


From dd1df24737727e119c263acf1be2a92763938297 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:16:27 +0100
Subject: vti4: Don't count header length twice on tunnel setup

This re-introduces the effect of commit a32452366b72 ("vti4:
Don't count header length twice.") which was accidentally
reverted by merge commit f895f0cfbb77 ("Merge branch 'master' of
git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec").

The commit message from Steffen Klassert said:

    We currently count the size of LL_MAX_HEADER and struct iphdr
    twice for vti4 devices, this leads to a wrong device mtu.
    The size of LL_MAX_HEADER and struct iphdr is already counted in
    ip_tunnel_bind_dev(), so don't do it again in vti_tunnel_init().

And this is still the case now: ip_tunnel_bind_dev() already
accounts for the header length of the link layer (not
necessarily LL_MAX_HEADER, if the output device is found), plus
one IP header.

For example, with a vti device on top of veth, with MTU of 1500,
the existing implementation would set the initial vti MTU to
1332, accounting once for LL_MAX_HEADER (128, included in
hard_header_len by vti) and twice for the same IP header (once
from hard_header_len, once from ip_tunnel_bind_dev()).

It should instead be 1480, because ip_tunnel_bind_dev() is able
to figure out that the output device is veth, so no additional
link layer header is attached, and will properly count one
single IP header.

The existing issue had the side effect of avoiding PMTUD for
most xfrm policies, by arbitrarily lowering the initial MTU.
However, the only way to get a consistent PMTU value is to let
the xfrm PMTU discovery do its course, and commit d6af1a31cc72
("vti: Add pmtu handling to vti_xmit.") now takes care of local
delivery cases where the application ignores local socket
notifications.

Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code")
Fixes: f895f0cfbb77 ("Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ip_vti.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..502e5222eaa9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -387,7 +387,6 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
-	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
 	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->addr_len		= 4;
-- 
cgit 


From 24fc79798b8ddfd46f2dd363a8d29072c083b977 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:16:28 +0100
Subject: ip_tunnel: Clamp MTU to bounds on new link

Otherwise, it's possible to specify invalid MTU values directly
on creation of a link (via 'ip link add'). This is already
prevented on subsequent MTU changes by commit b96f9afee4eb
("ipv4/6: use core net MTU range checking").

Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ip_tunnel.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6d21068f9b55..7c76dd17b6b9 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1108,8 +1108,14 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		eth_hw_addr_random(dev);
 
 	mtu = ip_tunnel_bind_dev(dev);
-	if (!tb[IFLA_MTU])
+	if (tb[IFLA_MTU]) {
+		unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
+
+		dev->mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
+				 (unsigned int)(max - sizeof(struct iphdr)));
+	} else {
 		dev->mtu = mtu;
+	}
 
 	ip_tunnel_add(itn, nt);
 out:
-- 
cgit 


From 03080e5ec72740c1a62e6730f2a5f3f114f11b19 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:16:29 +0100
Subject: vti4: Don't override MTU passed on link creation via IFLA_MTU

Don't hardcode a MTU value on vti tunnel initialization,
ip_tunnel_newlink() is able to deal with this already. See also
commit ffc2b6ee4174 ("ip_gre: fix IFLA_MTU ignored on NEWLINK").

Fixes: 1181412c1a67 ("net/ipv4: VTI support new module for ip_vti.")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ip_vti.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 502e5222eaa9..3f091ccad9af 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -387,7 +387,6 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
-	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
-- 
cgit 


From c6741fbed6dc0f183d26c4b6bca4517672f92e6c Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:17:11 +0100
Subject: vti6: Properly adjust vti6 MTU from MTU of lower device

If a lower device is found, we don't need to subtract
LL_MAX_HEADER to calculate our MTU: just use its MTU, the link
layer headers are already taken into account by it.

If the lower device is not found, start from ETH_DATA_LEN
instead, and only in this case subtract a worst-case
LL_MAX_HEADER.

We then need to subtract our additional IPv6 header from the
calculation.

While at it, note that vti6 doesn't have a hardware header, so
it doesn't need to set dev->hard_header_len. And as
vti6_link_config() now always sets the MTU, there's no need to
set a default value in vti6_dev_setup().

This makes the behaviour consistent with IPv4 vti, after
commit a32452366b72 ("vti4: Don't count header length twice."),
which was accidentally reverted by merge commit f895f0cfbb77
("Merge branch 'master' of
git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec").

While commit 53c81e95df17 ("ip6_vti: adjust vti mtu according to
mtu of lower device") improved on the original situation, this
was still not ideal. As reported in that commit message itself,
if we start from an underlying veth MTU of 9000, we end up with
an MTU of 8832, that is, 9000 - LL_MAX_HEADER - sizeof(ipv6hdr).
This should simply be 8880, or 9000 - sizeof(ipv6hdr) instead:
we found the lower device (veth) and we know we don't have any
additional link layer header, so there's no need to subtract an
hypothetical worst-case number.

Fixes: 53c81e95df17 ("ip6_vti: adjust vti mtu according to mtu of lower device")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ip6_vti.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..2ceef41cc097 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -627,6 +627,7 @@ static void vti6_link_config(struct ip6_tnl *t)
 	struct net_device *dev = t->dev;
 	struct __ip6_tnl_parm *p = &t->parms;
 	struct net_device *tdev = NULL;
+	int mtu;
 
 	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -656,8 +657,11 @@ static void vti6_link_config(struct ip6_tnl *t)
 		tdev = __dev_get_by_index(t->net, p->link);
 
 	if (tdev)
-		dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
-				 IPV6_MIN_MTU);
+		mtu = tdev->mtu - sizeof(struct ipv6hdr);
+	else
+		mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr);
+
+	dev->mtu = max_t(int, mtu, IPV6_MIN_MTU);
 }
 
 /**
@@ -866,8 +870,6 @@ static void vti6_dev_setup(struct net_device *dev)
 	dev->priv_destructor = vti6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
-	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
-	dev->mtu = ETH_DATA_LEN;
 	dev->min_mtu = IPV6_MIN_MTU;
 	dev->max_mtu = IP_MAX_MTU;
 	dev->flags |= IFF_NOARP;
-- 
cgit 


From 7a67e69a339a6824be2fc483073782ab2f47fcd2 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:17:12 +0100
Subject: vti6: Keep set MTU on link creation or change, validate it

In vti6_link_config(), if MTU is already given on link creation
or change, validate and use it instead of recomputing it. To do
that, we need to propagate the knowledge that MTU was set by
userspace all the way down to vti6_link_config().

To keep this simple, vti6_dev_init() sets the new 'keep_mtu'
argument of vti6_link_config() to true: on initialization, we
don't have convenient access to netlink attributes there, but we
will anyway check whether dev->mtu is set in vti6_link_config().
If it's non-zero, it was set to the value of the IFLA_MTU
attribute during creation. Otherwise, determine a reasonable
value.

Fixes: ed1efb2aefbb ("ipv6: Add support for IPsec virtual tunnel interfaces")
Fixes: 53c81e95df17 ("ip6_vti: adjust vti mtu according to mtu of lower device")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ip6_vti.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2ceef41cc097..971175142e14 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -622,7 +622,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return 0;
 }
 
-static void vti6_link_config(struct ip6_tnl *t)
+static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
 {
 	struct net_device *dev = t->dev;
 	struct __ip6_tnl_parm *p = &t->parms;
@@ -641,6 +641,11 @@ static void vti6_link_config(struct ip6_tnl *t)
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
 
+	if (keep_mtu && dev->mtu) {
+		dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+		return;
+	}
+
 	if (p->flags & IP6_TNL_F_CAP_XMIT) {
 		int strict = (ipv6_addr_type(&p->raddr) &
 			      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
@@ -668,12 +673,14 @@ static void vti6_link_config(struct ip6_tnl *t)
  * vti6_tnl_change - update the tunnel parameters
  *   @t: tunnel to be changed
  *   @p: tunnel configuration parameters
+ *   @keep_mtu: MTU was set from userspace, don't re-compute it
  *
  * Description:
  *   vti6_tnl_change() updates the tunnel parameters
  **/
 static int
-vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
+		bool keep_mtu)
 {
 	t->parms.laddr = p->laddr;
 	t->parms.raddr = p->raddr;
@@ -683,11 +690,12 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
 	t->parms.proto = p->proto;
 	t->parms.fwmark = p->fwmark;
 	dst_cache_reset(&t->dst_cache);
-	vti6_link_config(t);
+	vti6_link_config(t, keep_mtu);
 	return 0;
 }
 
-static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p,
+		       bool keep_mtu)
 {
 	struct net *net = dev_net(t->dev);
 	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
@@ -695,7 +703,7 @@ static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
 
 	vti6_tnl_unlink(ip6n, t);
 	synchronize_net();
-	err = vti6_tnl_change(t, p);
+	err = vti6_tnl_change(t, p, keep_mtu);
 	vti6_tnl_link(ip6n, t);
 	netdev_state_change(t->dev);
 	return err;
@@ -808,7 +816,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			} else
 				t = netdev_priv(dev);
 
-			err = vti6_update(t, &p1);
+			err = vti6_update(t, &p1, false);
 		}
 		if (t) {
 			err = 0;
@@ -907,7 +915,7 @@ static int vti6_dev_init(struct net_device *dev)
 
 	if (err)
 		return err;
-	vti6_link_config(t);
+	vti6_link_config(t, true);
 	return 0;
 }
 
@@ -1012,7 +1020,7 @@ static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
 	} else
 		t = netdev_priv(dev);
 
-	return vti6_update(t, &p);
+	return vti6_update(t, &p, tb && tb[IFLA_MTU]);
 }
 
 static size_t vti6_get_size(const struct net_device *dev)
-- 
cgit 


From f8a554b4aa9686bb2c12f6bae516e58783289a03 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 15 Mar 2018 17:17:13 +0100
Subject: vti6: Fix dev->max_mtu setting

We shouldn't allow a tunnel to have IP_MAX_MTU as MTU, because
another IPv6 header is going on top of our packets. Without this
patch, we might end up building packets bigger than IP_MAX_MTU.

Fixes: b96f9afee4eb ("ipv4/6: use core net MTU range checking")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ip6_vti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 971175142e14..ce18cd20389d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -879,7 +879,7 @@ static void vti6_dev_setup(struct net_device *dev)
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->min_mtu = IPV6_MIN_MTU;
-	dev->max_mtu = IP_MAX_MTU;
+	dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
 	netif_keep_dst(dev);
-- 
cgit 


From 19125cb0591ae63cd4591e3dfe4c22058e748518 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Wed, 28 Feb 2018 17:18:48 +0200
Subject: iwlwifi: mvm: Increase session protection time after CS

After switching to a new channel, driver schedules session protection
time event in order to hear the beacon on the new channel.
The duration of the protection is two beacon intervals.
However, since we start to switch slightly before beacon with count 1, in
case we don't hear (or AP doesn't transmit) the very first beacon on the
new channel the protection ends without hearing any beacon at all.
At this stage the switch is not complete, the queues are closed and the
interface doesn't have quota yet or TBTT events. As the result, we are
stuck forever waiting for iwl_mvm_post_channel_switch() to be called.

Fix this by increasing the protection time to be 3 beacon intervals and
in addition drop the connection if the time event ends before we got any
beacon.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c   |  5 ++++-
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h        |  3 +++
 drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 15 ++++++++-------
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index d09afb4acaeb..7152fdc00fb1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3494,6 +3494,7 @@ static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm,
 		ret = 0;
 		goto out;
 	case NL80211_IFTYPE_STATION:
+		mvmvif->csa_bcn_pending = false;
 		break;
 	case NL80211_IFTYPE_MONITOR:
 		/* always disable PS when a monitor interface is active */
@@ -3537,7 +3538,7 @@ static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm,
 	}
 
 	if (switching_chanctx && vif->type == NL80211_IFTYPE_STATION) {
-		u32 duration = 2 * vif->bss_conf.beacon_int;
+		u32 duration = 3 * vif->bss_conf.beacon_int;
 
 		/* iwl_mvm_protect_session() reads directly from the
 		 * device (the system time), so make sure it is
@@ -3550,6 +3551,7 @@ static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm,
 		/* Protect the session to make sure we hear the first
 		 * beacon on the new channel.
 		 */
+		mvmvif->csa_bcn_pending = true;
 		iwl_mvm_protect_session(mvm, vif, duration, duration,
 					vif->bss_conf.beacon_int / 2,
 					true);
@@ -3988,6 +3990,7 @@ static int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw,
 	if (vif->type == NL80211_IFTYPE_STATION) {
 		struct iwl_mvm_sta *mvmsta;
 
+		mvmvif->csa_bcn_pending = false;
 		mvmsta = iwl_mvm_sta_from_staid_protected(mvm,
 							  mvmvif->ap_sta_id);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 89ff02d7c876..625b238a3f0a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -438,6 +438,9 @@ struct iwl_mvm_vif {
 	bool csa_failed;
 	u16 csa_target_freq;
 
+	/* Indicates that we are waiting for a beacon on a new channel */
+	bool csa_bcn_pending;
+
 	/* TCP Checksum Offload */
 	netdev_features_t features;
 };
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index acb217e666db..cd91bc44259c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -18,11 +19,6 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
  * The full GNU General Public License is included in this distribution
  * in the file called COPYING.
  *
@@ -35,6 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -198,9 +195,13 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
 					struct ieee80211_vif *vif,
 					const char *errmsg)
 {
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
 	if (vif->type != NL80211_IFTYPE_STATION)
 		return false;
-	if (vif->bss_conf.assoc && vif->bss_conf.dtim_period)
+
+	if (!mvmvif->csa_bcn_pending && vif->bss_conf.assoc &&
+	    vif->bss_conf.dtim_period)
 		return false;
 	if (errmsg)
 		IWL_ERR(mvm, "%s\n", errmsg);
@@ -344,7 +345,7 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
 			 * and know the dtim period.
 			 */
 			iwl_mvm_te_check_disconnect(mvm, te_data->vif,
-				"No association and the time event is over already...");
+				"No beacon heard and the time event is over already...");
 			break;
 		default:
 			break;
-- 
cgit 


From 4b387906b1c3692bb790388c335515c0cf098a23 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Wed, 7 Mar 2018 10:41:18 +0200
Subject: iwlwifi: mvm: clear tx queue id when unreserving aggregation queue

When a queue is reserved for aggregation, the queue id is assigned
to the tid_data. This is fine since iwl_mvm_sta_tx_agg_oper()
takes care of allocating the queue before actual tx starts.
When the reservation is cancelled (e.g. when the AP declined the
aggregation request) the tid_data is not cleared. As a result,
following tx for this tid was trying to use an unallocated queue.

Fix this by setting the txq_id for the tid to invalid when unreserving
the queue.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 5be4bae5b70d..0b3a780aee12 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2696,8 +2696,10 @@ out:
 
 static void iwl_mvm_unreserve_agg_queue(struct iwl_mvm *mvm,
 					struct iwl_mvm_sta *mvmsta,
-					u16 txq_id)
+					struct iwl_mvm_tid_data *tid_data)
 {
+	u16 txq_id = tid_data->txq_id;
+
 	if (iwl_mvm_has_new_tx_api(mvm))
 		return;
 
@@ -2709,8 +2711,10 @@ static void iwl_mvm_unreserve_agg_queue(struct iwl_mvm *mvm,
 	 * allocated through iwl_mvm_enable_txq, so we can just mark it back as
 	 * free.
 	 */
-	if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED)
+	if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED) {
 		mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_FREE;
+		tid_data->txq_id = IWL_MVM_INVALID_QUEUE;
+	}
 
 	spin_unlock_bh(&mvm->queue_info_lock);
 }
@@ -2741,7 +2745,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 	mvmsta->agg_tids &= ~BIT(tid);
 
-	iwl_mvm_unreserve_agg_queue(mvm, mvmsta, txq_id);
+	iwl_mvm_unreserve_agg_queue(mvm, mvmsta, tid_data);
 
 	switch (tid_data->state) {
 	case IWL_AGG_ON:
@@ -2808,7 +2812,7 @@ int iwl_mvm_sta_tx_agg_flush(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	mvmsta->agg_tids &= ~BIT(tid);
 	spin_unlock_bh(&mvmsta->lock);
 
-	iwl_mvm_unreserve_agg_queue(mvm, mvmsta, txq_id);
+	iwl_mvm_unreserve_agg_queue(mvm, mvmsta, tid_data);
 
 	if (old_state >= IWL_AGG_ON) {
 		iwl_mvm_drain_sta(mvm, mvmsta, true);
-- 
cgit 


From df65c8d1728adee2a52c30287d33da83a8c87480 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Tue, 6 Mar 2018 14:10:49 +0200
Subject: iwlwifi: mvm: make sure internal station has a valid id

If the driver failed to resume from D3, it is possible that it has
no valid aux station. In such case, fw restart will end up in sending
station related commands with an invalid station id, which will
result in an assert.

Fix this by allocating a new station id for the aux station if it
does not have a valid id even in the case of fw restart.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 0b3a780aee12..fc13f6291c06 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1695,7 +1695,8 @@ int iwl_mvm_allocate_int_sta(struct iwl_mvm *mvm,
 			     u32 qmask, enum nl80211_iftype iftype,
 			     enum iwl_sta_type type)
 {
-	if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+	if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) ||
+	    sta->sta_id == IWL_MVM_INVALID_STA) {
 		sta->sta_id = iwl_mvm_find_free_sta_id(mvm, iftype);
 		if (WARN_ON_ONCE(sta->sta_id == IWL_MVM_INVALID_STA))
 			return -ENOSPC;
-- 
cgit 


From 4a6d2e525b43eba5870ea7e360f59aa65de00705 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Mon, 5 Mar 2018 11:26:53 +0200
Subject: iwlwifi: mvm: fix array out of bounds reference

When starting aggregation, the code checks the status of the queue
allocated to the aggregation tid, which might not yet be allocated
and thus the queue index may be invalid.
Fix this by reserving a new queue in case the queue id is invalid.

While at it, clean up some unreachable code (a condition that is
already handled earlier) and remove all the non-DQA comments since
non-DQA mode is no longer supported.

Fixes: cf961e16620f ("iwlwifi: mvm: support dqa-mode agg on non-shared queue")
Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 38 ++++++++--------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index fc13f6291c06..80067eb9ea05 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2479,28 +2479,12 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 	/*
 	 * Note the possible cases:
-	 *  1. In DQA mode with an enabled TXQ - TXQ needs to become agg'ed
-	 *  2. Non-DQA mode: the TXQ hasn't yet been enabled, so find a free
-	 *	one and mark it as reserved
-	 *  3. In DQA mode, but no traffic yet on this TID: same treatment as in
-	 *	non-DQA mode, since the TXQ hasn't yet been allocated
-	 * Don't support case 3 for new TX path as it is not expected to happen
-	 * and aggregation will be offloaded soon anyway
+	 *  1. An enabled TXQ - TXQ needs to become agg'ed
+	 *  2. The TXQ hasn't yet been enabled, so find a free one and mark
+	 *	it as reserved
 	 */
 	txq_id = mvmsta->tid_data[tid].txq_id;
-	if (iwl_mvm_has_new_tx_api(mvm)) {
-		if (txq_id == IWL_MVM_INVALID_QUEUE) {
-			ret = -ENXIO;
-			goto release_locks;
-		}
-	} else if (unlikely(mvm->queue_info[txq_id].status ==
-			    IWL_MVM_QUEUE_SHARED)) {
-		ret = -ENXIO;
-		IWL_DEBUG_TX_QUEUES(mvm,
-				    "Can't start tid %d agg on shared queue!\n",
-				    tid);
-		goto release_locks;
-	} else if (mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) {
+	if (txq_id == IWL_MVM_INVALID_QUEUE) {
 		txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id,
 						 IWL_MVM_DQA_MIN_DATA_QUEUE,
 						 IWL_MVM_DQA_MAX_DATA_QUEUE);
@@ -2509,16 +2493,16 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			IWL_ERR(mvm, "Failed to allocate agg queue\n");
 			goto release_locks;
 		}
-		/*
-		 * TXQ shouldn't be in inactive mode for non-DQA, so getting
-		 * an inactive queue from iwl_mvm_find_free_queue() is
-		 * certainly a bug
-		 */
-		WARN_ON(mvm->queue_info[txq_id].status ==
-			IWL_MVM_QUEUE_INACTIVE);
 
 		/* TXQ hasn't yet been enabled, so mark it only as reserved */
 		mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED;
+	} else if (unlikely(mvm->queue_info[txq_id].status ==
+			    IWL_MVM_QUEUE_SHARED)) {
+		ret = -ENXIO;
+		IWL_DEBUG_TX_QUEUES(mvm,
+				    "Can't start tid %d agg on shared queue!\n",
+				    tid);
+		goto release_locks;
 	}
 
 	spin_unlock(&mvm->queue_info_lock);
-- 
cgit 


From 9f4ef1d70f05ebb7f0755545c05ea383be0eeb0f Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Sun, 4 Mar 2018 20:06:29 +0200
Subject: iwlwifi: mvm: Move unused phy's to a default channel

When immediate quiet bit is set in CSA, the entire channel is blocked
by the firmware. It is expected that all the MACs will evacuate the
channel and the phy will be eventually either moved or removed.

Currently, the phy context is just unreferenced and thus, the quiet
bit is kept set and it will be impossible to TX on this phy, if we
will need to reuse it in the future.  This can be seen when doing a
channel switch with mode=1 (quiet) twice from channel X to Y and then
back to channel X.

Fix that, by moving the phy context to a default channel when not
referenced anymore.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index 305cd56bf746..7f5434b34d0d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2017           Intel Deutschland GmbH
+ * Copyright(c) 2018           Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -18,11 +19,6 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
  * The full GNU General Public License is included in this distribution
  * in the file called COPYING.
  *
@@ -34,6 +30,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2018           Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -286,6 +283,20 @@ void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt)
 		return;
 
 	ctxt->ref--;
+
+	/*
+	 * Move unused phy's to a default channel. When the phy is moved the,
+	 * fw will cleanup immediate quiet bit if it was previously set,
+	 * otherwise we might not be able to reuse this phy.
+	 */
+	if (ctxt->ref == 0) {
+		struct ieee80211_channel *chan;
+		struct cfg80211_chan_def chandef;
+
+		chan = &mvm->hw->wiphy->bands[NL80211_BAND_2GHZ]->channels[0];
+		cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_NO_HT);
+		iwl_mvm_phy_ctxt_changed(mvm, ctxt, &chandef, 1, 1);
+	}
 }
 
 static void iwl_mvm_binding_iterator(void *_data, u8 *mac,
-- 
cgit 


From 9b9322db5c5a1917a66c71fe47c3848a9a31227e Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 14 Mar 2018 20:02:59 +0100
Subject: brcmfmac: Fix check for ISO3166 code

The commit "regulatory: add NUL to request alpha2" increases the length of
alpha2 to 3. This causes a regression on brcmfmac, because
brcmf_cfg80211_reg_notifier() expect valid ISO3166 codes in the complete
array. So fix this accordingly.

Fixes: 657308f73e67 ("regulatory: add NUL to request alpha2")
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 15fa00d79fc6..1ad97a40940d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -6802,7 +6802,7 @@ static void brcmf_cfg80211_reg_notifier(struct wiphy *wiphy,
 		return;
 
 	/* ignore non-ISO3166 country codes */
-	for (i = 0; i < sizeof(req->alpha2); i++)
+	for (i = 0; i < 2; i++)
 		if (req->alpha2[i] < 'A' || req->alpha2[i] > 'Z') {
 			brcmf_err("not an ISO3166 code (0x%02x 0x%02x)\n",
 				  req->alpha2[0], req->alpha2[1]);
-- 
cgit 


From 4f2921ca21b71a9faaecd84a9fc74401d3a8d275 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 14 Mar 2018 13:37:58 +0100
Subject: netfilter: nf_tables: meter: pick a set backend that supports updates

in nftables, 'meter' can be used to instantiate a hash-table at run
time:

rule add filter forward iif "internal" meter hostacct { ip saddr counter}
nft list meter ip filter hostacct
table ip filter {
  meter hostacct {
    type ipv4_addr
    elements = { 192.168.0.1 : counter packets 8 bytes 2672, ..

because elemets get added on the fly, the kernel must chose a set
backend type that implements the ->update() function, otherwise
rule insertion fails with EOPNOTSUPP.

Therefore, skip set types that lack ->update, and also
make sure we do not discard a (bad) candidate when we did yet
find any candidate at all.  This could happen when userspace prefers
low memory footprint -- the set implementation currently checked might
not be a fit at all.  Make sure we pick it anyway (!bops).  In
case next candidate is a better fix, it will be chosen instead.

But in case nothing else is found we at least have a non-ideal
match rather than no match at all.

Fixes: 6c03ae210ce3 ("netfilter: nft_set_hash: add non-resizable hashtable implementation")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 ++++-
 net/netfilter/nft_set_hash.c  | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c4acc7340eb1..36f69acaf51f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2446,6 +2446,9 @@ EXPORT_SYMBOL_GPL(nft_unregister_set);
 
 static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
 {
+	if ((flags & NFT_SET_EVAL) && !ops->update)
+		return false;
+
 	return (flags & ops->features) == (flags & NFT_SET_FEATURES);
 }
 
@@ -2510,7 +2513,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 				if (est.space == best.space &&
 				    est.lookup < best.lookup)
 					break;
-			} else if (est.size < best.size) {
+			} else if (est.size < best.size || !bops) {
 				break;
 			}
 			continue;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index d40591fe1b2f..fc9c6d5d64cd 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
 		    u32 flags)
 {
-	if (desc->size && !(flags & NFT_SET_TIMEOUT)) {
+	if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
 		switch (desc->klen) {
 		case 4:
 			return &nft_hash_fast_ops;
-- 
cgit 


From ae6153b50f9bf75a4952050f32fe168f68cdd657 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 18 Mar 2018 19:22:39 +0100
Subject: netfilter: nf_tables: permit second nat hook if colliding hook is
 going away

Sergei Trofimovich reported that restoring an nft ruleset doesn't work
anymore unless old rule content is flushed first.

The problem stems from a recent change designed to prevent multiple nat
hooks at the same hook point locations and nftables transaction model.

A 'flush ruleset' won't take effect until the entire transaction has
completed.

So, if one has a nft.rules file that contains a 'flush ruleset',
followed by a nat hook register request, then 'nft -f file' will work,
but running 'nft -f file' again will fail with -EBUSY.

Reason is that nftables will place the flush/removal requests in the
transaction list, but it will not act on the removal until after all new
rules are in place.

The netfilter core will therefore get request to register a new nat
hook before the old one is removed -- this now fails as the netfilter
core can't know the existing hook is staged for removal.

To fix this, we can search the transaction log when a hook collision
is detected.  The collision is okay if

 1. there is a delete request pending for the nat hook that is already
    registered.
 2. there is no second add request for a matching nat hook.
    This is required to only apply the exception once.

Fixes: f92b40a8b2645 ("netfilter: core: only allow one nat hook per hook point")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 64 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 36f69acaf51f..cc8ca00e6e6e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -74,15 +74,77 @@ static void nft_trans_destroy(struct nft_trans *trans)
 	kfree(trans);
 }
 
+/* removal requests are queued in the commit_list, but not acted upon
+ * until after all new rules are in place.
+ *
+ * Therefore, nf_register_net_hook(net, &nat_hook) runs before pending
+ * nf_unregister_net_hook().
+ *
+ * nf_register_net_hook thus fails if a nat hook is already in place
+ * even if the conflicting hook is about to be removed.
+ *
+ * If collision is detected, search commit_log for DELCHAIN matching
+ * the new nat hooknum; if we find one collision is temporary:
+ *
+ * Either transaction is aborted (new/colliding hook is removed), or
+ * transaction is committed (old hook is removed).
+ */
+static bool nf_tables_allow_nat_conflict(const struct net *net,
+					 const struct nf_hook_ops *ops)
+{
+	const struct nft_trans *trans;
+	bool ret = false;
+
+	if (!ops->nat_hook)
+		return false;
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		const struct nf_hook_ops *pending_ops;
+		const struct nft_chain *pending;
+
+		if (trans->msg_type != NFT_MSG_NEWCHAIN &&
+		    trans->msg_type != NFT_MSG_DELCHAIN)
+			continue;
+
+		pending = trans->ctx.chain;
+		if (!nft_is_base_chain(pending))
+			continue;
+
+		pending_ops = &nft_base_chain(pending)->ops;
+		if (pending_ops->nat_hook &&
+		    pending_ops->pf == ops->pf &&
+		    pending_ops->hooknum == ops->hooknum) {
+			/* other hook registration already pending? */
+			if (trans->msg_type == NFT_MSG_NEWCHAIN)
+				return false;
+
+			ret = true;
+		}
+	}
+
+	return ret;
+}
+
 static int nf_tables_register_hook(struct net *net,
 				   const struct nft_table *table,
 				   struct nft_chain *chain)
 {
+	struct nf_hook_ops *ops;
+	int ret;
+
 	if (table->flags & NFT_TABLE_F_DORMANT ||
 	    !nft_is_base_chain(chain))
 		return 0;
 
-	return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+	ops = &nft_base_chain(chain)->ops;
+	ret = nf_register_net_hook(net, ops);
+	if (ret == -EBUSY && nf_tables_allow_nat_conflict(net, ops)) {
+		ops->nat_hook = false;
+		ret = nf_register_net_hook(net, ops);
+		ops->nat_hook = true;
+	}
+
+	return ret;
 }
 
 static void nf_tables_unregister_hook(struct net *net,
-- 
cgit 


From 467697d289e7e6e1b15910d99096c0da08c56d5b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 20 Mar 2018 15:25:37 +0100
Subject: netfilter: nf_tables: add missing netlink attrs to policies

Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements")
Fixes: f25ad2e907f1 ("netfilter: nf_tables: prepare for expressions associated to set elements")
Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cc8ca00e6e6e..14777c404071 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1973,6 +1973,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
 	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
 				    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_RULE_ID]		= { .type = NLA_U32 },
 };
 
 static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -3380,6 +3381,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
 	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
 					    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_SET_ELEM_EXPR]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_OBJREF]		= { .type = NLA_STRING },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
-- 
cgit 


From aebfa52a925d701114afd6af0def35bab16d4f47 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 22 Mar 2018 11:08:50 +0100
Subject: netfilter: drop template ct when conntrack is skipped.

The ipv4 nf_ct code currently skips the nf_conntrak_in() call
for fragmented packets. As a results later matches/target can end
up manipulating template ct entry instead of 'real' ones.

Exploiting the above, syzbot found a way to trigger the following
splat:

WARNING: CPU: 1 PID: 4242 at net/netfilter/xt_cluster.c:55
xt_cluster_mt+0x6c1/0x840 net/netfilter/xt_cluster.c:127
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 4242 Comm: syzkaller027971 Not tainted 4.16.0-rc2+ #243
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x194/0x24d lib/dump_stack.c:53
  panic+0x1e4/0x41c kernel/panic.c:183
  __warn+0x1dc/0x200 kernel/panic.c:547
  report_bug+0x211/0x2d0 lib/bug.c:184
  fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
  fixup_bug arch/x86/kernel/traps.c:247 [inline]
  do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
  do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
  invalid_op+0x58/0x80 arch/x86/entry/entry_64.S:957
RIP: 0010:xt_cluster_hash net/netfilter/xt_cluster.c:55 [inline]
RIP: 0010:xt_cluster_mt+0x6c1/0x840 net/netfilter/xt_cluster.c:127
RSP: 0018:ffff8801d2f6f2d0 EFLAGS: 00010293
RAX: ffff8801af700540 RBX: 0000000000000000 RCX: ffffffff84a2d1e1
RDX: 0000000000000000 RSI: ffff8801d2f6f478 RDI: ffff8801cafd336a
RBP: ffff8801d2f6f2e8 R08: 0000000000000000 R09: 0000000000000001
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b03b3d18
R13: ffff8801cafd3300 R14: dffffc0000000000 R15: ffff8801d2f6f478
  ipt_do_table+0xa91/0x19b0 net/ipv4/netfilter/ip_tables.c:296
  iptable_filter_hook+0x65/0x80 net/ipv4/netfilter/iptable_filter.c:41
  nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline]
  nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483
  nf_hook include/linux/netfilter.h:243 [inline]
  NF_HOOK include/linux/netfilter.h:286 [inline]
  raw_send_hdrinc.isra.17+0xf39/0x1880 net/ipv4/raw.c:432
  raw_sendmsg+0x14cd/0x26b0 net/ipv4/raw.c:669
  inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763
  sock_sendmsg_nosec net/socket.c:629 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:639
  SYSC_sendto+0x361/0x5c0 net/socket.c:1748
  SyS_sendto+0x40/0x50 net/socket.c:1716
  do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x441b49
RSP: 002b:00007ffff5ca8b18 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441b49
RDX: 0000000000000030 RSI: 0000000020ff7000 RDI: 0000000000000003
RBP: 00000000006cc018 R08: 000000002066354c R09: 0000000000000010
R10: 0000000000000000 R11: 0000000000000216 R12: 0000000000403470
R13: 0000000000403500 R14: 0000000000000000 R15: 0000000000000000
Dumping ftrace buffer:
    (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

Instead of adding checks for template ct on every target/match
manipulating skb->_nfct, simply drop the template ct when skipping
nf_conntrack_in().

Fixes: 7b4fdf77a450ec ("netfilter: don't track fragmented packets")
Reported-and-tested-by: syzbot+0346441ae0545cfcea3a@syzkaller.appspotmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..9db988f9a4d7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,8 +154,20 @@ static unsigned int ipv4_conntrack_local(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *tmpl;
+
+		tmpl = nf_ct_get(skb, &ctinfo);
+		if (tmpl && nf_ct_is_template(tmpl)) {
+			/* when skipping ct, clear templates to avoid fooling
+			 * later targets/matches
+			 */
+			skb->_nfct = 0;
+			nf_ct_put(tmpl);
+		}
 		return NF_ACCEPT;
+	}
 
 	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
 }
-- 
cgit 


From d92191aa84e5f187d543867c3d54b38f294833fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 21 Mar 2018 13:55:42 +0100
Subject: netfilter: nf_tables: cache device name in flowtable object

Devices going away have to grab the nfnl_lock from the netdev event path
to avoid races with control plane updates.

However, netlink dumps in netfilter do not hold nfnl_lock mutex. Cache
the device name into the objects to avoid an use-after-free situation
for a device that is going away.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nf_tables_api.c     | 15 +++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015dace5..30eb0652b025 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1068,6 +1068,8 @@ struct nft_object_ops {
 int nft_register_obj(struct nft_object_type *obj_type);
 void nft_unregister_obj(struct nft_object_type *obj_type);
 
+#define NFT_FLOWTABLE_DEVICE_MAX	8
+
 /**
  *	struct nft_flowtable - nf_tables flow table
  *
@@ -1080,6 +1082,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  *	@genmask: generation mask
  *	@use: number of references to this flow table
  * 	@handle: unique object handle
+ *	@dev_name: array of device names
  *	@data: rhashtable and garbage collector
  * 	@ops: array of hooks
  */
@@ -1093,6 +1096,7 @@ struct nft_flowtable {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
+	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
 	/* runtime data below here */
 	struct nf_hook_ops		*ops ____cacheline_aligned;
 	struct nf_flowtable		data;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 14777c404071..977d43e00f98 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4932,8 +4932,6 @@ nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
-#define NFT_FLOWTABLE_DEVICE_MAX	8
-
 static int nf_tables_parse_devices(const struct nft_ctx *ctx,
 				   const struct nlattr *attr,
 				   struct net_device *dev_array[], int *len)
@@ -5006,7 +5004,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 	err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
 				      dev_array, &n);
 	if (err < 0)
-		goto err1;
+		return err;
 
 	ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
 	if (!ops) {
@@ -5026,6 +5024,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		flowtable->ops[i].priv		= &flowtable->data.rhashtable;
 		flowtable->ops[i].hook		= flowtable->data.type->hook;
 		flowtable->ops[i].dev		= dev_array[i];
+		flowtable->dev_name[i]		= kstrdup(dev_array[i]->name,
+							  GFP_KERNEL);
 	}
 
 	err = 0;
@@ -5203,8 +5203,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 err5:
 	i = flowtable->ops_len;
 err4:
-	for (k = i - 1; k >= 0; k--)
+	for (k = i - 1; k >= 0; k--) {
+		kfree(flowtable->dev_name[k]);
 		nf_unregister_net_hook(net, &flowtable->ops[k]);
+	}
 
 	kfree(flowtable->ops);
 err3:
@@ -5294,9 +5296,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 		goto nla_put_failure;
 
 	for (i = 0; i < flowtable->ops_len; i++) {
-		if (flowtable->ops[i].dev &&
+		if (flowtable->dev_name[i][0] &&
 		    nla_put_string(skb, NFTA_DEVICE_NAME,
-				   flowtable->ops[i].dev->name))
+				   flowtable->dev_name[i]))
 			goto nla_put_failure;
 	}
 	nla_nest_end(skb, nest_devs);
@@ -5538,6 +5540,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
 			continue;
 
 		nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
+		flowtable->dev_name[i][0] = '\0';
 		flowtable->ops[i].dev = NULL;
 		break;
 	}
-- 
cgit 


From 90d2723c6d4cb2ace50fc3b932a2bcc77710450b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 20 Mar 2018 17:00:19 +0100
Subject: netfilter: nf_tables: do not hold reference on netdevice from
 preparation phase

The netfilter netdevice event handler hold the nfnl_lock mutex, this
avoids races with a device going away while such device is being
attached to hooks from the netlink control plane. Therefore, either
control plane bails out with ENOENT or netdevice event path waits until
the hook that is attached to net_device is registered.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 977d43e00f98..530e12ae52d7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1288,8 +1288,6 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 		free_percpu(basechain->stats);
 		if (basechain->stats)
 			static_branch_dec(&nft_counters_enabled);
-		if (basechain->ops.dev != NULL)
-			dev_put(basechain->ops.dev);
 		kfree(chain->name);
 		kfree(basechain);
 	} else {
@@ -1356,7 +1354,7 @@ static int nft_chain_parse_hook(struct net *net,
 		}
 
 		nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
-		dev = dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(net, ifname);
 		if (!dev) {
 			module_put(type->owner);
 			return -ENOENT;
@@ -1373,8 +1371,6 @@ static int nft_chain_parse_hook(struct net *net,
 static void nft_chain_release_hook(struct nft_chain_hook *hook)
 {
 	module_put(hook->type->owner);
-	if (hook->dev != NULL)
-		dev_put(hook->dev);
 }
 
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
@@ -4948,7 +4944,7 @@ static int nf_tables_parse_devices(const struct nft_ctx *ctx,
 		}
 
 		nla_strlcpy(ifname, tmp, IFNAMSIZ);
-		dev = dev_get_by_name(ctx->net, ifname);
+		dev = __dev_get_by_name(ctx->net, ifname);
 		if (!dev) {
 			err = -ENOENT;
 			goto err1;
@@ -5007,10 +5003,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		return err;
 
 	ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
-	if (!ops) {
-		err = -ENOMEM;
-		goto err1;
-	}
+	if (!ops)
+		return -ENOMEM;
 
 	flowtable->hooknum	= hooknum;
 	flowtable->priority	= priority;
@@ -5028,11 +5022,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 							  GFP_KERNEL);
 	}
 
-	err = 0;
-err1:
-	for (i = 0; i < n; i++)
-		dev_put(dev_array[i]);
-
 	return err;
 }
 
-- 
cgit 


From 9a3fb9fb84cc30577c1b012a6a3efda944684291 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 19 Mar 2018 07:15:39 +0100
Subject: xfrm: Fix transport mode skb control buffer usage.

A recent commit introduced a new struct xfrm_trans_cb
that is used with the sk_buff control buffer. Unfortunately
it placed the structure in front of the control buffer and
overlooked that the IPv4/IPv6 control buffer is still needed
for some layer 4 protocols. As a result the IPv4/IPv6 control
buffer is overwritten with this structure. Fix this by setting
a apropriate header in front of the structure.

Fixes acf568ee859f ("xfrm: Reinject transport-mode packets ...")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_input.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1472c0857975..81788105c164 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -26,6 +26,12 @@ struct xfrm_trans_tasklet {
 };
 
 struct xfrm_trans_cb {
+	union {
+		struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;
 	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
 };
 
-- 
cgit 


From f6cc9c054e77b9a28d4594bcc201697edb21dfd2 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 22 Mar 2018 19:53:33 +0200
Subject: ip_tunnel: Emit events for post-register MTU changes

For tunnels created with IFLA_MTU, MTU of the netdevice is set by
rtnl_create_link() (called from rtnl_newlink()) before the device is
registered. However without IFLA_MTU that's not done.

rtnl_newlink() proceeds by calling struct rtnl_link_ops.newlink, which
via ip_tunnel_newlink() calls register_netdevice(), and that emits
NETDEV_REGISTER. Thus any listeners that inspect the netdevice get the
MTU of 0.

After ip_tunnel_newlink() corrects the MTU after registering the
netdevice, but since there's no event, the listeners don't get to know
about the MTU until something else happens--such as a NETDEV_UP event.
That's not ideal.

So instead of setting the MTU directly, go through dev_set_mtu(), which
takes care of distributing the necessary NETDEV_PRECHANGEMTU and
NETDEV_CHANGEMTU events.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6d21068f9b55..7b85ffad5d74 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -362,13 +362,18 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
 	struct ip_tunnel *nt;
 	struct net_device *dev;
 	int t_hlen;
+	int mtu;
+	int err;
 
 	BUG_ON(!itn->fb_tunnel_dev);
 	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
 	if (IS_ERR(dev))
 		return ERR_CAST(dev);
 
-	dev->mtu = ip_tunnel_bind_dev(dev);
+	mtu = ip_tunnel_bind_dev(dev);
+	err = dev_set_mtu(dev, mtu);
+	if (err)
+		goto err_dev_set_mtu;
 
 	nt = netdev_priv(dev);
 	t_hlen = nt->hlen + sizeof(struct iphdr);
@@ -376,6 +381,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
 	dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
 	ip_tunnel_add(itn, nt);
 	return nt;
+
+err_dev_set_mtu:
+	unregister_netdevice(dev);
+	return ERR_PTR(err);
 }
 
 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
@@ -1102,17 +1111,24 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 	nt->fwmark = fwmark;
 	err = register_netdevice(dev);
 	if (err)
-		goto out;
+		goto err_register_netdevice;
 
 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
 		eth_hw_addr_random(dev);
 
 	mtu = ip_tunnel_bind_dev(dev);
-	if (!tb[IFLA_MTU])
-		dev->mtu = mtu;
+	if (!tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, mtu);
+		if (err)
+			goto err_dev_set_mtu;
+	}
 
 	ip_tunnel_add(itn, nt);
-out:
+	return 0;
+
+err_dev_set_mtu:
+	unregister_netdevice(dev);
+err_register_netdevice:
 	return err;
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
-- 
cgit 


From 22b99058dd79ecff9de64eb30e15a330d89862c6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 22 Mar 2018 19:53:34 +0200
Subject: mlxsw: spectrum_router: Move mlxsw_sp_rif_ipip_lb_op()

Move the function so that it can be called without forward declaration
from a function that will be added in a follow-up patch.

Fixes: 0063587d3587 ("mlxsw: spectrum: Support decap-only IP-in-IP tunnels")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 54 +++++++++++-----------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f7948e983637..a7d0adf068ec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1380,6 +1380,33 @@ mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
 						  decap_fib_entry);
 }
 
+static int
+mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
+			struct mlxsw_sp_vr *ul_vr, bool enable)
+{
+	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
+	struct mlxsw_sp_rif *rif = &lb_rif->common;
+	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+	u32 saddr4;
+
+	switch (lb_cf.ul_protocol) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
+		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
+				    rif->rif_index, rif->vr_id, rif->dev->mtu);
+		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
+			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+			    ul_vr->id, saddr4, lb_cf.okey);
+		break;
+
+	case MLXSW_SP_L3_PROTO_IPV6:
+		return -EAFNOSUPPORT;
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
 						struct net_device *ol_dev)
 {
@@ -6843,33 +6870,6 @@ mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
 	rif_lb->lb_config = params_lb->lb_config;
 }
 
-static int
-mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
-			struct mlxsw_sp_vr *ul_vr, bool enable)
-{
-	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
-	struct mlxsw_sp_rif *rif = &lb_rif->common;
-	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
-	char ritr_pl[MLXSW_REG_RITR_LEN];
-	u32 saddr4;
-
-	switch (lb_cf.ul_protocol) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
-		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
-				    rif->rif_index, rif->vr_id, rif->dev->mtu);
-		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
-			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
-			    ul_vr->id, saddr4, lb_cf.okey);
-		break;
-
-	case MLXSW_SP_L3_PROTO_IPV6:
-		return -EAFNOSUPPORT;
-	}
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-}
-
 static int
 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
 {
-- 
cgit 


From 68c3cd925597842c4744fd93d61f8cd2496cdfc8 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 22 Mar 2018 19:53:35 +0200
Subject: mlxsw: spectrum_router: Handle MTU change of GRE netdevs

Update MTU of overlay loopback in accordance with the setting on the
tunnel netdevice.

Fixes: 0063587d3587 ("mlxsw: spectrum: Support decap-only IP-in-IP tunnels")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a7d0adf068ec..997e24dcb053 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1407,6 +1407,28 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 }
 
+static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
+						 struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+	struct mlxsw_sp_rif_ipip_lb *lb_rif;
+	struct mlxsw_sp_vr *ul_vr;
+	int err = 0;
+
+	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (ipip_entry) {
+		lb_rif = ipip_entry->ol_lb;
+		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
+		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+		if (err)
+			goto out;
+		lb_rif->common.mtu = ol_dev->mtu;
+	}
+
+out:
+	return err;
+}
+
 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
 						struct net_device *ol_dev)
 {
@@ -1687,6 +1709,8 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
 		extack = info->extack;
 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
 							       ol_dev, extack);
+	case NETDEV_CHANGEMTU:
+		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
 	}
 	return 0;
 }
-- 
cgit 


From 1bfa26ff8c4b7512f4e4efa6df211239223033d4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 23 Mar 2018 07:56:58 -0700
Subject: ipv6: fix possible deadlock in rt6_age_examine_exception()

syzbot reported a LOCKDEP splat [1] in rt6_age_examine_exception()

rt6_age_examine_exception() is called while rt6_exception_lock is held.
This lock is the lower one in the lock hierarchy, thus we can not
call dst_neigh_lookup() function, as it can fallback to neigh_create()

We should instead do a pure RCU lookup. As a bonus we avoid
a pair of atomic operations on neigh refcount.

[1]

WARNING: possible circular locking dependency detected
4.16.0-rc4+ #277 Not tainted

syz-executor7/4015 is trying to acquire lock:
 (&ndev->lock){++--}, at: [<00000000416dce19>] __ipv6_dev_mc_dec+0x45/0x350 net/ipv6/mcast.c:928

but task is already holding lock:
 (&tbl->lock){++-.}, at: [<00000000b5cb1d65>] neigh_ifdown+0x3d/0x250 net/core/neighbour.c:292

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&tbl->lock){++-.}:
       __raw_write_lock_bh include/linux/rwlock_api_smp.h:203 [inline]
       _raw_write_lock_bh+0x31/0x40 kernel/locking/spinlock.c:312
       __neigh_create+0x87e/0x1d90 net/core/neighbour.c:528
       neigh_create include/net/neighbour.h:315 [inline]
       ip6_neigh_lookup+0x9a7/0xba0 net/ipv6/route.c:228
       dst_neigh_lookup include/net/dst.h:405 [inline]
       rt6_age_examine_exception net/ipv6/route.c:1609 [inline]
       rt6_age_exceptions+0x381/0x660 net/ipv6/route.c:1645
       fib6_age+0xfb/0x140 net/ipv6/ip6_fib.c:2033
       fib6_clean_node+0x389/0x580 net/ipv6/ip6_fib.c:1919
       fib6_walk_continue+0x46c/0x8a0 net/ipv6/ip6_fib.c:1845
       fib6_walk+0x91/0xf0 net/ipv6/ip6_fib.c:1893
       fib6_clean_tree+0x1e6/0x340 net/ipv6/ip6_fib.c:1970
       __fib6_clean_all+0x1f4/0x3a0 net/ipv6/ip6_fib.c:1986
       fib6_clean_all net/ipv6/ip6_fib.c:1997 [inline]
       fib6_run_gc+0x16b/0x3c0 net/ipv6/ip6_fib.c:2053
       ndisc_netdev_event+0x3c2/0x4a0 net/ipv6/ndisc.c:1781
       notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
       __raw_notifier_call_chain kernel/notifier.c:394 [inline]
       raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
       call_netdevice_notifiers_info+0x32/0x70 net/core/dev.c:1707
       call_netdevice_notifiers net/core/dev.c:1725 [inline]
       __dev_notify_flags+0x262/0x430 net/core/dev.c:6960
       dev_change_flags+0xf5/0x140 net/core/dev.c:6994
       devinet_ioctl+0x126a/0x1ac0 net/ipv4/devinet.c:1080
       inet_ioctl+0x184/0x310 net/ipv4/af_inet.c:919
       sock_do_ioctl+0xef/0x390 net/socket.c:957
       sock_ioctl+0x36b/0x610 net/socket.c:1081
       vfs_ioctl fs/ioctl.c:46 [inline]
       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
       SYSC_ioctl fs/ioctl.c:701 [inline]
       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

-> #2 (rt6_exception_lock){+.-.}:
       __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
       _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168
       spin_lock_bh include/linux/spinlock.h:315 [inline]
       rt6_flush_exceptions+0x21/0x210 net/ipv6/route.c:1367
       fib6_del_route net/ipv6/ip6_fib.c:1677 [inline]
       fib6_del+0x624/0x12c0 net/ipv6/ip6_fib.c:1761
       __ip6_del_rt+0xc7/0x120 net/ipv6/route.c:2980
       ip6_del_rt+0x132/0x1a0 net/ipv6/route.c:2993
       __ipv6_dev_ac_dec+0x3b1/0x600 net/ipv6/anycast.c:332
       ipv6_dev_ac_dec net/ipv6/anycast.c:345 [inline]
       ipv6_sock_ac_close+0x2b4/0x3e0 net/ipv6/anycast.c:200
       inet6_release+0x48/0x70 net/ipv6/af_inet6.c:433
       sock_release+0x8d/0x1e0 net/socket.c:594
       sock_close+0x16/0x20 net/socket.c:1149
       __fput+0x327/0x7e0 fs/file_table.c:209
       ____fput+0x15/0x20 fs/file_table.c:243
       task_work_run+0x199/0x270 kernel/task_work.c:113
       exit_task_work include/linux/task_work.h:22 [inline]
       do_exit+0x9bb/0x1ad0 kernel/exit.c:865
       do_group_exit+0x149/0x400 kernel/exit.c:968
       get_signal+0x73a/0x16d0 kernel/signal.c:2469
       do_signal+0x90/0x1e90 arch/x86/kernel/signal.c:809
       exit_to_usermode_loop+0x258/0x2f0 arch/x86/entry/common.c:162
       prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
       syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
       do_syscall_64+0x6ec/0x940 arch/x86/entry/common.c:292
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

-> #1 (&(&tb->tb6_lock)->rlock){+.-.}:
       __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
       _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168
       spin_lock_bh include/linux/spinlock.h:315 [inline]
       __ip6_ins_rt+0x56/0x90 net/ipv6/route.c:1007
       ip6_route_add+0x141/0x190 net/ipv6/route.c:2955
       addrconf_prefix_route+0x44f/0x620 net/ipv6/addrconf.c:2359
       fixup_permanent_addr net/ipv6/addrconf.c:3368 [inline]
       addrconf_permanent_addr net/ipv6/addrconf.c:3391 [inline]
       addrconf_notify+0x1ad2/0x2310 net/ipv6/addrconf.c:3460
       notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
       __raw_notifier_call_chain kernel/notifier.c:394 [inline]
       raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
       call_netdevice_notifiers_info+0x32/0x70 net/core/dev.c:1707
       call_netdevice_notifiers net/core/dev.c:1725 [inline]
       __dev_notify_flags+0x15d/0x430 net/core/dev.c:6958
       dev_change_flags+0xf5/0x140 net/core/dev.c:6994
       do_setlink+0xa22/0x3bb0 net/core/rtnetlink.c:2357
       rtnl_newlink+0xf37/0x1a50 net/core/rtnetlink.c:2965
       rtnetlink_rcv_msg+0x57f/0xb10 net/core/rtnetlink.c:4641
       netlink_rcv_skb+0x14b/0x380 net/netlink/af_netlink.c:2444
       rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4659
       netlink_unicast_kernel net/netlink/af_netlink.c:1308 [inline]
       netlink_unicast+0x4c4/0x6b0 net/netlink/af_netlink.c:1334
       netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1897
       sock_sendmsg_nosec net/socket.c:629 [inline]
       sock_sendmsg+0xca/0x110 net/socket.c:639
       ___sys_sendmsg+0x767/0x8b0 net/socket.c:2047
       __sys_sendmsg+0xe5/0x210 net/socket.c:2081
       SYSC_sendmsg net/socket.c:2092 [inline]
       SyS_sendmsg+0x2d/0x50 net/socket.c:2088
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

-> #0 (&ndev->lock){++--}:
       lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
       __raw_write_lock_bh include/linux/rwlock_api_smp.h:203 [inline]
       _raw_write_lock_bh+0x31/0x40 kernel/locking/spinlock.c:312
       __ipv6_dev_mc_dec+0x45/0x350 net/ipv6/mcast.c:928
       ipv6_dev_mc_dec+0x110/0x1f0 net/ipv6/mcast.c:961
       pndisc_destructor+0x21a/0x340 net/ipv6/ndisc.c:392
       pneigh_ifdown net/core/neighbour.c:695 [inline]
       neigh_ifdown+0x149/0x250 net/core/neighbour.c:294
       rt6_disable_ip+0x537/0x700 net/ipv6/route.c:3874
       addrconf_ifdown+0x14b/0x14f0 net/ipv6/addrconf.c:3633
       addrconf_notify+0x5f8/0x2310 net/ipv6/addrconf.c:3557
       notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
       __raw_notifier_call_chain kernel/notifier.c:394 [inline]
       raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
       call_netdevice_notifiers_info+0x32/0x70 net/core/dev.c:1707
       call_netdevice_notifiers net/core/dev.c:1725 [inline]
       __dev_notify_flags+0x262/0x430 net/core/dev.c:6960
       dev_change_flags+0xf5/0x140 net/core/dev.c:6994
       devinet_ioctl+0x126a/0x1ac0 net/ipv4/devinet.c:1080
       inet_ioctl+0x184/0x310 net/ipv4/af_inet.c:919
       packet_ioctl+0x1ff/0x310 net/packet/af_packet.c:4066
       sock_do_ioctl+0xef/0x390 net/socket.c:957
       sock_ioctl+0x36b/0x610 net/socket.c:1081
       vfs_ioctl fs/ioctl.c:46 [inline]
       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
       SYSC_ioctl fs/ioctl.c:701 [inline]
       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

other info that might help us debug this:

Chain exists of:
  &ndev->lock --> rt6_exception_lock --> &tbl->lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&tbl->lock);
                               lock(rt6_exception_lock);
                               lock(&tbl->lock);
  lock(&ndev->lock);

 *** DEADLOCK ***

2 locks held by syz-executor7/4015:
 #0:  (rtnl_mutex){+.+.}, at: [<00000000a2f16daa>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74
 #1:  (&tbl->lock){++-.}, at: [<00000000b5cb1d65>] neigh_ifdown+0x3d/0x250 net/core/neighbour.c:292

stack backtrace:
CPU: 0 PID: 4015 Comm: syz-executor7 Not tainted 4.16.0-rc4+ #277
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 print_circular_bug.isra.38+0x2cd/0x2dc kernel/locking/lockdep.c:1223
 check_prev_add kernel/locking/lockdep.c:1863 [inline]
 check_prevs_add kernel/locking/lockdep.c:1976 [inline]
 validate_chain kernel/locking/lockdep.c:2417 [inline]
 __lock_acquire+0x30a8/0x3e00 kernel/locking/lockdep.c:3431
 lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
 __raw_write_lock_bh include/linux/rwlock_api_smp.h:203 [inline]
 _raw_write_lock_bh+0x31/0x40 kernel/locking/spinlock.c:312
 __ipv6_dev_mc_dec+0x45/0x350 net/ipv6/mcast.c:928
 ipv6_dev_mc_dec+0x110/0x1f0 net/ipv6/mcast.c:961
 pndisc_destructor+0x21a/0x340 net/ipv6/ndisc.c:392
 pneigh_ifdown net/core/neighbour.c:695 [inline]
 neigh_ifdown+0x149/0x250 net/core/neighbour.c:294
 rt6_disable_ip+0x537/0x700 net/ipv6/route.c:3874
 addrconf_ifdown+0x14b/0x14f0 net/ipv6/addrconf.c:3633
 addrconf_notify+0x5f8/0x2310 net/ipv6/addrconf.c:3557
 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x32/0x70 net/core/dev.c:1707
 call_netdevice_notifiers net/core/dev.c:1725 [inline]
 __dev_notify_flags+0x262/0x430 net/core/dev.c:6960
 dev_change_flags+0xf5/0x140 net/core/dev.c:6994
 devinet_ioctl+0x126a/0x1ac0 net/ipv4/devinet.c:1080
 inet_ioctl+0x184/0x310 net/ipv4/af_inet.c:919
 packet_ioctl+0x1ff/0x310 net/packet/af_packet.c:4066
 sock_do_ioctl+0xef/0x390 net/socket.c:957
 sock_ioctl+0x36b/0x610 net/socket.c:1081
 vfs_ioctl fs/ioctl.c:46 [inline]
 do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
 SYSC_ioctl fs/ioctl.c:701 [inline]
 SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

Fixes: c757faa8bfa2 ("ipv6: prepare fib6_age() for exception table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Acked-by: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b0d5c64e1978..b33d057ac5eb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1626,11 +1626,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 		struct neighbour *neigh;
 		__u8 neigh_flags = 0;
 
-		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
-		if (neigh) {
+		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+		if (neigh)
 			neigh_flags = neigh->flags;
-			neigh_release(neigh);
-		}
+
 		if (!(neigh_flags & NTF_ROUTER)) {
 			RT6_TRACE("purging route %p via non-router but gateway\n",
 				  rt);
@@ -1654,7 +1653,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
 		return;
 
-	spin_lock_bh(&rt6_exception_lock);
+	rcu_read_lock_bh();
+	spin_lock(&rt6_exception_lock);
 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
 				    lockdep_is_held(&rt6_exception_lock));
 
@@ -1668,7 +1668,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
 			bucket++;
 		}
 	}
-	spin_unlock_bh(&rt6_exception_lock);
+	spin_unlock(&rt6_exception_lock);
+	rcu_read_unlock_bh();
 }
 
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-- 
cgit 


From f8fb3419ead44f9a3136995acd24e35da4525177 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Tue, 20 Mar 2018 03:13:27 +0100
Subject: batman-adv: fix multicast-via-unicast transmission with AP isolation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For multicast frames AP isolation is only supposed to be checked on
the receiving nodes and never on the originating one.

Furthermore, the isolation or wifi flag bits should only be intepreted
as such for unicast and never multicast TT entries.

By injecting flags to the multicast TT entry claimed by a single
target node it was verified in tests that this multicast address
becomes unreachable, leading to packet loss.

Omitting the "src" parameter to the batadv_transtable_search() call
successfully skipped the AP isolation check and made the target
reachable again.

Fixes: 1d8ab8d3c176 ("batman-adv: Modified forwarding behaviour for multicast packets")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/multicast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d70640135e3a..ee56af5c43e0 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -814,8 +814,8 @@ static struct batadv_orig_node *
 batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
 			      struct ethhdr *ethhdr)
 {
-	return batadv_transtable_search(bat_priv, ethhdr->h_source,
-					ethhdr->h_dest, BATADV_NO_FLAGS);
+	return batadv_transtable_search(bat_priv, NULL, ethhdr->h_dest,
+					BATADV_NO_FLAGS);
 }
 
 /**
-- 
cgit 


From a752c0a4524889cdc0765925258fd1fd72344100 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Thu, 22 Mar 2018 00:21:32 +0100
Subject: batman-adv: fix packet loss for broadcasted DHCP packets to a server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DHCP connectivity issues can currently occur if the following conditions
are met:

1) A DHCP packet from a client to a server
2) This packet has a multicast destination
3) This destination has a matching entry in the translation table
   (FF:FF:FF:FF:FF:FF for IPv4, 33:33:00:01:00:02/33:33:00:01:00:03
    for IPv6)
4) The orig-node determined by TT for the multicast destination
   does not match the orig-node determined by best-gateway-selection

In this case the DHCP packet will be dropped.

The "gateway-out-of-range" check is supposed to only be applied to
unicasted DHCP packets to a specific DHCP server.

In that case dropping the the unicasted frame forces the client to
retry via a broadcasted one, but now directed to the new best
gateway.

A DHCP packet with broadcast/multicast destination is already ensured to
always be delivered to the best gateway. Dropping a multicasted
DHCP packet here will only prevent completing DHCP as there is no
other fallback.

So far, it seems the unicast check was implicitly performed by
expecting the batadv_transtable_search() to return NULL for multicast
destinations. However, a multicast address could have always ended up in
the translation table and in fact is now common.

To fix this potential loss of a DHCP client-to-server packet to a
multicast address this patch adds an explicit multicast destination
check to reliably bail out of the gateway-out-of-range check for such
destinations.

The issue and fix were tested in the following three node setup:

- Line topology, A-B-C
- A: gateway client, DHCP client
- B: gateway server, hop-penalty increased: 30->60, DHCP server
- C: gateway server, code modifications to announce FF:FF:FF:FF:FF:FF

Without this patch, A would never transmit its DHCP Discover packet
due to an always "out-of-range" condition. With this patch,
a full DHCP handshake between A and B was possible again.

Fixes: be7af5cf9cae ("batman-adv: refactoring gateway handling code")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/gateway_client.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 37fe9a644f22..808d2dd4a839 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -746,7 +746,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 {
 	struct batadv_neigh_node *neigh_curr = NULL;
 	struct batadv_neigh_node *neigh_old = NULL;
-	struct batadv_orig_node *orig_dst_node;
+	struct batadv_orig_node *orig_dst_node = NULL;
 	struct batadv_gw_node *gw_node = NULL;
 	struct batadv_gw_node *curr_gw = NULL;
 	struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
@@ -757,6 +757,9 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 
 	vid = batadv_get_vid(skb, 0);
 
+	if (is_multicast_ether_addr(ethhdr->h_dest))
+		goto out;
+
 	orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
 						 ethhdr->h_dest, vid);
 	if (!orig_dst_node)
-- 
cgit 


From 4b6eca9d6817e58ae9b3342a5974e8fd9285279e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 23 Mar 2018 19:45:12 -0700
Subject: tools: bpftool: don't use hex numbers in JSON output

JSON does not accept hex numbers with 0x prefix.  Simply print
as decimal numbers, JSON should be primarily machine-readable.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Fixes: 831a0aafe5c3 ("tools: bpftool: add JSON output for `bpftool map *` commands")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f95fa67bb498..f509c86faede 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -428,7 +428,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 		jsonw_string_field(json_wtr, "name", info->name);
 
 	jsonw_name(json_wtr, "flags");
-	jsonw_printf(json_wtr, "%#x", info->map_flags);
+	jsonw_printf(json_wtr, "%d", info->map_flags);
 
 	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
-- 
cgit 


From e8a4796ee2954db9f7e9b52e5fbc66ef876752f6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 23 Mar 2018 20:00:13 -0700
Subject: nfp: bpf: fix check of program max insn count

NFP program allocation length is in bytes and NFP program length
is in instructions, fix the comparison of the two.

Fixes: 9314c442d7dd ("nfp: bpf: move translation prepare to offload.c")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 56451edf01c2..ecd7c33baf3c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -74,7 +74,9 @@ nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
 {
-	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
+	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
+		pr_warn("instruction limit reached (%u NFP instructions)\n",
+			nfp_prog->prog_len);
 		nfp_prog->error = -ENOSPC;
 		return;
 	}
@@ -2463,6 +2465,8 @@ static int nfp_translate(struct nfp_prog *nfp_prog)
 		err = cb(nfp_prog, meta);
 		if (err)
 			return err;
+		if (nfp_prog->error)
+			return nfp_prog->error;
 
 		nfp_prog->n_translated++;
 	}
-- 
cgit 


From 32c1733f0dd4bd11d6e65512bf4dc337c0452c8e Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Thu, 22 Mar 2018 21:12:39 -0600
Subject: netfilter: nf_socket: Fix out of bounds access in
 nf_sk_lookup_slow_v{4,6}

skb_header_pointer will copy data into a buffer if data is non linear,
otherwise it will return a pointer in the linear section of the data.
nf_sk_lookup_slow_v{4,6} always copies data of size udphdr but later
accesses memory within the size of tcphdr (th->doff) in case of TCP
packets. This causes a crash when running with KASAN with the following
call stack -

BUG: KASAN: stack-out-of-bounds in xt_socket_lookup_slow_v4+0x524/0x718
net/netfilter/xt_socket.c:178
Read of size 2 at addr ffffffe3d417a87c by task syz-executor/28971
CPU: 2 PID: 28971 Comm: syz-executor Tainted: G    B   W  O    4.9.65+ #1
Call trace:
[<ffffff9467e8d390>] dump_backtrace+0x0/0x428 arch/arm64/kernel/traps.c:76
[<ffffff9467e8d7e0>] show_stack+0x28/0x38 arch/arm64/kernel/traps.c:226
[<ffffff946842d9b8>] __dump_stack lib/dump_stack.c:15 [inline]
[<ffffff946842d9b8>] dump_stack+0xd4/0x124 lib/dump_stack.c:51
[<ffffff946811d4b0>] print_address_description+0x68/0x258 mm/kasan/report.c:248
[<ffffff946811d8c8>] kasan_report_error mm/kasan/report.c:347 [inline]
[<ffffff946811d8c8>] kasan_report.part.2+0x228/0x2f0 mm/kasan/report.c:371
[<ffffff946811df44>] kasan_report+0x5c/0x70 mm/kasan/report.c:372
[<ffffff946811bebc>] check_memory_region_inline mm/kasan/kasan.c:308 [inline]
[<ffffff946811bebc>] __asan_load2+0x84/0x98 mm/kasan/kasan.c:739
[<ffffff94694d6f04>] __tcp_hdrlen include/linux/tcp.h:35 [inline]
[<ffffff94694d6f04>] xt_socket_lookup_slow_v4+0x524/0x718 net/netfilter/xt_socket.c:178

Fix this by copying data into appropriate size headers based on protocol.

Fixes: a583636a83ea ("inet: refactor inet[6]_lookup functions to take skb")
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_socket_ipv4.c | 6 ++++--
 net/ipv6/netfilter/nf_socket_ipv6.c | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index e9293bdebba0..4824b1e183a1 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -108,10 +108,12 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
 	int doff = 0;
 
 	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
-		struct udphdr _hdr, *hp;
+		struct tcphdr _hdr;
+		struct udphdr *hp;
 
 		hp = skb_header_pointer(skb, ip_hdrlen(skb),
-					sizeof(_hdr), &_hdr);
+					iph->protocol == IPPROTO_UDP ?
+					sizeof(*hp) : sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return NULL;
 
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index ebb2bf84232a..f14de4b6d639 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -116,9 +116,11 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
 	}
 
 	if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
-		struct udphdr _hdr, *hp;
+		struct tcphdr _hdr;
+		struct udphdr *hp;
 
-		hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+		hp = skb_header_pointer(skb, thoff, tproto == IPPROTO_UDP ?
+					sizeof(*hp) : sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return NULL;
 
-- 
cgit 


From bc58a1baf2a97838a422ce4e75180c4b680e7a9b Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.vnet.ibm.com>
Date: Fri, 23 Mar 2018 11:05:45 +0100
Subject: net/ipv4: disable SMC TCP option with SYN Cookies

Currently, the SMC experimental TCP option in a SYN packet is lost on
the server side when SYN Cookies are active. However, the corresponding
SYNACK sent back to the client contains the SMC option. This causes an
inconsistent view of the SMC capabilities on the client and server.

This patch disables the SMC option in the SYNACK when SYN Cookies are
active to avoid this issue.

Fixes: 60e2a7780793b ("tcp: TCP experimental option for SMC")
Signed-off-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 2 ++
 net/ipv4/tcp_input.c  | 3 +++
 net/ipv6/syncookies.c | 2 ++
 3 files changed, 7 insertions(+)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fda37f2862c9..c3387dfd725b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -349,6 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= 0;
 	treq->tfo_listener	= false;
+	if (IS_ENABLED(CONFIG_SMC))
+		ireq->smc_ok = 0;
 
 	ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9a1b3c1c1c14..ff6cd98ce8d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6256,6 +6256,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
 
+	if (IS_ENABLED(CONFIG_SMC) && want_cookie)
+		tmp_opt.smc_ok = 0;
+
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
 	inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e7a3a6b6cf56..e997141aed8c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -217,6 +217,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	treq->snt_isn = cookie;
 	treq->ts_off = 0;
 	treq->txhash = net_tx_rndhash();
+	if (IS_ENABLED(CONFIG_SMC))
+		ireq->smc_ok = 0;
 
 	/*
 	 * We need to lookup the dst_entry to get the correct window size.
-- 
cgit 


From e69647a19c870c2f919e4d5023af8a515e8ef25f Mon Sep 17 00:00:00 2001
From: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Date: Fri, 23 Mar 2018 15:48:08 +0530
Subject: lan78xx: Set ASD in MAC_CR when EEE is enabled.

Description:
EEE does not work with lan7800 when AutoSpeed is not set.
(This can happen when EEPROM is not populated or configured incorrectly)

Root-Cause:
When EEE is enabled, the mac config register ASD is not set
i.e. in default state, causing EEE fail.

Fix:
Set the register when eeprom is not present.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 60a604cc7647..90d176279152 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2351,6 +2351,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	u32 buf;
 	int ret = 0;
 	unsigned long timeout;
+	u8 sig;
 
 	ret = lan78xx_read_reg(dev, HW_CFG, &buf);
 	buf |= HW_CFG_LRST_;
@@ -2450,6 +2451,15 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	/* LAN7801 only has RGMII mode */
 	if (dev->chipid == ID_REV_CHIP_ID_7801_)
 		buf &= ~MAC_CR_GMII_EN_;
+
+	if (dev->chipid == ID_REV_CHIP_ID_7800_) {
+		ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
+		if (!ret && sig != EEPROM_INDICATOR) {
+			/* Implies there is no external eeprom. Set mac speed */
+			netdev_info(dev->net, "No External EEPROM. Setting MAC Speed\n");
+			buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
+		}
+	}
 	ret = lan78xx_write_reg(dev, MAC_CR, buf);
 
 	ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-- 
cgit 


From 7880287981b60a6808f39f297bb66936e8bdf57a Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Fri, 23 Mar 2018 13:49:02 +0100
Subject: netlink: make sure nladdr has correct size in netlink_connect()

KMSAN reports use of uninitialized memory in the case when |alen| is
smaller than sizeof(struct sockaddr_nl), and therefore |nladdr| isn't
fully copied from the userspace.

Signed-off-by: Alexander Potapenko <glider@google.com>
Fixes: 1da177e4c3f41524 ("Linux-2.6.12-rc2")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 07e8478068f0..70c455341243 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1085,6 +1085,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 	if (addr->sa_family != AF_NETLINK)
 		return -EINVAL;
 
+	if (alen < sizeof(struct sockaddr_nl))
+		return -EINVAL;
+
 	if ((nladdr->nl_groups || nladdr->nl_pid) &&
 	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
 		return -EPERM;
-- 
cgit 


From 10b8a3de603df7b96004179b1b33b1708c76d144 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 23 Mar 2018 14:47:30 +0100
Subject: ipv6: the entire IPv6 header chain must fit the first fragment

While building ipv6 datagram we currently allow arbitrary large
extheaders, even beyond pmtu size. The syzbot has found a way
to exploit the above to trigger the following splat:

kernel BUG at ./include/linux/skbuff.h:2073!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
    (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 4230 Comm: syzkaller672661 Not tainted 4.16.0-rc2+ #326
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:__skb_pull include/linux/skbuff.h:2073 [inline]
RIP: 0010:__ip6_make_skb+0x1ac8/0x2190 net/ipv6/ip6_output.c:1636
RSP: 0018:ffff8801bc18f0f0 EFLAGS: 00010293
RAX: ffff8801b17400c0 RBX: 0000000000000738 RCX: ffffffff84f01828
RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801b415ac18
RBP: ffff8801bc18f360 R08: ffff8801b4576844 R09: 0000000000000000
R10: ffff8801bc18f380 R11: ffffed00367aee4e R12: 00000000000000d6
R13: ffff8801b415a740 R14: dffffc0000000000 R15: ffff8801b45767c0
FS:  0000000001535880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000002000b000 CR3: 00000001b4123001 CR4: 00000000001606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  ip6_finish_skb include/net/ipv6.h:969 [inline]
  udp_v6_push_pending_frames+0x269/0x3b0 net/ipv6/udp.c:1073
  udpv6_sendmsg+0x2a96/0x3400 net/ipv6/udp.c:1343
  inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:764
  sock_sendmsg_nosec net/socket.c:630 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:640
  ___sys_sendmsg+0x320/0x8b0 net/socket.c:2046
  __sys_sendmmsg+0x1ee/0x620 net/socket.c:2136
  SYSC_sendmmsg net/socket.c:2167 [inline]
  SyS_sendmmsg+0x35/0x60 net/socket.c:2162
  do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x4404c9
RSP: 002b:00007ffdce35f948 EFLAGS: 00000217 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004404c9
RDX: 0000000000000003 RSI: 0000000020001f00 RDI: 0000000000000003
RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 0000000020000080 R11: 0000000000000217 R12: 0000000000401df0
R13: 0000000000401e80 R14: 0000000000000000 R15: 0000000000000000
Code: ff e8 1d 5e b9 fc e9 15 e9 ff ff e8 13 5e b9 fc e9 44 e8 ff ff e8 29
5e b9 fc e9 c0 e6 ff ff e8 3f f3 80 fc 0f 0b e8 38 f3 80 fc <0f> 0b 49 8d
87 80 00 00 00 4d 8d 87 84 00 00 00 48 89 85 20 fe
RIP: __skb_pull include/linux/skbuff.h:2073 [inline] RSP: ffff8801bc18f0f0
RIP: __ip6_make_skb+0x1ac8/0x2190 net/ipv6/ip6_output.c:1636 RSP:
ffff8801bc18f0f0

As stated by RFC 7112 section 5:

   When a host fragments an IPv6 datagram, it MUST include the entire
   IPv6 Header Chain in the First Fragment.

So this patch addresses the issue dropping datagrams with excessive
extheader length. It also updates the error path to report to the
calling socket nonnegative pmtu values.

The issue apparently predates git history.

v1 -> v2: cleanup error path, as per Eric's suggestion

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+91e6f9932ff122fa4410@syzkaller.appspotmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a8a919520090..5cb18c8ba9b2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1246,7 +1246,7 @@ static int __ip6_append_data(struct sock *sk,
 			     const struct sockcm_cookie *sockc)
 {
 	struct sk_buff *skb, *skb_prev = NULL;
-	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
+	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
 	int exthdrlen = 0;
 	int dst_exthdrlen = 0;
 	int hh_len;
@@ -1282,6 +1282,12 @@ static int __ip6_append_data(struct sock *sk,
 		      sizeof(struct frag_hdr) : 0) +
 		     rt->rt6i_nfheader_len;
 
+	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
+	 * the first fragment
+	 */
+	if (headersize + transhdrlen > mtu)
+		goto emsgsize;
+
 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
 	    (sk->sk_protocol == IPPROTO_UDP ||
 	     sk->sk_protocol == IPPROTO_RAW)) {
@@ -1297,9 +1303,8 @@ static int __ip6_append_data(struct sock *sk,
 
 	if (cork->length + length > maxnonfragsize - headersize) {
 emsgsize:
-		ipv6_local_error(sk, EMSGSIZE, fl6,
-				 mtu - headersize +
-				 sizeof(struct ipv6hdr));
+		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
+		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
 		return -EMSGSIZE;
 	}
 
-- 
cgit 


From 16e693c55eae355810ad805cce5bffd566bf77fc Mon Sep 17 00:00:00 2001
From: Sanjeev Gupta <ghane0@gmail.com>
Date: Sat, 24 Mar 2018 13:07:42 +0800
Subject: Documentation/isdn: check and fix dead links ...

and switch to https where possible.

All links have been eyeballed to verify that the domains have
not changed, etc.

Signed-off-by: Sanjeev Gupta <ghane0@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/isdn/INTERFACE.CAPI |  2 +-
 Documentation/isdn/README         |  4 ++--
 Documentation/isdn/README.FAQ     |  4 ++--
 Documentation/isdn/README.gigaset | 16 +++++++++-------
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 1688b5a1fd77..021aa9cf139d 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -18,7 +18,7 @@ corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both
 directions between the application and the hardware driver.
 
 Format and semantics of CAPI messages are specified in the CAPI 2.0 standard.
-This standard is freely available from http://www.capi.org.
+This standard is freely available from https://www.capi.org.
 
 
 2. Driver and Device Registration
diff --git a/Documentation/isdn/README b/Documentation/isdn/README
index 32d4e80c2c03..74bd2bdb455b 100644
--- a/Documentation/isdn/README
+++ b/Documentation/isdn/README
@@ -33,10 +33,10 @@ README for the ISDN-subsystem
      de.alt.comm.isdn4linux
 
   There is also a well maintained FAQ in English available at
-     http://www.mhessler.de/i4lfaq/
+     https://www.mhessler.de/i4lfaq/
   It can be viewed online, or downloaded in sgml/text/html format.
   The FAQ can also be viewed online at
-     http://www.isdn4linux.de/faq/
+     https://www.isdn4linux.de/faq/i4lfaq.html
   or downloaded from
      ftp://ftp.isdn4linux.de/pub/isdn4linux/FAQ/
 
diff --git a/Documentation/isdn/README.FAQ b/Documentation/isdn/README.FAQ
index 356f7944641d..e5dd1addacdd 100644
--- a/Documentation/isdn/README.FAQ
+++ b/Documentation/isdn/README.FAQ
@@ -8,9 +8,9 @@ You find it in:
 
 In case you just want to see the FAQ online, or download the newest version,
 you can have a look at my website:
-http://www.mhessler.de/i4lfaq/ (view + download)
+https://www.mhessler.de/i4lfaq/ (view + download)
 or:
-http://www.isdn4linux.de/faq/ (view)
+https://www.isdn4linux.de/faq/4lfaq.html (view)
 
 As the extension tells, the FAQ is in SGML format, and you can convert it
 into text/html/... format by using the sgml2txt/sgml2html/... tools.
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 7534c6039adc..9b1ce277ca3d 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -29,8 +29,9 @@ GigaSet 307x Device Driver
         T-Com Sinus 721 data
         Chicago 390 USB (KPN)
 
-     See also http://www.erbze.info/sinus_gigaset.htm and
-              http://gigaset307x.sourceforge.net/
+     See also http://www.erbze.info/sinus_gigaset.htm
+       (archived at https://web.archive.org/web/20100717020421/http://www.erbze.info:80/sinus_gigaset.htm ) and
+	http://gigaset307x.sourceforge.net/
 
      We had also reports from users of Gigaset M105 who could use the drivers
      with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.5.)
@@ -52,7 +53,7 @@ GigaSet 307x Device Driver
      to use CAPI 2.0 or ISDN4Linux for ISDN connections (voice or data).
 
      There are some user space tools available at
-     http://sourceforge.net/projects/gigaset307x/
+     https://sourceforge.net/projects/gigaset307x/
      which provide access to additional device specific functions like SMS,
      phonebook or call journal.
 
@@ -202,7 +203,7 @@ GigaSet 307x Device Driver
      You can use some configuration tool of your distribution to configure this
      "modem" or configure pppd/wvdial manually. There are some example ppp
      configuration files and chat scripts in the gigaset-VERSION/ppp directory
-     in the driver packages from http://sourceforge.net/projects/gigaset307x/.
+     in the driver packages from https://sourceforge.net/projects/gigaset307x/.
      Please note that the USB drivers are not able to change the state of the
      control lines. This means you must use "Stupid Mode" if you are using
      wvdial or you should use the nocrtscts option of pppd.
@@ -361,7 +362,7 @@ GigaSet 307x Device Driver
      ---------------------------
      If you can't solve problems with the driver on your own, feel free to
      use one of the forums, bug trackers, or mailing lists on
-         http://sourceforge.net/projects/gigaset307x
+         https://sourceforge.net/projects/gigaset307x
      or write an electronic mail to the maintainers.
 
      Try to provide as much information as possible, such as
@@ -391,11 +392,12 @@ GigaSet 307x Device Driver
 4.   Links, other software
      ---------------------
      - Sourceforge project developing this driver and associated tools
-         http://sourceforge.net/projects/gigaset307x
+         https://sourceforge.net/projects/gigaset307x
      - Yahoo! Group on the Siemens Gigaset family of devices
-         http://de.groups.yahoo.com/group/Siemens-Gigaset
+         https://de.groups.yahoo.com/group/Siemens-Gigaset
      - Siemens Gigaset/T-Sinus compatibility table
          http://www.erbze.info/sinus_gigaset.htm
+	    (archived at https://web.archive.org/web/20100717020421/http://www.erbze.info:80/sinus_gigaset.htm )
 
 
 5.   Credits
-- 
cgit 


From 743989254ea9f132517806d8893ca9b6cf9dc86b Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 24 Mar 2018 22:08:14 +0100
Subject: net: qmi_wwan: add BroadMobi BM806U 2020:2033

BroadMobi BM806U is an Qualcomm MDM9225 based 3G/4G modem.
Tested hardware BM806U is mounted on D-Link DWR-921-C3 router.
The USB id is added to qmi_wwan.c to allow QMI communication with
the BM806U.

Tested on 4.14 kernel and OpenWRT.

Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 76ac48095c29..7ced28859261 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1180,6 +1180,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
 	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
 	{QMI_FIXED_INTF(0x2001, 0x7e35, 4)},	/* D-Link DWM-222 */
+	{QMI_FIXED_INTF(0x2020, 0x2033, 4)},	/* BroadMobi BM806U */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */
 	{QMI_FIXED_INTF(0x1199, 0x68a2, 8)},	/* Sierra Wireless MC7710 in QMI mode */
-- 
cgit 


From eb82a994479245a79647d302f9b4eb8e7c9d7ca6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 24 Mar 2018 22:25:06 -0700
Subject: net: sched, fix OOO packets with pfifo_fast

After the qdisc lock was dropped in pfifo_fast we allow multiple
enqueue threads and dequeue threads to run in parallel. On the
enqueue side the skb bit ooo_okay is used to ensure all related
skbs are enqueued in-order. On the dequeue side though there is
no similar logic. What we observe is with fewer queues than CPUs
it is possible to re-order packets when two instances of
__qdisc_run() are running in parallel. Each thread will dequeue
a skb and then whichever thread calls the ndo op first will
be sent on the wire. This doesn't typically happen because
qdisc_run() is usually triggered by the same core that did the
enqueue. However, drivers will trigger __netif_schedule()
when queues are transitioning from stopped to awake using the
netif_tx_wake_* APIs. When this happens netif_schedule() calls
qdisc_run() on the same CPU that did the netif_tx_wake_* which
is usually done in the interrupt completion context. This CPU
is selected with the irq affinity which is unrelated to the
enqueue operations.

To resolve this we add a RUNNING bit to the qdisc to ensure
only a single dequeue per qdisc is running. Enqueue and dequeue
operations can still run in parallel and also on multi queue
NICs we can still have a dequeue in-flight per qdisc, which
is typically per CPU.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/sch_generic.c   | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33194dd..8da32678ce18 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_RUNNING,
 };
 
 struct qdisc_size_table {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7e3fbe9cc936..39c144b6ff98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  */
 static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 {
+	bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
 	spinlock_t *root_lock = NULL;
 	struct netdev_queue *txq;
 	struct net_device *dev;
 	struct sk_buff *skb;
-	bool validate;
 
 	/* Dequeue packet */
+	if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+		return false;
+
 	skb = dequeue_skb(q, &validate, packets);
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		if (nolock)
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
 		return false;
+	}
 
-	if (!(q->flags & TCQ_F_NOLOCK))
+	if (!nolock)
 		root_lock = qdisc_lock(q);
 
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	if (nolock)
+		clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	return more;
 }
 
 void __qdisc_run(struct Qdisc *q)
-- 
cgit 


From 5c78f6bfae2b10ff70e21d343e64584ea6280c26 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Mar 2018 01:16:45 +0800
Subject: bonding: fix the err path for dev hwaddr sync in bond_enslave

vlan_vids_add_by_dev is called right after dev hwaddr sync, so on
the err path it should unsync dev hwaddr. Otherwise, the slave
dev's hwaddr will never be unsync when this err happens.

Fixes: 1ff412ad7714 ("bonding: change the bond's vlan syncing functions with the standard ones")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c669554d70bb..0c299de4f2ef 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1565,7 +1565,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	if (res) {
 		netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n",
 			   slave_dev->name);
-		goto err_close;
+		goto err_hwaddr_unsync;
 	}
 
 	prev_slave = bond_last_slave(bond);
@@ -1755,9 +1755,6 @@ err_unregister:
 	netdev_rx_handler_unregister(slave_dev);
 
 err_detach:
-	if (!bond_uses_primary(bond))
-		bond_hw_addr_flush(bond_dev, slave_dev);
-
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
 	if (rcu_access_pointer(bond->primary_slave) == new_slave)
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
@@ -1771,6 +1768,10 @@ err_detach:
 	synchronize_rcu();
 	slave_disable_netpoll(new_slave);
 
+err_hwaddr_unsync:
+	if (!bond_uses_primary(bond))
+		bond_hw_addr_flush(bond_dev, slave_dev);
+
 err_close:
 	slave_dev->priv_flags &= ~IFF_BONDING;
 	dev_close(slave_dev);
-- 
cgit 


From ae42cc62a9f07f1f6979054ed92606b9c30f4a2e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Mar 2018 01:16:46 +0800
Subject: bonding: move dev_mc_sync after master_upper_dev_link in bond_enslave

Beniamino found a crash when adding vlan as slave of bond which is also
the parent link:

  ip link add bond1 type bond
  ip link set bond1 up
  ip link add link bond1 vlan1 type vlan id 80
  ip link set vlan1 master bond1

The call trace is as below:

  [<ffffffffa850842a>] queued_spin_lock_slowpath+0xb/0xf
  [<ffffffffa8515680>] _raw_spin_lock+0x20/0x30
  [<ffffffffa83f6f07>] dev_mc_sync+0x37/0x80
  [<ffffffffc08687dc>] vlan_dev_set_rx_mode+0x1c/0x30 [8021q]
  [<ffffffffa83efd2a>] __dev_set_rx_mode+0x5a/0xa0
  [<ffffffffa83f7138>] dev_mc_sync_multiple+0x78/0x80
  [<ffffffffc084127c>] bond_enslave+0x67c/0x1190 [bonding]
  [<ffffffffa8401909>] do_setlink+0x9c9/0xe50
  [<ffffffffa8403bf2>] rtnl_newlink+0x522/0x880
  [<ffffffffa8403ff7>] rtnetlink_rcv_msg+0xa7/0x260
  [<ffffffffa8424ecb>] netlink_rcv_skb+0xab/0xc0
  [<ffffffffa83fe498>] rtnetlink_rcv+0x28/0x30
  [<ffffffffa8424850>] netlink_unicast+0x170/0x210
  [<ffffffffa8424bf8>] netlink_sendmsg+0x308/0x420
  [<ffffffffa83cc396>] sock_sendmsg+0xb6/0xf0

This is actually a dead lock caused by sync slave hwaddr from master when
the master is the slave's 'slave'. This dead loop check is actually done
by netdev_master_upper_dev_link. However, Commit 1f718f0f4f97 ("bonding:
populate neighbour's private on enslave") moved it after dev_mc_sync.

This patch is to fix it by moving dev_mc_sync after master_upper_dev_link,
so that this loop check would be earlier than dev_mc_sync. It also moves
if (mode == BOND_MODE_8023AD) into if (!bond_uses_primary) clause as an
improvement.

Note team driver also has this issue, I will fix it in another patch.

Fixes: 1f718f0f4f97 ("bonding: populate neighbour's private on enslave")
Reported-by: Beniamino Galvani <bgalvani@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 73 ++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0c299de4f2ef..55e198554ec0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1528,44 +1528,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 			goto err_close;
 	}
 
-	/* If the mode uses primary, then the following is handled by
-	 * bond_change_active_slave().
-	 */
-	if (!bond_uses_primary(bond)) {
-		/* set promiscuity level to new slave */
-		if (bond_dev->flags & IFF_PROMISC) {
-			res = dev_set_promiscuity(slave_dev, 1);
-			if (res)
-				goto err_close;
-		}
-
-		/* set allmulti level to new slave */
-		if (bond_dev->flags & IFF_ALLMULTI) {
-			res = dev_set_allmulti(slave_dev, 1);
-			if (res)
-				goto err_close;
-		}
-
-		netif_addr_lock_bh(bond_dev);
-
-		dev_mc_sync_multiple(slave_dev, bond_dev);
-		dev_uc_sync_multiple(slave_dev, bond_dev);
-
-		netif_addr_unlock_bh(bond_dev);
-	}
-
-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		/* add lacpdu mc addr to mc list */
-		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
-		dev_mc_add(slave_dev, lacpdu_multicast);
-	}
-
 	res = vlan_vids_add_by_dev(slave_dev, bond_dev);
 	if (res) {
 		netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n",
 			   slave_dev->name);
-		goto err_hwaddr_unsync;
+		goto err_close;
 	}
 
 	prev_slave = bond_last_slave(bond);
@@ -1725,6 +1692,37 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		goto err_upper_unlink;
 	}
 
+	/* If the mode uses primary, then the following is handled by
+	 * bond_change_active_slave().
+	 */
+	if (!bond_uses_primary(bond)) {
+		/* set promiscuity level to new slave */
+		if (bond_dev->flags & IFF_PROMISC) {
+			res = dev_set_promiscuity(slave_dev, 1);
+			if (res)
+				goto err_sysfs_del;
+		}
+
+		/* set allmulti level to new slave */
+		if (bond_dev->flags & IFF_ALLMULTI) {
+			res = dev_set_allmulti(slave_dev, 1);
+			if (res)
+				goto err_sysfs_del;
+		}
+
+		netif_addr_lock_bh(bond_dev);
+		dev_mc_sync_multiple(slave_dev, bond_dev);
+		dev_uc_sync_multiple(slave_dev, bond_dev);
+		netif_addr_unlock_bh(bond_dev);
+
+		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+			/* add lacpdu mc addr to mc list */
+			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+
+			dev_mc_add(slave_dev, lacpdu_multicast);
+		}
+	}
+
 	bond->slave_cnt++;
 	bond_compute_features(bond);
 	bond_set_carrier(bond);
@@ -1748,6 +1746,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	return 0;
 
 /* Undo stages on error */
+err_sysfs_del:
+	bond_sysfs_slave_del(new_slave);
+
 err_upper_unlink:
 	bond_upper_dev_unlink(bond, new_slave);
 
@@ -1768,10 +1769,6 @@ err_detach:
 	synchronize_rcu();
 	slave_disable_netpoll(new_slave);
 
-err_hwaddr_unsync:
-	if (!bond_uses_primary(bond))
-		bond_hw_addr_flush(bond_dev, slave_dev);
-
 err_close:
 	slave_dev->priv_flags &= ~IFF_BONDING;
 	dev_close(slave_dev);
-- 
cgit 


From 9f5a90c107741b864398f4ac0014711a8c1d8474 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Mar 2018 01:16:47 +0800
Subject: bonding: process the err returned by dev_set_allmulti properly in
 bond_enslave

When dev_set_promiscuity(1) succeeds but dev_set_allmulti(1) fails,
dev_set_promiscuity(-1) should be done before going to the err path.
Otherwise, dev->promiscuity will leak.

Fixes: 7e1a1ac1fbaa ("bonding: Check return of dev_set_promiscuity/allmulti")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 55e198554ec0..b7b113018853 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1706,8 +1706,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		/* set allmulti level to new slave */
 		if (bond_dev->flags & IFF_ALLMULTI) {
 			res = dev_set_allmulti(slave_dev, 1);
-			if (res)
+			if (res) {
+				if (bond_dev->flags & IFF_PROMISC)
+					dev_set_promiscuity(slave_dev, -1);
 				goto err_sysfs_del;
+			}
 		}
 
 		netif_addr_lock_bh(bond_dev);
-- 
cgit 


From 982cf3b3999d39a2eaca0a65542df33c19b5d814 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Mar 2018 01:25:06 +0800
Subject: team: move dev_mc_sync after master_upper_dev_link in team_port_add

The same fix as in 'bonding: move dev_mc_sync after master_upper_dev_link
in bond_enslave' is needed for team driver.

The panic can be reproduced easily:

  ip link add team1 type team
  ip link set team1 up
  ip link add link team1 vlan1 type vlan id 80
  ip link set vlan1 master team1

Fixes: cb41c997d444 ("team: team should sync the port's uc/mc addrs when add a port")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 56c701b73c12..befed2d22bf4 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1197,11 +1197,6 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_dev_open;
 	}
 
-	netif_addr_lock_bh(dev);
-	dev_uc_sync_multiple(port_dev, dev);
-	dev_mc_sync_multiple(port_dev, dev);
-	netif_addr_unlock_bh(dev);
-
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
 		netdev_err(dev, "Failed to add vlan ids to device %s\n",
@@ -1241,6 +1236,11 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_option_port_add;
 	}
 
+	netif_addr_lock_bh(dev);
+	dev_uc_sync_multiple(port_dev, dev);
+	dev_mc_sync_multiple(port_dev, dev);
+	netif_addr_unlock_bh(dev);
+
 	port->index = -1;
 	list_add_tail_rcu(&port->list, &team->port_list);
 	team_port_enable(team, port);
@@ -1265,8 +1265,6 @@ err_enable_netpoll:
 	vlan_vids_del_by_dev(port_dev, dev);
 
 err_vids_add:
-	dev_uc_unsync(port_dev, dev);
-	dev_mc_unsync(port_dev, dev);
 	dev_close(port_dev);
 
 err_dev_open:
-- 
cgit 


From f3d801baf118c9d452ee7c278df16880c892e669 Mon Sep 17 00:00:00 2001
From: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Date: Mon, 26 Mar 2018 07:19:57 +0200
Subject: net/usb/qmi_wwan.c: Add USB id for lt4120 modem

This is needed to support the modem found in HP EliteBook 820 G3.

Signed-off-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 7ced28859261..2508ab08fc5a 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1241,6 +1241,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 8)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 10)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */
-- 
cgit 


From aaa3149bbee9ba9b4e6f0bd6e3e7d191edeae942 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 26 Mar 2018 16:10:23 +0800
Subject: vhost_net: add missing lock nesting notation

We try to hold TX virtqueue mutex in vhost_net_rx_peek_head_len()
after RX virtqueue mutex is held in handle_rx(). This requires an
appropriate lock nesting notation to calm down deadlock detector.

Fixes: 0308813724606 ("vhost_net: basic polling support")
Reported-by: syzbot+7f073540b1384a614e09@syzkaller.appspotmail.com
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8139bc70ad7d..12bcfbac2cc9 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -630,7 +630,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 
 	if (!len && vq->busyloop_timeout) {
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock(&vq->mutex);
+		mutex_lock_nested(&vq->mutex, 1);
 		vhost_disable_notify(&net->dev, vq);
 
 		preempt_disable();
@@ -763,7 +763,7 @@ static void handle_rx(struct vhost_net *net)
 	struct iov_iter fixup;
 	__virtio16 num_buffers;
 
-	mutex_lock(&vq->mutex);
+	mutex_lock_nested(&vq->mutex, 0);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
-- 
cgit 


From 1c82c9e1f2ba9f32ac41012b91abddd639010f7a Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 26 Mar 2018 18:07:09 +0800
Subject: net: dsa: mt7530: remove redundant MODULE_ALIAS entries

MODULE_ALIAS exports information to allow the module to be auto-loaded at
boot for the drivers registered using legacy platform registration.

However, currently the driver is always used by DT-only platform,
MODULE_ALIAS is redundant and should be removed properly.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 8a0bb000d056..d31246cbefd1 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1424,4 +1424,3 @@ mdio_module_driver(mt7530_mdio_driver);
 MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
 MODULE_DESCRIPTION("Driver for Mediatek MT7530 Switch");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:mediatek-mt7530");
-- 
cgit 


From 3c82b372a9f44aa224b8d5106ff6f1ad516fa8a8 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 26 Mar 2018 18:07:10 +0800
Subject: net: dsa: mt7530: fix module autoloading for OF platform drivers

It's required to create a modules.alias via MODULE_DEVICE_TABLE helper
for the OF platform driver. Otherwise, module autoloading cannot work.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index d31246cbefd1..4e53c5ce23ff 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1409,6 +1409,7 @@ static const struct of_device_id mt7530_of_match[] = {
 	{ .compatible = "mediatek,mt7530" },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, mt7530_of_match);
 
 static struct mdio_driver mt7530_mdio_driver = {
 	.probe  = mt7530_probe,
-- 
cgit 


From 84c9c8f2ac3c22924d30643894fcf7597c633d3e Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 2 Mar 2018 02:09:08 +0000
Subject: net/mlx5e: Don't override vport admin link state in switchdev mode

The vport admin original link state will be re-applied after returning
back to legacy mode, it is not right to change the admin link state value
when in switchdev mode.

Use direct vport commands to alter logical vport state in netdev
representor open/close flows rather than the administrative eswitch API.

Fixes: 20a1ea674783 ('net/mlx5e: Support VF vport link state control for SRIOV switchdev mode')
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..0273c233bc85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -668,7 +668,6 @@ static int mlx5e_rep_open(struct net_device *dev)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int err;
 
 	mutex_lock(&priv->state_lock);
@@ -676,8 +675,9 @@ static int mlx5e_rep_open(struct net_device *dev)
 	if (err)
 		goto unlock;
 
-	if (!mlx5_eswitch_set_vport_state(esw, rep->vport,
-					  MLX5_ESW_VPORT_ADMIN_STATE_UP))
+	if (!mlx5_modify_vport_admin_state(priv->mdev,
+			MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+			rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
 		netif_carrier_on(dev);
 
 unlock:
@@ -690,11 +690,12 @@ static int mlx5e_rep_close(struct net_device *dev)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int ret;
 
 	mutex_lock(&priv->state_lock);
-	(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+	mlx5_modify_vport_admin_state(priv->mdev,
+			MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+			rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
 	ret = mlx5e_close_locked(dev);
 	mutex_unlock(&priv->state_lock);
 	return ret;
-- 
cgit 


From 5ecadff0b6cfb52ba5d1bd07c6372acea5418c39 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 13 Feb 2018 13:43:39 +0200
Subject: net/mlx5e: Use 32 bits to store VF representor SQ number

SQs are 32 and not 16 bits, hence it's wrong to use only 16 bits to
store the sq number for which are going to set steering rule, fix that.

Fixes: cb67b832921c ('net/mlx5e: Introduce SRIOV VF representors')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0273c233bc85..738554a6c69f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -209,7 +209,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 
 static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep,
-				 u16 *sqns_array, int sqns_num)
+				 u32 *sqns_array, int sqns_num)
 {
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5e_rep_priv *rpriv;
@@ -255,9 +255,9 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 	struct mlx5e_channel *c;
 	int n, tc, num_sqs = 0;
 	int err = -ENOMEM;
-	u16 *sqs;
+	u32 *sqs;
 
-	sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL);
+	sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
 	if (!sqs)
 		goto out;
 
-- 
cgit 


From f125376b06bcc57dfb0216ac8d6ec6d5dcf81025 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 15 Feb 2018 12:39:55 +0200
Subject: net/mlx5: Make eswitch support to depend on switchdev

Add dependancy for switchdev to be congfigured as any user-space control
plane SW is expected to use the HW switchdev ID to locate the representors
related to VFs of a certain PF and apply SW/offloaded switching on them.

Fixes: e80541ecabd5 ('net/mlx5: Add CONFIG_MLX5_ESWITCH Kconfig')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig   | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 2 --
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 25deaa5a534c..c032319f1cb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -46,7 +46,7 @@ config MLX5_MPFS
 
 config MLX5_ESWITCH
 	bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
-	depends on MLX5_CORE_EN
+	depends on MLX5_CORE_EN && NET_SWITCHDEV
 	default y
 	---help---
 	  Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index da94c8cba5ee..e35859657217 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4069,7 +4069,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
 	}
 }
 
-#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
 static const struct switchdev_ops mlx5e_switchdev_ops = {
 	.switchdev_port_attr_get	= mlx5e_attr_get,
 };
@@ -4175,7 +4175,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 
 	mlx5e_set_netdev_dev_addr(netdev);
 
-#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
 	if (MLX5_VPORT_MANAGER(mdev))
 		netdev->switchdev_ops = &mlx5e_switchdev_ops;
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 738554a6c69f..5ece289548ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -900,9 +900,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
 
 	netdev->ethtool_ops	  = &mlx5e_rep_ethtool_ops;
 
-#ifdef CONFIG_NET_SWITCHDEV
 	netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
-#endif
 
 	netdev->features	 |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
 	netdev->hw_features      |= NETIF_F_HW_TC;
-- 
cgit 


From b392a2078b5e0094ff38aa0c9d2a31b3f607d4ef Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 15 Feb 2018 12:41:48 +0200
Subject: net/mlx5e: Verify coalescing parameters in range

Add check of coalescing parameters received through ethtool are within
range of values supported by the HW.
Driver gets the coalescing rx/tx-usecs and rx/tx-frames as set by the
users through ethtool. The ethtool support up to 32 bit value for each.
However, mlx5 modify cq limits the coalescing time parameter to 12 bit
and coalescing frames parameters to 16 bits.
Return out of range error if user tries to set these parameters to
higher values.

Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality')
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc8048f68f11..59ebfdae6695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -477,6 +477,9 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
 	return mlx5e_ethtool_get_coalesce(priv, coal);
 }
 
+#define MLX5E_MAX_COAL_TIME		MLX5_MAX_CQ_PERIOD
+#define MLX5E_MAX_COAL_FRAMES		MLX5_MAX_CQ_COUNT
+
 static void
 mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
@@ -511,6 +514,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 	if (!MLX5_CAP_GEN(mdev, cq_moderation))
 		return -EOPNOTSUPP;
 
+	if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME ||
+	    coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) {
+		netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n",
+			    __func__, MLX5E_MAX_COAL_TIME);
+		return -ERANGE;
+	}
+
+	if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES ||
+	    coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) {
+		netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n",
+			    __func__, MLX5E_MAX_COAL_FRAMES);
+		return -ERANGE;
+	}
+
 	mutex_lock(&priv->state_lock);
 	new_channels.params = priv->channels.params;
 
-- 
cgit 


From 4246f698dd58e3c6246fa919ef0b0a1d29a57e4a Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Wed, 28 Feb 2018 12:56:42 +0200
Subject: net/mlx5e: Fix traffic being dropped on VF representor

Increase representor netdev RQ size to avoid dropped packets.
The current size (two) is just too small to keep up with
conventional slow path traffic patterns.
Also match the SQ size to the RQ size.

Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5ece289548ad..9a5a2a7eeab3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -44,6 +44,11 @@
 #include "en_tc.h"
 #include "fs_core.h"
 
+#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
+	max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
+	max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
+
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
 
 static void mlx5e_rep_get_drvinfo(struct net_device *dev,
@@ -878,9 +883,9 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
-	params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
 	params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
-	params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+	params->log_rq_size = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
 
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
-- 
cgit 


From af1607c37d9d85a66fbcf43b7f11bf3d94b9bb69 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Thu, 8 Mar 2018 09:20:55 +0000
Subject: net/mlx5e: Fix memory usage issues in offloading TC flows

For NIC flows, the parsed attributes are not freed when we exit
successfully from mlx5e_configure_flower().

There is possible double free for eswitch flows. If error is returned
from rhashtable_insert_fast(), the parse attrs will be freed in
mlx5e_tc_del_flow(), but they will be freed again before exiting
mlx5e_configure_flower().

To fix both issues we do the following:
(1) change the condition that determines if to issue the free call to
    check if this flow is NIC flow, or it does not have encap action.
(2) reorder the code such that that the check and free calls are done
    before we attempt to add into the hash table.

Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow')
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fa86a1466718..ae11678a31e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2608,19 +2608,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 	if (err != -EAGAIN)
 		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
 
+	if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+	    !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+		kvfree(parse_attr);
+
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
 				     tc->ht_params);
-	if (err)
-		goto err_del_rule;
+	if (err) {
+		mlx5e_tc_del_flow(priv, flow);
+		kfree(flow);
+	}
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH &&
-	    !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
-		kvfree(parse_attr);
 	return err;
 
-err_del_rule:
-	mlx5e_tc_del_flow(priv, flow);
-
 err_free:
 	kvfree(parse_attr);
 	kfree(flow);
-- 
cgit 


From 423c9db29943cfc43e3a408192e9efa4178af6a1 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 13 Mar 2018 21:43:43 +0200
Subject: net/mlx5e: Avoid using the ipv6 stub in the TC offload neigh update
 path

Currently we use the global ipv6_stub var to access the ipv6 global
nd table. This practice gets us to troubles when the stub is only partially
set e.g when ipv6 is loaded under the disabled policy. In this case, as of commit
343d60aada5a ("ipv6: change ipv6_stub_impl.ipv6_dst_lookup to take net argument")
the stub is not null, but stub->nd_tbl is and we crash.

As we can access the ipv6 nd_tbl directly, the fix is just to avoid the
reference through the stub. There is one place in the code where we
issue ipv6 route lookup and keep doing it through the stub, but that
mentioned commit makes sure we get -EAFNOSUPPORT from the stack.

Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow")
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c  | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 9a5a2a7eeab3..500d817d2b0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -293,7 +293,7 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
 static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-	unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms,
+	unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms,
 						DELAY_PROBE_TIME);
 #else
 	unsigned long ipv6_interval = ~0UL;
@@ -429,7 +429,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 	case NETEVENT_NEIGH_UPDATE:
 		n = ptr;
 #if IS_ENABLED(CONFIG_IPV6)
-		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+		if (n->tbl != &nd_tbl && n->tbl != &arp_tbl)
 #else
 		if (n->tbl != &arp_tbl)
 #endif
@@ -477,7 +477,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 		 * done per device delay prob time parameter.
 		 */
 #if IS_ENABLED(CONFIG_IPV6)
-		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+		if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl))
 #else
 		if (!p->dev || p->tbl != &arp_tbl)
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ae11678a31e8..43234cabf444 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -963,7 +963,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 		tbl = &arp_tbl;
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (m_neigh->family == AF_INET6)
-		tbl = ipv6_stub->nd_tbl;
+		tbl = &nd_tbl;
 #endif
 	else
 		return;
-- 
cgit 


From a117f73dc2430443f23e18367fa545981129c1a6 Mon Sep 17 00:00:00 2001
From: Shahar Klein <shahark@mellanox.com>
Date: Tue, 20 Mar 2018 14:44:40 +0200
Subject: net/mlx5e: Sync netdev vxlan ports at open

When mlx5_core is loaded it is expected to sync ports
with all vxlan devices so it can support vxlan encap/decap.
This is done via udp_tunnel_get_rx_info(). Currently this
call is set in mlx5e_nic_enable() and if the netdev is not in
NETREG_REGISTERED state it will not be called.

Normally on load the netdev state is not NETREG_REGISTERED
so udp_tunnel_get_rx_info() will not be called.

Moving udp_tunnel_get_rx_info() to mlx5e_open() so
it will be called on netdev UP event and allow encap/decap.

Fixes: 610e89e05c3f ("net/mlx5e: Don't sync netdev state when not registered")
Signed-off-by: Shahar Klein <shahark@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e35859657217..9b4827d36e3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2572,6 +2572,9 @@ int mlx5e_open(struct net_device *netdev)
 		mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
 	mutex_unlock(&priv->state_lock);
 
+	if (mlx5e_vxlan_allowed(priv->mdev))
+		udp_tunnel_get_rx_info(netdev);
+
 	return err;
 }
 
@@ -4327,12 +4330,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_init_app(priv);
 #endif
-	/* Device already registered: sync netdev system state */
-	if (mlx5e_vxlan_allowed(mdev)) {
-		rtnl_lock();
-		udp_tunnel_get_rx_info(netdev);
-		rtnl_unlock();
-	}
 
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
-- 
cgit 


From d4c4bc11353f3bea6754f7d21e3612c9f32d1d64 Mon Sep 17 00:00:00 2001
From: Giuseppe Lippolis <giu.lippolis@gmail.com>
Date: Mon, 26 Mar 2018 16:34:39 +0200
Subject: net-usb: add qmi_wwan if on lte modem wistron neweb d18q1

This modem is embedded on dlink dwr-921 router.
    The oem configuration states:

    T:  Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=480 MxCh= 0
    D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
    P:  Vendor=1435 ProdID=0918 Rev= 2.32
    S:  Manufacturer=Android
    S:  Product=Android
    S:  SerialNumber=0123456789ABCDEF
    C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA
    I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
    E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none)
    E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
    E:  Ad=84(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=86(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=88(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
    E:  Ad=8a(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
    E:  Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    I:* If#= 6 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none)
    E:  Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
    E:  Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=125us

Tested on openwrt distribution

Signed-off-by: Giuseppe Lippolis <giu.lippolis@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 2508ab08fc5a..ca066b785e9f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1104,6 +1104,9 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 4)},	/* Wistron NeWeb D18Q1 */
+	{QMI_FIXED_INTF(0x1435, 0xd181, 5)},	/* Wistron NeWeb D18Q1 */
 	{QMI_FIXED_INTF(0x16d8, 0x6003, 0)},	/* CMOTech 6003 */
 	{QMI_FIXED_INTF(0x16d8, 0x6007, 0)},	/* CMOTech CHE-628S */
 	{QMI_FIXED_INTF(0x16d8, 0x6008, 0)},	/* CMOTech CMU-301 */
-- 
cgit 


From 1dfe82ebd7d8fd43dba9948fdfb31f145014baa0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 26 Mar 2018 08:08:07 -0700
Subject: net: fix possible out-of-bound read in skb_network_protocol()

skb mac header is not necessarily set at the time skb_network_protocol()
is called. Use skb->data instead.

BUG: KASAN: slab-out-of-bounds in skb_network_protocol+0x46b/0x4b0 net/core/dev.c:2739
Read of size 2 at addr ffff8801b3097a0b by task syz-executor5/14242

CPU: 1 PID: 14242 Comm: syz-executor5 Not tainted 4.16.0-rc6+ #280
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23c/0x360 mm/kasan/report.c:412
 __asan_report_load_n_noabort+0xf/0x20 mm/kasan/report.c:443
 skb_network_protocol+0x46b/0x4b0 net/core/dev.c:2739
 harmonize_features net/core/dev.c:2924 [inline]
 netif_skb_features+0x509/0x9b0 net/core/dev.c:3011
 validate_xmit_skb+0x81/0xb00 net/core/dev.c:3084
 validate_xmit_skb_list+0xbf/0x120 net/core/dev.c:3142
 packet_direct_xmit+0x117/0x790 net/packet/af_packet.c:256
 packet_snd net/packet/af_packet.c:2944 [inline]
 packet_sendmsg+0x3aed/0x60b0 net/packet/af_packet.c:2969
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:639
 ___sys_sendmsg+0x767/0x8b0 net/socket.c:2047
 __sys_sendmsg+0xe5/0x210 net/socket.c:2081

Fixes: 19acc327258a ("gso: Handle Trans-Ether-Bridging protocol in skb_network_protocol()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Pravin B Shelar <pshelar@ovn.org>
Reported-by: Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 12be20535714..ef0cc6ea5f8d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2735,7 +2735,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 		if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
 			return 0;
 
-		eth = (struct ethhdr *)skb_mac_header(skb);
+		eth = (struct ethhdr *)skb->data;
 		type = eth->h_proto;
 	}
 
-- 
cgit 


From 19c9ea363a244f85f90a424f9936e6d56449e33c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 26 Mar 2018 19:19:30 +0200
Subject: r8169: fix setting driver_data after register_netdev

pci_set_drvdata() is called only after registering the net_device,
therefore we could run into a NPE if one of the functions using
driver_data is called before it's set.

Fix this by calling pci_set_drvdata() before registering the
net_device.

This fix is a candidate for stable. As far as I can see the
bug has been there in kernel version 3.2 already, therefore
I can't provide a reference which commit is fixed by it.

The fix may need small adjustments per kernel version because
due to other changes the label which is jumped to if
register_netdev() fails has changed over time.

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0bf7d1759250..b4779acb6b5c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -8660,12 +8660,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!tp->counters)
 		return -ENOMEM;
 
+	pci_set_drvdata(pdev, dev);
+
 	rc = register_netdev(dev);
 	if (rc < 0)
 		return rc;
 
-	pci_set_drvdata(pdev, dev);
-
 	netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
 		   rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
 		   (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
-- 
cgit 


From 734549eb550c0c720bc89e50501f1b1e98cdd841 Mon Sep 17 00:00:00 2001
From: Craig Dillabaugh <cdillaba@mojatatu.com>
Date: Mon, 26 Mar 2018 14:58:32 -0400
Subject: net sched actions: fix dumping which requires several messages to
 user space

Fixes a bug in the tcf_dump_walker function that can cause some actions
to not be reported when dumping a large number of actions. This issue
became more aggrevated when cookies feature was added. In particular
this issue is manifest when large cookie values are assigned to the
actions and when enough actions are created that the resulting table
must be dumped in multiple batches.

The number of actions returned in each batch is limited by the total
number of actions and the memory buffer size.  With small cookies
the numeric limit is reached before the buffer size limit, which avoids
the code path triggering this bug. When large cookies are used buffer
fills before the numeric limit, and the erroneous code path is hit.

For example after creating 32 csum actions with the cookie
aaaabbbbccccdddd

$ tc actions ls action csum
total acts 26

    action order 0: csum (tcp) action continue
    index 1 ref 1 bind 0
    cookie aaaabbbbccccdddd

    .....

    action order 25: csum (tcp) action continue
    index 26 ref 1 bind 0
    cookie aaaabbbbccccdddd
total acts 6

    action order 0: csum (tcp) action continue
    index 28 ref 1 bind 0
    cookie aaaabbbbccccdddd

    ......

    action order 5: csum (tcp) action continue
    index 32 ref 1 bind 0
    cookie aaaabbbbccccdddd

Note that the action with index 27 is omitted from the report.

Fixes: 4b3550ef530c ("[NET_SCHED]: Use nla_nest_start/nla_nest_end")"
Signed-off-by: Craig Dillabaugh <cdillaba@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..efc6bfb9a4e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -135,8 +135,10 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			continue;
 
 		nest = nla_nest_start(skb, n_i);
-		if (!nest)
+		if (!nest) {
+			index--;
 			goto nla_put_failure;
+		}
 		err = tcf_action_dump_1(skb, p, 0, 0);
 		if (err < 0) {
 			index--;
-- 
cgit 


From cd00edc179863848abab5cc5683de5b7b5f70954 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Mon, 26 Mar 2018 12:31:21 -0700
Subject: strparser: Fix sign of err codes

strp_parser_err is called with a negative code everywhere, which then
calls abort_parser with a negative code.  strp_msg_timeout calls
abort_parser directly with a positive code.  Negate ETIMEDOUT
to match signed-ness of other calls.

The default abort_parser callback, strp_abort_strp, sets
sk->sk_err to err.  Also negate the error here so sk_err always
holds a positive value, as the rest of the net code expects.  Currently
a negative sk_err can result in endless loops, or user code that
thinks it actually sent/received err bytes.

Found while testing net/tls_sw recv path.

Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages")
Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/strparser/strparser.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 1fdab5c4eda8..b9283ce5cd85 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -60,7 +60,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
 		struct sock *sk = strp->sk;
 
 		/* Report an error on the lower socket */
-		sk->sk_err = err;
+		sk->sk_err = -err;
 		sk->sk_error_report(sk);
 	}
 }
@@ -458,7 +458,7 @@ static void strp_msg_timeout(struct work_struct *w)
 	/* Message assembly timed out */
 	STRP_STATS_INCR(strp->stats.msg_timeouts);
 	strp->cb.lock(strp);
-	strp->cb.abort_parser(strp, ETIMEDOUT);
+	strp->cb.abort_parser(strp, -ETIMEDOUT);
 	strp->cb.unlock(strp);
 }
 
-- 
cgit 


From b85ab56c3f81c5a24b5a5213374f549df06430da Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 26 Mar 2018 15:08:33 -0700
Subject: llc: properly handle dev_queue_xmit() return value

llc_conn_send_pdu() pushes the skb into write queue and
calls llc_conn_send_pdus() to flush them out. However, the
status of dev_queue_xmit() is not returned to caller,
in this case, llc_conn_state_process().

llc_conn_state_process() needs hold the skb no matter
success or failure, because it still uses it after that,
therefore we should hold skb before dev_queue_xmit() when
that skb is the one being processed by llc_conn_state_process().

For other callers, they can just pass NULL and ignore
the return value as they are.

Reported-by: Noam Rathaus <noamr@beyondsecurity.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_conn.h |  2 +-
 net/llc/llc_c_ac.c     | 15 +++++++++------
 net/llc/llc_conn.c     | 32 +++++++++++++++++++++++---------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index fe994d2e5286..5c40f118c0fa 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -103,7 +103,7 @@ void llc_sk_reset(struct sock *sk);
 
 /* Access to a connection */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index f59648018060..163121192aca 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -389,7 +389,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
 	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -916,7 +916,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 	llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -935,14 +935,17 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb)
 {
 	struct llc_sock *llc = llc_sk(sk);
+	int ret;
 
 	if (llc->ack_must_be_send) {
-		llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
+		ret = llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
 		llc->ack_must_be_send = 0 ;
 		llc->ack_pf = 0;
-	} else
-		llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
-	return 0;
+	} else {
+		ret = llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
+	}
+
+	return ret;
 }
 
 /**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 9177dbb16dce..110e32bcb399 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
 #endif
 
 static int llc_find_offset(int state, int ev_type);
-static void llc_conn_send_pdus(struct sock *sk);
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
 static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
 static int llc_exec_conn_trans_actions(struct sock *sk,
 				       struct llc_conn_state_trans *trans,
@@ -193,11 +193,11 @@ out_skb_put:
 	return rc;
 }
 
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
 {
 	/* queue PDU to send to MAC layer */
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	llc_conn_send_pdus(sk);
+	return llc_conn_send_pdus(sk, skb);
 }
 
 /**
@@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -340,12 +340,16 @@ out:
 /**
  *	llc_conn_send_pdus - Sends queued PDUs
  *	@sk: active connection
+ *	@hold_skb: the skb held by caller, or NULL if does not care
  *
- *	Sends queued pdus to MAC layer for transmission.
+ *	Sends queued pdus to MAC layer for transmission. When @hold_skb is
+ *	NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
+ *	successfully, or 1 for failure.
  */
-static void llc_conn_send_pdus(struct sock *sk)
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
 {
 	struct sk_buff *skb;
+	int ret = 0;
 
 	while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -357,10 +361,20 @@ static void llc_conn_send_pdus(struct sock *sk)
 			skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
 			if (!skb2)
 				break;
-			skb = skb2;
+			dev_queue_xmit(skb2);
+		} else {
+			bool is_target = skb == hold_skb;
+			int rc;
+
+			if (is_target)
+				skb_get(skb);
+			rc = dev_queue_xmit(skb);
+			if (is_target)
+				ret = rc;
 		}
-		dev_queue_xmit(skb);
 	}
+
+	return ret;
 }
 
 /**
-- 
cgit 


From ab6f6dd18ab801fcbbaa05fb5401e91f48778e04 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Tue, 27 Mar 2018 10:43:50 +0200
Subject: net/smc: use announced length in sock_recvmsg()

Not every CLC proposal message needs the maximum buffer length.
Due to the MSG_WAITALL flag, it is important to use the peeked
real length when receiving the message.

Fixes: d63d271ce2b5ce ("smc: switch to sock_recvmsg()")
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_clc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..15c213250606 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -133,7 +133,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 
 	/* receive the complete CLC message */
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
 	krflags = MSG_WAITALL;
 	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
-- 
cgit 


From 6e8814ceb7e8f468659ef9253bd212c07ae19584 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 27 Mar 2018 14:41:18 +0300
Subject: net/mlx4_en: Fix mixed PFC and Global pause user control requests

Global pause and PFC configuration should be mutually exclusive (i.e. only
one of them at most can be set). However, once PFC was turned off,
driver automatically turned Global pause on. This is a bug.

Fix the driver behaviour to turn off PFC/Global once the user turned the
other on.

This also fixed a weird behaviour that at a current time, the profile
had both PFC and global pause configuration turned on, which is
Hardware-wise impossible and caused returning false positive indication
to query tools.

In addition, fix error code when setting global pause or PFC to change
metadata only upon successful change.

Also, removed useless debug print.

Fixes: af7d51852631 ("net/mlx4_en: Add DCB PFC support through CEE netlink commands")
Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c  | 72 ++++++++++++++-----------
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 33 +++++++-----
 drivers/net/ethernet/mellanox/mlx4/en_main.c    |  4 +-
 3 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 1a0c3bf86ead..752a72499b4f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -156,57 +156,63 @@ static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
 static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct mlx4_en_port_profile *prof = priv->prof;
 	struct mlx4_en_dev *mdev = priv->mdev;
+	u8 tx_pause, tx_ppp, rx_pause, rx_ppp;
 
 	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
 		return 1;
 
 	if (priv->cee_config.pfc_state) {
 		int tc;
+		rx_ppp = prof->rx_ppp;
+		tx_ppp = prof->tx_ppp;
 
-		priv->prof->rx_pause = 0;
-		priv->prof->tx_pause = 0;
 		for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) {
 			u8 tc_mask = 1 << tc;
 
 			switch (priv->cee_config.dcb_pfc[tc]) {
 			case pfc_disabled:
-				priv->prof->tx_ppp &= ~tc_mask;
-				priv->prof->rx_ppp &= ~tc_mask;
+				tx_ppp &= ~tc_mask;
+				rx_ppp &= ~tc_mask;
 				break;
 			case pfc_enabled_full:
-				priv->prof->tx_ppp |= tc_mask;
-				priv->prof->rx_ppp |= tc_mask;
+				tx_ppp |= tc_mask;
+				rx_ppp |= tc_mask;
 				break;
 			case pfc_enabled_tx:
-				priv->prof->tx_ppp |= tc_mask;
-				priv->prof->rx_ppp &= ~tc_mask;
+				tx_ppp |= tc_mask;
+				rx_ppp &= ~tc_mask;
 				break;
 			case pfc_enabled_rx:
-				priv->prof->tx_ppp &= ~tc_mask;
-				priv->prof->rx_ppp |= tc_mask;
+				tx_ppp &= ~tc_mask;
+				rx_ppp |= tc_mask;
 				break;
 			default:
 				break;
 			}
 		}
-		en_dbg(DRV, priv, "Set pfc on\n");
+		rx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->rx_pause;
+		tx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->tx_pause;
 	} else {
-		priv->prof->rx_pause = 1;
-		priv->prof->tx_pause = 1;
-		en_dbg(DRV, priv, "Set pfc off\n");
+		rx_ppp = 0;
+		tx_ppp = 0;
+		rx_pause = prof->rx_pause;
+		tx_pause = prof->tx_pause;
 	}
 
 	if (mlx4_SET_PORT_general(mdev->dev, priv->port,
 				  priv->rx_skb_size + ETH_FCS_LEN,
-				  priv->prof->tx_pause,
-				  priv->prof->tx_ppp,
-				  priv->prof->rx_pause,
-				  priv->prof->rx_ppp)) {
+				  tx_pause, tx_ppp, rx_pause, rx_ppp)) {
 		en_err(priv, "Failed setting pause params\n");
 		return 1;
 	}
 
+	prof->tx_ppp = tx_ppp;
+	prof->rx_ppp = rx_ppp;
+	prof->tx_pause = tx_pause;
+	prof->rx_pause = rx_pause;
+
 	return 0;
 }
 
@@ -408,6 +414,7 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_port_profile *prof = priv->prof;
 	struct mlx4_en_dev *mdev = priv->mdev;
+	u32 tx_pause, tx_ppp, rx_pause, rx_ppp;
 	int err;
 
 	en_dbg(DRV, priv, "cap: 0x%x en: 0x%x mbc: 0x%x delay: %d\n",
@@ -416,23 +423,26 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
 			pfc->mbc,
 			pfc->delay);
 
-	prof->rx_pause = !pfc->pfc_en;
-	prof->tx_pause = !pfc->pfc_en;
-	prof->rx_ppp = pfc->pfc_en;
-	prof->tx_ppp = pfc->pfc_en;
+	rx_pause = prof->rx_pause && !pfc->pfc_en;
+	tx_pause = prof->tx_pause && !pfc->pfc_en;
+	rx_ppp = pfc->pfc_en;
+	tx_ppp = pfc->pfc_en;
 
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
 				    priv->rx_skb_size + ETH_FCS_LEN,
-				    prof->tx_pause,
-				    prof->tx_ppp,
-				    prof->rx_pause,
-				    prof->rx_ppp);
-	if (err)
+				    tx_pause, tx_ppp, rx_pause, rx_ppp);
+	if (err) {
 		en_err(priv, "Failed setting pause params\n");
-	else
-		mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
-						prof->rx_ppp, prof->rx_pause,
-						prof->tx_ppp, prof->tx_pause);
+		return err;
+	}
+
+	mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+					rx_ppp, rx_pause, tx_ppp, tx_pause);
+
+	prof->tx_ppp = tx_ppp;
+	prof->rx_ppp = rx_ppp;
+	prof->rx_pause = rx_pause;
+	prof->tx_pause = tx_pause;
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..f3302edba8b4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1046,27 +1046,32 @@ static int mlx4_en_set_pauseparam(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	u8 tx_pause, tx_ppp, rx_pause, rx_ppp;
 	int err;
 
 	if (pause->autoneg)
 		return -EINVAL;
 
-	priv->prof->tx_pause = pause->tx_pause != 0;
-	priv->prof->rx_pause = pause->rx_pause != 0;
+	tx_pause = !!(pause->tx_pause);
+	rx_pause = !!(pause->rx_pause);
+	rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause);
+	tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause);
+
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
 				    priv->rx_skb_size + ETH_FCS_LEN,
-				    priv->prof->tx_pause,
-				    priv->prof->tx_ppp,
-				    priv->prof->rx_pause,
-				    priv->prof->rx_ppp);
-	if (err)
-		en_err(priv, "Failed setting pause params\n");
-	else
-		mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
-						priv->prof->rx_ppp,
-						priv->prof->rx_pause,
-						priv->prof->tx_ppp,
-						priv->prof->tx_pause);
+				    tx_pause, tx_ppp, rx_pause, rx_ppp);
+	if (err) {
+		en_err(priv, "Failed setting pause params, err = %d\n", err);
+		return err;
+	}
+
+	mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+					rx_ppp, rx_pause, tx_ppp, tx_pause);
+
+	priv->prof->tx_pause = tx_pause;
+	priv->prof->rx_pause = rx_pause;
+	priv->prof->tx_ppp = tx_ppp;
+	priv->prof->rx_ppp = rx_ppp;
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 2c2965497ed3..d25e16d2c319 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -163,9 +163,9 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 		params->udp_rss = 0;
 	}
 	for (i = 1; i <= MLX4_MAX_PORTS; i++) {
-		params->prof[i].rx_pause = 1;
+		params->prof[i].rx_pause = !(pfcrx || pfctx);
 		params->prof[i].rx_ppp = pfcrx;
-		params->prof[i].tx_pause = 1;
+		params->prof[i].tx_pause = !(pfcrx || pfctx);
 		params->prof[i].tx_ppp = pfctx;
 		params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
 		params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
-- 
cgit 


From 461d5f1b59490ce0096dfda45e10038c122a7892 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Tue, 27 Mar 2018 14:41:19 +0300
Subject: net/mlx4_core: Fix memory leak while delete slave's resources

mlx4_delete_all_resources_for_slave in resource tracker should free all
memory allocated for a slave.
While releasing memory of fs_rule, it misses releasing memory of
fs_rule->mirr_mbox.

Fixes: 78efed275117 ('net/mlx4_core: Support mirroring VF DMFS rules on both ports')
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 606a0e0beeae..29e50f787349 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -5088,6 +5088,7 @@ static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
 						 &tracker->res_tree[RES_FS_RULE]);
 					list_del(&fs_rule->com.list);
 					spin_unlock_irq(mlx4_tlock(dev));
+					kfree(fs_rule->mirr_mbox);
 					kfree(fs_rule);
 					state = 0;
 					break;
-- 
cgit 


From dc6455a71c7fc5117977e197f67f71b49f27baba Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 27 Mar 2018 20:50:52 +0800
Subject: vhost: correctly remove wait queue during poll failure

We tried to remove vq poll from wait queue, but do not check whether
or not it was in a list before. This will lead double free. Fixing
this by switching to use vhost_poll_stop() which zeros poll->wqh after
removing poll from waitqueue to make sure it won't be freed twice.

Cc: Darren Kenny <darren.kenny@oracle.com>
Reported-by: syzbot+c0272972b01b872e604a@syzkaller.appspotmail.com
Fixes: 2b8b328b61c79 ("vhost_net: handle polling errors when setting backend")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 1b3e8d2d5c8b..5d5a9d9e66d3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -212,8 +212,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
 	if (mask & EPOLLERR) {
-		if (poll->wqh)
-			remove_wait_queue(poll->wqh, &poll->wait);
+		vhost_poll_stop(poll);
 		ret = -EINVAL;
 	}
 
-- 
cgit 


From b9fc828debc8ac2bb21b5819a44d2aea456f1c95 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@cavium.com>
Date: Tue, 27 Mar 2018 06:34:41 -0700
Subject: qede: Fix barrier usage after tx doorbell write.

Since commit c5ad119fb6c09b0297446be05bd66602fa564758
("net: sched: pfifo_fast use skb_array") driver is exposed
to an issue where it is hitting NULL skbs while handling TX
completions. Driver uses mmiowb() to flush the writes to the
doorbell bar which is a write-combined bar, however on x86
mmiowb() does not flush the write combined buffer.

This patch fixes this problem by replacing mmiowb() with wmb()
after the write combined doorbell write so that writes are
flushed and synchronized from more than one processor.

V1->V2:
-------
This patch was marked as "superseded" in patchwork.
(Not really sure for what reason).Resending it as v2.

Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Manish Chopra <manish.chopra@cavium.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index dafc079ab6b9..2e921cab1792 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -320,13 +320,11 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
 	barrier();
 	writel(txq->tx_db.raw, txq->doorbell_addr);
 
-	/* mmiowb is needed to synchronize doorbell writes from more than one
-	 * processor. It guarantees that the write arrives to the device before
-	 * the queue lock is released and another start_xmit is called (possibly
-	 * on another CPU). Without this barrier, the next doorbell can bypass
-	 * this doorbell. This is applicable to IA64/Altix systems.
+	/* Fence required to flush the write combined buffer, since another
+	 * CPU may write to the same doorbell address and data may be lost
+	 * due to relaxed order nature of write combined bar.
 	 */
-	mmiowb();
+	wmb();
 }
 
 static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
-- 
cgit 


From 2d2d99ec13f62d5d2cecb6169dfdb6bbe05356d0 Mon Sep 17 00:00:00 2001
From: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Date: Tue, 27 Mar 2018 14:51:16 +0530
Subject: lan78xx: Crash in lan78xx_writ_reg (Workqueue: events
 lan78xx_deferred_multicast_write)

Description:
Crash was reported with syzkaller pointing to lan78xx_write_reg routine.

Root-cause:
Proper cleanup of workqueues and init/setup routines was not happening
in failure conditions.

Fix:
Handled the error conditions by cleaning up the queues and init/setup
routines.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 90d176279152..55a78eb96961 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2873,8 +2873,7 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 	if (ret < 0) {
 		netdev_warn(dev->net,
 			    "lan78xx_setup_irq_domain() failed : %d", ret);
-		kfree(pdata);
-		return ret;
+		goto out1;
 	}
 
 	dev->net->hard_header_len += TX_OVERHEAD;
@@ -2882,14 +2881,32 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 
 	/* Init all registers */
 	ret = lan78xx_reset(dev);
+	if (ret) {
+		netdev_warn(dev->net, "Registers INIT FAILED....");
+		goto out2;
+	}
 
 	ret = lan78xx_mdio_init(dev);
+	if (ret) {
+		netdev_warn(dev->net, "MDIO INIT FAILED.....");
+		goto out2;
+	}
 
 	dev->net->flags |= IFF_MULTICAST;
 
 	pdata->wol = WAKE_MAGIC;
 
 	return ret;
+
+out2:
+	lan78xx_remove_irq_domain(dev);
+
+out1:
+	netdev_warn(dev->net, "Bind routine FAILED");
+	cancel_work_sync(&pdata->set_multicast);
+	cancel_work_sync(&pdata->set_vlan);
+	kfree(pdata);
+	return ret;
 }
 
 static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf)
@@ -2901,6 +2918,8 @@ static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf)
 	lan78xx_remove_mdio(dev);
 
 	if (pdata) {
+		cancel_work_sync(&pdata->set_multicast);
+		cancel_work_sync(&pdata->set_vlan);
 		netif_dbg(dev, ifdown, dev->net, "free pdata");
 		kfree(pdata);
 		pdata = NULL;
-- 
cgit 


From 5568cdc368c349eee7b5fc48bc956234a0828d71 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 29 Mar 2018 11:42:14 -0400
Subject: ip_tunnel: Resolve ipsec merge conflict properly.

We want to use dev_set_mtu() regardless of how we calculate
the mtu value.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f3db1f35a79d..a7fd1c5a2a14 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1120,14 +1120,14 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 	if (tb[IFLA_MTU]) {
 		unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
 
-		dev->mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
-				 (unsigned int)(max - sizeof(struct iphdr)));
-	} else {
-		err = dev_set_mtu(dev, mtu);
-		if (err)
-			goto err_dev_set_mtu;
+		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
+			    (unsigned int)(max - sizeof(struct iphdr)));
 	}
 
+	err = dev_set_mtu(dev, mtu);
+	if (err)
+		goto err_dev_set_mtu;
+
 	ip_tunnel_add(itn, nt);
 	return 0;
 
-- 
cgit 


From f03dbb06dc380274e351ca4b1ee1587ed4529e62 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 27 Mar 2018 11:28:48 -0700
Subject: hv_netvsc: enable multicast if necessary

My recent change to netvsc drive in how receive flags are handled
broke multicast.  The Hyper-v/Azure virtual interface there is not a
multicast filter list, filtering is only all or none. The driver must
enable all multicast if any multicast address is present.

Fixes: 009f766ca238 ("hv_netvsc: filter multicast/broadcast")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/rndis_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index a6ec41c399d6..465c42e30508 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -858,7 +858,7 @@ static void rndis_set_multicast(struct work_struct *w)
 	if (flags & IFF_PROMISC) {
 		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
 	} else {
-		if (flags & IFF_ALLMULTI)
+		if (!netdev_mc_empty(rdev->ndev) || (flags & IFF_ALLMULTI))
 			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
 		if (flags & IFF_BROADCAST)
 			filter |= NDIS_PACKET_TYPE_BROADCAST;
-- 
cgit 


From 58f101bf87e32753342a6924772c6ebb0fbde24a Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@cavium.com>
Date: Wed, 28 Mar 2018 03:35:52 -0700
Subject: qede: Do not drop rx-checksum invalidated packets.

Today, driver drops received packets which are indicated as
invalid checksum by the device. Instead of dropping such packets,
pass them to the stack with CHECKSUM_NONE indication in skb.

Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 2e921cab1792..14941303189d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1247,16 +1247,10 @@ static int qede_rx_process_cqe(struct qede_dev *edev,
 
 	csum_flag = qede_check_csum(parse_flag);
 	if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
-		if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag)) {
+		if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag))
 			rxq->rx_ip_frags++;
-		} else {
-			DP_NOTICE(edev,
-				  "CQE has error, flags = %x, dropping incoming packet\n",
-				  parse_flag);
+		else
 			rxq->rx_hw_errors++;
-			qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
-			return 0;
-		}
 	}
 
 	/* Basic validation passed; Need to prepare an SKB. This would also
-- 
cgit 


From d65026c6c62e7d9616c8ceb5a53b68bcdc050525 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 29 Mar 2018 16:00:04 +0800
Subject: vhost: validate log when IOTLB is enabled

Vq log_base is the userspace address of bitmap which has nothing to do
with IOTLB. So it needs to be validated unconditionally otherwise we
may try use 0 as log_base which may lead to pin pages that will lead
unexpected result (e.g trigger BUG_ON() in set_bit_to_user()).

Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API")
Reported-by: syzbot+6304bf97ef436580fede@syzkaller.appspotmail.com
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5d5a9d9e66d3..5320039671b7 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1244,14 +1244,12 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	if (vq->iotlb) {
-		/* When device IOTLB was used, the access validation
-		 * will be validated during prefetching.
-		 */
-		return 1;
-	}
-	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
-		vq_log_access_ok(vq, vq->log_base);
+	int ret = vq_log_access_ok(vq, vq->log_base);
+
+	if (ret || vq->iotlb)
+		return ret;
+
+	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
 
-- 
cgit 


From a9645b273e22662ebea563eae334eb3e4fc6614e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Mar 2018 00:18:53 +0100
Subject: atm: iphase: fix spelling mistake: "Receiverd" -> "Received"

Trivial fix to spelling mistake in message text

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 98a3a43484c8..44abb8a0a5e5 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -3147,7 +3147,7 @@ static int ia_proc_read(struct atm_dev *dev,loff_t *pos,char *page)
                            "  Size of Tx Buffer  :  %u\n"
                            "  Number of Rx Buffer:  %u\n"
                            "  Size of Rx Buffer  :  %u\n"
-                           "  Packets Receiverd  :  %u\n"
+                           "  Packets Received   :  %u\n"
                            "  Packets Transmitted:  %u\n"
                            "  Cells Received     :  %u\n"
                            "  Cells Transmitted  :  %u\n"
-- 
cgit 


From ae4745730cf8e693d354ccd4dbaf59ea440c09a9 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Thu, 29 Mar 2018 19:05:29 +0900
Subject: net: Fix untag for vlan packets without ethernet header

In some situation vlan packets do not have ethernet headers. One example
is packets from tun devices. Users can specify vlan protocol in tun_pi
field instead of IP protocol, and skb_vlan_untag() attempts to untag such
packets.

skb_vlan_untag() (more precisely, skb_reorder_vlan_header() called by it)
however did not expect packets without ethernet headers, so in such a case
size argument for memmove() underflowed and triggered crash.

====
BUG: unable to handle kernel paging request at ffff8801cccb8000
IP: __memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43
PGD 9cee067 P4D 9cee067 PUD 1d9401063 PMD 1cccb7063 PTE 2810100028101
Oops: 000b [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 17663 Comm: syz-executor2 Not tainted 4.16.0-rc7+ #368
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:__memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43
RSP: 0018:ffff8801cc046e28 EFLAGS: 00010287
RAX: ffff8801ccc244c4 RBX: fffffffffffffffe RCX: fffffffffff6c4c2
RDX: fffffffffffffffe RSI: ffff8801cccb7ffc RDI: ffff8801cccb8000
RBP: ffff8801cc046e48 R08: ffff8801ccc244be R09: ffffed0039984899
R10: 0000000000000001 R11: ffffed0039984898 R12: ffff8801ccc244c4
R13: ffff8801ccc244c0 R14: ffff8801d96b7c06 R15: ffff8801d96b7b40
FS:  00007febd562d700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff8801cccb8000 CR3: 00000001ccb2f006 CR4: 00000000001606e0
DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
Call Trace:
 memmove include/linux/string.h:360 [inline]
 skb_reorder_vlan_header net/core/skbuff.c:5031 [inline]
 skb_vlan_untag+0x470/0xc40 net/core/skbuff.c:5061
 __netif_receive_skb_core+0x119c/0x3460 net/core/dev.c:4460
 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4627
 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4701
 netif_receive_skb+0xae/0x390 net/core/dev.c:4725
 tun_rx_batched.isra.50+0x5ee/0x870 drivers/net/tun.c:1555
 tun_get_user+0x299e/0x3c20 drivers/net/tun.c:1962
 tun_chr_write_iter+0xb9/0x160 drivers/net/tun.c:1990
 call_write_iter include/linux/fs.h:1782 [inline]
 new_sync_write fs/read_write.c:469 [inline]
 __vfs_write+0x684/0x970 fs/read_write.c:482
 vfs_write+0x189/0x510 fs/read_write.c:544
 SYSC_write fs/read_write.c:589 [inline]
 SyS_write+0xef/0x220 fs/read_write.c:581
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x454879
RSP: 002b:00007febd562cc68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007febd562d6d4 RCX: 0000000000454879
RDX: 0000000000000157 RSI: 0000000020000180 RDI: 0000000000000014
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000006b0 R14: 00000000006fc120 R15: 0000000000000000
Code: 90 90 90 90 90 90 90 48 89 f8 48 83 fa 20 0f 82 03 01 00 00 48 39 fe 7d 0f 49 89 f0 49 01 d0 49 39 f8 0f 8f 9f 00 00 00 48 89 d1 <f3> a4 c3 48 81 fa a8 02 00 00 72 05 40 38 fe 74 3b 48 83 ea 20
RIP: __memmove+0x24/0x1a0 arch/x86/lib/memmove_64.S:43 RSP: ffff8801cc046e28
CR2: ffff8801cccb8000
====

We don't need to copy headers for packets which do not have preceding
headers of vlan headers, so skip memmove() in that case.

Fixes: 4bbb3e0e8239 ("net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e7acdc30732..857e4e6f751a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5028,8 +5028,10 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 	}
 
 	mac_len = skb->data - skb_mac_header(skb);
-	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
-		mac_len - VLAN_HLEN - ETH_TLEN);
+	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
+		memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+			mac_len - VLAN_HLEN - ETH_TLEN);
+	}
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
-- 
cgit 


From c769accdf3d8a103940bea2979b65556718567e9 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Thu, 29 Mar 2018 19:05:30 +0900
Subject: vlan: Fix vlan insertion for packets without ethernet header

In some situation vlan packets do not have ethernet headers. One example
is packets from tun devices. Users can specify vlan protocol in tun_pi
field instead of IP protocol. When we have a vlan device with reorder_hdr
disabled on top of the tun device, such packets from tun devices are
untagged in skb_vlan_untag() and vlan headers will be inserted back in
vlan_insert_inner_tag().

vlan_insert_inner_tag() however did not expect packets without ethernet
headers, so in such a case size argument for memmove() underflowed.

We don't need to copy headers for packets which do not have preceding
headers of vlan headers, so skip memmove() in that case.
Also don't write vlan protocol in skb->data when it does not have enough
room for it.

Fixes: cbe7128c4b92 ("vlan: Fix out of order vlan headers with reorder header off")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c4a1cff9c768..7d30892da064 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -323,13 +323,24 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
 	skb_push(skb, VLAN_HLEN);
 
 	/* Move the mac header sans proto to the beginning of the new header. */
-	memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
+	if (likely(mac_len > ETH_TLEN))
+		memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
 	skb->mac_header -= VLAN_HLEN;
 
 	veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN);
 
 	/* first, the ethernet type */
-	veth->h_vlan_proto = vlan_proto;
+	if (likely(mac_len >= ETH_TLEN)) {
+		/* h_vlan_encapsulated_proto should already be populated, and
+		 * skb->data has space for h_vlan_proto
+		 */
+		veth->h_vlan_proto = vlan_proto;
+	} else {
+		/* h_vlan_encapsulated_proto should not be populated, and
+		 * skb->data has no space for h_vlan_proto
+		 */
+		veth->h_vlan_encapsulated_proto = skb->protocol;
+	}
 
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
-- 
cgit 


From f97c3dc3c0e8d23a5c4357d182afeef4c67f5c33 Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Thu, 29 Mar 2018 13:53:52 +0300
Subject: net/dim: Fix int overflow

When calculating difference between samples, the values
are multiplied by 100. Large values may cause int overflow
when multiplied (usually on first iteration).
Fixed by forcing 100 to be of type unsigned long.

Fixes: 4c4dbb4a7363 ("net/mlx5e: Move dynamic interrupt coalescing code to include/linux")
Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dim.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index bebeaad897cc..29ed8fd6379a 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -231,7 +231,7 @@ static inline void net_dim_exit_parking(struct net_dim *dim)
 }
 
 #define IS_SIGNIFICANT_DIFF(val, ref) \
-	(((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
+	(((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
 
 static inline int net_dim_stats_compare(struct net_dim_stats *curr,
 					struct net_dim_stats *prev)
-- 
cgit 


From 5807b22c9164a21cd1077a9bc587f0bba361f72d Mon Sep 17 00:00:00 2001
From: David Lebrun <dlebrun@google.com>
Date: Thu, 29 Mar 2018 17:59:36 +0100
Subject: ipv6: sr: fix seg6 encap performances with TSO enabled

Enabling TSO can lead to abysmal performances when using seg6 in
encap mode, such as with the ixgbe driver. This patch adds a call to
iptunnel_handle_offloads() to remove the encapsulation bit if needed.

Before:
root@comp4-seg6bpf:~# iperf3 -c fc00::55
Connecting to host fc00::55, port 5201
[  4] local fc45::4 port 36592 connected to fc00::55 port 5201
[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
[  4]   0.00-1.00   sec   196 KBytes  1.60 Mbits/sec   47   6.66 KBytes
[  4]   1.00-2.00   sec   304 KBytes  2.49 Mbits/sec  100   5.33 KBytes
[  4]   2.00-3.00   sec   284 KBytes  2.32 Mbits/sec   92   5.33 KBytes

After:
root@comp4-seg6bpf:~# iperf3 -c fc00::55
Connecting to host fc00::55, port 5201
[  4] local fc45::4 port 43062 connected to fc00::55 port 5201
[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
[  4]   0.00-1.00   sec  1.03 GBytes  8.89 Gbits/sec    0    743 KBytes
[  4]   1.00-2.00   sec  1.03 GBytes  8.87 Gbits/sec    0    743 KBytes
[  4]   2.00-3.00   sec  1.03 GBytes  8.87 Gbits/sec    0    743 KBytes

Reported-by: Tom Herbert <tom@quantonium.net>
Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Signed-off-by: David Lebrun <dlebrun@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_iptunnel.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 7a78dcfda68a..f343e6f0fc95 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -16,6 +16,7 @@
 #include <linux/net.h>
 #include <linux/module.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
 #include <net/lwtunnel.h>
 #include <net/netevent.h>
 #include <net/netns/generic.h>
@@ -211,11 +212,6 @@ static int seg6_do_srh(struct sk_buff *skb)
 
 	tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
-
 	switch (tinfo->mode) {
 	case SEG6_IPTUN_MODE_INLINE:
 		if (skb->protocol != htons(ETH_P_IPV6))
@@ -224,10 +220,12 @@ static int seg6_do_srh(struct sk_buff *skb)
 		err = seg6_do_srh_inline(skb, tinfo->srh);
 		if (err)
 			return err;
-
-		skb_reset_inner_headers(skb);
 		break;
 	case SEG6_IPTUN_MODE_ENCAP:
+		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+		if (err)
+			return err;
+
 		if (skb->protocol == htons(ETH_P_IPV6))
 			proto = IPPROTO_IPV6;
 		else if (skb->protocol == htons(ETH_P_IP))
@@ -239,6 +237,8 @@ static int seg6_do_srh(struct sk_buff *skb)
 		if (err)
 			return err;
 
+		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+		skb_set_inner_protocol(skb, skb->protocol);
 		skb->protocol = htons(ETH_P_IPV6);
 		break;
 	case SEG6_IPTUN_MODE_L2ENCAP:
@@ -262,8 +262,6 @@ static int seg6_do_srh(struct sk_buff *skb)
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 
-	skb_set_inner_protocol(skb, skb->protocol);
-
 	return 0;
 }
 
-- 
cgit 


From 82dd0d2a9a76fc8fa2b18d80b987d455728bf83a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 29 Mar 2018 12:49:52 -0700
Subject: vrf: Fix use after free and double free in vrf_finish_output

Miguel reported an skb use after free / double free in vrf_finish_output
when neigh_output returns an error. The vrf driver should return after
the call to neigh_output as it takes over the skb on error path as well.

Patch is a simplified version of Miguel's patch which was written for 4.9,
and updated to top of tree.

Fixes: 8f58336d3f78a ("net: Add ethernet header for pass through VRF device")
Signed-off-by: Miguel Fadon Perlines <mfadon@teldat.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..ac40924fe437 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -578,12 +578,13 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 	if (!IS_ERR(neigh)) {
 		sock_confirm_neigh(skb, neigh);
 		ret = neigh_output(neigh, skb);
+		rcu_read_unlock_bh();
+		return ret;
 	}
 
 	rcu_read_unlock_bh();
 err:
-	if (unlikely(ret < 0))
-		vrf_tx_error(skb->dev, skb);
+	vrf_tx_error(skb->dev, skb);
 	return ret;
 }
 
-- 
cgit 


From b6cdbc85234b072340b8923e69f49ec293f905dc Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 29 Mar 2018 17:44:57 -0700
Subject: net/ipv6: Fix route leaking between VRFs

Donald reported that IPv6 route leaking between VRFs is not working.
The root cause is the strict argument in the call to rt6_lookup when
validating the nexthop spec.

ip6_route_check_nh validates the gateway and device (if given) of a
route spec. It in turn could call rt6_lookup (e.g., lookup in a given
table did not succeed so it falls back to a full lookup) and if so
sets the strict argument to 1. That means if the egress device is given,
the route lookup needs to return a result with the same device. This
strict requirement does not work with VRFs (IPv4 or IPv6) because the
oif in the flow struct is overridden with the index of the VRF device
to trigger a match on the l3mdev rule and force the lookup to its table.

The right long term solution is to add an l3mdev index to the flow
struct such that the oif is not overridden. That solution will not
backport well, so this patch aims for a simpler solution to relax the
strict argument if the route spec device is an l3mdev slave. As done
in other places, use the FLOWI_FLAG_SKIP_NH_OIF to know that the
RT6_LOOKUP_F_IFACE flag needs to be removed.

Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack")
Reported-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b33d057ac5eb..fc74352fac12 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -919,6 +919,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	struct rt6_info *rt, *rt_cache;
 	struct fib6_node *fn;
 
+	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+		flags &= ~RT6_LOOKUP_F_IFACE;
+
 	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-- 
cgit 


From e81b5e01c14add8395dfba7130f8829206bb507d Mon Sep 17 00:00:00 2001
From: Yelena Krivosheev <yelena@marvell.com>
Date: Fri, 30 Mar 2018 12:05:31 +0200
Subject: net: mvneta: fix enable of all initialized RXQs

In mvneta_port_up() we enable relevant RX and TX port queues by write
queues bit map to an appropriate register.

q_map must be ZERO in the beginning of this process.

Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Acked-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 25e9a551cc8c..3f6fb635738c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1132,6 +1132,7 @@ static void mvneta_port_up(struct mvneta_port *pp)
 	}
 	mvreg_write(pp, MVNETA_TXQ_CMD, q_map);
 
+	q_map = 0;
 	/* Enable all initialized RXQs. */
 	for (queue = 0; queue < rxq_number; queue++) {
 		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
-- 
cgit