From bf1ecd210541ef5f3a110e88e8ca5d33b4aa5c23 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 31 May 2016 00:16:50 +0300 Subject: cfg80211: Allow cfg80211_connect_result() errors to be distinguished Previously, the status parameter to cfg80211_connect_result() was documented as using WLAN_STATUS_UNSPECIFIED_FAILURE (1) when the real status code for the failure is not known. This value can be used by an AP (and often is) and as such, user space cannot distinguish between explicitly rejected authentication/association and not being able to even try to associate or not receiving a response from the AP. Add a new inline function, cfg80211_connect_timeout(), to be used when the driver knows that the connection attempt failed due to a reason where the connection could not be attempted or no response was received from the AP. The internal functions now allow a negative status value (-1) to be used as an indication of this special case. This results in NL80211_ATTR_TIMED_OUT being added to the NL80211_CMD_CONNECT event to allow user space to determine that this case was hit. For backwards compatibility, NL80211_STATUS_CODE with the value WLAN_STATUS_UNSPECIFIED_FAILURE is still indicated in the event in such a case. Signed-off-by: Jouni Malinen [johannes: fix cfg80211_connect_bss() prototype to use int for status, add cfg80211_connect_timeout() to docbook, fix docbook] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 53 +++++++++++++++++++++++++++++++++----------- include/uapi/linux/nl80211.h | 7 +++++- 2 files changed, 46 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63921672bed0..537f010cf5e1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2367,19 +2367,23 @@ struct cfg80211_qos_map { * (invoked with the wireless_dev mutex held) * * @connect: Connect to the ESS with the specified parameters. When connected, - * call cfg80211_connect_result() with status code %WLAN_STATUS_SUCCESS. - * If the connection fails for some reason, call cfg80211_connect_result() - * with the status from the AP. The driver is allowed to roam to other - * BSSes within the ESS when the other BSS matches the connect parameters. - * When such roaming is initiated by the driver, the driver is expected to - * verify that the target matches the configured security parameters and - * to use Reassociation Request frame instead of Association Request frame. - * The connect function can also be used to request the driver to perform - * a specific roam when connected to an ESS. In that case, the prev_bssid + * call cfg80211_connect_result()/cfg80211_connect_bss() with status code + * %WLAN_STATUS_SUCCESS. If the connection fails for some reason, call + * cfg80211_connect_result()/cfg80211_connect_bss() with the status code + * from the AP or cfg80211_connect_timeout() if no frame with status code + * was received. + * The driver is allowed to roam to other BSSes within the ESS when the + * other BSS matches the connect parameters. When such roaming is initiated + * by the driver, the driver is expected to verify that the target matches + * the configured security parameters and to use Reassociation Request + * frame instead of Association Request frame. + * The connect function can also be used to request the driver to perform a + * specific roam when connected to an ESS. In that case, the prev_bssid * parameter is set to the BSSID of the currently associated BSS as an - * indication of requesting reassociation.
In both the driver-initiated and - * new connect() call initiated roaming cases, the result of roaming is - * indicated with a call to cfg80211_roamed() or cfg80211_roamed_bss(). + * indication of requesting reassociation. + * In both the driver-initiated and new connect() call initiated roaming + * cases, the result of roaming is indicated with a call to + * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS. * (invoked with the wireless_dev mutex held) @@ -4680,7 +4684,7 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, u16 status, gfp_t gfp); + size_t resp_ie_len, int status, gfp_t gfp); /** * cfg80211_connect_result - notify cfg80211 of connection result @@ -4709,6 +4713,29 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, resp_ie_len, status, gfp); } +/** + * cfg80211_connect_timeout - notify cfg80211 of connection timeout + * + * @dev: network device + * @bssid: the BSSID of the AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has failed + * in a sequence where no explicit authentication/association rejection was + * received from the AP. This could happen, e.g., due to not being able to send + * out the Authentication or Association Request frame or timing out while + * waiting for the response. + */ +static inline void +cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, gfp_t gfp) +{ + cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, + gfp); +} + /** * cfg80211_roamed - notify cfg80211 of roaming * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e23d78685a01..8d995316aadb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -493,7 +493,12 @@ * This attribute is ignored if driver does not support roam scan. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be - * determined by the STATUS_CODE attribute. + * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success, + * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the + * event, the connection attempt failed due to not being able to initiate + * authentication/association or not receiving a response from the AP. + * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as + * well to remain backwards compatible. * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), * sent as an event when the card/driver roamed by itself. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify -- cgit From 019ae3a918811715192b22c400ac78d54acc26a9 Mon Sep 17 00:00:00 2001 From: "Kanchanapally, Vidyullatha" Date: Mon, 16 May 2016 10:41:04 +0530 Subject: cfg80211: Advertise extended capabilities per interface type to userspace The driver extended capabilities may differ for different interface types which the userspace needs to know (for example the fine timing measurement initiator and responder bits might differ for a station and AP). 
Add a new nl80211 attribute to provide extended capabilities per interface type to userspace. Signed-off-by: Vidyullatha Kanchanapally Reviewed-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 7 +++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 537f010cf5e1..7bbb00d8b2cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3083,6 +3083,24 @@ struct wiphy_vendor_command { unsigned long *storage); }; +/** + * struct wiphy_iftype_ext_capab - extended capabilities per interface type + * @iftype: interface type + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are the + * 802.11 extended capabilities ("Extended Capabilities element") and are + * in the same format as in the information element. See IEEE Std + * 802.11-2012 8.4.2.29 for the defined fields. + * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities + */ +struct wiphy_iftype_ext_capab { + enum nl80211_iftype iftype; + const u8 *extended_capabilities; + const u8 *extended_capabilities_mask; + u8 extended_capabilities_len; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -3203,9 +3221,14 @@ struct wiphy_vendor_command { * additional capabilities might be supported by userspace; these are * the 802.11 extended capabilities ("Extended Capabilities element") * and are in the same format as in the information element. See - * 802.11-2012 8.4.2.29 for the defined fields. + * 802.11-2012 8.4.2.29 for the defined fields. These are the default + * extended capabilities to be used if the capabilities are not specified + * for a specific interface type in iftype_ext_capab. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities + * @iftype_ext_capab: array of extended capabilities per interface type + * @num_iftype_ext_capab: number of interface types for which extended + * capabilities are specified separately. * @coalesce: packet coalescing support information * * @vendor_commands: array of vendor commands supported by the hardware @@ -3305,6 +3328,9 @@ struct wiphy { const u8 *extended_capabilities, *extended_capabilities_mask; u8 extended_capabilities_len; + const struct wiphy_iftype_ext_capab *iftype_ext_capab; + unsigned int num_iftype_ext_capab; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8d995316aadb..53c8278827a0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1824,6 +1824,11 @@ enum nl80211_commands { * * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment * + * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes: + * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA, + * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per + * interface type. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2206,6 +2211,8 @@ enum nl80211_attrs { NL80211_ATTR_PAD, + NL80211_ATTR_IFTYPE_EXT_CAPA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit From 595d0b29463343c3be995d3948930b8231e5b8cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 May 2016 15:22:41 -0700 Subject: udp: avoid csum_partial() for validated skb In commit e6afc8ace6dd5 ("udp: remove headers from UDP packets before queueing"), the udp_csum_pull_header() helper was added but missed the fact that CHECKSUM_UNNECESSARY packets were now converted to CHECKSUM_NONE and skb->csum_valid was set to 1 for them. Since csum_partial() is quite expensive, even for an 8-byte area, it is worth adding a test. We can also use skb->data instead of udp_hdr() as we are pulling UDP headers, as it is slightly faster. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index ae07f375370d..8894d7144189 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -160,8 +160,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, static inline void udp_csum_pull_header(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_NONE) - skb->csum = csum_partial(udp_hdr(skb), sizeof(struct udphdr), + if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) + skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); -- cgit From 351a4dedb34cbeb9f747f0e2309e891b6fb906cb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 2 Jun 2016 10:23:29 +0300 Subject: qed: Utilize FW 8.10.3.0 The new QED firmware contains several fixes, including: - Wrong classification of packets in 4-port devices. - Anti-spoof interoperability with encapsulated packets. - Tx-switching of encapsulated packets. It also slightly improves Tx performance of the device. In addition, this firmware contains the necessary logic for supporting iscsi & rdma, for which we plan on pushing protocol drivers in the imminent future. Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/common_hsi.h | 205 +++++++++++++++++++++++++++++------------ include/linux/qed/eth_common.h | 124 +++++++++++++++---------- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 222 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 3f14c7efe68f..285189a5ea6d 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -13,9 +13,19 @@ #define X_FINAL_CLEANUP_AGG_INT 1 +/* Queue Zone sizes in bytes */ +#define TSTORM_QZONE_SIZE 8 +#define MSTORM_QZONE_SIZE 0 +#define USTORM_QZONE_SIZE 8 +#define XSTORM_QZONE_SIZE 8 +#define YSTORM_QZONE_SIZE 0 +#define PSTORM_QZONE_SIZE 0 + +#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16 + #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 7 -#define FW_REVISION_VERSION 3 +#define FW_MINOR_VERSION 10 +#define FW_REVISION_VERSION 5 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -97,45 +107,86 @@ #define DQ_XCM_AGG_VAL_SEL_REG6 7 /* XCM agg val selection */ -#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD2 -#define DQ_XCM_ETH_TX_BD_CONS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD3 -#define DQ_XCM_CORE_TX_BD_CONS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD3 -#define DQ_XCM_ETH_TX_BD_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_CORE_TX_BD_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_CORE_SPQ_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_CORE_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_CORE_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_SPQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD DQ_XCM_AGG_VAL_SEL_WORD2 +#define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 + +/* UCM agg val selection (HW) */ +#define DQ_UCM_AGG_VAL_SEL_WORD0 0 +#define DQ_UCM_AGG_VAL_SEL_WORD1 1 +#define DQ_UCM_AGG_VAL_SEL_WORD2 2 +#define DQ_UCM_AGG_VAL_SEL_WORD3 3 +#define DQ_UCM_AGG_VAL_SEL_REG0 4 +#define DQ_UCM_AGG_VAL_SEL_REG1 5 +#define DQ_UCM_AGG_VAL_SEL_REG2 6 +#define DQ_UCM_AGG_VAL_SEL_REG3 7 + +/* UCM agg val selection (FW) */ +#define DQ_UCM_ETH_PMD_TX_CONS_CMD DQ_UCM_AGG_VAL_SEL_WORD2 +#define DQ_UCM_ETH_PMD_RX_CONS_CMD DQ_UCM_AGG_VAL_SEL_WORD3 +#define DQ_UCM_ROCE_CQ_CONS_CMD DQ_UCM_AGG_VAL_SEL_REG0 +#define DQ_UCM_ROCE_CQ_PROD_CMD DQ_UCM_AGG_VAL_SEL_REG2 + +/* TCM agg val selection (HW) */ +#define DQ_TCM_AGG_VAL_SEL_WORD0 0 +#define DQ_TCM_AGG_VAL_SEL_WORD1 1 +#define DQ_TCM_AGG_VAL_SEL_WORD2 2 +#define DQ_TCM_AGG_VAL_SEL_WORD3 3 +#define DQ_TCM_AGG_VAL_SEL_REG1 4 +#define DQ_TCM_AGG_VAL_SEL_REG2 5 +#define DQ_TCM_AGG_VAL_SEL_REG6 6 +#define DQ_TCM_AGG_VAL_SEL_REG9 7 + +/* TCM agg val selection (FW) */ +#define DQ_TCM_L2B_BD_PROD_CMD \ + DQ_TCM_AGG_VAL_SEL_WORD1 +#define DQ_TCM_ROCE_RQ_PROD_CMD \ + DQ_TCM_AGG_VAL_SEL_WORD0 /* XCM agg counter flag selection */ -#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 -#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 -#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 -#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 -#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 -#define DQ_XCM_AGG_FLG_SHIFT_CF19 5 -#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 -#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 +#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 +#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 +#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 +#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 +#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 +#define DQ_XCM_AGG_FLG_SHIFT_CF19 
5 +#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 +#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 /* XCM agg counter flag selection */ -#define DQ_XCM_ETH_DQ_CF_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_CORE_DQ_CF_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_ETH_TERMINATE_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_CORE_TERMINATE_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_TPH_EN_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_CORE_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF23) + +/* UCM agg counter flag selection (HW) */ +#define DQ_UCM_AGG_FLG_SHIFT_CF0 0 +#define DQ_UCM_AGG_FLG_SHIFT_CF1 1 +#define DQ_UCM_AGG_FLG_SHIFT_CF3 2 +#define DQ_UCM_AGG_FLG_SHIFT_CF4 3 +#define DQ_UCM_AGG_FLG_SHIFT_CF5 4 +#define DQ_UCM_AGG_FLG_SHIFT_CF6 5 +#define DQ_UCM_AGG_FLG_SHIFT_RULE0EN 6 +#define DQ_UCM_AGG_FLG_SHIFT_RULE1EN 7 + +/* UCM agg counter flag selection (FW) */ +#define DQ_UCM_ETH_PMD_TX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ETH_PMD_RX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF5) + +#define DQ_REGION_SHIFT (12) + +/* DPM */ +#define DQ_DPM_WQE_BUFF_SIZE (320) + +/* Conn type ranges */ +#define DQ_CONN_TYPE_RANGE_SHIFT (4) /*****************/ /* QM CONSTANTS */ @@ -282,8 +333,6 @@ (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) -#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 -#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 #define PXP_VF_BAR0_START_IGU 0 #define PXP_VF_BAR0_IGU_LENGTH 0x3000 @@ -342,6 +391,9 @@ #define PXP_VF_BAR0_GRC_WINDOW_LENGTH 32 +#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 +#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 + /* ILT Records */ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 @@ -379,6 +431,38 @@ struct async_data { u8 fw_debug_param; }; +struct coalescing_timeset { + u8 value; +#define COALESCING_TIMESET_TIMESET_MASK 0x7F +#define COALESCING_TIMESET_TIMESET_SHIFT 0 +#define COALESCING_TIMESET_VALID_MASK 0x1 +#define COALESCING_TIMESET_VALID_SHIFT 7 +}; + +struct common_prs_pf_msg_info { + __le32 value; +#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT 0 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT 1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT 2 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT 3 +#define COMMON_PRS_PF_MSG_INFO_RESERVED_MASK 0xFFFFFFF +#define COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT 4 +}; + +struct common_queue_zone { + __le16 ring_drv_data_consumer; + __le16 reserved; +}; + +struct eth_rx_prod_data { + __le16 bd_prod; + __le16 cqe_prod; +}; + struct regpair { __le32 lo; __le32 hi; @@ -388,11 +472,23 @@ struct vf_pf_channel_eqe_data { struct regpair msg_addr; }; +struct malicious_vf_eqe_data { + u8 vf_id; + u8 err_id; + __le16 reserved[3]; +}; + +struct initial_cleanup_eqe_data { + u8 vf_id; + u8 
reserved[7]; +}; + /* Event Data Union */ union event_ring_data { - u8 bytes[8]; - struct vf_pf_channel_eqe_data vf_pf_channel; - struct async_data async_info; + u8 bytes[8]; + struct vf_pf_channel_eqe_data vf_pf_channel; + struct malicious_vf_eqe_data malicious_vf; + struct initial_cleanup_eqe_data vf_init_cleanup; }; /* Event Ring Entry */ @@ -433,6 +529,16 @@ enum protocol_type { MAX_PROTOCOL_TYPE }; +struct ustorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + u8 reserved[3]; +}; + +struct ustorm_queue_zone { + struct ustorm_eth_queue_zone eth; + struct common_queue_zone common; +}; + /* status block structure */ struct cau_pi_entry { u32 prod; @@ -683,19 +789,4 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; -struct tunnel_parsing_flags { - u8 flags; -#define TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 -#define TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 -#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 -#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 -#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 -#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 -#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 -#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 -}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 092cb0c1afcb..b5ebc697d05f 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -12,6 +12,8 @@ /********************/ /* ETH FW CONSTANTS */ /********************/ +#define ETH_HSI_VER_MAJOR 3 +#define ETH_HSI_VER_MINOR 0 #define ETH_CACHE_LINE_SIZE 64 #define ETH_MAX_RAMROD_PER_CON 8 @@ -57,19 +59,6 @@ #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 #define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 -/* Queue Zone sizes */ -#define TSTORM_QZONE_SIZE 0 -#define MSTORM_QZONE_SIZE sizeof(struct mstorm_eth_queue_zone) -#define USTORM_QZONE_SIZE sizeof(struct ustorm_eth_queue_zone) -#define XSTORM_QZONE_SIZE 0 -#define YSTORM_QZONE_SIZE sizeof(struct ystorm_eth_queue_zone) -#define PSTORM_QZONE_SIZE 0 - -/* Interrupt coalescing TimeSet */ -struct coalescing_timeset { - u8 timeset; - u8 valid; -}; struct eth_tx_1st_bd_flags { u8 bitfields; @@ -97,12 +86,12 @@ struct eth_tx_data_1st_bd { u8 nbds; struct eth_tx_1st_bd_flags bd_flags; __le16 bitfields; -#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_MASK 0x1 -#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT 0 +#define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 #define ETH_TX_DATA_1ST_BD_RESERVED0_SHIFT 1 -#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_MASK 0x3FFF -#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_SHIFT 2 +#define ETH_TX_DATA_1ST_BD_PKT_LEN_MASK 0x3FFF +#define ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT 2 }; /* The parsing information data for the second tx bd of a given packet. 
*/ @@ -136,28 +125,51 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; +struct eth_fast_path_cqe_fw_debug { + u8 reserved0; + u8 reserved1; + __le16 reserved2; +}; + +/* tunneling parsing flags */ +struct eth_tunnel_parsing_flags { + u8 flags; +#define ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 +#define ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 +#define ETH_TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 +#define ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 +#define ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 +#define ETH_TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 +}; + /* Regular ETH Rx FP CQE. */ struct eth_fast_path_rx_reg_cqe { - u8 type; - u8 bitfields; + u8 type; + u8 bitfields; #define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK 0x7 #define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT 0 #define ETH_FAST_PATH_RX_REG_CQE_TC_MASK 0xF #define ETH_FAST_PATH_RX_REG_CQE_TC_SHIFT 3 #define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_MASK 0x1 #define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_SHIFT 7 - __le16 pkt_len; - struct parsing_and_err_flags pars_flags; - __le16 vlan_tag; - __le32 rss_hash; - __le16 len_on_first_bd; - u8 placement_offset; - struct tunnel_parsing_flags tunnel_pars_flags; - u8 bd_num; - u8 reserved[7]; - u32 fw_debug; - u8 reserved1[3]; - u8 flags; + __le16 pkt_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; + struct eth_tunnel_parsing_flags tunnel_pars_flags; + u8 bd_num; + u8 reserved[7]; + struct eth_fast_path_cqe_fw_debug fw_debug; + u8 reserved1[3]; + u8 flags; #define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 #define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 #define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 @@ -207,11 +219,11 @@ struct eth_fast_path_rx_tpa_start_cqe { __le32 rss_hash; __le16 len_on_first_bd; u8 placement_offset; - struct tunnel_parsing_flags tunnel_pars_flags; + struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 tpa_agg_index; u8 header_len; __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; - u32 fw_debug; + struct eth_fast_path_cqe_fw_debug fw_debug; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -264,12 +276,25 @@ enum eth_rx_cqe_type { MAX_ETH_RX_CQE_TYPE }; -/* ETH Rx producers data */ -struct eth_rx_prod_data { - __le16 bd_prod; - __le16 cqe_prod; - __le16 reserved; - __le16 reserved1; +enum eth_rx_tunn_type { + ETH_RX_NO_TUNN, + ETH_RX_TUNN_GENEVE, + ETH_RX_TUNN_GRE, + ETH_RX_TUNN_VXLAN, + MAX_ETH_RX_TUNN_TYPE +}; + +/* Aggregation end reason. 
*/ +enum eth_tpa_end_reason { + ETH_AGG_END_UNUSED, + ETH_AGG_END_SP_UPDATE, + ETH_AGG_END_MAX_LEN, + ETH_AGG_END_LAST_SEG, + ETH_AGG_END_TIMEOUT, + ETH_AGG_END_NOT_CONSISTENT, + ETH_AGG_END_OUT_OF_ORDER, + ETH_AGG_END_NON_TPA_SEG, + MAX_ETH_TPA_END_REASON }; /* The first tx bd of a given packet */ @@ -337,21 +362,18 @@ union eth_tx_bd_types { }; /* Mstorm Queue Zone */ -struct mstorm_eth_queue_zone { - struct eth_rx_prod_data rx_producers; - __le32 reserved[2]; -}; - -/* Ustorm Queue Zone */ -struct ustorm_eth_queue_zone { - struct coalescing_timeset int_coalescing_timeset; - __le16 reserved[3]; +enum eth_tx_tunn_type { + ETH_TX_TUNN_GENEVE, + ETH_TX_TUNN_TTAG, + ETH_TX_TUNN_GRE, + ETH_TX_TUNN_VXLAN, + MAX_ETH_TX_TUNN_TYPE }; /* Ystorm Queue Zone */ -struct ystorm_eth_queue_zone { - struct coalescing_timeset int_coalescing_timeset; - __le16 reserved[3]; +struct xstorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + u8 reserved[7]; }; /* ETH doorbell data */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 6ae8cb4a61d3..f8ff71126d9e 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -113,6 +113,7 @@ struct qed_queue_start_common_params { u8 vport_id; u16 sb; u16 sb_idx; + u16 vf_qid; }; struct qed_tunn_params { -- cgit From 3953c46c3ac7eef31a9935427371c6f54a22f1ba Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:40 -0300 Subject: sk_buff: allow segmenting based on frag sizes This patch allows segmenting an skb based on its frag sizes instead of on a fixed value. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ee38a4127475..329a0a9ef671 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -301,6 +301,11 @@ struct sk_buff; #endif extern int sysctl_max_skb_frags; +/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to + * segment using its current segmentation instead. + */ +#define GSO_BY_FRAGS 0xFFFF + typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { -- cgit From ae7ef81ef000adeee7a87585b9135ff8a8064acc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:41 -0300 Subject: skbuff: introduce skb_gso_validate_mtu skb_gso_network_seglen is not enough for checking fragment sizes if the skb is using GSO_BY_FRAGS, as we have to check frag by frag. This patch introduces skb_gso_validate_mtu, based on the former, which wraps that use case inside it, as all calls to skb_gso_network_seglen were there to validate whether the packet fits a given MTU, and improves the check. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 329a0a9ef671..aa3f9d7e8d5c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2992,6 +2992,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); +bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); -- cgit From 90017accff61ae89283ad9a51f9ac46ca01633fb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:43 -0300 Subject: sctp: Add GSO support SCTP has the peculiarity that its packets cannot simply be segmented to (P)MTU. Its chunks must be contained in IP segments, padding respected. So we can't just generate a big skb, set gso_size to the fragmentation point and deliver it to the IP layer. This patch takes a different approach. SCTP will now build a skb as it would be if it was received using GRO. That is, there will be a cover skb with protocol headers and children ones containing the actual segments, already segmented in a way that respects SCTP RFCs. With that, we can tell skb_segment() to just split based on frag_list, trusting its sizes are already in accordance. This way SCTP can benefit from GSO and instead of passing several packets through the stack, it can pass a single large packet. v2: - Added support for receiving GSO frames, as requested by Dave Miller. - Clear skb->cb if packet is GSO (otherwise it's not used by SCTP) - Added heuristics similar to what we have in TCP for not generating single GSO packets that fill the cwnd. v3: - consider sctphdr size in skb_gso_transport_seglen() - rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for unsupported GSO") Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +++++-- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 ++ include/net/sctp/sctp.h | 4 ++++ include/net/sctp/structs.h | 5 +++++ 5 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index aa7b2400f98c..9c6c8ef2e9e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,8 +53,9 @@ enum { * headers in software. */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ + NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_TUNNEL_REMCSUM_BIT, + NETIF_F_GSO_SCTP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -128,6 +129,7 @@ enum { #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) +#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) @@ -166,7 +168,8 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks.
*/ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ + NETIF_F_GSO_SCTP) /* * If one device supports one of these features, then enable them diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f45929ce8157..fa6df2699532 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa3f9d7e8d5c..dc0fca747c5e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -487,6 +487,8 @@ enum { SKB_GSO_PARTIAL = 1 << 13, SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + + SKB_GSO_SCTP = 1 << 15, }; #if BITS_PER_LONG > 32 diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b392ac8382f2..632e205ca54b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); int sctp_remaddr_proc_init(struct net *net); void sctp_remaddr_proc_exit(struct net *net); +/* + * sctp/offload.c + */ +int sctp_offload_init(void); /* * Module global variables diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16b013a6191c..83c5ec58b93a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -566,6 +566,9 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that * case, we point at the "current" one, whatever that means @@ -696,6 +699,8 @@ struct sctp_packet { size_t overhead; /* This is the total size of all chunks INCLUDING padding. */ size_t size; + /* This is the maximum size this packet may have */ + size_t max_size; /* The packet is destined for this transport address. * The function we finally use to pass down to the next lower -- cgit From a91eb52abb504a1dd3248a5d07b54e7f95d5fcf1 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:32 +0300 Subject: qed: Revisit chain implementation RoCE driver is going to need a 32-bit chain [current chain implementation for qed* currently supports only 16-bit producer/consumer chains]. This patch adds said support, as well as doing other slight tweaks and modifications to qed's chain API. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_chain.h | 553 +++++++++++++++++++++++++----------------- include/linux/qed/qed_if.h | 3 +- 2 files changed, 336 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 5f8fcaaa6504..eceaa9ed2ae9 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -47,16 +47,56 @@ enum qed_chain_use_mode { QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ }; +enum qed_chain_cnt_type { + /* The chain's size/prod/cons are kept in 16-bit variables */ + QED_CHAIN_CNT_TYPE_U16, + + /* The chain's size/prod/cons are kept in 32-bit variables */ + QED_CHAIN_CNT_TYPE_U32, +}; + struct qed_chain_next { struct regpair next_phys; void *next_virt; }; +struct qed_chain_pbl_u16 { + u16 prod_page_idx; + u16 cons_page_idx; +}; + +struct qed_chain_pbl_u32 { + u32 prod_page_idx; + u32 cons_page_idx; +}; + struct qed_chain_pbl { + /* Base address of a pre-allocated buffer for pbl */ dma_addr_t p_phys_table; void *p_virt_table; - u16 prod_page_idx; - u16 cons_page_idx; + + /* Table for keeping the virtual addresses of the chain pages, + * respectively to the physical addresses in the pbl table. + */ + void **pp_virt_addr_tbl; + + /* Index to current used page by producer/consumer */ + union { + struct qed_chain_pbl_u16 pbl16; + struct qed_chain_pbl_u32 pbl32; + } u; +}; + +struct qed_chain_u16 { + /* Cyclic index of next element to produce/consme */ + u16 prod_idx; + u16 cons_idx; +}; + +struct qed_chain_u32 { + /* Cyclic index of next element to produce/consme */ + u32 prod_idx; + u32 cons_idx; }; struct qed_chain { @@ -64,13 +104,25 @@ struct qed_chain { dma_addr_t p_phys_addr; void *p_prod_elem; void *p_cons_elem; - u16 page_cnt; + enum qed_chain_mode mode; enum qed_chain_use_mode intended_use; /* used to produce/consume */ - u16 capacity; /*< number of _usable_ elements */ - u16 size; /* number of elements */ - u16 prod_idx; - u16 cons_idx; + enum qed_chain_cnt_type cnt_type; + + union { + struct qed_chain_u16 chain16; + struct qed_chain_u32 chain32; + } u; + + u32 page_cnt; + + /* Number of elements - capacity is for usable elements only, + * while size will contain total number of elements [for entire chain]. 
+ */ + u32 capacity; + u32 size; + + /* Elements information for fast calculations */ u16 elem_per_page; u16 elem_per_page_mask; u16 elem_unusable; @@ -96,66 +148,69 @@ struct qed_chain { #define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode)) +#define is_chain_u16(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16) +#define is_chain_u32(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32) + /* Accessors */ static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain) { - return p_chain->prod_idx; + return p_chain->u.chain16.prod_idx; } static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain) { - return p_chain->cons_idx; + return p_chain->u.chain16.cons_idx; +} + +static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) +{ + return p_chain->u.chain32.cons_idx; } static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { u16 used; - /* we don't need to trancate upon assignmet, as we assign u32->u16 */ - used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) - - (u32)p_chain->cons_idx; + used = (u16) (((u32)0x10000 + + (u32)p_chain->u.chain16.prod_idx) - + (u32)p_chain->u.chain16.cons_idx); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->prod_idx / p_chain->elem_per_page - - p_chain->cons_idx / p_chain->elem_per_page; + used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - + p_chain->u.chain16.cons_idx / p_chain->elem_per_page; - return p_chain->capacity - used; + return (u16)(p_chain->capacity - used); } -static inline u8 qed_chain_is_full(struct qed_chain *p_chain) +static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { - return qed_chain_get_elem_left(p_chain) == p_chain->capacity; -} + u32 used; -static inline u8 qed_chain_is_empty(struct qed_chain *p_chain) -{ - return qed_chain_get_elem_left(p_chain) == 0; -} + used = (u32) (((u64)0x100000000ULL + + (u64)p_chain->u.chain32.prod_idx) - + (u64)p_chain->u.chain32.cons_idx); + if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) + used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - + p_chain->u.chain32.cons_idx / p_chain->elem_per_page; -static inline u16 qed_chain_get_elem_per_page( - struct qed_chain *p_chain) -{ - return p_chain->elem_per_page; + return p_chain->capacity - used; } -static inline u16 qed_chain_get_usable_per_page( - struct qed_chain *p_chain) +static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain) { return p_chain->usable_per_page; } -static inline u16 qed_chain_get_unusable_per_page( - struct qed_chain *p_chain) +static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) { return p_chain->elem_unusable; } -static inline u16 qed_chain_get_size(struct qed_chain *p_chain) +static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) { - return p_chain->size; + return p_chain->page_cnt; } -static inline dma_addr_t -qed_chain_get_pbl_phys(struct qed_chain *p_chain) +static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) { return p_chain->pbl.p_phys_table; } @@ -172,64 +227,62 @@ qed_chain_get_pbl_phys(struct qed_chain *p_chain) */ static inline void qed_chain_advance_page(struct qed_chain *p_chain, - void **p_next_elem, - u16 *idx_to_inc, - u16 *page_to_inc) + void **p_next_elem, void *idx_to_inc, void *page_to_inc) { + struct qed_chain_next *p_next = NULL; + u32 page_index = 0; switch (p_chain->mode) { case QED_CHAIN_MODE_NEXT_PTR: - { - struct qed_chain_next *p_next = *p_next_elem; + p_next = *p_next_elem; *p_next_elem = 
p_next->next_virt; - *idx_to_inc += p_chain->elem_unusable; + if (is_chain_u16(p_chain)) + *(u16 *)idx_to_inc += p_chain->elem_unusable; + else + *(u32 *)idx_to_inc += p_chain->elem_unusable; break; - } case QED_CHAIN_MODE_SINGLE: *p_next_elem = p_chain->p_virt_addr; break; case QED_CHAIN_MODE_PBL: - /* It is assumed pages are sequential, next element needs - * to change only when passing going back to first from last. - */ - if (++(*page_to_inc) == p_chain->page_cnt) { - *page_to_inc = 0; - *p_next_elem = p_chain->p_virt_addr; + if (is_chain_u16(p_chain)) { + if (++(*(u16 *)page_to_inc) == p_chain->page_cnt) + *(u16 *)page_to_inc = 0; + page_index = *(u16 *)page_to_inc; + } else { + if (++(*(u32 *)page_to_inc) == p_chain->page_cnt) + *(u32 *)page_to_inc = 0; + page_index = *(u32 *)page_to_inc; } + *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; } } #define is_unusable_idx(p, idx) \ - (((p)->idx & (p)->elem_per_page_mask) == (p)->usable_per_page) + (((p)->u.chain16.idx & (p)->elem_per_page_mask) == (p)->usable_per_page) + +#define is_unusable_idx_u32(p, idx) \ + (((p)->u.chain32.idx & (p)->elem_per_page_mask) == (p)->usable_per_page) +#define is_unusable_next_idx(p, idx) \ + ((((p)->u.chain16.idx + 1) & (p)->elem_per_page_mask) == \ + (p)->usable_per_page) -#define is_unusable_next_idx(p, idx) \ - ((((p)->idx + 1) & (p)->elem_per_page_mask) == (p)->usable_per_page) +#define is_unusable_next_idx_u32(p, idx) \ + ((((p)->u.chain32.idx + 1) & (p)->elem_per_page_mask) == \ + (p)->usable_per_page) -#define test_ans_skip(p, idx) \ +#define test_and_skip(p, idx) \ do { \ - if (is_unusable_idx(p, idx)) { \ - (p)->idx += (p)->elem_unusable; \ + if (is_chain_u16(p)) { \ + if (is_unusable_idx(p, idx)) \ + (p)->u.chain16.idx += (p)->elem_unusable; \ + } else { \ + if (is_unusable_idx_u32(p, idx)) \ + (p)->u.chain32.idx += (p)->elem_unusable; \ } \ } while (0) -/** - * @brief qed_chain_return_multi_produced - - * - * A chain in which the driver "Produces" elements should use this API - * to indicate previous produced elements are now consumed. 
- * - * @param p_chain - * @param num - */ -static inline void -qed_chain_return_multi_produced(struct qed_chain *p_chain, - u16 num) -{ - p_chain->cons_idx += num; - test_ans_skip(p_chain, cons_idx); -} - /** * @brief qed_chain_return_produced - * @@ -240,8 +293,11 @@ qed_chain_return_multi_produced(struct qed_chain *p_chain, */ static inline void qed_chain_return_produced(struct qed_chain *p_chain) { - p_chain->cons_idx++; - test_ans_skip(p_chain, cons_idx); + if (is_chain_u16(p_chain)) + p_chain->u.chain16.cons_idx++; + else + p_chain->u.chain32.cons_idx++; + test_and_skip(p_chain, cons_idx); } /** @@ -257,21 +313,33 @@ static inline void qed_chain_return_produced(struct qed_chain *p_chain) */ static inline void *qed_chain_produce(struct qed_chain *p_chain) { - void *ret = NULL; - - if ((p_chain->prod_idx & p_chain->elem_per_page_mask) == - p_chain->next_page_mask) { - qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, - &p_chain->prod_idx, - &p_chain->pbl.prod_page_idx); + void *p_ret = NULL, *p_prod_idx, *p_prod_page_idx; + + if (is_chain_u16(p_chain)) { + if ((p_chain->u.chain16.prod_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_prod_idx = &p_chain->u.chain16.prod_idx; + p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + p_prod_idx, p_prod_page_idx); + } + p_chain->u.chain16.prod_idx++; + } else { + if ((p_chain->u.chain32.prod_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_prod_idx = &p_chain->u.chain32.prod_idx; + p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + p_prod_idx, p_prod_page_idx); + } + p_chain->u.chain32.prod_idx++; } - ret = p_chain->p_prod_elem; - p_chain->prod_idx++; + p_ret = p_chain->p_prod_elem; p_chain->p_prod_elem = (void *)(((u8 *)p_chain->p_prod_elem) + p_chain->elem_size); - return ret; + return p_ret; } /** @@ -282,9 +350,9 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) * @param p_chain * @param num * - * @return u16, number of unusable BDs + * @return number of unusable BDs */ -static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) +static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain) { return p_chain->capacity; } @@ -297,11 +365,13 @@ static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) * * @param p_chain */ -static inline void -qed_chain_recycle_consumed(struct qed_chain *p_chain) +static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain) { - test_ans_skip(p_chain, prod_idx); - p_chain->prod_idx++; + test_and_skip(p_chain, prod_idx); + if (is_chain_u16(p_chain)) + p_chain->u.chain16.prod_idx++; + else + p_chain->u.chain32.prod_idx++; } /** @@ -316,21 +386,33 @@ qed_chain_recycle_consumed(struct qed_chain *p_chain) */ static inline void *qed_chain_consume(struct qed_chain *p_chain) { - void *ret = NULL; - - if ((p_chain->cons_idx & p_chain->elem_per_page_mask) == - p_chain->next_page_mask) { + void *p_ret = NULL, *p_cons_idx, *p_cons_page_idx; + + if (is_chain_u16(p_chain)) { + if ((p_chain->u.chain16.cons_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_cons_idx = &p_chain->u.chain16.cons_idx; + p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + p_cons_idx, p_cons_page_idx); + } + p_chain->u.chain16.cons_idx++; + } else { + if ((p_chain->u.chain32.cons_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) 
{ + p_cons_idx = &p_chain->u.chain32.cons_idx; + p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, - &p_chain->cons_idx, - &p_chain->pbl.cons_page_idx); + p_cons_idx, p_cons_page_idx); + } + p_chain->u.chain32.cons_idx++; } - ret = p_chain->p_cons_elem; - p_chain->cons_idx++; + p_ret = p_chain->p_cons_elem; p_chain->p_cons_elem = (void *)(((u8 *)p_chain->p_cons_elem) + p_chain->elem_size); - return ret; + return p_ret; } /** @@ -340,16 +422,33 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) */ static inline void qed_chain_reset(struct qed_chain *p_chain) { - int i; - - p_chain->prod_idx = 0; - p_chain->cons_idx = 0; - p_chain->p_cons_elem = p_chain->p_virt_addr; - p_chain->p_prod_elem = p_chain->p_virt_addr; + u32 i; + + if (is_chain_u16(p_chain)) { + p_chain->u.chain16.prod_idx = 0; + p_chain->u.chain16.cons_idx = 0; + } else { + p_chain->u.chain32.prod_idx = 0; + p_chain->u.chain32.cons_idx = 0; + } + p_chain->p_cons_elem = p_chain->p_virt_addr; + p_chain->p_prod_elem = p_chain->p_virt_addr; if (p_chain->mode == QED_CHAIN_MODE_PBL) { - p_chain->pbl.prod_page_idx = p_chain->page_cnt - 1; - p_chain->pbl.cons_page_idx = p_chain->page_cnt - 1; + /* Use (page_cnt - 1) as a reset value for the prod/cons page's + * indices, to avoid unnecessary page advancing on the first + * call to qed_chain_produce/consume. Instead, the indices + * will be advanced to page_cnt and then will be wrapped to 0. + */ + u32 reset_val = p_chain->page_cnt - 1; + + if (is_chain_u16(p_chain)) { + p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val; + p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val; + } else { + p_chain->pbl.u.pbl32.prod_page_idx = reset_val; + p_chain->pbl.u.pbl32.cons_page_idx = reset_val; + } } switch (p_chain->intended_use) { @@ -377,168 +476,184 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) * @param intended_use * @param mode */ -static inline void qed_chain_init(struct qed_chain *p_chain, - void *p_virt_addr, - dma_addr_t p_phys_addr, - u16 page_cnt, - u8 elem_size, - enum qed_chain_use_mode intended_use, - enum qed_chain_mode mode) +static inline void qed_chain_init_params(struct qed_chain *p_chain, + u32 page_cnt, + u8 elem_size, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + enum qed_chain_cnt_type cnt_type) { /* chain fixed parameters */ - p_chain->p_virt_addr = p_virt_addr; - p_chain->p_phys_addr = p_phys_addr; + p_chain->p_virt_addr = NULL; + p_chain->p_phys_addr = 0; p_chain->elem_size = elem_size; - p_chain->page_cnt = page_cnt; + p_chain->intended_use = intended_use; p_chain->mode = mode; + p_chain->cnt_type = cnt_type; - p_chain->intended_use = intended_use; p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); - p_chain->usable_per_page = - USABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->capacity = p_chain->usable_per_page * page_cnt; - p_chain->size = p_chain->elem_per_page * page_cnt; + p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode); p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; - p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->next_page_mask = (p_chain->usable_per_page & p_chain->elem_per_page_mask); - if (mode == QED_CHAIN_MODE_NEXT_PTR) { - struct qed_chain_next *p_next; - u16 i; - - for (i = 0; i < page_cnt - 1; i++) { - /* Increment mem_phy to the next page. */ - p_phys_addr += QED_CHAIN_PAGE_SIZE; - - /* Initialize the physical address of the next page. 
*/ - p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + - elem_size * - p_chain-> - usable_per_page); - - p_next->next_phys.lo = DMA_LO_LE(p_phys_addr); - p_next->next_phys.hi = DMA_HI_LE(p_phys_addr); - - /* Initialize the virtual address of the next page. */ - p_next->next_virt = (void *)((u8 *)p_virt_addr + - QED_CHAIN_PAGE_SIZE); - - /* Move to the next page. */ - p_virt_addr = p_next->next_virt; - } - - /* Last page's next should point to beginning of the chain */ - p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + - elem_size * - p_chain->usable_per_page); + p_chain->page_cnt = page_cnt; + p_chain->capacity = p_chain->usable_per_page * page_cnt; + p_chain->size = p_chain->elem_per_page * page_cnt; - p_next->next_phys.lo = DMA_LO_LE(p_chain->p_phys_addr); - p_next->next_phys.hi = DMA_HI_LE(p_chain->p_phys_addr); - p_next->next_virt = p_chain->p_virt_addr; - } - qed_chain_reset(p_chain); + p_chain->pbl.p_phys_table = 0; + p_chain->pbl.p_virt_table = NULL; + p_chain->pbl.pp_virt_addr_tbl = NULL; } /** - * @brief qed_chain_pbl_init - Initalizes a basic pbl chain - * struct + * @brief qed_chain_init_mem - + * + * Initalizes a basic chain struct with its chain buffers + * * @param p_chain * @param p_virt_addr virtual address of allocated buffer's beginning * @param p_phys_addr physical address of allocated buffer's beginning - * @param page_cnt number of pages in the allocated buffer - * @param elem_size size of each element in the chain - * @param use_mode - * @param p_phys_pbl pointer to a pre-allocated side table - * which will hold physical page addresses. - * @param p_virt_pbl pointer to a pre allocated side table - * which will hold virtual page addresses. + * */ -static inline void -qed_chain_pbl_init(struct qed_chain *p_chain, - void *p_virt_addr, - dma_addr_t p_phys_addr, - u16 page_cnt, - u8 elem_size, - enum qed_chain_use_mode use_mode, - dma_addr_t p_phys_pbl, - dma_addr_t *p_virt_pbl) +static inline void qed_chain_init_mem(struct qed_chain *p_chain, + void *p_virt_addr, dma_addr_t p_phys_addr) { - dma_addr_t *p_pbl_dma = p_virt_pbl; - int i; - - qed_chain_init(p_chain, p_virt_addr, p_phys_addr, page_cnt, - elem_size, use_mode, QED_CHAIN_MODE_PBL); + p_chain->p_virt_addr = p_virt_addr; + p_chain->p_phys_addr = p_phys_addr; +} +/** + * @brief qed_chain_init_pbl_mem - + * + * Initalizes a basic chain struct with its pbl buffers + * + * @param p_chain + * @param p_virt_pbl pointer to a pre allocated side table which will hold + * virtual page addresses. + * @param p_phys_pbl pointer to a pre-allocated side table which will hold + * physical page addresses. + * @param pp_virt_addr_tbl + * pointer to a pre-allocated side table which will hold + * the virtual addresses of the chain pages. 
+ * + */ +static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, + void *p_virt_pbl, + dma_addr_t p_phys_pbl, + void **pp_virt_addr_tbl) +{ p_chain->pbl.p_phys_table = p_phys_pbl; p_chain->pbl.p_virt_table = p_virt_pbl; - - /* Fill the PBL with physical addresses*/ - for (i = 0; i < page_cnt; i++) { - *p_pbl_dma = p_phys_addr; - p_phys_addr += QED_CHAIN_PAGE_SIZE; - p_pbl_dma++; - } + p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; } /** - * @brief qed_chain_set_prod - sets the prod to the given - * value + * @brief qed_chain_init_next_ptr_elem - + * + * Initalizes a next pointer element + * + * @param p_chain + * @param p_virt_curr virtual address of a chain page of which the next + * pointer element is initialized + * @param p_virt_next virtual address of the next chain page + * @param p_phys_next physical address of the next chain page * - * @param prod_idx - * @param p_prod_elem */ -static inline void qed_chain_set_prod(struct qed_chain *p_chain, - u16 prod_idx, - void *p_prod_elem) +static inline void +qed_chain_init_next_ptr_elem(struct qed_chain *p_chain, + void *p_virt_curr, + void *p_virt_next, dma_addr_t p_phys_next) { - p_chain->prod_idx = prod_idx; - p_chain->p_prod_elem = p_prod_elem; + struct qed_chain_next *p_next; + u32 size; + + size = p_chain->elem_size * p_chain->usable_per_page; + p_next = (struct qed_chain_next *)((u8 *)p_virt_curr + size); + + DMA_REGPAIR_LE(p_next->next_phys, p_phys_next); + + p_next->next_virt = p_virt_next; } /** - * @brief qed_chain_get_elem - + * @brief qed_chain_get_last_elem - * - * get a pointer to an element represented by absolute idx + * Returns a pointer to the last element of the chain * * @param p_chain - * @assumption p_chain->size is a power of 2 * - * @return void*, a pointer to next element + * @return void* */ -static inline void *qed_chain_sge_get_elem(struct qed_chain *p_chain, - u16 idx) +static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) { - void *ret = NULL; - - if (idx >= p_chain->size) - return NULL; + struct qed_chain_next *p_next = NULL; + void *p_virt_addr = NULL; + u32 size, last_page_idx; - ret = (u8 *)p_chain->p_virt_addr + p_chain->elem_size * idx; + if (!p_chain->p_virt_addr) + goto out; - return ret; + switch (p_chain->mode) { + case QED_CHAIN_MODE_NEXT_PTR: + size = p_chain->elem_size * p_chain->usable_per_page; + p_virt_addr = p_chain->p_virt_addr; + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + size); + while (p_next->next_virt != p_chain->p_virt_addr) { + p_virt_addr = p_next->next_virt; + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + size); + } + break; + case QED_CHAIN_MODE_SINGLE: + p_virt_addr = p_chain->p_virt_addr; + break; + case QED_CHAIN_MODE_PBL: + last_page_idx = p_chain->page_cnt - 1; + p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + break; + } + /* p_virt_addr points at this stage to the last page of the chain */ + size = p_chain->elem_size * (p_chain->usable_per_page - 1); + p_virt_addr = (u8 *)p_virt_addr + size; +out: + return p_virt_addr; } /** - * @brief qed_chain_sge_inc_cons_prod + * @brief qed_chain_set_prod - sets the prod to the given value * - * for sge chains, producer isn't increased serially, the ring - * is expected to be full at all times. Once elements are - * consumed, they are immediately produced. 
+ * @param prod_idx + * @param p_prod_elem + */ +static inline void qed_chain_set_prod(struct qed_chain *p_chain, + u32 prod_idx, void *p_prod_elem) +{ + if (is_chain_u16(p_chain)) + p_chain->u.chain16.prod_idx = (u16) prod_idx; + else + p_chain->u.chain32.prod_idx = prod_idx; + p_chain->p_prod_elem = p_prod_elem; +} + +/** + * @brief qed_chain_pbl_zero_mem - set chain memory to 0 * * @param p_chain - * @param cnt - * - * @return inline void */ -static inline void -qed_chain_sge_inc_cons_prod(struct qed_chain *p_chain, - u16 cnt) +static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) { - p_chain->prod_idx += cnt; - p_chain->cons_idx += cnt; + u32 i, page_cnt; + + if (p_chain->mode != QED_CHAIN_MODE_PBL) + return; + + page_cnt = qed_chain_get_page_cnt(p_chain); + + for (i = 0; i < page_cnt; i++) + memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + QED_CHAIN_PAGE_SIZE); } #endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4c29439f54bf..15efccfdc46e 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -325,7 +325,8 @@ struct qed_common_ops { int (*chain_alloc)(struct qed_dev *cdev, enum qed_chain_use_mode intended_use, enum qed_chain_mode mode, - u16 num_elems, + enum qed_chain_cnt_type cnt_type, + u32 num_elems, size_t elem_size, struct qed_chain *p_chain); -- cgit From 7a9b6b8f6e4c52b31830d32570c2a226e27651f9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:33 +0300 Subject: qed: Add common HSI for new protocols This adds the qed portion of the RoCE & iSCSI firmware HSI, as well as adding several new common HSI files which would be required by both qed and qed* protocols. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 190 ++++- include/linux/qed/iscsi_common.h | 1439 ++++++++++++++++++++++++++++++++++++ include/linux/qed/rdma_common.h | 44 ++ include/linux/qed/roce_common.h | 17 + include/linux/qed/storage_common.h | 91 +++ include/linux/qed/tcp_common.h | 226 ++++++ 6 files changed, 2006 insertions(+), 1 deletion(-) create mode 100644 include/linux/qed/iscsi_common.h create mode 100644 include/linux/qed/rdma_common.h create mode 100644 include/linux/qed/roce_common.h create mode 100644 include/linux/qed/storage_common.h create mode 100644 include/linux/qed/tcp_common.h (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 285189a5ea6d..077a3b6cc80f 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -12,6 +12,7 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 #define X_FINAL_CLEANUP_AGG_INT 1 +#define NUM_OF_GLOBAL_QUEUES 128 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 @@ -694,7 +695,10 @@ struct parsing_and_err_flags { #define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT 15 }; -/* Concrete Function ID. 
*/ +struct pb_context { + __le32 crc[4]; +}; + struct pxp_concrete_fid { __le16 fid; #define PXP_CONCRETE_FID_PFID_MASK 0xF @@ -761,6 +765,72 @@ struct pxp_ptt_entry { }; /* RSS hash type */ +struct rdif_task_context { + __le32 initial_ref_tag; + __le16 app_tag_value; + __le16 app_tag_mask; + u8 flags0; +#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0 +#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1 +#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1 +#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1 +#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2 +#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1 +#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3 +#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3 +#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4 +#define RDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1 +#define RDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6 +#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1 +#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 7 + u8 partial_dif_data[7]; + __le16 partial_crc_value; + __le16 partial_checksum_value; + __le32 offset_in_io; + __le16 flags1; +#define RDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0 +#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1 +#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2 +#define RDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4 +#define RDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5 +#define RDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7 +#define RDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6 +#define RDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3 +#define RDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9 +#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1 +#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11 +#define RDIF_TASK_CONTEXT_RESERVED0_MASK 0x1 +#define RDIF_TASK_CONTEXT_RESERVED0_SHIFT 12 +#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1 +#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 14 +#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 15 + __le16 state; +#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_MASK 0xF +#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_SHIFT 0 +#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_MASK 0xF +#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_SHIFT 4 +#define RDIF_TASK_CONTEXT_ERRORINIO_MASK 0x1 +#define RDIF_TASK_CONTEXT_ERRORINIO_SHIFT 8 +#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1 +#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9 +#define RDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF +#define RDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 10 +#define RDIF_TASK_CONTEXT_RESERVED1_MASK 0x3 +#define RDIF_TASK_CONTEXT_RESERVED1_SHIFT 14 + __le32 reserved2; +}; + enum rss_hash_type { RSS_HASH_TYPE_DEFAULT = 0, RSS_HASH_TYPE_IPV4 = 1, @@ -789,4 +859,122 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; +struct tdif_task_context { + __le32 initial_ref_tag; + __le16 app_tag_value; + __le16 app_tag_mask; + __le16 partial_crc_valueB; + __le16 partial_checksum_valueB; + __le16 stateB; +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_MASK 0xF +#define 
TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_SHIFT 0 +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_MASK 0xF +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_SHIFT 4 +#define TDIF_TASK_CONTEXT_ERRORINIOB_MASK 0x1 +#define TDIF_TASK_CONTEXT_ERRORINIOB_SHIFT 8 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9 +#define TDIF_TASK_CONTEXT_RESERVED0_MASK 0x3F +#define TDIF_TASK_CONTEXT_RESERVED0_SHIFT 10 + u8 reserved1; + u8 flags0; +#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0 +#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1 +#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1 +#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1 +#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2 +#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1 +#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3 +#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3 +#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4 +#define TDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1 +#define TDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6 +#define TDIF_TASK_CONTEXT_RESERVED2_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED2_SHIFT 7 + __le32 flags1; +#define TDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0 +#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1 +#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2 +#define TDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4 +#define TDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5 +#define TDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7 +#define TDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6 +#define TDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3 +#define TDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9 +#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1 +#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11 +#define TDIF_TASK_CONTEXT_RESERVED3_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED3_SHIFT 12 +#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1 +#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13 +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_MASK 0xF +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_SHIFT 14 +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_MASK 0xF +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_SHIFT 18 +#define TDIF_TASK_CONTEXT_ERRORINIOA_MASK 0x1 +#define TDIF_TASK_CONTEXT_ERRORINIOA_SHIFT 22 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_MASK 0x1 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_SHIFT 23 +#define TDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF +#define TDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 24 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 28 +#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 29 +#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1 +#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 30 +#define TDIF_TASK_CONTEXT_RESERVED4_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED4_SHIFT 31 + __le32 offset_in_iob; + __le16 partial_crc_value_a; + __le16 partial_checksum_valuea_; + __le32 offset_in_ioa; + u8 partial_dif_data_a[8]; + u8 partial_dif_data_b[8]; +}; + +struct timers_context { + __le32 logical_client0; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF +#define 
TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC0_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC0_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED0_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED0_SHIFT 30 + __le32 logical_client1; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC1_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC1_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED1_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED1_SHIFT 30 + __le32 logical_client2; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC2_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC2_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC2_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED2_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED2_SHIFT 30 + __le32 host_expiration_fields; +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK 0x1 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28 +#define TIMERS_CONTEXT_RESERVED3_MASK 0x7 +#define TIMERS_CONTEXT_RESERVED3_SHIFT 29 +}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h new file mode 100644 index 000000000000..b3c0feb15ae9 --- /dev/null +++ b/include/linux/qed/iscsi_common.h @@ -0,0 +1,1439 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __ISCSI_COMMON__ +#define __ISCSI_COMMON__ +/**********************/ +/* ISCSI FW CONSTANTS */ +/**********************/ + +/* iSCSI HSI constants */ +#define ISCSI_DEFAULT_MTU (1500) + +/* Current iSCSI HSI version number composed of two fields (16 bit) */ +#define ISCSI_HSI_MAJOR_VERSION (0) +#define ISCSI_HSI_MINOR_VERSION (0) + +/* KWQ (kernel work queue) layer codes */ +#define ISCSI_SLOW_PATH_LAYER_CODE (6) + +/* CQE completion status */ +#define ISCSI_EQE_COMPLETION_SUCCESS (0x0) +#define ISCSI_EQE_RST_CONN_RCVD (0x1) + +/* iSCSI parameter defaults */ +#define ISCSI_DEFAULT_HEADER_DIGEST (0) +#define ISCSI_DEFAULT_DATA_DIGEST (0) +#define ISCSI_DEFAULT_INITIAL_R2T (1) +#define ISCSI_DEFAULT_IMMEDIATE_DATA (1) +#define ISCSI_DEFAULT_MAX_PDU_LENGTH (0x2000) +#define ISCSI_DEFAULT_FIRST_BURST_LENGTH (0x10000) +#define ISCSI_DEFAULT_MAX_BURST_LENGTH (0x40000) +#define ISCSI_DEFAULT_MAX_OUTSTANDING_R2T (1) + +/* iSCSI parameter limits */ +#define ISCSI_MIN_VAL_MAX_PDU_LENGTH (0x200) +#define ISCSI_MAX_VAL_MAX_PDU_LENGTH (0xffffff) +#define ISCSI_MIN_VAL_BURST_LENGTH (0x200) +#define ISCSI_MAX_VAL_BURST_LENGTH (0xffffff) +#define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T (1) +#define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T (0xff) + +/* iSCSI reserved params */ +#define ISCSI_ITT_ALL_ONES (0xffffffff) +#define ISCSI_TTT_ALL_ONES (0xffffffff) + +#define ISCSI_OPTION_1_OFF_CHIP_TCP 1 +#define ISCSI_OPTION_2_ON_CHIP_TCP 2 + +#define ISCSI_INITIATOR_MODE 0 +#define ISCSI_TARGET_MODE 1 + +/* iSCSI request op codes */ +#define ISCSI_OPCODE_NOP_OUT_NO_IMM (0) +#define ISCSI_OPCODE_NOP_OUT ( \ + ISCSI_OPCODE_NOP_OUT_NO_IMM | 0x40) +#define ISCSI_OPCODE_SCSI_CMD_NO_IMM (1) +#define ISCSI_OPCODE_SCSI_CMD ( \ + ISCSI_OPCODE_SCSI_CMD_NO_IMM | 0x40) +#define ISCSI_OPCODE_TMF_REQUEST_NO_IMM (2) +#define ISCSI_OPCODE_TMF_REQUEST ( \ + ISCSI_OPCODE_TMF_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM (3) +#define ISCSI_OPCODE_LOGIN_REQUEST ( \ + ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_TEXT_REQUEST_NO_IMM (4) +#define ISCSI_OPCODE_TEXT_REQUEST ( \ + ISCSI_OPCODE_TEXT_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_DATA_OUT (5) +#define ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM (6) +#define ISCSI_OPCODE_LOGOUT_REQUEST ( \ + ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM | 0x40) + +/* iSCSI response/messages op codes */ +#define ISCSI_OPCODE_NOP_IN (0x20) +#define ISCSI_OPCODE_SCSI_RESPONSE (0x21) +#define ISCSI_OPCODE_TMF_RESPONSE (0x22) +#define ISCSI_OPCODE_LOGIN_RESPONSE (0x23) +#define ISCSI_OPCODE_TEXT_RESPONSE (0x24) +#define ISCSI_OPCODE_DATA_IN (0x25) +#define ISCSI_OPCODE_LOGOUT_RESPONSE (0x26) +#define ISCSI_OPCODE_R2T (0x31) +#define ISCSI_OPCODE_ASYNC_MSG (0x32) +#define ISCSI_OPCODE_REJECT (0x3f) + +/* iSCSI stages */ +#define ISCSI_STAGE_SECURITY_NEGOTIATION (0) +#define ISCSI_STAGE_LOGIN_OPERATIONAL_NEGOTIATION (1) +#define ISCSI_STAGE_FULL_FEATURE_PHASE (3) + +/* iSCSI CQE errors */ +#define CQE_ERROR_BITMAP_DATA_DIGEST (0x08) +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN (0x10) +#define CQE_ERROR_BITMAP_DATA_TRUNCATED (0x20) + +struct cqe_error_bitmap { + u8 cqe_error_status_bits; +#define CQE_ERROR_BITMAP_DIF_ERR_BITS_MASK 0x7 +#define CQE_ERROR_BITMAP_DIF_ERR_BITS_SHIFT 0 +#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_MASK 0x1 +#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_SHIFT 3 +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_MASK 0x1 +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_SHIFT 4 +#define CQE_ERROR_BITMAP_DATA_TRUNCATED_ERR_MASK 0x1 +#define 
CQE_ERROR_BITMAP_DATA_TRUNCATED_ERR_SHIFT 5 +#define CQE_ERROR_BITMAP_UNDER_RUN_ERR_MASK 0x1 +#define CQE_ERROR_BITMAP_UNDER_RUN_ERR_SHIFT 6 +#define CQE_ERROR_BITMAP_RESERVED2_MASK 0x1 +#define CQE_ERROR_BITMAP_RESERVED2_SHIFT 7 +}; + +union cqe_error_status { + u8 error_status; + struct cqe_error_bitmap error_bits; +}; + +struct data_hdr { + __le32 data[12]; +}; + +struct iscsi_async_msg_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_ASYNC_MSG_HDR_RSRV_MASK 0x7F +#define ISCSI_ASYNC_MSG_HDR_RSRV_SHIFT 0 +#define ISCSI_ASYNC_MSG_HDR_CONST1_MASK 0x1 +#define ISCSI_ASYNC_MSG_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_ASYNC_MSG_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_ASYNC_MSG_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_ASYNC_MSG_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_ASYNC_MSG_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 all_ones; + __le32 reserved1; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le16 param1_rsrv; + u8 async_vcode; + u8 async_event; + __le16 param3_rsrv; + __le16 param2_rsrv; + __le32 reserved7; +}; + +struct iscsi_sge { + struct regpair sge_addr; + __le16 sge_len; + __le16 reserved0; + __le32 reserved1; +}; + +struct iscsi_cached_sge_ctx { + struct iscsi_sge sge; + struct regpair reserved; + __le32 dsgl_curr_offset[2]; +}; + +struct iscsi_cmd_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_CMD_HDR_ATTR_MASK 0x7 +#define ISCSI_CMD_HDR_ATTR_SHIFT 0 +#define ISCSI_CMD_HDR_RSRV_MASK 0x3 +#define ISCSI_CMD_HDR_RSRV_SHIFT 3 +#define ISCSI_CMD_HDR_WRITE_MASK 0x1 +#define ISCSI_CMD_HDR_WRITE_SHIFT 5 +#define ISCSI_CMD_HDR_READ_MASK 0x1 +#define ISCSI_CMD_HDR_READ_SHIFT 6 +#define ISCSI_CMD_HDR_FINAL_MASK 0x1 +#define ISCSI_CMD_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_CMD_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_CMD_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_CMD_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_CMD_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 expected_transfer_length; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 cdb[4]; +}; + +struct iscsi_common_hdr { + u8 hdr_status; + u8 hdr_response; + u8 hdr_flags; + u8 hdr_first_byte; +#define ISCSI_COMMON_HDR_OPCODE_MASK 0x3F +#define ISCSI_COMMON_HDR_OPCODE_SHIFT 0 +#define ISCSI_COMMON_HDR_IMM_MASK 0x1 +#define ISCSI_COMMON_HDR_IMM_SHIFT 6 +#define ISCSI_COMMON_HDR_RSRV_MASK 0x1 +#define ISCSI_COMMON_HDR_RSRV_SHIFT 7 + __le32 hdr_second_dword; +#define ISCSI_COMMON_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_COMMON_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 lun_reserved[4]; + __le32 data[6]; +}; + +struct iscsi_conn_offload_params { + struct regpair sq_pbl_addr; + struct regpair r2tq_pbl_addr; + struct regpair xhq_pbl_addr; + struct regpair uhq_pbl_addr; + __le32 initial_ack; + __le16 physical_q0; + __le16 physical_q1; + u8 flags; +#define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0 +#define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT 1 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x3F +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 2 + u8 pbl_page_size_log; + u8 pbe_page_size_log; + u8 default_cq; + __le32 stat_sn; +}; + +struct iscsi_slow_path_hdr { + u8 op_code; + u8 flags; +#define ISCSI_SLOW_PATH_HDR_RESERVED0_MASK 0xF +#define 
ISCSI_SLOW_PATH_HDR_RESERVED0_SHIFT 0 +#define ISCSI_SLOW_PATH_HDR_LAYER_CODE_MASK 0x7 +#define ISCSI_SLOW_PATH_HDR_LAYER_CODE_SHIFT 4 +#define ISCSI_SLOW_PATH_HDR_RESERVED1_MASK 0x1 +#define ISCSI_SLOW_PATH_HDR_RESERVED1_SHIFT 7 +}; + +struct iscsi_conn_update_ramrod_params { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + u8 flags; +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_SHIFT 0 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DD_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DD_EN_SHIFT 1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_SHIFT 2 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_SHIFT 3 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0xF +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 4 + u8 reserved0[3]; + __le32 max_seq_size; + __le32 max_send_pdu_length; + __le32 max_recv_pdu_length; + __le32 first_seq_length; + __le32 exp_stat_sn; +}; + +struct iscsi_ext_cdb_cmd_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_EXT_CDB_CMD_HDR_ATTR_MASK 0x7 +#define ISCSI_EXT_CDB_CMD_HDR_ATTR_SHIFT 0 +#define ISCSI_EXT_CDB_CMD_HDR_RSRV_MASK 0x3 +#define ISCSI_EXT_CDB_CMD_HDR_RSRV_SHIFT 3 +#define ISCSI_EXT_CDB_CMD_HDR_WRITE_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_WRITE_SHIFT 5 +#define ISCSI_EXT_CDB_CMD_HDR_READ_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_READ_SHIFT 6 +#define ISCSI_EXT_CDB_CMD_HDR_FINAL_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_EXT_CDB_CMD_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_EXT_CDB_CMD_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_EXT_CDB_CMD_HDR_CDB_SIZE_MASK 0xFF +#define ISCSI_EXT_CDB_CMD_HDR_CDB_SIZE_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 expected_transfer_length; + __le32 cmd_sn; + __le32 exp_stat_sn; + struct iscsi_sge cdb_sge; +}; + +struct iscsi_login_req_hdr { + u8 version_min; + u8 version_max; + u8 flags_attr; +#define ISCSI_LOGIN_REQ_HDR_NSG_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_NSG_SHIFT 0 +#define ISCSI_LOGIN_REQ_HDR_CSG_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_CSG_SHIFT 2 +#define ISCSI_LOGIN_REQ_HDR_RSRV_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_RSRV_SHIFT 4 +#define ISCSI_LOGIN_REQ_HDR_C_MASK 0x1 +#define ISCSI_LOGIN_REQ_HDR_C_SHIFT 6 +#define ISCSI_LOGIN_REQ_HDR_T_MASK 0x1 +#define ISCSI_LOGIN_REQ_HDR_T_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 isid_TABC; + __le16 tsih; + __le16 isid_d; + __le32 itt; + __le16 reserved1; + __le16 cid; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved2[4]; +}; + +struct iscsi_logout_req_hdr { + __le16 reserved0; + u8 reason_code; + u8 opcode; + __le32 reserved1; + __le32 reserved2[2]; + __le32 itt; + __le16 reserved3; + __le16 cid; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved4[4]; +}; + +struct iscsi_data_out_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_DATA_OUT_HDR_RSRV_MASK 0x7F +#define ISCSI_DATA_OUT_HDR_RSRV_SHIFT 0 +#define ISCSI_DATA_OUT_HDR_FINAL_MASK 0x1 +#define ISCSI_DATA_OUT_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 reserved3; + __le32 exp_stat_sn; + __le32 reserved4; + 
__le32 data_sn; + __le32 buffer_offset; + __le32 reserved5; +}; + +struct iscsi_data_in_hdr { + u8 status_rsvd; + u8 reserved1; + u8 flags; +#define ISCSI_DATA_IN_HDR_STATUS_MASK 0x1 +#define ISCSI_DATA_IN_HDR_STATUS_SHIFT 0 +#define ISCSI_DATA_IN_HDR_UNDERFLOW_MASK 0x1 +#define ISCSI_DATA_IN_HDR_UNDERFLOW_SHIFT 1 +#define ISCSI_DATA_IN_HDR_OVERFLOW_MASK 0x1 +#define ISCSI_DATA_IN_HDR_OVERFLOW_SHIFT 2 +#define ISCSI_DATA_IN_HDR_RSRV_MASK 0x7 +#define ISCSI_DATA_IN_HDR_RSRV_SHIFT 3 +#define ISCSI_DATA_IN_HDR_ACK_MASK 0x1 +#define ISCSI_DATA_IN_HDR_ACK_SHIFT 6 +#define ISCSI_DATA_IN_HDR_FINAL_MASK 0x1 +#define ISCSI_DATA_IN_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 data_sn; + __le32 buffer_offset; + __le32 residual_count; +}; + +struct iscsi_r2t_hdr { + u8 reserved0[3]; + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 r2t_sn; + __le32 buffer_offset; + __le32 desired_data_trns_len; +}; + +struct iscsi_nop_out_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_NOP_OUT_HDR_RSRV_MASK 0x7F +#define ISCSI_NOP_OUT_HDR_RSRV_SHIFT 0 +#define ISCSI_NOP_OUT_HDR_CONST1_MASK 0x1 +#define ISCSI_NOP_OUT_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved3; + __le32 reserved4; + __le32 reserved5; + __le32 reserved6; +}; + +struct iscsi_nop_in_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_NOP_IN_HDR_RSRV_MASK 0x7F +#define ISCSI_NOP_IN_HDR_RSRV_SHIFT 0 +#define ISCSI_NOP_IN_HDR_CONST1_MASK 0x1 +#define ISCSI_NOP_IN_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_NOP_IN_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_NOP_IN_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_NOP_IN_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_NOP_IN_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved5; + __le32 reserved6; + __le32 reserved7; +}; + +struct iscsi_login_response_hdr { + u8 version_active; + u8 version_max; + u8 flags_attr; +#define ISCSI_LOGIN_RESPONSE_HDR_NSG_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_NSG_SHIFT 0 +#define ISCSI_LOGIN_RESPONSE_HDR_CSG_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_CSG_SHIFT 2 +#define ISCSI_LOGIN_RESPONSE_HDR_RSRV_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_RSRV_SHIFT 4 +#define ISCSI_LOGIN_RESPONSE_HDR_C_MASK 0x1 +#define ISCSI_LOGIN_RESPONSE_HDR_C_SHIFT 6 +#define ISCSI_LOGIN_RESPONSE_HDR_T_MASK 0x1 +#define ISCSI_LOGIN_RESPONSE_HDR_T_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 isid_TABC; + __le16 tsih; + __le16 isid_d; + __le32 itt; + __le32 reserved1; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le16 reserved2; + u8 status_detail; + u8 status_class; + __le32 reserved4[2]; +}; + +struct iscsi_logout_response_hdr { + u8 reserved1; + u8 response; + u8 flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGOUT_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGOUT_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define 
ISCSI_LOGOUT_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGOUT_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 reserved2[2]; + __le32 itt; + __le32 reserved3; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4; + __le16 time2retain; + __le16 time2wait; + __le32 reserved5[1]; +}; + +struct iscsi_text_request_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_TEXT_REQUEST_HDR_RSRV_MASK 0x3F +#define ISCSI_TEXT_REQUEST_HDR_RSRV_SHIFT 0 +#define ISCSI_TEXT_REQUEST_HDR_C_MASK 0x1 +#define ISCSI_TEXT_REQUEST_HDR_C_SHIFT 6 +#define ISCSI_TEXT_REQUEST_HDR_F_MASK 0x1 +#define ISCSI_TEXT_REQUEST_HDR_F_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TEXT_REQUEST_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TEXT_REQUEST_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TEXT_REQUEST_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TEXT_REQUEST_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved4[4]; +}; + +struct iscsi_text_response_hdr { + __le16 reserved1; + u8 flags; +#define ISCSI_TEXT_RESPONSE_HDR_RSRV_MASK 0x3F +#define ISCSI_TEXT_RESPONSE_HDR_RSRV_SHIFT 0 +#define ISCSI_TEXT_RESPONSE_HDR_C_MASK 0x1 +#define ISCSI_TEXT_RESPONSE_HDR_C_SHIFT 6 +#define ISCSI_TEXT_RESPONSE_HDR_F_MASK 0x1 +#define ISCSI_TEXT_RESPONSE_HDR_F_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TEXT_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TEXT_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TEXT_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TEXT_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4[3]; +}; + +struct iscsi_tmf_request_hdr { + __le16 reserved0; + u8 function; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TMF_REQUEST_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TMF_REQUEST_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TMF_REQUEST_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TMF_REQUEST_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 rtt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 ref_cmd_sn; + __le32 exp_data_sn; + __le32 reserved4[2]; +}; + +struct iscsi_tmf_response_hdr { + u8 reserved2; + u8 hdr_response; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TMF_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TMF_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair reserved0; + __le32 itt; + __le32 rtt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4[3]; +}; + +struct iscsi_response_hdr { + u8 hdr_status; + u8 hdr_response; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 snack_tag; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 exp_data_sn; + __le32 bi_residual_count; + __le32 residual_count; +}; + +struct iscsi_reject_hdr { + u8 reserved4; + u8 hdr_reason; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_REJECT_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_REJECT_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define 
ISCSI_REJECT_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair reserved0; + __le32 reserved1; + __le32 reserved2; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 data_sn; + __le32 reserved3[2]; +}; + +union iscsi_task_hdr { + struct iscsi_common_hdr common; + struct data_hdr data; + struct iscsi_cmd_hdr cmd; + struct iscsi_ext_cdb_cmd_hdr ext_cdb_cmd; + struct iscsi_login_req_hdr login_req; + struct iscsi_logout_req_hdr logout_req; + struct iscsi_data_out_hdr data_out; + struct iscsi_data_in_hdr data_in; + struct iscsi_r2t_hdr r2t; + struct iscsi_nop_out_hdr nop_out; + struct iscsi_nop_in_hdr nop_in; + struct iscsi_login_response_hdr login_response; + struct iscsi_logout_response_hdr logout_response; + struct iscsi_text_request_hdr text_request; + struct iscsi_text_response_hdr text_response; + struct iscsi_tmf_request_hdr tmf_request; + struct iscsi_tmf_response_hdr tmf_response; + struct iscsi_response_hdr response; + struct iscsi_reject_hdr reject; + struct iscsi_async_msg_hdr async_msg; +}; + +struct iscsi_cqe_common { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le32 reserved[3]; + union iscsi_task_hdr iscsi_hdr; +}; + +struct iscsi_cqe_solicited { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le16 itid; + u8 task_type; + u8 fw_dbg_field; + __le32 reserved1[2]; + union iscsi_task_hdr iscsi_hdr; +}; + +struct iscsi_cqe_unsolicited { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le16 reserved0; + u8 reserved1; + u8 unsol_cqe_type; + struct regpair rqe_opaque; + union iscsi_task_hdr iscsi_hdr; +}; + +union iscsi_cqe { + struct iscsi_cqe_common cqe_common; + struct iscsi_cqe_solicited cqe_solicited; + struct iscsi_cqe_unsolicited cqe_unsolicited; +}; + +enum iscsi_cqes_type { + ISCSI_CQE_TYPE_SOLICITED = 1, + ISCSI_CQE_TYPE_UNSOLICITED, + ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE + , + ISCSI_CQE_TYPE_TASK_CLEANUP, + ISCSI_CQE_TYPE_DUMMY, + MAX_ISCSI_CQES_TYPE +}; + +enum iscsi_cqe_unsolicited_type { + ISCSI_CQE_UNSOLICITED_NONE, + ISCSI_CQE_UNSOLICITED_SINGLE, + ISCSI_CQE_UNSOLICITED_FIRST, + ISCSI_CQE_UNSOLICITED_MIDDLE, + ISCSI_CQE_UNSOLICITED_LAST, + MAX_ISCSI_CQE_UNSOLICITED_TYPE +}; + +struct iscsi_virt_sgl_ctx { + struct regpair sgl_base; + struct regpair dsgl_base; + __le32 sgl_initial_offset; + __le32 dsgl_initial_offset; + __le32 dsgl_curr_offset[2]; +}; + +struct iscsi_sgl_var_params { + u8 sgl_ptr; + u8 dsgl_ptr; + __le16 sge_offset; + __le16 dsge_offset; +}; + +struct iscsi_phys_sgl_ctx { + struct regpair sgl_base; + struct regpair dsgl_base; + u8 sgl_size; + u8 dsgl_size; + __le16 reserved; + struct iscsi_sgl_var_params var_params[2]; +}; + +union iscsi_data_desc_ctx { + struct iscsi_virt_sgl_ctx virt_sgl; + struct iscsi_phys_sgl_ctx phys_sgl; + struct iscsi_cached_sge_ctx cached_sge; +}; + +struct iscsi_debug_modes { + u8 flags; +#define ISCSI_DEBUG_MODES_ASSERT_IF_RX_CONN_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RX_CONN_ERROR_SHIFT 0 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_RESET_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_RESET_SHIFT 1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_FIN_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_FIN_SHIFT 2 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_CLEANUP_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_CLEANUP_SHIFT 3 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_SHIFT 4 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_MASK 0x1 +#define 
ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_SHIFT 5 +#define ISCSI_DEBUG_MODES_RESERVED0_MASK 0x3 +#define ISCSI_DEBUG_MODES_RESERVED0_SHIFT 6 +}; + +struct iscsi_dif_flags { + u8 flags; +#define ISCSI_DIF_FLAGS_PROT_INTERVAL_SIZE_LOG_MASK 0xF +#define ISCSI_DIF_FLAGS_PROT_INTERVAL_SIZE_LOG_SHIFT 0 +#define ISCSI_DIF_FLAGS_DIF_TO_PEER_MASK 0x1 +#define ISCSI_DIF_FLAGS_DIF_TO_PEER_SHIFT 4 +#define ISCSI_DIF_FLAGS_HOST_INTERFACE_MASK 0x7 +#define ISCSI_DIF_FLAGS_HOST_INTERFACE_SHIFT 5 +}; + +enum iscsi_eqe_opcode { + ISCSI_EVENT_TYPE_INIT_FUNC = 0, + ISCSI_EVENT_TYPE_DESTROY_FUNC, + ISCSI_EVENT_TYPE_OFFLOAD_CONN, + ISCSI_EVENT_TYPE_UPDATE_CONN, + ISCSI_EVENT_TYPE_CLEAR_SQ, + ISCSI_EVENT_TYPE_TERMINATE_CONN, + ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE, + ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE, + RESERVED8, + RESERVED9, + ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10, + ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD, + ISCSI_EVENT_TYPE_ASYN_CLOSE_RCVD, + ISCSI_EVENT_TYPE_ASYN_SYN_RCVD, + ISCSI_EVENT_TYPE_ASYN_MAX_RT_TIME, + ISCSI_EVENT_TYPE_ASYN_MAX_RT_CNT, + ISCSI_EVENT_TYPE_ASYN_MAX_KA_PROBES_CNT, + ISCSI_EVENT_TYPE_ASYN_FIN_WAIT2, + ISCSI_EVENT_TYPE_ISCSI_CONN_ERROR, + ISCSI_EVENT_TYPE_TCP_CONN_ERROR, + ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES, + MAX_ISCSI_EQE_OPCODE +}; + +enum iscsi_error_types { + ISCSI_STATUS_NONE = 0, + ISCSI_CQE_ERROR_UNSOLICITED_RCV_ON_INVALID_CONN = 1, + ISCSI_CONN_ERROR_TASK_CID_MISMATCH, + ISCSI_CONN_ERROR_TASK_NOT_VALID, + ISCSI_CONN_ERROR_RQ_RING_IS_FULL, + ISCSI_CONN_ERROR_CMDQ_RING_IS_FULL, + ISCSI_CONN_ERROR_HQE_CACHING_FAILED, + ISCSI_CONN_ERROR_HEADER_DIGEST_ERROR, + ISCSI_CONN_ERROR_LOCAL_COMPLETION_ERROR, + ISCSI_CONN_ERROR_DATA_OVERRUN, + ISCSI_CONN_ERROR_OUT_OF_SGES_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_URG_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_IP_OPTIONS_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_CONNECT_INVALID_WS_OPTION, + ISCSI_CONN_ERROR_TCP_IP_FRAGMENT_ERROR, + ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_LEN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_TYPE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_ITT_OUT_OF_RANGE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_TTT_OUT_OF_RANGE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SEG_LEN_EXCEEDS_PDU_SIZE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_OPCODE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_OPCODE_BEFORE_UPDATE, + ISCSI_CONN_ERROR_UNVALID_NOPIN_DSL, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_CARRIES_NO_DATA, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_IN_TTT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_OUT_ITT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_TTT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_BUFFER_OFFSET, + ISCSI_CONN_ERROR_PROTOCOL_ERR_BUFFER_OFFSET_OOO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_2, + ISCSI_CONN_ERROR_PROTOCOL_ERR_LUN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_F_BIT_ZERO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_F_BIT_ZERO_S_BIT_ONE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_EXP_STAT_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DSL_NOT_ZERO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_DSL, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SEG_LEN_TOO_BIG, + ISCSI_CONN_ERROR_PROTOCOL_ERR_OUTSTANDING_R2T_COUNT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DIF_TX, + ISCSI_CONN_ERROR_SENSE_DATA_LENGTH, + ISCSI_CONN_ERROR_DATA_PLACEMENT_ERROR, + ISCSI_ERROR_UNKNOWN, + MAX_ISCSI_ERROR_TYPES +}; + +struct iscsi_mflags { + u8 mflags; +#define ISCSI_MFLAGS_SLOW_IO_MASK 0x1 +#define ISCSI_MFLAGS_SLOW_IO_SHIFT 0 +#define ISCSI_MFLAGS_SINGLE_SGE_MASK 0x1 +#define 
ISCSI_MFLAGS_SINGLE_SGE_SHIFT 1 +#define ISCSI_MFLAGS_RESERVED_MASK 0x3F +#define ISCSI_MFLAGS_RESERVED_SHIFT 2 +}; + +struct iscsi_sgl { + struct regpair sgl_addr; + __le16 updated_sge_size; + __le16 updated_sge_offset; + __le32 byte_offset; +}; + +union iscsi_mstorm_sgl { + struct iscsi_sgl sgl_struct; + struct iscsi_sge single_sge; +}; + +enum iscsi_ramrod_cmd_id { + ISCSI_RAMROD_CMD_ID_UNUSED = 0, + ISCSI_RAMROD_CMD_ID_INIT_FUNC = 1, + ISCSI_RAMROD_CMD_ID_DESTROY_FUNC = 2, + ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN = 3, + ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4, + ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5, + ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6, + MAX_ISCSI_RAMROD_CMD_ID +}; + +struct iscsi_reg1 { + __le32 reg1_map; +#define ISCSI_REG1_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_REG1_NUM_FAST_SGES_SHIFT 0 +#define ISCSI_REG1_RESERVED1_MASK 0x1FFFFFFF +#define ISCSI_REG1_RESERVED1_SHIFT 3 +}; + +union iscsi_seq_num { + __le16 data_sn; + __le16 r2t_sn; +}; + +struct iscsi_spe_conn_offload { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + struct iscsi_conn_offload_params iscsi; + struct tcp_offload_params tcp; +}; + +struct iscsi_spe_conn_offload_option2 { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + struct iscsi_conn_offload_params iscsi; + struct tcp_offload_params_opt2 tcp; +}; + +struct iscsi_spe_conn_termination { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + u8 abortive; + u8 reserved0[7]; + struct regpair queue_cnts_addr; + struct regpair query_params_addr; +}; + +struct iscsi_spe_func_dstry { + struct iscsi_slow_path_hdr hdr; + __le16 reserved0; + __le32 reserved1; +}; + +struct iscsi_spe_func_init { + struct iscsi_slow_path_hdr hdr; + __le16 half_way_close_timeout; + u8 num_sq_pages_in_ring; + u8 num_r2tq_pages_in_ring; + u8 num_uhq_pages_in_ring; + u8 ll2_rx_queue_id; + u8 ooo_enable; + struct iscsi_debug_modes debug_mode; + __le16 reserved1; + __le32 reserved2; + __le32 reserved3; + __le32 reserved4; + struct scsi_init_func_params func_params; + struct scsi_init_func_queues q_params; +}; + +struct ystorm_iscsi_task_state { + union iscsi_data_desc_ctx sgl_ctx_union; + __le32 buffer_offset[2]; + __le16 bytes_nxt_dif; + __le16 rxmit_bytes_nxt_dif; + union iscsi_seq_num seq_num_union; + u8 dif_bytes_leftover; + u8 rxmit_dif_bytes_leftover; + __le16 reuse_count; + struct iscsi_dif_flags dif_flags; + u8 local_comp; + __le32 exp_r2t_sn; + __le32 sgl_offset[2]; +}; + +struct ystorm_iscsi_task_st_ctx { + struct ystorm_iscsi_task_state state; + union iscsi_task_hdr pdu_hdr; +}; + +struct ystorm_iscsi_task_ag_ctx { + u8 reserved; + u8 byte1; + __le16 word0; + u8 flags0; +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 6 
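
/*
 * Illustrative sketch (editor's aside, not part of the patch): the *_MASK /
 * *_SHIFT pairs used throughout these HSI structures describe sub-fields
 * packed into a single flags byte or word, accessed with the usual
 * read-modify-write pattern.  The EX_*_FIELD macros and the example_*()
 * helper below are local to this example, the field choices are arbitrary,
 * and the iscsi_common.h added by this patch is assumed to be in the
 * include path.
 */
#include <linux/types.h>
#include <linux/qed/iscsi_common.h>

#define EX_GET_FIELD(value, name) \
        (((value) >> name##_SHIFT) & name##_MASK)

#define EX_SET_FIELD(value, name, val)                                  \
        do {                                                            \
                (value) &= ~(name##_MASK << name##_SHIFT);              \
                (value) |= ((val) & name##_MASK) << name##_SHIFT;       \
        } while (0)

static bool example_task_flags(struct ystorm_iscsi_task_ag_ctx *ctx)
{
        /* Set the single-bit CF0EN sub-field in flags1 ... */
        EX_SET_FIELD(ctx->flags1, YSTORM_ISCSI_TASK_AG_CTX_CF0EN, 1);

        /* ... and report whether the VALID bit in flags0 is set */
        return EX_GET_FIELD(ctx->flags0, YSTORM_ISCSI_TASK_AG_CTX_VALID);
}
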
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 byte2; + __le32 TTT; + u8 byte3; + u8 byte4; + __le16 word1; +}; + +struct mstorm_iscsi_task_ag_ctx { + u8 cdu_validation; + u8 byte1; + __le16 task_cid; + u8 flags0; +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT 7 + u8 flags1; +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 byte2; + __le32 reg0; + u8 byte3; + u8 byte4; + __le16 word1; +}; + +struct ustorm_iscsi_task_ag_ctx { + u8 reserved; + u8 state; + __le16 icid; + u8 flags0; +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3 +#define 
USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6 + u8 flags1; +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 + u8 flags2; +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 6 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7 + u8 flags3; +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 + __le32 dif_err_intervals; + __le32 dif_error_1st_interval; + __le32 rcv_cont_len; + __le32 exp_cont_len; + __le32 total_data_acked; + __le32 exp_data_acked; + u8 next_tid_valid; + u8 byte3; + __le16 word1; + __le16 next_tid; + __le16 word3; + __le32 hdr_residual_count; + __le32 exp_r2t_sn; +}; + +struct mstorm_iscsi_task_st_ctx { + union iscsi_mstorm_sgl sgl_union; + struct iscsi_dif_flags dif_flags; + struct iscsi_mflags flags; + u8 sgl_size; + u8 host_sge_index; + __le16 dix_cur_sge_offset; + __le16 dix_cur_sge_size; + __le32 data_offset_rtid; + u8 dif_offset; + u8 dix_sgl_size; + u8 dix_sge_index; + u8 task_type; + struct regpair sense_db; + struct regpair dix_sgl_cur_sge; + __le32 rem_task_size; + __le16 reuse_count; + __le16 dif_data_residue; + u8 reserved0[4]; + __le32 reserved1[1]; +}; + +struct ustorm_iscsi_task_st_ctx { + __le32 rem_rcv_len; + __le32 exp_data_transfer_len; + __le32 exp_data_sn; + struct regpair lun; + struct iscsi_reg1 reg1; + u8 flags2; +#define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_MASK 0x7F +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_SHIFT 1 + u8 reserved2; + __le16 reserved3; + __le32 reserved4; + __le32 reserved5; + __le32 reserved6; + __le32 reserved7; + u8 task_type; + u8 error_flags; +#define USTORM_ISCSI_TASK_ST_CTX_DATA_DIGEST_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_DIGEST_ERROR_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_TRUNCATED_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_TRUNCATED_ERROR_SHIFT 1 +#define 
USTORM_ISCSI_TASK_ST_CTX_UNDER_RUN_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_UNDER_RUN_ERROR_SHIFT 2 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED8_MASK 0x1F +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED8_SHIFT 3 + u8 flags; +#define USTORM_ISCSI_TASK_ST_CTX_CQE_WRITE_MASK 0x3 +#define USTORM_ISCSI_TASK_ST_CTX_CQE_WRITE_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_SHIFT 2 +#define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT 3 +#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_SHIFT 4 +#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_SHIFT 5 +#define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_SHIFT 6 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_SHIFT 7 + u8 cq_rss_number; +}; + +struct iscsi_task_context { + struct ystorm_iscsi_task_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2]; + struct ystorm_iscsi_task_ag_ctx ystorm_ag_context; + struct regpair ystorm_ag_padding[2]; + struct tdif_task_context tdif_context; + struct mstorm_iscsi_task_ag_ctx mstorm_ag_context; + struct regpair mstorm_ag_padding[2]; + struct ustorm_iscsi_task_ag_ctx ustorm_ag_context; + struct mstorm_iscsi_task_st_ctx mstorm_st_context; + struct ustorm_iscsi_task_st_ctx ustorm_st_context; + struct rdif_task_context rdif_context; +}; + +enum iscsi_task_type { + ISCSI_TASK_TYPE_INITIATOR_WRITE, + ISCSI_TASK_TYPE_INITIATOR_READ, + ISCSI_TASK_TYPE_MIDPATH, + ISCSI_TASK_TYPE_UNSOLIC, + ISCSI_TASK_TYPE_EXCHCLEANUP, + ISCSI_TASK_TYPE_IRRELEVANT, + ISCSI_TASK_TYPE_TARGET_WRITE, + ISCSI_TASK_TYPE_TARGET_READ, + ISCSI_TASK_TYPE_TARGET_RESPONSE, + ISCSI_TASK_TYPE_LOGIN_RESPONSE, + MAX_ISCSI_TASK_TYPE +}; + +union iscsi_ttt_txlen_union { + __le32 desired_tx_len; + __le32 ttt; +}; + +struct iscsi_uhqe { + __le32 reg1; +#define ISCSI_UHQE_PDU_PAYLOAD_LEN_MASK 0xFFFFF +#define ISCSI_UHQE_PDU_PAYLOAD_LEN_SHIFT 0 +#define ISCSI_UHQE_LOCAL_COMP_MASK 0x1 +#define ISCSI_UHQE_LOCAL_COMP_SHIFT 20 +#define ISCSI_UHQE_TOGGLE_BIT_MASK 0x1 +#define ISCSI_UHQE_TOGGLE_BIT_SHIFT 21 +#define ISCSI_UHQE_PURE_PAYLOAD_MASK 0x1 +#define ISCSI_UHQE_PURE_PAYLOAD_SHIFT 22 +#define ISCSI_UHQE_LOGIN_RESPONSE_PDU_MASK 0x1 +#define ISCSI_UHQE_LOGIN_RESPONSE_PDU_SHIFT 23 +#define ISCSI_UHQE_TASK_ID_HI_MASK 0xFF +#define ISCSI_UHQE_TASK_ID_HI_SHIFT 24 + __le32 reg2; +#define ISCSI_UHQE_BUFFER_OFFSET_MASK 0xFFFFFF +#define ISCSI_UHQE_BUFFER_OFFSET_SHIFT 0 +#define ISCSI_UHQE_TASK_ID_LO_MASK 0xFF +#define ISCSI_UHQE_TASK_ID_LO_SHIFT 24 +}; + +struct iscsi_wqe_field { + __le32 contlen_cdbsize_field; +#define ISCSI_WQE_FIELD_CONT_LEN_MASK 0xFFFFFF +#define ISCSI_WQE_FIELD_CONT_LEN_SHIFT 0 +#define ISCSI_WQE_FIELD_CDB_SIZE_MASK 0xFF +#define ISCSI_WQE_FIELD_CDB_SIZE_SHIFT 24 +}; + +union iscsi_wqe_field_union { + struct iscsi_wqe_field cont_field; + __le32 prev_tid; +}; + +struct iscsi_wqe { + __le16 task_id; + u8 flags; +#define ISCSI_WQE_WQE_TYPE_MASK 0x7 +#define ISCSI_WQE_WQE_TYPE_SHIFT 0 +#define ISCSI_WQE_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_WQE_NUM_FAST_SGES_SHIFT 3 +#define ISCSI_WQE_PTU_INVALIDATE_MASK 0x1 +#define ISCSI_WQE_PTU_INVALIDATE_SHIFT 6 +#define ISCSI_WQE_RESPONSE_MASK 0x1 +#define ISCSI_WQE_RESPONSE_SHIFT 7 + struct iscsi_dif_flags prot_flags; + union iscsi_wqe_field_union 
cont_prevtid_union; +}; + +enum iscsi_wqe_type { + ISCSI_WQE_TYPE_NORMAL, + ISCSI_WQE_TYPE_TASK_CLEANUP, + ISCSI_WQE_TYPE_MIDDLE_PATH, + ISCSI_WQE_TYPE_LOGIN, + ISCSI_WQE_TYPE_FIRST_R2T_CONT, + ISCSI_WQE_TYPE_NONFIRST_R2T_CONT, + ISCSI_WQE_TYPE_RESPONSE, + MAX_ISCSI_WQE_TYPE +}; + +struct iscsi_xhqe { + union iscsi_ttt_txlen_union ttt_or_txlen; + __le32 exp_stat_sn; + struct iscsi_dif_flags prot_flags; + u8 total_ahs_length; + u8 opcode; + u8 flags; +#define ISCSI_XHQE_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_XHQE_NUM_FAST_SGES_SHIFT 0 +#define ISCSI_XHQE_FINAL_MASK 0x1 +#define ISCSI_XHQE_FINAL_SHIFT 3 +#define ISCSI_XHQE_SUPER_IO_MASK 0x1 +#define ISCSI_XHQE_SUPER_IO_SHIFT 4 +#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 +#define ISCSI_XHQE_STATUS_BIT_SHIFT 5 +#define ISCSI_XHQE_RESERVED_MASK 0x3 +#define ISCSI_XHQE_RESERVED_SHIFT 6 + union iscsi_seq_num seq_num_union; + __le16 reserved1; +}; + +struct mstorm_iscsi_stats_drv { + struct regpair iscsi_rx_dropped_pdus_task_not_valid; +}; + +struct ooo_opaque { + __le32 cid; + u8 drop_isle; + u8 drop_size; + u8 ooo_opcode; + u8 ooo_isle; +}; + +struct pstorm_iscsi_stats_drv { + struct regpair iscsi_tx_bytes_cnt; + struct regpair iscsi_tx_packet_cnt; +}; + +struct tstorm_iscsi_stats_drv { + struct regpair iscsi_rx_bytes_cnt; + struct regpair iscsi_rx_packet_cnt; + struct regpair iscsi_rx_new_ooo_isle_events_cnt; + __le32 iscsi_cmdq_threshold_cnt; + __le32 iscsi_rq_threshold_cnt; + __le32 iscsi_immq_threshold_cnt; +}; + +struct ustorm_iscsi_stats_drv { + struct regpair iscsi_rx_data_pdu_cnt; + struct regpair iscsi_rx_r2t_pdu_cnt; + struct regpair iscsi_rx_total_pdu_cnt; +}; + +struct xstorm_iscsi_stats_drv { + struct regpair iscsi_tx_go_to_slow_start_event_cnt; + struct regpair iscsi_tx_fast_retransmit_event_cnt; +}; + +struct ystorm_iscsi_stats_drv { + struct regpair iscsi_tx_data_pdu_cnt; + struct regpair iscsi_tx_r2t_pdu_cnt; + struct regpair iscsi_tx_total_pdu_cnt; +}; + +struct iscsi_db_data { + u8 params; +#define ISCSI_DB_DATA_DEST_MASK 0x3 +#define ISCSI_DB_DATA_DEST_SHIFT 0 +#define ISCSI_DB_DATA_AGG_CMD_MASK 0x3 +#define ISCSI_DB_DATA_AGG_CMD_SHIFT 2 +#define ISCSI_DB_DATA_BYPASS_EN_MASK 0x1 +#define ISCSI_DB_DATA_BYPASS_EN_SHIFT 4 +#define ISCSI_DB_DATA_RESERVED_MASK 0x1 +#define ISCSI_DB_DATA_RESERVED_SHIFT 5 +#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 sq_prod; +}; + +struct tstorm_iscsi_task_ag_ctx { + u8 byte0; + u8 byte1; + __le16 word0; + u8 flags0; +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 6 + u8 flags2; +#define TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define 
TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT 6 + u8 flags3; +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT 7 + u8 flags4; +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 7 + u8 byte2; + __le16 word1; + __le32 reg0; + u8 byte3; + u8 byte4; + __le16 word2; + __le16 word3; + __le16 word4; + __le32 reg1; + __le32 reg2; +}; + +#endif /* __ISCSI_COMMON__ */ diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h new file mode 100644 index 000000000000..187991c1f439 --- /dev/null +++ b/include/linux/qed/rdma_common.h @@ -0,0 +1,44 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __RDMA_COMMON__ +#define __RDMA_COMMON__ +/************************/ +/* RDMA FW CONSTANTS */ +/************************/ + +#define RDMA_RESERVED_LKEY (0) +#define RDMA_RING_PAGE_SIZE (0x1000) + +#define RDMA_MAX_SGE_PER_SQ_WQE (4) +#define RDMA_MAX_SGE_PER_RQ_WQE (4) + +#define RDMA_MAX_DATA_SIZE_IN_WQE (0x7FFFFFFF) + +#define RDMA_REQ_RD_ATOMIC_ELM_SIZE (0x50) +#define RDMA_RESP_RD_ATOMIC_ELM_SIZE (0x20) + +#define RDMA_MAX_CQS (64 * 1024) +#define RDMA_MAX_TIDS (128 * 1024 - 1) +#define RDMA_MAX_PDS (64 * 1024) + +#define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS + +#define RDMA_TASK_TYPE (PROTOCOLID_ROCE) + +struct rdma_srq_id { + __le16 srq_idx; + __le16 opaque_fid; +}; + +struct rdma_srq_producers { + __le32 sge_prod; + __le32 wqe_prod; +}; + +#endif /* __RDMA_COMMON__ */ diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h new file mode 100644 index 000000000000..2eeaf3dc6646 --- /dev/null +++ b/include/linux/qed/roce_common.h @@ -0,0 +1,17 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef __ROCE_COMMON__ +#define __ROCE_COMMON__ + +#define ROCE_REQ_MAX_INLINE_DATA_SIZE (256) +#define ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE (288) + +#define ROCE_MAX_QPS (32 * 1024) + +#endif /* __ROCE_COMMON__ */ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h new file mode 100644 index 000000000000..3b8e1efd9bc2 --- /dev/null +++ b/include/linux/qed/storage_common.h @@ -0,0 +1,91 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __STORAGE_COMMON__ +#define __STORAGE_COMMON__ + +#define NUM_OF_CMDQS_CQS (NUM_OF_GLOBAL_QUEUES / 2) +#define BDQ_NUM_RESOURCES (4) + +#define BDQ_ID_RQ (0) +#define BDQ_ID_IMM_DATA (1) +#define BDQ_NUM_IDS (2) + +#define BDQ_MAX_EXTERNAL_RING_SIZE (1 << 15) + +struct scsi_bd { + struct regpair address; + struct regpair opaque; +}; + +struct scsi_bdq_ram_drv_data { + __le16 external_producer; + __le16 reserved0[3]; +}; + +struct scsi_drv_cmdq { + __le16 cmdq_cons; + __le16 reserved0; + __le32 reserved1; +}; + +struct scsi_init_func_params { + __le16 num_tasks; + u8 log_page_size; + u8 debug_mode; + u8 reserved2[12]; +}; + +struct scsi_init_func_queues { + struct regpair glbl_q_params_addr; + __le16 rq_buffer_size; + __le16 cq_num_entries; + __le16 cmdq_num_entries; + u8 bdq_resource_id; + u8 q_validity; +#define SCSI_INIT_FUNC_QUEUES_RQ_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_RQ_VALID_SHIFT 0 +#define SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID_SHIFT 1 +#define SCSI_INIT_FUNC_QUEUES_CMD_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_CMD_VALID_SHIFT 2 +#define SCSI_INIT_FUNC_QUEUES_RESERVED_VALID_MASK 0x1F +#define SCSI_INIT_FUNC_QUEUES_RESERVED_VALID_SHIFT 3 + u8 num_queues; + u8 queue_relative_offset; + u8 cq_sb_pi; + u8 cmdq_sb_pi; + __le16 cq_cmdq_sb_num_arr[NUM_OF_CMDQS_CQS]; + __le16 reserved0; + u8 bdq_pbl_num_entries[BDQ_NUM_IDS]; + struct regpair bdq_pbl_base_address[BDQ_NUM_IDS]; + __le16 bdq_xoff_threshold[BDQ_NUM_IDS]; + __le16 bdq_xon_threshold[BDQ_NUM_IDS]; + __le16 cmdq_xoff_threshold; + __le16 cmdq_xon_threshold; + __le32 reserved1; +}; + +struct scsi_ram_per_bdq_resource_drv_data { + struct scsi_bdq_ram_drv_data drv_data_per_bdq_id[BDQ_NUM_IDS]; +}; + +struct scsi_sge { + struct regpair sge_addr; + __le16 sge_len; + __le16 reserved0; + __le32 reserved1; +}; + +struct scsi_terminate_extra_params { + __le16 unsolicited_cq_count; + __le16 cmdq_count; + u8 reserved[4]; +}; + +#endif /* __STORAGE_COMMON__ */ diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h new file mode 100644 index 000000000000..accba0e6b704 --- /dev/null +++ b/include/linux/qed/tcp_common.h @@ -0,0 +1,226 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __TCP_COMMON__ +#define __TCP_COMMON__ + +#define TCP_INVALID_TIMEOUT_VAL -1 + +enum tcp_connect_mode { + TCP_CONNECT_ACTIVE, + TCP_CONNECT_PASSIVE, + MAX_TCP_CONNECT_MODE +}; + +struct tcp_init_params { + __le32 max_cwnd; + __le16 dup_ack_threshold; + __le16 tx_sws_timer; + __le16 min_rto; + __le16 min_rto_rt; + __le16 max_rto; + u8 maxfinrt; + u8 reserved[1]; +}; + +enum tcp_ip_version { + TCP_IPV4, + TCP_IPV6, + MAX_TCP_IP_VERSION +}; + +struct tcp_offload_params { + __le16 local_mac_addr_lo; + __le16 local_mac_addr_mid; + __le16 local_mac_addr_hi; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 vlan_id; + u8 flags; +#define TCP_OFFLOAD_PARAMS_TS_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_TS_EN_SHIFT 0 +#define TCP_OFFLOAD_PARAMS_DA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_DA_EN_SHIFT 1 +#define TCP_OFFLOAD_PARAMS_KA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_KA_EN_SHIFT 2 +#define TCP_OFFLOAD_PARAMS_NAGLE_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_NAGLE_EN_SHIFT 3 +#define TCP_OFFLOAD_PARAMS_DA_CNT_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_DA_CNT_EN_SHIFT 4 +#define TCP_OFFLOAD_PARAMS_FIN_SENT_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_FIN_SENT_SHIFT 5 +#define TCP_OFFLOAD_PARAMS_FIN_RECEIVED_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_FIN_RECEIVED_SHIFT 6 +#define TCP_OFFLOAD_PARAMS_RESERVED0_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_RESERVED0_SHIFT 7 + u8 ip_version; + __le32 remote_ip[4]; + __le32 local_ip[4]; + __le32 flow_label; + u8 ttl; + u8 tos_or_tc; + __le16 remote_port; + __le16 local_port; + __le16 mss; + u8 rcv_wnd_scale; + u8 connect_mode; + __le16 srtt; + __le32 cwnd; + __le32 ss_thresh; + __le16 reserved1; + u8 ka_max_probe_cnt; + u8 dup_ack_theshold; + __le32 rcv_next; + __le32 snd_una; + __le32 snd_next; + __le32 snd_max; + __le32 snd_wnd; + __le32 rcv_wnd; + __le32 snd_wl1; + __le32 ts_time; + __le32 ts_recent; + __le32 ts_recent_age; + __le32 total_rt; + __le32 ka_timeout_delta; + __le32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + __le16 rtt_var; + __le16 reserved2; + __le32 ka_timeout; + __le32 ka_interval; + __le32 max_rt_time; + __le32 initial_rcv_wnd; + u8 snd_wnd_scale; + u8 ack_frequency; + __le16 da_timeout_value; + __le32 ts_ticks_per_second; +}; + +struct tcp_offload_params_opt2 { + __le16 local_mac_addr_lo; + __le16 local_mac_addr_mid; + __le16 local_mac_addr_hi; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 vlan_id; + u8 flags; +#define TCP_OFFLOAD_PARAMS_OPT2_TS_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_TS_EN_SHIFT 0 +#define TCP_OFFLOAD_PARAMS_OPT2_DA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_DA_EN_SHIFT 1 +#define TCP_OFFLOAD_PARAMS_OPT2_KA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_KA_EN_SHIFT 2 +#define TCP_OFFLOAD_PARAMS_OPT2_RESERVED0_MASK 0x1F +#define TCP_OFFLOAD_PARAMS_OPT2_RESERVED0_SHIFT 3 + u8 ip_version; + __le32 remote_ip[4]; + __le32 local_ip[4]; + __le32 flow_label; + u8 ttl; + u8 tos_or_tc; + __le16 remote_port; + __le16 local_port; + __le16 mss; + u8 rcv_wnd_scale; + u8 connect_mode; + __le16 syn_ip_payload_length; + __le32 syn_phy_addr_lo; + __le32 syn_phy_addr_hi; + __le32 reserved1[22]; +}; + +enum tcp_seg_placement_event { + TCP_EVENT_ADD_PEN, + TCP_EVENT_ADD_NEW_ISLE, + TCP_EVENT_ADD_ISLE_RIGHT, + TCP_EVENT_ADD_ISLE_LEFT, + TCP_EVENT_JOIN, + TCP_EVENT_NOP, + MAX_TCP_SEG_PLACEMENT_EVENT +}; + +struct tcp_update_params { + __le16 flags; +#define TCP_UPDATE_PARAMS_REMOTE_MAC_ADDR_CHANGED_MASK 
0x1 +#define TCP_UPDATE_PARAMS_REMOTE_MAC_ADDR_CHANGED_SHIFT 0 +#define TCP_UPDATE_PARAMS_MSS_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_MSS_CHANGED_SHIFT 1 +#define TCP_UPDATE_PARAMS_TTL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_TTL_CHANGED_SHIFT 2 +#define TCP_UPDATE_PARAMS_TOS_OR_TC_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_TOS_OR_TC_CHANGED_SHIFT 3 +#define TCP_UPDATE_PARAMS_KA_TIMEOUT_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_TIMEOUT_CHANGED_SHIFT 4 +#define TCP_UPDATE_PARAMS_KA_INTERVAL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_INTERVAL_CHANGED_SHIFT 5 +#define TCP_UPDATE_PARAMS_MAX_RT_TIME_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_MAX_RT_TIME_CHANGED_SHIFT 6 +#define TCP_UPDATE_PARAMS_FLOW_LABEL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_FLOW_LABEL_CHANGED_SHIFT 7 +#define TCP_UPDATE_PARAMS_INITIAL_RCV_WND_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_INITIAL_RCV_WND_CHANGED_SHIFT 8 +#define TCP_UPDATE_PARAMS_KA_MAX_PROBE_CNT_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_MAX_PROBE_CNT_CHANGED_SHIFT 9 +#define TCP_UPDATE_PARAMS_KA_EN_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_EN_CHANGED_SHIFT 10 +#define TCP_UPDATE_PARAMS_NAGLE_EN_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_NAGLE_EN_CHANGED_SHIFT 11 +#define TCP_UPDATE_PARAMS_KA_EN_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_EN_SHIFT 12 +#define TCP_UPDATE_PARAMS_NAGLE_EN_MASK 0x1 +#define TCP_UPDATE_PARAMS_NAGLE_EN_SHIFT 13 +#define TCP_UPDATE_PARAMS_KA_RESTART_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_RESTART_SHIFT 14 +#define TCP_UPDATE_PARAMS_RETRANSMIT_RESTART_MASK 0x1 +#define TCP_UPDATE_PARAMS_RETRANSMIT_RESTART_SHIFT 15 + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 mss; + u8 ttl; + u8 tos_or_tc; + __le32 ka_timeout; + __le32 ka_interval; + __le32 max_rt_time; + __le32 flow_label; + __le32 initial_rcv_wnd; + u8 ka_max_probe_cnt; + u8 reserved1[7]; +}; + +struct tcp_upload_params { + __le32 rcv_next; + __le32 snd_una; + __le32 snd_next; + __le32 snd_max; + __le32 snd_wnd; + __le32 rcv_wnd; + __le32 snd_wl1; + __le32 cwnd; + __le32 ss_thresh; + __le16 srtt; + __le16 rtt_var; + __le32 ts_time; + __le32 ts_recent; + __le32 ts_recent_age; + __le32 total_rt; + __le32 ka_timeout_delta; + __le32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + __le32 reserved; +}; + +#endif /* __TCP_COMMON__ */ -- cgit From c5ac93191d7e6977c5c3465ac94c73ebb8a8ecba Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:34 +0300 Subject: qed: Add iscsi/rdma personalities This patch adds in the ecore 2 new personalities in addition to QED_PCI_ETH - QED_PCI_ISCSI and QED_PCI_ETH_ROCE. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/common_hsi.h | 4 +-- include/linux/qed/qed_if.h | 65 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 077a3b6cc80f..40c0ada01806 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -517,9 +517,9 @@ enum mf_mode { /* Per-protocol connection types */ enum protocol_type { - PROTOCOLID_RESERVED1, + PROTOCOLID_ISCSI, PROTOCOLID_RESERVED2, - PROTOCOLID_RESERVED3, + PROTOCOLID_ROCE, PROTOCOLID_CORE, PROTOCOLID_ETH, PROTOCOLID_RESERVED4, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 15efccfdc46e..e8cc49f9688a 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -58,8 +58,70 @@ struct qed_eth_pf_params { u16 num_cons; }; +/* Most of the the parameters below are described in the FW iSCSI / TCP HSI */ +struct qed_iscsi_pf_params { + u64 glbl_q_params_addr; + u64 bdq_pbl_base_addr[2]; + u32 max_cwnd; + u16 cq_num_entries; + u16 cmdq_num_entries; + u16 dup_ack_threshold; + u16 tx_sws_timer; + u16 min_rto; + u16 min_rto_rt; + u16 max_rto; + + /* The following parameters are used during HW-init + * and these parameters need to be passed as arguments + * to update_pf_params routine invoked before slowpath start + */ + u16 num_cons; + u16 num_tasks; + + /* The following parameters are used during protocol-init */ + u16 half_way_close_timeout; + u16 bdq_xoff_threshold[2]; + u16 bdq_xon_threshold[2]; + u16 cmdq_xoff_threshold; + u16 cmdq_xon_threshold; + u16 rq_buffer_size; + + u8 num_sq_pages_in_ring; + u8 num_r2tq_pages_in_ring; + u8 num_uhq_pages_in_ring; + u8 num_queues; + u8 log_page_size; + u8 rqe_log_size; + u8 max_fin_rt; + u8 gl_rq_pi; + u8 gl_cmd_pi; + u8 debug_mode; + u8 ll2_ooo_queue_id; + u8 ooo_enable; + + u8 is_target; + u8 bdq_pbl_num_entries[2]; +}; + +struct qed_rdma_pf_params { + /* Supplied to QED during resource allocation (may affect the ILT and + * the doorbell BAR). + */ + u32 min_dpis; /* number of requested DPIs */ + u32 num_mrs; /* number of requested memory regions */ + u32 num_qps; /* number of requested Queue Pairs */ + u32 num_srqs; /* number of requested SRQ */ + u8 roce_edpm_mode; /* see QED_ROCE_EDPM_MODE_ENABLE */ + u8 gl_pi; /* protocol index */ + + /* Will allocate rate limiters to be used with QPs */ + u8 enable_dcqcn; +}; + struct qed_pf_params { struct qed_eth_pf_params eth_pf_params; + struct qed_iscsi_pf_params iscsi_pf_params; + struct qed_rdma_pf_params rdma_pf_params; }; enum qed_int_mode { @@ -100,6 +162,8 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; + bool rdma_supported; + u32 flash_size; u8 mf_mode; bool tx_switching; @@ -111,6 +175,7 @@ enum qed_sb_type { enum qed_protocol { QED_PROTOCOL_ETH, + QED_PROTOCOL_ISCSI, }; struct qed_link_params { -- cgit From 81b1251d3b761b303955408dd6a49618b5683c2b Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sat, 4 Jun 2016 08:20:16 +0300 Subject: qed: Fix next-ptr chains for BE / 32-bit Commit a91eb52abb50 ("qed: Revisit chain implementation") contains an incorrect implementation for BE platforms, as device's regpairs containing addresses are LE and they're not converted correctly when read back. In addition, it raises a compilation warning for 32-bit platforms where dma_addr_t is a 32-bit variable. Reported-by: kbuild test robot Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_chain.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index eceaa9ed2ae9..7e441bdeabdc 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -25,10 +25,9 @@ } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) #define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) -#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) #define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) +#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) enum qed_chain_mode { /* Each Page contains a next pointer at its end */ -- cgit From c8b098086b4c744084350d2757a637ad756adf34 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:57 +0200 Subject: net: dsa: Add a ports structure and use it in the switch structure There are going to be more per-port members added to the switch structure. So add a port structure and move the netdev into it. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 17c3d37b6779..9aed8572037c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -119,6 +119,10 @@ struct dsa_switch_tree { struct dsa_switch *ds[DSA_MAX_SWITCHES]; }; +struct dsa_port { + struct net_device *netdev; +}; + struct dsa_switch { struct device *dev; @@ -158,8 +162,8 @@ struct dsa_switch { u32 dsa_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; + struct dsa_port ports[DSA_MAX_PORTS]; struct mii_bus *slave_mii_bus; - struct net_device *ports[DSA_MAX_PORTS]; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) @@ -174,7 +178,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { - return ds->enabled_port_mask & (1 << p) && ds->ports[p]; + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; } static inline u8 dsa_upstream_port(struct dsa_switch *ds) -- cgit From 189b0d93ec61e1f991e96d7bc03b03cf929d164c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:58 +0200 Subject: net: dsa: Move port device node into port structure Move the port device node structure into the port structure, from the chip data. This information is needed in the next step of implementing the new binding. The chip data structure is used while parsing the whole old binding, before the individual switch structures exist. With the new bindings, this is reversed, the switches exist first, and the interconnections between the switches is derived from the individual switch bindings. Thus this chip data structure becomes unneeded. Signed-off-by: Andrew Lunn eviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9aed8572037c..8314197d028f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -121,6 +121,7 @@ struct dsa_switch_tree { struct dsa_port { struct net_device *netdev; + struct device_node *dn; }; struct dsa_switch { -- cgit From 4a7704ffa86705b0580b6473c407b7b7618e072d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:59 +0200 Subject: net: dsa: Remove dynamic allocate of routing table With a maximum of four switches, the size of the routing table is the same as the pointer to it. Removing it makes the code simpler. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8314197d028f..4e3afa9648ca 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -58,12 +58,11 @@ struct dsa_chip_data { struct device_node *port_dn[DSA_MAX_PORTS]; /* - * An array (with nr_chips elements) of which element [a] - * indicates which port on this switch should be used to - * send packets to that are destined for switch a. Can be - * NULL if there is only one switch chip. + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. */ - s8 *rtable; + s8 rtable[DSA_MAX_SWITCHES]; }; struct dsa_platform_data { -- cgit From 66472fc04e8be62858f29c7798ed17e984c1ab3b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:00 +0200 Subject: net: dsa: Copy the routing table into the switch structure The new binding will not have a chip data structure, it will place the routing directly into the switch structure. To enable backwards compatibility, copy the routing from the chip data into the switch structure. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4e3afa9648ca..b666f27b3daa 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -148,6 +148,13 @@ struct dsa_switch { */ struct dsa_switch_driver *drv; + /* + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. + */ + s8 rtable[DSA_MAX_SWITCHES]; + #ifdef CONFIG_NET_DSA_HWMON /* * Hardware monitoring information @@ -194,7 +201,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) if (dst->cpu_switch == ds->index) return dst->cpu_port; else - return ds->cd->rtable[dst->cpu_switch]; + return ds->rtable[dst->cpu_switch]; } struct switchdev_trans; -- cgit From 39a7f2a4eb496c0c68cc93fcb403190b48605168 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:03 +0200 Subject: net: dsa: Refactor selection of tag ops into a function Replace the two switch statements with an array lookup, and store the result in the dsa tree structure. The drivers no longer need to know the selected tag protocol, so remove it from the dsa switch structure. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b666f27b3daa..bd6ecaa0de96 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_LAST, /* MUST BE LAST */ }; #define DSA_MAX_SWITCHES 4 @@ -99,7 +100,6 @@ struct dsa_switch_tree { struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); - enum dsa_tag_protocol tag_protocol; /* * Original copy of the master netdev ethtool_ops @@ -116,6 +116,12 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; + + /* + * Tagging protocol operations for adding and removing an + * encapsulation tag. + */ + const struct dsa_device_ops *tag_ops; }; struct dsa_port { -- cgit From 83c0afaec7b730b16c518aecc8e6246ec91b265e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:07 +0200 Subject: net: dsa: Add new binding implementation The existing DSA binding has a number of limitations and problems. The main problem is that it cannot represent a switch as a linux device, hanging off some bus. It is limited to one CPU port. The DSA platform device is artificial, and does not really represent hardware. Implement a new binding which can be embedded into any type of node on a bus to represent one switch device, and its links to other switches. Signed-off-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index bd6ecaa0de96..cca7ef230742 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -85,6 +85,17 @@ struct dsa_platform_data { struct packet_type; struct dsa_switch_tree { + struct list_head list; + + /* Tree identifier */ + u32 tree; + + /* Number of switches attached to this tree */ + struct kref refcount; + + /* Has this tree been applied to the hardware? */ + bool applied; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. @@ -169,10 +180,16 @@ struct dsa_switch { struct device *hwmon_dev; #endif + /* + * The lower device this switch uses to talk to the host + */ + struct net_device *master_netdev; + /* * Slave mii_bus and devices for the individual ports. */ u32 dsa_port_mask; + u32 cpu_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; struct dsa_port ports[DSA_MAX_PORTS]; @@ -361,4 +378,7 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; } + +void dsa_unregister_switch(struct dsa_switch *ds); +int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); #endif -- cgit From 6eb17e0df74c036eba7548915b37f009403fe09e Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Fri, 3 Jun 2016 10:55:09 +0800 Subject: ACPI: bus: add stub acpi_dev_found() to linux/acpi.h acpi_dev_found() will be used to detect if a given ACPI device is in the system. It will be compiled in non-ACPI case, but the function is in acpi_bus.h and acpi_bus.h can only be used in ACPI case, so this patch add the stub function to linux/acpi.h to make compiled successfully in non-ACPI cases. Cc: Rafael J. Wysocki Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. 
Miller --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 288fac5294f5..3025d1930f6e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -543,6 +543,11 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); struct fwnode_handle; +static inline bool acpi_dev_found(const char *hid) +{ + return false; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- cgit From 4ae399241adba66ad72e5973a1004f37ffbe67cd Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Fri, 3 Jun 2016 10:55:10 +0800 Subject: ACPI: bus: add stub acpi_evaluate_dsm() to linux/acpi.h acpi_evaluate_dsm() will be used to handle the _DSM method in ACPI case. It will be compiled in non-ACPI case, but the function is in acpi_bus.h and acpi_bus.h can only be used in ACPI case, so this patch add the stub function to linux/acpi.h to make compiled successfully in non-ACPI cases. Cc: Rafael J. Wysocki Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- include/linux/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3025d1930f6e..4d4bb4955682 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -659,6 +659,14 @@ static inline bool acpi_driver_match_device(struct device *dev, return false; } +static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, + const u8 *uuid, + int rev, int func, + union acpi_object *argv4) +{ + return NULL; +} + static inline int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { -- cgit From 14de9d114a82a564b94388c95af79a701dc93134 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Fri, 3 Jun 2016 16:57:12 -0400 Subject: virtio-net: Add initial MTU advice feature This commit adds the feature bit and associated mtu device entry for the virtio network device. When a virtio device comes up, it checks the feature bit for the VIRTIO_NET_F_MTU feature. If such feature bit is enabled, the driver will read the advised MTU and use it as the initial value. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index ec32293a00db..1ab4ea6ec847 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -55,6 +55,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_MTU 25 /* Initial MTU advice */ #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ @@ -73,6 +74,8 @@ struct virtio_net_config { * Legal values are between 1 and 0x8000 */ __u16 max_virtqueue_pairs; + /* Default maximum transmit unit advice */ + __u16 mtu; } __attribute__((packed)); /* -- cgit From 53eb440f4ada034ea43b295891feec3df0fa7a29 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 6 Jun 2016 06:32:54 -0400 Subject: net sched actions: introduce timestamp for firsttime use Useful to know when the action was first used for accounting (and debugging) Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/act_api.h | 2 ++ include/uapi/linux/pkt_cls.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9a9a8edc138f..8389c007076f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -76,6 +76,8 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) if (tm->lastuse != now) tm->lastuse = now; + if (unlikely(!tm->firstuse)) + tm->firstuse = now; } struct tc_action { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index f4297c8a42fe..9ba1410bd21d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -124,6 +124,7 @@ struct tcf_t { __u64 install; __u64 lastuse; __u64 expires; + __u64 firstuse; }; struct tc_cnt { -- cgit From 48d8ee1694dd1ab25614b58f968123a4598f887e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 6 Jun 2016 06:32:55 -0400 Subject: net sched actions: aggregate dumping of actions timeinfo Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8389c007076f..a891978310e9 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -80,6 +80,14 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) tm->firstuse = now; } +static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) +{ + dtm->install = jiffies_to_clock_t(jiffies - stm->install); + dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); + dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse); + dtm->expires = jiffies_to_clock_t(stm->expires); +} + struct tc_action { void *priv; const struct tc_action_ops *ops; -- cgit From 0b0f43fe2e7291aa97b1febeaa5a0de453d007ca Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 5 Jun 2016 10:41:32 -0400 Subject: net sched: indentation and other OCD stylistic fixes Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang --- include/net/act_api.h | 14 ++++++++------ include/net/tc_act/tc_defact.h | 4 ++-- include/uapi/linux/pkt_cls.h | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index a891978310e9..db218a12efb5 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -2,8 +2,8 @@ #define __NET_ACT_API_H /* - * Public police action API for classifiers/qdiscs - */ + * Public action API for classifiers/qdiscs +*/ #include #include @@ -107,7 +107,8 @@ struct tc_action_ops { char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ struct module *owner; - int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); + int (*act)(struct sk_buff *, const struct tc_action *, + struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *, int bind); int (*lookup)(struct net *, struct tc_action *, u32); @@ -125,8 +126,8 @@ struct tc_action_net { }; static inline -int tc_action_net_init(struct tc_action_net *tn, const struct tc_action_ops *ops, - unsigned int mask) +int tc_action_net_init(struct tc_action_net *tn, + const struct tc_action_ops *ops, unsigned int mask) { int err = 0; @@ -169,7 +170,8 @@ static inline int tcf_hash_release(struct tc_action *a, bool bind) } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); +int 
tcf_unregister_action(struct tc_action_ops *a, + struct pernet_operations *ops); int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res); diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index 9763dcbb9bc3..ab9b5d6be67b 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -5,8 +5,8 @@ struct tcf_defact { struct tcf_common common; - u32 tcfd_datalen; - void *tcfd_defdata; + u32 tcfd_datalen; + void *tcfd_defdata; }; #define to_defact(a) \ container_of(a->priv, struct tcf_defact, common) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9ba1410bd21d..5702e933dc07 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -115,8 +115,8 @@ struct tc_police { __u32 mtu; struct tc_ratespec rate; struct tc_ratespec peakrate; - int refcnt; - int bindcnt; + int refcnt; + int bindcnt; __u32 capab; }; @@ -128,7 +128,7 @@ struct tcf_t { }; struct tc_cnt { - int refcnt; + int refcnt; int bindcnt; }; -- cgit From 68f047e3d62ebfac24ff9be551476cf30eafb00e Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:29:58 +0800 Subject: fsl/qe: add rx_sync and tx_sync for TDM mode Rx_sync and tx_sync are used by QE-TDM mode, add them to struct ucc_fast_info. Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- include/soc/fsl/qe/qe.h | 2 ++ include/soc/fsl/qe/ucc_fast.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 33b29ead3d55..f91874582ab3 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -80,6 +80,8 @@ enum qe_clock { QE_CLK22, /* Clock 22 */ QE_CLK23, /* Clock 23 */ QE_CLK24, /* Clock 24 */ + QE_RSYNC_PIN, /* RSYNC from pin */ + QE_TSYNC_PIN, /* TSYNC from pin */ QE_CLK_DUMMY }; diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index df8ea7958c63..31548b7aa50e 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -120,6 +120,8 @@ struct ucc_fast_info { int ucc_num; enum qe_clock rx_clock; enum qe_clock tx_clock; + enum qe_clock rx_sync; + enum qe_clock tx_sync; u32 regs; int irq; u32 uccm_mask; -- cgit From bb8b2062aff321af1fc58781cc07fbbea01cceb3 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:29:59 +0800 Subject: fsl/qe: setup clock source for TDM mode Add tdm clock configuration in both qe clock system and ucc fast controller. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/qe.h | 16 ++++++++++++++++ include/soc/fsl/qe/ucc.h | 4 ++++ include/soc/fsl/qe/ucc_fast.h | 1 + 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index f91874582ab3..c3b1dc8a21a7 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -244,6 +244,22 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset +#define qe_setbits32(_addr, _v) iowrite32be(ioread32be(_addr) | (_v), (_addr)) +#define qe_clrbits32(_addr, _v) iowrite32be(ioread32be(_addr) & ~(_v), (_addr)) + +#define qe_setbits16(_addr, _v) iowrite16be(ioread16be(_addr) | (_v), (_addr)) +#define qe_clrbits16(_addr, _v) iowrite16be(ioread16be(_addr) & ~(_v), (_addr)) + +#define qe_setbits8(_addr, _v) iowrite8(ioread8(_addr) | (_v), (_addr)) +#define qe_clrbits8(_addr, _v) iowrite8(ioread8(_addr) & ~(_v), (_addr)) + +#define qe_clrsetbits32(addr, clear, set) \ + iowrite32be((ioread32be(addr) & ~(clear)) | (set), (addr)) +#define qe_clrsetbits16(addr, clear, set) \ + iowrite16be((ioread16be(addr) & ~(clear)) | (set), (addr)) +#define qe_clrsetbits8(addr, clear, set) \ + iowrite8((ioread8(addr) & ~(clear)) | (set), (addr)) + /* Structure that defines QE firmware binary files. * * See Documentation/powerpc/qe_firmware.txt for a description of these diff --git a/include/soc/fsl/qe/ucc.h b/include/soc/fsl/qe/ucc.h index 894f14cbb044..6bbbb597f2af 100644 --- a/include/soc/fsl/qe/ucc.h +++ b/include/soc/fsl/qe/ucc.h @@ -41,6 +41,10 @@ int ucc_set_qe_mux_mii_mng(unsigned int ucc_num); int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock, enum comm_dir mode); +int ucc_set_tdm_rxtx_clk(unsigned int tdm_num, enum qe_clock clock, + enum comm_dir mode); +int ucc_set_tdm_rxtx_sync(unsigned int tdm_num, enum qe_clock clock, + enum comm_dir mode); int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask); diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index 31548b7aa50e..b2633b7dcc00 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -118,6 +118,7 @@ enum ucc_fast_transparent_tcrc { /* Fast UCC initialization structure */ struct ucc_fast_info { int ucc_num; + int tdm_num; enum qe_clock rx_clock; enum qe_clock tx_clock; enum qe_clock rx_sync; -- cgit From 19163ac3123e7fef8b1ecb2f1d4223f58ed5e884 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:00 +0800 Subject: fsl/qe: Make regs resouce_size_t Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- include/soc/fsl/qe/ucc_fast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index b2633b7dcc00..e898895223f9 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -123,7 +123,7 @@ struct ucc_fast_info { enum qe_clock tx_clock; enum qe_clock rx_sync; enum qe_clock tx_sync; - u32 regs; + resource_size_t regs; int irq; u32 uccm_mask; int bd_mem_part; -- cgit From 35ef1c20fdb26779b6c3c4fd74bbdd5028e70005 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:01 +0800 Subject: fsl/qe: Add QE TDM lib QE has module to support TDM, some other protocols supported by QE are based on TDM. add a qe-tdm lib, this lib provides functions to the protocols using TDM to configurate QE-TDM. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/immap_qe.h | 5 +-- include/soc/fsl/qe/qe_tdm.h | 94 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 include/soc/fsl/qe/qe_tdm.h (limited to 'include') diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h index bedbff891423..c76ef30b05ba 100644 --- a/include/soc/fsl/qe/immap_qe.h +++ b/include/soc/fsl/qe/immap_qe.h @@ -159,10 +159,7 @@ struct spi { /* SI */ struct si1 { - __be16 siamr1; /* SI1 TDMA mode register */ - __be16 sibmr1; /* SI1 TDMB mode register */ - __be16 sicmr1; /* SI1 TDMC mode register */ - __be16 sidmr1; /* SI1 TDMD mode register */ + __be16 sixmr1[4]; /* SI1 TDMx (x = A B C D) mode register */ u8 siglmr1_h; /* SI1 global mode register high */ u8 res0[0x1]; u8 sicmdr1_h; /* SI1 command register high */ diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h new file mode 100644 index 000000000000..4c91498ab117 --- /dev/null +++ b/include/soc/fsl/qe/qe_tdm.h @@ -0,0 +1,94 @@ +/* + * Internal header file for QE TDM mode routines. + * + * Copyright (C) 2016 Freescale Semiconductor, Inc. All rights reserved. + * + * Authors: Zhao Qiang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version + */ + +#ifndef CONFIG_QE_TDM_H +#define CONFIG_QE_TDM_H + +#include +#include + +#include +#include + +#include +#include + +/* SI RAM entries */ +#define SIR_LAST 0x0001 +#define SIR_BYTE 0x0002 +#define SIR_CNT(x) ((x) << 2) +#define SIR_CSEL(x) ((x) << 5) +#define SIR_SGS 0x0200 +#define SIR_SWTR 0x4000 +#define SIR_MCC 0x8000 +#define SIR_IDLE 0 + +/* SIxMR fields */ +#define SIMR_SAD(x) ((x) << 12) +#define SIMR_SDM_NORMAL 0x0000 +#define SIMR_SDM_INTERNAL_LOOPBACK 0x0800 +#define SIMR_SDM_MASK 0x0c00 +#define SIMR_CRT 0x0040 +#define SIMR_SL 0x0020 +#define SIMR_CE 0x0010 +#define SIMR_FE 0x0008 +#define SIMR_GM 0x0004 +#define SIMR_TFSD(n) (n) +#define SIMR_RFSD(n) ((n) << 8) + +enum tdm_ts_t { + TDM_TX_TS, + TDM_RX_TS +}; + +enum tdm_framer_t { + TDM_FRAMER_T1, + TDM_FRAMER_E1 +}; + +enum tdm_mode_t { + TDM_INTERNAL_LOOPBACK, + TDM_NORMAL +}; + +struct si_mode_info { + u8 simr_rfsd; + u8 simr_tfsd; + u8 simr_crt; + u8 simr_sl; + u8 simr_ce; + u8 simr_fe; + u8 simr_gm; +}; + +struct ucc_tdm_info { + struct ucc_fast_info uf_info; + struct si_mode_info si_info; +}; + +struct ucc_tdm { + u16 tdm_port; /* port for this tdm:TDMA,TDMB */ + u32 siram_entry_id; + u16 __iomem *siram; + struct si1 __iomem *si_regs; + enum tdm_framer_t tdm_framer_type; + enum tdm_mode_t tdm_mode; + u8 num_of_ts; /* the number of timeslots in this tdm frame */ + u32 tx_ts_mask; /* tx time slot mask */ + u32 rx_ts_mask; /* rx time slot mask */ +}; + +int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm, + struct ucc_tdm_info *ut_info); +void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info); +#endif -- cgit From c19b6d246a35627c3a69b2fa6bdece212b48214b Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:02 +0800 Subject: drivers/net: support hdlc function for QE-UCC The driver add hdlc support for Freescale QUICC Engine. It support NMSI and TSA mode. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/qe.h | 1 + include/soc/fsl/qe/ucc_fast.h | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index c3b1dc8a21a7..70339d7958c0 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -657,6 +657,7 @@ struct ucc_slow_pram { #define UCC_SLOW_GUMR_L_MODE_QMC 0x00000002 /* General UCC FAST Mode Register */ +#define UCC_FAST_GUMR_LOOPBACK 0x40000000 #define UCC_FAST_GUMR_TCI 0x20000000 #define UCC_FAST_GUMR_TRX 0x10000000 #define UCC_FAST_GUMR_TTX 0x08000000 diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index e898895223f9..3ee9e7c1a7d7 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -21,19 +21,37 @@ #include -/* Receive BD's status */ +/* Receive BD's status and length*/ #define R_E 0x80000000 /* buffer empty */ #define R_W 0x20000000 /* wrap bit */ #define R_I 0x10000000 /* interrupt on reception */ #define R_L 0x08000000 /* last */ #define R_F 0x04000000 /* first */ -/* transmit BD's status */ +/* transmit BD's status and length*/ #define T_R 0x80000000 /* ready bit */ #define T_W 0x20000000 /* wrap bit */ #define T_I 0x10000000 /* interrupt on completion */ #define T_L 0x08000000 /* last */ +/* Receive BD's status */ +#define R_E_S 0x8000 /* buffer empty */ +#define R_W_S 0x2000 /* wrap bit */ +#define R_I_S 0x1000 /* interrupt on reception */ +#define R_L_S 0x0800 /* last */ +#define R_F_S 0x0400 /* first */ +#define R_CM_S 0x0200 /* continuous mode */ +#define R_CR_S 0x0004 /* crc */ +#define R_OV_S 0x0002 /* crc */ + +/* transmit BD's status */ +#define T_R_S 0x8000 /* ready bit */ +#define T_W_S 0x2000 /* wrap bit */ +#define T_I_S 0x1000 /* interrupt on completion */ +#define T_L_S 0x0800 /* last */ +#define T_TC_S 0x0400 /* crc */ +#define T_TM_S 0x0200 /* continuous mode */ + /* Rx Data buffer must be 4 bytes aligned in most cases */ #define UCC_FAST_RX_ALIGN 4 #define UCC_FAST_MRBLR_ALIGNMENT 4 -- cgit From f9eb8aea2a1e12fc2f584d1627deeb957435a801 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:15 -0700 Subject: net_sched: transform qdisc running bit into a seqcount Instead of using a single bit (__QDISC___STATE_RUNNING) in sch->__state, use a seqcount. This adds lockdep support, but more importantly it will allow us to sample qdisc/class statistics without having to grab qdisc root lock. Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/net/sch_generic.h | 15 ++++----------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa6df2699532..59d7e06d88d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1862,6 +1862,7 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; + struct lock_class_key *qdisc_running_key; bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..bff8d895ef8a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -29,13 +29,6 @@ enum qdisc_state_t { __QDISC_STATE_THROTTLED, }; -/* - * following bits are only changed while qdisc lock is held - */ -enum qdisc___state_t { - __QDISC___STATE_RUNNING = 1, -}; - struct qdisc_size_table { struct rcu_head rcu; struct list_head list; @@ -93,7 +86,7 @@ struct Qdisc { unsigned long state; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; - unsigned int __state; + seqcount_t running; struct gnet_stats_queue qstats; struct rcu_head rcu_head; int padded; @@ -104,20 +97,20 @@ struct Qdisc { static inline bool qdisc_is_running(const struct Qdisc *qdisc) { - return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false; + return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - qdisc->__state |= __QDISC___STATE_RUNNING; + write_seqcount_begin(&qdisc->running); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { - qdisc->__state &= ~__QDISC___STATE_RUNNING; + write_seqcount_end(&qdisc->running); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) -- cgit From edb09eb17ed89eaa82a52dd306beac93e292b485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:16 -0700 Subject: net: sched: do not acquire qdisc spinlock in qdisc/class stats dump Large tc dumps (tc -s {qdisc|class} sh dev ethX) done by Google BwE host agent [1] are problematic at scale : For each qdisc/class found in the dump, we currently lock the root qdisc spinlock in order to get stats. Sampling stats every 5 seconds from thousands of HTB classes is a challenge when the root qdisc spinlock is under high pressure. Not only the dumps take time, they also slow down the fast path (queue/dequeue packets) by 10 % to 20 % in some cases. An audit of existing qdiscs showed that sch_fq_codel is the only qdisc that might need the qdisc lock in fq_codel_dump_stats() and fq_codel_dump_class_stats() In v2 of this patch, I now use the Qdisc running seqcount to provide consistent reads of packets/bytes counters, regardless of 32/64 bit arches. I also changed rate estimators to use the same infrastructure so that they no longer need to lock root qdisc lock. [1] http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43838.pdf Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kevin Athey Cc: Xiaotian Pei Signed-off-by: David S. 
Miller --- include/net/gen_stats.h | 12 ++++++++---- include/net/sch_generic.h | 8 ++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 610cd397890e..231e121cc7d9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(struct gnet_dump *d, +int gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats, +void __gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, @@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est64 *rate_est); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est64 *rate_est); #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bff8d895ef8a..c4f5749342ec 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -314,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } +static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return &root->running; +} + static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; -- cgit From 6f23d96cfa4fb68c4c9683f161f831057a5a134f Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 8 Jun 2016 11:36:56 +0200 Subject: fsl/qe: Do not prefix header guard with CONFIG_ The CONFIG_ prefix should only be used for options which can be configured through Kconfig and not for guarding headers. Signed-off-by: Andreas Ziegler Signed-off-by: David S. Miller --- include/soc/fsl/qe/qe_tdm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h index 4c91498ab117..a1664b635f1a 100644 --- a/include/soc/fsl/qe/qe_tdm.h +++ b/include/soc/fsl/qe/qe_tdm.h @@ -11,8 +11,8 @@ * option) any later version */ -#ifndef CONFIG_QE_TDM_H -#define CONFIG_QE_TDM_H +#ifndef _QE_TDM_H_ +#define _QE_TDM_H_ #include #include -- cgit From 6ad8c632ee48ae099aa13704ef18a641220fe211 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 8 Jun 2016 06:22:10 -0400 Subject: qed: Add support for query/config dcbx. Query API reads the dcbx data from the device shared memory and return it to the caller. 
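For illustration only, and not as part of the patch, a protocol driver that has obtained a filled-in struct qed_dcbx_get (for example through a hypothetical qed_dcbx_query() helper; only the structure layout comes from the diff below) might summarize the negotiated state roughly as follows, assuming kernel context and a linux/qed/qed_if.h built with CONFIG_DCB:

/* Sketch: walk the operational DCBX block of a filled-in struct
 * qed_dcbx_get.  Illustrative only; the query helper that fills the
 * structure is hypothetical.
 */
static void example_dump_dcbx(const struct qed_dcbx_get *get)
{
	const struct qed_dcbx_operational_params *op = &get->operational;
	int i;

	if (!op->valid || !op->enabled)
		return;		/* nothing negotiated yet */

	pr_info("dcbx: %s mode, pfc %sabled, max ets tc %u\n",
		op->ieee ? "IEEE" : op->cee ? "CEE" : "unknown",
		op->params.pfc.enabled ? "en" : "dis",
		op->params.max_ets_tc);

	for (i = 0; i < op->params.num_app_entries &&
		    i < QED_DCBX_MAX_APP_PROTOCOL; i++) {
		const struct qed_app_entry *app = &op->params.app_entry[i];

		if (app->enabled)
			pr_info("dcbx: app %#x (%s) -> prio %u\n",
				app->proto_id,
				app->ethtype ? "ethtype" : "port",
				app->prio);
	}
}

The remote and local blocks of struct qed_dcbx_get carry the peer's and the locally administered (admin) parameters respectively; a consumer normally acts on the operational block once negotiation has completed.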
The config API configures the user provided dcbx values on the device, and initiates the dcbx negotiation with the peer. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e8cc49f9688a..e1d5122e8a96 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,96 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#ifdef CONFIG_DCB +#define QED_LLDP_CHASSIS_ID_STAT_LEN 4 +#define QED_LLDP_PORT_ID_STAT_LEN 4 +#define QED_DCBX_MAX_APP_PROTOCOL 32 +#define QED_MAX_PFC_PRIORITIES 8 +#define QED_DCBX_DSCP_SIZE 64 + +struct qed_dcbx_lldp_remote { + u32 peer_chassis_id[QED_LLDP_CHASSIS_ID_STAT_LEN]; + u32 peer_port_id[QED_LLDP_PORT_ID_STAT_LEN]; + bool enable_rx; + bool enable_tx; + u32 tx_interval; + u32 max_credit; +}; + +struct qed_dcbx_lldp_local { + u32 local_chassis_id[QED_LLDP_CHASSIS_ID_STAT_LEN]; + u32 local_port_id[QED_LLDP_PORT_ID_STAT_LEN]; +}; + +struct qed_dcbx_app_prio { + u8 roce; + u8 roce_v2; + u8 fcoe; + u8 iscsi; + u8 eth; +}; + +struct qed_dbcx_pfc_params { + bool willing; + bool enabled; + u8 prio[QED_MAX_PFC_PRIORITIES]; + u8 max_tc; +}; + +struct qed_app_entry { + bool ethtype; + bool enabled; + u8 prio; + u16 proto_id; + enum dcbx_protocol_type proto_type; +}; + +struct qed_dcbx_params { + struct qed_app_entry app_entry[QED_DCBX_MAX_APP_PROTOCOL]; + u16 num_app_entries; + bool app_willing; + bool app_valid; + bool app_error; + bool ets_willing; + bool ets_enabled; + bool ets_cbs; + bool valid; + u8 ets_pri_tc_tbl[QED_MAX_PFC_PRIORITIES]; + u8 ets_tc_bw_tbl[QED_MAX_PFC_PRIORITIES]; + u8 ets_tc_tsa_tbl[QED_MAX_PFC_PRIORITIES]; + struct qed_dbcx_pfc_params pfc; + u8 max_ets_tc; +}; + +struct qed_dcbx_admin_params { + struct qed_dcbx_params params; + bool valid; +}; + +struct qed_dcbx_remote_params { + struct qed_dcbx_params params; + bool valid; +}; + +struct qed_dcbx_operational_params { + struct qed_dcbx_app_prio app_prio; + struct qed_dcbx_params params; + bool valid; + bool enabled; + bool ieee; + bool cee; + u32 err; +}; + +struct qed_dcbx_get { + struct qed_dcbx_operational_params operational; + struct qed_dcbx_lldp_remote lldp_remote; + struct qed_dcbx_lldp_local lldp_local; + struct qed_dcbx_remote_params remote; + struct qed_dcbx_admin_params local; +}; +#endif + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, -- cgit From a1d8d8a51e8317269dd127d94b9de14f67d9563f Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 8 Jun 2016 06:22:11 -0400 Subject: qed: Add dcbnl support. This patch adds the implementation for both cee/ieee dcbnl callbacks by using the qed query/config APIs. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index f8ff71126d9e..71d523b4bc54 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -128,11 +128,73 @@ struct qed_eth_cb_ops { void (*force_mac) (void *dev, u8 *mac); }; +#ifdef CONFIG_DCB +/* Prototype declaration of qed_eth_dcbnl_ops should match with the declaration + * of dcbnl_rtnl_ops structure. 
+ */ +struct qed_eth_dcbnl_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_setpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_getets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_setets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_peer_getets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_peer_getpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_getapp)(struct qed_dev *cdev, struct dcb_app *app); + int (*ieee_setapp)(struct qed_dev *cdev, struct dcb_app *app); + + /* CEE std */ + u8 (*getstate)(struct qed_dev *cdev); + u8 (*setstate)(struct qed_dev *cdev, u8 state); + void (*getpgtccfgtx)(struct qed_dev *cdev, int prio, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map); + void (*getpgbwgcfgtx)(struct qed_dev *cdev, int pgid, u8 *bw_pct); + void (*getpgtccfgrx)(struct qed_dev *cdev, int prio, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map); + void (*getpgbwgcfgrx)(struct qed_dev *cdev, int pgid, u8 *bw_pct); + void (*getpfccfg)(struct qed_dev *cdev, int prio, u8 *setting); + void (*setpfccfg)(struct qed_dev *cdev, int prio, u8 setting); + u8 (*getcap)(struct qed_dev *cdev, int capid, u8 *cap); + int (*getnumtcs)(struct qed_dev *cdev, int tcid, u8 *num); + u8 (*getpfcstate)(struct qed_dev *cdev); + int (*getapp)(struct qed_dev *cdev, u8 idtype, u16 id); + u8 (*getfeatcfg)(struct qed_dev *cdev, int featid, u8 *flags); + + /* DCBX configuration */ + u8 (*getdcbx)(struct qed_dev *cdev); + void (*setpgtccfgtx)(struct qed_dev *cdev, int prio, + u8 pri_type, u8 pgid, u8 bw_pct, u8 up_map); + void (*setpgtccfgrx)(struct qed_dev *cdev, int prio, + u8 pri_type, u8 pgid, u8 bw_pct, u8 up_map); + void (*setpgbwgcfgtx)(struct qed_dev *cdev, int pgid, u8 bw_pct); + void (*setpgbwgcfgrx)(struct qed_dev *cdev, int pgid, u8 bw_pct); + u8 (*setall)(struct qed_dev *cdev); + int (*setnumtcs)(struct qed_dev *cdev, int tcid, u8 num); + void (*setpfcstate)(struct qed_dev *cdev, u8 state); + int (*setapp)(struct qed_dev *cdev, u8 idtype, u16 idval, u8 up); + u8 (*setdcbx)(struct qed_dev *cdev, u8 state); + u8 (*setfeatcfg)(struct qed_dev *cdev, int featid, u8 flags); + + /* Peer apps */ + int (*peer_getappinfo)(struct qed_dev *cdev, + struct dcb_peer_app_info *info, + u16 *app_count); + int (*peer_getapptable)(struct qed_dev *cdev, struct dcb_app *table); + + /* CEE peer */ + int (*cee_peer_getpfc)(struct qed_dev *cdev, struct cee_pfc *pfc); + int (*cee_peer_getpg)(struct qed_dev *cdev, struct cee_pg *pg); +}; +#endif + struct qed_eth_ops { const struct qed_common_ops *common; #ifdef CONFIG_QED_SRIOV const struct qed_iov_hv_ops *iov; #endif +#ifdef CONFIG_DCB + const struct qed_eth_dcbnl_ops *dcb; +#endif int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); -- cgit From 123b36526592f009bf8eccb7c8833aeda296d9cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 07:22:49 -0700 Subject: net: sched: fix missing doc annotations "make htmldocs" complains otherwise: .//net/core/gen_stats.c:168: warning: No description found for parameter 'running' .//include/linux/netdevice.h:1867: warning: No description found for parameter 'qdisc_running_key' Fixes: f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59d7e06d88d5..541562333ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1594,7 +1594,8 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * - * @qdisc_tx_busylock: XXX: need comments on this one + * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the -- cgit From 0c73c523cf737b5d446705392e0e14ee0411a351 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Jun 2016 16:32:42 -0700 Subject: net: dsa: Initialize CPU port ethtool ops per tree Now that we can properly support multiple distinct trees in the system, using a global variable: dsa_cpu_port_ethtool_ops is getting clobbered as soon as the second switch tree gets probed, and we don't want that. We need to move this to be dynamically allocated, and since we can't really be comparing addresses anymore to determine first time initialization versus any other times, just move this to dsa.c and dsa2.c where the remainder of the dst/ds initialization happens. The operations teardown restores the master netdev's ethtool_ops to its original ethtool_ops pointer (typically within the Ethernet driver) Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index cca7ef230742..20b3087ad193 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,6 +116,7 @@ struct dsa_switch_tree { * Original copy of the master netdev ethtool_ops */ struct ethtool_ops master_ethtool_ops; + const struct ethtool_ops *master_orig_ethtool_ops; /* * The switch and port to which the CPU is attached. -- cgit From 96c63fa7393d0a346acfe5a91e0c7d4c7782641b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 8 Jun 2016 10:55:39 -0700 Subject: net: Add l3mdev rule Currently, VRFs require 1 oif and 1 iif rule per address family per VRF. As the number of VRF devices increases it brings scalability issues with the increasing rule list. All of the VRF rules have the same format with the exception of the specific table id to direct the lookup. Since the table id is available from the oif or iif in the loopup, the VRF rules can be consolidated to a single rule that pulls the table from the VRF device. This patch introduces a new rule attribute l3mdev. The l3mdev rule means the table id used for the lookup is pulled from the L3 master device (e.g., VRF) rather than being statically defined. With the l3mdev rule all of the basic VRF FIB rules are reduced to 1 l3mdev rule per address family (IPv4 and IPv6). If an admin wishes to insert higher priority rules for specific VRFs those rules will co-exist with the l3mdev rule. This capability means current VRF scripts will co-exist with this new simpler implementation. 
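To make the consolidated rule concrete, the sketch below shows what a user-space tool might send to install the single IPv4 l3mdev rule over rtnetlink. It is an editorial illustration, not part of this patch: it assumes libmnl and uapi headers that already define FRA_L3MDEV (iproute2 support was still a prototype at this point), and it omits the netlink ACK handling.

/* Install "from all lookup [l3mdev table]" for IPv4 (sketch only). */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

static int add_ipv4_l3mdev_rule(uint32_t priority)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;
	int err = 0;

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWRULE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;

	frh = mnl_nlmsg_put_extra_header(nlh, sizeof(*frh));
	memset(frh, 0, sizeof(*frh));
	frh->family = AF_INET;
	frh->action = FR_ACT_TO_TBL;	/* table id comes from the VRF, not the rule */

	mnl_attr_put_u32(nlh, FRA_PRIORITY, priority);	/* e.g. 1000 */
	mnl_attr_put_u8(nlh, FRA_L3MDEV, 1);		/* defer table to l3mdev */

	nl = mnl_socket_open(NETLINK_ROUTE);
	if (!nl)
		return -1;
	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0 ||
	    mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		err = -1;
	mnl_socket_close(nl);
	return err;
}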
Currently, the rules list for both ipv4 and ipv6 look like this: $ ip ru ls 1000: from all oif vrf1 lookup 1001 1000: from all iif vrf1 lookup 1001 1000: from all oif vrf2 lookup 1002 1000: from all iif vrf2 lookup 1002 1000: from all oif vrf3 lookup 1003 1000: from all iif vrf3 lookup 1003 1000: from all oif vrf4 lookup 1004 1000: from all iif vrf4 lookup 1004 1000: from all oif vrf5 lookup 1005 1000: from all iif vrf5 lookup 1005 1000: from all oif vrf6 lookup 1006 1000: from all iif vrf6 lookup 1006 1000: from all oif vrf7 lookup 1007 1000: from all iif vrf7 lookup 1007 1000: from all oif vrf8 lookup 1008 1000: from all iif vrf8 lookup 1008 ... 32765: from all lookup local 32766: from all lookup main 32767: from all lookup default With the l3mdev rule the list is just the following regardless of the number of VRFs: $ ip ru ls 1000: from all lookup [l3mdev table] 32765: from all lookup local 32766: from all lookup main 32767: from all lookup default (Note: the above pretty print of the rule is based on an iproute2 prototype. Actual verbage may change) Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/fib_rules.h | 24 ++++++++++++++++++++++-- include/net/l3mdev.h | 12 ++++++++++++ include/uapi/linux/fib_rules.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 59160de702b6..456e4a6006ab 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -17,7 +17,8 @@ struct fib_rule { u32 flags; u32 table; u8 action; - /* 3 bytes hole, try to use */ + u8 l3mdev; + /* 2 bytes hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -36,6 +37,7 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 @@ -89,7 +91,8 @@ struct fib_rules_ops { [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_L3MDEV] = { .type = NLA_U8 } static inline void fib_rule_get(struct fib_rule *rule) { @@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule) kfree_rcu(rule, rcu); } +#ifdef CONFIG_NET_L3_MASTER_DEV +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->l3mdev ? 
arg->table : rule->table; +} +#else +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->table; +} +#endif + static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) @@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); #endif diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 374388dc01c8..34f33eb96a5e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,8 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include + /** * struct l3mdev_ops - l3mdev operations * @@ -41,6 +43,9 @@ struct l3mdev_ops { #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -236,6 +241,13 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return skb; } + +static inline +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg) +{ + return 1; +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 620c8a5ddc00..14404b3ebb89 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -50,6 +50,7 @@ enum { FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ __FRA_MAX }; -- cgit From dd47c1fa776cda48531b651c88341e951140b0a7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:40 +0200 Subject: cbq: remove TCA_CBQ_POLICE support iproute2 doesn't implement any cbq option that results in this attribute being sent to kernel. To make use of it, user would have to - patch iproute2 - add a class - attach a qdisc to the class (default pfifo doesn't work as q->handle is 0 and cbq_set_police() is a no-op in this case) - re-'add' the same class (tc class change ...) again - user must also specifiy a defmap (e.g. 'split 1:0 defmap 3f'), since this 'police' feature relies on its presence - the added qdisc must be one of bfifo, pfifo or netem If all of these conditions are met and _some_ leaf qdiscs, namely p/bfifo, netem, plug or tbf would drop a packet, kernel calls back into cbq, which will attempt to re-queue the skb into a different class as indicated by the parents' defmap entry for TC_PRIO_BESTEFFORT. [ i.e. we behave as if tc_classify returned TC_ACT_RECLASSIFY ]. This feature, which isn't documented or implemented in iproute2, and isn't implemented consistently (most qdiscs like sfq, codel, etc drop right away instead of attempting this reclassification) is the sole reason for the reshape_fail and __parent member in Qdisc struct. So remove TCA_CBQ_POLICE support from the kernel, reject it via EOPNOTSUPP so userspace knows we don't support it, and then remove no-longer needed infrastructure in followup commit. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c4f5749342ec..c069ac1dd75d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -68,10 +68,6 @@ struct Qdisc { void *u32_node; - /* This field is deprecated, but it is still used by CBQ - * and it will live until better solution will be invented. - */ - struct Qdisc *__parent; struct netdev_queue *dev_queue; struct gnet_stats_rate_est64 rate_est; -- cgit From c3a173d7dba2d7c74dd4ab871b8f22bf56ac10b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:41 +0200 Subject: sched: remove qdisc_rehape_fail After the removal of TCA_CBQ_POLICE in cbq scheduler qdisc->reshape_fail is always NULL, i.e. qdisc_rehape_fail is now the same as qdisc_drop. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c069ac1dd75d..a9aec633d467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -63,9 +63,6 @@ struct Qdisc { struct list_head list; u32 handle; u32 parent; - int (*reshape_fail)(struct sk_buff *skb, - struct Qdisc *q); - void *u32_node; struct netdev_queue *dev_queue; @@ -771,22 +768,6 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_DROP; } -static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_qstats_drop(sch); - -#ifdef CONFIG_NET_CLS_ACT - if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) - goto drop; - - return NET_XMIT_SUCCESS; - -drop: -#endif - kfree_skb(skb); - return NET_XMIT_DROP; -} - /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how long it will take to send a packet given its size. */ -- cgit From a09ceb0e08140a1eec05b49b4c232d3481339cb0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:42 +0200 Subject: sched: remove qdisc->drop after removal of TCA_CBQ_OVL_STRATEGY from cbq scheduler, there are no more callers of ->drop() outside of other ->drop functions, i.e. nothing calls them. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a9aec633d467..4dedb7f12ed5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -174,7 +174,6 @@ struct Qdisc_ops { int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - unsigned int (*drop)(struct Qdisc *); int (*init)(struct Qdisc *, struct nlattr *arg); void (*reset)(struct Qdisc *); @@ -658,22 +657,6 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) return __qdisc_queue_drop_head(sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __skb_dequeue_tail(list); - - if (likely(skb != NULL)) - qdisc_qstats_backlog_dec(sch, skb); - - return skb; -} - -static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) -{ - return __qdisc_dequeue_tail(sch, &sch->q); -} - static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { return skb_peek(&sch->q); @@ -741,25 +724,6 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } -static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); - - if (likely(skb != NULL)) { - unsigned int len = qdisc_pkt_len(skb); - kfree_skb(skb); - return len; - } - - return 0; -} - -static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) -{ - return __qdisc_queue_drop(sch, &sch->q); -} - static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { kfree_skb(skb); -- cgit From c8945043cdc687388b7a43fc6f474bddd9607e80 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:43 +0200 Subject: sched: place state, next_sched and gso_skb in same cacheline again Earlier commits removed two members from struct Qdisc which places next_sched/gso_skb into a different cacheline than ->state. This restores the struct layout to what it was before the removal. Move the two members, then add an annotation so they all reside in the same cacheline. This adds a 16 byte hole after cpu_qstats. The hole could be closed but as it doesn't decrease total struct size just do it this way. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4dedb7f12ed5..49534e28824b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,11 +71,11 @@ struct Qdisc { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; - struct Qdisc *next_sched; - struct sk_buff *gso_skb; /* * For performance sake on SMP, we put highly modified fields at the end */ + struct Qdisc *next_sched ____cacheline_aligned_in_smp; + struct sk_buff *gso_skb; unsigned long state; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; -- cgit From 1dad640b9ef320e8de92c418bcc08448d67590a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 10:14:39 +0200 Subject: wext: reformat struct/union declarations Everytime I need to look for these, my usual strategy fails because it assumes the right formatting. 
Fix the formatting here to make it consistent with the rest of the kernel. Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 63 +++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index c1592e3e4036..d9ecd7c6d691 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -670,8 +670,7 @@ /* * Generic format for most parameters that fit in an int */ -struct iw_param -{ +struct iw_param { __s32 value; /* The value of the parameter itself */ __u8 fixed; /* Hardware should not use auto select */ __u8 disabled; /* Disable the feature */ @@ -682,8 +681,7 @@ struct iw_param * For all data larger than 16 octets, we need to use a * pointer to memory allocated in user space. */ -struct iw_point -{ +struct iw_point { void __user *pointer; /* Pointer to the data (in user space) */ __u16 length; /* number of fields or size in bytes */ __u16 flags; /* Optional params */ @@ -698,8 +696,7 @@ struct iw_point * of 10 to get 'm' lower than 10^9, with 'm'= f / (10^'e')... * The power of 10 is in 'e', the result of the division is in 'm'. */ -struct iw_freq -{ +struct iw_freq { __s32 m; /* Mantissa */ __s16 e; /* Exponent */ __u8 i; /* List index (when in range struct) */ @@ -709,8 +706,7 @@ struct iw_freq /* * Quality of the link */ -struct iw_quality -{ +struct iw_quality { __u8 qual; /* link quality (%retries, SNR, %missed beacons or better...) */ __u8 level; /* signal level (dBm) */ @@ -725,8 +721,7 @@ struct iw_quality * is already pretty exhaustive, and you should use that first. * This is only additional stats... */ -struct iw_discarded -{ +struct iw_discarded { __u32 nwid; /* Rx : Wrong nwid/essid */ __u32 code; /* Rx : Unable to code/decode (WEP) */ __u32 fragment; /* Rx : Can't perform MAC reassembly */ @@ -738,16 +733,14 @@ struct iw_discarded * Packet/Time period missed in the wireless adapter due to * "wireless" specific problems... */ -struct iw_missed -{ +struct iw_missed { __u32 beacon; /* Missed beacons/superframe */ }; /* * Quality range (for spy threshold) */ -struct iw_thrspy -{ +struct iw_thrspy { struct sockaddr addr; /* Source address (hw/mac) */ struct iw_quality qual; /* Quality of the link */ struct iw_quality low; /* Low threshold */ @@ -765,8 +758,7 @@ struct iw_thrspy * Especially, scan results are required to include an entry for the * current BSS if the driver is in Managed mode and associated with an AP. */ -struct iw_scan_req -{ +struct iw_scan_req { __u8 scan_type; /* IW_SCAN_TYPE_{ACTIVE,PASSIVE} */ __u8 essid_len; __u8 num_channels; /* num entries in channel_list; @@ -827,8 +819,7 @@ struct iw_scan_req * RX_SEQ_VALID for SIOCGIWENCODEEXT are optional, but can be useful for * debugging/testing. 
*/ -struct iw_encode_ext -{ +struct iw_encode_ext { __u32 ext_flags; /* IW_ENCODE_EXT_* */ __u8 tx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ __u8 rx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ @@ -841,8 +832,7 @@ struct iw_encode_ext }; /* SIOCSIWMLME data */ -struct iw_mlme -{ +struct iw_mlme { __u16 cmd; /* IW_MLME_* */ __u16 reason_code; struct sockaddr addr; @@ -855,16 +845,14 @@ struct iw_mlme #define IW_PMKID_LEN 16 -struct iw_pmksa -{ +struct iw_pmksa { __u32 cmd; /* IW_PMKSA_* */ struct sockaddr bssid; __u8 pmkid[IW_PMKID_LEN]; }; /* IWEVMICHAELMICFAILURE data */ -struct iw_michaelmicfailure -{ +struct iw_michaelmicfailure { __u32 flags; struct sockaddr src_addr; __u8 tsc[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ @@ -872,8 +860,7 @@ struct iw_michaelmicfailure /* IWEVPMKIDCAND data */ #define IW_PMKID_CAND_PREAUTH 0x00000001 /* RNS pre-authentication enabled */ -struct iw_pmkid_cand -{ +struct iw_pmkid_cand { __u32 flags; /* IW_PMKID_CAND_* */ __u32 index; /* the smaller the index, the higher the * priority */ @@ -884,8 +871,7 @@ struct iw_pmkid_cand /* * Wireless statistics (used for /proc/net/wireless) */ -struct iw_statistics -{ +struct iw_statistics { __u16 status; /* Status * - device dependent for now */ @@ -897,7 +883,7 @@ struct iw_statistics /* ------------------------ IOCTL REQUEST ------------------------ */ /* - * This structure defines the payload of an ioctl, and is used + * This structure defines the payload of an ioctl, and is used * below. * * Note that this structure should fit on the memory footprint @@ -906,8 +892,7 @@ struct iw_statistics * You should check this when increasing the structures defined * above in this file... */ -union iwreq_data -{ +union iwreq_data { /* Config - generic */ char name[IFNAMSIZ]; /* Name : used to verify the presence of wireless extensions. @@ -944,15 +929,14 @@ union iwreq_data * convenience... * Do I need to remind you about structure size (32 octets) ? */ -struct iwreq -{ +struct iwreq { union { char ifrn_name[IFNAMSIZ]; /* if name, e.g. "eth0" */ } ifr_ifrn; /* Data part (defined just above) */ - union iwreq_data u; + union iwreq_data u; }; /* -------------------------- IOCTL DATA -------------------------- */ @@ -965,8 +949,7 @@ struct iwreq * Range of parameters */ -struct iw_range -{ +struct iw_range { /* Informative stuff (to choose between different interface) */ __u32 throughput; /* To give an idea... */ /* In theory this value should be the maximum benchmarked @@ -1069,9 +1052,8 @@ struct iw_range /* * Private ioctl interface information */ - -struct iw_priv_args -{ + +struct iw_priv_args { __u32 cmd; /* Number of the ioctl to issue */ __u16 set_args; /* Type and number of args */ __u16 get_args; /* Type and number of args */ @@ -1088,8 +1070,7 @@ struct iw_priv_args /* * A Wireless Event. Contains basically the same data as the ioctl... */ -struct iw_event -{ +struct iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ union iwreq_data u; /* IOCTL fixed payload */ -- cgit From 80a83cfc434b1e3afe38974570b460db4898bec6 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 19 May 2016 10:37:48 +0200 Subject: mac80211: skip netdev queue control with software queuing Qdiscs are designed with no regard to 802.11 aggregation requirements and hand out packet-by-packet with no guarantee they are destined to the same tid. This does more bad than good no matter how fairly a given qdisc may behave on an ethernet interface. 
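For context, with software queuing the driver pulls frames per TXQ (per station/tid) itself instead of having qdiscs hand them over packet-by-packet through netdev subqueues; a minimal driver-side sketch, in which everything prefixed drv_ is hypothetical hardware glue rather than mac80211 API, looks roughly like this:

#include <net/mac80211.h>

/* Sketch of a wake_tx_queue() handler: drain the per-tid queue into the
 * hardware for as long as there is room, using ieee80211_tx_dequeue(). */
static void drv_wake_tx_queue(struct ieee80211_hw *hw,
                              struct ieee80211_txq *txq)
{
        struct sk_buff *skb;

        while (drv_hw_has_tx_room(hw)) {        /* hypothetical fill-level check */
                skb = ieee80211_tx_dequeue(hw, txq);
                if (!skb)
                        break;                  /* txq is empty */
                drv_hw_push_frame(hw, txq, skb);        /* hypothetical */
        }
}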
Software queuing used per-AC netdev subqueue congestion control whenever a global AC limit was hit. This meant in practice a single station or tid queue could starve others rather easily. This could resonate with qdiscs in a bad way or could just end up with poor aggregation performance. Increasing the AC limit would increase induced latency which is also bad. Disabling qdiscs by default and performing taildrop instead of netdev subqueue congestion control on the other hand makes it possible for tid queues to fill up "in the meantime" while preventing stations starving each other. This increases aggregation opportunities and should allow software queuing based drivers to achieve better performance by utilizing airtime more efficiently with big aggregates. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index be30b0549b88..a8683aec6dbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2147,9 +2147,6 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. - * - * @txq_ac_max_pending: maximum number of frames per AC pending in all txq * entries for a vif. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2180,7 +2177,6 @@ struct ieee80211_hw { u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; - int txq_ac_max_pending; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, -- cgit From 5caa328e3811b7cfa33fd02c93280ffa622deb0e Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 19 May 2016 10:37:51 +0200 Subject: mac80211: implement codel on fair queuing flows There is no limit other than a global packet count limit when using software queuing. This means a single flow queue can grow insanely long. This is particularly bad for TCP congestion algorithms, which require a little more sophisticated frame dropping scheme than a mere headdrop on limit overflow. Hence apply (a slightly modified, to fit the knobs) CoDel5 on flow queues. This improves TCP convergence and stability when combined with a wireless driver which keeps its own tx queue/fifo at a minimum fill level for given link conditions. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a8683aec6dbe..a52009ffc19f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -21,6 +21,7 @@ #include #include #include +#include #include /** @@ -895,7 +896,18 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; + union { + /* NB: vif can be NULL for injected frames */ + struct ieee80211_vif *vif; + + /* When packets are enqueued on txq it's easy + * to re-construct the vif pointer. There's no + * more space in tx_info so it can be used to + * store the necessary enqueue time for packet * sojourn time computation.
+ */ + codel_time_t enqueue_time; + }; struct ieee80211_key_conf *hw_key; u32 flags; /* 4 bytes free */ -- cgit From 52fbb2907988aa0583c6d9d53a56aee090b2df7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jun 2016 07:45:11 -0700 Subject: net: sched: fix qdisc->running lockdep annotations 1) qdisc_run_begin() is really using the equivalent of a trylock. Instead of using write_seqcount_begin(), use a combination of raw_write_seqcount_begin() and correct lockdep annotation. 2) sch_direct_xmit() should use regular spin_lock(root_lock) Fixes: f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Signed-off-by: Eric Dumazet Reported-by: David Ahern Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 49534e28824b..a4c0f1649e2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -97,7 +97,11 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - write_seqcount_begin(&qdisc->running); + /* Variant of write_seqcount_begin() telling lockdep a trylock + * was attempted. + */ + raw_write_seqcount_begin(&qdisc->running); + seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; } -- cgit From d3fff6c443fe8f8a5ef2bdcea45e2ff39db948c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jun 2016 07:45:12 -0700 Subject: net: add netdev_lockdep_set_classes() helper It is time to add netdev_lockdep_set_classes() helper so that lockdep annotations per device type are easier to manage. This removes a lot of copies and missing annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 541562333ba5..4f234b102892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1946,6 +1946,23 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_running_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + (dev)->qdisc_running_key = &qdisc_running_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); -- cgit From 967dd82ffc52e9d8ea0defde094f9a39a3f4eeed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 9 Jun 2016 18:23:53 -0700 Subject: net: dsa: b53: Add support for Broadcom RoboSwitch This patch adds support for Broadcom's BCM53xx switch family, also known as RoboSwitch. Some of these switches are ubiquitous, found in home routers, Wi-Fi routers, DSL and cable modem gateways and other networking related products. This driver adds the library driver (b53_common.c) as well as a few bus glue drivers for MDIO, SPI, Switch Register Access Block (SRAB) and memory-mapped I/O into a SoC's address space (Broadcom BCM63xx/33xx).
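A hypothetical board-code sketch of how a SoC could describe such a memory-mapped switch through the platform data added below; the device name and chip id value here are placeholders and are not taken from this patch:

#include <linux/platform_device.h>
#include <linux/platform_data/b53.h>

static struct b53_platform_data example_b53_pdata = {
        .chip_id        = 0x25,         /* placeholder: a 5325-class switch id */
        .enabled_ports  = 0x1f,         /* user ports 0-4 */
        .big_endian     = 0,
        /* .regs would be set to the ioremap()ed switch register window */
};

static struct platform_device example_b53_device = {
        .name   = "b53-switch",         /* placeholder glue-driver name */
        .id     = -1,
        .dev    = {
                .platform_data = &example_b53_pdata,
        },
};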
Basic operations are supported to bring the Layer 1/2 up and running, but not much more at this point, subsequent patches add the remaining features. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/b53.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/platform_data/b53.h (limited to 'include') diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h new file mode 100644 index 000000000000..69d279c0da96 --- /dev/null +++ b/include/linux/platform_data/b53.h @@ -0,0 +1,33 @@ +/* + * B53 platform data + * + * Copyright (C) 2013 Jonas Gorski + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __B53_H +#define __B53_H + +#include + +struct b53_platform_data { + u32 chip_id; + u16 enabled_ports; + + /* only used by MMAP'd driver */ + unsigned big_endian:1; + void __iomem *regs; +}; + +#endif -- cgit From 2341e0775747864b684abe8627f3d45b167f2940 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Jun 2016 23:02:51 +0100 Subject: rxrpc: Simplify connect() implementation and simplify sendmsg() op Simplify the RxRPC connect() implementation. It will just note the destination address it is given, and if a sendmsg() comes along with no address, this will be assigned as the address. No transport struct will be held internally, which will allow us to remove this later. Simplify sendmsg() also. Whilst a call is active, userspace refers to it by a private unique user ID specified in a control message. When sendmsg() sees a user ID that doesn't map to an extant call, it creates a new call for that user ID and attempts to add it. If, when we try to add it, the user ID is now registered, we now reject the message with -EEXIST. We should never see this situation unless two threads are racing, trying to create a call with the same ID - which would be an error. It also isn't required to provide sendmsg() with an address - provided the control message data holds a user ID that maps to a currently active call. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/linux/rxrpc.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index a53915cd5581..1e8f216e2cf1 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -40,16 +40,18 @@ struct sockaddr_rxrpc { /* * RxRPC control messages + * - If neither abort or accept are specified, the message is a data message. 
* - terminal messages mean that a user call ID tag can be recycled + * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() */ -#define RXRPC_USER_CALL_ID 1 /* user call ID specifier */ -#define RXRPC_ABORT 2 /* abort request / notification [terminal] */ -#define RXRPC_ACK 3 /* [Server] RPC op final ACK received [terminal] */ -#define RXRPC_NET_ERROR 5 /* network error received [terminal] */ -#define RXRPC_BUSY 6 /* server busy received [terminal] */ -#define RXRPC_LOCAL_ERROR 7 /* local error generated [terminal] */ -#define RXRPC_NEW_CALL 8 /* [Server] new incoming call notification */ -#define RXRPC_ACCEPT 9 /* [Server] accept request */ +#define RXRPC_USER_CALL_ID 1 /* sr: user call ID specifier */ +#define RXRPC_ABORT 2 /* sr: abort request / notification [terminal] */ +#define RXRPC_ACK 3 /* -r: [Service] RPC op final ACK received [terminal] */ +#define RXRPC_NET_ERROR 5 /* -r: network error received [terminal] */ +#define RXRPC_BUSY 6 /* -r: server busy received [terminal] */ +#define RXRPC_LOCAL_ERROR 7 /* -r: local error generated [terminal] */ +#define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ +#define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ /* * RxRPC security levels -- cgit From e434863718d4b99dd0d6e0cefd3c5e79e4fa2083 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 9 Jun 2016 10:21:00 -0700 Subject: net: vrf: Fix crash when IPv6 is disabled at boot time Frank Kellermann reported a kernel crash with 4.5.0 when IPv6 is disabled at boot using the kernel option ipv6.disable=1. Using current net-next with the boot option: $ ip link add red type vrf table 1001 Generates: [12210.919584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000748 [12210.921341] IP: [] fib6_get_table+0x2c/0x5a [12210.922537] PGD b79e3067 PUD bb32b067 PMD 0 [12210.923479] Oops: 0000 [#1] SMP [12210.924001] Modules linked in: ipvlan 8021q garp mrp stp llc [12210.925130] CPU: 3 PID: 1177 Comm: ip Not tainted 4.7.0-rc1+ #235 [12210.926168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [12210.928065] task: ffff8800b9ac4640 ti: ffff8800bacac000 task.ti: ffff8800bacac000 [12210.929328] RIP: 0010:[] [] fib6_get_table+0x2c/0x5a [12210.930697] RSP: 0018:ffff8800bacaf888 EFLAGS: 00010202 [12210.931563] RAX: 0000000000000748 RBX: ffffffff81a9e280 RCX: ffff8800b9ac4e28 [12210.932688] RDX: 00000000000000e9 RSI: 0000000000000002 RDI: 0000000000000286 [12210.933820] RBP: ffff8800bacaf898 R08: ffff8800b9ac4df0 R09: 000000000052001b [12210.934941] R10: 00000000657c0000 R11: 000000000000c649 R12: 00000000000003e9 [12210.936032] R13: 00000000000003e9 R14: ffff8800bace7800 R15: ffff8800bb3ec000 [12210.937103] FS: 00007faa1766c700(0000) GS:ffff88013ac00000(0000) knlGS:0000000000000000 [12210.938321] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [12210.939166] CR2: 0000000000000748 CR3: 00000000b79d6000 CR4: 00000000000406e0 [12210.940278] Stack: [12210.940603] ffff8800bb3ec000 ffffffff81a9e280 ffff8800bacaf8c8 ffffffff814b3135 [12210.941818] ffff8800bb3ec000 ffffffff81a9e280 ffffffff81a9e280 ffff8800bace7800 [12210.943040] ffff8800bacaf8f0 ffffffff81397c88 ffff8800bb3ec000 ffffffff81a9e280 [12210.944288] Call Trace: [12210.944688] [] fib6_new_table+0x24/0x8a [12210.945516] [] vrf_dev_init+0xd4/0x162 [12210.946328] [] register_netdevice+0x100/0x396 [12210.947209] [] vrf_newlink+0x40/0xb3 [12210.948001] [] rtnl_newlink+0x5d3/0x6d5 ... 
The problem above is due to the fact that the fib hash table is not allocated when IPv6 is disabled at boot. As for the VRF driver it should not do any IPv6 initializations if IPv6 is disabled, so it needs to know if IPv6 is disabled at boot. The disable parameter is private to the IPv6 module, so provide an accessor for modules to determine if IPv6 was disabled at boot time. Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5c91b0b055d4..c6dbcd84a2c7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -283,6 +283,8 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) +bool ipv6_mod_enabled(void); + static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; @@ -326,6 +328,11 @@ static inline int inet_v6_ipv6only(const struct sock *sk) #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 +static inline bool ipv6_mod_enabled(void) +{ + return false; +} + static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return NULL; -- cgit From 7d84e37e114215be0a0c80095891c8268a99352b Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 9 Jun 2016 13:41:15 -0400 Subject: virtio_net: Update the feature bit to comply with spec A draft version of the MTU Advice feature bit was specified as 25. This bit is not within the allowed range for network device feature bits, and should be changed to be feature bit 3 to fully comply with the spec. Fixes 14de9d114a82 ('virtio-net: Add initial MTU advice feature') Signed-off-by: Aaron Conole Suggested-by: "Michael S. Tsirkin" Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 1ab4ea6ec847..0da0e3a98f17 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -35,6 +35,7 @@ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -55,7 +56,6 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_MTU 25 /* Initial MTU advice */ #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ -- cgit From 7486216b3a0bd26375b17b2cc168a311106cea70 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 9 Jun 2016 15:11:34 +0300 Subject: {net,IB}/mlx5: mlx5_ifc updates Introducing mlx5_ifc updates for upcoming ConnectX-4 features. 
Needed bits and hardware structures for mlx5e netdev: - MLX5_CQ_PERIOD_NUM_MODES for adaptive moderation support - QoS rate limiting - SQ context rate limiting - Auto negotiation fields in PTYS register - Source SQN field in flow table entry match structure - DCBX parameters Needed bits and hardware structures for IB: - New XRQ opcodes, commands and capabilities layout - Extend q counters definition to support IB. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 275 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 263 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7e5890..209add93159f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -123,6 +123,10 @@ enum { MLX5_CMD_OP_DRAIN_DCT = 0x712, MLX5_CMD_OP_QUERY_DCT = 0x713, MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, + MLX5_CMD_OP_CREATE_XRQ = 0x717, + MLX5_CMD_OP_DESTROY_XRQ = 0x718, + MLX5_CMD_OP_QUERY_XRQ = 0x719, + MLX5_CMD_OP_ARM_XRQ = 0x71a, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -139,6 +143,8 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, + MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -361,7 +367,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { }; struct mlx5_ifc_fte_match_set_misc_bits { - u8 reserved_at_0[0x20]; + u8 reserved_at_0[0x8]; + u8 source_sqn[0x18]; u8 reserved_at_20[0x10]; u8 source_port[0x10]; @@ -505,6 +512,17 @@ struct mlx5_ifc_e_switch_cap_bits { u8 reserved_at_20[0x7e0]; }; +struct mlx5_ifc_qos_cap_bits { + u8 packet_pacing[0x1]; + u8 reserved_0[0x1f]; + u8 reserved_1[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; + u8 reserved_2[0x10]; + u8 packet_pacing_rate_table_size[0x10]; + u8 reserved_3[0x760]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -744,7 +762,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; - u8 reserved_at_182[0x4]; + u8 retransmission_q_counters[0x1]; + u8 reserved_at_183[0x3]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -771,7 +790,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_msg[0x5]; u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1d0[0x6]; + u8 reserved_at_1d0[0x1]; + u8 dcbx[0x1]; + u8 reserved_at_1d2[0x4]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_at_1d8[0x1]; @@ -803,7 +824,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21b[0x1]; + u8 qos[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; @@ -929,7 +950,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; - u8 reserved_at_5e0[0x220]; + u8 reserved_at_5e0[0x10]; + u8 tag_matching[0x1]; + u8 rndv_offload_rc[0x1]; + u8 rndv_offload_dc[0x1]; + u8 log_tag_matching_list_sz[0x5]; + u8 reserved_at_5e8[0x3]; + u8 log_max_xrq[0x5]; + + u8 reserved_at_5f0[0x200]; }; enum mlx5_flow_destination_type { @@ -1967,7 +1996,7 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_560[0x5]; u8 rq_type[0x3]; - u8 srqn_rmpn[0x18]; + u8 srqn_rmpn_xrqn[0x18]; u8 reserved_at_580[0x8]; u8 rmsn[0x18]; @@ -2018,6 +2047,7 @@ 
union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; + struct mlx5_ifc_qos_cap_bits qos_cap; u8 reserved_at_0[0x8000]; }; @@ -2244,8 +2274,9 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_at_60[0xa0]; + u8 reserved_at_60[0x90]; + u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_at_110[0x10]; @@ -2593,7 +2624,7 @@ struct mlx5_ifc_dctc_bits { u8 reserved_at_98[0x8]; u8 reserved_at_a0[0x8]; - u8 srqn[0x18]; + u8 srqn_xrqn[0x18]; u8 reserved_at_c0[0x8]; u8 pd[0x18]; @@ -2645,6 +2676,7 @@ enum { enum { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, + MLX5_CQ_PERIOD_NUM_MODES }; struct mlx5_ifc_cqc_bits { @@ -2722,6 +2754,54 @@ struct mlx5_ifc_query_adapter_param_block_bits { u8 vsd_contd_psid[16][0x8]; }; +enum { + MLX5_XRQC_STATE_GOOD = 0x0, + MLX5_XRQC_STATE_ERROR = 0x1, +}; + +enum { + MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0, + MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1, +}; + +enum { + MLX5_XRQC_OFFLOAD_RNDV = 0x1, +}; + +struct mlx5_ifc_tag_matching_topology_context_bits { + u8 log_matching_list_sz[0x4]; + u8 reserved_at_4[0xc]; + u8 append_next_index[0x10]; + + u8 sw_phase_cnt[0x10]; + u8 hw_phase_cnt[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_xrqc_bits { + u8 state[0x4]; + u8 rlkey[0x1]; + u8 reserved_at_5[0xf]; + u8 topology[0x4]; + u8 reserved_at_18[0x4]; + u8 offload[0x4]; + + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + + u8 reserved_at_40[0x8]; + u8 cqn[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; + + u8 reserved_at_180[0x180]; + + struct mlx5_ifc_wq_bits wq; +}; + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { struct mlx5_ifc_modify_field_select_bits modify_field_select; struct mlx5_ifc_resize_field_select_bits resize_field_select; @@ -3144,6 +3224,30 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 reserved_at_800[0x80]; }; +struct mlx5_ifc_query_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + +struct mlx5_ifc_query_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3547,7 +3651,27 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 out_of_sequence[0x20]; - u8 reserved_at_1e0[0x620]; + u8 reserved_at_1e0[0x20]; + + u8 duplicate_request[0x20]; + + u8 reserved_at_220[0x20]; + + u8 rnr_nak_retry_err[0x20]; + + u8 reserved_at_260[0x20]; + + u8 packet_seq_err[0x20]; + + u8 reserved_at_2a0[0x20]; + + u8 implied_nak_seq_err[0x20]; + + u8 reserved_at_2e0[0x20]; + + u8 local_ack_timeout_err[0x20]; + + u8 reserved_at_320[0x4e0]; }; struct mlx5_ifc_query_q_counter_in_bits { @@ -4998,6 +5122,28 @@ struct mlx5_ifc_detach_from_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_destroy_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; 
+}; + struct mlx5_ifc_destroy_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5583,6 +5729,30 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_create_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6124,6 +6294,29 @@ struct mlx5_ifc_attach_to_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_arm_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_arm_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x10]; + u8 lwm[0x10]; +}; + struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6161,7 +6354,8 @@ struct mlx5_ifc_arm_rq_out_bits { }; enum { - MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_XRQ = 0x2, }; struct mlx5_ifc_arm_rq_in_bits { @@ -6354,6 +6548,30 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; +struct mlx5_ifc_set_rate_limit_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_rate_limit_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 rate_limit_index[0x10]; + + u8 reserved_at_60[0x20]; + + u8 rate_limit[0x20]; +}; + struct mlx5_ifc_access_register_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6478,12 +6696,15 @@ struct mlx5_ifc_pude_reg_bits { }; struct mlx5_ifc_ptys_reg_bits { - u8 reserved_at_0[0x8]; + u8 an_disable_cap[0x1]; + u8 an_disable_admin[0x1]; + u8 reserved_at_2[0x6]; u8 local_port[0x8]; u8 reserved_at_10[0xd]; u8 proto_mask[0x3]; - u8 reserved_at_20[0x40]; + u8 an_status[0x4]; + u8 reserved_at_24[0x3c]; u8 eth_proto_capability[0x20]; @@ -7444,4 +7665,34 @@ struct mlx5_ifc_mcia_reg_bits { u8 dword_11[0x20]; }; +struct mlx5_ifc_dcbx_param_bits { + u8 dcbx_cee_cap[0x1]; + u8 dcbx_ieee_cap[0x1]; + u8 dcbx_standby_cap[0x1]; + u8 reserved_at_0[0x5]; + u8 port_number[0x8]; + u8 reserved_at_10[0xa]; + u8 max_application_table_size[6]; + u8 reserved_at_20[0x15]; + u8 version_oper[0x3]; + u8 reserved_at_38[5]; + u8 version_admin[0x3]; + u8 willing_admin[0x1]; + u8 reserved_at_41[0x3]; + u8 pfc_cap_oper[0x4]; + u8 reserved_at_48[0x4]; + u8 pfc_cap_admin[0x4]; + u8 reserved_at_50[0x4]; + u8 num_of_tc_oper[0x4]; + u8 reserved_at_58[0x4]; + u8 num_of_tc_admin[0x4]; + u8 remote_willing[0x1]; + u8 reserved_at_61[3]; + u8 remote_pfc_cap[4]; + u8 reserved_at_68[0x14]; + u8 remote_num_of_tc[0x4]; + u8 reserved_at_80[0x18]; + u8 error[0x8]; + u8 reserved_at_a0[0x160]; +}; #endif /* MLX5_IFC_H */ -- cgit From f2a4d086ed4c588d32fe9b7aa67fead7280e7bf1 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 10 Jun 2016 11:49:33 -0700 Subject: openvswitch: Add packet truncation support. The patch adds a new OVS action, OVS_ACTION_ATTR_TRUNC, in order to truncate packets. 
A 'max_len' is added for setting up the maximum packet size, and a 'cutlen' field is to record the number of bytes to trim the packet when the packet is outputting to a port, or when the packet is sent to userspace. Signed-off-by: William Tu Cc: Pravin Shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index bb0d515b7654..8274675ba9a3 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -580,6 +580,10 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) +struct ovs_action_trunc { + uint32_t max_len; /* Max packet size in bytes. */ +}; + /** * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. * @mpls_lse: MPLS label stack entry to push. @@ -703,6 +707,7 @@ enum ovs_nat_attr { * enum ovs_action_attr - Action types. * * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_TRUNC: Output packet to port with truncated packet size. * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested * %OVS_USERSPACE_ATTR_* attributes. * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The @@ -756,6 +761,7 @@ enum ovs_action_attr { * The data must be zero for the unmasked * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ + OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit From a70b506efe899dc8d650eafcc0b11fc9ee746627 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Jun 2016 21:19:06 +0200 Subject: bpf: enforce recursion limit on redirects Respect the stack's xmit_recursion limit for calls into dev_queue_xmit(). Currently, they are not handeled by the limiter when attached to clsact's egress parent, for example, and a buggy program redirecting it to the same device again could run into stack overflow eventually. It would be good if we could notify an admin to give him a chance to react. We reuse xmit_recursion instead of having one private to eBPF, so that the stack's current recursion depth will be taken into account as well. Follow-up to commit 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") and 27b29f63058d ("bpf: add bpf_redirect() helper"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4f234b102892..94eef356a65f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2389,6 +2389,8 @@ void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); DECLARE_PER_CPU(int, xmit_recursion); +#define XMIT_RECURSION_LIMIT 10 + static inline int dev_recursion_level(void) { return this_cpu_read(xmit_recursion); -- cgit From 678ece30226ac05e95768d5f70db53a475569d37 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Jun 2016 16:09:17 +0300 Subject: virtio_net: add _UAPI prefix to virtio_net header guards This gives better namespacing and prevents conflicts with no-uapi version of virtio_net header that will be introduced in the following patch. Signed-off-by: Mike Rapoport Signed-off-by: David S. 
Miller --- include/uapi/linux/virtio_net.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 0da0e3a98f17..fc353b518288 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VIRTIO_NET_H -#define _LINUX_VIRTIO_NET_H +#ifndef _UAPI_LINUX_VIRTIO_NET_H +#define _UAPI_LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -245,4 +245,4 @@ struct virtio_net_ctrl_mq { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 -#endif /* _LINUX_VIRTIO_NET_H */ +#endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit From fd2a0437dc33b6425cabf74cc7fc7fdba6d5903b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Jun 2016 16:09:18 +0300 Subject: virtio_net: introduce virtio_net_hdr_{from,to}_skb The code for conversion between virtio_net_hdr and skb GSO info is duplicated at several places. Let's put it to a common place to allow reuse. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 101 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/linux/virtio_net.h (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h new file mode 100644 index 000000000000..1c912f85e041 --- /dev/null +++ b/include/linux/virtio_net.h @@ -0,0 +1,101 @@ +#ifndef _LINUX_VIRTIO_NET_H +#define _LINUX_VIRTIO_NET_H + +#include +#include + +static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +{ + unsigned short gso_type = 0; + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + return -EINVAL; + } + + if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (hdr->gso_size == 0) + return -EINVAL; + } + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start); + u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + + if (!skb_partial_csum_set(skb, start, off)) + return -EINVAL; + } + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + + skb_shinfo(skb)->gso_size = gso_size; + skb_shinfo(skb)->gso_type = gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + memset(hdr, 0, sizeof(*hdr)); + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. 
*/ + hdr->hdr_len = __cpu_to_virtio16(little_endian, + skb_headlen(skb)); + hdr->gso_size = __cpu_to_virtio16(little_endian, + sinfo->gso_size); + if (sinfo->gso_type & SKB_GSO_TCPV4) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + else + return -EINVAL; + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (skb_vlan_tag_present(skb)) + hdr->csum_start = __cpu_to_virtio16(little_endian, + skb_checksum_start_offset(skb) + VLAN_HLEN); + else + hdr->csum_start = __cpu_to_virtio16(little_endian, + skb_checksum_start_offset(skb)); + hdr->csum_offset = __cpu_to_virtio16(little_endian, + skb->csum_offset); + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; +} + +#endif /* _LINUX_VIRTIO_BYTEORDER */ -- cgit From 6f094b9ec680209c5b7314feee983b2f4c910b1b Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Wed, 8 Jun 2016 21:16:44 -0700 Subject: tcp: add in_flight to tcp_skb_cb Add in_flight (bytes in flight when packet was sent) field to tx component of tcp_skb_cb and make it available to congestion modules' pkts_acked() function through the ack_sample function argument. Signed-off-by: Lawrence Brakmo Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0bcc70f4e1fb..a79894b66726 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -767,6 +767,7 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 20 bytes */ + __u32 in_flight;/* Bytes in flight when packet sent */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -859,6 +860,7 @@ union tcp_cc_info; struct ack_sample { u32 pkts_acked; s32 rtt_us; + u32 in_flight; }; struct tcp_congestion_ops { -- cgit From 002245cc6407c8ff65b8024554080eb6de1a8e2c Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Wed, 8 Jun 2016 21:18:47 -0700 Subject: bpf: fix missing header inclusion Commit 0fc174dea545 ("ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs") introduced usage of ERR_PTR() in bpf_prog_get(), however did not include linux/err.h. Without this patch, when compiling arm64 BPF without CONFIG_BPF_SYSCALL: ... 
In file included from arch/arm64/net/bpf_jit_comp.c:21:0: include/linux/bpf.h: In function 'bpf_prog_get': include/linux/bpf.h:235:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] return ERR_PTR(-EOPNOTSUPP); ^ include/linux/bpf.h:235:9: warning: return makes pointer from integer without a cast [-Wint-conversion] In file included from include/linux/rwsem.h:17:0, from include/linux/mm_types.h:10, from include/linux/sched.h:27, from arch/arm64/include/asm/compat.h:25, from arch/arm64/include/asm/stat.h:23, from include/linux/stat.h:5, from include/linux/compat.h:12, from include/linux/filter.h:10, from arch/arm64/net/bpf_jit_comp.c:22: include/linux/err.h: At top level: include/linux/err.h:23:35: error: conflicting types for 'ERR_PTR' static inline void * __must_check ERR_PTR(long error) ^ In file included from arch/arm64/net/bpf_jit_comp.c:21:0: include/linux/bpf.h:235:9: note: previous implicit declaration of 'ERR_PTR' was here return ERR_PTR(-EOPNOTSUPP); ^ ... Fixes: 0fc174dea545 ("ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs") Suggested-by: Daniel Borkmann Signed-off-by: Zi Shen Lim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8ee27b8afe81..1bcae82c6cb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -11,6 +11,7 @@ #include #include #include +#include struct bpf_map; -- cgit From f20e6657a8758fe8d074889a6f1883674f01c7f2 Mon Sep 17 00:00:00 2001 From: Pramod Kumar Date: Fri, 10 Jun 2016 11:03:45 +0530 Subject: mdio: mux: Enhanced MDIO mux framework for integrated multiplexers An integrated multiplexer uses same address space for "muxed bus selection" and "generation of mdio transaction" hence its good to register parent bus from mux driver. Hence added a mechanism where mux driver could register a parent bus and pass it down to framework via mdio_mux_init api. Signed-off-by: Pramod Kumar Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio-mux.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mdio-mux.h b/include/linux/mdio-mux.h index a243dbba8659..61f5b21b31c7 100644 --- a/include/linux/mdio-mux.h +++ b/include/linux/mdio-mux.h @@ -10,11 +10,13 @@ #ifndef __LINUX_MDIO_MUX_H #define __LINUX_MDIO_MUX_H #include +#include int mdio_mux_init(struct device *dev, int (*switch_fn) (int cur, int desired, void *data), void **mux_handle, - void *data); + void *data, + struct mii_bus *mux_bus); void mdio_mux_uninit(void *mux_handle); -- cgit From 45f50bed1d808794e514e9eed0e579a8756ce2ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Jun 2016 16:41:39 -0700 Subject: net_sched: remove generic throttled management __QDISC_STATE_THROTTLED bit manipulation is rather expensive for HTB and few others. I already removed it for sch_fq in commit f2600cf02b5b ("net: sched: avoid costly atomic operation in fq_dequeue()") and so far nobody complained. When one ore more packets are stuck in one or more throttled HTB class, a htb dequeue() performs two atomic operations to clear/set __QDISC_STATE_THROTTLED bit, while root qdisc lock is held. Removing this pair of atomic operations bring me a 8 % performance increase on 200 TCP_RR tests, in presence of throttled classes. 
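To illustrate the calling convention after the change, a shaping qdisc's dequeue path now just re-arms the watchdog and returns NULL; the shaper structure below is a made-up example, only qdisc_priv(), qdisc_dequeue_head() and the two-argument qdisc_watchdog_schedule_ns() from the diff below are real API:

#include <net/pkt_sched.h>
#include <net/sch_generic.h>

struct example_shaper {                 /* hypothetical qdisc private data */
        struct qdisc_watchdog   watchdog;
        u64                     next_tx_ns;
};

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
        struct example_shaper *q = qdisc_priv(sch);
        u64 now = ktime_get_ns();

        if (q->next_tx_ns > now) {
                /* No throttled bit to set any more; just arm the timer. */
                qdisc_watchdog_schedule_ns(&q->watchdog, q->next_tx_ns);
                return NULL;
        }

        return qdisc_dequeue_head(sch);
}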
This patch has no side effect, since nothing actually uses disc_is_throttled() anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++-- include/net/sch_generic.h | 16 ---------------- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fea53f4d92ca..7caa99b482c6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -67,12 +67,12 @@ struct qdisc_watchdog { }; void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle); +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires); static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { - qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires), true); + qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires)); } void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9f3581980c15..9a0d177884c6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -26,7 +26,6 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_THROTTLED, }; struct qdisc_size_table { @@ -125,21 +124,6 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) #endif } -static inline bool qdisc_is_throttled(const struct Qdisc *qdisc) -{ - return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false; -} - -static inline void qdisc_throttled(struct Qdisc *qdisc) -{ - set_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - -static inline void qdisc_unthrottled(struct Qdisc *qdisc) -{ - clear_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); -- cgit From 99860208bc62d8ebd5c57495b84856506fe075bc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 12:46:04 +0200 Subject: sched: remove NET_XMIT_POLICED sch_atm returns this when TC_ACT_SHOT classification occurs. But all other schedulers that use tc_classify (htb, hfsc, drr, fq_codel ...) return NET_XMIT_SUCCESS | __BYPASS in this case so just do that in atm. BATMAN uses it as an intermediate return value to signal forwarding vs. buffering, but it did not return POLICED to callers outside of BATMAN. Reviewed-by: Sven Eckelmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94eef356a65f..d101e4d904ba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -90,7 +90,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_XMIT_SUCCESS 0x00 #define NET_XMIT_DROP 0x01 /* skb dropped */ #define NET_XMIT_CN 0x02 /* congestion notification */ -#define NET_XMIT_POLICED 0x03 /* skb is shot by police */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. 
It -- cgit From cd2a9e62c8a3c5cae7691982667d79a0edc65283 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Jun 2016 13:44:17 -0700 Subject: net: l3mdev: Remove const from flowi6 arg to get_rt6_dst Allow drivers to pass flow arg to functions where the arg is not const and allow the driver to make updates as needed (eg., setting oif). Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 34f33eb96a5e..f8a416ec674c 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -38,7 +38,7 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, - const struct flowi6 *fl6); + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -139,7 +139,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6); +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -225,7 +225,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6) +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) { return NULL; } -- cgit From 9ff74384600aeecba34ebdacbbde0627489ff601 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Jun 2016 13:44:19 -0700 Subject: net: vrf: Handle ipv6 multicast and link-local addresses IPv6 multicast and link-local addresses require special handling by the VRF driver: 1. Rather than using the VRF device index and full FIB lookups, packets to/from these addresses should use direct FIB lookups based on the VRF device table. 2. fail sends/receives on a VRF device to/from a multicast address (e.g, make ping6 ff02::1% fail) 3. move the setting of the flow oif to the first dst lookup and revert the change in icmpv6_echo_reply made in ca254490c8dfd ("net: Add VRF support to IPv6 stack"). Linklocal/mcast addresses require use of the skb->dev. With this change connections into and out of a VRF enslaved device work for multicast and link-local addresses work (icmp, tcp, and udp) e.g., 1. packets into VM with VRF config: ping6 -c3 fe80::e0:f9ff:fe1c:b974%br1 ping6 -c3 ff02::1%br1 ssh -6 fe80::e0:f9ff:fe1c:b974%br1 2. packets going out a VRF enslaved device: ping6 -c3 fe80::18f8:83ff:fe4b:7a2e%eth1 ping6 -c3 ff02::1%eth1 ssh -6 root@fe80::18f8:83ff:fe4b:7a2e%eth1 Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 54c779416eec..f55bf3d294aa 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -76,6 +76,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net, struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int ifindex, struct flowi6 *fl6, int flags); int ip6_route_init(void); void ip6_route_cleanup(void); -- cgit From b2313077ed0db35ee186905d8076a737248edd24 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jun 2016 13:46:28 -0700 Subject: net_sched: make tcf_hash_check() boolean Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index db218a12efb5..fb82b5b5d9e7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -155,8 +155,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tc_action *a); int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index); u32 tcf_hash_new_index(struct tc_action_net *tn); -int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, - int bind); +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, + int bind); int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -- cgit From 2e0ab8ca83c122f275b21ea917d52fee506910bf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:31 +0300 Subject: ptr_ring: array based FIFO for pointers A simple array based FIFO of pointers. Intended for net stack which commonly has a single consumer/producer. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 264 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 include/linux/ptr_ring.h (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h new file mode 100644 index 000000000000..633406f9af8e --- /dev/null +++ b/include/linux/ptr_ring.h @@ -0,0 +1,264 @@ +/* + * Definitions for the 'struct ptr_ring' datastructure. + * + * Author: + * Michael S. Tsirkin + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This is a limited-size FIFO maintaining pointers in FIFO order, with + * one CPU producing entries and another consuming entries from a FIFO. + * + * This implementation tries to minimize cache-contention when there is a + * single producer and a single consumer CPU. 
+ */ + +#ifndef _LINUX_PTR_RING_H +#define _LINUX_PTR_RING_H 1 + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#endif + +struct ptr_ring { + int producer ____cacheline_aligned_in_smp; + spinlock_t producer_lock; + int consumer ____cacheline_aligned_in_smp; + spinlock_t consumer_lock; + /* Shared consumer/producer data */ + /* Read-only by both the producer and the consumer */ + int size ____cacheline_aligned_in_smp; /* max entries in queue */ + void **queue; +}; + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + * Callers don't need to take producer lock - if they don't + * the next call to __ptr_ring_produce may fail. + */ +static inline bool __ptr_ring_full(struct ptr_ring *r) +{ + return r->queue[r->producer]; +} + +static inline bool ptr_ring_full(struct ptr_ring *r) +{ + barrier(); + return __ptr_ring_full(r); +} + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + */ +static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + if (__ptr_ring_full(r)) + return -ENOSPC; + + r->queue[r->producer++] = ptr; + if (unlikely(r->producer >= r->size)) + r->producer = 0; + return 0; +} + +static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_bh(&r->producer_lock); + + return ret; +} + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). Callers must take consumer_lock + * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. + * There's no need for a lock if pointer is merely tested - see e.g. + * ptr_ring_empty. 
+ */ +static inline void *__ptr_ring_peek(struct ptr_ring *r) +{ + return r->queue[r->consumer]; +} + +static inline bool ptr_ring_empty(struct ptr_ring *r) +{ + barrier(); + return !__ptr_ring_peek(r); +} + +/* Must only be called after __ptr_ring_peek returned !NULL */ +static inline void __ptr_ring_discard_one(struct ptr_ring *r) +{ + r->queue[r->consumer++] = NULL; + if (unlikely(r->consumer >= r->size)) + r->consumer = 0; +} + +static inline void *__ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + ptr = __ptr_ring_peek(r); + if (ptr) + __ptr_ring_discard_one(r); + + return ptr; +} + +static inline void *ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + spin_lock(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_irq(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_irq(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_irq(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_any(struct ptr_ring *r) +{ + unsigned long flags; + void *ptr; + + spin_lock_irqsave(&r->consumer_lock, flags); + ptr = __ptr_ring_consume(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ptr; +} + +static inline void *ptr_ring_consume_bh(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_bh(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_bh(&r->consumer_lock); + + return ptr; +} + +/* Cast to structure type and call a function without discarding from FIFO. + * Function must return a value. + * Callers must take consumer_lock. + */ +#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) + +#define PTR_RING_PEEK_CALL(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_BH(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + unsigned long __PTR_RING_PEEK_CALL_f;\ + \ + spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +{ + r->queue = kzalloc(ALIGN(size * sizeof *(r->queue), SMP_CACHE_BYTES), + gfp); + if (!r->queue) + return -ENOMEM; + + r->size = size; + r->producer = r->consumer = 0; + spin_lock_init(&r->producer_lock); + spin_lock_init(&r->consumer_lock); + + return 0; +} + +static inline void ptr_ring_cleanup(struct ptr_ring *r) +{ + kfree(r->queue); +} + +#endif /* _LINUX_PTR_RING_H */ -- cgit From ad69f35d1dc0a79f86627ca56e01f86512602a49 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:41 +0300 Subject: skb_array: array based FIFO for skbs A simple array based FIFO of pointers. 
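Before the skb_array wrapper that follows, a quick orientation on the ptr_ring API just listed: a minimal single-producer/single-consumer user might look like this sketch (the demo_* names and the 256-slot size are invented for illustration; cleanup and error handling are elided):

static struct ptr_ring demo_ring;

static int demo_fifo_init(void)
{
	/* 256 NULL-initialized slots, allocated with kzalloc() by ptr_ring_init() */
	return ptr_ring_init(&demo_ring, 256, GFP_KERNEL);
}

/* producer context: returns -ENOSPC while the target slot is still occupied */
static int demo_fifo_push(void *item)
{
	return ptr_ring_produce(&demo_ring, item);
}

/* consumer context: returns NULL when the ring is empty */
static void *demo_fifo_pop(void)
{
	return ptr_ring_consume(&demo_ring);
}

The _irq/_bh/_any variants shown above differ only in which spinlock flavour they take around the same __ptr_ring_produce()/__ptr_ring_consume() core.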
Intended for net stack so uses skbs for type safety. Implemented as a set of wrappers around ptr_ring. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skb_array.h | 144 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/skb_array.h (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h new file mode 100644 index 000000000000..c4c090223333 --- /dev/null +++ b/include/linux/skb_array.h @@ -0,0 +1,144 @@ +/* + * Definitions for the 'struct skb_array' datastructure. + * + * Author: + * Michael S. Tsirkin + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Limited-size FIFO of skbs. Can be used more or less whenever + * sk_buff_head can be used, except you need to know the queue size in + * advance. + * Implemented as a type-safe wrapper around ptr_ring. + */ + +#ifndef _LINUX_SKB_ARRAY_H +#define _LINUX_SKB_ARRAY_H 1 + +#ifdef __KERNEL__ +#include +#include +#include +#endif + +struct skb_array { + struct ptr_ring ring; +}; + +/* Might be slightly faster than skb_array_full below, but callers invoking + * this in a loop must use a compiler barrier, for example cpu_relax(). + */ +static inline bool __skb_array_full(struct skb_array *a) +{ + return __ptr_ring_full(&a->ring); +} + +static inline bool skb_array_full(struct skb_array *a) +{ + return ptr_ring_full(&a->ring); +} + +static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce(&a->ring, skb); +} + +static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_irq(&a->ring, skb); +} + +static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_bh(&a->ring, skb); +} + +static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_any(&a->ring, skb); +} + +/* Might be slightly faster than skb_array_empty below, but callers invoking + * this in a loop must take care to use a compiler barrier, for example + * cpu_relax(). 
+ */ +static inline bool __skb_array_empty(struct skb_array *a) +{ + return !__ptr_ring_peek(&a->ring); +} + +static inline bool skb_array_empty(struct skb_array *a) +{ + return ptr_ring_empty(&a->ring); +} + +static inline struct sk_buff *skb_array_consume(struct skb_array *a) +{ + return ptr_ring_consume(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) +{ + return ptr_ring_consume_irq(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) +{ + return ptr_ring_consume_any(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) +{ + return ptr_ring_consume_bh(&a->ring); +} + +static inline int __skb_array_len_with_tag(struct sk_buff *skb) +{ + if (likely(skb)) { + int len = skb->len; + + if (skb_vlan_tag_present(skb)) + len += VLAN_HLEN; + + return len; + } else { + return 0; + } +} + +static inline int skb_array_peek_len(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_irq(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_bh(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_any(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) +{ + return ptr_ring_init(&a->ring, size, gfp); +} + +static inline void skb_array_cleanup(struct skb_array *a) +{ + ptr_ring_cleanup(&a->ring); +} + +#endif /* _LINUX_SKB_ARRAY_H */ -- cgit From 5d49de532002f02755decd1758aac53063a68625 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:45 +0300 Subject: ptr_ring: resize support This adds ring resize support. Seems to be necessary as users such as tun allow userspace control over queue size. If resize is used, this costs us ability to peek at queue without consumer lock - should not be a big deal as peek and consumer are usually run on the same CPU. If ring is made bigger, ring contents is preserved. If ring is made smaller, extra pointers are passed to an optional destructor callback. Cleanup function also gains destructor callback such that all pointers in queue can be cleaned up. This changes some APIs but we don't have any users yet, so it won't break bisect. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 157 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 633406f9af8e..562a65e8bcc0 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -43,9 +43,9 @@ struct ptr_ring { }; /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). - * Callers don't need to take producer lock - if they don't - * the next call to __ptr_ring_produce may fail. + * for example cpu_relax(). If ring is ever resized, callers must hold + * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold + * producer_lock, the next call to __ptr_ring_produce may fail. 
*/ static inline bool __ptr_ring_full(struct ptr_ring *r) { @@ -54,16 +54,55 @@ static inline bool __ptr_ring_full(struct ptr_ring *r) static inline bool ptr_ring_full(struct ptr_ring *r) { - barrier(); - return __ptr_ring_full(r); + bool ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_full(r); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_full_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_bh(&r->producer_lock); + + return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). + * for example cpu_relax(). Callers must hold producer_lock. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (__ptr_ring_full(r)) + if (r->queue[r->producer]) return -ENOSPC; r->queue[r->producer++] = ptr; @@ -120,20 +159,68 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must take consumer_lock * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. - * There's no need for a lock if pointer is merely tested - see e.g. - * ptr_ring_empty. + * If ring is never resized, and if the pointer is merely + * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { return r->queue[r->consumer]; } -static inline bool ptr_ring_empty(struct ptr_ring *r) +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). Callers must take consumer_lock + * if the ring is ever resized - see e.g. ptr_ring_empty. 
+ */ +static inline bool __ptr_ring_empty(struct ptr_ring *r) { - barrier(); return !__ptr_ring_peek(r); } +static inline bool ptr_ring_empty(struct ptr_ring *r) +{ + bool ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_empty(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_empty_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { @@ -241,10 +328,14 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) __PTR_RING_PEEK_CALL_v; \ }) +static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +{ + return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); +} + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { - r->queue = kzalloc(ALIGN(size * sizeof *(r->queue), SMP_CACHE_BYTES), - gfp); + r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; @@ -256,8 +347,46 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } -static inline void ptr_ring_cleanup(struct ptr_ring *r) +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + int producer = 0; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + void *ptr; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->producer_lock, flags); + + while ((ptr = ptr_ring_consume(r))) + if (producer < size) + queue[producer++] = ptr; + else if (destroy) + destroy(ptr); + + r->size = size; + r->producer = producer; + r->consumer = 0; + old = r->queue; + r->queue = queue; + + spin_unlock_irqrestore(&(r)->producer_lock, flags); + + kfree(old); + + return 0; +} + +static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { + void *ptr; + + if (destroy) + while ((ptr = ptr_ring_consume(r))) + destroy(ptr); kfree(r->queue); } -- cgit From 7d7072e3bad5569f636d3c54a36da40976bfd505 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:50 +0300 Subject: skb_array: resize support Update skb_array after ptr_ring API changes. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skb_array.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index c4c090223333..678bfbf78ac4 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -63,9 +63,9 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb return ptr_ring_produce_any(&a->ring, skb); } -/* Might be slightly faster than skb_array_empty below, but callers invoking - * this in a loop must take care to use a compiler barrier, for example - * cpu_relax(). 
+/* Might be slightly faster than skb_array_empty below, but only safe if the + * array is never resized. Also, callers invoking this in a loop must take care + * to use a compiler barrier, for example cpu_relax(). */ static inline bool __skb_array_empty(struct skb_array *a) { @@ -77,6 +77,21 @@ static inline bool skb_array_empty(struct skb_array *a) return ptr_ring_empty(&a->ring); } +static inline bool skb_array_empty_bh(struct skb_array *a) +{ + return ptr_ring_empty_bh(&a->ring); +} + +static inline bool skb_array_empty_irq(struct skb_array *a) +{ + return ptr_ring_empty_irq(&a->ring); +} + +static inline bool skb_array_empty_any(struct skb_array *a) +{ + return ptr_ring_empty_any(&a->ring); +} + static inline struct sk_buff *skb_array_consume(struct skb_array *a) { return ptr_ring_consume(&a->ring); @@ -136,9 +151,19 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_init(&a->ring, size, gfp); } +void __skb_array_destroy_skb(void *ptr) +{ + kfree_skb(ptr); +} + +int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) +{ + return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); +} + static inline void skb_array_cleanup(struct skb_array *a) { - ptr_ring_cleanup(&a->ring); + ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } #endif /* _LINUX_SKB_ARRAY_H */ -- cgit From 1b5c5493e3e68181be344cb51bf9df192d05ffc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:50 -0700 Subject: net_sched: add the ability to defer skb freeing qdisc are changed under RTNL protection and often while blocking BH and root qdisc spinlock. When lots of skbs need to be dropped, we free them under these locks causing TX/RX freezes, and more generally latency spikes. This commit adds rtnl_kfree_skbs(), used to queue skbs for deferred freeing. Actual freeing happens right after RTNL is released, with appropriate scheduling points. rtnl_qdisc_drop() can also be used in place of disc_drop() when RTNL is held. qdisc_reset_queue() and __qdisc_reset_queue() get the new behavior, so standard qdiscs like pfifo, pfifo_fast... have their ->reset() method automatically handled. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 5 +++-- include/net/sch_generic.h | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c006cc900c44..2daece8979f7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,8 +89,9 @@ void net_inc_egress_queue(void); void net_dec_egress_queue(void); #endif -extern void rtnetlink_init(void); -extern void __rtnl_unlock(void); +void rtnetlink_init(void); +void __rtnl_unlock(void); +void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); #define ASSERT_RTNL() do { \ if (unlikely(!rtnl_is_locked())) { \ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9a0d177884c6..4f7cee8344c4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct Qdisc *sch, - struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct sk_buff_head *list) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - __skb_queue_purge(list); + if (!skb_queue_empty(list)) { + rtnl_kfree_skbs(list->next, list->prev); + __skb_queue_head_init(list); + } } static inline void qdisc_reset_queue(struct Qdisc *sch) { - __qdisc_reset_queue(sch, &sch->q); + __qdisc_reset_queue(&sch->q); sch->qstats.backlog = 0; } @@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } +static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + rtnl_kfree_skbs(skb, skb); + qdisc_qstats_drop(sch); +} + static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { kfree_skb(skb); -- cgit From 8626a0c83b0d471d859bcd908d016874df951fc3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:16 +0200 Subject: 6lowpan: add private neighbour data This patch will introduce a 6lowpan neighbour private data. Like the interface private data we handle private data for generic 6lowpan and for link-layer specific 6lowpan. The current first use case if to save the short address for a 802.15.4 6lowpan neighbour. Cc: David S. Miller Reviewed-by: Stefan Schmidt Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +-- include/net/6lowpan.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d101e4d904ba..36e43bd422f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,8 +1483,7 @@ enum netdev_priv_flags { * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length - * @neigh_priv_len; Used in neigh_alloc(), - * initialized only in atm/clip.c + * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index da84cf920b78..2d9b9d39221e 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -141,6 +141,16 @@ struct lowpan_dev { u8 priv[0] __aligned(sizeof(void *)); }; +struct lowpan_802154_neigh { + __le16 short_addr; +}; + +static inline +struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv) +{ + return neigh_priv; +} + static inline struct lowpan_dev *lowpan_dev(const struct net_device *dev) { -- cgit From 2ad3ed59198c5404c34515cfcfd9a2b3c54d964f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:17 +0200 Subject: 6lowpan: add 802.15.4 short addr slaac This patch adds the autoconfiguration if a valid 802.15.4 short address is available for 802.15.4 6LoWPAN interfaces. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: Hannes Frederic Sowa Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/6lowpan.h | 6 ++++++ include/net/addrconf.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 2d9b9d39221e..5ab4c9901ccc 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -254,6 +254,12 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data, return false; } +static inline bool lowpan_802154_is_valid_src_short_addr(__le16 addr) +{ + /* First bit of addr is multicast, reserved or 802.15.4 specific */ + return !(addr & cpu_to_le16(0x8000)); +} + static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, const size_t len) { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 730d856683e5..b1774eb03f37 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,6 +94,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags); + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) -- cgit From 1e82f961ac8e94c50a933e89ee08071fc81a4bbd Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:19 +0200 Subject: ndisc: add __ndisc_opt_addr_space function This patch adds __ndisc_opt_addr_space as low-level function for ndisc_opt_addr_space which doesn't depend on net_device parameter. Cc: David S. 
Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/ndisc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 2d8edaad29cb..4cee82654fe5 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -127,10 +127,15 @@ static inline int ndisc_addr_option_pad(unsigned short type) } } +static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) +{ + return NDISC_OPT_SPACE(addr_len + pad); +} + static inline int ndisc_opt_addr_space(struct net_device *dev) { - return NDISC_OPT_SPACE(dev->addr_len + - ndisc_addr_option_pad(dev->type)); + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)); } static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, -- cgit From 4f36ce84c54c971cd67c882035d9bde7b1a2ee53 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:20 +0200 Subject: ndisc: add __ndisc_opt_addr_data function This patch adds __ndisc_opt_addr_data as low-level function for ndisc_opt_addr_data which doesn't depend on net_device parameter. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/ndisc.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 4cee82654fe5..c8962adf24d0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -138,17 +138,23 @@ static inline int ndisc_opt_addr_space(struct net_device *dev) ndisc_addr_option_pad(dev->type)); } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) +static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, + unsigned char addr_len, int prepad) { u8 *lladdr = (u8 *)(p + 1); int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != ndisc_opt_addr_space(dev)) + if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad)) return NULL; return lladdr + prepad; } +static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, + struct net_device *dev) +{ + return __ndisc_opt_addr_data(p, dev->addr_len, + ndisc_addr_option_pad(dev->type)); +} + static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; -- cgit From f997c55c1dc8841b3ee4df0493d0ac7966d42165 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:23 +0200 Subject: ipv6: introduce neighbour discovery ops This patch introduces neighbour discovery ops callback structure. The idea is to separate the handling for 6LoWPAN into the 6lowpan module. These callback offers 6lowpan different handling, such as 802.15.4 short address handling or RFC6775 (Neighbor Discovery Optimization for IPv6 over 6LoWPANs). Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 ++ include/net/ndisc.h | 197 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 36e43bd422f8..890158e99159 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations + * @ndisc_ops: Includes callbacks for different IPv6 neighbour + * discovery handling. Necessary for e.g. 6LoWPAN. * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1672,6 +1674,9 @@ struct net_device { #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; #endif +#if IS_ENABLED(CONFIG_IPV6) + const struct ndisc_ops *ndisc_ops; +#endif const struct header_ops *header_ops; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index c8962adf24d0..a5e276703cb3 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -58,6 +58,7 @@ struct inet6_dev; struct net_device; struct net_proto_family; struct sk_buff; +struct prefix_info; extern struct neigh_table nd_tbl; @@ -110,9 +111,182 @@ struct ndisc_options { #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts); +#define NDISC_OPS_REDIRECT_DATA_SPACE 2 + +/* + * This structure defines the hooks for IPv6 neighbour discovery. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * int (*is_useropt)(u8 nd_opt_type): + * This function is called when IPv6 decide RA userspace options. if + * this function returns 1 then the option given by nd_opt_type will + * be handled as userspace option additional to the IPv6 options. + * + * int (*parse_options)(const struct net_device *dev, + * struct nd_opt_hdr *nd_opt, + * struct ndisc_options *ndopts): + * This function is called while parsing ndisc ops and put each position + * as pointer into ndopts. If this function return unequal 0, then this + * function took care about the ndisc option, if 0 then the IPv6 ndisc + * option parser will take care about that option. + * + * void (*update)(const struct net_device *dev, struct neighbour *n, + * u32 flags, u8 icmp6_type, + * const struct ndisc_options *ndopts): + * This function is called when IPv6 ndisc updates the neighbour cache + * entry. Additional options which can be updated may be previously + * parsed by parse_opts callback and accessible over ndopts parameter. + * + * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + * struct neighbour *neigh, u8 *ha_buf, + * u8 **ha): + * This function is called when the necessary option space will be + * calculated before allocating a skb. The parameters neigh, ha_buf + * abd ha are available on NDISC_REDIRECT messages only. + * + * void (*fill_addr_option)(const struct net_device *dev, + * struct sk_buff *skb, u8 icmp6_type, + * const u8 *ha): + * This function is called when the skb will finally fill the option + * fields inside skb. NOTE: this callback should fill the option + * fields to the skb which are previously indicated by opt_space + * parameter. 
That means the decision to add such option should + * not lost between these two callbacks, e.g. protected by interface + * up state. + * + * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + * const struct prefix_info *pinfo, + * struct inet6_dev *in6_dev, + * struct in6_addr *addr, + * int addr_type, u32 addr_flags, + * bool sllao, bool tokenized, + * __u32 valid_lft, u32 prefered_lft, + * bool dev_addr_generated): + * This function is called when a RA messages is received with valid + * PIO option fields and an IPv6 address will be added to the interface + * for autoconfiguration. The parameter dev_addr_generated reports about + * if the address was based on dev->dev_addr or not. This can be used + * to add a second address if link-layer operates with two link layer + * addresses. E.g. 802.15.4 6LoWPAN. + */ +struct ndisc_ops { + int (*is_useropt)(u8 nd_opt_type); + int (*parse_options)(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts); + void (*update)(const struct net_device *dev, struct neighbour *n, + u32 flags, u8 icmp6_type, + const struct ndisc_options *ndopts); + int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + struct neighbour *neigh, u8 *ha_buf, + u8 **ha); + void (*fill_addr_option)(const struct net_device *dev, + struct sk_buff *skb, u8 icmp6_type, + const u8 *ha); + void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft, + bool dev_addr_generated); +}; + +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_ops_is_useropt(const struct net_device *dev, + u8 nd_opt_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) + return dev->ndisc_ops->is_useropt(nd_opt_type); + else + return 0; +} + +static inline int ndisc_ops_parse_options(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->parse_options) + return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts); + else + return 0; +} + +static inline void ndisc_ops_update(const struct net_device *dev, + struct neighbour *n, u32 flags, + u8 icmp6_type, + const struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->update) + dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts); +} + +static inline int ndisc_ops_opt_addr_space(const struct net_device *dev, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space && + icmp6_type != NDISC_REDIRECT) + return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL, + NULL, NULL); + else + return 0; +} + +static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev, + struct neighbour *neigh, + u8 *ha_buf, u8 **ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space) + return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT, + neigh, ha_buf, ha); + else + return 0; +} + +static inline void ndisc_ops_fill_addr_option(const struct net_device *dev, + struct sk_buff *skb, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option && + icmp6_type != NDISC_REDIRECT) + dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL); +} + +static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev, + struct sk_buff *skb, + const u8 *ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option) + 
dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha); +} + +static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net, + struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, + u32 prefered_lft, + bool dev_addr_generated) +{ + if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr) + dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev, + addr, addr_type, + addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); +} +#endif + /* * Return the padding between the option length and the start of the * link addr. Currently only IP-over-InfiniBand needs this, although @@ -132,11 +306,25 @@ static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) return NDISC_OPT_SPACE(addr_len + pad); } -static inline int ndisc_opt_addr_space(struct net_device *dev) +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type) +{ + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_opt_addr_space(dev, icmp6_type); +} + +static inline int ndisc_redirect_opt_addr_space(struct net_device *dev, + struct neighbour *neigh, + u8 *ops_data_buf, + u8 **ops_data) { return __ndisc_opt_addr_space(dev->addr_len, - ndisc_addr_option_pad(dev->type)); + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf, + ops_data); } +#endif static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, unsigned char addr_len, int prepad) @@ -205,6 +393,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts); /* * IGMP -- cgit From cc84b3c6b48ae81748c5e25d3558872385196162 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:24 +0200 Subject: ipv6: export several functions This patch exports some neighbour discovery functions which can be used by 6lowpan neighbour discovery ops functionality then. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 7 +++++++ include/net/ndisc.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b1774eb03f37..9826d3a9464c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -97,6 +97,13 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags); +int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + const struct in6_addr *addr, int addr_type, + u32 addr_flags, bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft); + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index a5e276703cb3..3f0f41ddbeb0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -53,6 +53,15 @@ enum { #include +/* Set to 3 to get tracing... */ +#define ND_DEBUG 1 + +#define ND_PRINTK(val, level, fmt, ...) \ +do { \ + if (val <= ND_DEBUG) \ + net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ +} while (0) + struct ctl_table; struct inet6_dev; struct net_device; @@ -115,6 +124,9 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts); +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, + int data_len, int pad); + #define NDISC_OPS_REDIRECT_DATA_SPACE 2 /* -- cgit From bbe5f5cefe2818eda0392c178de141ffc5734d90 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:25 +0200 Subject: 6lowpan: introduce 6lowpan-nd This patch introduce different 6lowpan handling for receive and transmit NS/NA messages for the ipv6 neighbour discovery. The first use-case is for supporting 802.15.4 short addresses inside the option fields and handling for RFC6775 6CO option field as userspace option. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reviewed-by: Stefan Schmidt Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/ndisc.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 3f0f41ddbeb0..be1fe2283254 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,6 +35,7 @@ enum { ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ + ND_OPT_6CO = 34, /* RFC6775 */ __ND_OPT_MAX }; @@ -109,14 +110,19 @@ struct ndisc_options { #endif struct nd_opt_hdr *nd_useropts; struct nd_opt_hdr *nd_useropts_end; +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) + struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1]; +#endif }; -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] +#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] +#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] +#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] +#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -- cgit From 22a59be8b7693eb2d0897a9638f5991f2f8e4ddd Mon Sep 17 00:00:00 2001 From: Philip Prindeville Date: Tue, 14 Jun 2016 15:53:02 -0600 Subject: net: ipv4: Add ability to have GRE ignore DF bit in IPv4 payloads In the presence of firewalls which improperly block ICMP Unreachable (including Fragmentation Required) messages, Path MTU Discovery is prevented from working. A workaround is to handle IPv4 payloads opaquely, ignoring the DF bit--as is done for other payloads like AppleTalk--and doing transparent fragmentation and reassembly. Redux includes the enforcement of mutual exclusion between this feature and Path MTU Discovery as suggested by Alexander Duyck. Cc: Alexander Duyck Reviewed-by: Stephen Hemminger Signed-off-by: Philip Prindeville Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + include/uapi/linux/if_tunnel.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index dbf444428437..9222678426a1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -132,6 +132,7 @@ struct ip_tunnel { int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; + bool ignore_df; }; #define TUNNEL_CSUM __cpu_to_be16(0x01) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index af4de90ba27d..1046f5515174 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -113,6 +113,7 @@ enum { IFLA_GRE_ENCAP_SPORT, IFLA_GRE_ENCAP_DPORT, IFLA_GRE_COLLECT_METADATA, + IFLA_GRE_IGNORE_DF, __IFLA_GRE_MAX, }; -- cgit From 61d1b6a42fec61c5065f54cc62cef02b483c69fb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:12 +0200 Subject: bpf, maps: add release callback Add a release callback for maps that is invoked when the last reference to its struct file is gone and the struct file about to be released by vfs. The handler will be used by fd array maps. 
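As a hedged sketch only (the demo_* names are invented and not part of this patch), a map implementation wires the new callback up through its bpf_map_ops:

static void demo_map_release(struct bpf_map *map, struct file *map_file)
{
	/* runs when the last reference to map_file is dropped, just
	 * before the vfs releases the struct file itself
	 */
}

static const struct bpf_map_ops demo_map_ops = {
	/* .map_alloc, .map_free, .map_get_next_key etc. omitted in this sketch */
	.map_release	= demo_map_release,
};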
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bcae82c6cb1..29b5a1ae22cb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -19,7 +19,8 @@ struct bpf_map; struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ struct bpf_map *(*map_alloc)(union bpf_attr *attr); - void (*map_free)(struct bpf_map *); + void (*map_release)(struct bpf_map *map, struct file *map_file); + void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); /* funcs callable from userspace and from eBPF programs */ -- cgit From d056a788765e67773124f520159185bc89f5d1ad Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:13 +0200 Subject: bpf, maps: extend map_fd_get_ptr arguments This patch extends map_fd_get_ptr() callback that is used by fd array maps, so that struct file pointer from the related map can be passed in. It's safe to remove map_update_elem() callback for the two maps since this is only allowed from syscall side, but not from eBPF programs for these two map types. Like in per-cpu map case, bpf_fd_array_map_update_elem() needs to be called directly here due to the extra argument. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 29b5a1ae22cb..d7b43e73fe87 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -29,8 +29,9 @@ struct bpf_map_ops { int (*map_delete_elem)(struct bpf_map *map, void *key); /* funcs called by prog_array and perf_event_array map */ - void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); - void (*map_fd_put_ptr) (void *ptr); + void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, + int fd); + void (*map_fd_put_ptr)(void *ptr); }; struct bpf_map { @@ -169,7 +170,7 @@ struct bpf_array { u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_fd_array_map_clear(struct bpf_map *map); + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -207,8 +208,13 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); + int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); +int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); +void bpf_fd_array_map_clear(struct bpf_map *map); + /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent -- cgit From 3b1efb196eee45b2f0c4994e0c43edb5e367f620 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:14 +0200 Subject: bpf, maps: flush own entries on perf map release The behavior of perf event arrays are quite different from all others as they are tightly coupled to perf event fds, f.e. 
shown recently by commit e03e7ee34fdd ("perf/bpf: Convert perf_event_array to use struct file") to make refcounting on perf event more robust. A remaining issue that the current code still has is that since additions to the perf event array take a reference on the struct file via perf_event_get() and are only released via fput() (that cleans up the perf event eventually via perf_event_release_kernel()) when the element is either manually removed from the map from user space or automatically when the last reference on the perf event map is dropped. However, this leads us to dangling struct file's when the map gets pinned after the application owning the perf event descriptor exits, and since the struct file reference will in such case only be manually dropped or via pinned file removal, it leads to the perf event living longer than necessary, needlessly consuming resources for that time. Relations between perf event fds and bpf perf event map fds can be rather complex. F.e. maps can act as demuxers among different perf event fds that can possibly be owned by different threads and based on the index selection from the program, events get dispatched to one of the per-cpu fd endpoints. One perf event fd (or, rather a per-cpu set of them) can also live in multiple perf event maps at the same time, listening for events. Also, another requirement is that perf event fds can get closed from application side after they have been attached to the perf event map, so that on exit perf event map will take care of dropping their references eventually. Likewise, when such maps are pinned, the intended behavior is that a user application does bpf_obj_get(), puts its fds in there and on exit when fd is released, they are dropped from the map again, so the map acts rather as connector endpoint. This also makes perf event maps inherently different from program arrays as described in more detail in commit c9da161c6517 ("bpf: fix clearing on persistent program array maps"). To tackle this, map entries are marked by the map struct file that added the element to the map. And when the last reference to that map struct file is released from user space, then the tracked entries are purged from the map. This is okay, because new map struct file instances resp. frontends to the anon inode are provided via bpf_map_new_fd() that is called when we invoke bpf_obj_get_user() for retrieving a pinned map, but also when an initial instance is created via map_create(). The rest is resolved by the vfs layer automatically for us by keeping reference count on the map's struct file. Any concurrent updates on the map slot are fine as well, it just means that perf_event_fd_array_release() needs to delete fewer of its own entries. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d7b43e73fe87..9adfef694a25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -13,6 +13,7 @@ #include #include +struct perf_event; struct bpf_map; /* map is generic key/value storage optionally accesible by eBPF programs */ @@ -166,8 +167,16 @@ struct bpf_array { void __percpu *pptrs[0] __aligned(8); }; }; + #define MAX_TAIL_CALL_CNT 32 +struct bpf_event_entry { + struct perf_event *event; + struct file *perf_file; + struct file *map_file; + struct rcu_head rcu; +}; + u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -- cgit From 5fb384b066d7c320832c4541658e5c655c590ac5 Mon Sep 17 00:00:00 2001 From: Fabien Siron Date: Wed, 15 Jun 2016 15:37:49 +0000 Subject: netlink: Add comment to warn about deprecated netlink rings attribute request Signed-off-by: Fabien Siron Signed-off-by: David S. Miller --- include/uapi/linux/netlink_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index d79399394b46..76b4d87c83a8 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -49,6 +49,7 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ #ifndef __KERNEL__ +/* deprecated since 4.6 */ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ #endif -- cgit From cecbc5563a02289164fa6379130243cbe08b2dd6 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 15 Jun 2016 11:32:21 -0700 Subject: net: stmmac: allow to split suspend/resume from init/exit callbacks Let the stmmac platform drivers provide dedicated suspend and resume callbacks rather than always re-using the init and exits callbacks. If the driver does not provide the suspend or resume callback, we fall back to the old behavior trying to use exit or init. This allows a specific platform to perform only a partial power-down on suspend if Wake-on-Lan is enabled but always perform the full shutdown sequence if the module is unloaded. Signed-off-by: Vincent Palatin Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ffdaca9c01af..0507dbfbf63c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,6 +135,8 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + void (*suspend)(struct platform_device *pdev, void *priv); + void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; -- cgit From 6f3b911d5f29b98752e5da86a295210c0c4f4e14 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 17 Jun 2016 15:35:27 +0200 Subject: can: bcm: add support for CAN FD frames The programming API of the CAN_BCM depends on struct can_frame which is given as array directly behind the bcm_msg_head structure. To follow this schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head flags indicates that the concatenated CAN frame structures behind the bcm_msg_head are defined as struct canfd_frame. 
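A hedged userspace sketch of that layout (the CAN ID, payload length and interval are arbitrary examples; sock is assumed to be a connect()ed CAN_BCM socket, the payload is left zeroed and error handling is omitted):

#include <unistd.h>
#include <linux/can.h>
#include <linux/can/bcm.h>

static ssize_t demo_bcm_fd_tx_setup(int sock)
{
	struct {
		struct bcm_msg_head head;
		struct canfd_frame frame;	/* CAN_FD_FRAME set: canfd_frame, not can_frame */
	} msg = { 0 };

	msg.head.opcode       = TX_SETUP;
	msg.head.flags        = SETTIMER | STARTTIMER | CAN_FD_FRAME;
	msg.head.can_id       = 0x123;
	msg.head.nframes      = 1;
	msg.head.ival2.tv_sec = 1;		/* retransmit cyclically every second */
	msg.frame.can_id      = 0x123;
	msg.frame.len         = 12;		/* CAN FD payloads may exceed 8 bytes */

	return write(sock, &msg, sizeof(msg));
}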
This patch adds the support to handle CAN and CAN FD frames on a per BCM-op base. Main changes: - generally use struct canfd_frames instead if struct can_frames - use canfd_frame.flags instead of can_frame.can_dlc for private BCM flags - make all CAN frame sizes depending on the new CAN_FD_FRAME flags - separate between CAN and CAN FD when sending/receiving frames Due to the dependence of the CAN_FD_FRAME flag the former binary interface for classic CAN frames remains stable. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/bcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 7a291dc1ff15..cefb304414ba 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -99,5 +99,6 @@ enum { #define RX_ANNOUNCE_RESUME 0x0100 #define TX_RESET_MULTI_IDX 0x0200 #define RX_RTR_FRAME 0x0400 +#define CAN_FD_FRAME 0x0800 #endif /* !_UAPI_CAN_BCM_H */ -- cgit From 86a98057256020e75e1be0f88d7617491a06e8f1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:20:44 -0700 Subject: vxlan/geneve: Include udp_tunnel.h in vxlan/geneve.h and fixup includes This patch makes it so that we add udp_tunnel.h to vxlan.h and geneve.h header files. This is useful as I plan to move the generic handlers for the port offloads into the udp_tunnel header file and leave the vxlan and geneve headers to be a bit more protocol specific. I also went through and cleaned out a number of redundant includes that where in the .h and .c files for these drivers. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/geneve.h | 3 --- include/net/udp_tunnel.h | 2 ++ include/net/vxlan.h | 6 +----- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index cb544a530146..f8aff18d6702 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -1,10 +1,7 @@ #ifndef __NET_GENEVE_H #define __NET_GENEVE_H 1 -#ifdef CONFIG_INET #include -#endif - /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 9d14f707e534..59019562d14c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -105,12 +105,14 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); +#ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } +#endif static inline void udp_tunnel_encap_enable(struct socket *sock) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b8803165df91..7d944941f32f 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -1,12 +1,8 @@ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 -#include -#include #include -#include -#include -#include +#include #include /* VXLAN protocol (RFC 7348) header: -- cgit From e7b3db5e60e8f471c3f5ef93b497bafe5863e56a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:20:52 -0700 Subject: net: Combine GENEVE and VXLAN port notifiers into single functions This patch merges the GENEVE and VXLAN code so that both functions pass through a shared code path. 
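To make the shared path concrete, here is a rough driver-side sketch (not from this patch) of the single handler the series converges on, using the struct udp_tunnel_info and UDP_TUNNEL_TYPE_* definitions added below and the ndo_udp_tunnel_add hook introduced in the follow-up patch; struct foo_priv and foo_hw_set_port are invented for illustration:

#include <linux/netdevice.h>
#include <net/udp_tunnel.h>

struct foo_priv {
	void __iomem *regs;	/* illustrative driver state */
};

static void foo_hw_set_port(struct foo_priv *priv, unsigned short type,
			    __be16 port, bool add)
{
	/* program the device's tunnel parser here (illustrative stub) */
}

/* One entry point for every parsable UDP tunnel type */
static void foo_udp_tunnel_add(struct net_device *dev,
			       struct udp_tunnel_info *ti)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (ti->type) {
	case UDP_TUNNEL_TYPE_VXLAN:
	case UDP_TUNNEL_TYPE_GENEVE:
		foo_hw_set_port(priv, ti->type, ti->port, true);
		break;
	default:
		break;
	}
}

A matching foo_udp_tunnel_del would pass false instead, and both would be wired up via the .ndo_udp_tunnel_add/.ndo_udp_tunnel_del fields once the follow-up patch below adds them.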
This way we can start the effort of using a single function on the network device drivers to handle both of these tunnel types. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 59019562d14c..71afbea873a0 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -84,6 +84,39 @@ struct udp_tunnel_sock_cfg { void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); +/* -- List of parsable UDP tunnel types -- + * + * Adding to this list will result in serious debate. The main issue is + * that this list is essentially a list of workarounds for either poorly + * designed tunnels, or poorly designed device offloads. + * + * The parsing supported via these types should really be used for Rx + * traffic only as the network stack will have already inserted offsets for + * the location of the headers in the skb. In addition any ports that are + * pushed should be kept within the namespace without leaking to other + * devices such as VFs or other ports on the same device. + * + * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the + * need to use this for Rx checksum offload. It should not be necessary to + * call this function to perform Tx offloads on outgoing traffic. + */ +enum udp_parsable_tunnel_type { + UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ + UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ +}; + +struct udp_tunnel_info { + unsigned short type; + sa_family_t sa_family; + __be16 port; +}; + +/* Notify network devices of offloadable types */ +void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); +void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); +void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, -- cgit From 7c46a640de6fcc4f35d0702710356a024eadf68f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:21:00 -0700 Subject: net: Merge VXLAN and GENEVE push notifiers into a single notifier This patch merges the notifiers for VXLAN and GENEVE into a single UDP tunnel notifier. The idea is that we will want to only have to make one notifier call to receive the list of ports for VXLAN and GENEVE tunnels that need to be offloaded. In addition we add a new set of ndo functions named ndo_udp_tunnel_add and ndo_udp_tunnel_del that are meant to allow us to track the tunnel meta-data such as port and address family as tunnels are added and removed. The tunnel meta-data is now transported in a structure named udp_tunnel_info which for now carries the type, address family, and port number. In the future this could be updated so that we can include a tuple of values including things such as the destination IP address and other fields. I also ended up going with a naming scheme that consisted of using the prefix udp_tunnel on function names. I applied this to the notifier and ndo ops as well so that it hopefully points to the fact that these are primarily used in the udp_tunnel functions. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- include/net/geneve.h | 3 +-- include/net/udp_tunnel.h | 6 ++++++ include/net/vxlan.h | 4 ++-- 4 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 890158e99159..577d2a1814b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -61,6 +61,8 @@ struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; struct mpls_dev; +/* UDP Tunnel offloads */ +struct udp_tunnel_info; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1050,6 +1052,19 @@ struct tc_to_netdev { * address family that vxlan is not listening to anymore. The operation * is protected by the vxlan_net->sock_lock. * + * void (*ndo_udp_tunnel_add)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify a driver about the UDP port and socket + * address family that a UDP tunnel is listnening to. It is called only + * when a new port starts listening. The operation is protected by the + * RTNL. + * + * void (*ndo_udp_tunnel_del)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify the driver about a UDP port and socket + * address family that the UDP tunnel is not listening to anymore. The + * operation is protected by the RTNL. + * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other @@ -1269,6 +1284,10 @@ struct net_device_ops { void (*ndo_del_geneve_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); + void (*ndo_udp_tunnel_add)(struct net_device *dev, + struct udp_tunnel_info *ti); + void (*ndo_udp_tunnel_del)(struct net_device *dev, + struct udp_tunnel_info *ti); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, @@ -2255,8 +2274,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B -#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C -#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D +#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/geneve.h b/include/net/geneve.h index f8aff18d6702..3410c4b5a382 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -61,8 +61,7 @@ struct genevehdr { static inline void geneve_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_GENEVE, netdev); + udp_tunnel_get_rx_info(netdev); } #ifdef CONFIG_INET diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 71afbea873a0..1c9408a04213 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -117,6 +117,12 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. 
*/ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7d944941f32f..c62e2ed1c3af 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -4,6 +4,7 @@ #include #include #include +#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -390,8 +391,7 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) static inline void vxlan_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev); + udp_tunnel_get_rx_info(netdev); } static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) -- cgit From 1938ee1fd3de74d761a60806b048df652666afec Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:23:12 -0700 Subject: net: Remove deprecated tunnel specific UDP offload functions Now that we have all the drivers using udp_tunnel_get_rx_ports, ndo_add_udp_enc_rx_port, and ndo_del_udp_enc_rx_port we can drop the function calls that were specific to VXLAN and GENEVE. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 -------------------------------------- include/net/geneve.h | 5 ----- include/net/vxlan.h | 5 ----- 3 files changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 577d2a1814b1..e84d9d23c2d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1026,32 +1026,6 @@ struct tc_to_netdev { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * - * void (*ndo_add_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify a driver about the UDP port and socket - * address family that vxlan is listening to. It is called only when - * a new port starts listening. The operation is protected by the - * vxlan_net->sock_lock. - * - * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify a driver about the UDP port and socket - * address family that geneve is listnening to. It is called only when - * a new port starts listening. The operation is protected by the - * geneve_net->sock_lock. - * - * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify the driver about a UDP port and socket - * address family that geneve is not listening to anymore. The operation - * is protected by the geneve_net->sock_lock. - * - * void (*ndo_del_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify the driver about a UDP port and socket - * address family that vxlan is not listening to anymore. The operation - * is protected by the vxlan_net->sock_lock. 
- * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1272,18 +1246,6 @@ struct net_device_ops { struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); - void (*ndo_add_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_add_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); void (*ndo_udp_tunnel_add)(struct net_device *dev, struct udp_tunnel_info *ti); void (*ndo_udp_tunnel_del)(struct net_device *dev, diff --git a/include/net/geneve.h b/include/net/geneve.h index 3410c4b5a382..ec0327d4331b 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -59,11 +59,6 @@ struct genevehdr { struct geneve_opt options[]; }; -static inline void geneve_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c62e2ed1c3af..b96d0360c095 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -389,11 +389,6 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -static inline void vxlan_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; -- cgit From b9adcd69bd7b41625201686b4cfec7ff13357afc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:23:19 -0700 Subject: vxlan: Add new UDP encapsulation offload type for VXLAN-GPE The fact is VXLAN with Generic Protocol Extensions cannot be supported by the same hardware parsers that support VXLAN. The protocol extensions allow for things like a Next Protocol field which in turn allows for things other than Ethernet to be passed over the tunnel. Most existing parsers will not know how to interpret this. To resolve this I am giving VXLAN-GPE its own UDP encapsulation offload type. This way hardware that does support GPE can simply add this type to the switch statement for VXLAN, and if they don't support it then this will fix any issues where headers might be interpreted incorrectly. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1c9408a04213..02c5be037451 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -103,6 +103,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ + UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { -- cgit From a2e2ff560f5113e8ca31432fbd985f5f1889efdc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:24 -0700 Subject: net: ipv6: Move ip6_route_get_saddr to inline VRF driver needs access to ip6_route_get_saddr code. 
Since it does little beyond ipv6_dev_get_saddr and ipv6_dev_get_saddr is already exported for modules move ip6_route_get_saddr to the header as an inline. Code move only; no functional change. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f55bf3d294aa..d97305d0e71f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -18,6 +18,7 @@ struct route_info { __u8 prefix[0]; /* 0,8 or 16 */ }; +#include #include #include #include @@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); -int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, - const struct in6_addr *daddr, unsigned int prefs, - struct in6_addr *saddr); +static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr) +{ + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; + int err = 0; + + if (rt && rt->rt6i_prefsrc.plen) + *saddr = rt->rt6i_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + daddr, prefs, saddr); + + return err; +} struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); -- cgit From 0d240e7811c4ec1965760ee4643b5bbc9cfacbb3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:25 -0700 Subject: net: vrf: Implement get_saddr for IPv6 IPv6 source address selection needs to consider the real egress route. Similar to IPv4 implement a get_saddr6 method which is called if source address has not been set. The get_saddr6 method does a full lookup which means pulling a route from the VRF FIB table and properly considering linklocal/multicast destination addresses. Lookup failures (eg., unreachable) then cause the source address selection to fail which gets propagated back to the caller. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/l3mdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f8a416ec674c..818fd4f100fc 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -39,6 +39,9 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, struct flowi6 *fl6); + int (*l3mdev_get_saddr6)(struct net_device *dev, + const struct sock *sk, + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -140,6 +143,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -230,6 +235,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) return NULL; } +static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6) +{ + return 0; +} + static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { -- cgit From afbac6010aec514998214fb19a1f37732b7a1d77 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:26 -0700 Subject: net: ipv6: Address selection needs to consider L3 domains IPv6 version of 3f2fb9a834cb ("net: l3mdev: address selection should only consider devices in L3 domain") and the follow up commit, a17b693cdd876 ("net: l3mdev: prefer VRF master for source address selection"). That is, if outbound device is given then the address preference order is an address from that device, an address from the master device if it is enslaved, and then an address from a device in the same L3 domain. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 818fd4f100fc..e90095091aa0 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -79,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return rc; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +{ + /* netdev_master_upper_dev_get_rcu calls + * list_first_or_null_rcu to walk the upper dev list. + * list_first_or_null_rcu does not handle a const arg. We aren't + * making changes, just want the master device from that list so + * typecast to remove the const + */ + struct net_device *dev = (struct net_device *)_dev; + const struct net_device *master; + + if (!dev) + return NULL; + + if (netif_is_l3_master(dev)) + master = dev; + else if (netif_is_l3_slave(dev)) + master = netdev_master_upper_dev_get_rcu(dev); + else + master = NULL; + + return master; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -190,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return 0; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +{ + return NULL; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? 
dev->ifindex : 0; -- cgit From b1cadc1a0949c82ff7fcb15603e3caf2d32ff9f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:02 -0700 Subject: ipv6: icmp: add a force_saddr param to icmp6_send() SIT or GRE tunnels might want to translate an IPV4 address into a v4mapped one when translating ICMP to ICMPv6. This patch adds the parameter to icmp6_send() but does not change icmpv6_send() signature. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 630f45335c73..432611a297fb 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -14,7 +14,8 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); -typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info); +typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -- cgit From 5fbba8ac9358f1e796c8aedcccc3487364643723 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:03 -0700 Subject: ip6: move ipip6_err_gen_icmpv6_unreach() We want to use this helper from GRE as well, so this is the time to move it in net/ipv6/icmp.c Also add a @nhs parameter, since SIT and GRE have different values for the header(s) to skip. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 432611a297fb..9796481edbdb 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,6 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); #else -- cgit From 2d7a3b276be2d032a6c1a48ced87a474327ee3d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:04 -0700 Subject: ipv6: translate ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED For better traceroute/mtr support for SIT and GRE tunnels, we translate IPV4 ICMP ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED We also have to translate the IPv4 source IP address of ICMP message to IPv6 v4mapped. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9796481edbdb..97ae98071a03 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); #else -- cgit From 9b8c6d7bf2e08a7d3eb6660a2bfaf29b8b49c329 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:05 -0700 Subject: gre: better support for ICMP messages for gre+ipv6 ipgre_err() can call ip6_err_gen_icmpv6_unreach() for proper support of ipv4+gre+icmp+ipv6+... frames, used for example by traceroute/mtr. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9222678426a1..a5e7035fb93f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -157,6 +157,7 @@ struct tnl_ptk_info { __be16 proto; __be32 key; __be32 seq; + int hdr_len; }; #define PACKET_RCVD 0 -- cgit From 20e1954fe238dbe5f8d3a979e593fe352bd703cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:06 -0700 Subject: ipv6: RFC 4884 partial support for SIT/GRE tunnels When receiving an ICMPv4 message containing extensions as defined in RFC 4884, and translating it to ICMPv6 at SIT or GRE tunnel, we need some extra manipulation in order to properly forward the extensions. This patch only takes care of Time Exceeded messages as they are the ones that typically carry information from various routers in a fabric during a traceroute session. It also avoids complex skb logic if the data_len is not a multiple of 8. RFC states : The "original datagram" field MUST contain at least 128 octets. If the original datagram did not contain 128 octets, the "original datagram" field MUST be zero padded to 128 octets. In practice routers use 128 bytes of original datagram, not more. Initial translation was added in commit ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Signed-off-by: Eric Dumazet Cc: Oussama Ghorbel Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 3 ++- include/uapi/linux/icmp.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 97ae98071a03..57086e9fc64c 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,8 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, + unsigned int data_len); #else diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index 16fff055f734..fddd9d736284 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -79,6 +79,7 @@ struct icmphdr { __be16 __unused; __be16 mtu; } frag; + __u8 reserved[4]; } un; }; -- cgit From cc8feb8edd92d854be552fe4f5e0eeabca40b9ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 Apr 2016 14:00:37 +0100 Subject: rxrpc: Fix exclusive connection handling "Exclusive connections" are meant to be used for a single client call and then scrapped. The idea is to limit the use of the negotiated security context. The current code, however, isn't doing this: it is instead restricting the socket to a single virtual connection and doing all the calls over that. This is changed such that the socket no longer maintains a special virtual connection over which it will do all the calls, but rather gets a new one each time a new exclusive call is made. Further, using a socket option for this is a poor choice. It should be done on sendmsg with a control message marker instead so that calls can be marked exclusive individually. To that end, add RXRPC_EXCLUSIVE_CALL which, if passed to sendmsg() as a control message element, will cause the call to be done on an single-use connection. The socket option (RXRPC_EXCLUSIVE_CONNECTION) still exists and, if set, will override any lack of RXRPC_EXCLUSIVE_CALL being specified so that programs using the setsockopt() will appear to work the same. Signed-off-by: David Howells --- include/linux/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index 1e8f216e2cf1..c68307bc306f 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -35,7 +35,7 @@ struct sockaddr_rxrpc { */ #define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ #define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ -#define RXRPC_EXCLUSIVE_CONNECTION 3 /* [clnt] use exclusive RxRPC connection */ +#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ /* @@ -52,6 +52,7 @@ struct sockaddr_rxrpc { #define RXRPC_LOCAL_ERROR 7 /* -r: local error generated [terminal] */ #define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ #define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ +#define RXRPC_EXCLUSIVE_CALL 10 /* s-: Call should be on exclusive connection */ /* * RxRPC security levels -- cgit From b95e5928fcc76d156352570858abdea7b2628efd Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 20 Jun 2016 07:26:17 -0700 Subject: openvswitch: Add packet len info to upcall. 
The commit f2a4d086ed4c ("openvswitch: Add packet truncation support.") introduces packet truncation before sending to userspace upcall receiver. This patch passes up the skb->len before truncation so that the upcall receiver knows the original packet size. Potentially this will be used by sFlow, where OVS translates sFlow config header=N to a sample action, truncating packet to N byte in kernel datapath. Thus, only N bytes instead of full-packet size is copied from kernel to userspace, saving the kernel-to-userspace bandwidth. Signed-off-by: William Tu Cc: Pravin Shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8274675ba9a3..d95a3018f6a1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -166,6 +166,7 @@ enum ovs_packet_cmd { * output port is actually a tunnel port. Contains the output tunnel key * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and + * @OVS_PACKET_ATTR_LEN: Packet size before truncation. * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment * size. * @@ -185,6 +186,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, error logging should be suppressed. */ OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ + OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ __OVS_PACKET_ATTR_MAX }; -- cgit From 506e65df52f2bf250aa9b4264efd180d1646bdec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Jun 2016 23:09:01 +0200 Subject: netfilter: make comparision helpers stub functions in ZONES=n case Those comparisions are useless in case of ZONES=n; all conntracks will reside in the same zone by definition. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_zones.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 4e32512cef32..bd4692690914 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -68,22 +68,34 @@ static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_matches_dir(zone, dir) ? zone->id : NF_CT_DEFAULT_ZONE_ID; +#else + return NF_CT_DEFAULT_ZONE_ID; +#endif } static inline bool nf_ct_zone_equal(const struct nf_conn *a, const struct nf_conntrack_zone *b, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_id(nf_ct_zone(a), dir) == nf_ct_zone_id(b, dir); +#else + return true; +#endif } static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, const struct nf_conntrack_zone *b) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone(a)->id == b->id; +#else + return true; +#endif } #endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ -- cgit From 6c8dee9842461e6ee6eb46081478999b3d5cb297 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 21:57:35 +0200 Subject: netfilter: move zone info into struct nf_conn Curently we store zone information as a conntrack extension. 
This has one drawback: for every lookup we need to fetch the zone data from the extension area. This change place the zone data directly into the main conntrack object structure and then removes the zone conntrack extension. The zone data is just 4 bytes, it fits into a padding hole before the tuplehash info, so we do not even increase the nf_conn structure size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 4 ---- include/net/netfilter/nf_conntrack_zones.h | 33 ++++++++++------------------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dd78bea227c8..9c0ed3d7af89 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -85,6 +85,9 @@ struct nf_conn { spinlock_t lock; u16 cpu; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 55d15049ab2f..b925395fa5ed 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -15,9 +15,6 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, #endif -#ifdef CONFIG_NF_CONNTRACK_ZONES - NF_CT_EXT_ZONE, -#endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_CT_EXT_TSTAMP, #endif @@ -38,7 +35,6 @@ enum nf_ct_ext_id { #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index bd4692690914..64a718b60839 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -9,12 +9,11 @@ static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { - const struct nf_conntrack_zone *nf_ct_zone = NULL; - #ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + return &ct->zone; +#else + return &nf_ct_zone_dflt; #endif - return nf_ct_zone ? 
nf_ct_zone : &nf_ct_zone_dflt; } static inline const struct nf_conntrack_zone * @@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { - const struct nf_conntrack_zone *zone; - +#ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; - zone = nf_ct_zone(tmpl); - if (zone->flags & NF_CT_FLAG_MARK) - zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); - - return zone; + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); } -static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, - const struct nf_conntrack_zone *info) +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; - - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - return -ENOMEM; - - nf_ct_zone_init(nf_ct_zone, info->id, info->dir, - info->flags); + ct->zone = *zone; #endif - return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, -- cgit From d1bd330a229fc8a69f0e7532138dfd42b4542fd4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 21 Jun 2016 01:17:17 +0100 Subject: of_mdio: Enable fixed PHY support if driver is a module The fixed_phy driver doesn't have to be built-in, and it's important that of_mdio supports it even if it's a module. Signed-off-by: Ben Hutchings Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 8f2237eb3485..6c8cb9aa4c00 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -69,7 +69,7 @@ static inline int of_mdio_parse_addr(struct device *dev, } #endif /* CONFIG_OF */ -#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) +#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_FIXED_PHY) extern int of_phy_register_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); #else -- cgit From af7d5185263133f859dd4f35d45594deef9db854 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Tue, 21 Jun 2016 12:43:59 +0300 Subject: net/mlx4_en: Add DCB PFC support through CEE netlink commands This patch adds support for reading and updating priority flow control (PFC) attributes in the driver via netlink. Signed-off-by: Rana Shahout Signed-off-by: Eran Ben Elisha Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..4dbc1450bbe0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -535,6 +535,7 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; + int max_tc_eth; u32 *qp0_qkey; u32 *qp0_proxy; u32 *qp1_proxy; @@ -1494,6 +1495,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, u16 offset, u16 size, u8 *data); +int mlx4_max_tc(struct mlx4_dev *dev); /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) -- cgit From 722003ac40c2c397bd5bc2b714125bc82ab27043 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 21 Jun 2016 09:36:21 -0400 Subject: qed: Add support for coalescing config read/update. This patch adds support for configuring the device tx/rx coalescing timeout values in the order of micro seconds. It also adds APIs for upper layer drivers for reading/updating the coalescing values. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e1d5122e8a96..b1e3c57c7117 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -488,6 +488,30 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); +/** + * @brief get_coalesce - Get coalesce parameters in usec + * + * @param cdev + * @param rx_coal - Rx coalesce value in usec + * @param tx_coal - Tx coalesce value in usec + * + */ + void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal); + +/** + * @brief set_coalesce - Configure Rx coalesce value in usec + * + * @param cdev + * @param rx_coal - Rx coalesce value in usec + * @param tx_coal - Tx coalesce value in usec + * @param qid - Queue index + * @param sb_id - Status Block Id + * + * @return 0 on success, error otherwise. + */ + int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, + u8 qid, u16 sb_id); + /** * @brief set_led - Configure LED mode * -- cgit From 7643507fe8b5bd8ab7522f6a81058cc1209d2585 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Tue, 21 Jun 2016 14:58:46 -0400 Subject: netfilter: xt_NFLOG: nflog-range does not truncate packets li->u.ulog.copy_len is currently ignored by the kernel, we should truncate the packet to either li->u.ulog.copy_len (if set) or copy_range before sending it to userspace. 0 is a valid input for copy_len, so add a new flag to indicate whether this was option was specified by the user or not. Add two flags to indicate whether nflog-size/copy_len was set or not. XT_NFLOG_F_COPY_LEN is for XT_NFLOG and NFLOG_F_COPY_LEN for nfnetlink_log On the userspace side, this was initially represented by the option nflog-range, this will be replaced by --nflog-size now. --nflog-range would still exist but does not do anything. 
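A minimal sketch of how a logging backend is expected to honour the new flag when deciding how many packet bytes to copy to user space (the helper is invented for illustration; the actual nfnetlink_log changes are not part of this include/ hunk):

#include <net/netfilter/nf_log.h>

static u32 nflog_copy_len(const struct nf_loginfo *li, u32 pkt_len)
{
	u32 len = pkt_len;

	if (li->type != NF_LOG_TYPE_ULOG)
		return len;

	/* copy_len is only meaningful when NF_LOG_F_COPY_LEN is set,
	 * since 0 is a valid --nflog-size value.
	 */
	if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
	    li->u.ulog.copy_len < len)
		len = li->u.ulog.copy_len;

	return len;
}

On the xtables side, xt_nflog_info.flags would carry XT_NFLOG_F_COPY_LEN when the user gives --nflog-size, and the target would translate that into NF_LOG_F_COPY_LEN before handing the nf_loginfo to the logger.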
Reported-by: Joe Dollard Reviewed-by: Josh Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 7 +++++++ include/uapi/linux/netfilter/xt_NFLOG.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 57639fca223a..83d855ba6af1 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -12,6 +12,9 @@ #define NF_LOG_UID 0x08 /* Log UID owning local socket */ #define NF_LOG_MASK 0x0f +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + enum nf_log_type { NF_LOG_TYPE_LOG = 0, NF_LOG_TYPE_ULOG, @@ -22,9 +25,13 @@ struct nf_loginfo { u_int8_t type; union { struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ u_int32_t copy_len; u_int16_t group; u_int16_t qthreshold; + u_int16_t flags; } ulog; struct { u_int8_t level; diff --git a/include/uapi/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h index 87b58311ce6b..f33070730fc8 100644 --- a/include/uapi/linux/netfilter/xt_NFLOG.h +++ b/include/uapi/linux/netfilter/xt_NFLOG.h @@ -6,9 +6,13 @@ #define XT_NFLOG_DEFAULT_GROUP 0x1 #define XT_NFLOG_DEFAULT_THRESHOLD 0 -#define XT_NFLOG_MASK 0x0 +#define XT_NFLOG_MASK 0x1 + +/* This flag indicates that 'len' field in xt_nflog_info is set*/ +#define XT_NFLOG_F_COPY_LEN 0x1 struct xt_nflog_info { + /* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */ __u32 len; __u16 group; __u16 threshold; -- cgit From 889f7ee7c6e84251215d43cbc856ea116c72d3f2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 18:07:07 +0200 Subject: netfilter: nf_tables: add generic macros to check for generation mask Thus, we can reuse these to check the genmask of any object type, not only rules. This is required now that tables, chain and sets will get a generation mask field too in follow up patches. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..d0778cbaf7f1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -969,6 +969,30 @@ static inline u8 nft_genmask_cur(const struct net *net) #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) +/* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. 
*/ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) + /* * Set element transaction helpers */ -- cgit From f2a6d766765d2794e26e25655d4ffcfe29c3ec2f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Jun 2016 17:29:18 +0200 Subject: netfilter: nf_tables: add generation mask to tables This patch addresses two problems: 1) The netlink dump is inconsistent when interfering with an ongoing transaction update for several reasons: 1.a) We don't honor the internal NFT_TABLE_INACTIVE flag, and we should be skipping these inactive objects in the dump. 1.b) We perform speculative deletion during the preparation phase, that may result in skipping active objects. 1.c) The listing order changes, which generates noise when tracking incremental ruleset update via tools like git or our own testsuite. 2) We don't allow to add and to update the object in the same batch, eg. add table x; add table x { flags dormant\; }. In order to resolve these problems: 1) If the user requests a deletion, the object becomes inactive in the next generation. Then, ignore objects that scheduled to be deleted from the lookup path, as they will be effectively removed in the next generation. 2) From the get/dump path, if the object is not currently active, we skip it. 3) Support 'add X -> update X' sequence from a transaction. After this update, we obtain a consistent list as long as we stay in the same generation. The userspace side can detect interferences through the generation counter so it can restart the dumping. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d0778cbaf7f1..05c9a64b39aa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -838,6 +838,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask * @name: name of the table */ struct nft_table { @@ -846,7 +847,8 @@ struct nft_table { struct list_head sets; u64 hgenerator; u32 use; - u16 flags; + u16 flags:14, + genmask:2; char name[NFT_TABLE_MAXNAMELEN]; }; @@ -992,6 +994,8 @@ static inline u8 nft_genmask_cur(const struct net *net) /* After committing the ruleset, clear the stale generation bit. */ #define nft_clear(__net, __obj) \ (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) /* * Set element transaction helpers -- cgit From 664b0f8cd8c66d02d14168ee7ac6a957cc88177f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 19:21:31 +0200 Subject: netfilter: nf_tables: add generation mask to chains Similar to ("netfilter: nf_tables: add generation mask to tables"). 
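Taken together, the intended use of these helpers for any object type looks roughly like the following sketch (not part of the patches; find_table(), the list argument and the surrounding transaction code are invented for illustration):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/string.h>
#include <net/netfilter/nf_tables.h>

/* Lookup: skip objects that are not active in the generation we care about.
 * Dump/get paths pass nft_genmask_cur(net), the transaction path passes
 * nft_genmask_next(net).
 */
static struct nft_table *find_table(struct list_head *tables,
				    const char *name, u8 genmask)
{
	struct nft_table *table;

	list_for_each_entry(table, tables, list) {
		if (!strcmp(table->name, name) &&
		    nft_active_genmask(table, genmask))
			return table;
	}
	return NULL;
}

static void add_table(struct net *net, struct list_head *tables,
		      struct nft_table *table)
{
	nft_activate_next(net, table);	 /* visible from the next generation on */
	list_add_tail_rcu(&table->list, tables);
}

static void del_table(struct net *net, struct nft_table *table)
{
	nft_deactivate_next(net, table); /* hidden from the next generation on */
}

On commit the generation counter flips and nft_clear() drops the stale bit; on abort the newly added objects are simply unlinked again, which is what makes the 'add X; update X' sequence and consistent dumps possible.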
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05c9a64b39aa..b023e287ea92 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -732,7 +732,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_INACTIVE = 0x2, }; /** @@ -754,7 +753,8 @@ struct nft_chain { u64 handle; u32 use; u16 level; - u8 flags; + u8 flags:6, + genmask:2; char name[NFT_CHAIN_MAXNAMELEN]; }; -- cgit From 37a9cc52552579f22e18cca401cfc4351b6cbc72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 22:52:45 +0200 Subject: netfilter: nf_tables: add generation mask to sets Similar to ("netfilter: nf_tables: add generation mask to tables"). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b023e287ea92..07a5ba47cbda 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -296,6 +296,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @ops: set ops * @pnet: network namespace * @flags: set flags + * @genmask: generation mask * @klen: key length * @dlen: data length * @data: private set data @@ -317,7 +318,8 @@ struct nft_set { /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; - u16 flags; + u16 flags:14, + genmask:2; u8 klen; u8 dlen; unsigned char data[] @@ -335,9 +337,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv) } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { -- cgit From 3183ab8997a477c8d9ad175a1cef70dff77c6dbc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2016 13:26:10 +0200 Subject: netfilter: conntrack: allow increasing bucket size via sysctl too No need to restrict this to module parameter. We export a copy of the real hash size -- when user alters the value we allocate the new table, copy entries etc before we update the real size to the requested one. This is also needed because the real size is used by concurrent readers and cannot be changed without synchronizing the conntrack generation seqcnt. We only allow changing this value from the initial net namespace. Tested using http-client-benchmark vs. 
httpterm with concurrent while true;do echo $RANDOM > /proc/sys/net/netfilter/nf_conntrack_buckets done Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 9c0ed3d7af89..5d3397f34583 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,6 +290,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; -- cgit From 82bec71d46b83f39860e2838ff8394e4fcd6efab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Jun 2016 14:26:33 +0200 Subject: netfilter: nf_tables: get rid of NFT_BASECHAIN_DISABLED This flag was introduced to restore rulesets from the new netdev family, but since 5ebe0b0eec9d6f7 ("netfilter: nf_tables: destroy basechain and rules on netdevice removal") the ruleset is released once the netdev is gone. This also removes nft_register_basechain() and nft_unregister_basechain() since they have no clients anymore after this rework. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 07a5ba47cbda..1ea19a6e72e6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -798,7 +798,6 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 -#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain -- cgit From 0071e184a535e40ce487528cb04f4690cb0da881 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Thu, 23 Jun 2016 12:24:08 +0200 Subject: netfilter: nf_tables: add support for inverted logic in nft_lookup Introduce a new configuration option for this expression, which allows users to invert the logic of set lookups. In _init() we will now return EINVAL if NFT_LOOKUP_F_INV is in anyway related to a map lookup. 
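A sketch of that validation step, consistent with the description above but not copied from the patch (the helper name and its surroundings are invented; the real check lives in nft_lookup's ->init() path):

#include <net/netlink.h>
#include <net/netfilter/nf_tables.h>

static int lookup_parse_flags(const struct nlattr * const tb[],
			      const struct nft_set *set, bool *invert)
{
	u32 flags;

	if (!tb[NFTA_LOOKUP_FLAGS])
		return 0;

	flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
	if (flags & ~NFT_LOOKUP_F_INV)
		return -EINVAL;		/* unknown flag bits */

	if (flags & NFT_LOOKUP_F_INV) {
		if (set->flags & NFT_SET_MAP)
			return -EINVAL;	/* inverted map lookups are rejected */
		*invert = true;
	}
	return 0;
}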
The code in the _eval() function has been untangled and updated to sopport the XOR of options, as we should consider 4 cases: * lookup false, invert false -> NFT_BREAK * lookup false, invert true -> return w/o NFT_BREAK * lookup true, invert false -> return w/o NFT_BREAK * lookup true, invert true -> NFT_BREAK Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6a4dbe04f09e..01751faccaf8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,10 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + /** * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes * @@ -553,6 +557,7 @@ enum nft_cmp_attributes { * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) */ enum nft_lookup_attributes { NFTA_LOOKUP_UNSPEC, @@ -560,6 +565,7 @@ enum nft_lookup_attributes { NFTA_LOOKUP_SREG, NFTA_LOOKUP_DREG, NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, __NFTA_LOOKUP_MAX }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) -- cgit From 520ac30f45519b0a82dd92117c181d1d6144677b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:49 -0700 Subject: net_sched: drop packets after root qdisc lock is released Qdisc performance suffers when packets are dropped at enqueue() time because drops (kfree_skb()) are done while qdisc lock is held, delaying a dequeue() draining the queue. Nominal throughput can be reduced by 50 % when this happens, at a time we would like the dequeue() to proceed as fast as possible. Even FQ is vulnerable to this problem, while one of FQ goals was to provide some flow isolation. This patch adds a 'struct sk_buff **to_free' parameter to all qdisc->enqueue(), and in qdisc_drop() helper. I measured a performance increase of up to 12 %, but this patch is a prereq so that future batches in enqueue() can fly. Signed-off-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4f7cee8344c4..04e84c07c94f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -37,8 +37,10 @@ struct qdisc_size_table { }; struct Qdisc { - int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); - struct sk_buff * (*dequeue)(struct Qdisc *dev); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); + struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 @@ -160,7 +162,9 @@ struct Qdisc_ops { char id[IFNAMSIZ]; int priv_size; - int (*enqueue)(struct sk_buff *, struct Qdisc *); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); @@ -498,10 +502,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch); + return sch->enqueue(skb, sch, to_free); } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) @@ -626,24 +631,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return __qdisc_dequeue_head(sch, &sch->q); } +/* Instead of calling kfree_skb() while root qdisc lock is held, + * queue the skb for future freeing at end of __dev_xmit_skb() + */ +static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) +{ + skb->next = *to_free; + *to_free = skb; +} + static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list) + struct sk_buff_head *list, + struct sk_buff **to_free) { struct sk_buff *skb = __skb_dequeue(list); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); + qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return len; } return 0; } -static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) +static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, + struct sk_buff **to_free) { - return __qdisc_queue_drop_head(sch, &sch->q); + return __qdisc_queue_drop_head(sch, &sch->q, to_free); } static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) @@ -724,9 +741,11 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) qdisc_qstats_drop(sch); } -static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { - kfree_skb(skb); + __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; -- cgit From 008830bc321c0fc22c0db8d5b0b56f854ed90a5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:50 -0700 Subject: net_sched: fq_codel: cache skb->truesize into skb->cb Now we defer skb drops, it makes sense to keep a copy of skb->truesize in struct codel_skb_cb to avoid one cache line miss per dropped skb in fq_codel_drop(), to reduce latencies a bit further. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/codel_qdisc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 8144d9cd2908..098630f83a55 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -52,6 +52,7 @@ /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; + unsigned int mem_usage; }; static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) -- cgit From 4d202a0d31b96ab3324b21e7500d9a2da9ef57dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:52 -0700 Subject: net_sched: generalize bulk dequeue When qdisc bulk dequeue was added in linux-3.18 (commit 5772e9a3463b "qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE"), it was constrained to some specific qdiscs. With some extra care, we can extend this to all qdiscs, so that typical traffic shaping solutions can benefit from small batches (8 packets in this patch). For example, HTB is often used on some multi queue device. And bonding/team are multi queue devices... Idea is to bulk-dequeue packets mapping to the same transmit queue. This brings between 35 and 80 % performance increase in HTB setup under pressure on a bonding setup : 1) NUMA node contention : 610,000 pps -> 1,110,000 pps 2) No node contention : 1,380,000 pps -> 1,930,000 pps Now we should work to add batches on the enqueue() side ;) Signed-off-by: Eric Dumazet Cc: John Fastabend Cc: Jesper Dangaard Brouer Cc: Hannes Frederic Sowa Cc: Florian Westphal Cc: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 04e84c07c94f..909aff2db2b3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -75,13 +75,14 @@ struct Qdisc { /* * For performance sake on SMP, we put highly modified fields at the end */ - struct Qdisc *next_sched ____cacheline_aligned_in_smp; - struct sk_buff *gso_skb; - unsigned long state; + struct sk_buff *gso_skb ____cacheline_aligned_in_smp; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; + unsigned long state; + struct Qdisc *next_sched; + struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; atomic_t refcnt; -- cgit From 1466cc5b23d18e7b6b8f1a45443d595393dbcae7 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 23 Jun 2016 17:02:37 +0300 Subject: net/mlx5: Rate limit tables support Configuring and managing HW rate limit tables. The HW holds a table of rate limits, each rate is associated with an index in that table. Later a Send Queue uses this index to set the rate limit. Multiple Send Queues can have the same rate limit, which is represented by a single entry in this table. Even though a rate can be shared, each queue is being rate limited independently of others. The SW shadow of this table holds the rate itself, the index in the HW table and the refcount (number of queues) working with this rate. The exported functions are mlx5_rl_add_rate and mlx5_rl_remove_rate. Number of different rates and their values are derived from HW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/driver.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 73a48479892d..e0a3ed758287 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1330,6 +1330,7 @@ enum mlx5_cap_type { MLX5_CAP_ESWITCH, MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, + MLX5_CAP_QOS, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1414,6 +1415,9 @@ enum mlx5_cap_type { MLX5_GET(vector_calc_cap, \ mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) +#define MLX5_CAP_QOS(mdev, cap)\ + MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 80776d0c52dc..46260fdc5305 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -481,6 +481,21 @@ struct mlx5_fc_stats { struct mlx5_eswitch; +struct mlx5_rl_entry { + u32 rate; + u16 index; + u16 refcount; +}; + +struct mlx5_rl_table { + /* protect rate limit table */ + struct mutex rl_lock; + u16 max_size; + u32 max_rate; + u32 min_rate; + struct mlx5_rl_entry *rl_entry; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -544,6 +559,7 @@ struct mlx5_priv { struct mlx5_flow_root_namespace *esw_ingress_root_ns; struct mlx5_fc_stats fc_stats; + struct mlx5_rl_table rl_table; }; enum mlx5_device_state { @@ -861,6 +877,12 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); +int mlx5_init_rl_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); + static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; @@ -938,6 +960,11 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) +{ + return !!(dev->priv.rl_table.max_size); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -- cgit From 667daedaecd15b89d0ded7af49519f28d6ea2cf4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:42 +0300 Subject: net/mlx5e: Toggle link only after modifying port parameters Add a dedicated function to toggle port link. It should be called only after setting a port register. Toggle will set port link to down and bring it back up in case that it's admin status was up. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/port.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 9851862c0ec5..4adfac15f0e9 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -67,6 +67,7 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, -- cgit From 89da45b8b5b2187734a11038b8593714f964ffd1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:43 +0300 Subject: ethtool: Add 50G baseSR2 link mode Add ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT bit. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Cc: Ben Hutchings Cc: David Decotigny Acked-By: David Decotigny Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5f030b46cff4..b8f38e84d93a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1362,6 +1362,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1370,7 +1371,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit From 52244d960755936fa9c8ce54d583d0ed46f24fb6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:46 +0300 Subject: net/mlx5e: Report correct auto negotiation and allow toggling Previous to this patch auto negotiation was reported off although it was on by default in hardware. This patch reports the correct information to ethtool and allows the user to toggle it on/off. Added another parameter to set port proto function in order to pass the auto negotiation field to the hardware. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/port.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4adfac15f0e9..e3012cc64b8a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -47,6 +47,14 @@ enum mlx5_module_id { MLX5_MODULE_ID_QSFP28 = 0x11, }; +enum mlx5_an_status { + MLX5_AN_UNAVAILABLE = 0, + MLX5_AN_COMPLETE = 1, + MLX5_AN_FAILED = 2, + MLX5_AN_LINK_UP = 3, + MLX5_AN_LINK_DOWN = 4, +}; + #define MLX5_EEPROM_MAX_BYTES 32 #define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 @@ -65,14 +73,17 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, int proto_mask, u8 local_port); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask); +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); -- cgit From 637c841dd7a5f9bd97b75cbe90b526fa1a52e530 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Jun 2016 18:42:51 -0700 Subject: net: diag: Add support to filter on device index Add support to inet_diag facility to filter sockets based on device index. If an interface index is in the filter only sockets bound to that index (sk_bound_dev_if) are returned. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index a16643705669..abbd1dc5d683 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -72,6 +72,7 @@ enum { INET_DIAG_BC_AUTO, INET_DIAG_BC_S_COND, INET_DIAG_BC_D_COND, + INET_DIAG_BC_DEV_COND, /* u32 ifindex */ }; struct inet_diag_hostcond { -- cgit From a5e4bd991362223346e1d3561e61d7a25797fe25 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Jun 2016 11:24:08 +0200 Subject: of_mdio: select fixed phy support unconditionally Calling the fixed-phy functions when CONFIG_FIXED_PHY=m as a previous change tried cannot work if the caller is in built-in code: drivers/of/built-in.o: In function `of_phy_register_fixed_link': of_reserved_mem.c:(.text+0x85e0): undefined reference to `fixed_phy_register' Making of_mdio depend on 'FIXED_PHY || !FIXED_PHY' would solve this dependency by enforcing that OF_MDIO itself becomes a loadable module when FIXED_PHY=y, but that creates a different dependency as it breaks any built-in ethernet driver that uses of_mdio. Making FIXED_PHY a bool option also cannot work, since it depends on PHYLIB, which again is tristate. This version now uses 'select FIXED_PHY' to ensure that the fixed-phy portion of of_mdio is not optional. 
The main downside of this approach is a small increase in code size for cases that do not need fixed phy support, but it should avoid all of the link-time problems. Signed-off-by: Arnd Bergmann Fixes: d1bd330a229f ("of_mdio: Enable fixed PHY support if driver is a module") Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 6c8cb9aa4c00..4b04587d0441 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,6 +25,8 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); +extern int of_phy_register_fixed_link(struct device_node *np); +extern bool of_phy_is_fixed_link(struct device_node *np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) @@ -67,12 +69,6 @@ static inline int of_mdio_parse_addr(struct device *dev, { return -ENOSYS; } -#endif /* CONFIG_OF */ - -#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_FIXED_PHY) -extern int of_phy_register_fixed_link(struct device_node *np); -extern bool of_phy_is_fixed_link(struct device_node *np); -#else static inline int of_phy_register_fixed_link(struct device_node *np) { return -ENOSYS; -- cgit From 02e57b9d7c8ce9e403f15f48fb91dd6549aaf465 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 24 Jun 2016 15:16:26 +0200 Subject: drivers: net: stmmac: add port selection programming In case of SGMII mode, for example when a MAC2MAC connection is needed, the port selection bits (inside the MAC configuration registers) have to be programmed according to the link selected. So the patch adds a new DT parameter to pass the port selection and to program the related PCS and CORE to use it. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0507dbfbf63c..705840e0438f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -141,5 +141,6 @@ struct plat_stmmacenet_data { struct stmmac_axi *axi; int has_gmac4; bool tso_en; + int mac_port_sel_speed; }; #endif -- cgit From 6816a7ffce32e999601825ddfd887f36d3052932 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:25 +0200 Subject: bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_read Follow-up commit to 1e33759c788c ("bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output") to add the same functionality into bpf_perf_event_read() helper. The split of index into flags and index component is also safe here, since such large maps are rejected during map allocation time. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406459b935a2..58df2da3e9bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -347,7 +347,7 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output flags. */ +/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags.
*/ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK -- cgit From 6578171a7ff0c31dc73258f93da7407510abf085 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:27 +0200 Subject: bpf: add bpf_skb_change_proto helper This patch adds a minimal helper for doing the groundwork of changing the skb->protocol in a controlled way. Currently supported is v4 to v6 and vice versa transitions, which allows f.e. for a minimal, static nat64 implementation where applications in containers that still require IPv4 can be transparently operated in an IPv6-only environment. For example, host facing veth of the container can transparently do the transitions in a programmatic way with the help of clsact qdisc and cls_bpf. Idea is to separate concerns for keeping complexity of the helper lower, which means that the programs utilize bpf_skb_change_proto(), bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done, instead of doing everything in a single helper (and thus partially duplicating helper functionality). Also, bpf_skb_change_proto() shouldn't need to deal with raw packet data as this is done by other helpers. bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to operate on a private one, push or pop additionally required header space and migrate the gso/gro meta data from the shared info. We do mark the gso type as dodgy so that headers are checked and segs recalculated by the gso/gro engine. The gso_size target is adapted as well. The flags argument added is currently reserved and can be used for future extensions. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 58df2da3e9bf..66cd738a937a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -313,6 +313,20 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt, + + /** + * bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is + * v4 -> v6, v6 -> v4 transitions. The helper will also + * resize the skb. eBPF program is expected to fill the + * new headers via skb_store_bytes and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_proto, + __BPF_FUNC_MAX_ID, }; -- cgit From d2485c4242a826fdf493fd3a27b8b792965b9b9e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:28 +0200 Subject: bpf: add bpf_skb_change_type helper This work adds a helper for changing skb->pkt_type in a controlled way. We only allow a subset of possible values and can extend that in future should other use cases come up. Doing this as a helper has the advantage that errors can be handeled gracefully and thus helper kept extensible. It's a write counterpart to pkt_type member we can already read from struct __sk_buff context. Major use case is to change incoming skbs to PACKET_HOST in a programmatic way instead of having to recirculate via redirect(..., BPF_F_INGRESS), for example. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66cd738a937a..be6ac1291680 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -327,6 +327,15 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_proto, + /** + * bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_type, + __BPF_FUNC_MAX_ID, }; -- cgit From e98e915e11ad1efb11147122bd4932ec6b3425da Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Wed, 22 Jun 2016 20:23:03 +0900 Subject: wireless: Use macro instead of number Use IEEE80211_MIN_ACTION_SIZE macro for robust management frame check. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b118744d3382..1daebb307e6e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2464,7 +2464,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { - if (skb->len < 25) + if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } -- cgit From 46f6b06050b736dab4d41494dae27b883cddc365 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Wed, 22 Jun 2016 19:55:20 +0900 Subject: mac80211: Encrypt "Group addressed privacy" action frames Previously, action frames sent to a group address were not encrypted. But [1] "Table 8-38 Category values" indicates "Mesh" and "Multihop" category action frames should be encrypted (Group addressed privacy == yes). And the encryption key should be MGTK ([1] 10.13 Group addressed robust management frame procedures). So this patch modifies the code to conform to the spec.
[1] IEEE Std 802.11-2012 Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1daebb307e6e..a80516fd65c8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -2486,6 +2487,35 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } +/** + * _ieee80211_is_group_privacy_action - check if frame is a group addressed + * privacy action frame + * @hdr: the frame + */ +static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) +{ + struct ieee80211_mgmt *mgmt = (void *)hdr; + + if (!ieee80211_is_action(hdr->frame_control) || + !is_multicast_ether_addr(hdr->addr1)) + return false; + + return mgmt->u.action.category == WLAN_CATEGORY_MESH_ACTION || + mgmt->u.action.category == WLAN_CATEGORY_MULTIHOP_ACTION; +} + +/** + * ieee80211_is_group_privacy_action - check if frame is a group addressed + * privacy action frame + * @skb: the skb containing the frame, length will be checked + */ +static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) +{ + if (skb->len < IEEE80211_MIN_ACTION_SIZE) + return false; + return _ieee80211_is_group_privacy_action((void *)skb->data); +} + /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs -- cgit From 80e73cc563c4359be809a03bcb8e7e28141a813a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Jun 2016 16:57:05 +0200 Subject: net: rtnetlink: add support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute This patch adds support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute which allows to export per-slave statistics if the master device supports the linkxstats callback. The attribute is passed down to the linkxstats callback and it is up to the callback user to use it (an example has been added to the only current user - the bridge). This allows us to query only specific slaves of master devices like bridge ports and export only what we're interested in instead of having to dump all ports and searching only for a single one. This will be used to export per-port IGMP/MLD stats and also per-port vlan stats in the future, possibly other statistics as well. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/rtnetlink.h | 5 +++-- include/uapi/linux/if_link.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 006a7b81d758..4113916cc1bb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -98,10 +98,11 @@ struct rtnl_link_ops { const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); - size_t (*get_linkxstats_size)(const struct net_device *dev); + size_t (*get_linkxstats_size)(const struct net_device *dev, + int attr); int (*fill_linkxstats)(struct sk_buff *skb, const struct net_device *dev, - int *prividx); + int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb36bd5675a7..db2458ade81c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -822,6 +822,7 @@ enum { IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, __IFLA_STATS_MAX, }; -- cgit From 1080ab95e3c7bdd77870e209aff83c763fdcf439 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Jun 2016 16:57:06 +0200 Subject: net: bridge: add support for IGMP/MLD stats and export them via netlink This patch adds stats support for the currently used IGMP/MLD types by the bridge. The stats are per-port (plus one stat per-bridge) and per-direction (RX/TX). The stats are exported via netlink via the new linkxstats API (RTM_GETSTATS). In order to minimize the performance impact, a new option is used to enable/disable the stats - multicast_stats_enabled, similar to the recent vlan stats. Also in order to avoid multiple IGMP/MLD type lookups and checks, we make use of the current "igmp" member of the bridge private skb->cb region to record the type on Rx (both host-generated and external packets pass by multicast_rcv()). We can do that since the igmp member was used as a boolean and all the valid IGMP/MLD types are positive values. The normal bridge fast-path is not affected at all, the only affected paths are the flooding ones and since we make use of the IGMP/MLD type, we can quickly determine if the packet should be counted using cache-hot data (cb's igmp member). We add counters for: * IGMP Queries * IGMP Leaves * IGMP v1/v2/v3 reports * MLD Queries * MLD Leaves * MLD v1/v2 reports These are invaluable when monitoring or debugging complex multicast setups with bridges. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 397d503fdedb..8304fe6f0561 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -247,8 +247,34 @@ enum { enum { BRIDGE_XSTATS_UNSPEC, BRIDGE_XSTATS_VLAN, + BRIDGE_XSTATS_MCAST, + BRIDGE_XSTATS_PAD, __BRIDGE_XSTATS_MAX }; #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) +enum { + BR_MCAST_DIR_RX, + BR_MCAST_DIR_TX, + BR_MCAST_DIR_SIZE +}; + +/* IGMP/MLD statistics */ +struct br_mcast_stats { + __u64 igmp_queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_parse_errors; + + __u64 mld_queries[BR_MCAST_DIR_SIZE]; + __u64 mld_leaves[BR_MCAST_DIR_SIZE]; + __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; + __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; + __u64 mld_parse_errors; + + __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; + __u64 mcast_packets[BR_MCAST_DIR_SIZE]; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index db2458ade81c..4285ac31e865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -273,6 +273,7 @@ enum { IFLA_BR_VLAN_DEFAULT_PVID, IFLA_BR_PAD, IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, __IFLA_BR_MAX, }; -- cgit From b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 27 Jun 2016 15:33:56 -0700 Subject: tcp: add an ability to dump and restore window parameters We found that sometimes a restored tcp socket doesn't work. The reason for this bug is incorrect window parameters; in this case tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The other side drops packets with this seq, because seq is less than tp->rcv_nxt ( tcp_sequence() ). Data from a send queue is sent only if there is enough space in a window, so when we restore unacked data, we need to expand a window to fit this data. This was in a first version of this patch: "tcp: extend window to fit all restored unacked data in a send queue" Then Alexey recommended restoring the window parameters instead of adjusting them according to the data in the send queue. This sounds reasonable. rcv_wnd has to be restored, because it was reported to the other side and the offered window is never shrunk. One of the reasons why we need to restore snd_wnd was described above. Cc: Pavel Emelyanov Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S.
Miller --- include/uapi/linux/tcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 53e8e3fe6b1b..482898fc433a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -115,12 +115,22 @@ enum { #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ struct tcp_repair_opt { __u32 opt_code; __u32 opt_val; }; +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + enum { TCP_NO_QUEUE, TCP_RECV_QUEUE, -- cgit From 2631b79f6cb8b634fe41b77de9c4add0ec6b3cae Mon Sep 17 00:00:00 2001 From: "Seymour, Shane M" Date: Tue, 28 Jun 2016 23:06:48 +0000 Subject: tcp: increase size at which tcp_bound_to_half_wnd bounds to > TCP_MSS_DEFAULT In previous commit 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 the following comments were added: "When peer uses tiny windows, there is no use in packetizing to sub-MSS pieces for the sake of SWS or making sure there are enough packets in the pipe for fast recovery." The test should be > TCP_MSS_DEFAULT not >= 512. This allows low end devices that send an MSS of 536 (TCP_MSS_DEFAULT) to see better network performance by sending it 536 bytes of data at a time instead of bounding to half window size (268). Other network stacks work this way, e.g. HP-UX. Signed-off-by: Shane Seymour Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a79894b66726..d825858fe4f1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -589,7 +589,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) * On the other hand, for extremely large MSS devices, handling * smaller than MSS windows in this way does make sense. */ - if (tp->max_window >= 512) + if (tp->max_window > TCP_MSS_DEFAULT) cutoff = (tp->max_window >> 1); else cutoff = tp->max_window; -- cgit From 19689e38eca5d7b32755182d4e62efd7a5376c45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 18:51:53 +0200 Subject: tcp: md5: use kmalloc() backed scratch areas Some arches have virtually mapped kernel stacks, or will soon have. tcp_md5_hash_header() uses an automatic variable to copy tcp header before mangling th->check and calling crypto function, which might be problematic on such arches. David says that using percpu storage is also problematic on non SMP builds. Just use kmalloc() to allocate scratch areas. Signed-off-by: Eric Dumazet Reported-by: Andy Lutomirski Signed-off-by: David S. 
Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d825858fe4f1..c00e7d51bb18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1384,7 +1384,7 @@ union tcp_md5sum_block { /* - pool: digest algorithm, hash description and scratch buffer */ struct tcp_md5sig_pool { struct ahash_request *md5_req; - union tcp_md5sum_block md5_blk; + void *scratch; }; /* - functions */ @@ -1420,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void) local_bh_enable(); } -int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned int header_len); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, -- cgit From 982fb490c298896d15e9323a882f34a57c11ff56 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:31 +0800 Subject: ptr_ring: support zero length ring Sometimes we need a zero-length ring, but the current code will crash since we don't do any check before accessing the ring. This patch fixes this. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 562a65e8bcc0..d78b8b89c707 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -102,7 +102,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (r->queue[r->producer]) + if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; r->queue[r->producer++] = ptr; @@ -164,7 +164,9 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { - return r->queue[r->consumer]; + if (likely(r->size)) + return r->queue[r->consumer]; + return NULL; } /* Note: callers invoking this in a loop must use a compiler barrier, -- cgit From fd68adec9de3104c236ffbcb3bd829d3e635a444 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:32 +0800 Subject: skb_array: minor tweak Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/skb_array.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 678bfbf78ac4..2dd0d1e4ee7e 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -151,12 +151,12 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_init(&a->ring, size, gfp); } -void __skb_array_destroy_skb(void *ptr) +static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } -int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) +static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } -- cgit From 59e6ae53248a72d83cec77dd704b6990b2394479 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 30 Jun 2016 14:45:33 +0800 Subject: ptr_ring: support resizing multiple queues Sometimes we need to support resizing multiple queues at once, because it is not easy to recover from a partial failure when resizing multiple queues. Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- include/linux/ptr_ring.h | 71 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index d78b8b89c707..2052011bf9fb 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -349,20 +349,14 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } -static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, - void (*destroy)(void *)) +static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, + int size, gfp_t gfp, + void (*destroy)(void *)) { - unsigned long flags; int producer = 0; - void **queue = __ptr_ring_init_queue_alloc(size, gfp); void **old; void *ptr; - if (!queue) - return -ENOMEM; - - spin_lock_irqsave(&(r)->producer_lock, flags); - while ((ptr = ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; @@ -375,6 +369,23 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, old = r->queue; r->queue = queue; + return old; +} + +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->producer_lock, flags); + + old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); + spin_unlock_irqrestore(&(r)->producer_lock, flags); kfree(old); @@ -382,6 +393,48 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, return 0; } +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) +{ + unsigned long flags; + void ***queues; + int i; + + queues = kmalloc(nrings * sizeof *queues, gfp); + if (!queues) + goto noqueues; + + for (i = 0; i < nrings; ++i) { + queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + if (!queues[i]) + goto nomem; + } + + for (i = 0; i < nrings; ++i) { + spin_lock_irqsave(&(rings[i])->producer_lock, flags); + queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], + size, gfp, destroy); + spin_unlock_irqrestore(&(rings[i])->producer_lock, flags); + } + + for (i = 0; i < nrings; ++i) + kfree(queues[i]); + + kfree(queues); + + return 0; + +nomem: + while (--i >= 0) + kfree(queues[i]); + + kfree(queues); + +noqueues: + return -ENOMEM; +} + static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; -- cgit From bf900b3dbefee49855c17aa09fb4245346a78fb3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:34 +0800 Subject: skb_array: add wrappers for resizing Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/skb_array.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 2dd0d1e4ee7e..f4dfade428f0 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -161,6 +161,15 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } +static inline int skb_array_resize_multiple(struct skb_array **rings, + int nrings, int size, gfp_t gfp) +{ + BUILD_BUG_ON(offsetof(struct skb_array, ring)); + return ptr_ring_resize_multiple((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); +} + static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } -- cgit From 08294a26e15d7baf1e14ee569e9f2bc82a7ae768 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:35 +0800 Subject: net: introduce NETDEV_CHANGE_TX_QUEUE_LEN This patch introduces a new event - NETDEV_CHANGE_TX_QUEUE_LEN - which will be triggered when tx_queue_len is changed. It can be used by net devices that want to do some processing at that time. An example is tun, which may want to resize its tx array when tx_queue_len is changed. Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e84d9d23c2d5..7dc2ec74122a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2237,6 +2237,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit From 1576d98605998fb59d121a39581129e134217182 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:36 +0800 Subject: tun: switch to use skb array for tx We used to queue tx packets in sk_receive_queue, which is less efficient since it requires spinlocks to synchronize between producer and consumer. This patch tries to address this by: - switch from sk_receive_queue to a skb_array, and resize it when tx_queue_len was changed. - introduce a new proto_ops peek_len which is used for peeking the skb length. - implement a tun version of peek_len for vhost_net to use and convert vhost_net to use peek_len if possible. Pktgen test shows about 15.3% improvement on guest receiving pps for small buffers: Before: ~1300000pps After : ~1500000pps Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 25aa03b51c4e..b9f0ff4d489c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -185,6 +185,7 @@ struct proto_ops { ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); + int (*peek_len)(struct socket *sock); }; #define DECLARE_SOCKADDR(type, dst, src) \ -- cgit From 4ae89ad92477219b504a49966ee010fe8dcb85af Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 Jun 2016 11:32:26 -0700 Subject: etherdevice.h & bridge: netfilter: Add and use ether_addr_equal_masked There are code duplications of a masked ethernet address comparison here so make it a separate function instead. Miscellanea: o Neaten alignment of FWINV macro uses to make it clearer for the reader Signed-off-by: Joe Perches Acked-by: David S. Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/etherdevice.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 37ff4a6faa9a..6fec9e81bd70 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -373,6 +373,29 @@ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) #endif } +/** + * ether_addr_equal_masked - Compare two Ethernet addresses with a mask + * @addr1: Pointer to a six-byte array containing the 1st Ethernet address + * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address + * @mask: Pointer to a six-byte array containing the Ethernet address bitmask + * + * Compare two Ethernet addresses with a mask, returns true if for every bit + * set in the bitmask the equivalent bits in the ethernet addresses are equal. + * Using a mask with all bits set is a slower ether_addr_equal. + */ +static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, + const u8 *mask) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + if ((addr1[i] ^ addr2[i]) & mask[i]) + return false; + } + + return true; +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. 
* @dev: Pointer to a device structure -- cgit From 1aacde3d22c42281236155c1ef6d7a5aa32a826b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:43 +0200 Subject: bpf: generally move prog destruction to RCU deferral Jann Horn reported following analysis that could potentially result in a very hard to trigger (if not impossible) UAF race, to quote his event timeline: - Set up a process with threads T1, T2 and T3 - Let T1 set up a socket filter F1 that invokes another filter F2 through a BPF map [tail call] - Let T1 trigger the socket filter via a unix domain socket write, don't wait for completion - Let T2 call PERF_EVENT_IOC_SET_BPF with F2, don't wait for completion - Now T2 should be behind bpf_prog_get(), but before bpf_prog_put() - Let T3 close the file descriptor for F2, dropping the reference count of F2 to 2 - At this point, T1 should have looked up F2 from the map, but not finished executing it - Let T3 remove F2 from the BPF map, dropping the reference count of F2 to 1 - Now T2 should call bpf_prog_put() (wrong BPF program type), dropping the reference count of F2 to 0 and scheduling bpf_prog_free_deferred() via schedule_work() - At this point, the BPF program could be freed - BPF execution is still running in a freed BPF program While at PERF_EVENT_IOC_SET_BPF time it's only guaranteed that the perf event fd we're doing the syscall on doesn't disappear from underneath us for whole syscall time, it may not be the case for the bpf fd used as an argument only after we did the put. It needs to be a valid fd pointing to a BPF program at the time of the call to make the bpf_prog_get() and while T2 gets preempted, F2 must have dropped reference to 1 on the other CPU. The fput() from the close() in T3 should also add additionally delay to the reference drop via exit_task_work() when bpf_prog_release() gets called as well as scheduling bpf_prog_free_deferred(). That said, it makes nevertheless sense to move the BPF prog destruction generally after RCU grace period to guarantee that such scenario above, but also others as recently fixed in ceb56070359b ("bpf, perf: delay release of BPF prog after grace period") with regards to tail calls won't happen. Integrating bpf_prog_free_deferred() directly into the RCU callback is not allowed since the invocation might happen from either softirq or process context, so we're not permitted to block. Reviewing all bpf_prog_put() invocations from eBPF side (note, cBPF -> eBPF progs don't use this for their destruction) with call_rcu() look good to me. Since we don't know whether at the time of attaching the program, we're already part of a tail call map, we need to use RCU variant. However, due to this, there won't be severely more stress on the RCU callback queue: situations with above bpf_prog_get() and bpf_prog_put() combo in practice normally won't lead to releases, but even if they would, enough effort/ cycles have to be put into loading a BPF program into the kernel already. Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8411032ac90d..749549888b86 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -220,7 +220,6 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -281,10 +280,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } - -static inline void bpf_prog_put_rcu(struct bpf_prog *prog) -{ -} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ -- cgit From 113214be7f6c98dd6d0435e4765aea8dea91662c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:44 +0200 Subject: bpf: refactor bpf_prog_get and type check into helper Since bpf_prog_get() and program type check is used in a couple of places, refactor this into a small helper function that we can make use of. Since the non RO prog->aux part is not used in performance critical paths and a program destruction via RCU is rather very unlikley when doing the put, we shouldn't have an issue just doing the bpf_prog_get() + prog->type != type check, but actually not taking the ref at all (due to being in fdget() / fdput() section of the bpf fd) is even cleaner and makes the diff smaller as well, so just go for that. Callsites are changed to make use of the new helper where possible. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 749549888b86..b3336b4f5d04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -218,6 +218,7 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -277,6 +278,12 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } -- cgit From 1f3fe7ebf6136c341012db9f554d4caa566fcbaa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:42 -0700 Subject: cgroup: Add cgroup_get_from_fd Add a helper function to get a cgroup2 from a fd. It will be stored in a bpf array (BPF_MAP_TYPE_CGROUP_ARRAY) which will be introduced in the later patch. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Signed-off-by: David S. 
Miller --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a20320c666fd..984f73b719a9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); +struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit From 4ed8ec521ed57c4e207ad464ca0388776de74d4b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:43 -0700 Subject: cgroup: bpf: Add BPF_MAP_TYPE_CGROUP_ARRAY Add a BPF_MAP_TYPE_CGROUP_ARRAY and its bpf_map_ops's implementations. To update an element, the caller is expected to obtain a cgroup2 backed fd by open(cgroup2_dir) and then update the array with that fd. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index be6ac1291680..26c04be32003 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { -- cgit From 4a482f34afcc162d8456f449b137ec2a95be60d8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:44 -0700 Subject: cgroup: bpf: Add bpf_skb_in_cgroup_proto Adds a bpf helper, bpf_skb_in_cgroup, to decide if a skb->sk belongs to a descendant of a cgroup2. It is similar to the feature added in netfilter: commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY which will be used by the bpf_skb_in_cgroup. Modifications to the bpf verifier is to ensure BPF_MAP_TYPE_CGROUP_ARRAY and bpf_skb_in_cgroup() are always used together. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 26c04be32003..f44504d875e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -337,6 +337,17 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_type, + /** + * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_in_cgroup, __BPF_FUNC_MAX_ID, }; -- cgit From acbc2004d7129a1ecf02414c1da8808bdc06d5a2 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:58 +0300 Subject: net/mlx5: Introduce offloads steering namespace Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated with flow steering rules that deal with rules that have have to be executed before the EN NIC steering rules are matched. 
The namespace is located after the bypass name-space and before the kernel name-space. Therefore, it precedes the HW processing done for rules set for the kernel NIC name-space. Under SRIOV, it would allow us to match on e-switch missed packet and forward them to the relevant VF representor TIR. Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107d9c19..6ad111938709 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- cgit From 08f4b5918b2d6b491f0403cc1886f5cdccef89bb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:01 +0300 Subject: net/devlink: Add E-Switch mode control Add the commands to set and show the mode of SRIOV E-Switch, two modes are supported: * legacy: operating in the "old" L2 based mode (DMAC --> VF vport) * switchdev: the E-Switch is referred to as whitebox switch configured using standard tools such as tc, bridge, openvswitch etc. To allow working with the tools, for each VF, a VF representor netdevice is created by the E-Switch manager vendor device driver instance (e.g PF). Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +++ include/uapi/linux/devlink.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61cb320..c99ffe8cef3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b26fa6..915bfa74458c 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit From b50d292b4399f4eb11e82d0430aacf62dd5d5365 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:04 +0300 Subject: net/mlx5e: Create NIC global resources only once To allow creating more than one netdev over the same PCI function, we change the driver such that global NIC resources are created once and later be shared amongst all the mlx5e netdevs running over that port. Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from being kept in the mlx5e priv part to a new resources structure (mlx5e_resources) placed under the mlx5_core device. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46260fdc5305..e22b3456b2ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -578,6 +578,18 @@ enum mlx5_pci_status { MLX5_PCI_STATUS_ENABLED, }; +struct mlx5_td { + struct list_head tirs_list; + u32 tdn; +}; + +struct mlx5e_resources { + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -602,6 +614,7 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; + struct mlx5e_resources mlx5e_res; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif -- cgit From c37a2dfa67f7920b14ea77dc9f9f9660f7a1f6dd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 Jun 2016 13:25:22 -0700 Subject: netfilter: Convert FWINV<[foo]> macros and uses to NF_INVF netfilter uses multiple FWINV #defines with identical form that hide a specific structure variable and dereference it with a invflags member. $ git grep "#define FWINV" include/linux/netfilter_bridge/ebtables.h:#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) net/bridge/netfilter/ebtables.c:#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) net/ipv4/netfilter/arp_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) net/ipv4/netfilter/ip_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) net/ipv6/netfilter/ip6_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) net/netfilter/xt_tcpudp.c:#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) Consolidate these macros into a single NF_INVF macro. 
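As an editorial illustration of the conversion just described (not taken from this patch), a match function that used the old per-file FWINV macro now spells the structure out explicitly. The sketch below is modelled on the input-interface check in ip_tables.c; the function name is hypothetical, while NF_INVF(), ifname_compare_aligned(), struct ipt_ip and IPT_INV_VIA_IN are existing kernel symbols.

#include <linux/netfilter/x_tables.h>		/* NF_INVF(), ifname_compare_aligned() */
#include <linux/netfilter_ipv4/ip_tables.h>	/* struct ipt_ip, IPT_INV_VIA_IN */

/* Sketch: does the input interface fail to match, honouring the invert flag? */
static bool iniface_mismatch(const struct ipt_ip *ipinfo, const char *indev)
{
	unsigned long ret = ifname_compare_aligned(indev, ipinfo->iniface,
						   ipinfo->iniface_mask);

	/* old:  return FWINV(ret != 0, IPT_INV_VIA_IN);   (FWINV hides "ipinfo")
	 * new:  the structure holding ->invflags is passed explicitly
	 */
	return NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0);
}
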
Miscellanea: o Neaten the alignment around these uses o A few lines are > 80 columns for intelligibility Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ include/linux/netfilter_bridge/ebtables.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dc4f58a3cdcc..e94e81ab2b58 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -6,6 +6,10 @@ #include #include +/* Test a struct->invflags and a boolean for inequality */ +#define NF_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->invflags & (flag))) + /** * struct xt_action_param - parameters for matches/targets * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..984b2112c77b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -115,8 +115,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -/* Used in the kernel match() functions */ -#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) -- cgit From f86dec94e3a86c992a637df1c301a4df25a85801 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 15 Apr 2016 18:14:25 +0200 Subject: NFC: hci: delete unused nfc_llc_get_rx_head_tail_room() It used to be EXPORTed, but then EXPORT usage was cleaned up (in 2012), without noticing that the function has no users at all (and curiously, never had any users). Delete it. While at it, remove non-static "inline" hints on nearby functions: these hints don't work across compilation units anyway, and these functions are not used in their .c file, thus they are never inlined. IOW: "inline" here does not help in any way. Signed-off-by: Denys Vlasenko CC: Samuel Ortiz CC: Christophe Ricard CC: linux-wireless@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Samuel Ortiz --- include/net/nfc/llc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h index c25fbdee0d61..7ecb45757897 100644 --- a/include/net/nfc/llc.h +++ b/include/net/nfc/llc.h @@ -37,10 +37,6 @@ struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, int tx_tailroom, llc_failure_t llc_failure); void nfc_llc_free(struct nfc_llc *llc); -void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, - int *rx_tailroom); - - int nfc_llc_start(struct nfc_llc *llc); int nfc_llc_stop(struct nfc_llc *llc); void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb); -- cgit From 7854a44526de84142e367f08288c9f3a33c4c8ee Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 7 Jun 2016 16:21:52 +0200 Subject: NFC: digital: Add a delay between poll cycles This replaces the polling work struct with a delayed work struct and add a 10 ms delay between 2 poll cycles. This avoids to flood the device with 'switch off'/'switch on' commands. 
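A minimal sketch of what this change means for the code that kicks off the next poll cycle; the helper name below is invented for illustration, and whether the driver calls schedule_delayed_work() directly or queues the work on its own workqueue is an assumption, not something this hunk shows.

#include <linux/jiffies.h>
#include <linux/workqueue.h>
#include <net/nfc/digital.h>	/* struct nfc_digital_dev, poll_work is now a delayed_work */

static void digital_kick_poll(struct nfc_digital_dev *ddev)
{
	/* before: schedule_work(&ddev->poll_work);
	 * after:  wait 10 ms between two poll cycles so the device is not
	 *         flooded with 'switch off'/'switch on' commands
	 */
	schedule_delayed_work(&ddev->poll_work, msecs_to_jiffies(10));
}
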
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 0ae101eef0f4..506e3f6eabef 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -220,7 +220,7 @@ struct nfc_digital_dev { struct list_head cmd_queue; struct mutex cmd_lock; - struct work_struct poll_work; + struct delayed_work poll_work; u8 curr_protocol; u8 curr_rf_tech; -- cgit From 09748a22f4ab7b0ab5a83c432f6e18f65f18e09b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 9 May 2016 18:41:08 +0200 Subject: batman-adv: add generic netlink family for batman-adv debugfs is currently severely broken virtually everywhere in the kernel where files are dynamically added and removed (see http://lkml.iu.edu/hypermail/linux/kernel/1506.1/02196.html for some details). In addition to that, debugfs is not namespace-aware. Instead of adding new debugfs entries, the whole infrastructure should be moved to netlink. This will fix the long standing problem of large buffers for debug tables and hard to parse text files. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Strip down patch to only add genl family, add missing kerneldoc] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/uapi/linux/batman_adv.h (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h new file mode 100644 index 000000000000..79f797281b87 --- /dev/null +++ b/include/uapi/linux/batman_adv.h @@ -0,0 +1,53 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _UAPI_LINUX_BATMAN_ADV_H_ +#define _UAPI_LINUX_BATMAN_ADV_H_ + +#define BATADV_NL_NAME "batadv" + +/** + * enum batadv_nl_attrs - batman-adv netlink attributes + * + * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + * @__BATADV_ATTR_AFTER_LAST: internal use + * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available + * @BATADV_ATTR_MAX: highest attribute number currently defined + */ +enum batadv_nl_attrs { + BATADV_ATTR_UNSPEC, + /* add attributes above here, update the policy in netlink.c */ + __BATADV_ATTR_AFTER_LAST, + NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, + BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1 +}; + +/** + * enum batadv_nl_commands - supported batman-adv netlink commands + * + * @BATADV_CMD_UNSPEC: unspecified command to catch errors + * @__BATADV_CMD_AFTER_LAST: internal use + * @BATADV_CMD_MAX: highest used command number + */ +enum batadv_nl_commands { + BATADV_CMD_UNSPEC, + /* add new commands above here */ + __BATADV_CMD_AFTER_LAST, + BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 +}; + +#endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ -- cgit From 5da0aef5e93591b373010c10f374c4161b37728c Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 9 May 2016 18:41:09 +0200 Subject: batman-adv: add netlink command to query generic mesh information files BATADV_CMD_GET_MESH_INFO is used to query basic information about a batman-adv softif (name, index and MAC address for both the softif and the primary hardif; routing algorithm; batman-adv version). Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Reduce the number of changes to BATADV_CMD_GET_MESH_INFO, add missing kerneldoc, add policy for attributes] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 79f797281b87..c39623c7109e 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -24,12 +24,28 @@ * enum batadv_nl_attrs - batman-adv netlink attributes * * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + * @BATADV_ATTR_VERSION: batman-adv version string + * @BATADV_ATTR_ALGO_NAME: name of routing algorithm + * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface + * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface + * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface + * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface + * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface + * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined */ enum batadv_nl_attrs { BATADV_ATTR_UNSPEC, + BATADV_ATTR_VERSION, + BATADV_ATTR_ALGO_NAME, + BATADV_ATTR_MESH_IFINDEX, + BATADV_ATTR_MESH_IFNAME, + BATADV_ATTR_MESH_ADDRESS, + BATADV_ATTR_HARD_IFINDEX, + BATADV_ATTR_HARD_IFNAME, + BATADV_ATTR_HARD_ADDRESS, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -40,11 +56,13 @@ enum batadv_nl_attrs { * enum batadv_nl_commands - supported batman-adv netlink commands * * @BATADV_CMD_UNSPEC: unspecified command to catch errors + * @BATADV_CMD_GET_MESH_INFO: Query basic 
information about batman-adv device * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ enum batadv_nl_commands { BATADV_CMD_UNSPEC, + BATADV_CMD_GET_MESH_INFO, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 -- cgit From 33a3bb4a3345bb511f9c69c913da95d4693e2a4e Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 5 May 2016 13:09:43 +0200 Subject: batman-adv: throughput meter implementation The throughput meter module is a simple, kernel-space replacement for throughput measurement tools like iperf and netperf. It is intended to approximate TCP behaviour. It is invoked through batctl: the protocol is connection oriented, with cumulative acknowledgment and a dynamic-size sliding window. The test *can* be interrupted by batctl. A receiver side timeout avoids unlimited waiting for sender packets: after one second of inactivity, the receiver aborts the ongoing test. Based on a prototype from Edo Monticelli Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index c39623c7109e..0fbf6fd4711b 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -20,6 +20,8 @@ #define BATADV_NL_NAME "batadv" +#define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" + /** * enum batadv_nl_attrs - batman-adv netlink attributes * * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors @@ -32,6 +34,12 @@ * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface + * @BATADV_ATTR_ORIG_ADDRESS: originator mac address + * @BATADV_ATTR_TPMETER_RESULT: result of run (see batadv_tp_meter_status) + * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took + * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run + * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session + * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -46,6 +54,12 @@ enum batadv_nl_attrs { BATADV_ATTR_UNSPEC, BATADV_ATTR_VERSION, BATADV_ATTR_ALGO_NAME, BATADV_ATTR_MESH_IFINDEX, BATADV_ATTR_MESH_IFNAME, BATADV_ATTR_MESH_ADDRESS, BATADV_ATTR_HARD_IFINDEX, BATADV_ATTR_HARD_IFNAME, BATADV_ATTR_HARD_ADDRESS, + BATADV_ATTR_ORIG_ADDRESS, + BATADV_ATTR_TPMETER_RESULT, + BATADV_ATTR_TPMETER_TEST_TIME, + BATADV_ATTR_TPMETER_BYTES, + BATADV_ATTR_TPMETER_COOKIE, + BATADV_ATTR_PAD, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -57,15 +71,44 @@ enum batadv_nl_attrs { * * @BATADV_CMD_UNSPEC: unspecified command to catch errors * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device + * @BATADV_CMD_TP_METER: Start a tp meter session + * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ enum batadv_nl_commands { BATADV_CMD_UNSPEC, BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_TP_METER, + BATADV_CMD_TP_METER_CANCEL, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 }; +/** + * enum batadv_tp_meter_reason - reason of a tp meter test run stop + * 
@BATADV_TP_REASON_COMPLETE: sender finished tp run + * @BATADV_TP_REASON_CANCEL: sender was stopped during run + * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or didn't + * answer + * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit + * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node already + * ongoing + * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory + * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface + * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions + */ +enum batadv_tp_meter_reason { + BATADV_TP_REASON_COMPLETE = 3, + BATADV_TP_REASON_CANCEL = 4, + /* error status >= 128 */ + BATADV_TP_REASON_DST_UNREACHABLE = 128, + BATADV_TP_REASON_RESEND_LIMIT = 129, + BATADV_TP_REASON_ALREADY_ONGOING = 130, + BATADV_TP_REASON_MEMORY_ERROR = 131, + BATADV_TP_REASON_CANT_SEND = 132, + BATADV_TP_REASON_TOO_MANY = 133, +}; + #endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ -- cgit From 8b10cab64c134ffbffac96edd1899d303d3afcac Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 2 Jul 2016 06:43:14 -0400 Subject: net: simplify and make pkt_type_ok() available for other users Suggested-by: Daniel Borkmann Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dc0fca747c5e..638b0e004310 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include #include #include +#include #include /* The interface for checksum offload between the stack and networking drivers @@ -881,6 +882,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } +/* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +*/ +static inline bool skb_pkt_type_ok(u32 ptype) +{ + return ptype <= PACKET_OTHERHOST; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); -- cgit From ff202ee1ed8f032f05b80b541664cf02e75d7080 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 2 Jul 2016 06:43:15 -0400 Subject: net sched actions: skbedit add support for mod-ing skb pkt_type Extremely useful for setting packet type to host so i dont have to modify the dst mac address using pedit (which requires that i know the mac address) Example usage: tc filter add dev eth0 parent ffff: protocol ip pref 9 u32 \ match ip src 5.5.5.5/32 \ flowid 1:5 action skbedit ptype host This will tag all packets incoming from 5.5.5.5 with type PACKET_HOST Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbedit.h | 10 +++++----- include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b496d5ad7d42..d01a5d40cfb5 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -24,11 +24,11 @@ struct tcf_skbedit { struct tcf_common common; - u32 flags; - u32 priority; - u32 mark; - u16 queue_mapping; - /* XXX: 16-bit pad here? 
*/ + u32 flags; + u32 priority; + u32 mark; + u16 queue_mapping; + u16 ptype; }; #define to_skbedit(a) \ container_of(a->priv, struct tcf_skbedit, common) diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index fecb5cc48c40..a4d00c608d8f 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 +#define SKBEDIT_F_PTYPE 0x8 struct tc_skbedit { tc_gen; @@ -40,6 +41,7 @@ enum { TCA_SKBEDIT_QUEUE_MAPPING, TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, + TCA_SKBEDIT_PTYPE, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) -- cgit From 13c5c240f789bbd2bcacb14a23771491485ae61f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 3 Jul 2016 01:28:47 +0200 Subject: bpf: add bpf_get_hash_recalc helper If skb_clear_hash() was invoked due to mangling of relevant headers and BPF program needs skb->hash later on, we can add a helper to trigger hash recalculation via bpf_get_hash_recalc(). The helper will return the newly retrieved hash directly, but later access can also be done via skb context again through skb->hash directly (inline) without needing to call the helper once more. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f44504d875e2..c14ca1cd6297 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -348,6 +348,15 @@ enum bpf_func_id { * < 0 error */ BPF_FUNC_skb_in_cgroup, + + /** + * bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + */ + BPF_FUNC_get_hash_recalc, + __BPF_FUNC_MAX_ID, }; -- cgit From 7ce856aaaf13a5dc969ac5f998e5daaf1abe4cd2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 4 Jul 2016 08:23:12 +0200 Subject: mlxsw: spectrum: Add couple of lower device helper functions Add functions that iterate over lower devices and find port device. As a dependency add netdev_for_each_all_lower_dev and netdev_for_each_all_lower_dev_rcu macro with netdev_all_lower_get_next and netdev_all_lower_get_next_rcu shelpers. Also, add functions to return mlxsw struct according to lower device found and mlxsw_port struct with a reference to lower device. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dc2ec74122a..0c6ee2c5099f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3804,12 +3804,30 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); + #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) +struct net_device *netdev_all_lower_get_next(struct net_device *dev, + struct list_head **iter); +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter); + +#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next(dev, &(iter))) + +#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); -- cgit From c5bb17302e734967822be559cf661704b707b4ed Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:05 +0300 Subject: net/mlx5: Refactor mlx5_add_flow_rule Reduce the set of arguments passed to mlx5_add_flow_rule by introducing flow_spec structure. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6ad111938709..d22fe7e5a39a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -68,6 +68,12 @@ struct mlx5_flow_group; struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_spec { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -116,9 +122,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); */ struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest); -- cgit From fba53f7b571925b8a0d59d460ad6de1fda928a3e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:06 +0300 Subject: net/mlx5: Introduce mlx5_flow_steering structure Instead of having all steering private name spaces and steering module fields flat in mlx5_core_priv, we wrap them in mlx5_flow_steering for better modularity and API exposure. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e22b3456b2ee..f21c45941887 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,14 +550,10 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; - struct mlx5_flow_root_namespace *root_ns; - struct mlx5_flow_root_namespace *fdb_root_ns; - struct mlx5_flow_root_namespace *esw_egress_root_ns; - struct mlx5_flow_root_namespace *esw_ingress_root_ns; - struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; }; -- cgit From 6dc6071cfcde6cf687f8d288c9cef9ee6ee24dc7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:08 +0300 Subject: net/mlx5e: Add ethtool flow steering support Implement etrhtool set_rxnfc callback to support ethtool flow spec direct steering. This patch adds only the support of ether flow type spec. L3/L4 flow specs support will be added in downstream patches. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index d22fe7e5a39a..e036d6030867 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -55,6 +55,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_OFFLOADS, + MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- cgit From 503eebc265dcf5c512454fd5a6b6673ea4f1d7f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:37 +0200 Subject: net: add dev arg to ndo_neigh_construct/destroy As the following patch will allow upper devices to follow the call down lower devices, we need to add dev here and not rely on n->dev. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c6ee2c5099f..91af73c9dd51 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1209,8 +1209,10 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); - int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); + int (*ndo_neigh_construct)(struct net_device *dev, + struct neighbour *n); + void (*ndo_neigh_destroy)(struct net_device *dev, + struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], -- cgit From 18bfb924f0005a728caadd90ba755b2a660bf441 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:38 +0200 Subject: net: introduce default neigh_construct/destroy ndo calls for L2 upper devices L2 upper device needs to propagate neigh_construct/destroy calls down to lower devices. Do this by defining default ndo functions and use them in team, bond, bridge and vlan. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91af73c9dd51..49736a31acaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3845,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n); +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -- cgit From 2a4501ae18b52fcdf553404286e6cefabd1d17ec Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 5 Jul 2016 11:27:42 +0200 Subject: neigh: Send a notification when DELAY_PROBE_TIME changes When the data plane is offloaded the traffic doesn't go through the networking stack. Therefore, after first resolving a neighbour the NUD state machine will transition it from REACHABLE to STALE until it's finally deleted by the garbage collector. To prevent such situations the offloading driver should notify the NUD state machine on any neighbours that were recently used. The driver's polling interval should be set so that the NUD state machine can function as if the traffic wasn't offloaded. Currently, there are no in-tree drivers that can report confirmation for a neighbour, but only 'used' indication. Therefore, the polling interval should be set according to DELAY_FIRST_PROBE_TIME, as a neighbour will transition from REACHABLE state to DELAY (instead of STALE) if "a packet was sent within the last DELAY_FIRST_PROBE_TIME seconds" (RFC 4861). Send a netevent whenever the DELAY_FIRST_PROBE_TIME changes - either via netlink or sysctl - so that offloading drivers can correctly set their polling interval. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/netevent.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netevent.h b/include/net/netevent.h index d8bbb38584b6..f440df172b56 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -24,6 +24,7 @@ struct netevent_redirect { enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ + NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ }; int register_netevent_notifier(struct notifier_block *nb); -- cgit From c1adf20052d80f776849fa2c1acb472cdeb7786c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Jul 2016 07:53:51 +0100 Subject: Introduce rb_replace_node_rcu() Implement an RCU-safe variant of rb_replace_node() and rearrange rb_replace_node() to do things in the same order. 
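A hedged sketch of the intended use of rb_replace_node_rcu(): a writer, serialized by its own lock, swaps one node for an equally-keyed replacement while readers keep traversing the tree under RCU. The struct foo container, its spinlock and the key field are invented for illustration; only rb_replace_node_rcu() and kfree_rcu() come from the kernel API.

#include <linux/rbtree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct rb_node node;
	int key;			/* the replacement must sort identically */
	struct rcu_head rcu;
};

static void foo_replace(struct rb_root *root, spinlock_t *lock,
			struct foo *victim, struct foo *new)
{
	spin_lock(lock);		/* writers are serialized by the caller's lock */
	new->key = victim->key;
	rb_replace_node_rcu(&victim->node, &new->node, root);
	spin_unlock(lock);
	kfree_rcu(victim, rcu);		/* RCU readers may still hold "victim" */
}
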
Signed-off-by: David Howells Acked-by: Peter Zijlstra (Intel) --- include/linux/rbtree.h | 2 ++ include/linux/rbtree_augmented.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index b6900099ea81..e585018498d5 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -76,6 +76,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 14d7b831b63a..d076183e49be 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -130,6 +130,19 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, WRITE_ONCE(root->rb_node, new); } +static inline void +__rb_change_child_rcu(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + rcu_assign_pointer(parent->rb_left, new); + else + rcu_assign_pointer(parent->rb_right, new); + } else + rcu_assign_pointer(root->rb_node, new); +} + extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); -- cgit From 995f1405610bd8446c5be37d2ffc031a7729e406 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Jul 2016 13:44:53 -0700 Subject: rcu: Suppress sparse warnings for rcu_dereference_raw() Data structures that are used both with and without RCU protection are difficult to write in a sparse-clean manner. If you mark the relevant pointers with __rcu, sparse will complain about all non-RCU uses, but if you don't mark those pointers, sparse will complain about all RCU uses. This commit therefore suppresses sparse warnings for rcu_dereference_raw(), allowing mixed-protection data structures to avoid these warnings. Reported-by: David Howells Signed-off-by: Paul E. McKenney Signed-off-by: David Howells --- include/linux/rcupdate.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5f1533e3d032..85830e6c797b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -611,6 +611,12 @@ static inline void rcu_preempt_sleep_check(void) rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) +#define rcu_dereference_raw(p) \ +({ \ + /* Dependency order vs. p above. */ \ + typeof(p) ________p1 = lockless_dereference(p); \ + ((typeof(*p) __force __kernel *)(________p1)); \ +}) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -729,8 +735,6 @@ static inline void rcu_preempt_sleep_check(void) __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) -#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ - /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. 
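A short sketch of the "mixed protection" case this entry is about: a pointer that is read both under rcu_read_lock() and under the update-side lock is left without the __rcu annotation, and rcu_dereference_raw() now reads it without a sparse warning in either context. The structure and function names below are illustrative only.

#include <linux/rcupdate.h>

struct bar {
	struct bar *next;	/* deliberately not marked __rcu */
};

static struct bar *bar_next(struct bar *b)
{
	/* valid from both RCU readers and lock-holding updaters, provided
	 * the caller guarantees "b" itself cannot go away underneath it
	 */
	return rcu_dereference_raw(b->next);
}
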
-- cgit From c6e6a0c8be575c830a97b1942dabeab70f423fe0 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Tue, 5 Jul 2016 15:23:08 +0300 Subject: nl80211: Add API to support VHT MU-MIMO air sniffer add API to support VHT MU-MIMO air sniffer. in MU-MIMO there are parallel frames on the air while the HW has only one RX. add the capability to sniff one of the MU-MIMO parallel frames by giving the sniffer additional information so it'll know which of the parallel frames it shall follow. Add attribute - NL80211_ATTR_MU_MIMO_GROUP_DATA - for getting a MU-MIMO groupID in order to monitor packets from that group using VHT MU-MIMO. And add attribute -NL80211_ATTR_MU_MIMO_FOLLOW_ADDR - for passing MAC address to monitor mode. that option will be used by VHT MU-MIMO air sniffer to follow a station according to it's MAC address using VHT MU-MIMO. Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++++-- include/uapi/linux/nl80211.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bbb00d8b2cd..fa4f0f793817 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -330,6 +330,9 @@ struct ieee80211_supported_band { * in a separate chapter. */ +#define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\ + WLAN_USER_POSITION_LEN) + /** * struct vif_params - describes virtual interface parameters * @use_4addr: use 4-address frames @@ -339,10 +342,13 @@ struct ieee80211_supported_band { * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. + * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only + * packets belonging to that MU-MIMO groupID. */ struct vif_params { - int use_4addr; - u8 macaddr[ETH_ALEN]; + int use_4addr; + u8 macaddr[ETH_ALEN]; + u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 53c8278827a0..1d7da7888dcf 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1829,6 +1829,25 @@ enum nl80211_commands { * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per * interface type. * + * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO + * groupID for monitor mode. + * The first 8 bytes are a mask that defines the membership in each + * group (there are 64 groups, group 0 and 63 are reserved), + * each bit represents a group and set to 1 for being a member in + * that group and 0 for not being a member. + * The remaining 16 bytes define the position in each group: 2 bits for + * each group. + * (smaller group numbers represented on most significant bits and bigger + * group numbers on least significant bits.) + * This attribute is used only if all interfaces are in monitor mode. + * Set this attribute in order to monitor packets using the given MU-MIMO + * groupID data. + * to turn off that feature set all the bits of the groupID to zero. + * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow + * when using MU-MIMO air sniffer. + * to turn that feature off set an invalid mac address + * (e.g. 
FF:FF:FF:FF:FF:FF) + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2213,6 +2232,9 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_EXT_CAPA, + NL80211_ATTR_MU_MIMO_GROUP_DATA, + NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4479,6 +4501,12 @@ enum nl80211_feature_flags { * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set * the ASSOC_REQ_USE_RRM flag in the association request even if * NL80211_FEATURE_QUIET is not advertized. + * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air + * sniffer which means that it can be configured to hear packets from + * certain groups which can be configured by the + * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, + * or can be configured to follow a station by configuring the + * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4486,6 +4514,7 @@ enum nl80211_feature_flags { enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VHT_IBSS, NL80211_EXT_FEATURE_RRM, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit From 1d76250bd34af86c6498fc51e50cab3bfbbeceaa Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 5 Jul 2016 17:10:13 +0300 Subject: nl80211: support beacon report scanning Beacon report radio measurement requires reporting observed BSSs on the channels specified in the beacon request. If the measurement mode is set to passive or active, it requires actually performing a scan (passive or active, accordingly), and reporting the time that the scan was started and the time each beacon/probe was received (both in terms of TSF of the BSS of the requesting AP). If the request mode is table, this information is optional. In addition, the radio measurement request specifies the channel dwell time for the measurement. In order to use scan for beacon report when the mode is active or passive, add a parameter to scan request that specifies the channel dwell time, and add scan start time and beacon received time to scan results information. Supporting beacon report is required for Multi Band Operation (MBO). Signed-off-by: Assaf Krauss Signed-off-by: David Spinadel Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 40 +++++++++++++++++++++++++++++++++++----- include/uapi/linux/nl80211.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa4f0f793817..e2658e392a1f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1423,6 +1423,21 @@ struct cfg80211_ssid { u8 ssid_len; }; +/** + * struct cfg80211_scan_info - information about completed scan + * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the + * wireless device that requested the scan is connected to. If this + * information is not available, this field is left zero. + * @tsf_bssid: the BSSID according to which %scan_start_tsf is set. 
+ * @aborted: set to true if the scan was aborted for any reason, + * userspace will be notified of that + */ +struct cfg80211_scan_info { + u64 scan_start_tsf; + u8 tsf_bssid[ETH_ALEN] __aligned(2); + bool aborted; +}; + /** * struct cfg80211_scan_request - scan request description * @@ -1433,12 +1448,17 @@ struct cfg80211_ssid { * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @duration: how long to listen on each channel, in TUs. If + * %duration_mandatory is not set, this is the maximum dwell time and + * the actual dwell time may be shorter. + * @duration_mandatory: if set, the scan duration must be as specified by the + * %duration field. * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for - * @aborted: (internal) scan request was notified as aborted + * @info: (internal) information about completed scan * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation @@ -1454,6 +1474,8 @@ struct cfg80211_scan_request { enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; + u16 duration; + bool duration_mandatory; u32 flags; u32 rates[NUM_NL80211_BANDS]; @@ -1467,7 +1489,8 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; unsigned long scan_start; - bool aborted, notified; + struct cfg80211_scan_info info; + bool notified; bool no_cck; /* keep last */ @@ -1600,12 +1623,19 @@ enum cfg80211_signal_type { * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of * ktime_get_boot_ns() is likely appropriate. + * @parent_tsf: the time at the start of reception of the first octet of the + * timestamp field of the frame. The time is the TSF of the BSS specified + * by %parent_bssid. + * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to + * the BSS that requested the scan in which the beacon/probe was received. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; + u64 parent_tsf; + u8 parent_bssid[ETH_ALEN] __aligned(2); }; /** @@ -4067,10 +4097,10 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * cfg80211_scan_done - notify that scan finished * * @request: the corresponding scan request - * @aborted: set to true if the scan was aborted for any reason, - * userspace will be notified of that + * @info: information about the completed scan */ -void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted); +void cfg80211_scan_done(struct cfg80211_scan_request *request, + struct cfg80211_scan_info *info); /** * cfg80211_sched_scan_results - notify that new scan results are available diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1d7da7888dcf..b39ccab45333 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1848,6 +1848,22 @@ enum nl80211_commands { * to turn that feature off set an invalid mac address * (e.g. FF:FF:FF:FF:FF:FF) * + * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually + * started (u64). 
The time is the TSF of the BSS the interface that + * requested the scan is connected to (if available, otherwise this + * attribute must not be included). + * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which + * %NL80211_ATTR_SCAN_START_TIME_TSF is set. + * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If + * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the + * maximum measurement duration allowed. This attribute is used with + * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN + * if the scan is used for beacon report radio measurement. + * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates + * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is + * mandatory. If this flag is not set, the duration is the maximum duration + * and the actual measurement duration may be shorter. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2235,6 +2251,11 @@ enum nl80211_attrs { NL80211_ATTR_MU_MIMO_GROUP_DATA, NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + NL80211_ATTR_SCAN_START_TIME_TSF, + NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, + NL80211_ATTR_MEASUREMENT_DURATION, + NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3496,6 +3517,12 @@ enum nl80211_bss_scan_width { * was last updated by a received frame. The value is expected to be * accurate to about 10ms. (u64, nanoseconds) * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first + * octet of the timestamp field of the last beacon/probe received for + * this BSS. The time is the TSF of the BSS specified by + * @NL80211_BSS_PARENT_BSSID. (u64). + * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF + * is set. * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -3517,6 +3544,8 @@ enum nl80211_bss { NL80211_BSS_PRESP_DATA, NL80211_BSS_LAST_SEEN_BOOTTIME, NL80211_BSS_PAD, + NL80211_BSS_PARENT_TSF, + NL80211_BSS_PARENT_BSSID, /* keep last */ __NL80211_BSS_AFTER_LAST, @@ -4507,6 +4536,16 @@ enum nl80211_feature_flags { * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, * or can be configured to follow a station by configuring the * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. + * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual + * time the scan started in scan results event. The time is the TSF of + * the BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the + * time the last beacon/probe was received. The time is the TSF of the + * BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of + * channel dwell time. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4515,6 +4554,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VHT_IBSS, NL80211_EXT_FEATURE_RRM, NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, + NL80211_EXT_FEATURE_SCAN_START_TIME, + NL80211_EXT_FEATURE_BSS_PARENT_TSF, + NL80211_EXT_FEATURE_SET_SCAN_DWELL, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit From 7947d3e075cde1a18e538f2dafbc850aa356ff79 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 5 Jul 2016 15:23:12 +0300 Subject: mac80211: Add support for beacon report radio measurement Add the following to support beacon report radio measurement with the measurement mode field set to passive or active: 1. Propagate the required scan duration to the device 2. Report the scan start time (in terms of TSF) 3. Report each BSS's detection time (also in terms of TSF) TSF times refer to the BSS that the interface that requested the scan is connected to. Signed-off-by: Assaf Krauss Signed-off-by: Avraham Stern [changed ath9k/10k, at76c59x-usb, iwlegacy, wl1251 and wlcore to match the new API] Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a52009ffc19f..b4faadbb4e01 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4697,9 +4697,10 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * any context, including hardirq context. * * @hw: the hardware that finished the scan - * @aborted: set to true if scan was aborted + * @info: information about the completed scan */ -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); +void ieee80211_scan_completed(struct ieee80211_hw *hw, + struct cfg80211_scan_info *info); /** * ieee80211_sched_scan_results - got results from scheduled scan -- cgit From 7d27a0ba7adc8ef30c2aae7592fce4c162aee4df Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 1 Jul 2016 10:19:34 +0900 Subject: cfg80211: Add mesh peer AID setting API Previously, mesh power management functionality works only with kernel MPM. Because user space MPM did not report mesh peer AID to kernel, the kernel could not identify the bit in TIM element. So this patch adds mesh peer AID setting API. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e2658e392a1f..9c23f4d33e06 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -780,6 +780,7 @@ enum station_parameters_apply_mask { * (bitmask of BIT(NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change + * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station @@ -811,6 +812,7 @@ struct station_parameters { u32 sta_modify_mask; int listen_interval; u16 aid; + u16 peer_aid; u8 supported_rates_len; u8 plink_action; u8 plink_state; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b39ccab45333..220694151434 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1864,6 +1864,9 @@ enum nl80211_commands { * mandatory. If this flag is not set, the duration is the maximum duration * and the actual measurement duration may be shorter. 
* + * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is + * used to pull the stored data for mesh peer in power save state. + * + * @NUM_NL80211_ATTR: total number of nl80211_attrs available + * @NL80211_ATTR_MAX: highest attribute number currently defined + * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2256,6 +2259,8 @@ enum nl80211_attrs { NL80211_ATTR_MEASUREMENT_DURATION, NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + NL80211_ATTR_MESH_PEER_AID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit From aece0c3fe1f06962a591268b8c236df0ae5d9e4e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 18 Jun 2016 10:45:33 +0200 Subject: nl802154: move PAD to right position The PAD define should be above the experimental support. We don't care if we break userspace in experimental stuff, but PAD is part of the existing UAPI. Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index fcab4de49951..7aad2fdfd16a 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -124,6 +124,8 @@ enum nl802154_attrs { NL802154_ATTR_ACKREQ_DEFAULT, + NL802154_ATTR_PAD, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -138,8 +140,6 @@ enum nl802154_attrs { NL802154_ATTR_SEC_KEY, #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ - NL802154_ATTR_PAD, - __NL802154_ATTR_AFTER_LAST, NL802154_ATTR_MAX = __NL802154_ATTR_AFTER_LAST - 1 }; -- cgit From 66e5c2672cd11b9310008099faf6a4ffb9dfb6d0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 18 Jun 2016 10:45:34 +0200 Subject: ieee802154: add netns support This patch adds netns support for the 802.15.4 subsystem. Most parts are copy&pasted from the wireless subsystem; it has an identical userspace API. 
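An editor's sketch (not part of the patch) of how the wpan_phy_net() helper added to cfg802154.h below gets used once lookups have to be namespace aware; the function name here is hypothetical.

#include <net/cfg802154.h>
#include <net/net_namespace.h>

/* skip phys that live in a different network namespace */
static bool wpan_phy_visible_in(struct wpan_phy *wpan_phy, struct net *net)
{
	/* wpan_phy_net_set(wpan_phy, new_net) is the writer-side counterpart,
	 * used when a phy is moved to another namespace (see the new
	 * NL802154_CMD_SET_WPAN_PHY_NETNS command)
	 */
	return net_eq(wpan_phy_net(wpan_phy), net);
}
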
Cc: Nicolas Dichtel Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 13 +++++++++++++ include/net/nl802154.h | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 171cd76558fb..795ca4008f72 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -219,9 +219,22 @@ struct wpan_phy { struct device dev; + /* the network namespace this phy lives in currently */ + possible_net_t _net; + char priv[0] __aligned(NETDEV_ALIGN); }; +static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy) +{ + return read_pnet(&wpan_phy->_net); +} + +static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) +{ + write_pnet(&wpan_phy->_net, net); +} + struct ieee802154_addr { u8 mode; __le16 pan_id; diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 7aad2fdfd16a..ddcee128f5d9 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -54,6 +54,8 @@ enum nl802154_commands { NL802154_CMD_SET_ACKREQ_DEFAULT, + NL802154_CMD_SET_WPAN_PHY_NETNS, + /* add new commands above here */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -126,6 +128,9 @@ enum nl802154_attrs { NL802154_ATTR_PAD, + NL802154_ATTR_PID, + NL802154_ATTR_NETNS_FD, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL -- cgit From 9cc577dd25b9762df7f353658426bb2e048c480a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:24 +0200 Subject: ieee802154: add ieee802154_skb_dst_pan helper This patch adds ieee802154_skb_dst_pan function to get the pointer address of the destination pan id at skb mac pointer. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 16 ++++++++++++++++ include/net/mac802154.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index acedbb68a5a3..91f4665fea63 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,8 @@ #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 #define IEEE802154_MAX_AUTH_TAG_LEN 16 +#define IEEE802154_FC_LEN 2 +#define IEEE802154_SEQ_LEN 1 /* General MAC frame format: * 2 bytes: Frame Control @@ -221,9 +223,14 @@ enum { #define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 +#define IEEE802154_FCTL_DADDR 0x0c00 #define IEEE802154_FTYPE_DATA 0x0001 +#define IEEE802154_FCTL_ADDR_NONE 0x0000 +#define IEEE802154_FCTL_DADDR_SHORT 0x0800 +#define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 + /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder @@ -261,6 +268,15 @@ static inline bool ieee802154_is_intra_pan(__le16 fc) return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); } +/* + * ieee802154_daddr_mode - get daddr mode from fc + * @fc: frame control bytes in little-endian byteorder + */ +static inline __le16 ieee802154_daddr_mode(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: diff --git a/include/net/mac802154.h b/include/net/mac802154.h index e465c8551ac3..b3f7cd868fe9 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -257,6 +257,35 @@ static inline __le16 ieee802154_get_fc_from_skb(const 
struct sk_buff *skb) return get_unaligned_le16(skb_mac_header(skb)); } +/** + * ieee802154_skb_dst_pan - get the pointer to destination pan field + * @fc: mac header frame control field + * @skb: skb where the destination pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan; + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + dst_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + dst_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + dst_pan = NULL; + break; + } + + return dst_pan; +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From 19580cc1ed299c736b56b45c7576b477f185f8f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:25 +0200 Subject: ieee802154: add ieee802154_skb_src_pan helper This patch adds ieee802154_skb_src_pan function to get the pointer address of the source pan id at skb mac pointer. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 13 ++++++++++ include/net/mac802154.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 91f4665fea63..ddb890174a0e 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -50,6 +50,7 @@ #define IEEE802154_EXTENDED_ADDR_LEN 8 #define IEEE802154_SHORT_ADDR_LEN 2 +#define IEEE802154_PAN_ID_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 @@ -224,12 +225,15 @@ enum { #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FCTL_DADDR 0x0c00 +#define IEEE802154_FCTL_SADDR 0xc000 #define IEEE802154_FTYPE_DATA 0x0001 #define IEEE802154_FCTL_ADDR_NONE 0x0000 #define IEEE802154_FCTL_DADDR_SHORT 0x0800 #define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 +#define IEEE802154_FCTL_SADDR_SHORT 0x8000 +#define IEEE802154_FCTL_SADDR_EXTENDED 0xc000 /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA @@ -277,6 +281,15 @@ static inline __le16 ieee802154_daddr_mode(__le16 fc) return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); } +/* + * ieee802154_saddr_mode - get saddr mode from fc + * @fc: frame control bytes in little-endian byteorder + */ +static inline __le16 ieee802154_saddr_mode(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_SADDR); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: diff --git a/include/net/mac802154.h b/include/net/mac802154.h index b3f7cd868fe9..ec01b35bc969 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -286,6 +286,65 @@ static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc, return dst_pan; } +/** + * ieee802154_skb_src_pan - get the pointer to source pan field + * @fc: mac header frame control field + * @skb: skb where the source pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_src_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *src_pan; + + switch (ieee802154_saddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_SADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_SADDR_EXTENDED): + /* if 
intra-pan and source addr mode is non none, + * then source pan id is equal destination pan id. + */ + if (ieee802154_is_intra_pan(fc)) { + src_pan = ieee802154_skb_dst_pan(fc, skb); + break; + } + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_SHORT_ADDR_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_EXTENDED_ADDR_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + + return src_pan; +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From 0ea0b9af9b7599ada307258dc841f4300873e8a1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:26 +0200 Subject: ieee802154: 6lowpan: fix intra pan id check The RIOT-OS stack does send intra-pan frames but don't set the intra pan flag inside the mac header. It seems this is valid frame addressing but inefficient. Anyway this patch adds a new function for intra pan addressing, doesn't matter if intra pan flag or source and destination are the same. The newly introduction function will be used to check on intra pan addressing for 6lowpan. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index ec01b35bc969..d757edd0b0b7 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -345,6 +345,25 @@ static inline unsigned char *ieee802154_skb_src_pan(__le16 fc, return src_pan; } +/** + * ieee802154_skb_is_intra_pan_addressing - checks whenever the mac addressing + * is an intra pan communication + * @fc: mac header frame control field + * @skb: skb where the source and destination pan should be get from + */ +static inline bool ieee802154_skb_is_intra_pan_addressing(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb), + *src_pan = ieee802154_skb_src_pan(fc, skb); + + /* if one is NULL is no intra pan addressing */ + if (!dst_pan || !src_pan) + return false; + + return !memcmp(dst_pan, src_pan, IEEE802154_PAN_ID_LEN); +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From aaa7088eb29a0ffe6cf8d8a443695df41e5a62f3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:27 +0200 Subject: ieee802154: fix skb get fc on big endian This patch fixes ieee802154_get_fc_from_skb function on big endian machines. The function get_unaligned_le16 converts the byte order to host byte order but we want to keep the byte order like in mac header. 
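To make the byte-order requirement concrete, here is a small editorial sketch (not taken from any of the patches above) showing how the new helpers compose once the frame control field is kept in on-wire __le16 order; example_same_dst_pan() and its pan_id parameter are invented names for illustration only:

static bool example_same_dst_pan(const struct sk_buff *skb, __le16 pan_id)
{
        /* fc keeps the two MAC header bytes exactly as received, so the
         * cpu_to_le16(IEEE802154_FCTL_*) comparisons inside
         * ieee802154_skb_dst_pan() behave the same on little and big
         * endian hosts.
         */
        __le16 fc = ieee802154_get_fc_from_skb(skb);
        unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb);

        return dst_pan && !memcmp(dst_pan, &pan_id, IEEE802154_PAN_ID_LEN);
}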
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d757edd0b0b7..bb7bfecc5ab3 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -247,6 +247,8 @@ struct ieee802154_ops { */ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) { + __le16 fc; + /* check if we can fc at skb_mac_header of sk buffer */ if (unlikely(!skb_mac_header_was_set(skb) || (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) { @@ -254,7 +256,8 @@ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) return cpu_to_le16(0); } - return get_unaligned_le16(skb_mac_header(skb)); + memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN); + return fc; } /** -- cgit From 048e7f7e66a8693fe1707997bffd19d08cde08f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:30 +0200 Subject: ieee802154: cleanup WARN_ON for fc fetch This patch cleanups the WARN_ON which occurs when the sk buffer has insufficient buffer space by moving the WARN_ON into if condition. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index bb7bfecc5ab3..286824acd008 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -250,11 +250,10 @@ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) __le16 fc; /* check if we can fc at skb_mac_header of sk buffer */ - if (unlikely(!skb_mac_header_was_set(skb) || - (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) { - WARN_ON(1); + if (WARN_ON(!skb_mac_header_was_set(skb) || + (skb_tail_pointer(skb) - + skb_mac_header(skb)) < IEEE802154_FC_LEN)) return cpu_to_le16(0); - } memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN); return fc; -- cgit From 3467f0d433016c45d1851f3587d32816b7b2ffb0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 23 Jun 2016 16:57:09 +0200 Subject: ath9k: Allow configuration of LED polarity in platform data. Some devices running OpenWrt need this and it makes sense to add this to ath9k_platform_data as the next patches will add a devicetree (boolean) property for it as well. Suggested-by: Vittorio Gambaletta Signed-off-by: Martin Blumenstingl Signed-off-by: Kalle Valo --- include/linux/ath9k_platform.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index e66153d60bd5..76860a461ed2 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -40,6 +40,7 @@ struct ath9k_platform_data { bool tx_gain_buffalo; bool disable_2ghz; bool disable_5ghz; + bool led_active_high; int (*get_mac_revision)(void); int (*external_reset)(void); -- cgit From d390238c4fba7c87a3bcd859ce3373c864eb7b02 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 6 Jul 2016 20:03:54 -0400 Subject: net: dsa: initialize the routing table The routing table of every switch in a tree is currently initialized to all zeros. This is an issue since 0 is a valid port number. Add a DSA_RTABLE_NONE=-1 constant to initialize the signed values of the routing table pointing to other switches. 
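Concretely, the intended setup step becomes something like the following editorial sketch; the routing table member itself lives outside the 'include' hunk below, so example_init_rtable() and the s8 array are illustrative only:

static void example_init_rtable(s8 rtable[DSA_MAX_SWITCHES])
{
        int i;

        /* Mark every peer switch as unreachable first, so that 0 can
         * later be used as a real routing entry instead of acting as
         * an ambiguous default value.
         */
        for (i = 0; i < DSA_MAX_SWITCHES; i++)
                rtable[i] = DSA_RTABLE_NONE;
}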
This fixes the device mapping of the mv88e6xxx driver where the port pointing to the switch itself and to non-existent switches was wrongly configured to be 0. It is now set to the expected 0xf value. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 20b3087ad193..52ab18bc2b0d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -32,6 +32,8 @@ enum dsa_tag_protocol { #define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 +#define DSA_RTABLE_NONE -1 + struct dsa_chip_data { /* * How to access the switch configuration registers. -- cgit From 606274c5abd8e245add01bc7145a8cbb92b69ba8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Jul 2016 22:38:36 -0700 Subject: bpf: introduce bpf_get_current_task() helper over time there were multiple requests to access different data structures and fields of task_struct current, so finally add the helper to access 'current' as-is. Tracing bpf programs will do the rest of walking the pointers via bpf_probe_read(). Note that current can be null and bpf program has to deal it with, but even dumb passing null into bpf_probe_read() is still safe. Suggested-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c14ca1cd6297..262a7e883b19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -357,6 +357,13 @@ enum bpf_func_id { */ BPF_FUNC_get_hash_recalc, + /** + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + */ + BPF_FUNC_get_current_task, + __BPF_FUNC_MAX_ID, }; -- cgit From ca8bee5dde1f02c2dbe8c8453dce27f2dfafb21c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 5 Jul 2016 14:30:14 +0200 Subject: Bluetooth: Rename HCI_BREDR into HCI_PRIMARY The HCI_BREDR naming is confusing since it actually stands for Primary Bluetooth Controller. Which is a term that has been used in the latest standard. However from a legacy point of view there only really have been Basic Rate (BR) and Enhanced Data Rate (EDR). Recent versions of Bluetooth introduced Low Energy (LE) and made this terminology a little bit confused since Dual Mode Controllers include BR/EDR and LE. To simplify this the name HCI_PRIMARY stands for the Primary Controller which can be a single mode or dual mode controller. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index eefcf3e96421..a3f86de6f100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -65,7 +65,7 @@ #define HCI_I2C 8 /* HCI controller types */ -#define HCI_BREDR 0x00 +#define HCI_PRIMARY 0x00 #define HCI_AMP 0x01 /* First BR/EDR Controller shall have ID = 0 */ -- cgit From a65056ecf4b48be0d0284a7b6a57b6dace10b843 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 6 Jul 2016 12:12:21 -0700 Subject: net: bridge: extend MLD/IGMP query stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As was suggested this patch adds support for the different versions of MLD and IGMP query types. 
Since the user visible structure is still in net-next we can augment it instead of adding netlink attributes. The distinction between the different IGMP/MLD query types is done as suggested in Section 7.1, RFC 3376 [1] and Section 8.1, RFC 3810 [2] based on query payload size and code for IGMP. Since all IGMP packets go through multicast_rcv() and it uses ip_mc_check_igmp/ipv6_mc_check_mld we can be sure that at least the ip/ipv6 header can be directly used. [1] https://tools.ietf.org/html/rfc3376#section-7 [2] https://tools.ietf.org/html/rfc3810#section-8.1 Suggested-by: Linus Lüssing Signed-off-by: Nikolay Aleksandrov Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 8304fe6f0561..c186f64fffca 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -261,14 +261,17 @@ enum { /* IGMP/MLD statistics */ struct br_mcast_stats { - __u64 igmp_queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3queries[BR_MCAST_DIR_SIZE]; __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; __u64 igmp_parse_errors; - __u64 mld_queries[BR_MCAST_DIR_SIZE]; + __u64 mld_v1queries[BR_MCAST_DIR_SIZE]; + __u64 mld_v2queries[BR_MCAST_DIR_SIZE]; __u64 mld_leaves[BR_MCAST_DIR_SIZE]; __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; -- cgit From 1db19db7f5ff4ddd3b1b6dd2092a87298ee5bd0b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 7 Jul 2016 18:01:32 +0200 Subject: net: tracepoint napi:napi_poll add work and budget An important information for the napi_poll tracepoint is knowing the work done (packets processed) by the napi_poll() call. Add both the work done and budget, as they are related. Handle trace_napi_poll() param change in dropwatch/drop_monitor and in python perf script netdev-times.py in backward compat way, as python fortunately supports optional parameter handling. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/napi.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 8fe1e93f531d..118ed7767639 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -12,22 +12,27 @@ TRACE_EVENT(napi_poll, - TP_PROTO(struct napi_struct *napi), + TP_PROTO(struct napi_struct *napi, int work, int budget), - TP_ARGS(napi), + TP_ARGS(napi, work, budget), TP_STRUCT__entry( __field( struct napi_struct *, napi) + __field( int, work) + __field( int, budget) __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) ), TP_fast_assign( __entry->napi = napi; + __entry->work = work; + __entry->budget = budget; __assign_str(dev_name, napi->dev ? 
napi->dev->name : NO_DEV); ), - TP_printk("napi poll on napi struct %p for device %s", - __entry->napi, __get_str(dev_name)) + TP_printk("napi poll on napi struct %p for device %s work %d budget %d", + __entry->napi, __get_str(dev_name), + __entry->work, __entry->budget) ); #undef NO_DEV -- cgit From 1d984c2e03c1fb21539a9f50627e312788512013 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Jul 2016 15:52:39 +0200 Subject: NFC: digital: Fix handling of saved PDU sk_buff pointers This patch fixes the way an I-PDU is saved in case it needs to be sent again. It is now copied using pskb_copy() and not simply referenced using skb_get() since it could be modified by the driver. digital_in_send_saved_skb() and digital_tg_send_saved_skb() still get a reference on the saved skb which is re-sent but release it if the send operation fails. That way the caller doesn't have to take care about skb ref in case of error. RTOX supervisor PDU must not be saved as this can override a previously saved I-PDU that should be re-sent later on. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 506e3f6eabef..f9a4e4771861 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -237,7 +237,6 @@ struct nfc_digital_dev { int nack_count; struct sk_buff *saved_skb; - unsigned int saved_skb_len; u16 target_fsc; -- cgit From 1a09c56f545c8ff8d338a38c7c40d79f4165a94c Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Jul 2016 15:52:45 +0200 Subject: NFC: digital: Add support for NFC DEP Response Waiting Time When sending an ATR_REQ, the initiator must wait for the ATR_RES at least 'RWT(nfcdep,activation) + dRWT(nfcdep)' and no more than 'RWT(nfcdep,activation) + dRWT(nfcdep) + dT(nfcdep,initiator)'. This gives a timeout value between 1237 ms and 1337 ms. This patch defines DIGITAL_ATR_RES_RWT to 1337 used for the timeout value of ATR_REQ command. For other DEP PDUs, the initiator must wait between 'RWT + dRWT(nfcdep)' and 'RWT + dRWT(nfcdep) + dT(nfcdep,initiator)' where RWT is given by the following formula: '(256 * 16 / f(c)) * 2^wt' where wt is the value of the TO field in the ATR_RES response and is in the range between 0 and 14. This patch declares a mapping table for wt values and gives RWT max values between 100 ms and 5049 ms. This patch also defines DIGITAL_ATR_RES_TO_WT, the maximum wt value in target mode, to 8. 
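As a rough sanity check of those bounds (editorial, not part of the patch; the driver declares a precomputed mapping table rather than evaluating this formula), the maximum RWT in milliseconds for a given wt can be approximated as:

static unsigned int example_rwt_ms(unsigned int wt)
{
        /* (256 * 16 / 13.56 MHz) * 2^wt is about 0.302 ms * 2^wt; the
         * added 100 ms stands in for dRWT(nfcdep).  wt = 0 then yields
         * roughly 100 ms and wt = 14 roughly 4949 + 100 = 5049 ms,
         * matching the range quoted above.
         */
        return (4096u * (1u << wt)) / 13560u + 100u;
}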
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index f9a4e4771861..74fa7eb94e72 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -226,6 +226,7 @@ struct nfc_digital_dev { u8 curr_rf_tech; u8 curr_nfc_dep_pni; u8 did; + u16 dep_rwt; u8 local_payload_max; u8 remote_payload_max; -- cgit From 64b87639c9cbeb03e26bc65528416c961b1dde96 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:43 +0800 Subject: netfilter: conntrack: fix race between nf_conntrack proc read and hash resize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, race will happen, because reader can observe a newly allocated hash but the old size (or vice versa). So oops will happen like follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000017 IP: [] seq_print_acct+0x11/0x50 [nf_conntrack] Call Trace: [] ? ct_seq_show+0x14e/0x340 [nf_conntrack] [] seq_read+0x2cc/0x390 [] proc_reg_read+0x42/0x70 [] __vfs_read+0x37/0x130 [] ? security_file_permission+0xa0/0xc0 [] vfs_read+0x95/0x140 [] SyS_read+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 It is very easy to reproduce this kernel crash. 1. open one shell and input the following cmds: while : ; do echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize done 2. open more shells and input the following cmds: while : ; do cat /proc/net/nf_conntrack done 3. just wait a monent, oops will happen soon. The solution in this patch is based on Florian's Commit 5e3c61f98175 ("netfilter: conntrack: fix lookup race during hash resize"). And add a wrapper function nf_conntrack_get_ht to get hash and hsize suggested by Florian Westphal. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3e2f3328945c..79d7ac5c9740 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); + /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, -- cgit From 242922a027176cd260c5adce4ba6bbfa3a05190c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Jul 2016 20:44:01 +0200 Subject: netfilter: conntrack: simplify early_drop We don't need to acquire the bucket lock during early drop, we can use lockless traveral just like ____nf_conntrack_find. The timer deletion serves as synchronization point, if another cpu attempts to evict same entry, only one will succeed with timer deletion. 
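For reference, a minimal reader-side sketch of the pattern both conntrack changes rely on (editorial, not from the patches; only the nf_conntrack_get_ht() declaration comes from the hunk above): fetch the hash pointer and its length together under RCU so a concurrent resize can never pair a new table with an old size:

static void example_walk_conntrack(void)
{
        struct hlist_nulls_head *hash;
        unsigned int hsize, i;

        rcu_read_lock();
        nf_conntrack_get_ht(&hash, &hsize);
        for (i = 0; i < hsize; i++) {
                /* walk hash[i] here, e.g. with
                 * hlist_nulls_for_each_entry_rcu()
                 */
        }
        rcu_read_unlock();
}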
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d3397f34583..2a5133e214c9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) -- cgit From 7c9664351980aaa6a4b8837a314360b3a4ad382a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:23 +0200 Subject: netfilter: move nat hlist_head to nf_conn The nat extension structure is 32bytes in size on x86_64: struct nf_conn_nat { struct hlist_node bysource; /* 0 16 */ struct nf_conn * ct; /* 16 8 */ union nf_conntrack_nat_help help; /* 24 4 */ int masq_index; /* 28 4 */ /* size: 32, cachelines: 1, members: 4 */ /* last cacheline: 32 bytes */ }; The hlist is needed to quickly check for possible tuple collisions when installing a new nat binding. Storing this in the extension area has two drawbacks: 1. We need ct backpointer to get the conntrack struct from the extension. 2. When reallocation of extension area occurs we need to fixup the bysource hash head via hlist_replace_rcu. We can avoid both by placing the hlist_head in nf_conn and place nf_conn in the bysource hash rather than the extenstion. We can also remove the ->move support; no other extension needs it. Moving the entire nat extension into nf_conn would be possible as well but then we have to add yet another callback for deletion from the bysource hash table rather than just using nat extension ->destroy hook for this. nf_conn size doesn't increase due to aligment, followup patch replaces hlist_node with single pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 3 --- include/net/netfilter/nf_nat.h | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a5133e214c9..e5135d8728b4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,6 +117,9 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; +#if IS_ENABLED(CONFIG_NF_NAT) + struct hlist_node nat_bysource; +#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index b925395fa5ed..1c3035dda31f 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); - /* Called when realloacted (can be NULL). - Contents has already been moved. 
*/ - void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 344b1ab19220..02515f7ed4cc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -29,8 +29,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct hlist_node bysource; - struct nf_conn *ct; union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) -- cgit From 870190a9ec9075205c0fa795a09fa931694a3ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:24 +0200 Subject: netfilter: nat: convert nat bysrc hash to rhashtable It did use a fixed-size bucket list plus single lock to protect add/del. Unlike the main conntrack table we only need to add and remove keys. Convert it to rhashtable to get table autosizing and per-bucket locking. The maximum number of entries is -- as before -- tied to the number of conntracks so we do not need another upperlimit. The change does not handle rhashtable_remove_fast error, only possible "error" is -ENOENT, and that is something that can happen legitimetely, e.g. because nat module was inserted at a later time and no src manip took place yet. Tested with http-client-benchmark + httpterm with DNAT and SNAT rules in place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 ++- include/net/netfilter/nf_nat.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e5135d8728b4..a08825b7e955 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct hlist_node nat_bysource; + struct rhash_head nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 02515f7ed4cc..c327a431a6f3 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,5 +1,6 @@ #ifndef _NF_NAT_H #define _NF_NAT_H +#include #include #include #include -- cgit From d51ed8367bcbbb06f4f4986d1ef7dc2480bed1ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Jul 2016 13:08:50 +0200 Subject: netfilter: constify arg to is_dying/confirmed Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a08825b7e955..1e04911b78ea 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -270,12 +270,12 @@ static inline int nf_ct_is_template(const struct nf_conn *ct) } /* It's confirmed if it is, or has been in the hash table. 
*/ -static inline int nf_ct_is_confirmed(struct nf_conn *ct) +static inline int nf_ct_is_confirmed(const struct nf_conn *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } -static inline int nf_ct_is_dying(struct nf_conn *ct) +static inline int nf_ct_is_dying(const struct nf_conn *ct) { return test_bit(IPS_DYING_BIT, &ct->status); } -- cgit From 42a55769132fdf4f44bac1471b371d7f80bcde35 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Jul 2016 14:41:49 +0200 Subject: netfilter: nf_tables: get rid of possible_net_t from set and basechain We can pass the netns pointer as parameter to the functions that need to gain access to it. From basechains, I didn't find any client for this field anymore so let's remove this too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 30c1d9489ae2..f2f13399ce44 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -236,7 +236,8 @@ struct nft_expr; * @features: features supported by the implementation */ struct nft_set_ops { - bool (*lookup)(const struct nft_set *set, + bool (*lookup)(const struct net *net, + const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, @@ -248,11 +249,14 @@ struct nft_set_ops { struct nft_regs *regs, const struct nft_set_ext **ext); - int (*insert)(const struct nft_set *set, + int (*insert)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void (*activate)(const struct nft_set *set, + void (*activate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct nft_set *set, + void * (*deactivate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); @@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops); * @udlen: user data length * @udata: user data * @ops: set ops - * @pnet: network namespace * @flags: set flags * @genmask: generation mask * @klen: key length @@ -318,7 +321,6 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - possible_net_t pnet; u16 flags:14, genmask:2; u8 klen; @@ -804,7 +806,6 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops - * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -813,7 +814,6 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - possible_net_t pnet; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, return !(ext->genmask & genmask); } -static inline void nft_set_elem_change_active(const struct nft_set *set, +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, struct nft_set_ext *ext) { - ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); + ext->genmask ^= nft_genmask_next(net); } /* -- cgit From 28aa4c26fce2202db8d42ae76b639ca1d9a23d25 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:40 +0800 Subject: sctp: add SCTP_PR_SUPPORTED on sctp sockopt According to 
section 4.5 of rfc7496, prsctp_enable should be per asoc. We will add prsctp_enable to both asoc and ep, and replace the places where it used net.sctp->prsctp_enable with asoc->prsctp_enable. ep->prsctp_enable will be initialized with net.sctp->prsctp_enable, and asoc->prsctp_enable will be initialized with ep->prsctp_enable. We can also modify it's value through sockopt SCTP_PR_SUPPORTED. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ++++-- include/uapi/linux/sctp.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 83c5ec58b93a..07115ca9de4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1256,7 +1256,8 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable; + __u8 auth_enable:1, + prsctp_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1848,7 +1849,8 @@ struct sctp_association { __u16 active_key_id; __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ - temp:1; /* Is it a temporary association? */ + temp:1, /* Is it a temporary association? */ + prsctp_enable:1; struct sctp_priv_assoc_stats stats; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ce70fe6b45df..aa08906f292d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -112,6 +112,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ +#define SCTP_PR_SUPPORTED 113 /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ -- cgit From f959fb442c35f4b61fea341401b8463dd0a1b959 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:41 +0800 Subject: sctp: add SCTP_DEFAULT_PRINFO into sctp sockopt This patch adds SCTP_DEFAULT_PRINFO to sctp sockopt. It is used to set/get sctp Partially Reliable Policies' default params, which includes 3 policies (ttl, rtx, prio) and their values. Still, if we set policy params in sndinfo, we will use the params of sndinfo against chunks, instead of the default params. In this patch, we will use 5-8bit of sp/asoc->default_flags to store prsctp policies, and reuse asoc->default_timetolive to store their values. It means if we enable and set prsctp policy, prior ttl timeout in sctp will not work any more. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/uapi/linux/sctp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index aa08906f292d..984cf2e9a61d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -113,6 +113,29 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 +#define SCTP_DEFAULT_PRINFO 114 + +/* PR-SCTP policies */ +#define SCTP_PR_SCTP_NONE 0x0000 +#define SCTP_PR_SCTP_TTL 0x0010 +#define SCTP_PR_SCTP_RTX 0x0020 +#define SCTP_PR_SCTP_PRIO 0x0030 +#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_PRIO +#define SCTP_PR_SCTP_MASK 0x0030 + +#define __SCTP_PR_INDEX(x) ((x >> 4) - 1) +#define SCTP_PR_INDEX(x) __SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x) + +#define SCTP_PR_POLICY(x) ((x) & SCTP_PR_SCTP_MASK) +#define SCTP_PR_SET_POLICY(flags, x) \ + do { \ + flags &= ~SCTP_PR_SCTP_MASK; \ + flags |= x; \ + } while (0) + +#define SCTP_PR_TTL_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL) +#define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX) +#define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO) /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ @@ -903,4 +926,10 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +struct sctp_default_prinfo { + sctp_assoc_t pr_assoc_id; + __u32 pr_value; + __u16 pr_policy; +}; + #endif /* _UAPI_SCTP_H */ -- cgit From 826d253d57b11f69add81c8086d2e7f1dce5ec77 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:42 +0800 Subject: sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt This patch adds SCTP_PR_ASSOC_STATUS to sctp sockopt, which is used to dump the prsctp statistics info from the asoc. The prsctp statistics includes abandoned_sent/unsent from the asoc. abandoned_sent is the count of the packets we drop packets from retransmit/transmited queue, and abandoned_unsent is the count of the packets we drop from out_queue according to the policy. Note: another option for prsctp statistics dump described in rfc is SCTP_PR_STREAM_STATUS, which is used to dump the prsctp statistics info from each stream. But by now, linux doesn't yet have per stream statistics info, it needs rfc6525 to be implemented. As the prsctp statistics for each stream has to be based on per stream statistics, we will delay it until rfc6525 is done in linux. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 3 +++ include/uapi/linux/sctp.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07115ca9de4d..d8e464aacb20 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1853,6 +1853,9 @@ struct sctp_association { prsctp_enable:1; struct sctp_priv_assoc_stats stats; + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 984cf2e9a61d..d304f4c9792c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -114,6 +114,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 +#define SCTP_PR_ASSOC_STATUS 115 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -926,6 +927,17 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +/* + * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status + */ +struct sctp_prstatus { + sctp_assoc_t sprstat_assoc_id; + __u16 sprstat_sid; + __u16 sprstat_policy; + __u64 sprstat_abandoned_unsent; + __u64 sprstat_abandoned_sent; +}; + struct sctp_default_prinfo { sctp_assoc_t pr_assoc_id; __u32 pr_value; -- cgit From a6c2f792873aff332a4689717c3cd6104f46684c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:43 +0800 Subject: sctp: implement prsctp TTL policy prsctp TTL policy is a policy to abandon chunks when they expire at the specific time in local stack. It's similar with expires_at in struct sctp_datamsg. This patch uses sinfo->sinfo_timetolive to set the specific time for TTL policy. sinfo->sinfo_timetolive is also used for msg->expires_at. So if prsctp_enable or TTL policy is not enabled, msg->expires_at still works as before. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d8e464aacb20..6bcda715008e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -602,6 +602,16 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; + /* We use this field to record param for prsctp policies, + * for TTL policy, it is the time_to_drop of this chunk, + * for RTX policy, it is the max_sent_count of this chunk, + * for PRIO policy, it is the priority of this chunk. + */ + unsigned long prsctp_param; + + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* Which association does this belong to? */ struct sctp_association *asoc; -- cgit From 8dbdf1f5b09cb22560e7c7173b52fe3c631046bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:45 +0800 Subject: sctp: implement prsctp PRIO policy prsctp PRIO policy is a policy to abandon lower priority chunks when asoc doesn't have enough snd buffer, so that the current chunk with higher priority can be queued successfully. Similar to TTL/RTX policy, we will set the priority of the chunk to prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy(). So if PRIO policy is enabled, msg->expire_at won't work. asoc->sent_cnt_removable will record how many chunks can be checked to remove. If priority policy is enabled, when the chunk is queued into the out_queue, we will increase sent_cnt_removable. 
When the chunk is moved to abandon_queue or dequeue and free, we will decrease sent_cnt_removable. In sctp_sendmsg, we will check if there is enough snd buffer for current msg and if sent_cnt_removable is not 0. Then try to abandon chunks in sctp_prune_prsctp when sendmsg from the retransmit/transmited queue, and free chunks from out_queue in right order until the abandon+free size > msg_len - sctp_wfree. For the abandon size, we have to wait until it sends FORWARD TSN, receives the sack and the chunks are really freed. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6bcda715008e..8626bdd3249a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1084,6 +1084,8 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1864,6 +1866,8 @@ struct sctp_association { struct sctp_priv_assoc_stats stats; + int sent_cnt_removable; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; -- cgit From e5224f0fe2acddbc2fa9b419d8867ced7f5381fc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Jul 2016 18:05:03 +0200 Subject: devlink: add hardware messages tracing facility Define a tracepoint and allow user to trace messages going to and from hardware associated with devlink instance. Signed-off-by: Jiri Pirko Acked-by: Steven Rostedt Signed-off-by: David S. 
Miller --- include/trace/events/devlink.h | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 include/trace/events/devlink.h (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h new file mode 100644 index 000000000000..333c32ac9bfa --- /dev/null +++ b/include/trace/events/devlink.h @@ -0,0 +1,68 @@ +#if IS_ENABLED(CONFIG_NET_DEVLINK) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devlink + +#if !defined(_TRACE_DEVLINK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVLINK_H + +#include +#include +#include + +/* + * Tracepoint for devlink hardware message: + */ +TRACE_EVENT(devlink_hwmsg, + TP_PROTO(const struct devlink *devlink, bool incoming, + unsigned long type, const u8 *buf, size_t len), + + TP_ARGS(devlink, incoming, type, buf, len), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(owner_name, devlink->dev->driver->owner->name) + __field(bool, incoming) + __field(unsigned long, type) + __dynamic_array(u8, buf, len) + __field(size_t, len) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(owner_name, devlink->dev->driver->owner->name); + __entry->incoming = incoming; + __entry->type = type; + memcpy(__get_dynamic_array(buf), buf, len); + __entry->len = len; + ), + + TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + __get_str(bus_name), __get_str(dev_name), + __get_str(owner_name), __entry->incoming, __entry->type, + (int) __entry->len, __get_dynamic_array(buf), __entry->len) +); + +#endif /* _TRACE_DEVLINK_H */ + +/* This part must be outside protection */ +#include + +#else /* CONFIG_NET_DEVLINK */ + +#if !defined(_TRACE_DEVLINK_H) +#define _TRACE_DEVLINK_H + +#include + +static inline void trace_devlink_hwmsg(const struct devlink *devlink, + bool incoming, unsigned long type, + const u8 *buf, size_t len) +{ +} + +#endif /* _TRACE_DEVLINK_H */ + +#endif -- cgit From 160b925163c0aabc2c2fbb7d58a75e38b7cd6a17 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 12 Jul 2016 02:12:16 +0200 Subject: Bluetooth: Add Authentication Failed reason to Disconnected Mgmt event If link is disconnected due to Authentication Failure (PIN or Key Missing status) userspace will be notified about this with proper error code. Many LE profiles define "PIN or Key Missing" status as indication of remote lost bond so this allows userspace to take action on this. 
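Schematically, the translation this enables looks like the following editorial sketch; the constants are the ones added in the hunks below, but example_disconn_reason() and its handling of the non-authentication cases are illustrative, since the real logic lives in net/bluetooth and is outside the 'include' filter:

static u8 example_disconn_reason(u8 hci_status)
{
        switch (hci_status) {
        case HCI_ERROR_AUTH_FAILURE:
        case HCI_ERROR_PIN_OR_KEY_MISSING:
                /* lost bond or other authentication problem */
                return MGMT_DEV_DISCONN_AUTH_FAILURE;
        case HCI_ERROR_CONNECTION_TIMEOUT:
                return MGMT_DEV_DISCONN_TIMEOUT;
        default:
                return MGMT_DEV_DISCONN_REMOTE;
        }
}

The btmon traces below show this failure on both an LE link (Encryption Change with PIN or Key Missing) and a BR/EDR link (Auth Complete with PIN or Key Missing).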
@ Device Connected: 88:63:DF:88:0E:83 (1) flags 0x0000 02 01 1a 05 03 0a 18 0d 18 0b 09 48 65 61 72 74 ...........Heart 20 52 61 74 65 Rate > HCI Event: Command Status (0x0f) plen 4 LE Read Remote Used Features (0x08|0x0016) ncmd 1 Status: Success (0x00) > ACL Data RX: Handle 3585 flags 0x02 dlen 11 ATT: Read By Group Type Request (0x10) len 6 Handle range: 0x0001-0xffff Attribute group type: Primary Service (0x2800) > HCI Event: LE Meta Event (0x3e) plen 12 LE Read Remote Used Features (0x04) Status: Success (0x00) Handle: 3585 Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 LE Encryption < HCI Command: LE Start Encryption (0x08|0x0019) plen 28 Handle: 3585 Random number: 0x0000000000000000 Encrypted diversifier: 0x0000 Long term key: 26201cd479a0921b6f949f0b1fa8dc82 > HCI Event: Command Status (0x0f) plen 4 LE Start Encryption (0x08|0x0019) ncmd 1 Status: Success (0x00) > HCI Event: Encryption Change (0x08) plen 4 Status: PIN or Key Missing (0x06) Handle: 3585 Encryption: Disabled (0x00) < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 3585 Reason: Authentication Failure (0x05) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Success (0x00) > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 3585 Reason: Connection Terminated By Local Host (0x16) @ Device Disconnected: 88:63:DF:88:0E:83 (1) reason 4 @ Device Connected: C4:43:8F:A3:4D:83 (0) flags 0x0000 08 09 4e 65 78 75 73 20 35 ..Nexus 5 > HCI Event: Command Status (0x0f) plen 4 Authentication Requested (0x01|0x0011) ncmd 1 Status: Success (0x00) > HCI Event: Link Key Request (0x17) plen 6 Address: C4:43:8F:A3:4D:83 (LG Electronics) < HCI Command: Link Key Request Reply (0x01|0x000b) plen 22 Address: C4:43:8F:A3:4D:83 (LG Electronics) Link key: 080812e4aa97a863d11826f71f65a933 > HCI Event: Command Complete (0x0e) plen 10 Link Key Request Reply (0x01|0x000b) ncmd 1 Status: Success (0x00) Address: C4:43:8F:A3:4D:83 (LG Electronics) > HCI Event: Auth Complete (0x06) plen 3 Status: PIN or Key Missing (0x06) Handle: 75 @ Authentication Failed: C4:43:8F:A3:4D:83 (0) status 0x05 < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 75 Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Success (0x00) > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 75 Reason: Connection Terminated By Local Host (0x16) @ Device Disconnected: C4:43:8F:A3:4D:83 (0) reason 4 Signed-off-by: Szymon Janc Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a3f86de6f100..003b25283407 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -445,6 +445,7 @@ enum { /* ---- HCI Error Codes ---- */ #define HCI_ERROR_UNKNOWN_CONN_ID 0x02 #define HCI_ERROR_AUTH_FAILURE 0x05 +#define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dc71473462ac..77d7fe115a0d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -654,6 +654,7 @@ enum { HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, + HCI_CONN_AUTH_FAILURE, }; 
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index ea73e0826aa7..7647964b1efa 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -645,6 +645,7 @@ struct mgmt_ev_device_connected { #define MGMT_DEV_DISCONN_TIMEOUT 0x01 #define MGMT_DEV_DISCONN_LOCAL_HOST 0x02 #define MGMT_DEV_DISCONN_REMOTE 0x03 +#define MGMT_DEV_DISCONN_AUTH_FAILURE 0x04 #define MGMT_EV_DEVICE_DISCONNECTED 0x000C struct mgmt_ev_device_disconnected { -- cgit From 9e238323799fb8c2add2b1de9a22edd4d4e51e30 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:55 -0300 Subject: sctp: allow others to use sctp_input_cb We process input path in other files too and having access to it is nice, so move it to a header where it's shared. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8626bdd3249a..966c3a40039c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -59,6 +59,7 @@ #include /* We need tq_struct. */ #include /* We need sctp* header structs. */ #include /* We need auth specific structs */ +#include /* For inet_skb_parm */ /* A convenience structure for handling sockaddr structures. * We should wean ourselves off this. @@ -1092,6 +1093,20 @@ static inline void sctp_outq_cork(struct sctp_outq *q) q->cork = 1; } +/* SCTP skb control block. + * sctp_input_cb is currently used on rx and sock rx queue + */ +struct sctp_input_cb { + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; + struct sctp_chunk *chunk; +}; +#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { -- cgit From f5d258e60722142e88cb6f0f337d78bca67cf973 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:56 -0300 Subject: sctp: reorder sctp_ulpevent and shrink msg_flags The next patch needs 8 bytes in there. sctp_ulpevent has a hole due to bad alignment; msg_flags is using 4 bytes while it actually uses only 2, so we shrink it, and iif member (4 bytes) which can be easily fetched from another place once the next patch is there, so we remove it and thus creating space for 8 bytes. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index cccdcfd14973..aa342645dbce 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,15 +48,15 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; - __u16 stream; - __u16 ssn; - __u16 flags; + unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int msg_flags; int iif; - unsigned int rmem_len; + __u16 stream; + __u16 ssn; + __u16 flags; + __u16 msg_flags; }; /* Retrieve the skb this event sits inside of. */ -- cgit From 1f45f78f8e511203f03138f2ccde3d2cf90d2cbf Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:57 -0300 Subject: sctp: allow GSO frags to access the chunk too SCTP will try to access original IP headers on sctp_recvmsg in order to copy the addresses used. 
There are also other places that do similar access to IP or even SCTP headers. But after 90017accff61 ("sctp: Add GSO support") they aren't always there because they are only present in the header skb. SCTP handles the queueing of incoming data by cloning the incoming skb and limiting to only the relevant payload. This clone has its cb updated to something different and it's then queued on socket rx queue. Thus we need to fix this in two moments. For rx path, not related to socket queue yet, this patch uses a partially copied sctp_input_cb to such GSO frags. This restores the ability to access the headers for this part of the code. Regarding the socket rx queue, it removes iif member from sctp_event and also add a chunk pointer on it. With these changes we're always able to reach the headers again. The biggest change here is that now the sctp_chunk struct and the original skb are only freed after the application consumed the buffer. Note however that the original payload was already like this due to the skb cloning. For iif, SCTP's IPv4 code doesn't use it, so no change is necessary. IPv6 now can fetch it directly from original's IPv6 CB as the original skb is still accessible. In the future we probably can simplify sctp_v*_skb_iif() stuff, as sctp_v4_skb_iif() was called but it's return value not used, and now it's not even called, but such cleanup is out of scope for this change. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 +++++++ include/net/sctp/ulpevent.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 966c3a40039c..f6f201de6fa4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1107,6 +1107,13 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) +{ + const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + + return chunk->head_skb ? : skb; +} + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index aa342645dbce..2c098cd7e7e2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,11 +48,11 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; + struct sctp_chunk *chunk; unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int iif; __u16 stream; __u16 ssn; __u16 flags; -- cgit From e7487c86dc5c4a528a7dbd9dc14f453a0de61a84 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:58 -0300 Subject: sctp: avoid identifying address family many times for a chunk Identifying address family operations during rx path is not something expensive but it's ugly to the eye to have it done multiple times, specially when we already validated it during initial rx processing. This patch takes advantage of the now shared sctp_input_cb and make the pointer to the operations readily available. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f6f201de6fa4..ce93c4b10d26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1104,6 +1104,7 @@ struct sctp_input_cb { #endif } header; struct sctp_chunk *chunk; + struct sctp_af *af; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) -- cgit From 29cc6679076a00a6ce193004dcf2d14ae7c428a5 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:37 +0300 Subject: net/mlx5: Store counters in rbtree instead of list In order to use bulk counters, we need to have counters sorted by id. Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 81e8396574f4..a041b99fceac 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -469,7 +469,7 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { - struct list_head list; + struct rb_root counters; struct list_head addlist; /* protect addlist add/splice operations */ spinlock_t addlist_lock; -- cgit From a351a1b03bf169f77891060be30036ef71cbe618 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:38 +0300 Subject: net/mlx5: Introduce bulk reading of flow counters This commit utilize the ability of ConnectX-4 to bulk read flow counters. Few bulk counter queries could be done instead of issuing thousands of firmware commands per second to get statistics of all flows set to HW, such as those programmed when we offload tc filters. Counters are stored sorted by hardware id, and queried in blocks (id + number of counters). Due to hardware requirement, start of block and number of counters in a block must be four aligned. Reviewed-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 152421cc6f44..d671e4e8e7db 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -893,7 +893,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_340[0x20]; + u8 reserved_at_340[0x8]; + u8 log_max_flow_counter_bulk[0x8]; + u8 max_flow_counter[0x10]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; @@ -980,7 +983,8 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; + u8 clear[0x1]; + u8 num_of_counters[0xf]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; -- cgit From 8438884d4ab423161b974854ebb90c08219dd678 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:43 +0300 Subject: net/switchdev: Export the same parent ID service function This helper serves to know if two switchdev port netdevices belong to the same HW ASIC, e.g to figure out if forwarding offload is possible between them. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 985619a59323..9023e3e3be0b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b); #else static inline void switchdev_deferred_process(void) @@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev, { } +static inline bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + return false; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ -- cgit From 0e1824c98a0ffd7fd9ffb2a3da01ec49ff1348a2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 14 Jul 2016 11:37:28 +0200 Subject: tracing: change owner name to driver name for devlink hwmsg tracepoint Turned on that driver->owner which is struct module is not available when modules are disabled. Better to depend on a driver name which is always available. Reported-by: Randy Dunlap Fixes: e5224f0fe2 ("devlink: add hardware messages tracing facility") Signed-off-by: Jiri Pirko Acked-by: Randy Dunlap Acked-by: Steven Rostedt Signed-off-by: David S. Miller --- include/trace/events/devlink.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 333c32ac9bfa..77dce71df42a 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -22,7 +22,7 @@ TRACE_EVENT(devlink_hwmsg, TP_STRUCT__entry( __string(bus_name, devlink->dev->bus->name) __string(dev_name, dev_name(devlink->dev)) - __string(owner_name, devlink->dev->driver->owner->name) + __string(driver_name, devlink->dev->driver->name) __field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -32,16 +32,16 @@ TRACE_EVENT(devlink_hwmsg, TP_fast_assign( __assign_str(bus_name, devlink->dev->bus->name); __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(owner_name, devlink->dev->driver->owner->name); + __assign_str(driver_name, devlink->dev->driver->name); __entry->incoming = incoming; __entry->type = type; memcpy(__get_dynamic_array(buf), buf, len); __entry->len = len; ), - TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", __get_str(bus_name), __get_str(dev_name), - __get_str(owner_name), __entry->incoming, __entry->type, + __get_str(driver_name), __entry->incoming, __entry->type, (int) __entry->len, __get_dynamic_array(buf), __entry->len) ); -- cgit From caeccd5180930eb8586771bb1935f4f2e456a8e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Jul 2016 11:37:29 +0200 Subject: devlink: fix trace format string Including devlink.h on ARM and probably other 32-bit architectures results in a harmless warning: In file included from ../include/trace/define_trace.h:95:0, from ../include/trace/events/devlink.h:51, from ../net/core/devlink.c:30: include/trace/events/devlink.h: In function 'trace_raw_output_devlink_hwmsg': include/trace/events/devlink.h:42:12: error: format '%lu' expects argument of type 'long unsigned int', but argument 10 has type 'size_t {aka unsigned int}' [-Werror=format=] The correct format string for 'size_t' is %zu, not %lu, this 
works on all architectures. Signed-off-by: Arnd Bergmann Fixes: e5224f0fe2ac ("devlink: add hardware messages tracing facility") Signed-off-by: Jiri Pirko Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/trace/events/devlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 77dce71df42a..09f1df228f2c 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -39,7 +39,7 @@ TRACE_EVENT(devlink_hwmsg, __entry->len = len; ), - TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%zu", __get_str(bus_name), __get_str(dev_name), __get_str(driver_name), __entry->incoming, __entry->type, (int) __entry->len, __get_dynamic_array(buf), __entry->len) -- cgit From 77501a79cec40eac65c59ee7af3f786c703ead9c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 14 Jul 2016 16:29:43 +0200 Subject: net: phy: micrel: Add KSZ8041FTL fiber mode support We can't detect the FXEN (fiber mode) bootstrap pin, so configure it via a boolean device tree property "micrel,fiber-mode". If it is enabled, auto-negotiation is not supported. The only available modes are 100base-fx (full duplex and half duplex). Signed-off-by: Philipp Zabel Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 2e5b194b9b19..257173e0095e 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -37,6 +37,7 @@ /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 +#define MICREL_PHY_FXEN 0x00000002 #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit From 7e3f977edd0bd9ea6104156feba95bb5ae9bdd38 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Jul 2016 18:08:03 +0200 Subject: perf, events: add non-linear data support for raw records This patch adds support for non-linear data on raw records. It extends raw records to have one or multiple fragments that will be written linearly into the ring slot, where each fragment can optionally have a custom callback handler to walk and extract complex, possibly non-linear data. If a callback handler is provided for a fragment, then the new __output_custom() will be used instead of __output_copy() for the perf_output_sample() part. perf_prepare_sample() does all the size calculation only once, so perf_output_sample() doesn't need to redo the same work anymore, meaning real_size and padding will be cached in the raw record. The raw record becomes 32 bytes in size without holes; to not increase it further and to avoid doing unnecessary recalculations in fast-path, we can reuse next pointer of the last fragment, idea here is borrowed from ZERO_OR_NULL_PTR(), which should keep the perf_output_sample() path for PERF_SAMPLE_RAW minimal. This facility is needed for BPF's event output helper as a first user that will, in a follow-up, add an additional perf_raw_frag to its perf_raw_record in order to be able to more efficiently dump skb context after a linear head meta data related to it. skbs can be non-linear and thus need a custom output function to dump buffers. Currently, the skb data needs to be copied twice; with the help of __output_custom() this work only needs to be done once. 
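For illustration only, a caller-side sketch of the new layout; the names (my_copy, meta, ctx) are placeholders rather than anything added by this patch, and the structure mirrors in spirit how the BPF event output helper later in this series chains a linear meta data fragment with a single non-linear fragment:

#include <linux/perf_event.h>
#include <linux/string.h>

/* Placeholder copy callback: a real user would walk a non-linear object
 * here; returning 0 signals that everything was copied.
 */
static unsigned long my_copy(void *dst, const void *src, unsigned long len)
{
        memcpy(dst, src, len);
        return 0;
}

static void example_build_raw(void *meta, u32 meta_size, void *ctx, u32 ctx_size)
{
        struct perf_raw_frag frag = {
                .copy   = my_copy,
                .data   = ctx,
                .size   = ctx_size,
        };
        struct perf_raw_record raw = {
                .frag = {
                        {
                                .next   = ctx_size ? &frag : NULL,
                        },
                        .data   = meta,
                        .size   = meta_size,
                },
        };

        /* 'raw' would then be hooked into perf_sample_data before calling
         * perf_event_output(); the size/padding caching described above is
         * done by perf_prepare_sample().
         */
        (void)raw;
}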
Future users could be things like XDP/BPF programs that work on different context though and would thus also have a different callback function. The few users of raw records are adapted to initialize their frag data from the raw record itself, no change in behavior for them. The code is based upon a PoC diff provided by Peter Zijlstra [1]. [1] http://thread.gmane.org/gmane.linux.network/421294 Suggested-by: Peter Zijlstra Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/perf_event.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a827cecd62f..e79e6c6fed89 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx { bool contexts_maxed; }; +typedef unsigned long (*perf_copy_f)(void *dst, const void *src, + unsigned long len); + +struct perf_raw_frag { + union { + struct perf_raw_frag *next; + unsigned long pad; + }; + perf_copy_f copy; + void *data; + u32 size; +} __packed; + struct perf_raw_record { + struct perf_raw_frag frag; u32 size; - void *data; }; /* @@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) +{ + return frag->pad < sizeof(u64); +} + #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* -- cgit From 555c8a8623a3a87b3c990ba30b7fd2e5914e41d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Jul 2016 18:08:05 +0200 Subject: bpf: avoid stack copy and use skb ctx for event output This work addresses a couple of issues bpf_skb_event_output() helper currently has: i) We need two copies instead of just a single one for the skb data when it should be part of a sample. The data can be non-linear and thus needs to be extracted via bpf_skb_load_bytes() helper first, and then copied once again into the ring buffer slot. ii) Since bpf_skb_load_bytes() currently needs to be used first, the helper needs to see a constant size on the passed stack buffer to make sure BPF verifier can do sanity checks on it during verification time. Thus, just passing skb->len (or any other non-constant value) wouldn't work, but changing bpf_skb_load_bytes() is also not the proper solution, since the two copies are generally still needed. iii) bpf_skb_load_bytes() is just for rather small buffers like headers, since they need to sit on the limited BPF stack anyway. Instead of working around in bpf_skb_load_bytes(), this work improves the bpf_skb_event_output() helper to address all 3 at once. We can make use of the passed in skb context that we have in the helper anyway, and use some of the reserved flag bits as a length argument. The helper will use the new __output_custom() facility from perf side with bpf_skb_copy() as callback helper to walk and extract the data. It will pass the data for setup to bpf_event_output(), which generates and pushes the raw record with an additional frag part. The linear data used in the first frag of the record serves as programmatically defined meta data passed along with the appended sample. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 7 ++++++- include/uapi/linux/bpf.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b3336b4f5d04..c13e92b00bf5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); -const struct bpf_func_proto *bpf_get_event_output_proto(void); + +typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, + unsigned long len); + +u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 262a7e883b19..c4d922439d20 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -401,6 +401,8 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure -- cgit From 02a198777e646a12a8aabae5639f1d33d81d79ef Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 15 Jul 2016 23:55:20 +0200 Subject: net: fixup for tracepoint napi:napi_poll The recent change to tracepoint napi:napi_poll changed the order of the parameters that perf scripts sees, the printk was correct. The problem was that the new parameters (work and budget) were pushed in front of dev_name. The new parameters obviously need to be appended to keep backward compatible. Fixes: 1db19db7f5ff ("net: tracepoint napi:napi_poll add work and budget") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/napi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 118ed7767639..0b9e5136a2a3 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -18,16 +18,16 @@ TRACE_EVENT(napi_poll, TP_STRUCT__entry( __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) __field( int, work) __field( int, budget) - __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) ), TP_fast_assign( __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); __entry->work = work; __entry->budget = budget; - __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); ), TP_printk("napi poll on napi struct %p for device %s work %d budget %d", -- cgit From 43b9e127406079d187794a5140a2411fbc6df2df Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 14 Jul 2016 19:28:27 +0300 Subject: net: ipmr/ip6mr: add support for keeping an entry age In preparation for hardware offloading of ipmr/ip6mr we need an interface that allows to check (and later update) the age of entries. 
Relying on stats alone can show activity but not actual age of the entry, furthermore when there're tens of thousands of entries a lot of the hardware implementations only support "hit" bits which are cleared on read to denote that the entry was active and shouldn't be aged out, these can then be naturally translated into age timestamp and will be compatible with the software forwarding age. Using a lastuse entry doesn't affect performance because the members in that cache line are written to along with the age. Since all new users are encouraged to use ipmr via netlink, this is exported via the RTA_EXPIRES attribute. Also do a minor local variable declaration style adjustment - arrange them longest to shortest. Signed-off-by: Nikolay Aleksandrov CC: Roopa Prabhu CC: Shrijeet Mukherjee CC: Satish Ashok CC: Donald Sharp CC: David S. Miller CC: Alexey Kuznetsov CC: James Morris CC: Hideaki YOSHIFUJI CC: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + include/linux/mroute6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index bf9b322cb0b0..d351fd3e1049 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -104,6 +104,7 @@ struct mfc_cache { unsigned long bytes; unsigned long pkt; unsigned long wrong_if; + unsigned long lastuse; unsigned char ttls[MAXVIFS]; /* TTL thresholds */ } res; } mfc_un; diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 66982e764051..3987b64040c5 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -92,6 +92,7 @@ struct mfc6_cache { unsigned long bytes; unsigned long pkt; unsigned long wrong_if; + unsigned long lastuse; unsigned char ttls[MAXMIFS]; /* TTL thresholds */ } res; } mfc_un; -- cgit From b786241253041c13d94309ca4dace301833f63d1 Mon Sep 17 00:00:00 2001 From: Dongpo Li Date: Fri, 15 Jul 2016 16:26:34 +0800 Subject: of_mdio: Abstract a general interface for phy connect Abstract a general interface "of_phy_get_and_connect" for PHY connect. User will have no bother with getting "phy-mode" and "phy-handle" any more. Suggested-by: Arnd Bergmann Signed-off-by: Dongpo Li Reviewed-by: Jiancheng Xue Signed-off-by: David S. 
Miller --- include/linux/of_mdio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 4b04587d0441..2ab233661ae5 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -19,6 +19,9 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct device_node *phy_np, void (*hndlr)(struct net_device *), u32 flags, phy_interface_t iface); +extern struct phy_device * +of_phy_get_and_connect(struct net_device *dev, struct device_node *np, + void (*hndlr)(struct net_device *)); struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface); @@ -52,6 +55,13 @@ static inline struct phy_device *of_phy_connect(struct net_device *dev, return NULL; } +static inline struct phy_device * +of_phy_get_and_connect(struct net_device *dev, struct device_node *np, + void (*hndlr)(struct net_device *)) +{ + return NULL; +} + static inline struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface) -- cgit From 4360fa22ad5b48a1d1e10e31ffb383ed8c977435 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Jun 2016 11:02:03 +0200 Subject: drivers: misc: ti-st: Use int instead of fuzzy char for callback status On mips and parisc: drivers/bluetooth/btwilink.c: In function 'ti_st_open': drivers/bluetooth/btwilink.c:174:21: warning: overflow in implicit constant conversion [-Woverflow] hst->reg_status = -EINPROGRESS; drivers/nfc/nfcwilink.c: In function 'nfcwilink_open': drivers/nfc/nfcwilink.c:396:31: warning: overflow in implicit constant conversion [-Woverflow] drv->st_register_cb_status = -EINPROGRESS; There are actually two issues: 1. Whether "char" is signed or unsigned depends on the architecture. As the completion callback data is used to pass a (negative) error code, it should always be signed. 2. EINPROGRESS is 150 on mips, 245 on parisc. Hence -EINPROGRESS doesn't fit in a signed 8-bit number. Change the callback status from "char" to "int" to fix these. Signed-off-by: Geert Uytterhoeven Acked-by: Mauro Carvalho Chehab Acked-by: Samuel Ortiz Signed-off-by: Marcel Holtmann --- include/linux/ti_wilink_st.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 0a0d56834c8e..f2293028ab9d 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -71,7 +71,7 @@ struct st_proto_s { enum proto_type type; long (*recv) (void *, struct sk_buff *); unsigned char (*match_packet) (const unsigned char *data); - void (*reg_complete_cb) (void *, char data); + void (*reg_complete_cb) (void *, int data); long (*write) (struct sk_buff *skb); void *priv_data; -- cgit From f962fe32f2f85769cd835ddcecbff8c1d34cf561 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 17 Jul 2016 19:55:15 +0200 Subject: Bluetooth: Move hci_recv_frame and hci_recv_diag prototypes The protoypes for hci_recv_frame and hci_recv_diag are in the wrong location in the header file. Move them close to all the other hci_dev related exported functions. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 77d7fe115a0d..84d0273d826a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1022,6 +1022,8 @@ void hci_unregister_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); +int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); +int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); @@ -1098,9 +1100,6 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); - void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); -- cgit From 5177a83827cd0b8cf6ce0391b00dd4417352d2f1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 17 Jul 2016 19:55:16 +0200 Subject: Bluetooth: Add debugfs fields for hardware and firmware info Some Bluetooth controllers allow for reading hardware and firmware related vendor specific infos. If they are available, then they can be exposed via debugfs now. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 84d0273d826a..ee7fc47680a1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -372,6 +372,8 @@ struct hci_dev { atomic_t promisc; + const char *hw_info; + const char *fw_info; struct dentry *debugfs; struct device dev; @@ -1024,6 +1026,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); +void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); -- cgit From f4dc77713f8016d2e8a3295e1c9c53a21f296def Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 14 Jul 2016 17:51:26 +0200 Subject: netfilter: x_tables: speed up jump target validation The dummy ruleset I used to test the original validation change was broken, most rules were unreachable and were not tested by mark_source_chains(). In some cases rulesets that used to load in a few seconds now require several minutes. sample ruleset that shows the behaviour: echo "*filter" for i in $(seq 0 100000);do printf ":chain_%06x - [0:0]\n" $i done for i in $(seq 0 100000);do printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i done echo COMMIT [ pipe result into iptables-restore ] This ruleset will be about 74mbyte in size, with ~500k searches though all 500k[1] rule entries. 
iptables-restore will take forever (gave up after 10 minutes) Instead of always searching the entire blob for a match, fill an array with the start offsets of every single ipt_entry struct, then do a binary search to check if the jump target is present or not. After this change ruleset restore times get again close to what one gets when reverting 36472341017529e (~3 seconds on my workstation). [1] every user-defined rule gets an implicit RETURN, so we get 300k jumps + 100k userchains + 100k returns -> 500k rule entries Fixes: 36472341017529e ("netfilter: x_tables: validate targets of jumps") Reported-by: Jeff Wu Tested-by: Jeff Wu Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e94e81ab2b58..2ad1a2b289b5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +unsigned int *xt_alloc_entry_offsets(unsigned int size); +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, -- cgit From cc2d1de06f0572a51437d1f31633d81afea5eb47 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Jul 2016 17:14:18 +0200 Subject: bcma: define ChipCommon B MII registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't have access to datasheets to document all the bits but we can name these registers at least. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index a5ac2cad5cb7..b20e3d56253f 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -504,6 +504,9 @@ #define BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_MASK 0x1ff00000 #define BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_SHIFT 20 +#define BCMA_CCB_MII_MNG_CTL 0x0000 +#define BCMA_CCB_MII_MNG_CMD_DATA 0x0004 + /* BCM4331 ChipControl numbers. */ #define BCMA_CHIPCTL_4331_BT_COEXIST BIT(0) /* 0 disable */ #define BCMA_CHIPCTL_4331_SECI BIT(1) /* 0 SECI is disabled (JATG functional) */ -- cgit From 359ebda25aa06fe3a1d028f7e338a849165e661b Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 18 Jul 2016 14:49:33 +0300 Subject: net/ipv4: Introduce IPSKB_FRAG_SEGS bit to inet_skb_parm.flags This flag indicates whether fragmentation of segments is allowed. Formerly this policy was hardcoded according to IPSKB_FORWARDED (set by either ip_forward or ipmr_forward). Cc: Hannes Frederic Sowa Cc: Florian Westphal Signed-off-by: Shmulik Ladkani Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 08f36cd2b874..9742b92dc933 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,6 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) +#define IPSKB_FRAG_SEGS BIT(7) u16 frag_max_size; }; -- cgit From 34a79f63bbe49c888f95e75dd759685a238556b6 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 18 Jul 2016 20:45:38 -0400 Subject: net: dsa: support switchdev ageing time attr Add a new function for DSA drivers to handle the switchdev SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME attribute. The ageing time is passed as milliseconds. Also because we can have multiple logical bridges on top of a physical switch and ageing time are switch-wide, call the driver function with the fastest ageing time in use on the chip instead of the requested one. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 52ab18bc2b0d..2217a3f817f8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -141,6 +141,7 @@ struct dsa_switch_tree { struct dsa_port { struct net_device *netdev; struct device_node *dn; + unsigned int ageing_time; }; struct dsa_switch { @@ -329,6 +330,7 @@ struct dsa_switch_driver { /* * Bridge integration */ + int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port); -- cgit From 2d283bdd079c0ad4da020bbc9e9c2a4280823098 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:16 +1000 Subject: net/ncsi: Resource management NCSI spec (DSP0222) defines several objects: package, channel, mode, filter, version and statistics etc. This introduces the data structs to represent those objects and implement functions to manage them. Also, this introduces CONFIG_NET_NCSI for the newly implemented NCSI stack. * The user (e.g. netdev driver) dereference NCSI device by "struct ncsi_dev", which is embedded to "struct ncsi_dev_priv". The later one is used by NCSI stack internally. * Every NCSI device can have multiple packages simultaneously, up to 8 packages. It's represented by "struct ncsi_package" and identified by 3-bits ID. * Every NCSI package can have multiple channels, up to 32. It's represented by "struct ncsi_channel" and identified by 5-bits ID. * Every NCSI channel has version, statistics, various modes and filters. They are represented by "struct ncsi_channel_version", "struct ncsi_channel_stats", "struct ncsi_channel_mode" and "struct ncsi_channel_filter" separately. * Apart from AEN (Asynchronous Event Notification), the NCSI stack works in terms of command and response. This introduces "struct ncsi_req" to represent a complete NCSI transaction made of NCSI request and response. link: https://www.dmtf.org/sites/default/files/standards/documents/DSP0222_1.1.0.pdf Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. 
Miller --- include/net/ncsi.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/net/ncsi.h (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h new file mode 100644 index 000000000000..70d14ee1ef84 --- /dev/null +++ b/include/net/ncsi.h @@ -0,0 +1,46 @@ +#ifndef __NET_NCSI_H +#define __NET_NCSI_H + +/* + * The NCSI device states seen from external. More NCSI device states are + * only visible internally (in net/ncsi/internal.h). When the NCSI device + * is registered, it's in ncsi_dev_state_registered state. The state + * ncsi_dev_state_start is used to drive to choose active package and + * channel. After that, its state is changed to ncsi_dev_state_functional. + * + * The state ncsi_dev_state_stop helps to shut down the currently active + * package and channel while ncsi_dev_state_config helps to reconfigure + * them. + */ +enum { + ncsi_dev_state_registered = 0x0000, + ncsi_dev_state_functional = 0x0100, + ncsi_dev_state_probe = 0x0200, + ncsi_dev_state_config = 0x0300, + ncsi_dev_state_suspend = 0x0400, +}; + +struct ncsi_dev { + int state; + int link_up; + struct net_device *dev; + void (*handler)(struct ncsi_dev *ndev); +}; + +#ifdef CONFIG_NET_NCSI +struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)); +void ncsi_unregister_dev(struct ncsi_dev *nd); +#else /* !CONFIG_NET_NCSI */ +static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)) +{ + return NULL; +} + +static inline void ncsi_unregister_dev(struct ncsi_dev *nd) +{ +} +#endif /* CONFIG_NET_NCSI */ + +#endif /* __NET_NCSI_H */ -- cgit From 6389eaa7fa9c3ee6c7d39f6087b86660d17236ac Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:17 +1000 Subject: net/ncsi: NCSI command packet handler The NCSI command packets are sent from MC (Management Controller) to remote end. They are used for multiple purposes: probe existing NCSI package/channel, retrieve NCSI channel's capability, configure NCSI channel etc. This defines struct to represent NCSI command packets and introduces function ncsi_xmit_cmd(), which will be used to transmit NCSI command packet according to the request. The request is represented by struct ncsi_cmd_arg. Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index cec849a239f6..117d02e0fc31 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -87,6 +87,7 @@ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_NCSI 0x88F8 /* NCSI protocol */ #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ -- cgit From e6f44ed6d04d3185dcd8e8e98af8742d87bdffcc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:19 +1000 Subject: net/ncsi: Package and channel management This manages NCSI packages and channels: * The available packages and channels are enumerated in the first time of calling ncsi_start_dev(). The channels' capabilities are probed in the meanwhile. The NCSI network topology won't change until the NCSI device is destroyed. 
* There is a queue in every NCSI device. Each element in the queue is a channel waiting for configuration (bringup) or suspending (teardown). The channel's state (inactive/active) indicates the further action (configuration or suspending) that will be applied to the channel. Another channel state (invisible) means the requested action is being applied. * Hardware arbitration will be enabled if all available packages and channels support it. All available channels try to provide service when hardware arbitration is enabled. Otherwise, only one channel is selected as the active one at a time. * When a channel is in the active state, meaning it's providing service, a timer is started to retrieve the channel's link status. If the channel's link status fails to be updated within the determined period, the channel is reconfigured. This is the error handling implementation defined in the NCSI spec. Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. Miller --- include/net/ncsi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 70d14ee1ef84..1dbf42f79750 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -30,6 +30,7 @@ struct ncsi_dev { #ifdef CONFIG_NET_NCSI struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); +int ncsi_start_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -38,6 +39,11 @@ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, return NULL; } +static inline int ncsi_start_dev(struct ncsi_dev *nd) +{ + return -ENOTTY; +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } -- cgit From 59d3656d5bf504f771fc44fdbc7a9a8590795f22 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:46 -0700 Subject: bpf: add bpf_prog_add api for bulk prog refcnt A subsystem may need to store many copies of a bpf program, each deserving its own reference. Rather than requiring the caller to loop one by one (with possible mid-loop failure), add a bulk bpf_prog_add api. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c13e92b00bf5..75a5ae6bee07 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -224,6 +224,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -- cgit From 6a773a15a1e8874e5eccd2f29190c31085912c95 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:47 -0700 Subject: bpf: add XDP prog type for early driver filter Add a new bpf prog type that is intended to run in early stages of the packet rx path. Only minimal packet metadata will be available, hence a new context type, struct xdp_md, is exposed to userspace. So far it only exposes the packet start and end pointers, and only in read mode. An XDP program must return one of the well known enum values, all other return codes are reserved for future use.
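For illustration only, a minimal program of this type, written in restricted C for the clang BPF target, might look like the sketch below; the ELF section name and the Ethernet-header bounds check are conventions of the example, not something this patch mandates:

#include <linux/bpf.h>
#include <linux/if_ether.h>

/* Drop frames too short to carry an Ethernet header, pass everything else. */
__attribute__((section("xdp"), used))
int xdp_drop_runts(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;

        if (data + sizeof(struct ethhdr) > data_end)
                return XDP_DROP;

        return XDP_PASS;
}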
Unfortunately, this restriction is hard to enforce at verification time, so take the approach of warning at runtime when such programs are encountered. Out of bounds return codes should alias to XDP_ABORTED. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 18 ++++++++++++++++++ include/uapi/linux/bpf.h | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6fc31ef1da2d..15d816a8b755 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -368,6 +368,11 @@ struct bpf_skb_data_end { void *data_end; }; +struct xdp_buff { + void *data; + void *data_end; +}; + /* compute the linear packet data range [data, data_end) which * will be accessed by cls_bpf and act_bpf programs */ @@ -429,6 +434,18 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, skb); } +static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret; + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, (void *)xdp); + rcu_read_unlock(); + + return ret; +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), @@ -509,6 +526,7 @@ bool bpf_helper_changes_skb_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4d922439d20..a51786566c2f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, }; #define BPF_PSEUDO_MAP_FD 1 @@ -439,4 +440,23 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will result + * in packet drop. + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit From a7862b45849fe2f8610a2bec89235580f55d337f Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:48 -0700 Subject: net: add ndo to setup/query xdp prog in adapter rx Add one new netdev op for drivers implementing the BPF_PROG_TYPE_XDP filter. The single op is used for both setup/query of the xdp program, modelled after ndo_setup_tc. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/linux/netdevice.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49736a31acaa..fab9a1c2a2ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct bpf_prog; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -799,6 +800,33 @@ struct tc_to_netdev { }; }; +/* These structures hold the attributes of xdp state that are being passed + * to the netdevice through the xdp op. 
+ */ +enum xdp_netdev_command { + /* Set or clear a bpf program used in the earliest stages of packet + * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee + * is responsible for calling bpf_prog_put on any old progs that are + * stored. In case of error, the callee need not release the new prog + * reference, but on success it takes ownership and must bpf_prog_put + * when it is no longer used. + */ + XDP_SETUP_PROG, + /* Check if a bpf program is set on the device. The callee should + * return true if a program is currently attached and running. + */ + XDP_QUERY_PROG, +}; + +struct netdev_xdp { + enum xdp_netdev_command command; + union { + /* XDP_SETUP_PROG */ + struct bpf_prog *prog; + /* XDP_QUERY_PROG */ + bool prog_attached; + }; +}; /* * This structure defines the management hooks for network devices. @@ -1087,6 +1115,9 @@ struct tc_to_netdev { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. + * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * This function is used to set or query state related to XDP on the + * netdevice. See definition of enum xdp_netdev_command for details. * */ struct net_device_ops { @@ -1271,6 +1302,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + int (*ndo_xdp)(struct net_device *dev, + struct netdev_xdp *xdp); }; /** @@ -3257,6 +3290,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); +int dev_change_xdp_fd(struct net_device *dev, int fd); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit From d1fdd9138682e0f272beee0cb08b6328c5478b26 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:49 -0700 Subject: rtnl: add option for setting link xdp prog Sets the bpf program represented by fd as an early filter in the rx path of the netdev. The fd must have been created as BPF_PROG_TYPE_XDP. Providing a negative value as fd clears the program. Getting the fd back via rtnl is not possible, therefore reading of this value merely provides a bool whether the program is valid on the link or not. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4285ac31e865..a1b5202c5f6b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -156,6 +156,7 @@ enum { IFLA_GSO_MAX_SEGS, IFLA_GSO_MAX_SIZE, IFLA_PAD, + IFLA_XDP, __IFLA_MAX }; @@ -843,4 +844,15 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* XDP section */ + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit From 6ce96ca348a9e949f8c43f4d3e98db367d93cffd Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:53 -0700 Subject: bpf: add XDP_TX xdp_action for direct forwarding XDP enabled drivers must transmit received packets back out on the same port they were received on when a program returns this action. 
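As a purely illustrative sketch (not part of the patch), a program could use the new action to reflect traffic, swapping the Ethernet source and destination addresses before handing the frame back to the driver:

#include <linux/bpf.h>
#include <linux/if_ether.h>

/* Bounce each frame back out the ingress port with the MAC addresses swapped. */
__attribute__((section("xdp"), used))
int xdp_reflect(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;
        unsigned char tmp[ETH_ALEN];

        if (data + sizeof(*eth) > data_end)
                return XDP_ABORTED;

        __builtin_memcpy(tmp, eth->h_dest, ETH_ALEN);
        __builtin_memcpy(eth->h_dest, eth->h_source, ETH_ALEN);
        __builtin_memcpy(eth->h_source, tmp, ETH_ALEN);

        return XDP_TX;
}

The bounds check is what lets the verifier prove that the header accesses stay within the received frame.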
Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a51786566c2f..2b7076f5b5ad 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -449,6 +449,7 @@ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, + XDP_TX, }; /* user accessible metadata for XDP packet hook -- cgit From 224e92e02a769b8028ca2450443586af8b4f1715 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:54 -0700 Subject: net/mlx4_en: break out tx_desc write into separate function In preparation for writing the tx descriptor from multiple functions, create a helper for both normal and blueflame access. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 587cdf943b52..deaa2217214d 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -291,16 +291,18 @@ enum { MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; +union mlx4_wqe_qpn_vlan { + struct { + __be16 vlan_tag; + u8 ins_vlan; + u8 fence_size; + }; + __be32 bf_qpn; +}; + struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; - union { - struct { - __be16 vlan_tag; - u8 ins_vlan; - u8 fence_size; - }; - __be32 bf_qpn; - }; + union mlx4_wqe_qpn_vlan qpn_vlan; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) -- cgit From cc2e0b3fbcdd9667d7b7ecdf36d7b4d3647681d6 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Wed, 20 Jul 2016 07:55:52 -0700 Subject: bpf: fix implicit declaration of bpf_prog_add For the ifndef case of CONFIG_BPF_SYSCALL, an inline version of bpf_prog_add needs to exist otherwise the build breaks on some configs. drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2544:10: error: implicit declaration of function 'bpf_prog_add' prog = bpf_prog_add(prog, priv->rx_ring_num - 1); The function is introduced in 59d3656d5bf50 ("bpf: add bpf_prog_add api for bulk prog refcnt") and first used in 47f1afdba2b87 ("net/mlx4_en: add support for fast rx drop bpf program"). Fixes: 47f1afdba2b87 ("net/mlx4_en: add support for fast rx drop bpf program") Reported-by: kbuild test robot Reported-by: Tariq Toukan Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75a5ae6bee07..36da0749205a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -289,6 +289,10 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void bpf_prog_put(struct bpf_prog *prog) { -- cgit From b02b94b331be5097d248d49242bd1fc0649a8092 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Jul 2016 20:17:47 +0200 Subject: bpf, elf: add official ELF machine define for eBPF Add the official BPF ELF e_machine value that was assigned recently [1,2] and will be propagated to glibc, et al. LLVM is switching to it in 3.9 release. 
[1] https://github.com/llvm-mirror/llvm/commit/36b9c09330bfb5e771914cfe307588f30d5510d2 [2] http://lists.iovisor.org/pipermail/iovisor-dev/2016-June/000266.html Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index c3fdfe79e5cc..cb5d1a519202 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -40,6 +40,7 @@ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ +#define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ -- cgit From 82de0be6862cdca2e6802267bda57cfc8844d3a7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 18 Jul 2016 11:39:23 +0800 Subject: netfilter: Add helper array register/unregister functions Add nf_ct_helper_init(), nf_conntrack_helpers_register() and nf_conntrack_helpers_unregister() functions to avoid repetitive opencoded initialization in helpers. This patch keeps an id parameter for nf_ct_helper_init() not to break helper matching by name that has been inconsistently exposed to userspace through ports, eg. ftp-2121, and through an incremental id, eg. tftp-1. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 6cf614bc0029..1eaac1f4cd6a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); int nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); -- cgit From 5f652bb2eb3eb38f97194ce5c41da1fa12e914b8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jul 2016 18:11:31 +0200 Subject: gro_cells: gro_cells_receive now return error code so that the caller can update stats accordingly, if needed Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/gro_cells.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index cf6c74550baa..d15214d673b2 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -14,27 +14,26 @@ struct gro_cells { struct gro_cell __percpu *cells; }; -static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) +static inline int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct gro_cell *cell; struct net_device *dev = skb->dev; - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { - netif_rx(skb); - return; - } + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + return netif_rx(skb); cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return; + return NET_RX_DROP; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + return NET_RX_SUCCESS; } /* called under BH context */ -- cgit From 23014011ba4209a086931ff402eac1c41abbe456 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:16 +0200 Subject: netfilter: conntrack: support a fixed size of 128 distinct labels The conntrack label extension is currently variable-sized, e.g. if only 2 labels are used by iptables rules then the labels->bits[] array will only contain one element. We track size of each label storage area in the 'words' member. But in nftables and openvswitch we always have to ask for worst-case since we don't know what bit will be used at configuration time. As most arches are 64bit we need to allocate 24 bytes in this case: struct nf_conn_labels { u8 words; /* 0 1 */ /* XXX 7 bytes hole, try to pack */ long unsigned bits[2]; /* 8 24 */ Make bits a fixed size and drop the words member, it simplifies the code and only increases memory requirements on x86 when less than 64bit labels are required. We still only allocate the extension if its needed. 
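As a hypothetical caller-side sketch (not part of the patch), setting a label bit with the fixed-size layout only needs a bounds check against the array itself, since the per-extension 'words' field is gone:

#include <linux/bitops.h>
#include <net/netfilter/nf_conntrack_labels.h>

/* Hypothetical helper: set one connlabel bit on a conntrack entry. */
static int example_label_set(struct nf_conn *ct, u16 bit)
{
        struct nf_conn_labels *labels = nf_ct_labels_find(ct);

        if (!labels || BIT_WORD(bit) >= ARRAY_SIZE(labels->bits))
                return -ENOSPC;

        if (!test_bit(bit, labels->bits))
                set_bit(bit, labels->bits);

        return 0;
}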
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c5f8fc736b3d..0fd4989de836 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -10,8 +10,7 @@ #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) struct nf_conn_labels { - u8 words; - unsigned long bits[]; + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) @@ -26,20 +25,13 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - struct nf_conn_labels *cl_ext; struct net *net = nf_ct_net(ct); - u8 words; - words = ACCESS_ONCE(net->ct.label_words); - if (words == 0) + if (net->ct.labels_used == 0) return NULL; - cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - words * sizeof(long), GFP_ATOMIC); - if (cl_ext != NULL) - cl_ext->words = words; - - return cl_ext; + return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + sizeof(struct nf_conn_labels), GFP_ATOMIC); #else return NULL; #endif -- cgit From 857ed310c013fe0d0059f955048dab589fa7a57a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:17 +0200 Subject: netfilter: connlabels: move set helper to xt_connlabel xt_connlabel is the only user so move it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 0fd4989de836..498814626e28 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -37,8 +37,6 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #endif } -int nf_connlabel_set(struct nf_conn *ct, u16 bit); - int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words); -- cgit From bf3994d2ed310813da28362d87bfe9f0e1c3e37f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 21 Jul 2016 12:03:11 +0200 Subject: net/sched: introduce Match-all classifier The matchall classifier matches every packet and allows the user to apply actions on it. This filter is very useful in usecases where every packet should be matched, for example, packet mirroring (SPAN) can be setup very easily using that filter. Signed-off-by: Jiri Pirko Signed-off-by: Yotam Gigi Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 5702e933dc07..a32494887e01 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -433,6 +433,17 @@ enum { #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) +/* Match-all classifier */ + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { -- cgit From b87f7936a93246804cf70e7e2e0568799c948bb1 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Thu, 21 Jul 2016 12:03:12 +0200 Subject: net/sched: Add match-all classifier hw offloading. Following the work that have been done on offloading classifiers like u32 and flower, now the match-all classifier hw offloading is possible. if the interface supports tc offloading. To control the offloading, two tc flags have been introduced: skip_sw and skip_hw. Typical usage: tc filter add dev eth25 parent ffff: \ matchall skip_sw \ action mirred egress mirror \ dev eth27 Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 11 +++++++++++ include/uapi/linux/pkt_cls.h | 1 + 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43c749b1b619..076df5360ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -787,6 +787,7 @@ enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, + TC_SETUP_MATCHALL, }; struct tc_cls_u32_offload; @@ -797,6 +798,7 @@ struct tc_to_netdev { u8 tc; struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; + struct tc_cls_matchall_offload *cls_mall; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 3722dda0199d..6f8d65342d3a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -442,4 +442,15 @@ struct tc_cls_flower_offload { struct tcf_exts *exts; }; +enum tc_matchall_command { + TC_CLSMATCHALL_REPLACE, + TC_CLSMATCHALL_DESTROY, +}; + +struct tc_cls_matchall_offload { + enum tc_matchall_command command; + struct tcf_exts *exts; + unsigned long cookie; +}; + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index a32494887e01..d1c1ccaba787 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -439,6 +439,7 @@ enum { TCA_MATCHALL_UNSPEC, TCA_MATCHALL_CLASSID, TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, __TCA_MATCHALL_MAX, }; -- cgit From 56a20680f70393d199fc5e8ecc7859549ca5a0c0 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Thu, 21 Jul 2016 12:03:16 +0200 Subject: net/sched: act_mirred: Add helper inlines to access tcf_mirred info. The helper function is_tcf_mirred_mirror helps finding whether an action struct is of type mirred and is configured to be of type mirror. Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_mirred.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index e891835eb74e..6a13a7c74e0c 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -24,6 +24,15 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } +static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; +#endif + return false; +} + static inline int tcf_mirred_ifindex(const struct tc_action *a) { return to_mirred(a)->tcfm_ifindex; -- cgit From aa7145c16d6bf086538ad7eb20c807513bfa5efc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Jul 2016 01:19:42 +0200 Subject: bpf, events: fix offset in skb copy handler This patch fixes the __output_custom() routine we currently use with bpf_skb_copy(). I missed that when len is larger than the size of the current handle, we can issue multiple invocations of copy_func, and __output_custom() advances destination but also source buffer by the written amount of bytes. When we have __output_custom(), this is actually wrong since in that case the source buffer points to a non-linear object, in our case an skb, which the copy_func helper is supposed to walk. Therefore, since this is non-linear we thus need to pass the offset into the helper, so that copy_func can use it for extracting the data from the source object. Therefore, adjust the callback signatures properly and pass offset into the skb_header_pointer() invoked from bpf_skb_copy() callback. The __DEFINE_OUTPUT_COPY_BODY() is adjusted to accommodate for two things: i) to pass in whether we should advance source buffer or not; this is a compile-time constant condition, ii) to pass in the offset for __output_custom(), which we do with help of __VA_ARGS__, so everything can stay inlined as is currently. Both changes allow for adapting the __output_* fast-path helpers w/o extra overhead. Fixes: 555c8a8623a3 ("bpf: avoid stack copy and use skb ctx for event output") Fixes: 7e3f977edd0b ("perf, events: add non-linear data support for raw records") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 2 +- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 36da0749205a..11134238417d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -211,7 +211,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, - unsigned long len); + unsigned long off, unsigned long len); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e79e6c6fed89..15e55b7ee096 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -70,7 +70,7 @@ struct perf_callchain_entry_ctx { }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, - unsigned long len); + unsigned long off, unsigned long len); struct perf_raw_frag { union { -- cgit From 2ccbe2cb79f2f74ab739252299b6f9ff27586f2c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 22 Jul 2016 15:07:56 +0200 Subject: macsec: limit ICV length to 16 octets IEEE 802.1AE-2006 standard recommends that the ICV element in a MACsec frame should not exceed 16 octets: add MACSEC_STD_ICV_LEN in uapi definitions accordingly, and avoid accepting configurations where the ICV length exceeds the standard value. Leave definition of MACSEC_MAX_ICV_LEN unchanged for backwards compatibility with userspace programs. Fixes: dece8d2b78d1 ("uapi: add MACsec bits") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/uapi/linux/if_macsec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index f7d4831a2cc7..02fc49cb72d8 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -26,6 +26,8 @@ #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 +/* upper limit for ICV length as recommended by IEEE802.1AE-2006 */ +#define MACSEC_STD_ICV_LEN 16 enum macsec_attrs { MACSEC_ATTR_UNSPEC, -- cgit From ae76715d153e33c249b6850361e4d8d775388b5a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Sun, 24 Jul 2016 16:12:39 +0300 Subject: net/mlx5e: Check the minimum inline header mode before xmit Each send queue (SQ) has inline mode that defines the minimal required inline headers in the SQ WQE. Before sending each packet check that the minimum required headers on the WQE are copied. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e0a3ed758287..0b6d15cddb2f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -129,6 +129,13 @@ __mlx5_mask(typ, fld)) tmp; \ }) +enum mlx5_inline_modes { + MLX5_INLINE_MODE_NONE, + MLX5_INLINE_MODE_L2, + MLX5_INLINE_MODE_IP, + MLX5_INLINE_MODE_TCP_UDP, +}; + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, -- cgit From cff92d7c7ebd7ceddd4def6b39e0302585b1eb14 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Sun, 24 Jul 2016 16:12:40 +0300 Subject: net/mlx5e: Query minimum required header copy during xmit Add support for query the minimum inline mode from the Firmware. 
It is required for correct TX steering according to L3/L4 packet headers. Each send queue (SQ) has inline mode that defines the minimal required headers that needs to be copied into the SQ WQE. The driver asks the Firmware for the wqe_inline_mode device capability value. In case the device capability defined as "vport context" the driver must check the reported min inline mode from the vport context before creating its SQs. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++--- include/linux/mlx5/vport.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d671e4e8e7db..21bc4557b67a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -536,7 +536,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 self_lb_en_modifiable[0x1]; u8 reserved_at_9[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_at_10[0x4]; + u8 reserved_at_10[0x2]; + u8 wqe_inline_mode[0x2]; u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; @@ -2270,7 +2271,8 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x1]; + u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; u8 reserved_at_d[0x13]; @@ -2367,7 +2369,9 @@ struct mlx5_ifc_rmpc_bits { }; struct mlx5_ifc_nic_vport_context_bits { - u8 reserved_at_0[0x1f]; + u8 reserved_at_0[0x5]; + u8 min_wqe_inline_mode[0x3]; + u8 reserved_at_8[0x17]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 6c16c198f680..e087b7d047ac 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -43,6 +43,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); +void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); -- cgit From 9b8ac4f9dd60ac7375fb0e221dbf596db4c4e622 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 24 Jul 2016 19:24:09 +0100 Subject: gtp: #define #define _GTP_H_ and not #define _GTP_H Fix clang build warning: ./include/net/gtp.h:1:9: warning: '_GTP_H_' is used as a header guard here, followed by #define of a different macro [-Wheader-guard] fix by defining _GTP_H_ and not _GTP_H Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- include/net/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gtp.h b/include/net/gtp.h index 894a37b87d63..6398891b99ba 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -1,5 +1,5 @@ #ifndef _GTP_H_ -#define _GTP_H +#define _GTP_H_ /* General GTP protocol related definitions. */ -- cgit From 86cb13e4ec5060d94069a8418fd4f3ccb38edee2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Jul 2016 13:12:33 +0300 Subject: mlxsw: spectrum: Fix compilation error when CLS_ACT isn't set When CONFIG_NET_CLS_ACT isn't set 'struct tcf_exts' has no member named 'actions' and we therefore must not access it. Otherwise compilation fails. Fix this by introducing a new macro similar to tc_no_actions(), which always returns 'false' if CONFIG_NET_CLS_ACT isn't set. 
Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Reported-by: kbuild test robot Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index fb82b5b5d9e7..0bb210635e5f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -192,6 +192,9 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); #define tc_for_each_action(_a, _exts) \ list_for_each_entry(a, &(_exts)->actions, list) +#define tc_single_action(_exts) \ + (list_is_singular(&(_exts)->actions)) + static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 lastuse) { @@ -205,6 +208,7 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #define tc_no_actions(_exts) true #define tc_for_each_action(_a, _exts) while ((void)(_a), 0) +#define tc_single_action(_exts) false #define tcf_action_stats_update(a, bytes, packets, lastuse) #endif /* CONFIG_NET_CLS_ACT */ -- cgit From 96ae52279594470622ff0585621a13e96b700600 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jul 2016 05:54:46 -0700 Subject: bpf: Add bpf_probe_write_user BPF helper to be called in tracers This allows user memory to be written to during the course of a kprobe. It shouldn't be used to implement any kind of security mechanism because of TOC-TOU attacks, but rather to debug, divert, and manipulate execution of semi-cooperative processes. Although it uses probe_kernel_write, we limit the address space the probe can write into by checking the space with access_ok. We do this as opposed to calling copy_to_user directly, in order to avoid sleeping. In addition we ensure the threads's current fs / segment is USER_DS and the thread isn't exiting nor a kernel thread. Given this feature is meant for experiments, and it has a risk of crashing the system, and running programs, we print a warning on when a proglet that attempts to use this helper is installed, along with the pid and process name. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b7076f5b5ad..da218fec6056 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -365,6 +365,16 @@ enum bpf_func_id { */ BPF_FUNC_get_current_task, + /** + * bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + */ + BPF_FUNC_probe_write_user, + __BPF_FUNC_MAX_ID, }; -- cgit From a85a970af265f156740977168b542234511b28a8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2016 16:09:41 -0700 Subject: net_sched: move tc_action into tcf_common struct tc_action is confusing, currently we use it for two purposes: 1) Pass in arguments and carry out results from helper functions 2) A generic representation for tc actions The first one is error-prone, since we need to make sure we don't miss anything. This patch aims to get rid of this use, by moving tc_action into tcf_common, so that they are allocated together in hashtable and can be cast'ed easily. 
And together with the following patch, we could really make tc_action a generic representation for all tc actions and each type of action can inherit from it. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 52 ++++++++++++++++++++++------------------ include/net/tc_act/tc_bpf.h | 3 +-- include/net/tc_act/tc_connmark.h | 3 +-- include/net/tc_act/tc_csum.h | 3 +-- include/net/tc_act/tc_defact.h | 3 +-- include/net/tc_act/tc_gact.h | 5 ++-- include/net/tc_act/tc_ife.h | 3 +-- include/net/tc_act/tc_ipt.h | 3 +-- include/net/tc_act/tc_mirred.h | 3 +-- include/net/tc_act/tc_nat.h | 5 +--- include/net/tc_act/tc_pedit.h | 3 +-- include/net/tc_act/tc_skbedit.h | 3 +-- include/net/tc_act/tc_vlan.h | 3 +-- 13 files changed, 42 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 0bb210635e5f..8b199095ea51 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,7 +10,26 @@ #include #include + +struct tcf_hashinfo { + struct hlist_head *htab; + unsigned int hmask; + spinlock_t lock; + u32 index; +}; + +struct tc_action_ops; + +struct tc_action { + const struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct list_head list; + struct tcf_hashinfo *hinfo; +}; + struct tcf_common { + struct tc_action tcfc_act; struct hlist_node tcfc_head; u32 tcfc_index; int tcfc_refcnt; @@ -26,6 +45,7 @@ struct tcf_common { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; +#define tcf_act common.tcfc_act #define tcf_head common.tcfc_head #define tcf_index common.tcfc_index #define tcf_refcnt common.tcfc_refcnt @@ -39,13 +59,6 @@ struct tcf_common { #define tcf_lock common.tcfc_lock #define tcf_rcu common.tcfc_rcu -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; -}; - static inline unsigned int tcf_hash(u32 index, unsigned int hmask) { return index & hmask; @@ -88,15 +101,6 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } -struct tc_action { - void *priv; - const struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct list_head list; - struct tcf_hashinfo *hinfo; -}; - #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -106,17 +110,18 @@ struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ + size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct net *, struct tc_action *, u32); + int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *act, int ovr, + struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, struct tc_action *); + struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); }; @@ -152,13 +157,14 @@ static inline void tc_action_net_exit(struct tc_action_net *tn) int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - struct tc_action *a); -int tcf_hash_search(struct 
tc_action_net *tn, struct tc_action *a, u32 index); + const struct tc_action_ops *ops); +int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action *a, int size, int bind, bool cpustats); + struct tc_action **a, const struct tc_action_ops *ops, int bind, + bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 958d69cfb19c..80a4d6f49773 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -23,7 +23,6 @@ struct tcf_bpf { struct sock_filter *bpf_ops; const char *bpf_name; }; -#define to_bpf(a) \ - container_of(a->priv, struct tcf_bpf, common) +#define to_bpf(a) ((struct tcf_bpf *)a) #endif /* __NET_TC_BPF_H */ diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 02caa406611b..8a661135f4ac 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -9,7 +9,6 @@ struct tcf_connmark_info { u16 zone; }; -#define to_connmark(a) \ - container_of(a->priv, struct tcf_connmark_info, common) +#define to_connmark(a) ((struct tcf_connmark_info *)a) #endif /* __NET_TC_CONNMARK_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index fa8f5fac65e9..1a9ef15d573b 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -9,7 +9,6 @@ struct tcf_csum { u32 update_flags; }; -#define to_tcf_csum(a) \ - container_of(a->priv,struct tcf_csum,common) +#define to_tcf_csum(a) ((struct tcf_csum *)a) #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index ab9b5d6be67b..e25b4eb4fc66 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -8,7 +8,6 @@ struct tcf_defact { u32 tcfd_datalen; void *tcfd_defdata; }; -#define to_defact(a) \ - container_of(a->priv, struct tcf_defact, common) +#define to_defact(a) ((struct tcf_defact *)a) #endif /* __NET_TC_DEF_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 93c520b83d10..119cdb418c23 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -13,8 +13,7 @@ struct tcf_gact { atomic_t packets; #endif }; -#define to_gact(a) \ - container_of(a->priv, struct tcf_gact, common) +#define to_gact(a) ((struct tcf_gact *)a) static inline bool is_tcf_gact_shot(const struct tc_action *a) { @@ -24,7 +23,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) if (a->ops && a->ops->type != TCA_ACT_GACT) return false; - gact = a->priv; + gact = to_gact(a); if (gact->tcf_action == TC_ACT_SHOT) return true; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c55facd17b7e..7921abe42adc 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -16,8 +16,7 @@ struct tcf_ife_info { /* list of metaids allowed */ struct list_head metalist; }; -#define to_ife(a) \ - container_of(a->priv, struct tcf_ife_info, common) +#define to_ife(a) ((struct tcf_ife_info *)a) struct tcf_meta_info { const struct tcf_meta_ops *ops; diff --git a/include/net/tc_act/tc_ipt.h 
b/include/net/tc_act/tc_ipt.h index c0f4193f432c..c22ae7ab66ed 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -11,7 +11,6 @@ struct tcf_ipt { char *tcfi_tname; struct xt_entry_target *tcfi_t; }; -#define to_ipt(a) \ - container_of(a->priv, struct tcf_ipt, common) +#define to_ipt(a) ((struct tcf_ipt *)a) #endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 6a13a7c74e0c..89aebd22cd79 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -12,8 +12,7 @@ struct tcf_mirred { struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; -#define to_mirred(a) \ - container_of(a->priv, struct tcf_mirred, common) +#define to_mirred(a) ((struct tcf_mirred *)a) static inline bool is_tcf_mirred_redirect(const struct tc_action *a) { diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index 63d8e9ca9d99..a91ad3ad565e 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -13,9 +13,6 @@ struct tcf_nat { u32 flags; }; -static inline struct tcf_nat *to_tcf_nat(struct tc_action *a) -{ - return container_of(a->priv, struct tcf_nat, common); -} +#define to_tcf_nat(a) ((struct tcf_nat *)a) #endif /* __NET_TC_NAT_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 5b80998879c7..2cccfbaae800 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -9,7 +9,6 @@ struct tcf_pedit { unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; }; -#define to_pedit(a) \ - container_of(a->priv, struct tcf_pedit, common) +#define to_pedit(a) ((struct tcf_pedit *)a) #endif /* __NET_TC_PED_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index d01a5d40cfb5..9e0548998327 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -30,8 +30,7 @@ struct tcf_skbedit { u16 queue_mapping; u16 ptype; }; -#define to_skbedit(a) \ - container_of(a->priv, struct tcf_skbedit, common) +#define to_skbedit(a) ((struct tcf_skbedit *)a) /* Return true iff action is mark */ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 93b70ade1ff3..584b80788d52 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -21,7 +21,6 @@ struct tcf_vlan { u16 tcfv_push_vid; __be16 tcfv_push_proto; }; -#define to_vlan(a) \ - container_of(a->priv, struct tcf_vlan, common) +#define to_vlan(a) ((struct tcf_vlan *)a) #endif /* __NET_TC_VLAN_H */ -- cgit From ec0595cc4495be579309b4bfd5e997af0f2ae6f9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2016 16:09:42 -0700 Subject: net_sched: get rid of struct tcf_common After the previous patch, struct tc_action should be enough to represent the generic tc action, tcf_common is not necessary any more. This patch gets rid of it to make tc action code more readable. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/act_api.h | 63 +++++++++++++++++++--------------------- include/net/tc_act/tc_bpf.h | 2 +- include/net/tc_act/tc_connmark.h | 2 +- include/net/tc_act/tc_csum.h | 2 +- include/net/tc_act/tc_defact.h | 2 +- include/net/tc_act/tc_gact.h | 2 +- include/net/tc_act/tc_ife.h | 2 +- include/net/tc_act/tc_ipt.h | 2 +- include/net/tc_act/tc_mirred.h | 2 +- include/net/tc_act/tc_nat.h | 2 +- include/net/tc_act/tc_pedit.h | 2 +- include/net/tc_act/tc_skbedit.h | 2 +- include/net/tc_act/tc_vlan.h | 2 +- 13 files changed, 42 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8b199095ea51..41e6a24a44b9 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -22,42 +22,39 @@ struct tc_action_ops; struct tc_action { const struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct list_head list; - struct tcf_hashinfo *hinfo; -}; - -struct tcf_common { - struct tc_action tcfc_act; - struct hlist_node tcfc_head; - u32 tcfc_index; - int tcfc_refcnt; - int tcfc_bindcnt; - u32 tcfc_capab; - int tcfc_action; - struct tcf_t tcfc_tm; - struct gnet_stats_basic_packed tcfc_bstats; - struct gnet_stats_queue tcfc_qstats; - struct gnet_stats_rate_est64 tcfc_rate_est; - spinlock_t tcfc_lock; - struct rcu_head tcfc_rcu; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct list_head list; + struct tcf_hashinfo *hinfo; + + struct hlist_node tcfa_head; + u32 tcfa_index; + int tcfa_refcnt; + int tcfa_bindcnt; + u32 tcfa_capab; + int tcfa_action; + struct tcf_t tcfa_tm; + struct gnet_stats_basic_packed tcfa_bstats; + struct gnet_stats_queue tcfa_qstats; + struct gnet_stats_rate_est64 tcfa_rate_est; + spinlock_t tcfa_lock; + struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfc_act -#define tcf_head common.tcfc_head -#define tcf_index common.tcfc_index -#define tcf_refcnt common.tcfc_refcnt -#define tcf_bindcnt common.tcfc_bindcnt -#define tcf_capab common.tcfc_capab -#define tcf_action common.tcfc_action -#define tcf_tm common.tcfc_tm -#define tcf_bstats common.tcfc_bstats -#define tcf_qstats common.tcfc_qstats -#define tcf_rate_est common.tcfc_rate_est -#define tcf_lock common.tcfc_lock -#define tcf_rcu common.tcfc_rcu +#define tcf_act common.tcfa_act +#define tcf_head common.tcfa_head +#define tcf_index common.tcfa_index +#define tcf_refcnt common.tcfa_refcnt +#define tcf_bindcnt common.tcfa_bindcnt +#define tcf_capab common.tcfa_capab +#define tcf_action common.tcfa_action +#define tcf_tm common.tcfa_tm +#define tcf_bstats common.tcfa_bstats +#define tcf_qstats common.tcfa_qstats +#define tcf_rate_est common.tcfa_rate_est +#define tcf_lock common.tcfa_lock +#define tcf_rcu common.tcfa_rcu static inline unsigned int tcf_hash(u32 index, unsigned int hmask) { diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 80a4d6f49773..2b94673a3dbc 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -14,7 +14,7 @@ #include struct tcf_bpf { - struct tcf_common common; + struct tc_action common; struct bpf_prog __rcu *filter; union { u32 bpf_fd; diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 8a661135f4ac..59b515d32bb4 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -4,7 +4,7 @@ #include struct tcf_connmark_info { - struct tcf_common common; 
+ struct tc_action common; struct net *net; u16 zone; }; diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 1a9ef15d573b..f31fb6331a53 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -5,7 +5,7 @@ #include struct tcf_csum { - struct tcf_common common; + struct tc_action common; u32 update_flags; }; diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index e25b4eb4fc66..d47f040a3bdf 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -4,7 +4,7 @@ #include struct tcf_defact { - struct tcf_common common; + struct tc_action common; u32 tcfd_datalen; void *tcfd_defdata; }; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 119cdb418c23..b6f173910226 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -5,7 +5,7 @@ #include struct tcf_gact { - struct tcf_common common; + struct tc_action common; #ifdef CONFIG_GACT_PROB u16 tcfg_ptype; u16 tcfg_pval; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 7921abe42adc..5164bd7a38fb 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -8,7 +8,7 @@ #define IFE_METAHDRLEN 2 struct tcf_ife_info { - struct tcf_common common; + struct tc_action common; u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index c22ae7ab66ed..31309766e379 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -6,7 +6,7 @@ struct xt_entry_target; struct tcf_ipt { - struct tcf_common common; + struct tc_action common; u32 tcfi_hook; char *tcfi_tname; struct xt_entry_target *tcfi_t; diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 89aebd22cd79..62770add15bd 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -5,7 +5,7 @@ #include struct tcf_mirred { - struct tcf_common common; + struct tc_action common; int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index a91ad3ad565e..56681a320612 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -5,7 +5,7 @@ #include struct tcf_nat { - struct tcf_common common; + struct tc_action common; __be32 old_addr; __be32 new_addr; diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 2cccfbaae800..29e38d6823df 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -4,7 +4,7 @@ #include struct tcf_pedit { - struct tcf_common common; + struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 9e0548998327..5767e9dbcf92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -23,7 +23,7 @@ #include struct tcf_skbedit { - struct tcf_common common; + struct tc_action common; u32 flags; u32 priority; u32 mark; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 584b80788d52..e29f52e8bdf1 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -16,7 +16,7 @@ #define VLAN_F_PUSH 0x2 struct tcf_vlan { - struct tcf_common common; + struct tc_action common; int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; -- cgit From 9ff26e9fabaf52f28fb5e875c0b9ffc2d1512039 Mon Sep 17 00:00:00 2001 From: Parthasarathy 
Bhuvaragan Date: Tue, 26 Jul 2016 08:47:18 +0200 Subject: tipc: introduce constants for tipc address validation In this commit, we introduce defines for tipc address size, offset and mask specification for Zone.Cluster.Node. There is no functional change in this commit. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 6f71b9b41595..bf049e8fe31b 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -60,26 +60,48 @@ struct tipc_name_seq { __u32 upper; }; +/* TIPC Address Size, Offset, Mask specification for Z.C.N + */ +#define TIPC_NODE_BITS 12 +#define TIPC_CLUSTER_BITS 12 +#define TIPC_ZONE_BITS 8 + +#define TIPC_NODE_OFFSET 0 +#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) + +#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) + +#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) + +#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) + static inline __u32 tipc_addr(unsigned int zone, unsigned int cluster, unsigned int node) { - return (zone << 24) | (cluster << 12) | node; + return (zone << TIPC_ZONE_OFFSET) | + (cluster << TIPC_CLUSTER_OFFSET) | + node; } static inline unsigned int tipc_zone(__u32 addr) { - return addr >> 24; + return addr >> TIPC_ZONE_OFFSET; } static inline unsigned int tipc_cluster(__u32 addr) { - return (addr >> 12) & 0xfff; + return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; } static inline unsigned int tipc_node(__u32 addr) { - return addr & 0xfff; + return addr & TIPC_NODE_MASK; } /* -- cgit From 7b3f52296493656015f0c0deddb6e90e36b9cda2 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:19 +0200 Subject: tipc: make cluster size threshold for monitoring configurable In this commit, we introduce support to configure the minimum threshold to activate the new link monitoring algorithm. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. 
Miller --- include/uapi/linux/tipc_netlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d4c8f142ba63..d387b65a0d97 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -56,6 +56,7 @@ enum { TIPC_NL_NET_GET, TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, + TIPC_NL_MON_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -72,6 +73,7 @@ enum { TIPC_NLA_NODE, /* nest */ TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE, /* nest */ + TIPC_NLA_MON, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -166,6 +168,15 @@ enum { TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 }; +/* Monitor info */ +enum { + TIPC_NLA_MON_UNSPEC, + TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + + __TIPC_NLA_MON_MAX, + TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, -- cgit From bf1035b2ff5296c7c49e262152253ce29d87e82d Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:20 +0200 Subject: tipc: get monitor threshold for the cluster In this commit, we add support to fetch the configured cluster monitoring threshold. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d387b65a0d97..d07c6ec76062 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -57,6 +57,7 @@ enum { TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, + TIPC_NL_MON_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 -- cgit From cf6f7e1d51090772d5ff7355aaf0fcff17f20d1a Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:22 +0200 Subject: tipc: dump monitor attributes In this commit, we dump the monitor attributes when queried. The link monitor attributes are separated into two kinds: 1. general attributes per bearer 2. specific attributes per node/peer This style resembles the socket attributes and the nametable publications per socket. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. 
Miller --- include/uapi/linux/tipc_netlink.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d07c6ec76062..5f3f6d09fb79 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -58,6 +58,7 @@ enum { TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, TIPC_NL_MON_GET, + TIPC_NL_MON_PEER_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -75,6 +76,7 @@ enum { TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE, /* nest */ TIPC_NLA_MON, /* nest */ + TIPC_NLA_MON_PEER, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -173,6 +175,11 @@ enum { enum { TIPC_NLA_MON_UNSPEC, TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + TIPC_NLA_MON_REF, /* u32 */ + TIPC_NLA_MON_ACTIVE, /* flag */ + TIPC_NLA_MON_BEARER_NAME, /* string */ + TIPC_NLA_MON_PEERCNT, /* u32 */ + TIPC_NLA_MON_LISTGEN, /* u32 */ __TIPC_NLA_MON_MAX, TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 @@ -194,6 +201,24 @@ enum { TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 }; +/* Monitor peer info */ +enum { + TIPC_NLA_MON_PEER_UNSPEC, + + TIPC_NLA_MON_PEER_ADDR, /* u32 */ + TIPC_NLA_MON_PEER_DOMGEN, /* u32 */ + TIPC_NLA_MON_PEER_APPLIED, /* u32 */ + TIPC_NLA_MON_PEER_UPMAP, /* u64 */ + TIPC_NLA_MON_PEER_MEMBERS, /* tlv */ + TIPC_NLA_MON_PEER_UP, /* flag */ + TIPC_NLA_MON_PEER_HEAD, /* flag */ + TIPC_NLA_MON_PEER_LOCAL, /* flag */ + TIPC_NLA_MON_PEER_PAD, /* flag */ + + __TIPC_NLA_MON_PEER_MAX, + TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1 +}; + /* Nest, connection info */ enum { TIPC_NLA_CON_UNSPEC, -- cgit
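
The matchall, mirred and act_api changes above are meant to be consumed together from a driver's ndo_setup_tc() callback. Below is a minimal, illustrative sketch of that glue, loosely modelled on the mlxsw mirroring offload referenced in the Fixes tag; the foo_* names and struct foo_port are hypothetical driver internals, while TC_SETUP_MATCHALL, struct tc_cls_matchall_offload, tc_single_action(), tc_for_each_action(), is_tcf_mirred_mirror() and tcf_mirred_ifindex() are the symbols introduced or touched by the patches in this series.

```c
#include <linux/netdevice.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/tc_act/tc_mirred.h>

/* Hypothetical driver private state and mirroring primitives. */
struct foo_port {
	int span_ifindex;
};

static int foo_port_mirror_add(struct foo_port *port, int to_ifindex,
			       unsigned long cookie);
static void foo_port_mirror_del(struct foo_port *port, unsigned long cookie);

static int foo_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
			struct tc_to_netdev *tc)
{
	struct foo_port *port = netdev_priv(dev);
	struct tc_cls_matchall_offload *mall;
	/* tc_for_each_action() expands over a variable literally named 'a' */
	const struct tc_action *a;

	if (tc->type != TC_SETUP_MATCHALL)
		return -EOPNOTSUPP;

	mall = tc->cls_mall;
	switch (mall->command) {
	case TC_CLSMATCHALL_REPLACE:
		/* Offload only a single mirred "mirror" action (SPAN). */
		if (!tc_single_action(mall->exts))
			return -EOPNOTSUPP;

		tc_for_each_action(a, mall->exts) {
			if (!is_tcf_mirred_mirror(a))
				return -EOPNOTSUPP;
			/* mall->cookie identifies the filter on destroy. */
			return foo_port_mirror_add(port, tcf_mirred_ifindex(a),
						   mall->cookie);
		}
		return -EOPNOTSUPP;
	case TC_CLSMATCHALL_DESTROY:
		foo_port_mirror_del(port, mall->cookie);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
```

The tc invocation shown in the offloading commit ("matchall skip_sw ... action mirred egress mirror dev ...") is what ultimately arrives at such a callback as a TC_CLSMATCHALL_REPLACE command.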
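
For the MACsec ICV change, the practical effect for userspace is that 16 octets is now the upper bound a configuration should request, even though MACSEC_MAX_ICV_LEN stays at 32 for ABI compatibility. A small userspace sketch of that check, assuming a uapi header that already carries the patch:

```c
#include <stdbool.h>
#include <stdio.h>
#include <linux/if_macsec.h>

/* Kernels carrying this patch reject ICV lengths above 16 octets even
 * though MACSEC_MAX_ICV_LEN remains 32 in the uapi header. */
static bool macsec_icv_len_valid(unsigned int icv_len)
{
	return icv_len >= MACSEC_MIN_ICV_LEN && icv_len <= MACSEC_STD_ICV_LEN;
}

int main(void)
{
	printf("16 octets: %s\n", macsec_icv_len_valid(16) ? "ok" : "rejected");
	printf("32 octets: %s\n", macsec_icv_len_valid(32) ? "ok" : "rejected");
	return 0;
}
```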
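
The bpf_probe_write_user() helper is only described by its uapi comment in the patch above; the following is a hedged sketch of how a tracing proglet might call it, written in the samples/bpf style of this period. The SEC() and PT_REGS_PARM2() macros and the helper-pointer declaration are assumptions modelled on samples/bpf/bpf_helpers.h, and the kprobe on sys_write plus the overwritten buffer are purely illustrative; as the commit message stresses, this is a debugging aid, not a security mechanism.

```c
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>

/* Assumed scaffolding in the style of samples/bpf/bpf_helpers.h. */
#define SEC(NAME) __attribute__((section(NAME), used))
#define PT_REGS_PARM2(x) ((x)->si)	/* x86_64 second syscall argument */

static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
	(void *) BPF_FUNC_probe_write_user;

/* Replace the buffer a traced process passes to write(2).  Debug aid only. */
SEC("kprobe/sys_write")
int kprobe_sys_write(struct pt_regs *ctx)
{
	char marker[] = "bpf was here";
	void *ubuf = (void *)PT_REGS_PARM2(ctx);

	bpf_probe_write_user(ubuf, marker, sizeof(marker));
	return 0;
}

char _license[] SEC("license") = "GPL";
```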
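
The TIPC address constants replace open-coded shifts and masks but, as the commit message notes, change no behaviour. A small userspace program against the updated <linux/tipc.h> shows the Z.C.N packing the new defines describe; the printed values are identical before and after the patch:

```c
#include <stdio.h>
#include <linux/tipc.h>

int main(void)
{
	__u32 addr = tipc_addr(1, 1, 10);		/* <1.1.10> */

	printf("addr    : 0x%08x\n", addr);		/* 0x0100100a */
	printf("zone    : %u\n", tipc_zone(addr));	/* 1 */
	printf("cluster : %u\n", tipc_cluster(addr));	/* 1 */
	printf("node    : %u\n", tipc_node(addr));	/* 10 */
	return 0;
}
```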