From c4cbaf7973a794839af080f13748335976cf3f3f Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:42 +0300
Subject: cfg80211: Add support for HE

Add support for HE in cfg80211 and also add userspace API to
nl80211 to send rate information out, conforming with P802.11ax_D2.0.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/net/cfg80211.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5fbfe61f41c6..9ba1f289c439 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -285,6 +285,41 @@ struct ieee80211_sta_vht_cap {
 	struct ieee80211_vht_mcs_info vht_mcs;
 };
 
+#define IEEE80211_HE_PPE_THRES_MAX_LEN		25
+
+/**
+ * struct ieee80211_sta_he_cap - STA's HE capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ax HE capabilities for a STA.
+ *
+ * @has_he: true iff HE data is valid.
+ * @he_cap_elem: Fixed portion of the HE capabilities element.
+ * @he_mcs_nss_supp: The supported NSS/MCS combinations.
+ * @ppe_thres: Holds the PPE Thresholds data.
+ */
+struct ieee80211_sta_he_cap {
+	bool has_he;
+	struct ieee80211_he_cap_elem he_cap_elem;
+	struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp;
+	u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN];
+};
+
+/**
+ * struct ieee80211_sband_iftype_data
+ *
+ * This structure encapsulates sband data that is relevant for the
+ * interface types defined in @types_mask.  Each type in the
+ * @types_mask must be unique across all instances of iftype_data.
+ *
+ * @types_mask: interface types mask
+ * @he_cap: holds the HE capabilities
+ */
+struct ieee80211_sband_iftype_data {
+	u16 types_mask;
+	struct ieee80211_sta_he_cap he_cap;
+};
+
 /**
  * struct ieee80211_supported_band - frequency band definition
  *
@@ -301,6 +336,11 @@ struct ieee80211_sta_vht_cap {
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
+ * @n_iftype_data: number of iftype data entries
+ * @iftype_data: interface type data entries.  Note that the bits in
+ *	@types_mask inside this structure cannot overlap (i.e. only
+ *	one occurrence of each type is allowed across all instances of
+ *	iftype_data).
  */
 struct ieee80211_supported_band {
 	struct ieee80211_channel *channels;
@@ -310,8 +350,55 @@ struct ieee80211_supported_band {
 	int n_bitrates;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
+	u16 n_iftype_data;
+	const struct ieee80211_sband_iftype_data *iftype_data;
 };
 
+/**
+ * ieee80211_get_sband_iftype_data - return sband data for a given iftype
+ * @sband: the sband to search for the iftype data on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to struct ieee80211_sband_iftype_data, or NULL if none found
+ */
+static inline const struct ieee80211_sband_iftype_data *
+ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
+				u8 iftype)
+{
+	int i;
+
+	if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+		return NULL;
+
+	for (i = 0; i < sband->n_iftype_data; i++)  {
+		const struct ieee80211_sband_iftype_data *data =
+			&sband->iftype_data[i];
+
+		if (data->types_mask & BIT(iftype))
+			return data;
+	}
+
+	return NULL;
+}
+
+/**
+ * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * @sband: the sband to search for the STA on
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or NULL if none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+{
+	const struct ieee80211_sband_iftype_data *data =
+		ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);
+
+	if (data && data->he_cap.has_he)
+		return &data->he_cap;
+
+	return NULL;
+}
+
 /**
  * wiphy_read_of_freq_limits - read frequency limits from device tree
  *
@@ -899,6 +986,8 @@ enum station_parameters_apply_mask {
  * @opmode_notif: operating mode field from Operating Mode Notification
  * @opmode_notif_used: information if operating mode field is used
  * @support_p2p_ps: information if station supports P2P PS mechanism
+ * @he_capa: HE capabilities of station
+ * @he_capa_len: the length of the HE capabilities
  */
 struct station_parameters {
 	const u8 *supported_rates;
@@ -926,6 +1015,8 @@ struct station_parameters {
 	u8 opmode_notif;
 	bool opmode_notif_used;
 	int support_p2p_ps;
+	const struct ieee80211_he_cap_elem *he_capa;
+	u8 he_capa_len;
 };
 
 /**
@@ -1000,12 +1091,14 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
+ * @RATE_INFO_FLAGS_HE_MCS: HE MCS information
  */
 enum rate_info_flags {
 	RATE_INFO_FLAGS_MCS			= BIT(0),
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
 	RATE_INFO_FLAGS_SHORT_GI		= BIT(2),
 	RATE_INFO_FLAGS_60G			= BIT(3),
+	RATE_INFO_FLAGS_HE_MCS			= BIT(4),
 };
 
 /**
@@ -1019,6 +1112,7 @@ enum rate_info_flags {
  * @RATE_INFO_BW_40: 40 MHz bandwidth
  * @RATE_INFO_BW_80: 80 MHz bandwidth
  * @RATE_INFO_BW_160: 160 MHz bandwidth
+ * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
  */
 enum rate_info_bw {
 	RATE_INFO_BW_20 = 0,
@@ -1027,6 +1121,7 @@ enum rate_info_bw {
 	RATE_INFO_BW_40,
 	RATE_INFO_BW_80,
 	RATE_INFO_BW_160,
+	RATE_INFO_BW_HE_RU,
 };
 
 /**
@@ -1035,10 +1130,14 @@ enum rate_info_bw {
  * Information about a receiving or transmitting bitrate
  *
  * @flags: bitflag of flags from &enum rate_info_flags
- * @mcs: mcs index if struct describes a 802.11n bitrate
+ * @mcs: mcs index if struct describes an HT/VHT/HE rate
  * @legacy: bitrate in 100kbit/s for 802.11abg
- * @nss: number of streams (VHT only)
+ * @nss: number of streams (VHT & HE only)
  * @bw: bandwidth (from &enum rate_info_bw)
+ * @he_gi: HE guard interval (from &enum nl80211_he_gi)
+ * @he_dcm: HE DCM value
+ * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
+ *	only valid if bw is %RATE_INFO_BW_HE_RU)
  */
 struct rate_info {
 	u8 flags;
@@ -1046,6 +1145,9 @@ struct rate_info {
 	u16 legacy;
 	u8 nss;
 	u8 bw;
+	u8 he_gi;
+	u8 he_dcm;
+	u8 he_ru_alloc;
 };
 
 /**
-- 
cgit 


From 95a28eeaf1491bcb8bf521bad4784683333705ee Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:43 +0300
Subject: radiotap: add structs for HE

Add radiotap structures for HE.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/net/ieee80211_radiotap.h | 123 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 960236fb1681..feef706e1158 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2017		Intel Deutschland GmbH
+ * Copyright (c) 2018		Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -72,6 +73,8 @@ enum ieee80211_radiotap_presence {
 	IEEE80211_RADIOTAP_AMPDU_STATUS = 20,
 	IEEE80211_RADIOTAP_VHT = 21,
 	IEEE80211_RADIOTAP_TIMESTAMP = 22,
+	IEEE80211_RADIOTAP_HE = 23,
+	IEEE80211_RADIOTAP_HE_MU = 24,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
 	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -202,6 +205,126 @@ enum ieee80211_radiotap_timestamp_flags {
 	IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY = 0x02,
 };
 
+struct ieee80211_radiotap_he {
+	__le16 data1, data2, data3, data4, data5, data6;
+};
+
+enum ieee80211_radiotap_he_bits {
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MASK		= 3,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU		= 0,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_EXT_SU	= 1,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MU		= 2,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_TRIG		= 3,
+
+	IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_DATA1_BEAM_CHANGE_KNOWN	= 0x0008,
+	IEEE80211_RADIOTAP_HE_DATA1_UL_DL_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN	= 0x0020,
+	IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN	= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA1_CODING_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN	= 0x0100,
+	IEEE80211_RADIOTAP_HE_DATA1_STBC_KNOWN		= 0x0200,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN	= 0x0400,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE2_KNOWN	= 0x0800,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE3_KNOWN	= 0x1000,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE4_KNOWN	= 0x2000,
+	IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_KNOWN	= 0x0001,
+	IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN		= 0x0002,
+	IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN	= 0x0008,
+	IEEE80211_RADIOTAP_HE_DATA2_TXBF_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN	= 0x0020,
+	IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN		= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA2_MIDAMBLE_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET		= 0x3f00,
+	IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR		= 0x003f,
+	IEEE80211_RADIOTAP_HE_DATA3_BEAM_CHANGE		= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA3_UL_DL		= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA3_DATA_MCS		= 0x0f00,
+	IEEE80211_RADIOTAP_HE_DATA3_DATA_DCM		= 0x1000,
+	IEEE80211_RADIOTAP_HE_DATA3_CODING		= 0x2000,
+	IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA3_STBC		= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA4_SU_MU_SPTL_REUSE	= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA4_MU_STA_ID		= 0x7ff0,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE1	= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE2	= 0x00f0,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE3	= 0x0f00,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE4	= 0xf000,
+
+	IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC	= 0x000f,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ	= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ	= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ	= 2,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ	= 3,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_26T	= 4,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_52T	= 5,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_106T	= 6,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_242T	= 7,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_484T	= 8,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_996T	= 9,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_2x996T	= 10,
+
+	IEEE80211_RADIOTAP_HE_DATA5_GI			= 0x0030,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_0_8			= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_1_6			= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_3_2			= 2,
+
+	IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE		= 0x00c0,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN		= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_1X			= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X			= 2,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X			= 3,
+	IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS	= 0x0700,
+	IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD		= 0x3000,
+	IEEE80211_RADIOTAP_HE_DATA5_TXBF		= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG		= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA6_NSTS		= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA6_DOPPLER		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA6_TXOP		= 0x7f00,
+	IEEE80211_RADIOTAP_HE_DATA6_MIDAMBLE_PDCTY	= 0x8000,
+};
+
+struct ieee80211_radiotap_he_mu {
+	__le16 flags1, flags2;
+	u8 ru_ch1[4];
+	u8 ru_ch2[4];
+};
+
+enum ieee80211_radiotap_he_mu_bits {
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS		= 0x000f,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM		= 0x0020,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM_KNOWN		= 0x0040,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_CTR_26T_RU_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_RU_KNOWN		= 0x0100,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_RU_KNOWN		= 0x0200,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU_KNOWN	= 0x1000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU		= 0x2000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_COMP_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_SYMS_USERS_KNOWN	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW	= 0x0003,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_20MHZ	= 0x0000,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_40MHZ	= 0x0001,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_80MHZ	= 0x0002,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_160MHZ	= 0x0003,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_COMP		= 0x0008,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_SYMS_USERS	= 0x00f0,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW	= 0x0300,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN= 0x0400,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU		= 0x0800,
+};
+
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  */
-- 
cgit 


From 41cbb0f5a29592874355e4159489eb08337cd50e Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:44 +0300
Subject: mac80211: add support for HE

Add support for HE in mac80211 conforming with P802.11ax_D1.4.

Johannes: Fix another bug with the buf_size comparison in agg-rx.c.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 64 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 851a5e19ae32..5790f55c241d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -23,6 +23,7 @@
 #include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 #include <net/codel.h>
+#include <net/ieee80211_radiotap.h>
 #include <asm/unaligned.h>
 
 /**
@@ -162,6 +163,8 @@ enum ieee80211_ac_numbers {
  * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled
  * @acm: is mandatory admission control required for the access category
  * @uapsd: is U-APSD mode enabled for the queue
+ * @mu_edca: is the MU EDCA configured
+ * @mu_edca_param_rec: MU EDCA Parameter Record for HE
  */
 struct ieee80211_tx_queue_params {
 	u16 txop;
@@ -170,6 +173,8 @@ struct ieee80211_tx_queue_params {
 	u8 aifs;
 	bool acm;
 	bool uapsd;
+	bool mu_edca;
+	struct ieee80211_he_mu_edca_param_ac_rec mu_edca_param_rec;
 };
 
 struct ieee80211_low_level_stats {
@@ -463,6 +468,15 @@ struct ieee80211_mu_group_data {
  * This structure keeps information about a BSS (and an association
  * to that BSS) that can change during the lifetime of the BSS.
  *
+ * @bss_color: 6-bit value to mark inter-BSS frame, if BSS supports HE
+ * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
+ * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK
+ * @uora_exists: is the UORA element advertised by AP
+ * @ack_enabled: indicates support to receive a multi-TID that solicits either
+ *	ACK, BACK or both
+ * @uora_ocw_range: UORA element's OCW Range field
+ * @frame_time_rts_th: HE duration RTS threshold, in units of 32us
+ * @he_support: does this BSS support HE
  * @assoc: association status
  * @ibss_joined: indicates whether this station is part of an IBSS
  *	or not
@@ -550,6 +564,14 @@ struct ieee80211_mu_group_data {
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
+	u8 bss_color;
+	u8 htc_trig_based_pkt_ext;
+	bool multi_sta_back_32bit;
+	bool uora_exists;
+	bool ack_enabled;
+	u8 uora_ocw_range;
+	u16 frame_time_rts_th;
+	bool he_support;
 	/* association related data */
 	bool assoc, ibss_joined;
 	bool ibss_creator;
@@ -1106,6 +1128,18 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
  *	frame
  * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
+ * @RX_FLAG_RADIOTAP_HE: HE radiotap data is present
+ *	(&struct ieee80211_radiotap_he, mac80211 will fill in
+ *	 - DATA3_DATA_MCS
+ *	 - DATA3_DATA_DCM
+ *	 - DATA3_CODING
+ *	 - DATA5_GI
+ *	 - DATA5_DATA_BW_RU_ALLOC
+ *	 - DATA6_NSTS
+ *	 - DATA3_STBC
+ *	from the RX info data, so leave those zeroed when building this data)
+ * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
+ *	(&struct ieee80211_radiotap_he_mu)
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1134,6 +1168,8 @@ enum mac80211_rx_flags {
 	RX_FLAG_ICV_STRIPPED		= BIT(23),
 	RX_FLAG_AMPDU_EOF_BIT		= BIT(24),
 	RX_FLAG_AMPDU_EOF_BIT_KNOWN	= BIT(25),
+	RX_FLAG_RADIOTAP_HE		= BIT(26),
+	RX_FLAG_RADIOTAP_HE_MU		= BIT(27),
 };
 
 /**
@@ -1164,6 +1200,7 @@ enum mac80211_rx_encoding {
 	RX_ENC_LEGACY = 0,
 	RX_ENC_HT,
 	RX_ENC_VHT,
+	RX_ENC_HE,
 };
 
 /**
@@ -1198,6 +1235,9 @@ enum mac80211_rx_encoding {
  * @encoding: &enum mac80211_rx_encoding
  * @bw: &enum rate_info_bw
  * @enc_flags: uses bits from &enum mac80211_rx_encoding_flags
+ * @he_ru: HE RU, from &enum nl80211_he_ru_alloc
+ * @he_gi: HE GI, from &enum nl80211_he_gi
+ * @he_dcm: HE DCM value
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *	each A-MPDU but the same for each subframe within one A-MPDU
@@ -1211,7 +1251,8 @@ struct ieee80211_rx_status {
 	u32 flag;
 	u16 freq;
 	u8 enc_flags;
-	u8 encoding:2, bw:3;
+	u8 encoding:2, bw:3, he_ru:3;
+	u8 he_gi:2, he_dcm:1;
 	u8 rate_idx;
 	u8 nss;
 	u8 rx_flags;
@@ -1770,6 +1811,7 @@ struct ieee80211_sta_rates {
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
+ * @he_cap: HE capabilities of this STA
  * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU
  *	that this station is allowed to transmit to us.
  *	Can be modified by driver.
@@ -1805,7 +1847,8 @@ struct ieee80211_sta {
 	u16 aid;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
-	u8 max_rx_aggregation_subframes;
+	struct ieee80211_sta_he_cap he_cap;
+	u16 max_rx_aggregation_subframes;
 	bool wme;
 	u8 uapsd_queues;
 	u8 max_sp;
@@ -2196,10 +2239,11 @@ enum ieee80211_hw_flags {
  *	it shouldn't be set.
  *
  * @max_tx_aggregation_subframes: maximum number of subframes in an
- *	aggregate an HT driver will transmit. Though ADDBA will advertise
- *	a constant value of 64 as some older APs can crash if the window
- *	size is smaller (an example is LinkSys WRT120N with FW v1.0.07
- *	build 002 Jun 18 2012).
+ *	aggregate an HT/HE device will transmit. In HT AddBA we'll
+ *	advertise a constant value of 64 as some older APs crash if
+ *	the window size is smaller (an example is LinkSys WRT120N
+ *	with FW v1.0.07 build 002 Jun 18 2012).
+ *	For AddBA to HE capable peers this value will be used.
  *
  * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum
  *	of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list.
@@ -2216,6 +2260,8 @@ enum ieee80211_hw_flags {
  *	the default is _GI | _BANDWIDTH.
  *	Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
  *
+ * @radiotap_he: HE radiotap validity flags
+ *
  * @radiotap_timestamp: Information for the radiotap timestamp field; if the
  *	'units_pos' member is set to a non-negative value it must be set to
  *	a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
@@ -2263,8 +2309,8 @@ struct ieee80211_hw {
 	u8 max_rates;
 	u8 max_report_rates;
 	u8 max_rate_tries;
-	u8 max_rx_aggregation_subframes;
-	u8 max_tx_aggregation_subframes;
+	u16 max_rx_aggregation_subframes;
+	u16 max_tx_aggregation_subframes;
 	u8 max_tx_fragments;
 	u8 offchannel_tx_hw_queue;
 	u8 radiotap_mcs_details;
@@ -2904,7 +2950,7 @@ struct ieee80211_ampdu_params {
 	struct ieee80211_sta *sta;
 	u16 tid;
 	u16 ssn;
-	u8 buf_size;
+	u16 buf_size;
 	bool amsdu;
 	u16 timeout;
 };
-- 
cgit 


From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: split rhashtable.h

Due to the use of rhashtables in net namespaces,
rhashtable.h is included in lots of the kernel,
so a small change can require a large recompilation.
This makes development painful.

This patch splits out rhashtable-types.h which just includes
the major type declarations, and does not include (non-trivial)
inline code.  rhashtable.h is no longer included by anything
in the include/ directory.
Common include files only include rhashtable-types.h so a large
recompilation is only triggered when that changes.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h               | 2 +-
 include/net/netfilter/nf_flow_table.h | 2 +-
 include/net/sctp/structs.h            | 2 +-
 include/net/seg6.h                    | 2 +-
 include/net/seg6_hmac.h               | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index ed07e3786d98..f4272a29dc44 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -2,7 +2,7 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct netns_frags {
 	/* sysctls */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ba9fa4592f2b..0e355f4a3d76 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -4,7 +4,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/rcupdate.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index dbe1b911a24d..e0f962d27386 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -48,7 +48,7 @@
 #define __sctp_structs_h__
 
 #include <linux/ktime.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/socket.h>	/* linux/in.h needs this!!    */
 #include <linux/in.h>		/* We get struct sockaddr_in. */
 #include <linux/in6.h>		/* We get struct in6_addr     */
diff --git a/include/net/seg6.h b/include/net/seg6.h
index e029e301faa5..2567941a2f32 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -18,7 +18,7 @@
 #include <linux/ipv6.h>
 #include <net/lwtunnel.h>
 #include <linux/seg6.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 69c3a106056b..7fda469e2758 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -22,7 +22,7 @@
 #include <linux/route.h>
 #include <net/seg6.h>
 #include <linux/seg6_hmac.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 #define SEG6_HMAC_MAX_DIGESTSIZE	160
 #define SEG6_HMAC_RING_SIZE		256
-- 
cgit 


From cadefe5f584abaac40dce72009e4de738cbff467 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 20 Jun 2018 16:07:35 -0400
Subject: tcp_bbr: fix bbr pacing rate for internal pacing

This commit makes BBR use only the MSS (without any headers) to
calculate pacing rates when internal TCP-layer pacing is used.

This is necessary to achieve the correct pacing behavior in this case,
since tcp_internal_pacing() uses only the payload length to calculate
pacing delays.

Signed-off-by: Kevin Yang <yyd@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0448e7c5d2b4..822ee49ed0f9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1184,6 +1184,17 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 	return tp->is_cwnd_limited;
 }
 
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static inline bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
 /* Something is really bad, we could not queue an additional packet,
  * because qdisc is full or receiver sent a 0 window.
  * We do not want to add fuel to the fire, or abort too early,
-- 
cgit 


From 5424ea27390f1f8903e5de0eaa0c5b561e8e877a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 16:27:47 -0700
Subject: netns: get more entropy from net_hash_mix()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct net objects are effectively allocated from order-1 pages on x86,
with one object per slab, meaning that the 13 low order bits
of their addresses are zero.

Once shifted by L1_CACHE_SHIFT, this leaves 7 zero-bits,
meaning that net_hash_mix() does not help spreading
objects on various hash tables.

For example, TCP listen table has 32 buckets, meaning that
all netns use the same bucket for port 80 or port 443.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/hash.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index 24c78183a4c2..16a842456189 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -9,12 +9,7 @@ struct net;
 static inline u32 net_hash_mix(const struct net *net)
 {
 #ifdef CONFIG_NET_NS
-	/*
-	 * shift this right to eliminate bits, that are
-	 * always zeroed
-	 */
-
-	return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT);
+	return (u32)(((unsigned long)net) >> ilog2(sizeof(*net)));
 #else
 	return 0;
 #endif
-- 
cgit 


From 9b42c1f179a614e11893ae4619f0304a38f481ae Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 12:44:26 +0200
Subject: xfrm: Extend the output_mark to support input direction and masking.

We already support setting an output mark at the xfrm_state,
unfortunately this does not support the input direction and
masking the marks that will be applied to the skb. This change
adds support applying a masked value in both directions.

The existing XFRMA_OUTPUT_MARK number is reused for this purpose
and as it is now bi-directional, it is renamed to XFRMA_SET_MARK.

An additional XFRMA_SET_MARK_MASK attribute is added for setting the
mask. If the mask attribute is not provided, it is set to 0xffffffff,
keeping the XFRMA_OUTPUT_MARK existing 'full mask' semantics.

Co-developed-by: Tobias Brunner <tobias@strongswan.org>
Co-developed-by: Eyal Birger <eyal.birger@gmail.com>
Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Tobias Brunner <tobias@strongswan.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/xfrm.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..3dc83ba26f62 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -166,7 +166,7 @@ struct xfrm_state {
 		int		header_len;
 		int		trailer_len;
 		u32		extra_flags;
-		u32		output_mark;
+		struct xfrm_mark	smark;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
@@ -2012,6 +2012,13 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
 	return ret;
 }
 
+static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
+{
+	struct xfrm_mark *m = &x->props.smark;
+
+	return (m->v & m->m) | (mark & ~m->m);
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
-- 
cgit 


From d159ce7957eec306eacda672e5909e26675ca8ef Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:06:57 +0200
Subject: flow: Extend flow informations with xfrm interface id.

Add a new flowi_xfrm structure with the information needed to do
an xfrm lookup. At the moment it keeps the information about
the new xfrm interface id needed to look up xfrm interfaces
that are introduced with a follow-up patch. We need this new
lookup key as other possible keys, like the ifindex, are
already part of the xfrm selector and used as a key to
enforce the output device after the transformation in the
policy/state lookup.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/flow.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 8ce21793094e..187c9bef672f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,6 +26,10 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
+struct flowi_xfrm {
+	__u32			if_id;
+};
+
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -39,6 +43,7 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
+	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -78,6 +83,7 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
+#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -109,6 +115,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
+	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -138,6 +145,7 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
+#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -185,6 +193,7 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
+#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
-- 
cgit 


From 7e6526404adedf079279aa7aa11722deaca8fe2e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:07 +0200
Subject: xfrm: Add a new lookup key to match xfrm interfaces.

This patch adds the xfrm interface id as a lookup key
for xfrm states and policies. With this we can assign
states and policies to virtual xfrm interfaces.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3dc83ba26f62..e8bada4d2a45 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -147,6 +147,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			if_id;
 	u32			tfcpad;
 
 	u32			genid;
@@ -574,6 +575,7 @@ struct xfrm_policy {
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
+	u32			if_id;
 	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
@@ -1533,7 +1535,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
 				   unsigned short family);
-struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
+struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
 				       unsigned short family,
@@ -1690,20 +1692,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 		     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 					  u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
-				     u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
+				     int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
@@ -2019,6 +2021,15 @@ static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
 	return (m->v & m->m) | (mark & ~m->m);
 }
 
+static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
+{
+	int ret = 0;
+
+	if (if_id)
+		ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
+	return ret;
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
-- 
cgit 


From f203b76d78092faf248db3f851840fbecf80b40e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:12 +0200
Subject: xfrm: Add virtual xfrm interfaces

This patch adds support for virtual xfrm interfaces.
Packets that are routed through such an interface
are guaranteed to be IPsec transformed or dropped.
It is a generic virtual interface that ensures IPsec
transformation, no need to know what happens behind
the interface. This means that we can tunnel IPv4 and
IPv6 through the same interface and support all xfrm
modes (tunnel, transport and beet) on it.

Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e8bada4d2a45..3fa578a6a819 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -23,6 +23,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -293,6 +294,13 @@ struct xfrm_replay {
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+	struct xfrm_if	*(*decode_session)(struct sk_buff *skb);
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -1039,6 +1047,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+	char name[IFNAMSIZ];	/* name of XFRM device */
+	int link;		/* ifindex of underlying L2 interface */
+	u32 if_id;		/* interface identifyer */
+};
+
+struct xfrm_if {
+	struct xfrm_if __rcu *next;	/* next interface in list */
+	struct net_device *dev;		/* virtual device associated with interface */
+	struct net_device *phydev;	/* physical device */
+	struct net *net;		/* netns for packet i/o */
+	struct xfrm_if_parms p;		/* interface parms */
+
+	struct gro_cells gro_cells;
+};
+
 struct xfrm_offload {
 	/* Output sequence number for replay protection on offloading. */
 	struct {
-- 
cgit 


From e4db5b61c572475bbbcf63e3c8a2606bfccf2c9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:26:02 +0200
Subject: xfrm: policy: remove pcpu policy cache

Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3fa578a6a819..a5378613a49c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -332,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
-- 
cgit 


From d4546c2509b1e9cd082e3682dcec98472e37ee5a Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 24 Jun 2018 14:13:49 +0900
Subject: net: Convert GRO SKB handling to list_head.

Manage pending per-NAPI GRO packets via list_head.

Return an SKB pointer from the GRO receive handlers.  When GRO receive
handlers return non-NULL, it means that this SKB needs to be completed
at this time and removed from the NAPI queue.

Several operations are greatly simplified by this transformation,
especially timing out the oldest SKB in the list when gro_count
exceeds MAX_GRO_SKBS, and napi_gro_flush() which walks the queue
in reverse order.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h | 2 +-
 include/net/tcp.h         | 2 +-
 include/net/udp.h         | 4 ++--
 include/net/udp_tunnel.h  | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 384b90c62c0b..3ca969cbd161 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -43,7 +43,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
 		    int *addr_len);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);
 int inet_gro_complete(struct sk_buff *skb, int nhoff);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 822ee49ed0f9..402a88b0e8a8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1788,7 +1788,7 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
 int tcp_gro_complete(struct sk_buff *skb);
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
diff --git a/include/net/udp.h b/include/net/udp.h
index b1ea8b0f5e6a..5723c6128ae4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -170,8 +170,8 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
 				     __be16 dport);
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup);
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index b95a6927c718..fe680ab6b15a 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -65,9 +65,9 @@ static inline int udp_sock_create(struct net *net,
 
 typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
-typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
-						     struct sk_buff **head,
-						     struct sk_buff *skb);
+typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
+						    struct list_head *head,
+						    struct sk_buff *skb);
 typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
 					 int nhoff);
 
-- 
cgit 


From 60513bd82c825b659c05957e4f8106ba06f0797f Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:04 -0700
Subject: net: sched: pass extack pointer to block binds and cb registration

Pass the extack struct from a tc qdisc add to the block bind function and,
in turn, to the setup_tc ndo of the binding device via the tc_block_offload
struct. Pass this back to any block callback registrations to allow
netlink logging of failures in the bind process.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3c1a2c47cd4..a2c6d35ba057 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -73,10 +73,11 @@ void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv);
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack);
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv);
+			  void *cb_priv, struct netlink_ext_ack *extack);
 void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
@@ -161,7 +162,8 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 static inline
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv)
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack)
 {
 	return NULL;
 }
@@ -169,7 +171,7 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 static inline
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv)
+			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	return 0;
 }
@@ -596,6 +598,7 @@ struct tc_block_offload {
 	enum tc_block_command command;
 	enum tcf_block_binder_type binder_type;
 	struct tcf_block *block;
+	struct netlink_ext_ack *extack;
 };
 
 struct tc_cls_common_offload {
-- 
cgit 


From e56185c78b500ac4d08768278ad8a25d5b756942 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:05 -0700
Subject: net: sched: add tcf_proto_op to offload a rule

Create a new tcf_proto_op called 'reoffload' that generates a new offload
message for each node in a tcf_proto. Pointers to the tcf_proto and
whether the offload request is to add or delete the node are included.
Also included is a callback function to send the offload message to and
the option of priv data to go with the cb.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h     | 3 ---
 include/net/sch_generic.h | 6 ++++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9e59ebfded62..5ff11adbe2a6 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -190,9 +190,6 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 #endif
 }
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-			  void *type_data, void *cb_priv);
-
 #ifdef CONFIG_NET_CLS_ACT
 int tc_setup_cb_egdev_register(const struct net_device *dev,
 			       tc_setup_cb_t *cb, void *cb_priv);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6488daa32f82..18adc9142b18 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -20,6 +20,9 @@ struct qdisc_walker;
 struct tcf_walker;
 struct module;
 
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+			  void *type_data, void *cb_priv);
+
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
@@ -256,6 +259,9 @@ struct tcf_proto_ops {
 					  bool *last,
 					  struct netlink_ext_ack *);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
+	int			(*reoffload)(struct tcf_proto *tp, bool add,
+					     tc_setup_cb_t *cb, void *cb_priv,
+					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
 
 	/* rtnetlink specific */
-- 
cgit 


From 31533cba4327aefeafe8a7d57de0c737a3b2faa6 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:06 -0700
Subject: net: sched: cls_flower: implement offload tcf_proto_op

Add the reoffload tcf_proto_op in flower to generate an offload message
for each filter in the given tcf_proto. Call the specified callback with
this new offload message. The function only returns an error if the
callback rejects adding a 'hardware only' rule.

A filter contains a flag to indicate if it is in hardware or not. To
ensure the reoffload function properly maintains this flag, keep a
reference counter for the number of instances of the filter that are in
hardware. Only update the flag when this counter changes from or to 0. Add
a generic helper function to implement this behaviour.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 18adc9142b18..7432100027b7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -336,6 +336,21 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
 	block->offloadcnt--;
 }
 
+static inline void
+tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt,
+			  u32 *flags, bool add)
+{
+	if (add) {
+		if (!*cnt)
+			tcf_block_offload_inc(block, flags);
+		(*cnt)++;
+	} else {
+		(*cnt)--;
+		if (!*cnt)
+			tcf_block_offload_dec(block, flags);
+	}
+}
+
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
-- 
cgit 


From 326367427cc09d38e4c1d145131ee2e228ac94c5 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:10 -0700
Subject: net: sched: call reoffload op on block callback reg

Call the reoffload tcf_proto_op on all tcf_proto nodes in all chains of a
block when a callback tries to register to a block that already has
offloaded rules. If all existing rules cannot be offloaded then the
registration is rejected. This replaces the previous policy of rejecting
such callback registration outright.

On unregistration of a callback, the rules are flushed for that given cb.
The implementation of block sharing in the NFP driver, for example,
duplicates shared rules to all devs bound to a block. This meant that
rules could still exist in hw even after a device is unbound from a block
(assuming the block still remains active).

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2c6d35ba057..4070b8eb6d14 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -78,7 +78,8 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
 			  void *cb_priv, struct netlink_ext_ack *extack);
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
 
@@ -177,7 +178,8 @@ int tcf_block_cb_register(struct tcf_block *block,
 }
 
 static inline
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
 }
 
-- 
cgit 


From d020d4559de9baf47cafa2669f29ea59d11a914c Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Wed, 27 Jun 2018 13:33:31 -0400
Subject: net sched actions: fix coding style in pedit headers

Fix coding style issues in tc pedit headers detected by the
checkpatch script.

Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_pedit.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 227a6f1d02f4..fac3ad4a86de 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -17,6 +17,7 @@ struct tcf_pedit {
 	struct tc_pedit_key	*tcfp_keys;
 	struct tcf_pedit_key_ex	*tcfp_keys_ex;
 };
+
 #define to_pedit(a) ((struct tcf_pedit *)a)
 
 static inline bool is_tcf_pedit(const struct tc_action *a)
-- 
cgit 


From f564650106a6e85702660fefd59fdff0877ab46a Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Wed, 27 Jun 2018 10:34:25 -0300
Subject: netfilter: check if the socket netns is correct.

Netfilter assumes that if the socket is present in the skb, then
it can be used because that reference is cleaned up while the skb
is crossing netns.

We want to change that to preserve the socket reference in a future
patch, so this is a preparation updating netfilter to check if the
socket netns matches before using it.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_log.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index e811ac07ea94..0d3920896d50 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -106,7 +106,8 @@ int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   u8 proto, int fragment, unsigned int offset,
 			   unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,
-- 
cgit 


From b0e9a2fe3ff971950833bc0ffc383babd9443bc4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 28 Jun 2018 15:31:00 +0800
Subject: sctp: add support for SCTP_REUSE_PORT sockopt

This feature is actually already supported by sk->sk_reuse which can be
set by socket level opt SO_REUSEADDR. But it's not working exactly as
RFC6458 demands in section 8.1.27, like:

  - This option only supports one-to-one style SCTP sockets
  - This socket option must not be used after calling bind()
    or sctp_bindx().

Besides, SCTP_REUSE_PORT sockopt should be provided for user's programs.
Otherwise, the programs with SCTP_REUSE_PORT from other systems will not
work in linux.

To separate it from the socket level version, this patch adds 'reuse' in
sctp_sock and it works pretty much as sk->sk_reuse, but with some extra
setup limitations that are needed when it is being enabled.

"It should be noted that the behavior of the socket-level socket option
to reuse ports and/or addresses for SCTP sockets is unspecified", so it
leaves SO_REUSEADDR as is for the compatibility.

Note that the name SCTP_REUSE_PORT is somewhat confusing, as its
functionality is nearly identical to SO_REUSEADDR, but with some
extra restrictions. Here it uses 'reuse' in sctp_sock instead of
'reuseport'. As for sk->sk_reuseport support for SCTP, it will be
added in another patch.

Thanks to Neil for making this clear.

v1->v2:
  - add sctp_sk->reuse to separate it from the socket level version.
v2->v3:
  - improve changelog according to Marcelo's suggestion.

Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e0f962d27386..701a51736fa5 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -220,6 +220,7 @@ struct sctp_sock {
 	__u32 adaptation_ind;
 	__u32 pd_point;
 	__u16	nodelay:1,
+		reuse:1,
 		disable_fragments:1,
 		v4mapped:1,
 		frag_interleave:1,
-- 
cgit 


From 256c87c17c53e60882a43dcf3e98f3bf859eaf6f Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 26 Jun 2018 21:39:36 -0700
Subject: net: check tunnel option type in tunnel flags

Check the tunnel option type stored in tunnel flags when creating options
for tunnels. Thereby ensuring we do not set geneve, vxlan or erspan tunnel
options on interfaces that are not associated with them.

Make sure all users of the infrastructure set correct flags, for the BPF
helper we have to set all bits to keep backward compatibility.

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 90ff430f5e9d..b0d022ff6ea1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -466,10 +466,12 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-					   const void *from, int len)
+					   const void *from, int len,
+					   __be16 flags)
 {
 	memcpy(ip_tunnel_info_opts(info), from, len);
 	info->options_len = len;
+	info->key.tun_flags |= flags;
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -511,9 +513,11 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-					   const void *from, int len)
+					   const void *from, int len,
+					   __be16 flags)
 {
 	info->options_len = 0;
+	info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */
-- 
cgit 


From 0afff91c6f5ecef27715ea71e34dc2baacba1060 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:05 +0200
Subject: net/smc: add pnetid support

s390 hardware supports the definition of a so-called Physical NETwork
IDentifier (short PNETID) per network device port. These PNETIDS
can be used to identify network devices that are attached to the same
physical network (broadcast domain).

On s390 try to use the PNETID of the ethernet device port used for
initial connecting, and derive the IB device port used for SMC RDMA
traffic.

On platforms without PNETID support fall back to the existing
solution of a configured pnet table.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 8381d163fefa..2173932fab9d 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -11,6 +11,8 @@
 #ifndef _SMC_H
 #define _SMC_H
 
+#define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
+
 struct smc_hashinfo {
 	rwlock_t lock;
 	struct hlist_head ht;
-- 
cgit 


From c6ba7c9ba43de1b57e9a53946e7ff988554c84ed Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:07 +0200
Subject: net/smc: add base infrastructure for SMC-D and ISM

SMC supports two variants: SMC-R and SMC-D. For data transport, SMC-R
uses RDMA devices, SMC-D uses so-called Internal Shared Memory (ISM)
devices. An ISM device only allows shared memory communication between
SMC instances on the same machine. For example, this allows virtual
machines on the same host to communicate via SMC without RDMA devices.

This patch adds the base infrastructure for SMC-D and ISM devices to
the existing SMC code. It contains the following:

* ISM driver interface:
  This interface allows an ISM driver to register ISM devices in SMC. In
  the process, the driver provides a set of device ops for each device.
  SMC uses these ops to execute SMC specific operations on or transfer
  data over the device.

* Core SMC-D link group, connection, and buffer support:
  Link groups, SMC connections and SMC buffers (in smc_core) are
  extended to support SMC-D.

* SMC type checks:
  Some type checks are added to prevent using SMC-R specific code for
  SMC-D and vice versa.

To actually use SMC-D, additional changes to pnetid, CLC, CDC, etc. are
required. These are added in follow-up patches.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 2173932fab9d..824a7af8d654 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -20,4 +20,66 @@ struct smc_hashinfo {
 
 int smc_hash_sk(struct sock *sk);
 void smc_unhash_sk(struct sock *sk);
+
+/* SMCD/ISM device driver interface */
+struct smcd_dmb {
+	u64 dmb_tok;
+	u64 rgid;
+	u32 dmb_len;
+	u32 sba_idx;
+	u32 vlan_valid;
+	u32 vlan_id;
+	void *cpu_addr;
+	dma_addr_t dma_addr;
+};
+
+#define ISM_EVENT_DMB	0
+#define ISM_EVENT_GID	1
+#define ISM_EVENT_SWR	2
+
+struct smcd_event {
+	u32 type;
+	u32 code;
+	u64 tok;
+	u64 time;
+	u64 info;
+};
+
+struct smcd_dev;
+
+struct smcd_ops {
+	int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
+				u32 vid);
+	int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+	int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+	int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+	int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+	int (*set_vlan_required)(struct smcd_dev *dev);
+	int (*reset_vlan_required)(struct smcd_dev *dev);
+	int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
+			    u32 event_code, u64 info);
+	int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
+			 bool sf, unsigned int offset, void *data,
+			 unsigned int size);
+};
+
+struct smcd_dev {
+	const struct smcd_ops *ops;
+	struct device dev;
+	void *priv;
+	u64 local_gid;
+	struct list_head list;
+	spinlock_t lock;
+	struct smc_connection **conn;
+	struct list_head vlan;
+	struct workqueue_struct *event_wq;
+};
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+				const struct smcd_ops *ops, int max_dmbs);
+int smcd_register_dev(struct smcd_dev *smcd);
+void smcd_unregister_dev(struct smcd_dev *smcd);
+void smcd_free_dev(struct smcd_dev *smcd);
+void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
+void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
 #endif	/* _SMC_H */
-- 
cgit 


From 1619f770589a183af56f248de261534b255122de Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:08 +0200
Subject: net/smc: add pnetid support for SMC-D and ISM

SMC-D relies on PNETIDs to find usable SMC-D/ISM devices for a SMC
connection. This patch adds SMC-D/ISM support to the current PNETID
implementation.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 824a7af8d654..9ef49f8b1002 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -73,6 +73,7 @@ struct smcd_dev {
 	struct smc_connection **conn;
 	struct list_head vlan;
 	struct workqueue_struct *event_wq;
+	u8 pnetid[SMC_MAX_PNETID_LEN];
 };
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
-- 
cgit 


From 755c31cd85aea35cf7a5e7253851b52c08eff6e9 Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:51 -0700
Subject: net: sock: Change tx_queue_mapping in sock_common to unsigned short

Change 'skc_tx_queue_mapping' field in sock_common structure from
'int' to 'unsigned short' type with ~0 indicating unset and
other positive queue values being set. This will accommodate adding
a new 'unsigned short' field in sock_common in the next patch for
rx_queue_mapping.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index b3b75419eafe..37b09c84504b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -214,7 +214,7 @@ struct sock_common {
 		struct hlist_node	skc_node;
 		struct hlist_nulls_node skc_nulls_node;
 	};
-	int			skc_tx_queue_mapping;
+	unsigned short		skc_tx_queue_mapping;
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -1681,17 +1681,25 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
 {
+	/* sk_tx_queue_mapping is 16 bits wide and USHRT_MAX means "unset" */
+	if (WARN_ON_ONCE((unsigned int)tx_queue >= USHRT_MAX))
+		return;
 	sk->sk_tx_queue_mapping = tx_queue;
 }
 
+#define NO_QUEUE_MAPPING	USHRT_MAX
+
 static inline void sk_tx_queue_clear(struct sock *sk)
 {
-	sk->sk_tx_queue_mapping = -1;
+	sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
 }
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk ? sk->sk_tx_queue_mapping : -1;
+	if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_tx_queue_mapping;
+
+	return -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
-- 
cgit 


From c6345ce7d361dce1b5d02a2181ccb598c27fd7ae Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:57 -0700
Subject: net: Record receive queue number for a connection

This patch adds a new field to sock_common 'skc_rx_queue_mapping'
which holds the receive queue number for the connection. The Rx queue
is marked in tcp_finish_connect() to allow a client app to do
SO_INCOMING_NAPI_ID after a connect() call to get the right queue
association for a socket. Rx queue is also marked in tcp_conn_request()
to allow syn-ack to go on the right tx-queue associated with
the queue on which syn is received.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h |  1 +
 include/net/sock.h      | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/net')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c5187438af38..9e36fda652b7 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -151,6 +151,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id = skb->napi_id;
 #endif
+	sk_rx_queue_set(sk, skb);
 }
 
 /* variant used for unconnected sockets */
diff --git a/include/net/sock.h b/include/net/sock.h
index 37b09c84504b..2b097cc89727 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_rx_queue_mapping: rx queue number for this connection
  *	@skc_flags: place holder for sk_flags
  *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -215,6 +216,9 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	unsigned short		skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	unsigned short		skc_rx_queue_mapping;
+#endif
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -326,6 +330,9 @@ struct sock {
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping	__sk_common.skc_rx_queue_mapping
+#endif
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
@@ -1702,6 +1709,27 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return -1;
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	if (skb_rx_queue_recorded(skb)) {
+		u16 rx_queue = skb_get_rx_queue(skb);
+
+		if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
+			return;
+
+		sk->sk_rx_queue_mapping = rx_queue;
+	}
+#endif
+}
+
+static inline void sk_rx_queue_clear(struct sock *sk)
+{
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+#endif
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
-- 
cgit 


From fc9bab24e9c654f62f3d411fc0b041be9e487e9d Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:27:02 -0700
Subject: net: Enable Tx queue selection based on Rx queues

This patch adds support to pick Tx queue based on the Rx queue(s) map
configuration set by the admin through the sysfs attribute
for each Tx queue. If the user configuration for receive queue(s) map
does not apply, then the Tx queue selection falls back to CPU(s) map
based selection and finally to hashing.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2b097cc89727..2ed99bfa4595 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1730,6 +1730,16 @@ static inline void sk_rx_queue_clear(struct sock *sk)
 #endif
 }
 
+#ifdef CONFIG_XPS
+static inline int sk_rx_queue_get(const struct sock *sk)
+{
+	if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_rx_queue_mapping;
+
+	return -1;
+}
+#endif
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
-- 
cgit 


From 69b9e1e07d98b57b972df3c44647ca8795284d39 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:11 +0800
Subject: ipv4: add __ip_queue_xmit() that supports tos param

This patch introduces __ip_queue_xmit(), through which the callers
can pass tos param into it without having to set inet->tos. For
ipv6, ip6_xmit() already allows passing tclass parameter.

It's needed when some transport protocol doesn't use inet->tos,
like sctp's per transport dscp, which will be added in next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 0d2281b4b27a..09da79d8ceea 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -148,7 +148,8 @@ void ip_send_check(struct iphdr *ip);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+		    __u8 tos);
 void ip_init(void);
 int ip_append_data(struct sock *sk, struct flowi4 *fl4,
 		   int getfrag(void *from, char *to, int offset, int len,
@@ -174,6 +175,12 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
 			    struct ipcm_cookie *ipc, struct rtable **rtp,
 			    struct inet_cork *cork, unsigned int flags);
 
+static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
+				struct flowi *fl)
+{
+	return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+
 static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
 {
 	return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
-- 
cgit 


From 8a9c58d28d0f66569737a3295116710ed24573cd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:12 +0800
Subject: sctp: add support for dscp and flowlabel per transport

Like some other per transport params, flowlabel and dscp are added
in transport, asoc and sctp_sock. By default, transport sets its
value from asoc's, and asoc does it from sctp_sock. flowlabel
only works for ipv6 transport.

Other than that they need to be passed down in sctp_xmit, flow4/6
also needs to set them before looking up route in get_dst.

Note that it uses '& 0x100000' to check if flowlabel is set and
'& 0x1' (tos 1st bit is unused) to check if dscp is set by users,
so that they could be set to 0 by sockopt in next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 701a51736fa5..ab869e0d8326 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -193,6 +193,9 @@ struct sctp_sock {
 	/* This is the max_retrans value for new associations. */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* The initial Path MTU to use for new associations. */
 	__u32 pathmtu;
 
@@ -895,6 +898,9 @@ struct sctp_transport {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* This is the partially failed retrans value for the transport
 	 * and will be initialized from the assocs value.  This can be changed
 	 * using the SCTP_PEER_ADDR_THLDS socket option
@@ -1772,6 +1778,9 @@ struct sctp_association {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* Flag that path mtu update is pending */
 	__u8   pmtu_pending;
 
-- 
cgit 


From 17266ee939849cb095ed7dd9edbec4162172226b Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:14:12 +0100
Subject: net: ipv4: listified version of ip_rcv

Also involved adding a way to run a netfilter hook over a list of packets.
 Rather than attempting to make netfilter know about lists (which would be
 a major project in itself) we just let it call the regular okfn (in this
 case ip_rcv_finish()) for any packets it steals, and have it give us back
 a list of packets it's synchronously accepted (which normally NF_HOOK
 would automatically call okfn() on, but we want to be able to potentially
 pass the list to a listified version of okfn().)
The netfilter hooks themselves are indirect calls that still happen per-
 packet (see nf_hook_entry_hookfn()), but again, changing that can be left
 for future work.

There is potential for out-of-order receives if the netfilter hook ends up
 synchronously stealing packets, as they will be processed before any
 accepts earlier in the list.  However, it was already possible for an
 asynchronous accept to cause out-of-order receives, so presumably this is
 considered OK.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 09da79d8ceea..99d1b835d2aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -138,6 +138,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 			  struct ip_options_rcu *opt);
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	   struct net_device *orig_dev);
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev);
 int ip_local_deliver(struct sk_buff *skb);
 int ip_mr_input(struct sk_buff *skb);
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-- 
cgit 


From 80b14dee2bea128928537d61c333f24cb8cbb62f Mon Sep 17 00:00:00 2001
From: Richard Cochran <rcochran@linutronix.de>
Date: Tue, 3 Jul 2018 15:42:48 -0700
Subject: net: Add a new socket option for a future transmit time.

This patch introduces SO_TXTIME. User space enables this option in
order to pass a desired future transmit time in a CMSG when calling
sendmsg(2). The argument to this socket option is an 8-byte struct
provided by the uapi header net_tstamp.h, defined as:

struct sock_txtime {
	clockid_t 	clockid;
	u32		flags;
};

Note that new fields were added to struct sock by filling a 2-byte
hole found in the struct. For that reason, neither the struct size nor
the number of cachelines was altered.

Signed-off-by: Richard Cochran <rcochran@linutronix.de>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2ed99bfa4595..68347b9821c6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -319,6 +319,9 @@ struct sock_common {
   *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
   *	@sk_reuseport_cb: reuseport group container
   *	@sk_rcu: used during RCU grace period
+  *	@sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
+  *	@sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+  *	@sk_txtime_unused: unused txtime flags
   */
 struct sock {
 	/*
@@ -475,6 +478,11 @@ struct sock {
 	u8			sk_shutdown;
 	u32			sk_tskey;
 	atomic_t		sk_zckey;
+
+	u8			sk_clockid;
+	u8			sk_txtime_deadline_mode : 1,
+				sk_txtime_unused : 7;
+
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 #ifdef CONFIG_SECURITY
@@ -790,6 +798,7 @@ enum sock_flags {
 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
+	SOCK_TXTIME,
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1585,6 +1594,7 @@ void sock_kzfree_s(struct sock *sk, void *mem, int size);
 void sk_send_sigurg(struct sock *sk);
 
 struct sockcm_cookie {
+	u64 transmit_time;
 	u32 mark;
 	u16 tsflags;
 };
-- 
cgit 


From bc969a977880511057053642a81371196303ca01 Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:42:49 -0700
Subject: net: ipv4: Hook into time based transmission

Add a transmit_time field to struct inet_cork, then copy the
timestamp from the CMSG cookie at ip_setup_cork() so we can
safely copy it into the skb later during __ip_make_skb().

For the raw fast path, just perform the copy at raw_send_hdrinc().

Signed-off-by: Richard Cochran <rcochran@linutronix.de>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 83d5b3c2ac42..314be484c696 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -148,6 +148,7 @@ struct inet_cork {
 	__s16			tos;
 	char			priority;
 	__u16			gso_size;
+	u64			transmit_time;
 };
 
 struct inet_cork_full {
-- 
cgit 


From 860b642b9c33ea4a6ae2f416607b0b98a9d11bb0 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Tue, 3 Jul 2018 15:42:52 -0700
Subject: net/sched: Allow creating a Qdisc watchdog with other clocks

This adds 'qdisc_watchdog_init_clockid()', which allows a clockid to be
passed so that other time references can be used when scheduling
the Qdisc to run.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 815b92a23936..2466ea143d01 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -72,6 +72,8 @@ struct qdisc_watchdog {
 	struct Qdisc	*qdisc;
 };
 
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+				 clockid_t clockid);
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
 
-- 
cgit 


From 88cab77162e86e0f6a2b7e4f859c1435c4e24feb Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:42:54 -0700
Subject: net/sched: Add HW offloading capability to ETF

Add infra so etf qdisc supports HW offload of time-based transmission.

For hw offload, the time sorted list is still used, so packets are
dequeued always in order of txtime.

Example:

$ tc qdisc replace dev enp2s0 parent root handle 100 mqprio num_tc 3 \
           map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0

$ tc qdisc add dev enp2s0 parent 100:1 etf offload delta 100000 \
	   clockid CLOCK_REALTIME

In this example, the Qdisc will use HW offload for the control of the
transmission time through the network adapter. The hrtimer used for
packets scheduling inside the qdisc will use the clockid CLOCK_REALTIME
as reference and packets leave the Qdisc "delta" (100000) nanoseconds
before their transmission time. Because this will be using HW offload and
since dynamic clocks are not supported by the hrtimer, the system clock
and the PHC clock must be synchronized for this mode to behave as
expected.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2466ea143d01..7dc769e5452b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -155,4 +155,9 @@ struct tc_cbs_qopt_offload {
 	s32 sendslope;
 };
 
+struct tc_etf_qopt_offload {
+	u8 enable;
+	s32 queue;
+};
+
 #endif
-- 
cgit 


From 4b15c7075352668d4467ced7594b676707d11cae Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:43:00 -0700
Subject: net/sched: Make etf report drops on error_queue

Use the socket error queue for reporting dropped packets if the
socket has enabled that feature through the SO_TXTIME API.

Packets are dropped either on enqueue() if they aren't accepted by the
qdisc or on dequeue() if the system misses their deadline. Those are
reported as different errors so applications can react accordingly.

Userspace can retrieve the errors through the socket error queue and the
corresponding cmsg interfaces. A struct sock_extended_err* is used for
returning the error data, and the packet's timestamp can be retrieved by
adding both ee_data and ee_info fields as e.g.:

    ((__u64) serr->ee_data << 32) + serr->ee_info

This feature is disabled by default and must be explicitly enabled by
applications. Enabling it can bring some overhead for the Tx cycles
of the application.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 68347b9821c6..e0eac9ef44b5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -481,7 +481,8 @@ struct sock {
 
 	u8			sk_clockid;
 	u8			sk_txtime_deadline_mode : 1,
-				sk_txtime_unused : 7;
+				sk_txtime_report_errors : 1,
+				sk_txtime_unused : 6;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
-- 
cgit 


From eabaef1896bc06319461a644e3aa139885454def Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:28 +0300
Subject: devlink: Add devlink_param register and unregister

Define configuration parameters data structure.
Add functions to register and unregister the driver supported
configuration parameters table.
For each parameter registered, the driver should fill all the parameter's
fields. In case the only supported configuration mode is "driverinit",
the parameter's get()/set() functions are not required and should be set
to NULL; for any other configuration mode, these functions are required
and should be set by the driver.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e336ea9c73df..4a0687a1fb99 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -27,6 +27,7 @@ struct devlink {
 	struct list_head sb_list;
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
+	struct list_head param_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -295,6 +296,68 @@ struct devlink_resource {
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
+#define DEVLINK_PARAM_MAX_STRING_VALUE 32
+enum devlink_param_type {
+	DEVLINK_PARAM_TYPE_U8,
+	DEVLINK_PARAM_TYPE_U16,
+	DEVLINK_PARAM_TYPE_U32,
+	DEVLINK_PARAM_TYPE_STRING,
+	DEVLINK_PARAM_TYPE_BOOL,
+};
+
+union devlink_param_value {
+	u8 vu8;
+	u16 vu16;
+	u32 vu32;
+	const char *vstr;
+	bool vbool;
+};
+
+struct devlink_param_gset_ctx {
+	union devlink_param_value val;
+	enum devlink_param_cmode cmode;
+};
+
+/**
+ * struct devlink_param - devlink configuration parameter data
+ * @name: name of the parameter
+ * @generic: indicates if the parameter is generic or driver specific
+ * @type: parameter type
+ * @supported_cmodes: bitmap of supported configuration modes
+ * @get: get parameter value, used for runtime and permanent
+ *       configuration modes
+ * @set: set parameter value, used for runtime and permanent
+ *       configuration modes
+ *
+ * This struct should be used by the driver to fill the data for
+ * a parameter it registers.
+ */
+struct devlink_param {
+	u32 id;
+	const char *name;
+	bool generic;
+	enum devlink_param_type type;
+	unsigned long supported_cmodes;
+	int (*get)(struct devlink *devlink, u32 id,
+		   struct devlink_param_gset_ctx *ctx);
+	int (*set)(struct devlink *devlink, u32 id,
+		   struct devlink_param_gset_ctx *ctx);
+};
+
+struct devlink_param_item {
+	struct list_head list;
+	const struct devlink_param *param;
+	union devlink_param_value driverinit_value;
+	bool driverinit_value_valid;
+};
+
+enum devlink_param_generic_id {
+
+	/* add new param generic ids above here*/
+	__DEVLINK_PARAM_GENERIC_ID_MAX,
+	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
+};
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -430,6 +493,12 @@ void devlink_resource_occ_get_register(struct devlink *devlink,
 				       void *occ_get_priv);
 void devlink_resource_occ_get_unregister(struct devlink *devlink,
 					 u64 resource_id);
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count);
+void devlink_params_unregister(struct devlink *devlink,
+			       const struct devlink_param *params,
+			       size_t params_count);
 
 #else
 
@@ -622,6 +691,22 @@ devlink_resource_occ_get_unregister(struct devlink *devlink,
 {
 }
 
+static inline int
+devlink_params_register(struct devlink *devlink,
+			const struct devlink_param *params,
+			size_t params_count)
+{
+	return 0;
+}
+
+static inline void
+devlink_params_unregister(struct devlink *devlink,
+			  const struct devlink_param *params,
+			  size_t params_count)
+{
+
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From e3b7ca18ad7b2f47ebd3b6e6ce58a42c6ec24746 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:30 +0300
Subject: devlink: Add param set command

Add param set command to set value for a parameter.
Value can be set to any of the supported configuration modes.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4a0687a1fb99..88062752dcd7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -328,6 +328,7 @@ struct devlink_param_gset_ctx {
  *       configuration modes
  * @set: set parameter value, used for runtime and permanent
  *       configuration modes
+ * @validate: validate input value is applicable (within value range, etc.)
  *
  * This struct should be used by the driver to fill the data for
  * a parameter it registers.
@@ -342,6 +343,9 @@ struct devlink_param {
 		   struct devlink_param_gset_ctx *ctx);
 	int (*set)(struct devlink *devlink, u32 id,
 		   struct devlink_param_gset_ctx *ctx);
+	int (*validate)(struct devlink *devlink, u32 id,
+			union devlink_param_value val,
+			struct netlink_ext_ack *extack);
 };
 
 struct devlink_param_item {
-- 
cgit 


From ec01aeb1803eaaf0d006e7b07b5ddb5e429c38a4 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:31 +0300
Subject: devlink: Add support for get/set driverinit value

"driverinit" configuration mode value is held by devlink to enable
the driver to query the value after reload. Two additional functions
are added to help the driver get/set the value from/to devlink:
devlink_param_driverinit_value_set() and
devlink_param_driverinit_value_get().

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 88062752dcd7..3302e43b09a4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -503,6 +503,10 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value *init_val);
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value init_val);
 
 #else
 
@@ -711,6 +715,20 @@ devlink_params_unregister(struct devlink *devlink,
 
 }
 
+static inline int
+devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				   union devlink_param_value *init_val)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				   union devlink_param_value init_val)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From ea601e17098856ee059f35c2a75659e57df81f25 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:32 +0300
Subject: devlink: Add devlink notifications support for params

Add devlink_param_notify() function to support devlink param notifications.
Add notification call to devlink param set, register and unregister
functions.
Add devlink_param_value_changed() function to enable the driver to notify
devlink on value change. The driver should use this function after a value
was changed in any configuration mode apart from driverinit.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3302e43b09a4..792edaa996ba 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -507,6 +507,7 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
 
 #else
 
@@ -729,6 +730,12 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 	return -EOPNOTSUPP;
 }
 
+static inline void
+devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From 036467c3990c75ec8ce97e517a864b52e184a1aa Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:33 +0300
Subject: devlink: Add generic parameters internal_err_reset and max_macs

Add the first 2 generic parameters to devlink configuration parameters set:
internal_err_reset - When set, enables device reset on internal errors.
max_macs - max number of MACs per ETH port.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 792edaa996ba..a1c230d18911 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -356,12 +356,43 @@ struct devlink_param_item {
 };
 
 enum devlink_param_generic_id {
+	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
 };
 
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_NAME "max_macs"
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
+{									\
+	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
+	.name = DEVLINK_PARAM_GENERIC_##_id##_NAME,			\
+	.type = DEVLINK_PARAM_GENERIC_##_id##_TYPE,			\
+	.generic = true,						\
+	.supported_cmodes = _cmodes,					\
+	.get = _get,							\
+	.set = _set,							\
+	.validate = _validate,						\
+}
+
+#define DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, _get, _set, _validate)	\
+{									\
+	.id = _id,							\
+	.name = _name,							\
+	.type = _type,							\
+	.supported_cmodes = _cmodes,					\
+	.get = _get,							\
+	.set = _set,							\
+	.validate = _validate,						\
+}
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
-- 
cgit 


From f567bcdae2b052bab94be7903863cb9ab47c907c Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Wed, 4 Jul 2018 14:30:36 +0300
Subject: devlink: Add enable_sriov boolean generic parameter

enable_sriov - Enables Single-Root Input/Output Virtualization(SR-IOV)
characteristic of the device.

Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index a1c230d18911..8ed571385626 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -358,6 +358,7 @@ struct devlink_param_item {
 enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -370,6 +371,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_MAX_MACS_NAME "max_macs"
 #define DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE DEVLINK_PARAM_TYPE_U32
 
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit 


From d8269e2cbf908f9d26aa5d3217236227dffd1d89 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Thu, 5 Jul 2018 15:49:42 +0100
Subject: net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()

Essentially the same as the ipv4 equivalents.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 16475c269749..b7843e0b16ee 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -922,6 +922,8 @@ static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6)
 
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
 	     struct packet_type *pt, struct net_device *orig_dev);
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+		   struct net_device *orig_dev);
 
 int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-- 
cgit 


From cfdb0c2d095ac5d7f09cac1317b7d0a9e8178134 Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:12:50 +0530
Subject: Bluetooth: Store Resolv list size

When the controller supports the Read LE Resolv List size feature, the
maximum list size is now read and stored.

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 17.979791
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 17.980629
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 17.980786
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 17.981627
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #59 [hci0] 17.981786
> HCI Event: Command Complete (0x0e) plen 12               #60 [hci0] 17.982636
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 6 ++++++
 include/net/bluetooth/hci_core.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 1668211297a9..484f24c7a415 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,12 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
+struct hci_rp_le_read_resolv_list_size {
+	__u8	status;
+	__u8	size;
+} __packed;
+
 #define HCI_OP_LE_READ_MAX_DATA_LEN	0x202f
 struct hci_rp_le_read_max_data_len {
 	__u8	status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 893bbbb5d2fa..409f49bd8338 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -221,6 +221,7 @@ struct hci_dev {
 	__u8		features[HCI_MAX_PAGES][8];
 	__u8		le_features[8];
 	__u8		le_white_list_size;
+	__u8		le_resolv_list_size;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -367,6 +368,7 @@ struct hci_dev {
 	struct list_head	identity_resolving_keys;
 	struct list_head	remote_oob_data;
 	struct list_head	le_white_list;
+	struct list_head	le_resolv_list;
 	struct list_head	le_conn_params;
 	struct list_head	pend_le_conns;
 	struct list_head	pend_le_reports;
-- 
cgit 


From 545f2596b907f0747170c7cb71edc74cecf68c5c Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:13:20 +0530
Subject: Bluetooth: Add HCI command for clear Resolv list

Check whether the Resolv list is supported by the controller, i.e. check the
supported commands first before issuing HCI_OP_LE_CLEAR_RESOLV_LIST.

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 28.919131
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 28.920016
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 28.920164
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 28.920873
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 28.921109
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 28.922016
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear Resolving... (0x08|0x0029) plen 0  #61 [hci0] 28.922166
> HCI Event: Command Complete (0x0e) plen 4                #62 [hci0] 28.922872
      LE Clear Resolving List (0x08|0x0029) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #63 [hci0] 28.923117
> HCI Event: Command Complete (0x0e) plen 12               #64 [hci0] 28.924030
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 484f24c7a415..4af1a3a4d9b1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,8 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_CLEAR_RESOLV_LIST	0x2029
+
 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
 struct hci_rp_le_read_resolv_list_size {
 	__u8	status;
-- 
cgit 


From a2344b9e3a8c5c2064306b0d99b0e9a6c4813c08 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:28 +0530
Subject: Bluetooth: Use extended scanning if controller supports

This implements the Set extended scan param and Set extended scan enable
commands and uses them to start LE scan based on controller support.

The new features added in these commands are setting of new PHY for
scanning and setting of scan duration. Both features are disabled
for now, meaning only 1M PHY is set and scan duration is set to 0
which means that scanning will be done until scan disable is called.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 8
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x01
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 24 ++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 2 files changed, 28 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4af1a3a4d9b1..8c2868f439e7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,30 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
+struct hci_cp_le_set_ext_scan_params {
+	__u8    own_addr_type;
+	__u8    filter_policy;
+	__u8    scanning_phys;
+	__u8    data[0];
+} __packed;
+
+#define LE_SCAN_PHY_1M 0x01
+
+struct hci_cp_le_scan_phy_params {
+	__u8    type;
+	__le16  interval;
+	__le16  window;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_ENABLE   0x2042
+struct hci_cp_le_set_ext_scan_enable {
+	__u8    enable;
+	__u8    filter_dup;
+	__le16  duration;
+	__le16  period;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 409f49bd8338..cc0bde74dd45 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1158,6 +1158,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+/* Use ext scanning if set ext scan param and ext scan enable is supported */
+#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
+			   ((dev)->commands[37] & 0x40))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
-- 
cgit 


From c215e9397b00b3045a668120ed7dbd89f2866e74 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:29 +0530
Subject: Bluetooth: Process extended ADV report event

This patch enables the Extended ADV report event if extended scanning
is supported in the controller and processes it.

The new features are not handled and for now it's as good as
legacy ADV report.

> HCI Event: LE Meta Event (0x3e) plen 53
      LE Extended Advertising Report (0x0d)
        Num reports: 1
        Entry 0
          Event type: 0x0013
            Props: 0x0013
              Connectable
              Scannable
              Use legacy advertising PDUs
            Data status: Complete
          Legacy PDU Type: ADV_IND (0x0013)
          Address type: Random (0x01)
          Address: DB:7E:2E:1A:85:E8 (Static)
          Primary PHY: LE 1M
          Secondary PHY: LE 1M
          SID: 0x00
          TX power: 0 dBm
          RSSI: -90 dBm (0xa6)
          Periodic advertising invteral: 0.00 msec (0x0000)
          Direct address type: Public (0x00)
          Direct address: 00:00:00:00:00:00 (OUI 00-00-00)
          Data length: 0x1b
        0f 09 44 65 73 69 67 6e 65 72 20 4d 6f 75 73 65  ..Designer Mouse
        03 19 c2 03 02 01 05 03 03 12 18                 ...........

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8c2868f439e7..0ec51eb14810 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1925,6 +1925,15 @@ struct hci_ev_le_conn_complete {
 #define LE_ADV_SCAN_IND		0x02
 #define LE_ADV_NONCONN_IND	0x03
 #define LE_ADV_SCAN_RSP		0x04
+#define LE_ADV_INVALID		0x05
+
+/* Legacy event types in extended adv report */
+#define LE_LEGACY_ADV_IND		0x0013
+#define LE_LEGACY_ADV_DIRECT_IND 	0x0015
+#define LE_LEGACY_ADV_SCAN_IND		0x0012
+#define LE_LEGACY_NONCONN_IND		0x0010
+#define LE_LEGACY_SCAN_RSP_ADV		0x001b
+#define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
@@ -1989,6 +1998,23 @@ struct hci_ev_le_direct_adv_info {
 	__s8	 rssi;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_REPORT    0x0d
+struct hci_ev_le_ext_adv_report {
+	__le16 	 evt_type;
+	__u8	 bdaddr_type;
+	bdaddr_t bdaddr;
+	__u8	 primary_phy;
+	__u8	 secondary_phy;
+	__u8	 sid;
+	__u8	 tx_power;
+	__s8	 rssi;
+	__le16 	 interval;
+	__u8  	 direct_addr_type;
+	bdaddr_t direct_addr;
+	__u8  	 length;
+	__u8	 data[0];
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit 


From 4d94f95d30c8fbfe86068e9abed110974d697cf5 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 22:50:32 +0200
Subject: Bluetooth: Use extended LE Connection if supported

This implements extended LE create connection and enhanced
LE conn complete event if the controller supports them.

For now it is as good as legacy LE connection and event as
no new features in the extended connection are handled.

< HCI Command: LE Extended Create Connection (0x08|0x0043) plen 26
        Filter policy: White list is not used (0x00)
        Own address type: Public (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Initiating PHYs: 0x01
        Entry 0: LE 1M
          Scan interval: 60.000 msec (0x0060)
          Scan window: 60.000 msec (0x0060)
          Min connection interval: 50.00 msec (0x0028)
          Max connection interval: 70.00 msec (0x0038)
          Connection latency: 0 (0x0000)
          Supervision timeout: 420 msec (0x002a)
          Min connection length: 0.000 msec (0x0000)
          Max connection length: 0.000 msec (0x0000)
> HCI Event: Command Status (0x0f) plen 4
      LE Extended Create Connection (0x08|0x0043) ncmd 2
        Status: Success (0x00)
> HCI Event: LE Meta Event (0x3e) plen 31
      LE Enhanced Connection Complete (0x0a)
        Status: Success (0x00)
        Handle: 3585
        Role: Master (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Local resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Peer resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Connection interval: 67.50 msec (0x0036)
        Connection latency: 0 (0x0000)
        Supervision timeout: 420 msec (0x002a)
        Master clock accuracy: 0x00
@ MGMT Event: Device Connected (0x000b) plen 40
        LE Address: DB:7E:2E:1D:85:E8 (Static)
        Flags: 0x00000000
        Data length: 27
        Name (complete): Designer Mouse
        Appearance: Mouse (0x03c2)
        Flags: 0x05
          LE Limited Discoverable Mode
          BR/EDR Not Supported
        16-bit Service UUIDs (complete): 1 entry
          Human Interface Device (0x1812)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 36 ++++++++++++++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  2 ++
 2 files changed, 38 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0ec51eb14810..73e48be5bbb3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1538,6 +1538,27 @@ struct hci_cp_le_set_ext_scan_enable {
 	__le16  period;
 } __packed;
 
+#define HCI_OP_LE_EXT_CREATE_CONN    0x2043
+struct hci_cp_le_ext_create_conn {
+	__u8      filter_policy;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      phys;
+	__u8      data[0];
+} __packed;
+
+struct hci_cp_le_ext_conn_param {
+	__le16 scan_interval;
+	__le16 scan_window;
+	__le16 conn_interval_min;
+	__le16 conn_interval_max;
+	__le16 conn_latency;
+	__le16 supervision_timeout;
+	__le16 min_ce_len;
+	__le16 max_ce_len;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
@@ -2015,6 +2036,21 @@ struct hci_ev_le_ext_adv_report {
 	__u8	 data[0];
 } __packed;
 
+#define HCI_EV_LE_ENHANCED_CONN_COMPLETE    0x0a
+struct hci_ev_le_enh_conn_complete {
+	__u8      status;
+	__le16    handle;
+	__u8      role;
+	__u8      bdaddr_type;
+	bdaddr_t  bdaddr;
+	bdaddr_t  local_rpa;
+	bdaddr_t  peer_rpa;
+	__le16    interval;
+	__le16    latency;
+	__le16    supervision_timeout;
+	__u8      clk_accurancy;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cc0bde74dd45..a74453571264 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1161,6 +1161,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
+/* Use ext create connection if command is supported */
+#define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
-- 
cgit 


From 351782067b6be81879b0af0daf7bd3acbb32d986 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:54 -0400
Subject: ipv4: ipcm_cookie initializers

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 99d1b835d2aa..6db23bf1e5eb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -79,6 +79,21 @@ struct ipcm_cookie {
 	__u16			gso_size;
 };
 
+static inline void ipcm_init(struct ipcm_cookie *ipcm)
+{
+	*ipcm = (struct ipcm_cookie) { .tos = -1 };
+}
+
+static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+				const struct inet_sock *inet)
+{
+	ipcm_init(ipcm);
+
+	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+	ipcm->oif = inet->sk.sk_bound_dev_if;
+	ipcm->addr = inet->inet_saddr;
+}
+
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
 #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
 
-- 
cgit 


From b515430ac9c25d5192cf498af3c6be6c4f51caad Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:55 -0400
Subject: ipv6: ipcm6_cookie initializer

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b7843e0b16ee..6cb247f54d4c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -301,6 +301,25 @@ struct ipcm6_cookie {
 	__u16 gso_size;
 };
 
+static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = -1,
+		.dontfrag = -1,
+	};
+}
+
+static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
+				 const struct ipv6_pinfo *np)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = np->tclass,
+		.dontfrag = np->dontfrag,
+	};
+}
+
 static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
 {
 	struct ipv6_txoptions *opt;
-- 
cgit 


From 657a0667025e77cc17f8a38b93e60a2bc24d830c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:56 -0400
Subject: sock: sockc cookie initializer

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index e0eac9ef44b5..83b747538bd0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1600,6 +1600,12 @@ struct sockcm_cookie {
 	u16 tsflags;
 };
 
+static inline void sockcm_init(struct sockcm_cookie *sockc,
+			       const struct sock *sk)
+{
+	*sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+}
+
 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		     struct sockcm_cookie *sockc);
 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
-- 
cgit 


From 5fdaa88dfefa87ee1ea92750e99950dca182ea41 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:57 -0400
Subject: ipv6: fold sockcm_cookie into ipcm6_cookie

ipcm_cookie includes sockcm_cookie. Do the same for ipcm6_cookie.

This reduces the number of arguments that need to be passed around,
applies ipcm6_init to all cookie fields at once and reduces code
differentiation between ipv4 and ipv6.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h      | 7 +++----
 include/net/transp_v6.h | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6cb247f54d4c..aa6fd11a887c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -294,6 +294,7 @@ struct ipv6_fl_socklist {
 };
 
 struct ipcm6_cookie {
+	struct sockcm_cookie sockc;
 	__s16 hlimit;
 	__s16 tclass;
 	__s8  dontfrag;
@@ -959,8 +960,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc);
+		    struct rt6_info *rt, unsigned int flags);
 
 int ip6_push_pending_frames(struct sock *sk);
 
@@ -977,8 +977,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc);
+			     struct inet_cork_full *cork);
 
 static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
 {
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index f6a3543e5247..a8f6020f1196 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -42,8 +42,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 				    struct sk_buff *skb);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg,
-			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6,
-			  struct sockcm_cookie *sockc);
+			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6);
 
 void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 			       __u16 srcp, __u16 destp, int rqueue, int bucket);
-- 
cgit 


From 678ca42d688534adfc780b150abefaaac7c86687 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:58 -0400
Subject: ip: remove tx_flags from ipcm_cookie and use same logic for v4 and v6

skb_shinfo(skb)->tx_flags is derived from sk->sk_tsflags, possibly
after modification by __sock_cmsg_send, by calling sock_tx_timestamp.

The IPv4 and IPv6 paths do this conversion differently. In IPv4, the
individual protocols that support tx timestamps call this function
and store the result in ipc.tx_flags. In IPv6, sock_tx_timestamp is
called in __ip6_append_data.

There is no need to store both tx_flags and ts_flags in the cookie
as one is derived from the other. Convert when setting up the cork
and remove the redundant field. This is similar to IPv6, except that
the conversion happens only once per datagram, in ip(6)_setup_cork.

Also change __ip6_append_data to match __ip_append_data. Only update
tskey if timestamping is enabled with OPT_ID. The SOCK_.. test is
redundant: only valid protocols can have non-zero cork->tx_flags.

After this change the IPv4 and IPv6 logic is the same.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 6db23bf1e5eb..e44b1a44f67a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -72,7 +72,6 @@ struct ipcm_cookie {
 	__be32			addr;
 	int			oif;
 	struct ip_options_rcu	*opt;
-	__u8			tx_flags;
 	__u8			ttl;
 	__s16			tos;
 	char			priority;
-- 
cgit 


From 22dd149167359981ea6f4afde04026fb78747ddc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 14:58:51 +0200
Subject: devlink: fix incorrect return statement

A newly added dummy helper function tries to return a failure from a "void"
function:

In file included from include/net/dsa.h:24,
                 from arch/arm/plat-orion/common.c:21:
include/net/devlink.h: In function 'devlink_param_value_changed':
include/net/devlink.h:771:9: error: 'return' with a value, in function returning void [-Werror]
  return -EOPNOTSUPP;

This fixes it by removing the bogus statement.

Fixes: ea601e170988 ("devlink: Add devlink notifications support for params")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ed571385626..f67c29cede15 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -768,7 +768,6 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 static inline void
 devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
-	return -EOPNOTSUPP;
 }
 
 #endif
-- 
cgit 


From 2064c3d4c02026572d4975177f28a58052f0a8b7 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 6 Jul 2018 05:38:12 +0000
Subject: net/flow_dissector: Save vlan ethertype from headers

Change vlan dissector key to save vlan tpid to support both 802.1Q
and 802.1AD ethertype.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index adc24df56b90..8f899688a965 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -47,7 +47,7 @@ struct flow_dissector_key_tags {
 struct flow_dissector_key_vlan {
 	u16	vlan_id:12,
 		vlan_priority:3;
-	u16	padding;
+	__be16	vlan_tpid;
 };
 
 struct flow_dissector_key_mpls {
-- 
cgit 


From 24c590e3b0f9eebe603ebe3d516990306d385f46 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 6 Jul 2018 05:38:14 +0000
Subject: net/flow_dissector: Add support for QinQ dissection

Dissect the QinQ packets to get both outer and inner vlan information,
then store to the extended flow keys.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8f899688a965..c64406717eee 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -206,6 +206,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
@@ -237,6 +238,7 @@ struct flow_keys {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_tags tags;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	struct flow_dissector_key_keyid keyid;
 	struct flow_dissector_key_ports ports;
 	struct flow_dissector_key_addrs addrs;
-- 
cgit 


From eec94fdb04806790c7b7e6ea347820064cc6d467 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:23 +0300
Subject: net: sched: use rcu for action cookie update

Implement functions to atomically update and free action cookie
using rcu mechanism.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 2 +-
 include/net/pkt_cls.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 5ff11adbe2a6..ffc3ef321776 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -37,7 +37,7 @@ struct tc_action {
 	spinlock_t			tcfa_lock;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue __percpu *cpu_qstats;
-	struct tc_cookie	*act_cookie;
+	struct tc_cookie	__rcu *act_cookie;
 	struct tcf_chain	*goto_chain;
 };
 #define tcf_index	common.tcfa_index
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6641584b27f1..2081e4219f81 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -781,6 +781,7 @@ struct tc_mqprio_qopt_offload {
 struct tc_cookie {
 	u8  *data;
 	u32 len;
+	struct rcu_head rcu;
 };
 
 struct tc_qopt_offload_stats {
-- 
cgit 


From 036bb44327f50273e85ee4a2c9b56eebce1c0838 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:24 +0300
Subject: net: sched: change type of reference and bind counters

Change type of action reference counter to refcount_t.

Change type of action bind counter to atomic_t.
This type is used to allow decrementing bind counter without testing
for 0 result.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index ffc3ef321776..2759226527a2 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -6,6 +6,7 @@
  * Public action API for classifiers/qdiscs
 */
 
+#include <linux/refcount.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
@@ -26,8 +27,8 @@ struct tc_action {
 	struct tcf_idrinfo		*idrinfo;
 
 	u32				tcfa_index;
-	int				tcfa_refcnt;
-	int				tcfa_bindcnt;
+	refcount_t			tcfa_refcnt;
+	atomic_t			tcfa_bindcnt;
 	u32				tcfa_capab;
 	int				tcfa_action;
 	struct tcf_t			tcfa_tm;
-- 
cgit 


From 789871bb2a0381425b106d2a995bde1460d35a34 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:25 +0300
Subject: net: sched: implement unlocked action init API

Add additional 'rtnl_held' argument to act API init functions. It is
required to implement actions that need to release rtnl lock before loading
kernel module and reacquire it afterwards.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2759226527a2..27823f4e24c4 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -92,7 +92,8 @@ struct tc_action_ops {
 			  struct netlink_ext_ack *extack);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
-			int bind, struct netlink_ext_ack *extack);
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
 			const struct tc_action_ops *,
@@ -168,10 +169,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
 		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack);
+		    bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
-- 
cgit 


From 2a2ea349704fffade9526d5122299edbbfd122ca Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:27 +0300
Subject: net: sched: implement action API that deletes action by index

Implement new action API function that atomically finds and deletes action
from idr by index. Intended to be used by lockless actions that do not rely
on rtnl lock.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 27823f4e24c4..a8eaae67c264 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -153,6 +153,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
 static inline int tcf_idr_release(struct tc_action *a, bool bind)
-- 
cgit 


From b409074e6693bcdaa7abbee2a035f22a9eabda53 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:28 +0300
Subject: net: sched: add 'delete' function to action ops

Extend action ops with 'delete' function. Each action type implements
its own delete function that doesn't depend on rtnl lock.

Implement delete function that is required to delete actions without
holding rtnl lock. Use action API function that atomically deletes action
only if it is still in action idr. This implementation prevents concurrent
threads from deleting same action twice.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index a8eaae67c264..b9ed2b8256a5 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	int     (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
-- 
cgit 


From 0190c1d452a91c38a3462abdd81752be1b9006a8 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:32 +0300
Subject: net: sched: atomically check-allocate action

Implement function that atomically checks if action exists and either takes
reference to it, or allocates idr slot for action index to prevent
concurrent allocations of actions with same index. Use EBUSY error pointer
to indicate that idr slot is reserved.

Implement cleanup helper function that removes temporary error pointer from
idr. (in case of error between idr allocation and insertion of newly
created action to specified index)

Refactor all action init functions to insert new action to idr using this
API.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index b9ed2b8256a5..8090de2edab7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -154,6 +154,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind);
 int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
-- 
cgit 


From 90b73b77d08ec395311411b545c756ca710aae59 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:33 +0300
Subject: net: sched: change action API to use array of pointers to actions

Act API used linked list to pass set of actions to functions. It is an
intrusive data structure that stores list nodes inside action structure
itself, which means it is not safe to modify such list concurrently.
However, action API doesn't use any linked list specific operations on this
set of actions, so it can be safely refactored into plain pointer array.

Refactor action API to use array of pointers to tc_actions instead of
linked list. Change argument 'actions' type of exported action init,
destroy and dump functions.

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8090de2edab7..683ce41053d9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -168,19 +168,20 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind)
 int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
 			  struct pernet_operations *ops);
-int tcf_action_destroy(struct list_head *actions, int bind);
+int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
+		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
 				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
+		    int ref);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
-- 
cgit 


From 03dc7a35fcc83a199121a5156c4a7a976b836682 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:19:14 +0200
Subject: ipv6: xfrm: use 64-bit timestamps

get_seconds() is deprecated because it can overflow on 32-bit
architectures.  For the xfrm_state->lastused member, we treat the data
as a 64-bit number already, so we just need to use the right accessor
that works on both 32-bit and 64-bit machines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a5378613a49c..1350e2cf0749 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -227,7 +227,7 @@ struct xfrm_state {
 	long		saved_tmo;
 
 	/* Last used time */
-	unsigned long		lastused;
+	time64_t		lastused;
 
 	struct page_frag xfrag;
 
-- 
cgit 


From 4929c9428a171145f82f81aae0c3c25ef7d82837 Mon Sep 17 00:00:00 2001
From: Deepti Raghavan <deeptir@mit.edu>
Date: Mon, 9 Jul 2018 17:53:39 +0000
Subject: tcp: expose both send and receive intervals for rate sample

Congestion control algorithms, which access the rate sample
through the tcp_cong_control function, only have access to the maximum
of the send and receive interval, for cases where the acknowledgment
rate may be inaccurate due to ACK compression or decimation. Algorithms
may want to use send rates and receive rates as separate signals.

Signed-off-by: Deepti Raghavan <deeptir@mit.edu>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cce37694776e..f6cb20e6e524 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -954,6 +954,8 @@ struct rate_sample {
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
+	u32 snd_interval_us;	/* snd interval for delivered packets */
+	u32 rcv_interval_us;	/* rcv interval for delivered packets */
 	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
 	int  losses;		/* number of packets marked lost upon ACK */
 	u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
-- 
cgit 


From eeed992b776c54af6108187c87ac60d028e69d37 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 10 Jul 2018 10:02:58 +0300
Subject: net: Add lag.h, net_lag_port_dev_txable()

LAG devices (team or bond) recognize for each one of their slave devices
whether LAG traffic is going to be sent through that device. Bond calls
such devices "active", team calls them "txable". When this state
changes, a NETDEV_CHANGELOWERSTATE notification is distributed, together
with a netdev_notifier_changelowerstate_info structure that for LAG
devices includes a tx_enabled flag that refers to the new state. The
notification thus makes it possible to react to the changes in txability
in drivers.

However there's no way to query txability from the outside on demand.
That is problematic namely for mlxsw, which when resolving ERSPAN packet
path, may encounter a LAG device, and needs to determine which of the
slaves it should choose.

To that end, introduce a new function, net_lag_port_dev_txable(), which
determines whether a given slave device is "active" or
"txable" (depending on the flavor of the LAG device). That function then
dispatches to per-LAG-flavor helpers, bond_is_active_slave_dev() resp.
team_port_dev_txable().

Because there currently is no good place where net_lag_port_dev_txable()
should be added, introduce a new header file, lag.h, which should from
now on hold any logic common to both team and bond. (But keep
netif_is_lag_master() together with the rest of netif_is_*_master()
functions).

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bonding.h | 13 +++++++++++++
 include/net/lag.h     | 17 +++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 include/net/lag.h

(limited to 'include/net')

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 808f1d167349..a2d058170ea3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -411,6 +411,19 @@ static inline bool bond_slave_can_tx(struct slave *slave)
 	       bond_is_active_slave(slave);
 }
 
+static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev)
+{
+	struct slave *slave;
+	bool active;
+
+	rcu_read_lock();
+	slave = bond_slave_get_rcu(slave_dev);
+	active = bond_is_active_slave(slave);
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len)
 {
 	if (len == ETH_ALEN) {
diff --git a/include/net/lag.h b/include/net/lag.h
new file mode 100644
index 000000000000..95b880e6fdde
--- /dev/null
+++ b/include/net/lag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IF_LAG_H
+#define _LINUX_IF_LAG_H
+
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+#include <net/bonding.h>
+
+static inline bool net_lag_port_dev_txable(const struct net_device *port_dev)
+{
+	if (netif_is_team_port(port_dev))
+		return team_port_dev_txable(port_dev);
+	else
+		return bond_is_active_slave_dev(port_dev);
+}
+
+#endif /* _LINUX_IF_LAG_H */
-- 
cgit 


From cca9bab1b72cd2296097c75f59ef11ef80461279 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:16:12 +0200
Subject: tcp: use monotonic timestamps for PAWS

Using get_seconds() for timestamps is deprecated since it can lead
to overflows on 32-bit systems. While the interface generally doesn't
overflow until year 2106, the specific implementation of the TCP PAWS
algorithm breaks in 2038 when the intermediate signed 32-bit timestamps
overflow.

A related problem is that the local timestamps in CLOCK_REALTIME form
lead to unexpected behavior when settimeofday is called to set the system
clock backwards or forwards by more than 24 days.

While the first problem could be solved by using an overflow-safe method
of comparing the timestamps, a nicer solution is to use a monotonic
clocksource with ktime_get_seconds() that simply doesn't overflow (at
least not until 136 years after boot) and that doesn't change during
settimeofday().

To make 32-bit and 64-bit architectures behave the same way here, and
also save a few bytes in the tcp_options_received structure, I'm changing
the type to a 32-bit integer, which is now safe on all architectures.

Finally, the ts_recent_stamp field also (confusingly) gets used to store
a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow().
This is currently safe, but changing the type to 32-bit requires
some small changes there to keep it working.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6cb20e6e524..582304955087 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -472,19 +472,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-	unsigned long now = jiffies;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	if (time_after(now, last_overflow + HZ))
+	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
 static inline u32 tcp_cookie_time(void)
@@ -1375,7 +1376,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
 	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
 		return true;
-	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+	if (unlikely(!time_before32(ktime_get_seconds(),
+				    rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
 		return true;
 	/*
 	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1405,7 +1407,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && !time_before32(ktime_get_seconds(),
+				  rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
 		return false;
 	return true;
 }
-- 
cgit 


From c749cdda9089eb1fdb6a9ab98f945124d12f2595 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 11 Jul 2018 16:04:50 +0200
Subject: net/sched: act_skbedit: don't use spinlock in the data path

use RCU instead of spin_{,un}lock_bh, to protect concurrent read/write on
act_skbedit configuration. This reduces the effects of contention in the
data path, in case multiple readers are present.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_skbedit.h | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 19cd3d345804..911bbac838a2 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -22,14 +22,19 @@
 #include <net/act_api.h>
 #include <linux/tc_act/tc_skbedit.h>
 
+struct tcf_skbedit_params {
+	u32 flags;
+	u32 priority;
+	u32 mark;
+	u32 mask;
+	u16 queue_mapping;
+	u16 ptype;
+	struct rcu_head rcu;
+};
+
 struct tcf_skbedit {
-	struct tc_action	common;
-	u32		flags;
-	u32		priority;
-	u32		mark;
-	u32		mask;
-	u16		queue_mapping;
-	u16		ptype;
+	struct tc_action common;
+	struct tcf_skbedit_params __rcu *params;
 };
 #define to_skbedit(a) ((struct tcf_skbedit *)a)
 
@@ -37,15 +42,27 @@ struct tcf_skbedit {
 static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
-		return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+	u32 flags;
+
+	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+		rcu_read_lock();
+		flags = rcu_dereference(to_skbedit(a)->params)->flags;
+		rcu_read_unlock();
+		return flags == SKBEDIT_F_MARK;
+	}
 #endif
 	return false;
 }
 
 static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 {
-	return to_skbedit(a)->mark;
+	u32 mark;
+
+	rcu_read_lock();
+	mark = rcu_dereference(to_skbedit(a)->params)->mark;
+	rcu_read_unlock();
+
+	return mark;
 }
 
 #endif /* __NET_TC_SKBEDIT_H */
-- 
cgit 


From b16ebe925a4400a2ec3dc663c81dce2fd9bf0998 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:08 +0300
Subject: devlink: Add support for creating and destroying regions

This allows a device to register its supported address regions.
Each address region can be accessed directly, for example by reading
the snapshots taken of this address space.
Drivers are not limited in the name selection for different regions.
An example of a region-name can be: pci cr-space, register-space.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index f67c29cede15..e5397652f2fb 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -28,6 +28,7 @@ struct devlink {
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
 	struct list_head param_list;
+	struct list_head region_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -397,6 +398,8 @@ enum devlink_param_generic_id {
 	.validate = _validate,						\
 }
 
+struct devlink_region;
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -543,6 +546,11 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size);
+void devlink_region_destroy(struct devlink_region *region);
 
 #else
 
@@ -770,6 +778,20 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
 }
 
+static inline struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+		      const char *region_name,
+		      u32 region_max_snapshots,
+		      u64 region_size)
+{
+	return NULL;
+}
+
+static inline void
+devlink_region_destroy(struct devlink_region *region)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From ccadfa444b34c6ec7bb458eee17fdd8c9a456c63 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:09 +0300
Subject: devlink: Add callback to query for snapshot id before snapshot create

To restrict the driver with the snapshot ID selection a new callback
is introduced for the driver to get the snapshot ID before creating
a new snapshot. This will also allow giving the same ID for multiple
snapshots taken of different regions at the same time.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e5397652f2fb..f27d8593687a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -29,6 +29,7 @@ struct devlink {
 	struct list_head resource_list;
 	struct list_head param_list;
 	struct list_head region_list;
+	u32 snapshot_id;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -551,6 +552,7 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     u32 region_max_snapshots,
 					     u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
+u32 devlink_region_shapshot_id_get(struct devlink *devlink);
 
 #else
 
@@ -792,6 +794,12 @@ devlink_region_destroy(struct devlink_region *region)
 {
 }
 
+static inline u32
+devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From d7e5272282d93bedbbeb6174b8af8425d7dcfd6f Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:10 +0300
Subject: devlink: Add support for creating region snapshots

Each device address region can store multiple snapshots,
each snapshot is identified using a different numerical ID.
This ID is used when deleting a snapshot or showing an address
region specific snapshot. This patch exposes a callback to add
a new snapshot to an address region.
The snapshot will be deleted using the destructor function
when destroying a region or when a snapshot delete command
is received from the devlink user tool.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index f27d8593687a..905f0bb7b4ba 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -401,6 +401,8 @@ enum devlink_param_generic_id {
 
 struct devlink_region;
 
+typedef void devlink_snapshot_data_dest_t(const void *data);
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -553,6 +555,9 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
 u32 devlink_region_shapshot_id_get(struct devlink *devlink);
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor);
 
 #else
 
@@ -800,6 +805,14 @@ devlink_region_shapshot_id_get(struct devlink *devlink)
 	return 0;
 }
 
+static inline int
+devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+			       u8 *data, u32 snapshot_id,
+			       devlink_snapshot_data_dest_t *data_destructor)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
-- 
cgit 


From f6a69885f2e38be0229ab9f6a2d9d4a1b4ba2be5 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:17 +0300
Subject: devlink: Add generic parameters region_snapshot

region_snapshot - When set enables capturing region snapshots

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 905f0bb7b4ba..b9b89d6604d4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -361,6 +361,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -376,6 +377,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit 


From 811e299f4645588cc7a1b78d97b6847c155324b9 Mon Sep 17 00:00:00 2001
From: Romuald CARI <romuald.cari@devialet.com>
Date: Thu, 7 Jun 2018 16:08:02 +0200
Subject: ieee802154: add rx LQI from userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Link Quality Indication data exposed by drivers could not be accessed from
userspace. Since this data is per-datagram received, it makes sense to make it
available to userspace application through the ancillary data mechanism in
recvmsg rather than through ioctls. This can be activated using the socket
option WPAN_WANTLQI under SOL_IEEE802154 protocol.

This LQI data is available in the ancillary data buffer under the SOL_IEEE802154
level as the type WPAN_LQI. The value is an unsigned byte indicating the link
quality with values ranging 0-255.

Signed-off-by: Romuald Cari <romuald.cari@devialet.com>
Signed-off-by: Clément Peron <clement.peron@devialet.com>
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/af_ieee802154.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h
index a5563d27a3eb..8003a9f6eb43 100644
--- a/include/net/af_ieee802154.h
+++ b/include/net/af_ieee802154.h
@@ -56,6 +56,7 @@ struct sockaddr_ieee802154 {
 #define WPAN_WANTACK		0
 #define WPAN_SECURITY		1
 #define WPAN_SECURITY_LEVEL	2
+#define WPAN_WANTLQI		3
 
 #define WPAN_SECURITY_DEFAULT	0
 #define WPAN_SECURITY_OFF	1
-- 
cgit 


From 05296620f6d14dce0030b87e1e57891a770fb65c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:40 -0700
Subject: xdp: factor out common program/flags handling from drivers

Basic operations drivers perform during xdp setup and query can
be moved to helpers in the core.  Encapsulate program and flags
into a structure and add helpers.  Note that the structure is
intended as the "main" program information source in the driver.
Most drivers will additionally place the program pointer in their
fast path or ring structures.

The helpers don't have a huge impact now, but they will
decrease the code duplication when programs can be installed
in HW and driver at the same time.  Encapsulating the basic
operations in helpers will hopefully also reduce the number
of changes to drivers which adopt them.

Helpers could really be static inline, but they depend on
definition of struct netdev_bpf which means they'd have
to be placed in netdevice.h, an already 4500 line header.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 2deea7166a34..fcb033f51d8c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -144,4 +144,17 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
 	return unlikely(xdp->data_meta > xdp->data);
 }
 
+struct xdp_attachment_info {
+	struct bpf_prog *prog;
+	u32 flags;
+};
+
+struct netdev_bpf;
+int xdp_attachment_query(struct xdp_attachment_info *info,
+			 struct netdev_bpf *bpf);
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf);
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+			  struct netdev_bpf *bpf);
+
 #endif /* __LINUX_NET_XDP_H__ */
-- 
cgit 


From 01683a1469995cc7aaf833d6f8b3f1c1d2fc3b92 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 9 Jul 2018 13:29:11 +0300
Subject: net: sched: refactor flower walk to iterate over idr

Extend struct tcf_walker with additional 'cookie' field. It is intended to
be used by classifier walk implementations to continue iteration directly
from particular filter, instead of iterating 'skip' number of times.

Change flower walk implementation to save filter handle in 'cookie'. Each
time flower walk is called, it looks up filter with saved handle directly
with idr, instead of iterating over filter linked list 'skip' number of
times. This change improves complexity of dumping flower classifier from
quadratic to linearithmic. (assuming idr lookup has logarithmic complexity)

Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reported-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2081e4219f81..e4252a176eec 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -13,6 +13,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
+	unsigned long cookie;
 	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
-- 
cgit 


From d80a1b9d186057ddb0d384ba601cf2b7d214539c Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:39 +0300
Subject: tls: Refactor tls_offload variable names

For symmetry, we rename tls_offload_context to
tls_offload_context_tx before we add tls_offload_context_rx.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 70c273777fe9..5dcd808236a7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -128,7 +128,7 @@ struct tls_record_info {
 	skb_frag_t frags[MAX_SKB_FRAGS];
 };
 
-struct tls_offload_context {
+struct tls_offload_context_tx {
 	struct crypto_aead *aead_send;
 	spinlock_t lock;	/* protects records list */
 	struct list_head records_list;
@@ -147,8 +147,8 @@ struct tls_offload_context {
 #define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
 };
 
-#define TLS_OFFLOAD_CONTEXT_SIZE                                               \
-	(ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) +           \
+#define TLS_OFFLOAD_CONTEXT_SIZE_TX                                            \
+	(ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) +        \
 	 TLS_DRIVER_STATE_SIZE)
 
 enum {
@@ -239,7 +239,7 @@ void tls_device_sk_destruct(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn);
 
 static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
@@ -380,10 +380,10 @@ static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
 	return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
-static inline struct tls_offload_context *tls_offload_ctx(
-		const struct tls_context *tls_ctx)
+static inline struct tls_offload_context_tx *
+tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 {
-	return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
+	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -396,7 +396,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct sk_buff *skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-			 struct tls_offload_context *offload_ctx,
+			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
 #endif /* _TLS_OFFLOAD_H */
-- 
cgit 


From dafb67f3bb4a58a45fe92c1e362ea6429831688a Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:40 +0300
Subject: tls: Split decrypt_skb to two functions

Previously, decrypt_skb also updated the TLS context.
Now, decrypt_skb only decrypts the payload using the current context,
while decrypt_skb_update also updates the state.

Later, in the tls_device Rx flow, we will use decrypt_skb directly.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 5dcd808236a7..49b89221db43 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -390,6 +390,8 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		struct scatterlist *sgout);
 
 struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct net_device *dev,
-- 
cgit 


From 39f56e1a78d647316db330c3b6f4c5637a895e3b Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:41 +0300
Subject: tls: Split tls_sw_release_resources_rx

This patch splits tls_sw_release_resources_rx into two functions one
which releases all inner software tls structures and another that also
frees the containing structure.

In TLS_DEVICE we will need to release the software structures without
freeing the containing structure, which contains other information.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 49b89221db43..7a485de25646 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -223,6 +223,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
+void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-- 
cgit 


From 4799ac81e52a72a6404827bf2738337bb581a174 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:43 +0300
Subject: tls: Add rx inline crypto offload

This patch completes the generic infrastructure to offload TLS crypto to a
network device. It enables the kernel to skip decryption and
authentication of some skbs marked as decrypted by the NIC. In the fast
path, all packets received are decrypted by the NIC and the performance
is comparable to plain TCP.

This infrastructure doesn't require a TCP offload engine. Instead, the
NIC only decrypts packets that contain the expected TCP sequence number.
Out-Of-Order TCP packets are provided unmodified. As a result, at the
worst case a received TLS record consists of both plaintext and ciphertext
packets. These partially decrypted records must be reencrypted,
only to be decrypted.

The notable differences between SW KTLS Rx and this offload are as
follows:
1. Partial decryption - Software must handle the case of a TLS record
that was only partially decrypted by HW. This can happen due to packet
reordering.
2. Resynchronization - tls_read_size calls the device driver to
resynchronize HW after HW lost track of TLS record framing in
the TCP stream.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 7a485de25646..d8b3b6578c01 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -83,6 +83,16 @@ struct tls_device {
 	void (*unhash)(struct tls_device *device, struct sock *sk);
 };
 
+enum {
+	TLS_BASE,
+	TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+	TLS_HW,
+#endif
+	TLS_HW_RECORD,
+	TLS_NUM_CONFIG,
+};
+
 struct tls_sw_context_tx {
 	struct crypto_aead *aead_send;
 	struct crypto_wait async_wait;
@@ -197,6 +207,7 @@ struct tls_context {
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
+	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
 	int  (*setsockopt)(struct sock *sk, int level,
@@ -209,13 +220,27 @@ struct tls_context {
 	void (*unhash)(struct sock *sk);
 };
 
+struct tls_offload_context_rx {
+	/* sw must be the first member of tls_offload_context_rx */
+	struct tls_sw_context_rx sw;
+	atomic64_t resync_req;
+	u8 driver_state[];
+	/* The TLS layer reserves room for driver specific state
+	 * Currently the belief is that there is not enough
+	 * driver specific state to justify another layer of indirection
+	 */
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE_RX					\
+	(ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \
+	 TLS_DRIVER_STATE_SIZE)
+
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
 		int __user *optlen);
 int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
-
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
@@ -290,11 +315,19 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
+struct sk_buff *
+tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
+		      struct sk_buff *skb);
+
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
-	return sk_fullsock(sk) &&
-	       /* matches smp_store_release in tls_set_device_offload */
-	       smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+	/* '&&' short-circuits: the field is only read on a full socket */
+	return sk_fullsock(sk) &&
+	       smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb;
+#else
+	return false;
+#endif
 }
 
 static inline void tls_err_abort(struct sock *sk, int err)
@@ -387,10 +420,27 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline struct tls_offload_context_rx *
+tls_offload_ctx_rx(const struct tls_context *tls_ctx)
+{
+	return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+/* The TLS context is valid until sk_destruct is called */
+static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+	atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1));
+}
+
+
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 
@@ -402,4 +452,9 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
+
+void tls_device_offload_cleanup_rx(struct sock *sk);
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn);
+
 #endif /* _TLS_OFFLOAD_H */
-- 
cgit 


From f286586df68e7733a8e651098401f139dc2e17f4 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Mon, 18 Jun 2018 15:12:52 +0200
Subject: netfilter: nft_tproxy: Move nf_tproxy_assign_sock() to nf_tproxy.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function is also necessary to implement nft tproxy support

Fixes: 45ca4e0cf273 ("netfilter: Libify xt_TPROXY")
Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 9754a50ecde9..d5a80888cbe4 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
 	return false;
 }
 
+/* assign a socket to the skb -- consumes sk */
+static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_edemux;
+}
+
 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
 
 /**
-- 
cgit 


From 60e3be94e6a1c5162a0763c9aafb5190b2b1fdce Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:55:32 +0200
Subject: openvswitch: use nf_ct_get_tuplepr, invert_tuplepr

These versions deal with the l3proto/l4proto details internally.
This removes the only caller of nf_ct_get_tuple, so make it static.

After this, l3proto->get_l4proto() can be removed in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 9b5e7634713e..90df45022c51 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -40,13 +40,6 @@ void nf_conntrack_cleanup_start(void);
 void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
-bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
-		     unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
-		     struct net *net,
-		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_conntrack_l3proto *l3proto,
-		     const struct nf_conntrack_l4proto *l4proto);
-
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
 			const struct nf_conntrack_l3proto *l3proto,
-- 
cgit 


From f957be9d349a3800940f823b16e12b0405cc305b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:44 +0200
Subject: netfilter: conntrack: remove ctnetlink callbacks from l3 protocol
 trackers

handle everything from ctnetlink directly.

After all these years we still only support ipv4 and ipv6, so it
seems reasonable to remove l3 protocol tracker support and instead
handle ipv4/ipv6 from a common, always builtin inet tracker.

Step 1: Get rid of all the l3proto->func() calls.

Start with ctnetlink, then move on to packet-path ones.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    | 6 ++----
 include/net/netfilter/nf_conntrack_l3proto.h | 8 --------
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 90df45022c51..d454a53ba646 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -68,10 +68,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	return ret;
 }
 
-void
-print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
-            const struct nf_conntrack_l4proto *proto);
+void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_l4proto *proto);
 
 #define CONNTRACK_LOCKS 1024
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d5808f3e2715..d07b5216a925 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -46,14 +46,6 @@ struct nf_conntrack_l3proto {
 	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
 			   unsigned int *dataoff, u_int8_t *protonum);
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	int (*tuple_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_conntrack_tuple *t);
-	int (*nlattr_to_tuple)(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
-	const struct nla_policy *nla_policy;
-#endif
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
-- 
cgit 


From 47a91b14de62e35d1466820cbb4c024b6c02dff1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:45 +0200
Subject: netfilter: conntrack: remove pkt_to_tuple indirection from l3
 protocol trackers

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d07b5216a925..ece231450f30 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Try to fill in the third arg: nhoff is offset of l3 proto
-         * hdr.  Return true if possible.
-	 */
-	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
-			     struct nf_conntrack_tuple *tuple);
-
 	/*
 	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
 	 * Some packets can't be inverted: return 0 in that case.
-- 
cgit 


From d1b6fe94941f43e4743d5fea953d16b0a001c2c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:46 +0200
Subject: netfilter: conntrack: remove invert_tuple indirection from l3
 protocol trackers

It's simpler to just handle it directly in nf_ct_invert_tuple().
This also gets rid of the need to pass the l3proto pointer to resolve_conntrack().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    | 1 -
 include/net/netfilter/nf_conntrack_l3proto.h | 7 -------
 2 files changed, 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index d454a53ba646..35461b2d3462 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -42,7 +42,6 @@ void nf_conntrack_cleanup_end(void);
 
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
-			const struct nf_conntrack_l3proto *l3proto,
 			const struct nf_conntrack_l4proto *l4proto);
 
 /* Find a connection corresponding to a tuple. */
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index ece231450f30..164641c743a5 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
-	 */
-	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
-			     const struct nf_conntrack_tuple *orig);
-
 	/*
 	 * Called before tracking. 
 	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
-- 
cgit 


From 6816d931cab009024b68c11c4cf752f8bf9a1e32 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:47 +0200
Subject: netfilter: conntrack: remove get_l4proto indirection from l3 protocol
 trackers

Handle it in the core instead.

ipv6_skip_exthdr() is built-in even if ipv6 is a module, i.e. this
doesn't create an ipv6 dependency.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 164641c743a5..5f160375c93a 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,14 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Called before tracking. 
-	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
-	 *	*protonum: protocol number
-	 */
-	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
-			   unsigned int *dataoff, u_int8_t *protonum);
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
-- 
cgit 


From 8b3892ea8718920d29432328fe9544d89a429614 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:48 +0200
Subject: netfilter: conntrack: avoid calls to l4proto invert_tuple

Handle the common cases (tcp, udp, etc.) in the core and only
do the indirect call for the protocols that need it (GRE for instance).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index a7220eef9aee..6a55e337a161 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
 			     struct net *net, struct nf_conntrack_tuple *tuple);
 
 	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
+	 * Only used by icmp, most protocols use a generic version.
 	 */
 	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
 			     const struct nf_conntrack_tuple *orig);
-- 
cgit 


From c779e849608a875448f6ffc2a5c2a15523bdcd00 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:50 +0200
Subject: netfilter: conntrack: remove get_timeout() indirection

Not needed, we can have the l4trackers fetch it themselves.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  8 ++------
 include/net/netfilter/nf_conntrack_timeout.h | 18 ++++--------------
 2 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6a55e337a161..c7a0075d96df 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
 	int (*packet)(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts);
+		      enum ip_conntrack_info ctinfo);
 
 	/* Called when a new connection for this protocol found;
 	 * returns TRUE if it's OK.  If so, packet() called next. */
 	bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts);
+		    unsigned int dataoff);
 
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
@@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
-	/* Return the array of timeouts for this protocol. */
-	unsigned int *(*get_timeouts)(struct net *net);
-
 	/* convert protoinfo to nfnetink attributes */
 	int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
 			 struct nf_conn *ct);
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 9468ab4ad12d..80ceb3d0291d 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 #endif
 };
 
-static inline unsigned int *
-nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-		     const struct nf_conntrack_l4proto *l4proto)
+static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 {
+	unsigned int *timeouts = NULL;
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	struct nf_conn_timeout *timeout_ext;
-	unsigned int *timeouts;
 
 	timeout_ext = nf_ct_timeout_find(ct);
-	if (timeout_ext) {
+	if (timeout_ext)
 		timeouts = nf_ct_timeout_data(timeout_ext);
-		if (unlikely(!timeouts))
-			timeouts = l4proto->get_timeouts(net);
-	} else {
-		timeouts = l4proto->get_timeouts(net);
-	}
-
-	return timeouts;
-#else
-	return l4proto->get_timeouts(net);
 #endif
+	return timeouts;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-- 
cgit 


From a0ae2562c6c4b2721d9fddba63b7286c13517d9f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:51 +0200
Subject: netfilter: conntrack: remove l3proto abstraction

This unifies ipv4 and ipv6 protocol trackers and removes the l3proto
abstraction.

This gets rid of all l3proto indirect calls and the need to do
a lookup on the function to call for l3 demux.

It increases the nf_conntrack.ko module size by only a small amount
(12kbyte), so this reduces the overall size because nf_conntrack.ko is
useless without either the nf_conntrack_ipv4 or the nf_conntrack_ipv6 module.

before:
   text    data     bss     dec     hex filename
   7357    1088       0    8445    20fd nf_conntrack_ipv4.ko
   7405    1084       4    8493    212d nf_conntrack_ipv6.ko
  72614   13689     236   86539   1520b nf_conntrack.ko
 19K nf_conntrack_ipv4.ko
 19K nf_conntrack_ipv6.ko
179K nf_conntrack.ko

after:
   text    data     bss     dec     hex filename
  79277   13937     236   93450   16d0a nf_conntrack.ko
  191K nf_conntrack.ko

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 --
 include/net/netfilter/nf_conntrack.h           |  5 +++
 include/net/netfilter/nf_conntrack_core.h      |  1 -
 include/net/netfilter/nf_conntrack_l3proto.h   | 54 --------------------------
 include/net/netfilter/nf_conntrack_l4proto.h   |  4 --
 5 files changed, 5 insertions(+), 62 deletions(-)
 delete mode 100644 include/net/netfilter/nf_conntrack_l3proto.h

(limited to 'include/net')

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 73f825732326..c84b51682f08 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -10,9 +10,6 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-
-const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 062dc19b5840..a2b0ed025908 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
 	/* insert expect proto private data here */
 };
 
+struct nf_conntrack_net {
+	unsigned int users4;
+	unsigned int users6;
+};
+
 #include <linux/types.h>
 #include <linux/skbuff.h>
 
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 35461b2d3462..2a3e0974a6af 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -14,7 +14,6 @@
 #define _NF_CONNTRACK_CORE_H
 
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
deleted file mode 100644
index 5f160375c93a..000000000000
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * Header for use in defining a given L3 protocol for connection tracking.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI	<yasuyuki.kozakai@toshiba.co.jp>
- *
- * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
- */
-
-#ifndef _NF_CONNTRACK_L3PROTO_H
-#define _NF_CONNTRACK_L3PROTO_H
-#include <linux/netlink.h>
-#include <net/netlink.h>
-#include <linux/seq_file.h>
-#include <net/netfilter/nf_conntrack.h>
-
-struct nf_conntrack_l3proto {
-	/* L3 Protocol Family number. ex) PF_INET */
-	u_int16_t l3proto;
-
-	/* size of tuple nlattr, fills a hole */
-	u16 nla_size;
-
-	/* Called when netns wants to use connection tracking */
-	int (*net_ns_get)(struct net *);
-	void (*net_ns_put)(struct net *);
-
-	/* Module (if any) which this is connected to. */
-	struct module *me;
-};
-
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-
-/* Protocol global registration. */
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-
-const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-
-/* Existing built-in protocols */
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
-
-static inline struct nf_conntrack_l3proto *
-__nf_ct_l3proto_find(u_int16_t l3proto)
-{
-	if (unlikely(l3proto >= NFPROTO_NUMPROTO))
-		return &nf_conntrack_l3proto_generic;
-	return rcu_dereference(nf_ct_l3protos[l3proto]);
-}
-
-#endif /*_NF_CONNTRACK_L3PROTO_H*/
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index c7a0075d96df..6068c6da3eac 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -130,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
-			   unsigned int num_proto);
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
-			      unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
-- 
cgit 


From cb2b36f5a97df76f547fcc4ab444a02522fb6c96 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:40 -0700
Subject: netfilter: nf_conncount: Switch to plain list

Original patch is from Florian Westphal.

This patch switches from hlist to plain list to store the list of
connections with the same filtering key in nf_conncount. With the
plain list, we can insert new connections at the tail, so over time
the beginning of the list holds long-running connections and those are
expired, while the newly created ones are at the end.

Later on, we could probably move checked ones to the end of the list,
so the next run has higher chance to reclaim stale entries in the front.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 3a188a0923a3..e4884e0e4f69 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -1,8 +1,15 @@
 #ifndef _NF_CONNTRACK_COUNT_H
 #define _NF_CONNTRACK_COUNT_H
 
+#include <linux/list.h>
+
 struct nf_conncount_data;
 
+struct nf_conncount_list {
+	struct list_head head;	/* connections with the same filtering key */
+	unsigned int count;	/* length of list */
+};
+
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
 					    unsigned int keylen);
 void nf_conncount_destroy(struct net *net, unsigned int family,
@@ -14,15 +21,17 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 				 const struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_zone *zone,
 				 bool *addit);
 
-bool nf_conncount_add(struct hlist_head *head,
+void nf_conncount_list_init(struct nf_conncount_list *list);
+
+bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
-void nf_conncount_cache_free(struct hlist_head *hhead);
+void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
-- 
cgit 


From 976afca1ceba53df6f4a543014e15d1c7a962571 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:41 -0700
Subject: netfilter: nf_conncount: Early exit in nf_conncount_lookup() and
 cleanup

This patch is originally from Florian Westphal.

This patch does the following three tasks.

It applies the same early exit technique for nf_conncount_lookup().

Since now we keep the number of connections in 'struct nf_conncount_list',
we no longer need to return the count in nf_conncount_lookup().

Moreover, we expose the garbage collection function nf_conncount_gc_list()
for nft_connlimit.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index e4884e0e4f69..dbec17f674b7 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -21,10 +21,10 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
-				 const struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_zone *zone,
-				 bool *addit);
+void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+			 const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conntrack_zone *zone,
+			 bool *addit);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
@@ -32,6 +32,9 @@ bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
+void nf_conncount_gc_list(struct net *net,
+			  struct nf_conncount_list *list);
+
 void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
-- 
cgit 


From 5c789e131cbb997a528451564ea4613e812fc718 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:44 -0700
Subject: netfilter: nf_conncount: Add list lock and gc worker, and RCU for
 init tree search

This patch is originally from Florian Westphal.

This patch does the following 3 main tasks.

1) Add list lock to 'struct nf_conncount_list' so that we can
alter the lists containing the individual connections without holding the
main tree lock.  It would be useful when we only need to add/remove to/from
a list without allocating/removing a node in the tree.  With this change, we
update nft_connlimit accordingly since we no longer need to maintain
a list lock in nft_connlimit now.

2) Use RCU for the initial tree search to improve tree look up performance.

3) Add a garbage collection worker. This worker is scheduled when there
are excessive tree nodes that need to be recycled.

Moreover, the rbnode reclaim logic is moved from search tree to insert tree
to avoid a race condition.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index dbec17f674b7..4b2b2baf8ab4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,9 +5,17 @@
 
 struct nf_conncount_data;
 
+enum nf_conncount_list_add {
+	NF_CONNCOUNT_ADDED, 	/* list add was ok */
+	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
+	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
+};
+
 struct nf_conncount_list {
+	spinlock_t list_lock;
 	struct list_head head;	/* connections with the same filtering key */
 	unsigned int count;	/* length of list */
+	bool dead;
 };
 
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
@@ -28,11 +36,12 @@ void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
-bool nf_conncount_add(struct nf_conncount_list *list,
-		      const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conntrack_zone *zone);
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+		 const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_zone *zone);
 
-void nf_conncount_gc_list(struct net *net,
+bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list);
 
 void nf_conncount_cache_free(struct nf_conncount_list *list);
-- 
cgit 


From ec1b28ca9674def4a158808a6493bdb87b993d81 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:52 +0300
Subject: ipvs: provide just conn to ip_vs_state_name

In preparation for followup patches, provide just the cp
ptr to ip_vs_state_name.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a0bec23c6d5e..4d76abcf1c41 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1221,7 +1221,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 				  struct ip_vs_dest *dest, __u32 fwmark);
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
-const char *ip_vs_state_name(__u16 proto, int state);
+const char *ip_vs_state_name(const struct ip_vs_conn *cp);
 
 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
-- 
cgit 


From 275411430f892407b885be1de2548b2e632892c3 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:53 +0300
Subject: ipvs: add assured state for conn templates

cp->state was not used for templates. Add support for state bits
and for the first "assured" bit which indicates that some
connection controlled by this template was established or assured
by the real server. In a followup patch we will use it to drop
templates under SYN attack.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4d76abcf1c41..a0d2e0bb9a94 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
 	IP_VS_SCTP_S_LAST
 };
 
+/* Connection templates use bits from state */
+#define IP_VS_CTPL_S_NONE		0x0000
+#define IP_VS_CTPL_S_ASSURED		0x0001
+#define IP_VS_CTPL_S_LAST		0x0002
+
 /* Delta sequence info structure
  * Each ip_vs_conn has 2 (output AND input seq. changes).
  * Only used in the VS/NAT.
@@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }
 
+/* Mark our template as assured */
+static inline void
+ip_vs_control_assure_ct(struct ip_vs_conn *cp)
+{
+	struct ip_vs_conn *ct = cp->control;
+
+	if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
+	    (ct->flags & IP_VS_CONN_F_TEMPLATE))
+		ct->state |= IP_VS_CTPL_S_ASSURED;
+}
+
 /* IPVS netns init & cleanup functions */
 int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
 int ip_vs_control_net_init(struct netns_ipvs *ipvs);
-- 
cgit 


From 440534d3c56be04abfb26850ee882d19d223557a Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 9 Jul 2018 18:06:33 +0800
Subject: netfilter: Remove useless param helper of nf_ct_helper_ext_add

The param helper of nf_ct_helper_ext_add is useless now, so remove
it.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 32c2a94a219d..2492120b8097 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
 				     unsigned int);
 
-struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
-					  struct nf_conntrack_helper *helper,
-					  gfp_t gfp);
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 			      gfp_t flags);
-- 
cgit 


From f102d66b335a417d4848da9441f585695a838934 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jul 2018 13:45:14 +0200
Subject: netfilter: nf_tables: use dedicated mutex to guard transactions

Continue to use nftnl subsys mutex to protect (un)registration of hook types,
expressions and so on, but force batch operations to do their own
locking.

This allows distinct net namespaces to perform transactions in parallel.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/nftables.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 94767ea3a490..286fd960896f 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
 struct netns_nftables {
 	struct list_head	tables;
 	struct list_head	commit_list;
+	struct mutex		commit_mutex;
 	unsigned int		base_seq;
 	u8			gencursor;
 	u8			validate_state;
-- 
cgit 


From 70b095c84326640eeacfd69a411db8fc36e8ab1a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 14 Jul 2018 01:14:01 +0200
Subject: ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module

IPV6=m
DEFRAG_IPV6=m
CONNTRACK=y yields:

net/netfilter/nf_conntrack_proto.o: In function `nf_ct_netns_do_get':
net/netfilter/nf_conntrack_proto.c:802: undefined reference to `nf_defrag_ipv6_enable'
net/netfilter/nf_conntrack_proto.o:(.rodata+0x640): undefined reference to `nf_conntrack_l4proto_icmpv6'

Setting DEFRAG_IPV6=y causes undefined references to ip6_rhash_params
ip6_frag_init and ip6_expire_frag_queue so it would be needed to force
IPV6=y too.

This patch gets rid of the 'followup linker error' by removing
the dependency of ipv6.ko symbols from netfilter ipv6 defrag.

Shared code is placed into a header, then used from both.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ipv6.h      |  28 -------------
 include/net/ipv6_frag.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 28 deletions(-)
 create mode 100644 include/net/ipv6_frag.h

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aa6fd11a887c..3720958cd4e1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -581,34 +581,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
 }
 #endif
 
-struct inet_frag_queue;
-
-enum ip6_defrag_users {
-	IP6_DEFRAG_LOCAL_DELIVER,
-	IP6_DEFRAG_CONNTRACK_IN,
-	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_OUT,
-	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
-};
-
-void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-extern const struct rhashtable_params ip6_rhash_params;
-
-/*
- *	Equivalent of ipv4 struct ip
- */
-struct frag_queue {
-	struct inet_frag_queue	q;
-
-	int			iif;
-	__u16			nhoffset;
-	u8			ecn;
-};
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
-
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
new file mode 100644
index 000000000000..6ced1e6899b6
--- /dev/null
+++ b/include/net/ipv6_frag.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IPV6_FRAG_H
+#define _IPV6_FRAG_H
+#include <linux/kernel.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+enum ip6_defrag_users {
+	IP6_DEFRAG_LOCAL_DELIVER,
+	IP6_DEFRAG_CONNTRACK_IN,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_OUT,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+};
+
+/*
+ *	Equivalent of ipv4 struct ip
+ */
+struct frag_queue {
+	struct inet_frag_queue	q;
+
+	int			iif;
+	__u16			nhoffset;
+	u8			ecn;
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
+{
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	const struct frag_v6_compare_key *key = a;
+
+	q->key.v6 = *key;
+	fq->ecn = 0;
+}
+
+static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key.v6,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline int
+ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_v6_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static inline void
+ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+{
+	struct net_device *dev = NULL;
+	struct sk_buff *head;
+
+	rcu_read_lock();
+	spin_lock(&fq->q.lock);
+
+	if (fq->q.flags & INET_FRAG_COMPLETE)
+		goto out;
+
+	inet_frag_kill(&fq->q);
+
+	dev = dev_get_by_index_rcu(net, fq->iif);
+	if (!dev)
+		goto out;
+
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+	/* Don't send error if the first segment did not arrive. */
+	head = fq->q.fragments;
+	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+		goto out;
+
+	head->dev = dev;
+	skb_get(head);
+	spin_unlock(&fq->q.lock);
+
+	icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+	kfree_skb(head);
+	goto out_rcu_unlock;
+
+out:
+	spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+	rcu_read_unlock();
+	inet_frag_put(&fq->q);
+}
+#endif
+#endif
-- 
cgit 


From 0015b80abccecca82622d9e9d48eb210572a0c3b Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Mon, 16 Jul 2018 21:10:34 -0700
Subject: net: dsa: Remove VLA usage

We avoid 2 VLAs by using a pre-allocated field in dsa_switch. We also
try to avoid dynamic allocation whenever possible (when using fewer than
bits-per-long ports, which is the common case).

Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
[kees: tweak commit subject and message slightly]
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fdbd6082945d..461e8a7661b7 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -259,6 +259,9 @@ struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
+	unsigned long		*bitmap;
+	unsigned long		_bitmap;
+
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
-- 
cgit 


From 5544adb9707fda5d54494c37940701894c16b9a0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Jul 2018 19:27:17 +0300
Subject: flow_dissector: Dissect tos and ttl from the tunnel info

Add dissection of the tos and ttl from the ip tunnel headers
fields in case a match is needed on them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c64406717eee..2a17f041f7a1 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -207,7 +207,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
-
+	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
-- 
cgit 


From bc56b33404599edc412b91933d74b36873e8ea25 Mon Sep 17 00:00:00 2001
From: Benedict Wong <benedictwong@google.com>
Date: Thu, 19 Jul 2018 10:50:44 -0700
Subject: xfrm: Remove xfrmi interface ID from flowi

In order to remove performance impact of having the extra u32 in every
single flowi, this change removes the flowi_xfrm struct, preferring to
take the if_id as a method parameter where needed.

In the inbound direction, if_id is only needed during the
__xfrm_check_policy() function, and the if_id can be determined at that
point based on the skb. As such, xfrmi_decode_session() is only called
with the skb in __xfrm_check_policy().

In the outbound direction, the only place where if_id is needed is the
xfrm_lookup() call in xfrmi_xmit2(). With this change, the if_id is
directly passed into the xfrm_lookup_with_ifid() call. All existing
callers can still call xfrm_lookup(), which uses a default if_id of 0.

This change does not change any behavior of XFRMIs except for improving
overall system performance via flowi size reduction.

This change has been tested against the Android Kernel Networking Tests:

https://android.googlesource.com/kernel/tests/+/master/net/test

Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h  | 14 ++++++++++++++
 include/net/flow.h |  9 ---------
 include/net/xfrm.h |  2 +-
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index b3219cd8a5a1..7f735e76ca73 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
 	return dst_orig;
 }
 
+static inline struct dst_entry *
+xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
+		      const struct flowi *fl, const struct sock *sk,
+		      int flags, u32 if_id)
+{
+	return dst_orig;
+}
+
 static inline struct dst_entry *xfrm_lookup_route(struct net *net,
 						  struct dst_entry *dst_orig,
 						  const struct flowi *fl,
@@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct flowi *fl, const struct sock *sk,
 			      int flags);
 
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+					struct dst_entry *dst_orig,
+					const struct flowi *fl,
+					const struct sock *sk, int flags,
+					u32 if_id);
+
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    const struct flowi *fl, const struct sock *sk,
 				    int flags);
diff --git a/include/net/flow.h b/include/net/flow.h
index 187c9bef672f..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,10 +26,6 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
-struct flowi_xfrm {
-	__u32			if_id;
-};
-
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -43,7 +39,6 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
-	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -83,7 +78,6 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
-#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -115,7 +109,6 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
-	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -145,7 +138,6 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
-#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -193,7 +185,6 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
-#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1350e2cf0749..ca820945f30c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1557,7 +1557,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   const struct flowi *fl,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
-				   unsigned short family);
+				   unsigned short family, u32 if_id);
 struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
-- 
cgit 


From fbdeaed408cf2728c62640c10848ddb1b67e63d3 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 20 Jul 2018 21:56:53 +0000
Subject: net: create reusable function for getting ownership info of sysfs
 inodes

Make net_ns_get_ownership() reusable by networking code outside of core.
This is useful, for example, to allow bridge related sysfs files to be
owned by container root.

Add a function comment since this is a potentially dangerous function to
use given the way that kobject_get_ownership() works by initializing uid
and gid before calling .get_ownership().

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a71264d75d7f..9b5fdc50519a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -10,6 +10,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
+#include <linux/uidgid.h>
 
 #include <net/flow.h>
 #include <net/netns/core.h>
@@ -170,6 +171,8 @@ extern struct net init_net;
 struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 			struct net *old_net);
 
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
 void net_ns_barrier(void);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
@@ -182,6 +185,13 @@ static inline struct net *copy_net_ns(unsigned long flags,
 	return old_net;
 }
 
+static inline void net_ns_get_ownership(const struct net *net,
+					kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+}
+
 static inline void net_ns_barrier(void) {}
 #endif /* CONFIG_NET_NS */
 
-- 
cgit 


From 042f8825569d628517784d558aefe23c212f0fb2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 20 Jul 2018 21:14:38 -0700
Subject: nfp: bring back support for offloading shared blocks

Now that we have offload replay infrastructure added by
commit 326367427cc0 ("net: sched: call reoffload op on block callback reg")
and flows are guaranteed to be removed correctly, we can revert
commit 951a8ee6def3 ("nfp: reject binding to shared blocks").

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e4252a176eec..4f405ca8346f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -114,11 +114,6 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
-static inline bool tcf_block_shared(struct tcf_block *block)
-{
-	return false;
-}
-
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
 	return NULL;
-- 
cgit 


From f71e0ca4db187af7c44987e9d21e9042c3046070 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:05 +0200
Subject: net: sched: Avoid implicit chain 0 creation

Currently, chain 0 is implicitly created during block creation. However
that does not align with chain object exposure, creation and destruction
api introduced later on. So make the chain 0 behave the same way as any
other chain and only create it when it is needed. Since chain 0 is
somewhat special, as the qdiscs need to hold a pointer to the first chain
tp, this requires moving the chain head change callback infra to the
block structure.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7432100027b7..86f4651784e8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -300,7 +300,6 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
 	struct tcf_proto __rcu *filter_chain;
-	struct list_head filter_chain_list;
 	struct list_head list;
 	struct tcf_block *block;
 	u32 index; /* chain index */
@@ -318,6 +317,10 @@ struct tcf_block {
 	bool keep_dst;
 	unsigned int offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
+	struct {
+		struct tcf_chain *chain;
+		struct list_head filter_chain_list;
+	} chain0;
 };
 
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
-- 
cgit 


From 32a4f5ecd7381f30ae3bb36dea77a150ba68af2e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:06 +0200
Subject: net: sched: introduce chain object to uapi

Allow user to create, destroy, get and dump chain objects. Do that by
extending rtnl commands by the chain-specific ones. User will now be
able to explicitly create or destroy chains (so far this was done only
automatically according to the filter/act needs and refcounting). Also, the
user will receive a notification about any chain creation or destruction.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 86f4651784e8..81ec8276db9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -304,6 +304,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	bool explicitly_created;
 };
 
 struct tcf_block {
-- 
cgit 


From 9f407f1768d3e1a5ddd7bd49fa4d1f5a26e10ed2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:07 +0200
Subject: net: sched: introduce chain templates

Allow the user to set a template for newly created chains. A template locks
down the chain for particular classifier type/options combinations.
The classifier needs to support templates, otherwise the kernel
replies with an error.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 81ec8276db9c..085c509c8674 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -238,6 +238,8 @@ struct tcf_result {
 	};
 };
 
+struct tcf_chain;
+
 struct tcf_proto_ops {
 	struct list_head	head;
 	char			kind[IFNAMSIZ];
@@ -263,10 +265,18 @@ struct tcf_proto_ops {
 					     tc_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
+	void *			(*tmplt_create)(struct net *net,
+						struct tcf_chain *chain,
+						struct nlattr **tca,
+						struct netlink_ext_ack *extack);
+	void			(*tmplt_destroy)(void *tmplt_priv);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*);
+	int			(*tmplt_dump)(struct sk_buff *skb,
+					      struct net *net,
+					      void *tmplt_priv);
 
 	struct module		*owner;
 };
@@ -305,6 +315,8 @@ struct tcf_chain {
 	u32 index; /* chain index */
 	unsigned int refcnt;
 	bool explicitly_created;
+	const struct tcf_proto_ops *tmplt_ops;
+	void *tmplt_priv;
 };
 
 struct tcf_block {
-- 
cgit 


From 34738452739069947e528123810533f28dd8332b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:11 +0200
Subject: net: sched: cls_flower: propagate chain teplate creation and
 destruction to drivers

Introduce a couple of flower offload commands in order to propagate
template creation/destruction events down to device drivers.
Drivers may use this information to prepare HW in an optimal way
for future filter insertions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4f405ca8346f..a3101582f642 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -721,6 +721,8 @@ enum tc_fl_command {
 	TC_CLSFLOWER_REPLACE,
 	TC_CLSFLOWER_DESTROY,
 	TC_CLSFLOWER_STATS,
+	TC_CLSFLOWER_TMPLT_CREATE,
+	TC_CLSFLOWER_TMPLT_DESTROY,
 };
 
 struct tc_cls_flower_offload {
-- 
cgit 


From 1f3ed383fb9a073ae2e408cd7a0717b04c7c3a21 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 27 Jul 2018 09:45:05 +0200
Subject: net: sched: don't dump chains only held by actions

In case a chain is empty and not explicitly created by a user,
such chain should not exist. The only exception is if there is
an action "goto chain" pointing to it. In that case, don't show the
chain in the dump. Track the chain references held by actions and
use them to find out if a chain should or should not be shown
in chain dump.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     | 3 +++
 include/net/sch_generic.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3101582f642..6d02f31abba8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -39,7 +39,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create);
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
+				       u32 chain_index);
 void tcf_chain_put(struct tcf_chain *chain);
+void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 085c509c8674..c5432362dc26 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -314,6 +314,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	unsigned int action_refcnt;
 	bool explicitly_created;
 	const struct tcf_proto_ops *tmplt_ops;
 	void *tmplt_priv;
-- 
cgit 


From b67c540b8a987e365dc548e5b2ddf023946e3d63 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 27 Jul 2018 15:26:56 +0300
Subject: net: dcb: Add priority-to-DSCP map getters

On ingress, a network device such as a switch assigns to packets
priority based on various criteria. Common options include interpreting
PCP and DSCP fields according to user configuration. When a packet
egresses the switch, a reverse process may rewrite PCP and/or DSCP
values according to packet priority.

The following three functions support a) obtaining a DSCP-to-priority
map or vice versa, and b) finding default-priority entries in APP
database.

The DCB subsystem supports for APP entries a very generous M:N mapping
between priorities and protocol identifiers. Understandably,
several (say) DSCP values can map to the same priority. But this
asymmetry holds the other way around as well--one priority can map to
several DSCP values. For this reason, the following functions operate in
terms of bitmaps, with ones in positions that match some APP entry.

- dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice
  a map of DSCP-to-priority-mask, which gives for each DSCP value a
  bitmap of priorities related to that DSCP value by APP, along the
  lines of dcb_ieee_getapp_mask().

- dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given
  netdevice a map from priorities to a bitmap of DSCPs.

- dcb_ieee_getapp_default_prio_mask() which finds all default-priority
  rules for a given port in APP database, and returns a mask of
  priorities allowed by these default-priority rules.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 0e5e91be2d30..e22a8a3c089b 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -34,6 +34,19 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
+struct dcb_ieee_app_prio_map {
+	u64 map[IEEE_8021QAZ_MAX_TCS];
+};
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_prio_map *p_map);
+
+struct dcb_ieee_app_dscp_map {
+	u8 map[64];
+};
+void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_dscp_map *p_map);
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev);
+
 int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
 		      u32 seq, u32 pid);
 int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
-- 
cgit 


From 222440b4e832059c0ddf18d1e409f0552ab53a7d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 4 Jul 2018 12:48:04 +0200
Subject: netfilter: nf_tables: handle meta/lookup with direct call

Currently nft uses inlined variants for common operations
such as 'ip saddr 1.2.3.4' instead of an indirect call.

Also handle meta get operations and lookups without indirect call,
both are builtin.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a05134507e7b..8da837d2aaf9 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -71,4 +71,11 @@ extern struct nft_set_type nft_set_hash_fast_type;
 extern struct nft_set_type nft_set_rbtree_type;
 extern struct nft_set_type nft_set_bitmap_type;
 
+struct nft_expr;
+struct nft_regs;
+struct nft_pktinfo;
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_lookup_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
-- 
cgit 


From 6decb5b45e70d6ffff6488cc8e8bad6b9ac7f99b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:32 +0530
Subject: Bluetooth: Define PHY flags in hdev and set 1M as default

Support for 1M is mandatory for LE controllers, and 1M is what
gets set at power on. This patch defines hdev flags for
LE PHYs and sets 1M as the default.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 ++++
 include/net/bluetooth/hci_core.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 73e48be5bbb3..664fe1ebf2c7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,10 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_LE_SET_PHY_1M		0x01
+#define HCI_LE_SET_PHY_2M		0x02
+#define HCI_LE_SET_PHY_CODED		0x04
+
 #define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
 struct hci_cp_le_set_ext_scan_params {
 	__u8    own_addr_type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a74453571264..71f79df9ee05 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -315,6 +315,9 @@ struct hci_dev {
 	unsigned long	sco_last_tx;
 	unsigned long	le_last_tx;
 
+	__u8		le_tx_def_phys;
+	__u8		le_rx_def_phys;
+
 	struct workqueue_struct	*workqueue;
 	struct workqueue_struct	*req_workqueue;
 
-- 
cgit 


From 5075b972f20ddad5bb19542ea4f5794d06673375 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:33 +0530
Subject: Bluetooth: Add defines for BREDR pkt_type and LE PHYs

This also adds macros for checking LMP support for different
pkt_types.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 14 ++++++++++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 2 files changed, 18 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 664fe1ebf2c7..89bf800f6eb1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -291,6 +291,14 @@ enum {
 #define HCI_DH3		0x0800
 #define HCI_DH5		0x8000
 
+/* HCI packet types inverted masks */
+#define HCI_2DH1	0x0002
+#define HCI_3DH1	0x0004
+#define HCI_2DH3	0x0100
+#define HCI_3DH3	0x0200
+#define HCI_2DH5	0x1000
+#define HCI_3DH5	0x2000
+
 #define HCI_HV1		0x0020
 #define HCI_HV2		0x0040
 #define HCI_HV3		0x0080
@@ -354,6 +362,8 @@ enum {
 #define LMP_PCONTROL	0x04
 #define LMP_TRANSPARENT	0x08
 
+#define LMP_EDR_2M		0x02
+#define LMP_EDR_3M		0x04
 #define LMP_RSSI_INQ	0x40
 #define LMP_ESCO	0x80
 
@@ -361,7 +371,9 @@ enum {
 #define LMP_EV5		0x02
 #define LMP_NO_BREDR	0x20
 #define LMP_LE		0x40
+#define LMP_EDR_3SLOT	0x80
 
+#define LMP_EDR_5SLOT	0x01
 #define LMP_SNIFF_SUBR	0x02
 #define LMP_PAUSE_ENC	0x04
 #define LMP_EDR_ESCO_2M	0x20
@@ -399,6 +411,8 @@ enum {
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
 #define HCI_LE_EXT_SCAN_POLICY		0x80
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_CHAN_SEL_ALG2		0x40
 
 /* Connection modes */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 71f79df9ee05..a64d13f91d09 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1141,6 +1141,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR)
 #define lmp_ext_feat_capable(dev)  ((dev)->features[0][7] & LMP_EXTFEATURES)
 #define lmp_transp_capable(dev)    ((dev)->features[0][2] & LMP_TRANSPARENT)
+#define lmp_edr_2m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_2M)
+#define lmp_edr_3m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_3M)
+#define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT)
+#define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT)
 
 /* ----- Extended LMP capabilities ----- */
 #define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER)
-- 
cgit 


From 6244691fec4dd0adebca255e60e0ed7ac8155b2e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:34 +0530
Subject: Bluetooth: Implement Get PHY Configuration mgmt command

This command retrieves the supported packet types of
BREDR and the supported PHYs of the controller.

BR_1M_1SLOT, LE_1M_TX and LE_1M_RX would be supported by default.
Other PHYs are supported based on the local features.

Also this sets PHY_CONFIGURATION bit in supported settings.

@ MGMT Command: Get PHY Configuration (0x0044) plen 0
@ MGMT Event: Command Complete (0x0001) plen 15
      Get PHY Configuration (0x0044) plen 12
        Status: Success (0x00)
        Supported PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Configurable PHYs: 0x79fe
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Selected PHYs: 0x07ff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e7303eee65cd..1c93d6e83a6c 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -101,6 +101,7 @@ struct mgmt_rp_read_index_list {
 #define MGMT_SETTING_PRIVACY		0x00002000
 #define MGMT_SETTING_CONFIGURATION	0x00004000
 #define MGMT_SETTING_STATIC_ADDRESS	0x00008000
+#define MGMT_SETTING_PHY_CONFIGURATION  0x00010000
 
 #define MGMT_OP_READ_INFO		0x0004
 #define MGMT_READ_INFO_SIZE		0
@@ -604,6 +605,30 @@ struct mgmt_cp_set_appearance {
 } __packed;
 #define MGMT_SET_APPEARANCE_SIZE	2
 
+#define MGMT_OP_GET_PHY_CONFIGURATION	0x0044
+struct mgmt_rp_get_phy_confguration {
+	__le32	supported_phys;
+	__le32	configurable_phys;
+	__le32	selected_phys;
+} __packed;
+#define MGMT_GET_PHY_CONFIGURATION_SIZE	0
+
+#define MGMT_PHY_BR_1M_1SLOT	0x00000001
+#define MGMT_PHY_BR_1M_3SLOT	0x00000002
+#define MGMT_PHY_BR_1M_5SLOT	0x00000004
+#define MGMT_PHY_EDR_2M_1SLOT	0x00000008
+#define MGMT_PHY_EDR_2M_3SLOT	0x00000010
+#define MGMT_PHY_EDR_2M_5SLOT	0x00000020
+#define MGMT_PHY_EDR_3M_1SLOT	0x00000040
+#define MGMT_PHY_EDR_3M_3SLOT	0x00000080
+#define MGMT_PHY_EDR_3M_5SLOT	0x00000100
+#define MGMT_PHY_LE_1M_TX		0x00000200
+#define MGMT_PHY_LE_1M_RX		0x00000400
+#define MGMT_PHY_LE_2M_TX		0x00000800
+#define MGMT_PHY_LE_2M_RX		0x00001000
+#define MGMT_PHY_LE_CODED_TX	0x00002000
+#define MGMT_PHY_LE_CODED_RX	0x00004000
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
-- 
cgit 


From 0314f2867fa0c46d0fc1c23c80e7fab9435079df Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:35 +0530
Subject: Bluetooth: Implement Set PHY Confguration command

This enables the user to set the PHYs which will be used in all subsequent
connections. The host will use the same PHYs in LE scanning as well.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
        All PHYs preference: 0x00
        TX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
        RX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Default PHY (0x08|0x0031) ncmd 1
        Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
      Set PHY Configuration (0x0045) plen 0
        Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 1c93d6e83a6c..0916e203e5d9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -629,6 +629,25 @@ struct mgmt_rp_get_phy_confguration {
 #define MGMT_PHY_LE_CODED_TX	0x00002000
 #define MGMT_PHY_LE_CODED_RX	0x00004000
 
+#define MGMT_PHY_BREDR_MASK (MGMT_PHY_BR_1M_1SLOT | MGMT_PHY_BR_1M_3SLOT | \
+			     MGMT_PHY_BR_1M_5SLOT | MGMT_PHY_EDR_2M_1SLOT | \
+			     MGMT_PHY_EDR_2M_3SLOT | MGMT_PHY_EDR_2M_5SLOT | \
+			     MGMT_PHY_EDR_3M_1SLOT | MGMT_PHY_EDR_3M_3SLOT | \
+			     MGMT_PHY_EDR_3M_5SLOT)
+#define MGMT_PHY_LE_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_1M_RX | \
+			  MGMT_PHY_LE_2M_TX | MGMT_PHY_LE_2M_RX | \
+			  MGMT_PHY_LE_CODED_TX | MGMT_PHY_LE_CODED_RX)
+#define MGMT_PHY_LE_TX_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_2M_TX | \
+			     MGMT_PHY_LE_CODED_TX)
+#define MGMT_PHY_LE_RX_MASK (MGMT_PHY_LE_1M_RX | MGMT_PHY_LE_2M_RX | \
+			     MGMT_PHY_LE_CODED_RX)
+
+#define MGMT_OP_SET_PHY_CONFIGURATION	0x0045
+struct mgmt_cp_set_phy_confguration {
+	__le32	selected_phys;
+} __packed;
+#define MGMT_SET_PHY_CONFIGURATION_SIZE	4
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
-- 
cgit 


From b7c23df85b6a1c3bcfb591cfa938d341fc3a556e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:36 +0530
Subject: Bluetooth: Implement PHY changed event

This defines and implements the PHY changed event and sends it to the user
whenever the selected PHYs change using SET_PHY_CONFIGURATION.

This will also be triggered when the BREDR pkt_type is changed using
the legacy ioctl HCISETPTYPE.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
		All PHYs preference: 0x00
		TX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
		RX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
> HCI Event: Command Complete (0x0e) plen 4
	  LE Set Default PHY (0x08|0x0031) ncmd 1
		Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
	  Set PHY Configuration (0x0045) plen 0
		Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 include/net/bluetooth/mgmt.h     | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a64d13f91d09..ab5d494a545a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1544,6 +1544,7 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
 			    u8 instance);
 void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
 			      u8 instance);
+int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
 
 u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
 		      u16 to_multiplier);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 0916e203e5d9..7f372e9067c9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -868,3 +868,8 @@ struct mgmt_ev_ext_info_changed {
 	__le16	eir_len;
 	__u8	eir[0];
 } __packed;
+
+#define MGMT_EV_PHY_CONFIGURATION_CHANGED	0x0026
+struct mgmt_ev_phy_configuration_changed {
+	__le32	selected_phys;
+} __packed;
-- 
cgit 


From 45bdd86eafc7d29e0b4b6681bec9c6ab8eddc6bf Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:37 +0530
Subject: Bluetooth: Set Scan PHYs based on selected PHYs by user

Use the PHYs selected in Set Phy Configuration management command
while scanning.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 13
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x05
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
        Entry 1: LE Coded
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 +++-
 include/net/bluetooth/hci_core.h | 9 +++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 89bf800f6eb1..04211457367a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1540,7 +1540,9 @@ struct hci_cp_le_set_ext_scan_params {
 	__u8    data[0];
 } __packed;
 
-#define LE_SCAN_PHY_1M 0x01
+#define LE_SCAN_PHY_1M		0x01
+#define LE_SCAN_PHY_2M		0x02
+#define LE_SCAN_PHY_CODED	0x04
 
 struct hci_cp_le_scan_phy_params {
 	__u8    type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab5d494a545a..113c9bb609c7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1165,6 +1165,15 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
+
+#define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
+
+#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
+			 ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
+
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
-- 
cgit 


From b2cc9761f144e8ef714be8c590603073b80ddc13 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:38 +0530
Subject: Bluetooth: Handle extended ADV PDU types

This patch defines the extended ADV types and handles them in the ADV report.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 04211457367a..83a1593a128e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1976,6 +1976,14 @@ struct hci_ev_le_conn_complete {
 #define LE_LEGACY_SCAN_RSP_ADV		0x001b
 #define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
+/* Extended Advertising event types */
+#define LE_EXT_ADV_NON_CONN_IND		0x0000
+#define LE_EXT_ADV_CONN_IND		0x0001
+#define LE_EXT_ADV_SCAN_IND		0x0002
+#define LE_EXT_ADV_DIRECT_IND		0x0004
+#define LE_EXT_ADV_SCAN_RSP		0x0008
+#define LE_EXT_ADV_LEGACY_PDU		0x0010
+
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
 
-- 
cgit 


From 6b49bcb4bce2ed0f0aefe8e304a8b9cbaeeaa3f0 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:40 +0530
Subject: Bluetooth: Read no of adv sets during init

This patch reads the number of advertising sets in the controller
during init and saves it in hdev.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 7 +++++++
 include/net/bluetooth/hci_core.h | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 83a1593a128e..3f93ae9765a4 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,7 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
 #define HCI_LE_PHY_CODED		0x08
@@ -1579,6 +1580,12 @@ struct hci_cp_le_ext_conn_param {
 	__le16 max_ce_len;
 } __packed;
 
+#define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS	0x203b
+struct hci_rp_le_read_num_supported_adv_sets {
+	__u8  status;
+	__u8  num_of_sets;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 113c9bb609c7..2aad4a863176 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -222,6 +222,7 @@ struct hci_dev {
 	__u8		le_features[8];
 	__u8		le_white_list_size;
 	__u8		le_resolv_list_size;
+	__u8		le_num_of_adv_sets;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -1180,6 +1181,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext create connection if command is supported */
 #define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
+/* Extended advertising support */
+#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
-- 
cgit 


From de181e887ac27dadda127c7d4c3e89c6da8fb6d2 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:41 +0530
Subject: Bluetooth: Impmlement extended adv enable

This patch basically replaces legacy adv with extended adv
based on the controller support. Currently there is no
design change, i.e. only one adv set will be enabled at a time.

This also adds tx_power to the instance, stores whatever is returned
from Set_ext_parameter, and uses the same in adv data as well.
For instance 0, tx_power is stored in hdev only.

< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0010
          Use legacy advertising PDUs: ADV_NONCONN_IND
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE 1M (0x01)
        Secondary max skip: 0x00
        Secondary PHY: LE 1M (0x01)
        SID: 0x00
        Scan request notifications: Disabled (0x00)
> HCI Event: Command Complete (0x0e) plen 5
      LE Set Extended Advertising Parameters (0x08|0x0036) ncmd 1
        Status: Success (0x00)
        TX power (selected): 7 dbm (0x07)
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 6
        Extended advertising: Enabled (0x01)
        Number of sets: 1 (0x01)
        Entry 0
          Handle: 0x00
          Duration: 0 ms (0x00)
          Max ext adv events: 0
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 39 +++++++++++++++++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 2 files changed, 40 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3f93ae9765a4..b447b127879e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1586,6 +1586,45 @@ struct hci_rp_le_read_num_supported_adv_sets {
 	__u8  num_of_sets;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_PARAMS		0x2036
+struct hci_cp_le_set_ext_adv_params {
+	__u8      handle;
+	__le16    evt_properties;
+	__u8      min_interval[3];
+	__u8      max_interval[3];
+	__u8      channel_map;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      filter_policy;
+	__u8      tx_power;
+	__u8      primary_phy;
+	__u8      secondary_max_skip;
+	__u8      secondary_phy;
+	__u8      sid;
+	__u8      notif_enable;
+} __packed;
+
+#define HCI_ADV_PHY_1M		0X01
+
+struct hci_rp_le_set_ext_adv_params {
+	__u8  status;
+	__u8  tx_power;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_ADV_ENABLE		0x2039
+struct hci_cp_le_set_ext_adv_enable {
+	__u8  enable;
+	__u8  num_of_sets;
+	__u8  data[0];
+} __packed;
+
+struct hci_cp_ext_adv_set {
+	__u8  handle;
+	__le16 duration;
+	__u8  max_events;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2aad4a863176..ad3518303a0c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -171,6 +171,7 @@ struct adv_info {
 	__u8	adv_data[HCI_MAX_AD_LENGTH];
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
+	__s8	tx_power;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
-- 
cgit 


From a0fb3726ba55138ef6fdd5dc67da6d9a70360696 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:42 +0530
Subject: Bluetooth: Use Set ext adv/scan rsp data if controller supports

This patch implements Set Ext Adv data and Set Ext Scan rsp data
if the controller supports extended advertising.

Currently the operation is set to Complete data and the fragment
preference is set to no fragment.

< HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 35
        Handle: 0x00
        Operation: Complete extended advertising data (0x03)
        Fragment preference: Minimize fragmentation (0x01)
        Data length: 0x15
        16-bit Service UUIDs (complete): 2 entries
          Heart Rate (0x180d)
          Battery Service (0x180f)
        Name (complete): Test LE
        Company: Google (224)
          Data: 0102
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Data (0x08|0x0037) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index b447b127879e..aace97099ead 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1625,6 +1625,28 @@ struct hci_cp_ext_adv_set {
 	__u8  max_events;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_DATA		0x2037
+struct hci_cp_le_set_ext_adv_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA		0x2038
+struct hci_cp_le_set_ext_scan_rsp_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define LE_SET_ADV_DATA_OP_COMPLETE	0x03
+
+#define LE_SET_ADV_DATA_NO_FRAG		0x01
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
-- 
cgit 


From 45b7749f16aacd9ffab8e958caa77e2aa2358c0b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:43 +0530
Subject: Bluetooth: Implement disable and removal of adv instance

If ext adv is enabled then use ext adv to disable as well.
Also remove the adv set during LE disable.

< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index aace97099ead..faa2922a69fd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1647,6 +1647,8 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define LE_SET_ADV_DATA_NO_FRAG		0x01
 
+#define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
-- 
cgit 


From a73c046a2869048430c332a871a5b169f192c6c3 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:45 +0530
Subject: Bluetooth: Implement Set ADV set random address

This sets the random address for the adv instance. The random
address can be set only after the instance has been created, which
is done in Set ext adv param.

Random address and rpa expire timer and flags have been added
to adv instance which will be used when the respective
instance is scheduled.

This introduces a hci_get_random_address() which returns the
own address type and random address (rpa or nrpa) based
on the instance flags and hdev flags. New function is required
since own address type should be known before setting adv params
but address can be set only after setting params.

< HCI Command: LE Set Advertising Set Random Address (0x08|0x0035) plen 7
        Advertising handle: 0x00
        Advertising random address: 3C:8E:56:9B:77:84 (OUI 3C-8E-56)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Advertising Set Random Address (0x08|0x0035) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 6 ++++++
 include/net/bluetooth/hci_core.h | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index faa2922a69fd..8d348d0d3eea 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1649,6 +1649,12 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
 
+#define HCI_OP_LE_SET_ADV_SET_RAND_ADDR	0x2035
+struct hci_cp_le_set_adv_set_rand_addr {
+	__u8  handle;
+	bdaddr_t  bdaddr;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ad3518303a0c..0db1b9b428b7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -172,6 +172,9 @@ struct adv_info {
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
 	__s8	tx_power;
+	bdaddr_t	random_addr;
+	bool 		rpa_expired;
+	struct delayed_work	rpa_expired_cb;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
@@ -1113,6 +1116,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 			 u16 scan_rsp_len, u8 *scan_rsp_data,
 			 u16 timeout, u16 duration);
 int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance);
+void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired);
 
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
-- 
cgit 


From acf0aeae431a0f1723385cd1cb50177e4cc10edd Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:46 +0530
Subject: Bluetooth: Handle Adv set terminated event

This event comes after connection complete event for incoming
connections. Since we now have different random address for
each instance, conn resp address is assigned from this event.

As of now only connection part is handled as we are not
enabling duration or max num of events while starting ext adv.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8d348d0d3eea..57e3e3675d66 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2155,6 +2155,14 @@ struct hci_ev_le_enh_conn_complete {
 	__u8      clk_accurancy;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_SET_TERM	0x12
+struct hci_evt_le_ext_adv_set_term {
+	__u8	status;
+	__u8	handle;
+	__le16	conn_handle;
+	__u8	num_evts;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit 


From 85a721a8b0b6880d8cf6b9def70404ade8563225 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:47 +0530
Subject: Bluetooth: Implement secondary advertising on different PHYs

This patch adds support for advertising on the primary and secondary
channels on different PHYs. The user can specify the PHY preference
in the flags, and the PHY type in the extended advertising
parameters is set based on that preference.

@ MGMT Command: Add Advertising (0x003e) plen 11
        Instance: 1
        Flags: 0x00000200
          Advertise in CODED on Secondary channel
        Duration: 0
        Timeout: 0
        Advertising data length: 0
        Scan response length: 0
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)
< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0000
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE Coded (0x03)
        Secondary max skip: 0x00
        Secondary PHY: LE Coded (0x03)
        SID: 0x00
        Scan request notifications: Disabled (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h  | 4 ++++
 include/net/bluetooth/mgmt.h | 6 ++++++
 2 files changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 57e3e3675d66..8ff36463719f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,8 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
@@ -1606,6 +1608,8 @@ struct hci_cp_le_set_ext_adv_params {
 } __packed;
 
 #define HCI_ADV_PHY_1M		0X01
+#define HCI_ADV_PHY_2M		0x02
+#define HCI_ADV_PHY_CODED	0x03
 
 struct hci_rp_le_set_ext_adv_params {
 	__u8  status;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 7f372e9067c9..9cee7ddc6741 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -562,6 +562,12 @@ struct mgmt_rp_add_advertising {
 #define MGMT_ADV_FLAG_TX_POWER		BIT(4)
 #define MGMT_ADV_FLAG_APPEARANCE	BIT(5)
 #define MGMT_ADV_FLAG_LOCAL_NAME	BIT(6)
+#define MGMT_ADV_FLAG_SEC_1M 		BIT(7)
+#define MGMT_ADV_FLAG_SEC_2M 		BIT(8)
+#define MGMT_ADV_FLAG_SEC_CODED 	BIT(9)
+
+#define MGMT_ADV_FLAG_SEC_MASK	(MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
+				 MGMT_ADV_FLAG_SEC_CODED)
 
 #define MGMT_OP_REMOVE_ADVERTISING	0x003F
 struct mgmt_cp_remove_advertising {
-- 
cgit 


From 740011cfe94859df8d05f5400d589a8693b095e7 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 20 Jul 2018 13:12:28 +0800
Subject: Bluetooth: Add new quirk for non-persistent setup settings

Add a new quirk HCI_QUIRK_NON_PERSISTENT_SETUP that makes setup()
run after every open() and not just after the first open().

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8ff36463719f..7f008097552e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -183,6 +183,15 @@ enum {
 	 * during the hdev->setup vendor callback.
 	 */
 	HCI_QUIRK_NON_PERSISTENT_DIAG,
+
+	/* When this quirk is set, setup() would be run after every
+	 * open() and not just after the first open().
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 *
+	 */
+	HCI_QUIRK_NON_PERSISTENT_SETUP,
 };
 
 /* HCI device flags */
-- 
cgit 


From dd979b4df817e9976f18fb6f9d134d6bc4a3c317 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:10 +0200
Subject: net: simplify sock_poll_wait

The wait_address argument is always directly derived from the filp
argument, so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 83b747538bd0..0518f61926ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2057,16 +2057,17 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 /**
  * sock_poll_wait - place memory barrier behind the poll_wait call.
  * @filp:           file
- * @wait_address:   socket wait queue
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
  */
-static inline void sock_poll_wait(struct file *filp,
-		wait_queue_head_t *wait_address, poll_table *p)
+static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
-	if (!poll_does_not_wait(p) && wait_address) {
-		poll_wait(filp, wait_address, p);
+	struct socket *sock = filp->private_data;
+	wait_queue_head_t *wq = sk_sleep(sock->sk);
+
+	if (!poll_does_not_wait(p) && wq) {
+		poll_wait(filp, wq, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-- 
cgit 


From d8bbd13beeaacd6494954bf5b945b54ccb2af309 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:11 +0200
Subject: net: do not detour through struct sock to find the poll waitqueue

For any open socket file descriptor sock->sk->sk_wq->wait will always
point to sock->wq->wait.  That means we can do the shorter dereference,
remove a NULL check, and don't have to worry about any RCU
protection.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 0518f61926ec..2afea5d1bdfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2064,10 +2064,9 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
 	struct socket *sock = filp->private_data;
-	wait_queue_head_t *wq = sk_sleep(sock->sk);
 
-	if (!poll_does_not_wait(p) && wq) {
-		poll_wait(filp, wq, p);
+	if (!poll_does_not_wait(p)) {
+		poll_wait(filp, &sock->wq->wait, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-- 
cgit 


From f641f13b992979b97e595b761a9ba1a64fed7c4e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:12 +0200
Subject: net: remove sock_poll_busy_loop

There is no point in hiding this logic in a helper.  Also remove the
useless events != 0 check and only busy loop once we know we actually
have a poll method.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 9e36fda652b7..85777e68f738 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,15 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
-{
-	if (sk_can_busy_loop(sock->sk) &&
-	    events && (events & POLL_BUSY_LOOP)) {
-		/* once, only if requested by syscall */
-		sk_busy_loop(sock->sk, 1);
-	}
-}
-
 /* if this socket can poll_ll, tell the system call */
 static inline __poll_t sock_poll_busy_flag(struct socket *sock)
 {
-- 
cgit 


From a331de3bf0e66ab2437fc8c5b99bd3c0d9da3088 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:13 +0200
Subject: net: remove sock_poll_busy_flag

Fold it into the only caller to make the code simpler and easier to read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 85777e68f738..ba61cdd09eaa 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,12 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-/* if this socket can poll_ll, tell the system call */
-static inline __poll_t sock_poll_busy_flag(struct socket *sock)
-{
-	return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
-}
-
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
-- 
cgit 


From 7fd4b288ea6a3e45ad8afbcd5ec39554d57f1ae0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:43 +0200
Subject: tc/act: remove unneeded RCU lock in action callback

Each lockless action currently does its own RCU locking in ->act().
This allows using plain RCU accessor, even if the context
is really RCU BH.

This change drops the per action RCU lock, replace the accessors
with the _bh variant, cleans up a bit the surrounding code and
documents the RCU status in the relevant header.
No functional nor performance change is intended.

The goal of this patch is clarifying that the RCU critical section
used by the tc actions extends up to the classifier's caller.

v1 -> v2:
 - preserve rcu lock in act_bpf: it's needed by eBPF helpers,
   as pointed out by Daniel

v3 -> v4:
 - fixed some typos in the commit message (JiriP)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h     | 2 +-
 include/net/sch_generic.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 683ce41053d9..8c9bc02d05e1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -85,7 +85,7 @@ struct tc_action_ops {
 	size_t	size;
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, const struct tc_action *,
-		       struct tcf_result *);
+		       struct tcf_result *); /* called under RCU BH lock*/
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
 	int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5432362dc26..bcae181c1857 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -285,6 +285,8 @@ struct tcf_proto {
 	/* Fast access part */
 	struct tcf_proto __rcu	*next;
 	void __rcu		*root;
+
+	/* called under RCU BH lock*/
 	int			(*classify)(struct sk_buff *,
 					    const struct tcf_proto *,
 					    struct tcf_result *);
-- 
cgit 


From cd11b164073b719203318227918f9510809d5e10 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:44 +0200
Subject: net/tc: introduce TC_ACT_REINSERT.

This is similar to TC_ACT_REDIRECT, but with slightly different
semantics:
- on ingress the mirred skbs are passed to the target device
network stack without any additional check nor scrubbing.
- the rcu-protected stats provided via the tcf_result struct
  are updated on error conditions.

This new tcfa_action value is not exposed to the user-space
and can be used only internally by clsact.

v1 -> v2: do not touch TC_ACT_REDIRECT code path, introduce
 a new action type instead
v2 -> v3:
 - rename the new action value TC_ACT_REINJECT, update the
   helper accordingly
 - take care of uncloned reinjected packets in XDP generic
   hook
v3 -> v4:
 - renamed again the new action value (JiriP)
v4 -> v5:
 - fix build error with !NET_CLS_ACT (kbuild bot)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  3 +++
 include/net/sch_generic.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6d02f31abba8..22bfc3a13c25 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -7,6 +7,9 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
+/* TC action not accessible from user space */
+#define TC_ACT_REINSERT		(TC_ACT_VALUE_MAX + 1)
+
 /* Basic packet classifier frontend definitions. */
 
 struct tcf_walker {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bcae181c1857..a6d00093f35e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -235,6 +235,12 @@ struct tcf_result {
 			u32		classid;
 		};
 		const struct tcf_proto *goto_tp;
+
+		/* used by the TC_ACT_REINSERT action */
+		struct {
+			bool		ingress;
+			struct gnet_stats_queue *qstats;
+		};
 	};
 };
 
@@ -569,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
 #endif
 }
 
+static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return skb->tc_redirected;
+#else
+	return false;
+#endif
+}
+
 static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1108,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 			  struct mini_Qdisc __rcu **p_miniq);
 
+static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+{
+	struct gnet_stats_queue *stats = res->qstats;
+	int ret;
+
+	if (res->ingress)
+		ret = netif_receive_skb(skb);
+	else
+		ret = dev_queue_xmit(skb);
+	if (ret && stats)
+		qstats_overlimit_inc(res->qstats);
+}
+
 #endif
-- 
cgit 


From 486cdf21583e5b1fad488a3e4f0a5242a31c0ffa Mon Sep 17 00:00:00 2001
From: Mathieu Xhonneux <m.xhonneux@gmail.com>
Date: Thu, 26 Jul 2018 02:10:40 +0000
Subject: bpf: add End.DT6 action to bpf_lwt_seg6_action helper

The seg6local LWT provides the End.DT6 action, which allows
decapsulating an outer IPv6 header containing a Segment Routing Header
(SRH). The full specification is available here:

https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-05

This patch adds this action now to the seg6local BPF
interface. Since it is not mandatory that the inner IPv6 header also
contains a SRH, seg6_bpf_srh_state has been extended with a pointer to
a possible SRH of the outermost IPv6 header. This helps assessing if the
validation must be triggered or not, and avoids some calls to
ipv6_find_hdr.

v3: s/1/true, s/0/false for boolean values
v2: - changed true/false -> 1/0
    - preempt_enable no longer called in first conditional block

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/seg6_local.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 661fd5b4d3e0..08359e2d8b35 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -21,10 +21,12 @@
 
 extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 			       u32 tbl_id);
+extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
 
 struct seg6_bpf_srh_state {
-	bool valid;
+	struct ipv6_sr_hdr *srh;
 	u16 hdrlen;
+	bool valid;
 };
 
 DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
-- 
cgit 


From e6476c21447c4b17c47e476aade6facf050f31e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:45:07 +0200
Subject: net: remove bogus RCU annotations on socket.wq

We never use RCU protection for it, just a lot of cargo-cult
rcu_dereference_protected calls.

Note that we do keep the kfree_rcu call for it, as the references through
struct sock are RCU protected and thus might require a grace period before
freeing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2afea5d1bdfe..433f45fc2d68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1788,7 +1788,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	WARN_ON(parent->sk);
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_wq = parent->wq;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
-- 
cgit 


From 83ba4645152d1177c161750e1064e3a8e7cee19b Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Tue, 31 Jul 2018 21:18:11 +0200
Subject: net: add helpers checking if socket can be bound to nonlocal address

The construction "net->ipv4.sysctl_ip_nonlocal_bind || inet->freebind
|| inet->transparent" is present three times and its IPv6 counterpart
is also present three times. We introduce two small helpers to
characterize these tests uniformly.

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 8 ++++++++
 include/net/ipv6.h      | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 314be484c696..e03b93360f33 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -359,4 +359,12 @@ static inline bool inet_get_convert_csum(struct sock *sk)
 	return !!inet_sk(sk)->convert_csum;
 }
 
+
+static inline bool inet_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv4.sysctl_ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a44509f4e985..82deb684ba73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,6 +766,13 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
-- 
cgit 


From 432e05d328921c68c35bfdeff7d7b7400b8e3d1a Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:03 +0200
Subject: net: ipv4: Control SKB reprioritization after forwarding

After IPv4 packets are forwarded, the priority of the corresponding SKB
is updated according to the TOS field of IPv4 header. This overrides any
prioritization done earlier by e.g. an skbedit action or ingress-qos-map
defined at a vlan device.

Such overriding may not always be desirable. Even if the packet ends up
being routed, which implies this is an L3 network node, an administrator
may wish to preserve whatever prioritization was done earlier on in the
pipeline.

Therefore introduce a sysctl that controls this behavior. Keep the
default value at 1 to maintain backward-compatible behavior.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 661348f23ea5..e47503b4e4d1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -98,6 +98,7 @@ struct netns_ipv4 {
 	int sysctl_ip_default_ttl;
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
+	int sysctl_ip_fwd_update_priority;
 	int sysctl_ip_nonlocal_bind;
 	/* Shall we try to damage output packets if routing dev changes? */
 	int sysctl_ip_dynaddr;
-- 
cgit 


From d18c5d1995aa322b722fa731397e28ebcd00b3c6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:42 +0200
Subject: net: ipv4: Notify about changes to ip_forward_update_priority

Drivers may make offloading decisions based on whether
ip_forward_update_priority is enabled or not. Therefore distribute
netevent notifications to give them a chance to react to a change.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netevent.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netevent.h b/include/net/netevent.h
index d9918261701c..4107016c3bb4 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -28,6 +28,7 @@ enum netevent_notif_type {
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 	NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
-- 
cgit 


From 290b1c8b1a902c0902df9ec05577ab209296f345 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 1 Aug 2018 12:36:57 +0200
Subject: net: sched: make tcf_chain_{get,put}() static

These are no longer used outside of cls_api.c so make them static.
Move tcf_chain_flush() to avoid fwd declaration of tcf_chain_put().

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

v1->v2:
- new patch

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 22bfc3a13c25..ef727f71336e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -40,11 +40,8 @@ struct tcf_block_cb;
 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 
 #ifdef CONFIG_NET_CLS
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-				bool create);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 				       u32 chain_index);
-void tcf_chain_put(struct tcf_chain *chain);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
-- 
cgit 


From db57dc7c7a5c42bb653425a01b6d73c49514b5db Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Wed, 1 Aug 2018 22:05:10 +0200
Subject: net: don't declare IPv6 non-local bind helper if CONFIG_IPV6
 undefined

Fixes: 83ba4645152d ("net: add helpers checking if socket can be bound to nonlocal address")
Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82deb684ba73..ff33f498c137 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,13 +766,6 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
-static inline bool ipv6_can_nonlocal_bind(struct net *net,
-					  struct inet_sock *inet)
-{
-	return net->ipv6.sysctl.ip_nonlocal_bind ||
-		inet->freebind || inet->transparent;
-}
-
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
@@ -789,6 +782,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* Sysctl settings for net ipv6.auto_flowlabels */
 #define IP6_AUTO_FLOW_LABEL_OFF		0
 #define IP6_AUTO_FLOW_LABEL_OPTOUT	1
-- 
cgit 


From 285189c78eeb6f684a024b86fb5997d10c6aa564 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 25 Jul 2018 15:52:13 +0800
Subject: netfilter: use kvmalloc_array to allocate memory for hashtable

nf_ct_alloc_hashtable is used to allocate memory for the conntrack,
NAT bysrc and expectation hashtables. Assuming a 64k bucket size,
which means a 7th order page allocation, __get_free_pages, called
by nf_ct_alloc_hashtable, will trigger direct memory reclaim
and stall for a long time when the system is under heavy memory stress.

So replace the combination of __get_free_pages and vzalloc with
kvmalloc_array, which provides an overflow check and a fallback
if no high order memory is available, and does not retry to reclaim
memory, reducing the stall.

Also remove nf_ct_free_hashtable, since it is just a kvfree.

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Wang Li <wangli39@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a2b0ed025908..7e012312cd61 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -176,8 +176,6 @@ void nf_ct_netns_put(struct net *net, u8 nfproto);
  */
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
 
-void nf_ct_free_hashtable(void *hash, unsigned int size);
-
 int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
 
-- 
cgit 


From eb9950eb31f56e57582a61c92073336d04a26542 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 3 Aug 2018 17:06:56 +0100
Subject: rxrpc: Push iov_iter up from rxrpc_kernel_recv_data() to caller

Push iov_iter up from rxrpc_kernel_recv_data() to its caller to allow
non-contiguous iovs to be passed down, thereby permitting file reading to
be simplified in the AFS filesystem in a future patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_rxrpc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 8ae8ee004258..f53edb3754bc 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,7 +61,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 			   struct msghdr *, size_t,
 			   rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
-			   void *, size_t, size_t *, bool, u32 *, u16 *);
+			   struct iov_iter *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
 			     u32, int, const char *);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
-- 
cgit 


From 31ba191bf5ab2975c1955f1adf771a5a0b57afaa Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 3 Aug 2018 14:53:15 +0800
Subject: include/net/bond_3ad: Simplify the code by using the ARRAY_SIZE

We prefer ARRAY_SIZE over open-coding the array size calculation.

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_3ad.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index f358ad5e4214..fc3111515f5c 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -283,7 +283,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
 		"none",
 		"unknown"
 	};
-	int max_size = sizeof(churn_description) / sizeof(churn_description[0]);
+	int max_size = ARRAY_SIZE(churn_description);
 
 	if (state >= max_size)
 		state = max_size - 1;
-- 
cgit 


From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:39 +0000
Subject: ip: use rb trees for IP frag queue.

Similar to TCP OOO RX queue, it makes sense to use rb trees to store
IP fragments, so that OOO fragments are inserted faster.

Tested:

- a follow-up patch contains a rather comprehensive ip defrag
  self-test (functional)
- ran neper `udp_stream -c -H <host> -F 100 -l 300 -T 20`:
    netstat --statistics
    Ip:
        282078937 total packets received
        0 forwarded
        0 incoming packets discarded
        946760 incoming packets delivered
        18743456 requests sent out
        101 fragments dropped after timeout
        282077129 reassemblies required
        944952 packets reassembled ok
        262734239 packet reassembles failed
   (The numbers/stats above are somewhat better re:
    reassemblies vs a kernel without this patchset. More
    comprehensive performance testing TBD).

Reported-by: Jann Horn <jannh@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index f4272a29dc44..b86d14528188 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -75,7 +75,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;
+	struct sk_buff		*fragments;  /* Used in IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;
-- 
cgit 


From e4cc5a1873ac1297615962185f94adbbfaf6456b Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 6 Aug 2018 17:58:40 +0200
Subject: Bluetooth: btqca: Introduce HCI_EV_VENDOR and use it

Using HCI_VENDOR_PKT for vendor specific events does work since it has
also the value 0xff, but it is actually the packet type indicator
constant and not the event constant. So introduce HCI_EV_VENDOR and
use it.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7f008097552e..4619a79b1bbb 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2176,6 +2176,8 @@ struct hci_evt_le_ext_adv_set_term {
 	__u8	num_evts;
 } __packed;
 
+#define HCI_EV_VENDOR			0xff
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit 


From 4e665afbd7bee29b44b5d22821b56207f8459e39 Mon Sep 17 00:00:00 2001
From: Harsha Sharma <harshasharmaiitr@gmail.com>
Date: Tue, 7 Aug 2018 17:14:10 +0200
Subject: netfilter: cttimeout: move ctnl_untimeout to nf_conntrack

As ctnl_untimeout is required by nft_ct, move ctnl_untimeout from
nfnetlink_cttimeout to nf_conntrack_timeout and rename it to nf_ct_untimeout.

Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 80ceb3d0291d..7a21bc0f00eb 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -83,6 +83,7 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 int nf_conntrack_timeout_init(void);
 void nf_conntrack_timeout_fini(void);
+void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout);
 #else
 static inline int nf_conntrack_timeout_init(void)
 {
-- 
cgit 


From 6c1fd7dc489d9bf64196f5b0fa33e059f64460c8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Aug 2018 17:14:15 +0200
Subject: netfilter: cttimeout: decouple timeout policy from
 nfnetlink_cttimeout object

The timeout policy is currently embedded into the nfnetlink_cttimeout
object, move the policy into an independent object. This allows us to
reuse part of the existing conntrack timeout extension from nf_tables
without adding dependencies with the nfnetlink_cttimeout object layout.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 7a21bc0f00eb..d5f62cc6c2ae 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -11,24 +11,28 @@
 
 #define CTNL_TIMEOUT_NAME_MAX	32
 
+struct nf_ct_timeout {
+	__u16			l3num;
+	const struct nf_conntrack_l4proto *l4proto;
+	char			data[0];
+};
+
 struct ctnl_timeout {
 	struct list_head	head;
 	struct rcu_head		rcu_head;
 	refcount_t		refcnt;
 	char			name[CTNL_TIMEOUT_NAME_MAX];
-	__u16			l3num;
-	const struct nf_conntrack_l4proto *l4proto;
-	char			data[0];
+	struct nf_ct_timeout	timeout;
 };
 
 struct nf_conn_timeout {
-	struct ctnl_timeout __rcu *timeout;
+	struct nf_ct_timeout __rcu *timeout;
 };
 
 static inline unsigned int *
 nf_ct_timeout_data(struct nf_conn_timeout *t)
 {
-	struct ctnl_timeout *timeout;
+	struct nf_ct_timeout *timeout;
 
 	timeout = rcu_dereference(t->timeout);
 	if (timeout == NULL)
@@ -49,7 +53,7 @@ struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct)
 
 static inline
 struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
-					      struct ctnl_timeout *timeout,
+					      struct nf_ct_timeout *timeout,
 					      gfp_t gfp)
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
@@ -83,7 +87,7 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 int nf_conntrack_timeout_init(void);
 void nf_conntrack_timeout_fini(void);
-void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout);
+void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout);
 #else
 static inline int nf_conntrack_timeout_init(void)
 {
@@ -97,8 +101,8 @@ static inline void nf_conntrack_timeout_fini(void)
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
-extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout);
+extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
+extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout);
 #endif
 
 #endif /* _NF_CONNTRACK_TIMEOUT_H */
-- 
cgit 


From ad83f2a9ce37a264202f48f4fd8889ee9056b703 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Aug 2018 17:14:19 +0200
Subject: netfilter: remove ifdef around cttimeout in struct
 nf_conntrack_l4proto

Simplify this, include it unconditionally in this structure layout as we
do with ctnetlink.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6068c6da3eac..8465263b297d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -77,7 +77,6 @@ struct nf_conntrack_l4proto {
 			       struct nf_conntrack_tuple *t);
 	const struct nla_policy *nla_policy;
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
 	struct {
 		int (*nlattr_to_obj)(struct nlattr *tb[],
 				     struct net *net, void *data);
@@ -87,7 +86,6 @@ struct nf_conntrack_l4proto {
 		u16 nlattr_max;
 		const struct nla_policy *nla_policy;
 	} ctnl_timeout;
-#endif
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	/* Print out the private part of the conntrack. */
 	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-- 
cgit 


From 92e2c4053623f21d61a683f7ef7bd61c8300ac7d Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Tue, 7 Aug 2018 17:36:00 +0200
Subject: flow_dissector: allow dissection of tunnel options from metadata

Allow the existing 'dissection' of tunnel metadata to 'dissect'
options already present in tunnel metadata. This dissection is
controlled by a new dissector key, FLOW_DISSECTOR_KEY_ENC_OPTS.

This dissection only occurs when skb_flow_dissect_tunnel_info()
is called, currently only the Flower classifier makes that call.
So there should be no impact on other users of the flow dissector.

This is in preparation for allowing the flower classifier to
match on Geneve options.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2a17f041f7a1..6a4586dcdede 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {
 		mpls_label:20;
 };
 
+#define FLOW_DIS_TUN_OPTS_MAX 255
+/**
+ * struct flow_dissector_key_enc_opts:
+ * @data: tunnel option data
+ * @len: length of tunnel option data
+ * @dst_opt_type: tunnel option type
+ */
+struct flow_dissector_key_enc_opts {
+	u8 data[FLOW_DIS_TUN_OPTS_MAX];	/* Using IP_TUNNEL_OPTS_MAX is desired
+					 * here but seems difficult to #include
+					 */
+	u8 len;
+	__be16 dst_opt_type;
+};
+
 struct flow_dissector_key_keyid {
 	__be32	keyid;
 };
@@ -208,6 +223,8 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
+
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
-- 
cgit 


From a8d5b4ab353738e16e5f9d21ab1e3d44b37983d0 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:12 +0900
Subject: xdp: Helper function to clear kernel pointers in xdp_frame

xdp_frame has kernel pointers which should not be readable from bpf
programs. When we want to reuse xdp_frame region but it may be read by
bpf programs later, we can use this helper to clear kernel pointers.
This is more efficient than calling memset() for the entire struct.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index fcb033f51d8c..76b95256c266 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -84,6 +84,13 @@ struct xdp_frame {
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+/* Clear kernel pointers in xdp_frame */
+static inline void xdp_scrub_frame(struct xdp_frame *frame)
+{
+	frame->data = NULL;
+	frame->dev_rx = NULL;
+}
+
 /* Convert xdp_buff to xdp_frame */
 static inline
 struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
-- 
cgit 


From aa12af77aae05008b3e637b85944dcd512f75eba Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Tue, 7 Aug 2018 13:16:35 +0530
Subject: Bluetooth: Add definitions for LE set address resolution

Add the definitions for LE address resolution enable HCI commands.
When the LE address resolution enable gets changed via HCI commands
make sure that flag gets updated.

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4619a79b1bbb..cdd9f1fe7cfa 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -269,6 +269,7 @@ enum {
 	HCI_VENDOR_DIAG,
 	HCI_FORCE_BREDR_SMP,
 	HCI_FORCE_STATIC_ADDR,
+	HCI_LL_RPA_RESOLUTION,
 
 	__HCI_NUM_FLAGS,
 };
@@ -1524,6 +1525,8 @@ struct hci_rp_le_read_resolv_list_size {
 	__u8	size;
 } __packed;
 
+#define HCI_OP_LE_SET_ADDR_RESOLV_ENABLE 0x202d
+
 #define HCI_OP_LE_READ_MAX_DATA_LEN	0x202f
 struct hci_rp_le_read_max_data_len {
 	__u8	status;
-- 
cgit 


From 40a1227ea845a37ab197dd1caffb60b047fa36b1 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:21 -0700
Subject: tcp: Avoid TCP syncookie rejected by SO_REUSEPORT socket

Although the actual cookie check "__cookie_v[46]_check()" does
not involve sk specific info, it checks whether the sk has recent
synq overflow event in "tcp_synq_no_recent_overflow()".  The
tcp_sk(sk)->rx_opt.ts_recent_stamp is updated every second
when it has sent out a syncookie (through "tcp_synq_overflow()").

The above per sk "recent synq overflow event timestamp" works well
for non SO_REUSEPORT use case.  However, it may cause random
connection request reject/discard when SO_REUSEPORT is used with
syncookie because it fails the "tcp_synq_no_recent_overflow()"
test.

When SO_REUSEPORT is used, it usually has multiple listening
socks serving TCP connection requests destined to the same local IP:PORT.
There are cases that the TCP-ACK-COOKIE may not be received
by the same sk that sent out the syncookie.  For example,
if reuse->socks[] began with {sk0, sk1},
1) sk1 sent out syncookies and tcp_sk(sk1)->rx_opt.ts_recent_stamp
   was updated.
2) the reuse->socks[] became {sk1, sk2} later.  e.g. sk0 was first closed
   and then sk2 was added.  Here, sk2 does not have ts_recent_stamp set.
   There are other orderings that will trigger a similar situation
   below but the idea is the same.
3) When the TCP-ACK-COOKIE comes back, sk2 was selected.
   "tcp_synq_no_recent_overflow(sk2)" returns true. In this case,
   all syncookies sent by sk1 will be handled (and rejected)
   by sk2 while sk1 is still alive.

The userspace may create and remove listening SO_REUSEPORT sockets
as it sees fit.  E.g. Adding new thread (and SO_REUSEPORT sock) to handle
incoming requests, old process stopping and new process starting...etc.
With or without SO_ATTACH_REUSEPORT_[CB]BPF,
the sockets leaving and joining a reuseport group makes picking
the same sk to check the syncookie very difficult (if not impossible).

The later patches will allow bpf prog more flexibility in deciding
where a sk should be located in a bpf map and selecting a particular
SO_REUSEPORT sock as it sees fit.  e.g. Without closing any sock,
replace the whole bpf reuseport_array in one map_update() by using
map-in-map.  Getting the syncookie check working smoothly across
socks in the same "reuse->socks[]" is important.

A partial solution is to set the newly added sk's ts_recent_stamp
to the max ts_recent_stamp of a reuseport group but that will require
to iterate through reuse->socks[]  OR
pessimistically set it to "now - TCP_SYNCOOKIE_VALID" when a sk is
joining a reuseport group.  However, neither of them will solve the
existing sk getting moved around the reuse->socks[] and that
sk may not have ts_recent_stamp updated, unlikely under continuous
synflood but not impossible.

This patch opts to treat the reuseport group as a whole when
considering the last synq overflow timestamp since
they are serving the same IP:PORT from the userspace
(and BPF program) perspective.

"synq_overflow_ts" is added to "struct sock_reuseport".
The tcp_synq_overflow() and tcp_synq_no_recent_overflow()
will update/check reuse->synq_overflow_ts if the sk is
in a reuseport group.  Similar to the reuseport decision in
__inet_lookup_listener(), both sk->sk_reuseport and
sk->sk_reuseport_cb are tested for SO_REUSEPORT usage.
Update on "synq_overflow_ts" happens at roughly once
every second.

A synflood test was done with a 16 rx-queues and 16 reuseport sockets.
No meaningful performance change is observed.  Before and
after the change is ~9Mpps in IPv4.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock_reuseport.h |  4 ++++
 include/net/tcp.h            | 30 ++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..6bef7a0052f2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -12,6 +12,10 @@ struct sock_reuseport {
 
 	u16			max_socks;	/* length of socks */
 	u16			num_socks;	/* elements in socks */
+	/* The last synq overflow event timestamp of this
+	 * reuse->socks[] group.
+	 */
+	unsigned int		synq_overflow_ts;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d769dc20359b..d196901c9dba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -36,6 +36,7 @@
 #include <net/inet_hashtables.h>
 #include <net/checksum.h>
 #include <net/request_sock.h>
+#include <net/sock_reuseport.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 #include <net/ip.h>
@@ -473,9 +474,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow;
 	unsigned int now = jiffies;
 
+	if (sk->sk_reuseport) {
+		struct sock_reuseport *reuse;
+
+		reuse = rcu_dereference(sk->sk_reuseport_cb);
+		if (likely(reuse)) {
+			last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+			if (time_after32(now, last_overflow + HZ))
+				WRITE_ONCE(reuse->synq_overflow_ts, now);
+			return;
+		}
+	}
+
+	last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
 	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
@@ -483,9 +497,21 @@ static inline void tcp_synq_overflow(const struct sock *sk)
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow;
 	unsigned int now = jiffies;
 
+	if (sk->sk_reuseport) {
+		struct sock_reuseport *reuse;
+
+		reuse = rcu_dereference(sk->sk_reuseport_cb);
+		if (likely(reuse)) {
+			last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+			return time_after32(now, last_overflow +
+					    TCP_SYNCOOKIE_VALID);
+		}
+	}
+
+	last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
 	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
-- 
cgit 


From 736b46027eb4a4c602d3b8b93d2f48c9facbd915 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:22 -0700
Subject: net: Add ID (if needed) to sock_reuseport and expose reuseport_lock

A later patch will introduce a BPF_MAP_TYPE_REUSEPORT_ARRAY which
allows a SO_REUSEPORT sk to be added to a bpf map.  When a sk
is removed from reuse->socks[], it also needs to be removed from
the bpf map.  Also, when adding a sk to a bpf map, the bpf
map needs to ensure it is indeed in a reuse->socks[].
Hence, reuseport_lock is needed by the bpf map to ensure its
map_update_elem() and map_delete_elem() operations are in-sync with
the reuse->socks[].  The BPF_MAP_TYPE_REUSEPORT_ARRAY map will only
acquire the reuseport_lock after ensuring the adding sk is already
in a reuseport group (i.e. reuse->socks[]).  The map_lookup_elem()
will be lockless.

This patch also adds an ID to sock_reuseport.  A later patch
will introduce BPF_PROG_TYPE_SK_REUSEPORT which allows
a bpf prog to select a sk from a bpf map.  It is inflexible to
statically enforce a bpf map can only contain the sk belonging to
a particular reuse->socks[] (i.e. same IP:PORT) during the bpf
verification time. For example, think about the map-in-map situation
where the inner map can be dynamically changed in runtime and the outer
map may have inner maps belonging to different reuseport groups.
Hence, when the bpf prog (in the new BPF_PROG_TYPE_SK_REUSEPORT
type) selects a sk,  this selected sk has to be checked to ensure it
belongs to the requesting reuseport group (i.e. the group serving
that IP:PORT).

The "sk->sk_reuseport_cb" pointer cannot be used for this checking
purpose because the pointer value will change after reuseport_grow().
Instead of saving all checking conditions like the ones
preceding the call to "reuseport_add_sock()" and comparing them every time a
bpf_prog is run, a 32-bit ID is introduced to survive the
reuseport_grow().  The ID is only acquired if any of the
reuse->socks[] is added to the newly introduced
"BPF_MAP_TYPE_REUSEPORT_ARRAY" map.

If "BPF_MAP_TYPE_REUSEPORT_ARRAY" is not used, the changes in this
patch are a no-op.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock_reuseport.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 6bef7a0052f2..e1a7681856f7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -5,8 +5,11 @@
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
+#include <linux/spinlock.h>
 #include <net/sock.h>
 
+extern spinlock_t reuseport_lock;
+
 struct sock_reuseport {
 	struct rcu_head		rcu;
 
@@ -16,6 +19,8 @@ struct sock_reuseport {
 	 * reuse->socks[] group.
 	 */
 	unsigned int		synq_overflow_ts;
+	/* ID stays the same even after the size of socks[] grows. */
+	unsigned int		reuseport_id;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -29,5 +34,6 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  int hdr_len);
 extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
 					      struct bpf_prog *prog);
+int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
-- 
cgit 


From 2dbb9b9e6df67d444fbe425c7f6014858d337adf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:25 -0700
Subject: bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT

This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select
a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY.  Like other
non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN.

BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern"
to store the bpf context instead of using the skb->cb[48].

At the SO_REUSEPORT sk lookup time, it is in the middle of transiting
from a lower layer (ipv4/ipv6) to an upper layer (udp/tcp).  At this
point, it is not always clear where the bpf context can be appended
in the skb->cb[48] to avoid saving-and-restoring cb[].  Even putting
aside the difference between ipv4-vs-ipv6 and udp-vs-tcp, it is not
clear whether the lower layer will only be ipv4 and ipv6 in the future
and whether it will not touch the cb[] again before transiting to the
upper layer.

For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB
instead of IP[6]CB and it may still modify the cb[] after calling
the udp[46]_lib_lookup_skb().  Because of the above reason, if
skb->cb is used for the bpf ctx, saving-and-restoring is needed
and likely the whole 48 bytes cb[] has to be saved and restored.

Instead of saving, setting and restoring the cb[], this patch opts
to create a new "struct sk_reuseport_kern" and set the needed
values in there.

The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)"
will serve all ipv4/ipv6 + udp/tcp combinations.  There is no protocol
specific usage at this point and it is also in line with the current
sock_reuseport.c implementation (i.e. no protocol specific requirement).

In "struct sk_reuseport_md", this patch exposes data/data_end/len
with semantics similar to other existing usages.  Together
with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()",
the bpf prog can peek anywhere in the skb.  The "bind_inany" tells
the bpf prog that the reuseport group is bound to a local
INANY address which cannot be learned from skb.

The new "bind_inany" is added to "struct sock_reuseport" which will be
used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order
to avoid repeating the "bind INANY" test on
"sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run.  It can
only be properly initialized when a "sk->sk_reuseport" enabled sk is
being added to a hashtable (i.e. during "reuseport_alloc()" and
"reuseport_add_sock()").

The new "sk_select_reuseport()" is the main helper that the
bpf prog will use to select a SO_REUSEPORT sk.  It is the only function
that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY.  As mentioned in
the earlier patch, the validity of a selected sk is checked at
run time in "sk_select_reuseport()".  Doing the check at
verification time is difficult and inflexible (consider the map-in-map
use case).  The runtime check is to compare the selected sk's reuseport_id
with the reuseport_id that we want.  This helper will return -EXXX if the
selected sk cannot serve the incoming request (e.g. reuseport_id
does not match).  The bpf prog can decide if it wants to do SK_DROP at its
discretion.

When the bpf prog returns SK_PASS, the kernel will check if a
valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL").
If so, it will use the selected sk.  If not, the kernel
will select one from "reuse->socks[]" (as before this patch).

The SK_DROP and SK_PASS handling logic will be in the next patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/addrconf.h       | 1 +
 include/net/sock_reuseport.h | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5f43f7a70fe6..6def0351bcc3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		    u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 			  bool match_wildcard);
+bool inet_rcv_saddr_any(const struct sock *sk);
 void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
 void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
 
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index e1a7681856f7..73b569556be6 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,12 +21,14 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
+	bool			bind_inany;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
-extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
+extern int reuseport_alloc(struct sock *sk, bool bind_inany);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
+			      bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
-- 
cgit 


From 8217ca653ec601246832d562207bc24bdf652d2f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:26 -0700
Subject: bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport
 selection

This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a
SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in
the earlier patch.  "bpf_run_sk_reuseport()" will return -ECONNREFUSED
when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP.
The callers, in inet[6]_hashtable.c and ipv[46]/udp.c, are modified to
handle this case and return NULL immediately instead of continuing the
sk search from its hashtable.

It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach
BPF_PROG_TYPE_SK_REUSEPORT.  The "sk_reuseport_attach_bpf()" will check
if the attaching bpf prog is in the new SK_REUSEPORT or the existing
SOCKET_FILTER type and then check different things accordingly.

One level of "__reuseport_attach_prog()" call is removed.  The
"sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed
back to "reuseport_attach_prog()" in sock_reuseport.c.  sock_reuseport.c
seems to have more knowledge on those test requirements than filter.c.
In "reuseport_attach_prog()", after new_prog is attached to reuse->prog,
the old_prog (if any) is also directly freed instead of returning the
old_prog to the caller and asking the caller to free.

The sysctl_optmem_max check is moved back to the
"sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()".
As with other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only
bounded by the usual "bpf_prog_charge_memlock()" during load time
instead of being bounded by both bpf_prog_charge_memlock and sysctl_optmem_max.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock_reuseport.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 73b569556be6..8a5f70c7cdf2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -34,8 +34,7 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
 					  struct sk_buff *skb,
 					  int hdr_len);
-extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
-					      struct bpf_prog *prog);
+extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
-- 
cgit 


From 466466dc6c28ca9dc401f10e235b9cde9a7c9162 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 9 Aug 2018 09:38:09 -0700
Subject: tcp: mandate a one-time immediate ACK

Add a new flag to indicate a one-time immediate ACK. This flag is
occasionally set under specific TCP protocol states in addition to
the more common quickack mechanism for interactive applications.

In several cases in the TCP code we want to force an immediate ACK
but do not want to call tcp_enter_quickack_mode() because we do
not want to forget the icsk_ack.pingpong or icsk_ack.ato state.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 0a6c9e0f2b5a..fa43b82607d9 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -167,7 +167,8 @@ enum inet_csk_ack_state_t {
 	ICSK_ACK_SCHED	= 1,
 	ICSK_ACK_TIMER  = 2,
 	ICSK_ACK_PUSHED = 4,
-	ICSK_ACK_PUSHED2 = 8
+	ICSK_ACK_PUSHED2 = 8,
+	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
 };
 
 void inet_csk_init_xmit_timers(struct sock *sk,
-- 
cgit 


From 05364ca03cfd419caecb292fede20eb39667eaae Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 10 Aug 2018 20:11:42 +0300
Subject: net/sctp: Make wrappers for accessing in/out streams

This patch introduces wrappers for accessing in/out streams indirectly.
This will make it possible to replace physically contiguous memory arrays
of streams with flexible arrays (or maybe any other appropriate
mechanism) which do memory allocation on a per-page basis.

Signed-off-by: Oleg Babin <obabin@virtuozzo.com>
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ab869e0d8326..6b2b8df8a1d2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -398,37 +398,35 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
 
 /* What is the current SSN number for this stream? */
 #define sctp_ssn_peek(stream, type, sid) \
-	((stream)->type[sid].ssn)
+	(sctp_stream_##type((stream), (sid))->ssn)
 
 /* Return the next SSN number for this stream.	*/
 #define sctp_ssn_next(stream, type, sid) \
-	((stream)->type[sid].ssn++)
+	(sctp_stream_##type((stream), (sid))->ssn++)
 
 /* Skip over this ssn and all below. */
 #define sctp_ssn_skip(stream, type, sid, ssn) \
-	((stream)->type[sid].ssn = ssn + 1)
+	(sctp_stream_##type((stream), (sid))->ssn = ssn + 1)
 
 /* What is the current MID number for this stream? */
 #define sctp_mid_peek(stream, type, sid) \
-	((stream)->type[sid].mid)
+	(sctp_stream_##type((stream), (sid))->mid)
 
 /* Return the next MID number for this stream.  */
 #define sctp_mid_next(stream, type, sid) \
-	((stream)->type[sid].mid++)
+	(sctp_stream_##type((stream), (sid))->mid++)
 
 /* Skip over this mid and all below. */
 #define sctp_mid_skip(stream, type, sid, mid) \
-	((stream)->type[sid].mid = mid + 1)
-
-#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
+	(sctp_stream_##type((stream), (sid))->mid = mid + 1)
 
 /* What is the current MID_uo number for this stream? */
 #define sctp_mid_uo_peek(stream, type, sid) \
-	((stream)->type[sid].mid_uo)
+	(sctp_stream_##type((stream), (sid))->mid_uo)
 
 /* Return the next MID_uo number for this stream.  */
 #define sctp_mid_uo_next(stream, type, sid) \
-	((stream)->type[sid].mid_uo++)
+	(sctp_stream_##type((stream), (sid))->mid_uo++)
 
 /*
  * Pointers to address related SCTP functions.
@@ -1463,6 +1461,23 @@ struct sctp_stream {
 	struct sctp_stream_interleave *si;
 };
 
+static inline struct sctp_stream_out *sctp_stream_out(
+	const struct sctp_stream *stream,
+	__u16 sid)
+{
+	return ((struct sctp_stream_out *)(stream->out)) + sid;
+}
+
+static inline struct sctp_stream_in *sctp_stream_in(
+	const struct sctp_stream *stream,
+	__u16 sid)
+{
+	return ((struct sctp_stream_in *)(stream->in)) + sid;
+}
+
+#define SCTP_SO(s, i) sctp_stream_out((s), (i))
+#define SCTP_SI(s, i) sctp_stream_in((s), (i))
+
 #define SCTP_STREAM_CLOSED		0x00
 #define SCTP_STREAM_OPEN		0x01
 
-- 
cgit 


From 0d493b4d0be352b5e361e4fa0bc3efe952d8b10e Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 10 Aug 2018 20:11:43 +0300
Subject: net/sctp: Replace in/out stream arrays with flex_array

This patch replaces physically contiguous memory arrays
allocated using kmalloc_array() with flexible arrays.
This makes it possible to avoid memory allocation failures on
systems under memory stress.

Signed-off-by: Oleg Babin <obabin@virtuozzo.com>
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6b2b8df8a1d2..28a7c8e44636 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -57,6 +57,7 @@
 #include <linux/atomic.h>		/* This gets us atomic counters.  */
 #include <linux/skbuff.h>	/* We need sk_buff_head. */
 #include <linux/workqueue.h>	/* We need tq_struct.	 */
+#include <linux/flex_array.h>	/* We need flex_array.   */
 #include <linux/sctp.h>		/* We need sctp* header structs.  */
 #include <net/sctp/auth.h>	/* We need auth specific structs */
 #include <net/ip.h>		/* For inet_skb_parm */
@@ -1438,8 +1439,8 @@ struct sctp_stream_in {
 };
 
 struct sctp_stream {
-	struct sctp_stream_out *out;
-	struct sctp_stream_in *in;
+	struct flex_array *out;
+	struct flex_array *in;
 	__u16 outcnt;
 	__u16 incnt;
 	/* Current stream being sent, if any */
@@ -1465,14 +1466,14 @@ static inline struct sctp_stream_out *sctp_stream_out(
 	const struct sctp_stream *stream,
 	__u16 sid)
 {
-	return ((struct sctp_stream_out *)(stream->out)) + sid;
+	return flex_array_get(stream->out, sid);
 }
 
 static inline struct sctp_stream_in *sctp_stream_in(
 	const struct sctp_stream *stream,
 	__u16 sid)
 {
-	return ((struct sctp_stream_in *)(stream->in)) + sid;
+	return flex_array_get(stream->in, sid);
 }
 
 #define SCTP_SO(s, i) sctp_stream_out((s), (i))
-- 
cgit 


From 84a75b329be84c108a21ab9c02a52a9bf9e5a919 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 10 Aug 2018 20:51:52 +0300
Subject: net: sched: extend action ops with put_dev callback

As a preparation for removing dependency on rtnl lock from rules update
path, all users of shared objects must take reference while working with
them.

Extend action ops with put_dev() API to be used on net device returned by
get_dev().

Modify mirred action (only action that implements get_dev callback):
- Take reference to net device in get_dev.
- Implement put_dev API that releases reference to net device.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8c9bc02d05e1..1ad5b19e83a9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	void	(*put_dev)(struct net_device *dev);
 	int     (*delete)(struct net *net, u32 index);
 };
 
-- 
cgit 


From 51a9f5ae653979ac4bdbd81778a10431f0177e3c Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 10 Aug 2018 20:51:54 +0300
Subject: net: core: protect rate estimator statistics pointer with lock

Extend gen_new_estimator() to also take stats_lock when re-assigning rate
estimator statistics pointer. (to be used by unlocked actions)

Rename 'stats_lock' to 'lock' and change argument description to explain
that it is now also used for control path.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 0304ba2ae353..883bb9085f15 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -59,13 +59,13 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
-		      spinlock_t *stats_lock,
+		      spinlock_t *lock,
 		      seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **ptr,
-			  spinlock_t *stats_lock,
+			  spinlock_t *lock,
 			  seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
 bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
-- 
cgit 


From 353c9cb360874e737fb000545f783df756c06f9a Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Sat, 11 Aug 2018 20:27:24 +0000
Subject: ip: add helpers to process in-order fragments faster.

This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.

The new logic (fully implemented in the second patch) is as follows:

* Nodes in the rb-tree will now contain not single fragments, but lists
  of consecutive fragments ("runs").

* At each point in time, the current "active" run at the tail is
  maintained/tracked. Fragments that arrive in-order, adjacent
  to the previous tail fragment, are added to this tail run without
  triggering the re-balancing of the rb-tree.

* If a fragment arrives out of order with the offset _before_ the tail run,
  it is inserted into the rb-tree as a single fragment.

* If a fragment arrives after the current tail fragment (with a gap),
  it starts a new "tail" run, and is inserted into the rb-tree
  at the end as the head of the new run.

skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).

Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b86d14528188..1662cbc0b46b 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -57,7 +57,9 @@ struct frag_v6_compare_key {
  * @lock: spinlock protecting this frag
  * @refcnt: reference count of the queue
  * @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
  * @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
  * @stamp: timestamp of the last received fragment
  * @len: total length of the original datagram
  * @meat: length of received fragments so far
@@ -78,6 +80,7 @@ struct inet_frag_queue {
 	struct sk_buff		*fragments;  /* Used in IPv6. */
 	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
+	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
 	int			len;
 	int			meat;
@@ -113,6 +116,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
 void inet_frag_destroy(struct inet_frag_queue *q);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
 
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
 static inline void inet_frag_put(struct inet_frag_queue *q)
 {
 	if (refcount_dec_and_test(&q->refcnt))
-- 
cgit 


From 0b243d004ea640875115d1500ec429a3e9f9fae9 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Fri, 10 Aug 2018 20:46:41 +0530
Subject: net/tls: Combined memory allocation for decryption request

For preparing decryption request, several memory chunks are required
(aead_req, sgin, sgout, iv, aad). For submitting the decrypt request to
an accelerator, it is required that the buffers which are read by the
accelerator must be dma-able and not come from stack. The buffers for
aad and iv can be separately kmalloced each, but it is inefficient.
This patch does a combined allocation for preparing decryption request
and then segments into aead_req || sgin || sgout || iv || aad.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index d8b3b6578c01..d5c683e8bb22 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -124,10 +124,6 @@ struct tls_sw_context_rx {
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
-
-	char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
-	char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
-
 };
 
 struct tls_record_info {
-- 
cgit 


From e6f86b0f7ae473969a3301b74bf98af9e42ecd0e Mon Sep 17 00:00:00 2001
From: Virgile Jarry <virgile@acceis.fr>
Date: Fri, 10 Aug 2018 17:48:15 +0200
Subject: ipv6: Add icmp_echo_ignore_all support for ICMPv6

Preventing the kernel from responding to ICMP Echo Requests messages
can be useful in several ways. The sysctl parameter
'icmp_echo_ignore_all' can be used to prevent the kernel from
responding to IPv4 ICMP echo requests. For IPv6 pings, such
a sysctl kernel parameter did not exist.

Add the ability to prevent the kernel from responding to IPv6
ICMP echo requests through the use of the following sysctl
parameter: /proc/sys/net/ipv6/icmp/echo_ignore_all.
Update the documentation to reflect this change.

Signed-off-by: Virgile Jarry <virgile@acceis.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 762ac9931b62..f0e396ab9bec 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -32,6 +32,7 @@ struct netns_sysctl_ipv6 {
 	int flowlabel_consistency;
 	int auto_flowlabels;
 	int icmpv6_time;
+	int icmpv6_echo_ignore_all;
 	int anycast_src_echo_reply;
 	int ip_nonlocal_bind;
 	int fwmark_reflect;
-- 
cgit 


From 96d18d8254dc5a3f0067a629866af4165b3afe32 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 1 Aug 2018 14:57:59 -0700
Subject: inet/connection_sock: prefer _THIS_IP_ to current_text_addr

As part of the effort to reduce the code duplication between _THIS_IP_
and current_text_addr(), let's consolidate callers of
current_text_addr() to use _THIS_IP_.

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fa43b82607d9..371b3b45fd5c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/poll.h>
+#include <linux/kernel.h>
 
 #include <net/inet_sock.h>
 #include <net/request_sock.h>
@@ -225,7 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 
 	if (when > max_when) {
 		pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
-			 sk, what, when, current_text_addr());
+			 sk, what, when, (void *)_THIS_IP_);
 		when = max_when;
 	}
 
-- 
cgit