From bf1ecd210541ef5f3a110e88e8ca5d33b4aa5c23 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 31 May 2016 00:16:50 +0300 Subject: cfg80211: Allow cfg80211_connect_result() errors to be distinguished Previously, the status parameter to cfg80211_connect_result() was documented as using WLAN_STATUS_UNSPECIFIED_FAILURE (1) when the real status code for the failure is not known. This value can be used by an AP (and often is) and as such, user space cannot distinguish between explicitly rejected authentication/association and not being able to even try to associate or not receiving a response from the AP. Add a new inline function, cfg80211_connect_timeout(), to be used when the driver knows that the connection attempt failed due to a reason where the connection could not be attempted or no response was received from the AP. The internal functions now allow a negative status value (-1) to be used as an indication of this special case. This results in NL80211_ATTR_TIMED_OUT being added to the NL80211_CMD_CONNECT event to allow user space to determine that this case was hit. For backwards compatibility, NL80211_STATUS_CODE with the value WLAN_STATUS_UNSPECIFIED_FAILURE is still indicated in the event in such a case. Signed-off-by: Jouni Malinen [johannes: fix cfg80211_connect_bss() prototype to use int for status, add cfg80211_connect_timeout() to docbook, fix docbook] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 53 +++++++++++++++++++++++++++++++++----------- include/uapi/linux/nl80211.h | 7 +++++- 2 files changed, 46 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63921672bed0..537f010cf5e1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2367,19 +2367,23 @@ struct cfg80211_qos_map { * (invoked with the wireless_dev mutex held) * * @connect: Connect to the ESS with the specified parameters. When connected, - * call cfg80211_connect_result() with status code %WLAN_STATUS_SUCCESS. - * If the connection fails for some reason, call cfg80211_connect_result() - * with the status from the AP. The driver is allowed to roam to other - * BSSes within the ESS when the other BSS matches the connect parameters. - * When such roaming is initiated by the driver, the driver is expected to - * verify that the target matches the configured security parameters and - * to use Reassociation Request frame instead of Association Request frame. - * The connect function can also be used to request the driver to perform - * a specific roam when connected to an ESS. In that case, the prev_bssid + * call cfg80211_connect_result()/cfg80211_connect_bss() with status code + * %WLAN_STATUS_SUCCESS. If the connection fails for some reason, call + * cfg80211_connect_result()/cfg80211_connect_bss() with the status code + * from the AP or cfg80211_connect_timeout() if no frame with status code + * was received. + * The driver is allowed to roam to other BSSes within the ESS when the + * other BSS matches the connect parameters. When such roaming is initiated + * by the driver, the driver is expected to verify that the target matches + * the configured security parameters and to use Reassociation Request + * frame instead of Association Request frame. + * The connect function can also be used to request the driver to perform a + * specific roam when connected to an ESS. In that case, the prev_bssid * parameter is set to the BSSID of the currently associated BSS as an - * indication of requesting reassociation.
In both the driver-initiated and - * new connect() call initiated roaming cases, the result of roaming is - * indicated with a call to cfg80211_roamed() or cfg80211_roamed_bss(). + * indication of requesting reassociation. + * In both the driver-initiated and new connect() call initiated roaming + * cases, the result of roaming is indicated with a call to + * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS. * (invoked with the wireless_dev mutex held) @@ -4680,7 +4684,7 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, u16 status, gfp_t gfp); + size_t resp_ie_len, int status, gfp_t gfp); /** * cfg80211_connect_result - notify cfg80211 of connection result @@ -4709,6 +4713,29 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, resp_ie_len, status, gfp); } +/** + * cfg80211_connect_timeout - notify cfg80211 of connection timeout + * + * @dev: network device + * @bssid: the BSSID of the AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has failed + * in a sequence where no explicit authentication/association rejection was + * received from the AP. This could happen, e.g., due to not being able to send + * out the Authentication or Association Request frame or timing out while + * waiting for the response. + */ +static inline void +cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, gfp_t gfp) +{ + cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, + gfp); +} + /** * cfg80211_roamed - notify cfg80211 of roaming * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e23d78685a01..8d995316aadb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -493,7 +493,12 @@ * This attribute is ignored if driver does not support roam scan. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be - * determined by the STATUS_CODE attribute. + * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success, + * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the + * event, the connection attempt failed due to not being able to initiate + * authentication/association or not receiving a response from the AP. + * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as + * well to remain backwards compatible. * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), * sent as an event when the card/driver roamed by itself. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify -- cgit From 019ae3a918811715192b22c400ac78d54acc26a9 Mon Sep 17 00:00:00 2001 From: "Kanchanapally, Vidyullatha" Date: Mon, 16 May 2016 10:41:04 +0530 Subject: cfg80211: Advertise extended capabilities per interface type to userspace The driver extended capabilities may differ for different interface types which the userspace needs to know (for example the fine timing measurement initiator and responder bits might differ for a station and AP). 
Add a new nl80211 attribute to provide extended capabilities per interface type to userspace. Signed-off-by: Vidyullatha Kanchanapally Reviewed-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 7 +++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 537f010cf5e1..7bbb00d8b2cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3083,6 +3083,24 @@ struct wiphy_vendor_command { unsigned long *storage); }; +/** + * struct wiphy_iftype_ext_capab - extended capabilities per interface type + * @iftype: interface type + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are the + * 802.11 extended capabilities ("Extended Capabilities element") and are + * in the same format as in the information element. See IEEE Std + * 802.11-2012 8.4.2.29 for the defined fields. + * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities + */ +struct wiphy_iftype_ext_capab { + enum nl80211_iftype iftype; + const u8 *extended_capabilities; + const u8 *extended_capabilities_mask; + u8 extended_capabilities_len; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -3203,9 +3221,14 @@ struct wiphy_vendor_command { * additional capabilities might be supported by userspace; these are * the 802.11 extended capabilities ("Extended Capabilities element") * and are in the same format as in the information element. See - * 802.11-2012 8.4.2.29 for the defined fields. + * 802.11-2012 8.4.2.29 for the defined fields. These are the default + * extended capabilities to be used if the capabilities are not specified + * for a specific interface type in iftype_ext_capab. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities + * @iftype_ext_capab: array of extended capabilities per interface type + * @num_iftype_ext_capab: number of interface types for which extended + * capabilities are specified separately. * @coalesce: packet coalescing support information * * @vendor_commands: array of vendor commands supported by the hardware @@ -3305,6 +3328,9 @@ struct wiphy { const u8 *extended_capabilities, *extended_capabilities_mask; u8 extended_capabilities_len; + const struct wiphy_iftype_ext_capab *iftype_ext_capab; + unsigned int num_iftype_ext_capab; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8d995316aadb..53c8278827a0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1824,6 +1824,11 @@ enum nl80211_commands { * * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment * + * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes: + * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA, + * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per + * interface type. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2206,6 +2211,8 @@ enum nl80211_attrs { NL80211_ATTR_PAD, + NL80211_ATTR_IFTYPE_EXT_CAPA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit From 595d0b29463343c3be995d3948930b8231e5b8cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 May 2016 15:22:41 -0700 Subject: udp: avoid csum_partial() for validated skb In commit e6afc8ace6dd5 ("udp: remove headers from UDP packets before queueing"), the udp_csum_pull_header() helper was added but missed the fact that CHECKSUM_UNNECESSARY packets were now converted to CHECKSUM_NONE and skb->csum_valid was set to 1 for them. Since csum_partial() is quite expensive, even for an 8-byte area, it is worth adding a test. We can also use skb->data instead of udp_hdr() as we are pulling UDP headers, as it is slightly faster. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index ae07f375370d..8894d7144189 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -160,8 +160,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, static inline void udp_csum_pull_header(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_NONE) - skb->csum = csum_partial(udp_hdr(skb), sizeof(struct udphdr), + if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) + skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); -- cgit From 351a4dedb34cbeb9f747f0e2309e891b6fb906cb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 2 Jun 2016 10:23:29 +0300 Subject: qed: Utilize FW 8.10.3.0 The new QED firmware contains several fixes, including: - Wrong classification of packets in 4-port devices. - Anti-spoof interoperability with encapsulated packets. - Tx-switching of encapsulated packets. It also slightly improves Tx performance of the device. In addition, this firmware contains the necessary logic for supporting iscsi & rdma, for which we plan on pushing protocol drivers in the imminent future. Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/common_hsi.h | 205 +++++++++++++++++++++++++++++------------ include/linux/qed/eth_common.h | 124 +++++++++++++++---------- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 222 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 3f14c7efe68f..285189a5ea6d 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -13,9 +13,19 @@ #define X_FINAL_CLEANUP_AGG_INT 1 +/* Queue Zone sizes in bytes */ +#define TSTORM_QZONE_SIZE 8 +#define MSTORM_QZONE_SIZE 0 +#define USTORM_QZONE_SIZE 8 +#define XSTORM_QZONE_SIZE 8 +#define YSTORM_QZONE_SIZE 0 +#define PSTORM_QZONE_SIZE 0 + +#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16 + #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 7 -#define FW_REVISION_VERSION 3 +#define FW_MINOR_VERSION 10 +#define FW_REVISION_VERSION 5 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -97,45 +107,86 @@ #define DQ_XCM_AGG_VAL_SEL_REG6 7 /* XCM agg val selection */ -#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD2 -#define DQ_XCM_ETH_TX_BD_CONS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD3 -#define DQ_XCM_CORE_TX_BD_CONS_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD3 -#define DQ_XCM_ETH_TX_BD_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_CORE_TX_BD_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_CORE_SPQ_PROD_CMD \ - DQ_XCM_AGG_VAL_SEL_WORD4 -#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_CORE_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_CORE_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_SPQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD DQ_XCM_AGG_VAL_SEL_WORD2 +#define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 + +/* UCM agg val selection (HW) */ +#define DQ_UCM_AGG_VAL_SEL_WORD0 0 +#define DQ_UCM_AGG_VAL_SEL_WORD1 1 +#define DQ_UCM_AGG_VAL_SEL_WORD2 2 +#define DQ_UCM_AGG_VAL_SEL_WORD3 3 +#define DQ_UCM_AGG_VAL_SEL_REG0 4 +#define DQ_UCM_AGG_VAL_SEL_REG1 5 +#define DQ_UCM_AGG_VAL_SEL_REG2 6 +#define DQ_UCM_AGG_VAL_SEL_REG3 7 + +/* UCM agg val selection (FW) */ +#define DQ_UCM_ETH_PMD_TX_CONS_CMD DQ_UCM_AGG_VAL_SEL_WORD2 +#define DQ_UCM_ETH_PMD_RX_CONS_CMD DQ_UCM_AGG_VAL_SEL_WORD3 +#define DQ_UCM_ROCE_CQ_CONS_CMD DQ_UCM_AGG_VAL_SEL_REG0 +#define DQ_UCM_ROCE_CQ_PROD_CMD DQ_UCM_AGG_VAL_SEL_REG2 + +/* TCM agg val selection (HW) */ +#define DQ_TCM_AGG_VAL_SEL_WORD0 0 +#define DQ_TCM_AGG_VAL_SEL_WORD1 1 +#define DQ_TCM_AGG_VAL_SEL_WORD2 2 +#define DQ_TCM_AGG_VAL_SEL_WORD3 3 +#define DQ_TCM_AGG_VAL_SEL_REG1 4 +#define DQ_TCM_AGG_VAL_SEL_REG2 5 +#define DQ_TCM_AGG_VAL_SEL_REG6 6 +#define DQ_TCM_AGG_VAL_SEL_REG9 7 + +/* TCM agg val selection (FW) */ +#define DQ_TCM_L2B_BD_PROD_CMD \ + DQ_TCM_AGG_VAL_SEL_WORD1 +#define DQ_TCM_ROCE_RQ_PROD_CMD \ + DQ_TCM_AGG_VAL_SEL_WORD0 /* XCM agg counter flag selection */ -#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 -#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 -#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 -#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 -#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 -#define DQ_XCM_AGG_FLG_SHIFT_CF19 5 -#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 -#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 +#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 +#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 +#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 +#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 +#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 +#define DQ_XCM_AGG_FLG_SHIFT_CF19 
5 +#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 +#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 /* XCM agg counter flag selection */ -#define DQ_XCM_ETH_DQ_CF_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_CORE_DQ_CF_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_ETH_TERMINATE_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_CORE_TERMINATE_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_TPH_EN_CMD (1 << \ - DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_CORE_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF23) + +/* UCM agg counter flag selection (HW) */ +#define DQ_UCM_AGG_FLG_SHIFT_CF0 0 +#define DQ_UCM_AGG_FLG_SHIFT_CF1 1 +#define DQ_UCM_AGG_FLG_SHIFT_CF3 2 +#define DQ_UCM_AGG_FLG_SHIFT_CF4 3 +#define DQ_UCM_AGG_FLG_SHIFT_CF5 4 +#define DQ_UCM_AGG_FLG_SHIFT_CF6 5 +#define DQ_UCM_AGG_FLG_SHIFT_RULE0EN 6 +#define DQ_UCM_AGG_FLG_SHIFT_RULE1EN 7 + +/* UCM agg counter flag selection (FW) */ +#define DQ_UCM_ETH_PMD_TX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ETH_PMD_RX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF5) + +#define DQ_REGION_SHIFT (12) + +/* DPM */ +#define DQ_DPM_WQE_BUFF_SIZE (320) + +/* Conn type ranges */ +#define DQ_CONN_TYPE_RANGE_SHIFT (4) /*****************/ /* QM CONSTANTS */ @@ -282,8 +333,6 @@ (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) -#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 -#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 #define PXP_VF_BAR0_START_IGU 0 #define PXP_VF_BAR0_IGU_LENGTH 0x3000 @@ -342,6 +391,9 @@ #define PXP_VF_BAR0_GRC_WINDOW_LENGTH 32 +#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 +#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 + /* ILT Records */ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 @@ -379,6 +431,38 @@ struct async_data { u8 fw_debug_param; }; +struct coalescing_timeset { + u8 value; +#define COALESCING_TIMESET_TIMESET_MASK 0x7F +#define COALESCING_TIMESET_TIMESET_SHIFT 0 +#define COALESCING_TIMESET_VALID_MASK 0x1 +#define COALESCING_TIMESET_VALID_SHIFT 7 +}; + +struct common_prs_pf_msg_info { + __le32 value; +#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT 0 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT 1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT 2 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK 0x1 +#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT 3 +#define COMMON_PRS_PF_MSG_INFO_RESERVED_MASK 0xFFFFFFF +#define COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT 4 +}; + +struct common_queue_zone { + __le16 ring_drv_data_consumer; + __le16 reserved; +}; + +struct eth_rx_prod_data { + __le16 bd_prod; + __le16 cqe_prod; +}; + struct regpair { __le32 lo; __le32 hi; @@ -388,11 +472,23 @@ struct vf_pf_channel_eqe_data { struct regpair msg_addr; }; +struct malicious_vf_eqe_data { + u8 vf_id; + u8 err_id; + __le16 reserved[3]; +}; + +struct initial_cleanup_eqe_data { + u8 vf_id; + u8 
reserved[7]; +}; + /* Event Data Union */ union event_ring_data { - u8 bytes[8]; - struct vf_pf_channel_eqe_data vf_pf_channel; - struct async_data async_info; + u8 bytes[8]; + struct vf_pf_channel_eqe_data vf_pf_channel; + struct malicious_vf_eqe_data malicious_vf; + struct initial_cleanup_eqe_data vf_init_cleanup; }; /* Event Ring Entry */ @@ -433,6 +529,16 @@ enum protocol_type { MAX_PROTOCOL_TYPE }; +struct ustorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + u8 reserved[3]; +}; + +struct ustorm_queue_zone { + struct ustorm_eth_queue_zone eth; + struct common_queue_zone common; +}; + /* status block structure */ struct cau_pi_entry { u32 prod; @@ -683,19 +789,4 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; -struct tunnel_parsing_flags { - u8 flags; -#define TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 -#define TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 -#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 -#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 -#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 -#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 -#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 -#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 -#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 -}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 092cb0c1afcb..b5ebc697d05f 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -12,6 +12,8 @@ /********************/ /* ETH FW CONSTANTS */ /********************/ +#define ETH_HSI_VER_MAJOR 3 +#define ETH_HSI_VER_MINOR 0 #define ETH_CACHE_LINE_SIZE 64 #define ETH_MAX_RAMROD_PER_CON 8 @@ -57,19 +59,6 @@ #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 #define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 -/* Queue Zone sizes */ -#define TSTORM_QZONE_SIZE 0 -#define MSTORM_QZONE_SIZE sizeof(struct mstorm_eth_queue_zone) -#define USTORM_QZONE_SIZE sizeof(struct ustorm_eth_queue_zone) -#define XSTORM_QZONE_SIZE 0 -#define YSTORM_QZONE_SIZE sizeof(struct ystorm_eth_queue_zone) -#define PSTORM_QZONE_SIZE 0 - -/* Interrupt coalescing TimeSet */ -struct coalescing_timeset { - u8 timeset; - u8 valid; -}; struct eth_tx_1st_bd_flags { u8 bitfields; @@ -97,12 +86,12 @@ struct eth_tx_data_1st_bd { u8 nbds; struct eth_tx_1st_bd_flags bd_flags; __le16 bitfields; -#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_MASK 0x1 -#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT 0 +#define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 #define ETH_TX_DATA_1ST_BD_RESERVED0_SHIFT 1 -#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_MASK 0x3FFF -#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_SHIFT 2 +#define ETH_TX_DATA_1ST_BD_PKT_LEN_MASK 0x3FFF +#define ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT 2 }; /* The parsing information data for the second tx bd of a given packet. 
*/ @@ -136,28 +125,51 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; +struct eth_fast_path_cqe_fw_debug { + u8 reserved0; + u8 reserved1; + __le16 reserved2; +}; + +/* tunneling parsing flags */ +struct eth_tunnel_parsing_flags { + u8 flags; +#define ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 +#define ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 +#define ETH_TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 +#define ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 +#define ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 +#define ETH_TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 +#define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 +}; + /* Regular ETH Rx FP CQE. */ struct eth_fast_path_rx_reg_cqe { - u8 type; - u8 bitfields; + u8 type; + u8 bitfields; #define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK 0x7 #define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT 0 #define ETH_FAST_PATH_RX_REG_CQE_TC_MASK 0xF #define ETH_FAST_PATH_RX_REG_CQE_TC_SHIFT 3 #define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_MASK 0x1 #define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_SHIFT 7 - __le16 pkt_len; - struct parsing_and_err_flags pars_flags; - __le16 vlan_tag; - __le32 rss_hash; - __le16 len_on_first_bd; - u8 placement_offset; - struct tunnel_parsing_flags tunnel_pars_flags; - u8 bd_num; - u8 reserved[7]; - u32 fw_debug; - u8 reserved1[3]; - u8 flags; + __le16 pkt_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; + struct eth_tunnel_parsing_flags tunnel_pars_flags; + u8 bd_num; + u8 reserved[7]; + struct eth_fast_path_cqe_fw_debug fw_debug; + u8 reserved1[3]; + u8 flags; #define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 #define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 #define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 @@ -207,11 +219,11 @@ struct eth_fast_path_rx_tpa_start_cqe { __le32 rss_hash; __le16 len_on_first_bd; u8 placement_offset; - struct tunnel_parsing_flags tunnel_pars_flags; + struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 tpa_agg_index; u8 header_len; __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; - u32 fw_debug; + struct eth_fast_path_cqe_fw_debug fw_debug; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -264,12 +276,25 @@ enum eth_rx_cqe_type { MAX_ETH_RX_CQE_TYPE }; -/* ETH Rx producers data */ -struct eth_rx_prod_data { - __le16 bd_prod; - __le16 cqe_prod; - __le16 reserved; - __le16 reserved1; +enum eth_rx_tunn_type { + ETH_RX_NO_TUNN, + ETH_RX_TUNN_GENEVE, + ETH_RX_TUNN_GRE, + ETH_RX_TUNN_VXLAN, + MAX_ETH_RX_TUNN_TYPE +}; + +/* Aggregation end reason. 
*/ +enum eth_tpa_end_reason { + ETH_AGG_END_UNUSED, + ETH_AGG_END_SP_UPDATE, + ETH_AGG_END_MAX_LEN, + ETH_AGG_END_LAST_SEG, + ETH_AGG_END_TIMEOUT, + ETH_AGG_END_NOT_CONSISTENT, + ETH_AGG_END_OUT_OF_ORDER, + ETH_AGG_END_NON_TPA_SEG, + MAX_ETH_TPA_END_REASON }; /* The first tx bd of a given packet */ @@ -337,21 +362,18 @@ union eth_tx_bd_types { }; /* Mstorm Queue Zone */ -struct mstorm_eth_queue_zone { - struct eth_rx_prod_data rx_producers; - __le32 reserved[2]; -}; - -/* Ustorm Queue Zone */ -struct ustorm_eth_queue_zone { - struct coalescing_timeset int_coalescing_timeset; - __le16 reserved[3]; +enum eth_tx_tunn_type { + ETH_TX_TUNN_GENEVE, + ETH_TX_TUNN_TTAG, + ETH_TX_TUNN_GRE, + ETH_TX_TUNN_VXLAN, + MAX_ETH_TX_TUNN_TYPE }; /* Ystorm Queue Zone */ -struct ystorm_eth_queue_zone { - struct coalescing_timeset int_coalescing_timeset; - __le16 reserved[3]; +struct xstorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + u8 reserved[7]; }; /* ETH doorbell data */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 6ae8cb4a61d3..f8ff71126d9e 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -113,6 +113,7 @@ struct qed_queue_start_common_params { u8 vport_id; u16 sb; u16 sb_idx; + u16 vf_qid; }; struct qed_tunn_params { -- cgit From 3953c46c3ac7eef31a9935427371c6f54a22f1ba Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:40 -0300 Subject: sk_buff: allow segmenting based on frag sizes This patch allows segmenting an skb based on its frag sizes instead of on a fixed value. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ee38a4127475..329a0a9ef671 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -301,6 +301,11 @@ struct sk_buff; #endif extern int sysctl_max_skb_frags; +/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to + * segment using its current segmentation instead. + */ +#define GSO_BY_FRAGS 0xFFFF + typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { -- cgit From ae7ef81ef000adeee7a87585b9135ff8a8064acc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:41 -0300 Subject: skbuff: introduce skb_gso_validate_mtu skb_gso_network_seglen is not enough for checking fragment sizes if the skb is using GSO_BY_FRAGS, as we have to check frag by frag. This patch introduces skb_gso_validate_mtu, based on the former, which wraps that use case inside it, as all calls to skb_gso_network_seglen were there to validate whether the packet fits a given MTU, and improves the check. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 329a0a9ef671..aa3f9d7e8d5c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2992,6 +2992,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); +bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); -- cgit From 90017accff61ae89283ad9a51f9ac46ca01633fb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:43 -0300 Subject: sctp: Add GSO support SCTP has the peculiarity that its packets cannot simply be segmented to (P)MTU. Its chunks must be contained in IP segments, padding respected. So we can't just generate a big skb, set gso_size to the fragmentation point and deliver it to the IP layer. This patch takes a different approach. SCTP will now build a skb as it would be if it was received using GRO. That is, there will be a cover skb with protocol headers and children ones containing the actual segments, already segmented in a way that respects SCTP RFCs. With that, we can tell skb_segment() to just split based on frag_list, trusting its sizes are already in accordance. This way SCTP can benefit from GSO and instead of passing several packets through the stack, it can pass a single large packet. v2: - Added support for receiving GSO frames, as requested by Dave Miller. - Clear skb->cb if packet is GSO (otherwise it's not used by SCTP) - Added heuristics similar to what we have in TCP for not generating single GSO packets that fill the cwnd. v3: - consider sctphdr size in skb_gso_transport_seglen() - rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for unsupported GSO") Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +++++-- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 ++ include/net/sctp/sctp.h | 4 ++++ include/net/sctp/structs.h | 5 +++++ 5 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index aa7b2400f98c..9c6c8ef2e9e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,8 +53,9 @@ enum { * headers in software. */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ + NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_TUNNEL_REMCSUM_BIT, + NETIF_F_GSO_SCTP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -128,6 +129,7 @@ enum { #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) +#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) @@ -166,7 +168,8 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks.
*/ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ + NETIF_F_GSO_SCTP) /* * If one device supports one of these features, then enable them diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f45929ce8157..fa6df2699532 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa3f9d7e8d5c..dc0fca747c5e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -487,6 +487,8 @@ enum { SKB_GSO_PARTIAL = 1 << 13, SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + + SKB_GSO_SCTP = 1 << 15, }; #if BITS_PER_LONG > 32 diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b392ac8382f2..632e205ca54b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); int sctp_remaddr_proc_init(struct net *net); void sctp_remaddr_proc_exit(struct net *net); +/* + * sctp/offload.c + */ +int sctp_offload_init(void); /* * Module global variables diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16b013a6191c..83c5ec58b93a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -566,6 +566,9 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that * case, we point at the "current" one, whatever that means @@ -696,6 +699,8 @@ struct sctp_packet { size_t overhead; /* This is the total size of all chunks INCLUDING padding. */ size_t size; + /* This is the maximum size this packet may have */ + size_t max_size; /* The packet is destined for this transport address. * The function we finally use to pass down to the next lower -- cgit From a91eb52abb504a1dd3248a5d07b54e7f95d5fcf1 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:32 +0300 Subject: qed: Revisit chain implementation RoCE driver is going to need a 32-bit chain [current chain implementation for qed* currently supports only 16-bit producer/consumer chains]. This patch adds said support, as well as doing other slight tweaks and modifications to qed's chain API. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_chain.h | 553 +++++++++++++++++++++++++----------------- include/linux/qed/qed_if.h | 3 +- 2 files changed, 336 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 5f8fcaaa6504..eceaa9ed2ae9 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -47,16 +47,56 @@ enum qed_chain_use_mode { QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ }; +enum qed_chain_cnt_type { + /* The chain's size/prod/cons are kept in 16-bit variables */ + QED_CHAIN_CNT_TYPE_U16, + + /* The chain's size/prod/cons are kept in 32-bit variables */ + QED_CHAIN_CNT_TYPE_U32, +}; + struct qed_chain_next { struct regpair next_phys; void *next_virt; }; +struct qed_chain_pbl_u16 { + u16 prod_page_idx; + u16 cons_page_idx; +}; + +struct qed_chain_pbl_u32 { + u32 prod_page_idx; + u32 cons_page_idx; +}; + struct qed_chain_pbl { + /* Base address of a pre-allocated buffer for pbl */ dma_addr_t p_phys_table; void *p_virt_table; - u16 prod_page_idx; - u16 cons_page_idx; + + /* Table for keeping the virtual addresses of the chain pages, + * respectively to the physical addresses in the pbl table. + */ + void **pp_virt_addr_tbl; + + /* Index to current used page by producer/consumer */ + union { + struct qed_chain_pbl_u16 pbl16; + struct qed_chain_pbl_u32 pbl32; + } u; +}; + +struct qed_chain_u16 { + /* Cyclic index of next element to produce/consme */ + u16 prod_idx; + u16 cons_idx; +}; + +struct qed_chain_u32 { + /* Cyclic index of next element to produce/consme */ + u32 prod_idx; + u32 cons_idx; }; struct qed_chain { @@ -64,13 +104,25 @@ struct qed_chain { dma_addr_t p_phys_addr; void *p_prod_elem; void *p_cons_elem; - u16 page_cnt; + enum qed_chain_mode mode; enum qed_chain_use_mode intended_use; /* used to produce/consume */ - u16 capacity; /*< number of _usable_ elements */ - u16 size; /* number of elements */ - u16 prod_idx; - u16 cons_idx; + enum qed_chain_cnt_type cnt_type; + + union { + struct qed_chain_u16 chain16; + struct qed_chain_u32 chain32; + } u; + + u32 page_cnt; + + /* Number of elements - capacity is for usable elements only, + * while size will contain total number of elements [for entire chain]. 
+ */ + u32 capacity; + u32 size; + + /* Elements information for fast calculations */ u16 elem_per_page; u16 elem_per_page_mask; u16 elem_unusable; @@ -96,66 +148,69 @@ struct qed_chain { #define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode)) +#define is_chain_u16(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16) +#define is_chain_u32(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32) + /* Accessors */ static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain) { - return p_chain->prod_idx; + return p_chain->u.chain16.prod_idx; } static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain) { - return p_chain->cons_idx; + return p_chain->u.chain16.cons_idx; +} + +static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) +{ + return p_chain->u.chain32.cons_idx; } static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { u16 used; - /* we don't need to trancate upon assignmet, as we assign u32->u16 */ - used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) - - (u32)p_chain->cons_idx; + used = (u16) (((u32)0x10000 + + (u32)p_chain->u.chain16.prod_idx) - + (u32)p_chain->u.chain16.cons_idx); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->prod_idx / p_chain->elem_per_page - - p_chain->cons_idx / p_chain->elem_per_page; + used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - + p_chain->u.chain16.cons_idx / p_chain->elem_per_page; - return p_chain->capacity - used; + return (u16)(p_chain->capacity - used); } -static inline u8 qed_chain_is_full(struct qed_chain *p_chain) +static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { - return qed_chain_get_elem_left(p_chain) == p_chain->capacity; -} + u32 used; -static inline u8 qed_chain_is_empty(struct qed_chain *p_chain) -{ - return qed_chain_get_elem_left(p_chain) == 0; -} + used = (u32) (((u64)0x100000000ULL + + (u64)p_chain->u.chain32.prod_idx) - + (u64)p_chain->u.chain32.cons_idx); + if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) + used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - + p_chain->u.chain32.cons_idx / p_chain->elem_per_page; -static inline u16 qed_chain_get_elem_per_page( - struct qed_chain *p_chain) -{ - return p_chain->elem_per_page; + return p_chain->capacity - used; } -static inline u16 qed_chain_get_usable_per_page( - struct qed_chain *p_chain) +static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain) { return p_chain->usable_per_page; } -static inline u16 qed_chain_get_unusable_per_page( - struct qed_chain *p_chain) +static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) { return p_chain->elem_unusable; } -static inline u16 qed_chain_get_size(struct qed_chain *p_chain) +static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) { - return p_chain->size; + return p_chain->page_cnt; } -static inline dma_addr_t -qed_chain_get_pbl_phys(struct qed_chain *p_chain) +static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) { return p_chain->pbl.p_phys_table; } @@ -172,64 +227,62 @@ qed_chain_get_pbl_phys(struct qed_chain *p_chain) */ static inline void qed_chain_advance_page(struct qed_chain *p_chain, - void **p_next_elem, - u16 *idx_to_inc, - u16 *page_to_inc) + void **p_next_elem, void *idx_to_inc, void *page_to_inc) { + struct qed_chain_next *p_next = NULL; + u32 page_index = 0; switch (p_chain->mode) { case QED_CHAIN_MODE_NEXT_PTR: - { - struct qed_chain_next *p_next = *p_next_elem; + p_next = *p_next_elem; *p_next_elem = 
p_next->next_virt; - *idx_to_inc += p_chain->elem_unusable; + if (is_chain_u16(p_chain)) + *(u16 *)idx_to_inc += p_chain->elem_unusable; + else + *(u32 *)idx_to_inc += p_chain->elem_unusable; break; - } case QED_CHAIN_MODE_SINGLE: *p_next_elem = p_chain->p_virt_addr; break; case QED_CHAIN_MODE_PBL: - /* It is assumed pages are sequential, next element needs - * to change only when passing going back to first from last. - */ - if (++(*page_to_inc) == p_chain->page_cnt) { - *page_to_inc = 0; - *p_next_elem = p_chain->p_virt_addr; + if (is_chain_u16(p_chain)) { + if (++(*(u16 *)page_to_inc) == p_chain->page_cnt) + *(u16 *)page_to_inc = 0; + page_index = *(u16 *)page_to_inc; + } else { + if (++(*(u32 *)page_to_inc) == p_chain->page_cnt) + *(u32 *)page_to_inc = 0; + page_index = *(u32 *)page_to_inc; } + *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; } } #define is_unusable_idx(p, idx) \ - (((p)->idx & (p)->elem_per_page_mask) == (p)->usable_per_page) + (((p)->u.chain16.idx & (p)->elem_per_page_mask) == (p)->usable_per_page) + +#define is_unusable_idx_u32(p, idx) \ + (((p)->u.chain32.idx & (p)->elem_per_page_mask) == (p)->usable_per_page) +#define is_unusable_next_idx(p, idx) \ + ((((p)->u.chain16.idx + 1) & (p)->elem_per_page_mask) == \ + (p)->usable_per_page) -#define is_unusable_next_idx(p, idx) \ - ((((p)->idx + 1) & (p)->elem_per_page_mask) == (p)->usable_per_page) +#define is_unusable_next_idx_u32(p, idx) \ + ((((p)->u.chain32.idx + 1) & (p)->elem_per_page_mask) == \ + (p)->usable_per_page) -#define test_ans_skip(p, idx) \ +#define test_and_skip(p, idx) \ do { \ - if (is_unusable_idx(p, idx)) { \ - (p)->idx += (p)->elem_unusable; \ + if (is_chain_u16(p)) { \ + if (is_unusable_idx(p, idx)) \ + (p)->u.chain16.idx += (p)->elem_unusable; \ + } else { \ + if (is_unusable_idx_u32(p, idx)) \ + (p)->u.chain32.idx += (p)->elem_unusable; \ } \ } while (0) -/** - * @brief qed_chain_return_multi_produced - - * - * A chain in which the driver "Produces" elements should use this API - * to indicate previous produced elements are now consumed. 
- * - * @param p_chain - * @param num - */ -static inline void -qed_chain_return_multi_produced(struct qed_chain *p_chain, - u16 num) -{ - p_chain->cons_idx += num; - test_ans_skip(p_chain, cons_idx); -} - /** * @brief qed_chain_return_produced - * @@ -240,8 +293,11 @@ qed_chain_return_multi_produced(struct qed_chain *p_chain, */ static inline void qed_chain_return_produced(struct qed_chain *p_chain) { - p_chain->cons_idx++; - test_ans_skip(p_chain, cons_idx); + if (is_chain_u16(p_chain)) + p_chain->u.chain16.cons_idx++; + else + p_chain->u.chain32.cons_idx++; + test_and_skip(p_chain, cons_idx); } /** @@ -257,21 +313,33 @@ static inline void qed_chain_return_produced(struct qed_chain *p_chain) */ static inline void *qed_chain_produce(struct qed_chain *p_chain) { - void *ret = NULL; - - if ((p_chain->prod_idx & p_chain->elem_per_page_mask) == - p_chain->next_page_mask) { - qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, - &p_chain->prod_idx, - &p_chain->pbl.prod_page_idx); + void *p_ret = NULL, *p_prod_idx, *p_prod_page_idx; + + if (is_chain_u16(p_chain)) { + if ((p_chain->u.chain16.prod_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_prod_idx = &p_chain->u.chain16.prod_idx; + p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + p_prod_idx, p_prod_page_idx); + } + p_chain->u.chain16.prod_idx++; + } else { + if ((p_chain->u.chain32.prod_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_prod_idx = &p_chain->u.chain32.prod_idx; + p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + p_prod_idx, p_prod_page_idx); + } + p_chain->u.chain32.prod_idx++; } - ret = p_chain->p_prod_elem; - p_chain->prod_idx++; + p_ret = p_chain->p_prod_elem; p_chain->p_prod_elem = (void *)(((u8 *)p_chain->p_prod_elem) + p_chain->elem_size); - return ret; + return p_ret; } /** @@ -282,9 +350,9 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) * @param p_chain * @param num * - * @return u16, number of unusable BDs + * @return number of unusable BDs */ -static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) +static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain) { return p_chain->capacity; } @@ -297,11 +365,13 @@ static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) * * @param p_chain */ -static inline void -qed_chain_recycle_consumed(struct qed_chain *p_chain) +static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain) { - test_ans_skip(p_chain, prod_idx); - p_chain->prod_idx++; + test_and_skip(p_chain, prod_idx); + if (is_chain_u16(p_chain)) + p_chain->u.chain16.prod_idx++; + else + p_chain->u.chain32.prod_idx++; } /** @@ -316,21 +386,33 @@ qed_chain_recycle_consumed(struct qed_chain *p_chain) */ static inline void *qed_chain_consume(struct qed_chain *p_chain) { - void *ret = NULL; - - if ((p_chain->cons_idx & p_chain->elem_per_page_mask) == - p_chain->next_page_mask) { + void *p_ret = NULL, *p_cons_idx, *p_cons_page_idx; + + if (is_chain_u16(p_chain)) { + if ((p_chain->u.chain16.cons_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) { + p_cons_idx = &p_chain->u.chain16.cons_idx; + p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + p_cons_idx, p_cons_page_idx); + } + p_chain->u.chain16.cons_idx++; + } else { + if ((p_chain->u.chain32.cons_idx & + p_chain->elem_per_page_mask) == p_chain->next_page_mask) 
{ + p_cons_idx = &p_chain->u.chain32.cons_idx; + p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, - &p_chain->cons_idx, - &p_chain->pbl.cons_page_idx); + p_cons_idx, p_cons_page_idx); + } + p_chain->u.chain32.cons_idx++; } - ret = p_chain->p_cons_elem; - p_chain->cons_idx++; + p_ret = p_chain->p_cons_elem; p_chain->p_cons_elem = (void *)(((u8 *)p_chain->p_cons_elem) + p_chain->elem_size); - return ret; + return p_ret; } /** @@ -340,16 +422,33 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) */ static inline void qed_chain_reset(struct qed_chain *p_chain) { - int i; - - p_chain->prod_idx = 0; - p_chain->cons_idx = 0; - p_chain->p_cons_elem = p_chain->p_virt_addr; - p_chain->p_prod_elem = p_chain->p_virt_addr; + u32 i; + + if (is_chain_u16(p_chain)) { + p_chain->u.chain16.prod_idx = 0; + p_chain->u.chain16.cons_idx = 0; + } else { + p_chain->u.chain32.prod_idx = 0; + p_chain->u.chain32.cons_idx = 0; + } + p_chain->p_cons_elem = p_chain->p_virt_addr; + p_chain->p_prod_elem = p_chain->p_virt_addr; if (p_chain->mode == QED_CHAIN_MODE_PBL) { - p_chain->pbl.prod_page_idx = p_chain->page_cnt - 1; - p_chain->pbl.cons_page_idx = p_chain->page_cnt - 1; + /* Use (page_cnt - 1) as a reset value for the prod/cons page's + * indices, to avoid unnecessary page advancing on the first + * call to qed_chain_produce/consume. Instead, the indices + * will be advanced to page_cnt and then will be wrapped to 0. + */ + u32 reset_val = p_chain->page_cnt - 1; + + if (is_chain_u16(p_chain)) { + p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val; + p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val; + } else { + p_chain->pbl.u.pbl32.prod_page_idx = reset_val; + p_chain->pbl.u.pbl32.cons_page_idx = reset_val; + } } switch (p_chain->intended_use) { @@ -377,168 +476,184 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) * @param intended_use * @param mode */ -static inline void qed_chain_init(struct qed_chain *p_chain, - void *p_virt_addr, - dma_addr_t p_phys_addr, - u16 page_cnt, - u8 elem_size, - enum qed_chain_use_mode intended_use, - enum qed_chain_mode mode) +static inline void qed_chain_init_params(struct qed_chain *p_chain, + u32 page_cnt, + u8 elem_size, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + enum qed_chain_cnt_type cnt_type) { /* chain fixed parameters */ - p_chain->p_virt_addr = p_virt_addr; - p_chain->p_phys_addr = p_phys_addr; + p_chain->p_virt_addr = NULL; + p_chain->p_phys_addr = 0; p_chain->elem_size = elem_size; - p_chain->page_cnt = page_cnt; + p_chain->intended_use = intended_use; p_chain->mode = mode; + p_chain->cnt_type = cnt_type; - p_chain->intended_use = intended_use; p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); - p_chain->usable_per_page = - USABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->capacity = p_chain->usable_per_page * page_cnt; - p_chain->size = p_chain->elem_per_page * page_cnt; + p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode); p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; - p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->next_page_mask = (p_chain->usable_per_page & p_chain->elem_per_page_mask); - if (mode == QED_CHAIN_MODE_NEXT_PTR) { - struct qed_chain_next *p_next; - u16 i; - - for (i = 0; i < page_cnt - 1; i++) { - /* Increment mem_phy to the next page. */ - p_phys_addr += QED_CHAIN_PAGE_SIZE; - - /* Initialize the physical address of the next page. 
*/ - p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + - elem_size * - p_chain-> - usable_per_page); - - p_next->next_phys.lo = DMA_LO_LE(p_phys_addr); - p_next->next_phys.hi = DMA_HI_LE(p_phys_addr); - - /* Initialize the virtual address of the next page. */ - p_next->next_virt = (void *)((u8 *)p_virt_addr + - QED_CHAIN_PAGE_SIZE); - - /* Move to the next page. */ - p_virt_addr = p_next->next_virt; - } - - /* Last page's next should point to beginning of the chain */ - p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + - elem_size * - p_chain->usable_per_page); + p_chain->page_cnt = page_cnt; + p_chain->capacity = p_chain->usable_per_page * page_cnt; + p_chain->size = p_chain->elem_per_page * page_cnt; - p_next->next_phys.lo = DMA_LO_LE(p_chain->p_phys_addr); - p_next->next_phys.hi = DMA_HI_LE(p_chain->p_phys_addr); - p_next->next_virt = p_chain->p_virt_addr; - } - qed_chain_reset(p_chain); + p_chain->pbl.p_phys_table = 0; + p_chain->pbl.p_virt_table = NULL; + p_chain->pbl.pp_virt_addr_tbl = NULL; } /** - * @brief qed_chain_pbl_init - Initalizes a basic pbl chain - * struct + * @brief qed_chain_init_mem - + * + * Initalizes a basic chain struct with its chain buffers + * * @param p_chain * @param p_virt_addr virtual address of allocated buffer's beginning * @param p_phys_addr physical address of allocated buffer's beginning - * @param page_cnt number of pages in the allocated buffer - * @param elem_size size of each element in the chain - * @param use_mode - * @param p_phys_pbl pointer to a pre-allocated side table - * which will hold physical page addresses. - * @param p_virt_pbl pointer to a pre allocated side table - * which will hold virtual page addresses. + * */ -static inline void -qed_chain_pbl_init(struct qed_chain *p_chain, - void *p_virt_addr, - dma_addr_t p_phys_addr, - u16 page_cnt, - u8 elem_size, - enum qed_chain_use_mode use_mode, - dma_addr_t p_phys_pbl, - dma_addr_t *p_virt_pbl) +static inline void qed_chain_init_mem(struct qed_chain *p_chain, + void *p_virt_addr, dma_addr_t p_phys_addr) { - dma_addr_t *p_pbl_dma = p_virt_pbl; - int i; - - qed_chain_init(p_chain, p_virt_addr, p_phys_addr, page_cnt, - elem_size, use_mode, QED_CHAIN_MODE_PBL); + p_chain->p_virt_addr = p_virt_addr; + p_chain->p_phys_addr = p_phys_addr; +} +/** + * @brief qed_chain_init_pbl_mem - + * + * Initalizes a basic chain struct with its pbl buffers + * + * @param p_chain + * @param p_virt_pbl pointer to a pre allocated side table which will hold + * virtual page addresses. + * @param p_phys_pbl pointer to a pre-allocated side table which will hold + * physical page addresses. + * @param pp_virt_addr_tbl + * pointer to a pre-allocated side table which will hold + * the virtual addresses of the chain pages. 
+ * + */ +static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, + void *p_virt_pbl, + dma_addr_t p_phys_pbl, + void **pp_virt_addr_tbl) +{ p_chain->pbl.p_phys_table = p_phys_pbl; p_chain->pbl.p_virt_table = p_virt_pbl; - - /* Fill the PBL with physical addresses*/ - for (i = 0; i < page_cnt; i++) { - *p_pbl_dma = p_phys_addr; - p_phys_addr += QED_CHAIN_PAGE_SIZE; - p_pbl_dma++; - } + p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; } /** - * @brief qed_chain_set_prod - sets the prod to the given - * value + * @brief qed_chain_init_next_ptr_elem - + * + * Initalizes a next pointer element + * + * @param p_chain + * @param p_virt_curr virtual address of a chain page of which the next + * pointer element is initialized + * @param p_virt_next virtual address of the next chain page + * @param p_phys_next physical address of the next chain page * - * @param prod_idx - * @param p_prod_elem */ -static inline void qed_chain_set_prod(struct qed_chain *p_chain, - u16 prod_idx, - void *p_prod_elem) +static inline void +qed_chain_init_next_ptr_elem(struct qed_chain *p_chain, + void *p_virt_curr, + void *p_virt_next, dma_addr_t p_phys_next) { - p_chain->prod_idx = prod_idx; - p_chain->p_prod_elem = p_prod_elem; + struct qed_chain_next *p_next; + u32 size; + + size = p_chain->elem_size * p_chain->usable_per_page; + p_next = (struct qed_chain_next *)((u8 *)p_virt_curr + size); + + DMA_REGPAIR_LE(p_next->next_phys, p_phys_next); + + p_next->next_virt = p_virt_next; } /** - * @brief qed_chain_get_elem - + * @brief qed_chain_get_last_elem - * - * get a pointer to an element represented by absolute idx + * Returns a pointer to the last element of the chain * * @param p_chain - * @assumption p_chain->size is a power of 2 * - * @return void*, a pointer to next element + * @return void* */ -static inline void *qed_chain_sge_get_elem(struct qed_chain *p_chain, - u16 idx) +static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) { - void *ret = NULL; - - if (idx >= p_chain->size) - return NULL; + struct qed_chain_next *p_next = NULL; + void *p_virt_addr = NULL; + u32 size, last_page_idx; - ret = (u8 *)p_chain->p_virt_addr + p_chain->elem_size * idx; + if (!p_chain->p_virt_addr) + goto out; - return ret; + switch (p_chain->mode) { + case QED_CHAIN_MODE_NEXT_PTR: + size = p_chain->elem_size * p_chain->usable_per_page; + p_virt_addr = p_chain->p_virt_addr; + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + size); + while (p_next->next_virt != p_chain->p_virt_addr) { + p_virt_addr = p_next->next_virt; + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + size); + } + break; + case QED_CHAIN_MODE_SINGLE: + p_virt_addr = p_chain->p_virt_addr; + break; + case QED_CHAIN_MODE_PBL: + last_page_idx = p_chain->page_cnt - 1; + p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + break; + } + /* p_virt_addr points at this stage to the last page of the chain */ + size = p_chain->elem_size * (p_chain->usable_per_page - 1); + p_virt_addr = (u8 *)p_virt_addr + size; +out: + return p_virt_addr; } /** - * @brief qed_chain_sge_inc_cons_prod + * @brief qed_chain_set_prod - sets the prod to the given value * - * for sge chains, producer isn't increased serially, the ring - * is expected to be full at all times. Once elements are - * consumed, they are immediately produced. 
+ * @param prod_idx + * @param p_prod_elem + */ +static inline void qed_chain_set_prod(struct qed_chain *p_chain, + u32 prod_idx, void *p_prod_elem) +{ + if (is_chain_u16(p_chain)) + p_chain->u.chain16.prod_idx = (u16) prod_idx; + else + p_chain->u.chain32.prod_idx = prod_idx; + p_chain->p_prod_elem = p_prod_elem; +} + +/** + * @brief qed_chain_pbl_zero_mem - set chain memory to 0 * * @param p_chain - * @param cnt - * - * @return inline void */ -static inline void -qed_chain_sge_inc_cons_prod(struct qed_chain *p_chain, - u16 cnt) +static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) { - p_chain->prod_idx += cnt; - p_chain->cons_idx += cnt; + u32 i, page_cnt; + + if (p_chain->mode != QED_CHAIN_MODE_PBL) + return; + + page_cnt = qed_chain_get_page_cnt(p_chain); + + for (i = 0; i < page_cnt; i++) + memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + QED_CHAIN_PAGE_SIZE); } #endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4c29439f54bf..15efccfdc46e 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -325,7 +325,8 @@ struct qed_common_ops { int (*chain_alloc)(struct qed_dev *cdev, enum qed_chain_use_mode intended_use, enum qed_chain_mode mode, - u16 num_elems, + enum qed_chain_cnt_type cnt_type, + u32 num_elems, size_t elem_size, struct qed_chain *p_chain); -- cgit From 7a9b6b8f6e4c52b31830d32570c2a226e27651f9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:33 +0300 Subject: qed: Add common HSI for new protocols This adds the qed portion of the RoCE & iSCSI firmware HSI, as well as adding several new common HSI files which would be required by both qed and qed* protocols. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 190 ++++- include/linux/qed/iscsi_common.h | 1439 ++++++++++++++++++++++++++++++++++++ include/linux/qed/rdma_common.h | 44 ++ include/linux/qed/roce_common.h | 17 + include/linux/qed/storage_common.h | 91 +++ include/linux/qed/tcp_common.h | 226 ++++++ 6 files changed, 2006 insertions(+), 1 deletion(-) create mode 100644 include/linux/qed/iscsi_common.h create mode 100644 include/linux/qed/rdma_common.h create mode 100644 include/linux/qed/roce_common.h create mode 100644 include/linux/qed/storage_common.h create mode 100644 include/linux/qed/tcp_common.h (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 285189a5ea6d..077a3b6cc80f 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -12,6 +12,7 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 #define X_FINAL_CLEANUP_AGG_INT 1 +#define NUM_OF_GLOBAL_QUEUES 128 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 @@ -694,7 +695,10 @@ struct parsing_and_err_flags { #define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT 15 }; -/* Concrete Function ID. 
*/ +struct pb_context { + __le32 crc[4]; +}; + struct pxp_concrete_fid { __le16 fid; #define PXP_CONCRETE_FID_PFID_MASK 0xF @@ -761,6 +765,72 @@ struct pxp_ptt_entry { }; /* RSS hash type */ +struct rdif_task_context { + __le32 initial_ref_tag; + __le16 app_tag_value; + __le16 app_tag_mask; + u8 flags0; +#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0 +#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1 +#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1 +#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1 +#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2 +#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1 +#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3 +#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3 +#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4 +#define RDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1 +#define RDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6 +#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1 +#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 7 + u8 partial_dif_data[7]; + __le16 partial_crc_value; + __le16 partial_checksum_value; + __le32 offset_in_io; + __le16 flags1; +#define RDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0 +#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1 +#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2 +#define RDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4 +#define RDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5 +#define RDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7 +#define RDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6 +#define RDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3 +#define RDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9 +#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1 +#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11 +#define RDIF_TASK_CONTEXT_RESERVED0_MASK 0x1 +#define RDIF_TASK_CONTEXT_RESERVED0_SHIFT 12 +#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1 +#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 14 +#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1 +#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 15 + __le16 state; +#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_MASK 0xF +#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_SHIFT 0 +#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_MASK 0xF +#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_SHIFT 4 +#define RDIF_TASK_CONTEXT_ERRORINIO_MASK 0x1 +#define RDIF_TASK_CONTEXT_ERRORINIO_SHIFT 8 +#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1 +#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9 +#define RDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF +#define RDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 10 +#define RDIF_TASK_CONTEXT_RESERVED1_MASK 0x3 +#define RDIF_TASK_CONTEXT_RESERVED1_SHIFT 14 + __le32 reserved2; +}; + enum rss_hash_type { RSS_HASH_TYPE_DEFAULT = 0, RSS_HASH_TYPE_IPV4 = 1, @@ -789,4 +859,122 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; +struct tdif_task_context { + __le32 initial_ref_tag; + __le16 app_tag_value; + __le16 app_tag_mask; + __le16 partial_crc_valueB; + __le16 partial_checksum_valueB; + __le16 stateB; +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_MASK 0xF +#define 
TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_SHIFT 0 +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_MASK 0xF +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_SHIFT 4 +#define TDIF_TASK_CONTEXT_ERRORINIOB_MASK 0x1 +#define TDIF_TASK_CONTEXT_ERRORINIOB_SHIFT 8 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9 +#define TDIF_TASK_CONTEXT_RESERVED0_MASK 0x3F +#define TDIF_TASK_CONTEXT_RESERVED0_SHIFT 10 + u8 reserved1; + u8 flags0; +#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0 +#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1 +#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1 +#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1 +#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2 +#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1 +#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3 +#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3 +#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4 +#define TDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1 +#define TDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6 +#define TDIF_TASK_CONTEXT_RESERVED2_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED2_SHIFT 7 + __le32 flags1; +#define TDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0 +#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1 +#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2 +#define TDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4 +#define TDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5 +#define TDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7 +#define TDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6 +#define TDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3 +#define TDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9 +#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1 +#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11 +#define TDIF_TASK_CONTEXT_RESERVED3_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED3_SHIFT 12 +#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1 +#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13 +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_MASK 0xF +#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_SHIFT 14 +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_MASK 0xF +#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_SHIFT 18 +#define TDIF_TASK_CONTEXT_ERRORINIOA_MASK 0x1 +#define TDIF_TASK_CONTEXT_ERRORINIOA_SHIFT 22 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_MASK 0x1 +#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_SHIFT 23 +#define TDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF +#define TDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 24 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 28 +#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1 +#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 29 +#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1 +#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 30 +#define TDIF_TASK_CONTEXT_RESERVED4_MASK 0x1 +#define TDIF_TASK_CONTEXT_RESERVED4_SHIFT 31 + __le32 offset_in_iob; + __le16 partial_crc_value_a; + __le16 partial_checksum_valuea_; + __le32 offset_in_ioa; + u8 partial_dif_data_a[8]; + u8 partial_dif_data_b[8]; +}; + +struct timers_context { + __le32 logical_client0; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF +#define 
TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC0_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC0_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED0_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED0_SHIFT 30 + __le32 logical_client1; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC1_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC1_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED1_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED1_SHIFT 30 + __le32 logical_client2; +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 +#define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC2_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC2_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC2_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED2_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED2_SHIFT 30 + __le32 host_expiration_fields; +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK 0xFFFFFFF +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK 0x1 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28 +#define TIMERS_CONTEXT_RESERVED3_MASK 0x7 +#define TIMERS_CONTEXT_RESERVED3_SHIFT 29 +}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h new file mode 100644 index 000000000000..b3c0feb15ae9 --- /dev/null +++ b/include/linux/qed/iscsi_common.h @@ -0,0 +1,1439 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __ISCSI_COMMON__ +#define __ISCSI_COMMON__ +/**********************/ +/* ISCSI FW CONSTANTS */ +/**********************/ + +/* iSCSI HSI constants */ +#define ISCSI_DEFAULT_MTU (1500) + +/* Current iSCSI HSI version number composed of two fields (16 bit) */ +#define ISCSI_HSI_MAJOR_VERSION (0) +#define ISCSI_HSI_MINOR_VERSION (0) + +/* KWQ (kernel work queue) layer codes */ +#define ISCSI_SLOW_PATH_LAYER_CODE (6) + +/* CQE completion status */ +#define ISCSI_EQE_COMPLETION_SUCCESS (0x0) +#define ISCSI_EQE_RST_CONN_RCVD (0x1) + +/* iSCSI parameter defaults */ +#define ISCSI_DEFAULT_HEADER_DIGEST (0) +#define ISCSI_DEFAULT_DATA_DIGEST (0) +#define ISCSI_DEFAULT_INITIAL_R2T (1) +#define ISCSI_DEFAULT_IMMEDIATE_DATA (1) +#define ISCSI_DEFAULT_MAX_PDU_LENGTH (0x2000) +#define ISCSI_DEFAULT_FIRST_BURST_LENGTH (0x10000) +#define ISCSI_DEFAULT_MAX_BURST_LENGTH (0x40000) +#define ISCSI_DEFAULT_MAX_OUTSTANDING_R2T (1) + +/* iSCSI parameter limits */ +#define ISCSI_MIN_VAL_MAX_PDU_LENGTH (0x200) +#define ISCSI_MAX_VAL_MAX_PDU_LENGTH (0xffffff) +#define ISCSI_MIN_VAL_BURST_LENGTH (0x200) +#define ISCSI_MAX_VAL_BURST_LENGTH (0xffffff) +#define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T (1) +#define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T (0xff) + +/* iSCSI reserved params */ +#define ISCSI_ITT_ALL_ONES (0xffffffff) +#define ISCSI_TTT_ALL_ONES (0xffffffff) + +#define ISCSI_OPTION_1_OFF_CHIP_TCP 1 +#define ISCSI_OPTION_2_ON_CHIP_TCP 2 + +#define ISCSI_INITIATOR_MODE 0 +#define ISCSI_TARGET_MODE 1 + +/* iSCSI request op codes */ +#define ISCSI_OPCODE_NOP_OUT_NO_IMM (0) +#define ISCSI_OPCODE_NOP_OUT ( \ + ISCSI_OPCODE_NOP_OUT_NO_IMM | 0x40) +#define ISCSI_OPCODE_SCSI_CMD_NO_IMM (1) +#define ISCSI_OPCODE_SCSI_CMD ( \ + ISCSI_OPCODE_SCSI_CMD_NO_IMM | 0x40) +#define ISCSI_OPCODE_TMF_REQUEST_NO_IMM (2) +#define ISCSI_OPCODE_TMF_REQUEST ( \ + ISCSI_OPCODE_TMF_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM (3) +#define ISCSI_OPCODE_LOGIN_REQUEST ( \ + ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_TEXT_REQUEST_NO_IMM (4) +#define ISCSI_OPCODE_TEXT_REQUEST ( \ + ISCSI_OPCODE_TEXT_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_DATA_OUT (5) +#define ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM (6) +#define ISCSI_OPCODE_LOGOUT_REQUEST ( \ + ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM | 0x40) + +/* iSCSI response/messages op codes */ +#define ISCSI_OPCODE_NOP_IN (0x20) +#define ISCSI_OPCODE_SCSI_RESPONSE (0x21) +#define ISCSI_OPCODE_TMF_RESPONSE (0x22) +#define ISCSI_OPCODE_LOGIN_RESPONSE (0x23) +#define ISCSI_OPCODE_TEXT_RESPONSE (0x24) +#define ISCSI_OPCODE_DATA_IN (0x25) +#define ISCSI_OPCODE_LOGOUT_RESPONSE (0x26) +#define ISCSI_OPCODE_R2T (0x31) +#define ISCSI_OPCODE_ASYNC_MSG (0x32) +#define ISCSI_OPCODE_REJECT (0x3f) + +/* iSCSI stages */ +#define ISCSI_STAGE_SECURITY_NEGOTIATION (0) +#define ISCSI_STAGE_LOGIN_OPERATIONAL_NEGOTIATION (1) +#define ISCSI_STAGE_FULL_FEATURE_PHASE (3) + +/* iSCSI CQE errors */ +#define CQE_ERROR_BITMAP_DATA_DIGEST (0x08) +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN (0x10) +#define CQE_ERROR_BITMAP_DATA_TRUNCATED (0x20) + +struct cqe_error_bitmap { + u8 cqe_error_status_bits; +#define CQE_ERROR_BITMAP_DIF_ERR_BITS_MASK 0x7 +#define CQE_ERROR_BITMAP_DIF_ERR_BITS_SHIFT 0 +#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_MASK 0x1 +#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_SHIFT 3 +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_MASK 0x1 +#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_SHIFT 4 +#define CQE_ERROR_BITMAP_DATA_TRUNCATED_ERR_MASK 0x1 +#define 
CQE_ERROR_BITMAP_DATA_TRUNCATED_ERR_SHIFT 5 +#define CQE_ERROR_BITMAP_UNDER_RUN_ERR_MASK 0x1 +#define CQE_ERROR_BITMAP_UNDER_RUN_ERR_SHIFT 6 +#define CQE_ERROR_BITMAP_RESERVED2_MASK 0x1 +#define CQE_ERROR_BITMAP_RESERVED2_SHIFT 7 +}; + +union cqe_error_status { + u8 error_status; + struct cqe_error_bitmap error_bits; +}; + +struct data_hdr { + __le32 data[12]; +}; + +struct iscsi_async_msg_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_ASYNC_MSG_HDR_RSRV_MASK 0x7F +#define ISCSI_ASYNC_MSG_HDR_RSRV_SHIFT 0 +#define ISCSI_ASYNC_MSG_HDR_CONST1_MASK 0x1 +#define ISCSI_ASYNC_MSG_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_ASYNC_MSG_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_ASYNC_MSG_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_ASYNC_MSG_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_ASYNC_MSG_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 all_ones; + __le32 reserved1; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le16 param1_rsrv; + u8 async_vcode; + u8 async_event; + __le16 param3_rsrv; + __le16 param2_rsrv; + __le32 reserved7; +}; + +struct iscsi_sge { + struct regpair sge_addr; + __le16 sge_len; + __le16 reserved0; + __le32 reserved1; +}; + +struct iscsi_cached_sge_ctx { + struct iscsi_sge sge; + struct regpair reserved; + __le32 dsgl_curr_offset[2]; +}; + +struct iscsi_cmd_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_CMD_HDR_ATTR_MASK 0x7 +#define ISCSI_CMD_HDR_ATTR_SHIFT 0 +#define ISCSI_CMD_HDR_RSRV_MASK 0x3 +#define ISCSI_CMD_HDR_RSRV_SHIFT 3 +#define ISCSI_CMD_HDR_WRITE_MASK 0x1 +#define ISCSI_CMD_HDR_WRITE_SHIFT 5 +#define ISCSI_CMD_HDR_READ_MASK 0x1 +#define ISCSI_CMD_HDR_READ_SHIFT 6 +#define ISCSI_CMD_HDR_FINAL_MASK 0x1 +#define ISCSI_CMD_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_CMD_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_CMD_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_CMD_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_CMD_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 expected_transfer_length; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 cdb[4]; +}; + +struct iscsi_common_hdr { + u8 hdr_status; + u8 hdr_response; + u8 hdr_flags; + u8 hdr_first_byte; +#define ISCSI_COMMON_HDR_OPCODE_MASK 0x3F +#define ISCSI_COMMON_HDR_OPCODE_SHIFT 0 +#define ISCSI_COMMON_HDR_IMM_MASK 0x1 +#define ISCSI_COMMON_HDR_IMM_SHIFT 6 +#define ISCSI_COMMON_HDR_RSRV_MASK 0x1 +#define ISCSI_COMMON_HDR_RSRV_SHIFT 7 + __le32 hdr_second_dword; +#define ISCSI_COMMON_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_COMMON_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 lun_reserved[4]; + __le32 data[6]; +}; + +struct iscsi_conn_offload_params { + struct regpair sq_pbl_addr; + struct regpair r2tq_pbl_addr; + struct regpair xhq_pbl_addr; + struct regpair uhq_pbl_addr; + __le32 initial_ack; + __le16 physical_q0; + __le16 physical_q1; + u8 flags; +#define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0 +#define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT 1 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x3F +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 2 + u8 pbl_page_size_log; + u8 pbe_page_size_log; + u8 default_cq; + __le32 stat_sn; +}; + +struct iscsi_slow_path_hdr { + u8 op_code; + u8 flags; +#define ISCSI_SLOW_PATH_HDR_RESERVED0_MASK 0xF +#define 
ISCSI_SLOW_PATH_HDR_RESERVED0_SHIFT 0 +#define ISCSI_SLOW_PATH_HDR_LAYER_CODE_MASK 0x7 +#define ISCSI_SLOW_PATH_HDR_LAYER_CODE_SHIFT 4 +#define ISCSI_SLOW_PATH_HDR_RESERVED1_MASK 0x1 +#define ISCSI_SLOW_PATH_HDR_RESERVED1_SHIFT 7 +}; + +struct iscsi_conn_update_ramrod_params { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + u8 flags; +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_SHIFT 0 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DD_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DD_EN_SHIFT 1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_SHIFT 2 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_SHIFT 3 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0xF +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 4 + u8 reserved0[3]; + __le32 max_seq_size; + __le32 max_send_pdu_length; + __le32 max_recv_pdu_length; + __le32 first_seq_length; + __le32 exp_stat_sn; +}; + +struct iscsi_ext_cdb_cmd_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_EXT_CDB_CMD_HDR_ATTR_MASK 0x7 +#define ISCSI_EXT_CDB_CMD_HDR_ATTR_SHIFT 0 +#define ISCSI_EXT_CDB_CMD_HDR_RSRV_MASK 0x3 +#define ISCSI_EXT_CDB_CMD_HDR_RSRV_SHIFT 3 +#define ISCSI_EXT_CDB_CMD_HDR_WRITE_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_WRITE_SHIFT 5 +#define ISCSI_EXT_CDB_CMD_HDR_READ_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_READ_SHIFT 6 +#define ISCSI_EXT_CDB_CMD_HDR_FINAL_MASK 0x1 +#define ISCSI_EXT_CDB_CMD_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_EXT_CDB_CMD_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_EXT_CDB_CMD_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_EXT_CDB_CMD_HDR_CDB_SIZE_MASK 0xFF +#define ISCSI_EXT_CDB_CMD_HDR_CDB_SIZE_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 expected_transfer_length; + __le32 cmd_sn; + __le32 exp_stat_sn; + struct iscsi_sge cdb_sge; +}; + +struct iscsi_login_req_hdr { + u8 version_min; + u8 version_max; + u8 flags_attr; +#define ISCSI_LOGIN_REQ_HDR_NSG_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_NSG_SHIFT 0 +#define ISCSI_LOGIN_REQ_HDR_CSG_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_CSG_SHIFT 2 +#define ISCSI_LOGIN_REQ_HDR_RSRV_MASK 0x3 +#define ISCSI_LOGIN_REQ_HDR_RSRV_SHIFT 4 +#define ISCSI_LOGIN_REQ_HDR_C_MASK 0x1 +#define ISCSI_LOGIN_REQ_HDR_C_SHIFT 6 +#define ISCSI_LOGIN_REQ_HDR_T_MASK 0x1 +#define ISCSI_LOGIN_REQ_HDR_T_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 isid_TABC; + __le16 tsih; + __le16 isid_d; + __le32 itt; + __le16 reserved1; + __le16 cid; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved2[4]; +}; + +struct iscsi_logout_req_hdr { + __le16 reserved0; + u8 reason_code; + u8 opcode; + __le32 reserved1; + __le32 reserved2[2]; + __le32 itt; + __le16 reserved3; + __le16 cid; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved4[4]; +}; + +struct iscsi_data_out_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_DATA_OUT_HDR_RSRV_MASK 0x7F +#define ISCSI_DATA_OUT_HDR_RSRV_SHIFT 0 +#define ISCSI_DATA_OUT_HDR_FINAL_MASK 0x1 +#define ISCSI_DATA_OUT_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 reserved3; + __le32 exp_stat_sn; + __le32 reserved4; + 
__le32 data_sn; + __le32 buffer_offset; + __le32 reserved5; +}; + +struct iscsi_data_in_hdr { + u8 status_rsvd; + u8 reserved1; + u8 flags; +#define ISCSI_DATA_IN_HDR_STATUS_MASK 0x1 +#define ISCSI_DATA_IN_HDR_STATUS_SHIFT 0 +#define ISCSI_DATA_IN_HDR_UNDERFLOW_MASK 0x1 +#define ISCSI_DATA_IN_HDR_UNDERFLOW_SHIFT 1 +#define ISCSI_DATA_IN_HDR_OVERFLOW_MASK 0x1 +#define ISCSI_DATA_IN_HDR_OVERFLOW_SHIFT 2 +#define ISCSI_DATA_IN_HDR_RSRV_MASK 0x7 +#define ISCSI_DATA_IN_HDR_RSRV_SHIFT 3 +#define ISCSI_DATA_IN_HDR_ACK_MASK 0x1 +#define ISCSI_DATA_IN_HDR_ACK_SHIFT 6 +#define ISCSI_DATA_IN_HDR_FINAL_MASK 0x1 +#define ISCSI_DATA_IN_HDR_FINAL_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 data_sn; + __le32 buffer_offset; + __le32 residual_count; +}; + +struct iscsi_r2t_hdr { + u8 reserved0[3]; + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 r2t_sn; + __le32 buffer_offset; + __le32 desired_data_trns_len; +}; + +struct iscsi_nop_out_hdr { + __le16 reserved1; + u8 flags_attr; +#define ISCSI_NOP_OUT_HDR_RSRV_MASK 0x7F +#define ISCSI_NOP_OUT_HDR_RSRV_SHIFT 0 +#define ISCSI_NOP_OUT_HDR_CONST1_MASK 0x1 +#define ISCSI_NOP_OUT_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 reserved2; + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved3; + __le32 reserved4; + __le32 reserved5; + __le32 reserved6; +}; + +struct iscsi_nop_in_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_NOP_IN_HDR_RSRV_MASK 0x7F +#define ISCSI_NOP_IN_HDR_RSRV_SHIFT 0 +#define ISCSI_NOP_IN_HDR_CONST1_MASK 0x1 +#define ISCSI_NOP_IN_HDR_CONST1_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_NOP_IN_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_NOP_IN_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_NOP_IN_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_NOP_IN_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved5; + __le32 reserved6; + __le32 reserved7; +}; + +struct iscsi_login_response_hdr { + u8 version_active; + u8 version_max; + u8 flags_attr; +#define ISCSI_LOGIN_RESPONSE_HDR_NSG_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_NSG_SHIFT 0 +#define ISCSI_LOGIN_RESPONSE_HDR_CSG_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_CSG_SHIFT 2 +#define ISCSI_LOGIN_RESPONSE_HDR_RSRV_MASK 0x3 +#define ISCSI_LOGIN_RESPONSE_HDR_RSRV_SHIFT 4 +#define ISCSI_LOGIN_RESPONSE_HDR_C_MASK 0x1 +#define ISCSI_LOGIN_RESPONSE_HDR_C_SHIFT 6 +#define ISCSI_LOGIN_RESPONSE_HDR_T_MASK 0x1 +#define ISCSI_LOGIN_RESPONSE_HDR_T_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 isid_TABC; + __le16 tsih; + __le16 isid_d; + __le32 itt; + __le32 reserved1; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le16 reserved2; + u8 status_detail; + u8 status_class; + __le32 reserved4[2]; +}; + +struct iscsi_logout_response_hdr { + u8 reserved1; + u8 response; + u8 flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_LOGOUT_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_LOGOUT_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define 
ISCSI_LOGOUT_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_LOGOUT_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + __le32 reserved2[2]; + __le32 itt; + __le32 reserved3; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4; + __le16 time2retain; + __le16 time2wait; + __le32 reserved5[1]; +}; + +struct iscsi_text_request_hdr { + __le16 reserved0; + u8 flags_attr; +#define ISCSI_TEXT_REQUEST_HDR_RSRV_MASK 0x3F +#define ISCSI_TEXT_REQUEST_HDR_RSRV_SHIFT 0 +#define ISCSI_TEXT_REQUEST_HDR_C_MASK 0x1 +#define ISCSI_TEXT_REQUEST_HDR_C_SHIFT 6 +#define ISCSI_TEXT_REQUEST_HDR_F_MASK 0x1 +#define ISCSI_TEXT_REQUEST_HDR_F_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TEXT_REQUEST_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TEXT_REQUEST_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TEXT_REQUEST_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TEXT_REQUEST_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 reserved4[4]; +}; + +struct iscsi_text_response_hdr { + __le16 reserved1; + u8 flags; +#define ISCSI_TEXT_RESPONSE_HDR_RSRV_MASK 0x3F +#define ISCSI_TEXT_RESPONSE_HDR_RSRV_SHIFT 0 +#define ISCSI_TEXT_RESPONSE_HDR_C_MASK 0x1 +#define ISCSI_TEXT_RESPONSE_HDR_C_SHIFT 6 +#define ISCSI_TEXT_RESPONSE_HDR_F_MASK 0x1 +#define ISCSI_TEXT_RESPONSE_HDR_F_SHIFT 7 + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TEXT_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TEXT_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TEXT_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TEXT_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 ttt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4[3]; +}; + +struct iscsi_tmf_request_hdr { + __le16 reserved0; + u8 function; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TMF_REQUEST_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TMF_REQUEST_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TMF_REQUEST_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TMF_REQUEST_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 rtt; + __le32 cmd_sn; + __le32 exp_stat_sn; + __le32 ref_cmd_sn; + __le32 exp_data_sn; + __le32 reserved4[2]; +}; + +struct iscsi_tmf_response_hdr { + u8 reserved2; + u8 hdr_response; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_TMF_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_TMF_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair reserved0; + __le32 itt; + __le32 rtt; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 reserved4[3]; +}; + +struct iscsi_response_hdr { + u8 hdr_status; + u8 hdr_response; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_RESPONSE_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define ISCSI_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair lun; + __le32 itt; + __le32 snack_tag; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 exp_data_sn; + __le32 bi_residual_count; + __le32 residual_count; +}; + +struct iscsi_reject_hdr { + u8 reserved4; + u8 hdr_reason; + u8 hdr_flags; + u8 opcode; + __le32 hdr_second_dword; +#define ISCSI_REJECT_HDR_DATA_SEG_LEN_MASK 0xFFFFFF +#define ISCSI_REJECT_HDR_DATA_SEG_LEN_SHIFT 0 +#define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_MASK 0xFF +#define 
ISCSI_REJECT_HDR_TOTAL_AHS_LEN_SHIFT 24 + struct regpair reserved0; + __le32 reserved1; + __le32 reserved2; + __le32 stat_sn; + __le32 exp_cmd_sn; + __le32 max_cmd_sn; + __le32 data_sn; + __le32 reserved3[2]; +}; + +union iscsi_task_hdr { + struct iscsi_common_hdr common; + struct data_hdr data; + struct iscsi_cmd_hdr cmd; + struct iscsi_ext_cdb_cmd_hdr ext_cdb_cmd; + struct iscsi_login_req_hdr login_req; + struct iscsi_logout_req_hdr logout_req; + struct iscsi_data_out_hdr data_out; + struct iscsi_data_in_hdr data_in; + struct iscsi_r2t_hdr r2t; + struct iscsi_nop_out_hdr nop_out; + struct iscsi_nop_in_hdr nop_in; + struct iscsi_login_response_hdr login_response; + struct iscsi_logout_response_hdr logout_response; + struct iscsi_text_request_hdr text_request; + struct iscsi_text_response_hdr text_response; + struct iscsi_tmf_request_hdr tmf_request; + struct iscsi_tmf_response_hdr tmf_response; + struct iscsi_response_hdr response; + struct iscsi_reject_hdr reject; + struct iscsi_async_msg_hdr async_msg; +}; + +struct iscsi_cqe_common { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le32 reserved[3]; + union iscsi_task_hdr iscsi_hdr; +}; + +struct iscsi_cqe_solicited { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le16 itid; + u8 task_type; + u8 fw_dbg_field; + __le32 reserved1[2]; + union iscsi_task_hdr iscsi_hdr; +}; + +struct iscsi_cqe_unsolicited { + __le16 conn_id; + u8 cqe_type; + union cqe_error_status error_bitmap; + __le16 reserved0; + u8 reserved1; + u8 unsol_cqe_type; + struct regpair rqe_opaque; + union iscsi_task_hdr iscsi_hdr; +}; + +union iscsi_cqe { + struct iscsi_cqe_common cqe_common; + struct iscsi_cqe_solicited cqe_solicited; + struct iscsi_cqe_unsolicited cqe_unsolicited; +}; + +enum iscsi_cqes_type { + ISCSI_CQE_TYPE_SOLICITED = 1, + ISCSI_CQE_TYPE_UNSOLICITED, + ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE + , + ISCSI_CQE_TYPE_TASK_CLEANUP, + ISCSI_CQE_TYPE_DUMMY, + MAX_ISCSI_CQES_TYPE +}; + +enum iscsi_cqe_unsolicited_type { + ISCSI_CQE_UNSOLICITED_NONE, + ISCSI_CQE_UNSOLICITED_SINGLE, + ISCSI_CQE_UNSOLICITED_FIRST, + ISCSI_CQE_UNSOLICITED_MIDDLE, + ISCSI_CQE_UNSOLICITED_LAST, + MAX_ISCSI_CQE_UNSOLICITED_TYPE +}; + +struct iscsi_virt_sgl_ctx { + struct regpair sgl_base; + struct regpair dsgl_base; + __le32 sgl_initial_offset; + __le32 dsgl_initial_offset; + __le32 dsgl_curr_offset[2]; +}; + +struct iscsi_sgl_var_params { + u8 sgl_ptr; + u8 dsgl_ptr; + __le16 sge_offset; + __le16 dsge_offset; +}; + +struct iscsi_phys_sgl_ctx { + struct regpair sgl_base; + struct regpair dsgl_base; + u8 sgl_size; + u8 dsgl_size; + __le16 reserved; + struct iscsi_sgl_var_params var_params[2]; +}; + +union iscsi_data_desc_ctx { + struct iscsi_virt_sgl_ctx virt_sgl; + struct iscsi_phys_sgl_ctx phys_sgl; + struct iscsi_cached_sge_ctx cached_sge; +}; + +struct iscsi_debug_modes { + u8 flags; +#define ISCSI_DEBUG_MODES_ASSERT_IF_RX_CONN_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RX_CONN_ERROR_SHIFT 0 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_RESET_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_RESET_SHIFT 1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_FIN_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_FIN_SHIFT 2 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_CLEANUP_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_CLEANUP_SHIFT 3 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_SHIFT 4 +#define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_MASK 0x1 +#define 
ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_SHIFT 5 +#define ISCSI_DEBUG_MODES_RESERVED0_MASK 0x3 +#define ISCSI_DEBUG_MODES_RESERVED0_SHIFT 6 +}; + +struct iscsi_dif_flags { + u8 flags; +#define ISCSI_DIF_FLAGS_PROT_INTERVAL_SIZE_LOG_MASK 0xF +#define ISCSI_DIF_FLAGS_PROT_INTERVAL_SIZE_LOG_SHIFT 0 +#define ISCSI_DIF_FLAGS_DIF_TO_PEER_MASK 0x1 +#define ISCSI_DIF_FLAGS_DIF_TO_PEER_SHIFT 4 +#define ISCSI_DIF_FLAGS_HOST_INTERFACE_MASK 0x7 +#define ISCSI_DIF_FLAGS_HOST_INTERFACE_SHIFT 5 +}; + +enum iscsi_eqe_opcode { + ISCSI_EVENT_TYPE_INIT_FUNC = 0, + ISCSI_EVENT_TYPE_DESTROY_FUNC, + ISCSI_EVENT_TYPE_OFFLOAD_CONN, + ISCSI_EVENT_TYPE_UPDATE_CONN, + ISCSI_EVENT_TYPE_CLEAR_SQ, + ISCSI_EVENT_TYPE_TERMINATE_CONN, + ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE, + ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE, + RESERVED8, + RESERVED9, + ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10, + ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD, + ISCSI_EVENT_TYPE_ASYN_CLOSE_RCVD, + ISCSI_EVENT_TYPE_ASYN_SYN_RCVD, + ISCSI_EVENT_TYPE_ASYN_MAX_RT_TIME, + ISCSI_EVENT_TYPE_ASYN_MAX_RT_CNT, + ISCSI_EVENT_TYPE_ASYN_MAX_KA_PROBES_CNT, + ISCSI_EVENT_TYPE_ASYN_FIN_WAIT2, + ISCSI_EVENT_TYPE_ISCSI_CONN_ERROR, + ISCSI_EVENT_TYPE_TCP_CONN_ERROR, + ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES, + MAX_ISCSI_EQE_OPCODE +}; + +enum iscsi_error_types { + ISCSI_STATUS_NONE = 0, + ISCSI_CQE_ERROR_UNSOLICITED_RCV_ON_INVALID_CONN = 1, + ISCSI_CONN_ERROR_TASK_CID_MISMATCH, + ISCSI_CONN_ERROR_TASK_NOT_VALID, + ISCSI_CONN_ERROR_RQ_RING_IS_FULL, + ISCSI_CONN_ERROR_CMDQ_RING_IS_FULL, + ISCSI_CONN_ERROR_HQE_CACHING_FAILED, + ISCSI_CONN_ERROR_HEADER_DIGEST_ERROR, + ISCSI_CONN_ERROR_LOCAL_COMPLETION_ERROR, + ISCSI_CONN_ERROR_DATA_OVERRUN, + ISCSI_CONN_ERROR_OUT_OF_SGES_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_URG_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_IP_OPTIONS_ERROR, + ISCSI_CONN_ERROR_TCP_SEG_PROC_CONNECT_INVALID_WS_OPTION, + ISCSI_CONN_ERROR_TCP_IP_FRAGMENT_ERROR, + ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_LEN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_TYPE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_ITT_OUT_OF_RANGE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_TTT_OUT_OF_RANGE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SEG_LEN_EXCEEDS_PDU_SIZE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_OPCODE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_OPCODE_BEFORE_UPDATE, + ISCSI_CONN_ERROR_UNVALID_NOPIN_DSL, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_CARRIES_NO_DATA, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_IN_TTT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_OUT_ITT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_TTT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_BUFFER_OFFSET, + ISCSI_CONN_ERROR_PROTOCOL_ERR_BUFFER_OFFSET_OOO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_R2T_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_2, + ISCSI_CONN_ERROR_PROTOCOL_ERR_LUN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_F_BIT_ZERO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_F_BIT_ZERO_S_BIT_ONE, + ISCSI_CONN_ERROR_PROTOCOL_ERR_EXP_STAT_SN, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DSL_NOT_ZERO, + ISCSI_CONN_ERROR_PROTOCOL_ERR_INVALID_DSL, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DATA_SEG_LEN_TOO_BIG, + ISCSI_CONN_ERROR_PROTOCOL_ERR_OUTSTANDING_R2T_COUNT, + ISCSI_CONN_ERROR_PROTOCOL_ERR_DIF_TX, + ISCSI_CONN_ERROR_SENSE_DATA_LENGTH, + ISCSI_CONN_ERROR_DATA_PLACEMENT_ERROR, + ISCSI_ERROR_UNKNOWN, + MAX_ISCSI_ERROR_TYPES +}; + +struct iscsi_mflags { + u8 mflags; +#define ISCSI_MFLAGS_SLOW_IO_MASK 0x1 +#define ISCSI_MFLAGS_SLOW_IO_SHIFT 0 +#define ISCSI_MFLAGS_SINGLE_SGE_MASK 0x1 +#define 
ISCSI_MFLAGS_SINGLE_SGE_SHIFT 1 +#define ISCSI_MFLAGS_RESERVED_MASK 0x3F +#define ISCSI_MFLAGS_RESERVED_SHIFT 2 +}; + +struct iscsi_sgl { + struct regpair sgl_addr; + __le16 updated_sge_size; + __le16 updated_sge_offset; + __le32 byte_offset; +}; + +union iscsi_mstorm_sgl { + struct iscsi_sgl sgl_struct; + struct iscsi_sge single_sge; +}; + +enum iscsi_ramrod_cmd_id { + ISCSI_RAMROD_CMD_ID_UNUSED = 0, + ISCSI_RAMROD_CMD_ID_INIT_FUNC = 1, + ISCSI_RAMROD_CMD_ID_DESTROY_FUNC = 2, + ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN = 3, + ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4, + ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5, + ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6, + MAX_ISCSI_RAMROD_CMD_ID +}; + +struct iscsi_reg1 { + __le32 reg1_map; +#define ISCSI_REG1_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_REG1_NUM_FAST_SGES_SHIFT 0 +#define ISCSI_REG1_RESERVED1_MASK 0x1FFFFFFF +#define ISCSI_REG1_RESERVED1_SHIFT 3 +}; + +union iscsi_seq_num { + __le16 data_sn; + __le16 r2t_sn; +}; + +struct iscsi_spe_conn_offload { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + struct iscsi_conn_offload_params iscsi; + struct tcp_offload_params tcp; +}; + +struct iscsi_spe_conn_offload_option2 { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + struct iscsi_conn_offload_params iscsi; + struct tcp_offload_params_opt2 tcp; +}; + +struct iscsi_spe_conn_termination { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + u8 abortive; + u8 reserved0[7]; + struct regpair queue_cnts_addr; + struct regpair query_params_addr; +}; + +struct iscsi_spe_func_dstry { + struct iscsi_slow_path_hdr hdr; + __le16 reserved0; + __le32 reserved1; +}; + +struct iscsi_spe_func_init { + struct iscsi_slow_path_hdr hdr; + __le16 half_way_close_timeout; + u8 num_sq_pages_in_ring; + u8 num_r2tq_pages_in_ring; + u8 num_uhq_pages_in_ring; + u8 ll2_rx_queue_id; + u8 ooo_enable; + struct iscsi_debug_modes debug_mode; + __le16 reserved1; + __le32 reserved2; + __le32 reserved3; + __le32 reserved4; + struct scsi_init_func_params func_params; + struct scsi_init_func_queues q_params; +}; + +struct ystorm_iscsi_task_state { + union iscsi_data_desc_ctx sgl_ctx_union; + __le32 buffer_offset[2]; + __le16 bytes_nxt_dif; + __le16 rxmit_bytes_nxt_dif; + union iscsi_seq_num seq_num_union; + u8 dif_bytes_leftover; + u8 rxmit_dif_bytes_leftover; + __le16 reuse_count; + struct iscsi_dif_flags dif_flags; + u8 local_comp; + __le32 exp_r2t_sn; + __le32 sgl_offset[2]; +}; + +struct ystorm_iscsi_task_st_ctx { + struct ystorm_iscsi_task_state state; + union iscsi_task_hdr pdu_hdr; +}; + +struct ystorm_iscsi_task_ag_ctx { + u8 reserved; + u8 byte1; + __le16 word0; + u8 flags0; +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 6 
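
/*
 * Illustrative sketch (editor's aside, not part of the patch): the *_MASK /
 * *_SHIFT pairs used throughout these HSI structures describe sub-fields
 * packed into a single flags byte or word, accessed with the usual
 * read-modify-write pattern.  The EX_*_FIELD macros and the example_*()
 * helper below are local to this example, the field choices are arbitrary,
 * and the iscsi_common.h added by this patch is assumed to be in the
 * include path.
 */
#include <linux/types.h>
#include <linux/qed/iscsi_common.h>

#define EX_GET_FIELD(value, name) \
        (((value) >> name##_SHIFT) & name##_MASK)

#define EX_SET_FIELD(value, name, val)                                  \
        do {                                                            \
                (value) &= ~(name##_MASK << name##_SHIFT);              \
                (value) |= ((val) & name##_MASK) << name##_SHIFT;       \
        } while (0)

static bool example_task_flags(struct ystorm_iscsi_task_ag_ctx *ctx)
{
        /* Set the single-bit CF0EN sub-field in flags1 ... */
        EX_SET_FIELD(ctx->flags1, YSTORM_ISCSI_TASK_AG_CTX_CF0EN, 1);

        /* ... and report whether the VALID bit in flags0 is set */
        return EX_GET_FIELD(ctx->flags0, YSTORM_ISCSI_TASK_AG_CTX_VALID);
}
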
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 byte2; + __le32 TTT; + u8 byte3; + u8 byte4; + __le16 word1; +}; + +struct mstorm_iscsi_task_ag_ctx { + u8 cdu_validation; + u8 byte1; + __le16 task_cid; + u8 flags0; +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT 7 + u8 flags1; +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 byte2; + __le32 reg0; + u8 byte3; + u8 byte4; + __le16 word1; +}; + +struct ustorm_iscsi_task_ag_ctx { + u8 reserved; + u8 state; + __le16 icid; + u8 flags0; +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3 +#define 
USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6 + u8 flags1; +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 + u8 flags2; +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 6 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7 + u8 flags3; +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 + __le32 dif_err_intervals; + __le32 dif_error_1st_interval; + __le32 rcv_cont_len; + __le32 exp_cont_len; + __le32 total_data_acked; + __le32 exp_data_acked; + u8 next_tid_valid; + u8 byte3; + __le16 word1; + __le16 next_tid; + __le16 word3; + __le32 hdr_residual_count; + __le32 exp_r2t_sn; +}; + +struct mstorm_iscsi_task_st_ctx { + union iscsi_mstorm_sgl sgl_union; + struct iscsi_dif_flags dif_flags; + struct iscsi_mflags flags; + u8 sgl_size; + u8 host_sge_index; + __le16 dix_cur_sge_offset; + __le16 dix_cur_sge_size; + __le32 data_offset_rtid; + u8 dif_offset; + u8 dix_sgl_size; + u8 dix_sge_index; + u8 task_type; + struct regpair sense_db; + struct regpair dix_sgl_cur_sge; + __le32 rem_task_size; + __le16 reuse_count; + __le16 dif_data_residue; + u8 reserved0[4]; + __le32 reserved1[1]; +}; + +struct ustorm_iscsi_task_st_ctx { + __le32 rem_rcv_len; + __le32 exp_data_transfer_len; + __le32 exp_data_sn; + struct regpair lun; + struct iscsi_reg1 reg1; + u8 flags2; +#define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_MASK 0x7F +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_SHIFT 1 + u8 reserved2; + __le16 reserved3; + __le32 reserved4; + __le32 reserved5; + __le32 reserved6; + __le32 reserved7; + u8 task_type; + u8 error_flags; +#define USTORM_ISCSI_TASK_ST_CTX_DATA_DIGEST_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_DIGEST_ERROR_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_TRUNCATED_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_DATA_TRUNCATED_ERROR_SHIFT 1 +#define 
USTORM_ISCSI_TASK_ST_CTX_UNDER_RUN_ERROR_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_UNDER_RUN_ERROR_SHIFT 2 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED8_MASK 0x1F +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED8_SHIFT 3 + u8 flags; +#define USTORM_ISCSI_TASK_ST_CTX_CQE_WRITE_MASK 0x3 +#define USTORM_ISCSI_TASK_ST_CTX_CQE_WRITE_SHIFT 0 +#define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_SHIFT 2 +#define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT 3 +#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_SHIFT 4 +#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_SHIFT 5 +#define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_SHIFT 6 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_SHIFT 7 + u8 cq_rss_number; +}; + +struct iscsi_task_context { + struct ystorm_iscsi_task_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2]; + struct ystorm_iscsi_task_ag_ctx ystorm_ag_context; + struct regpair ystorm_ag_padding[2]; + struct tdif_task_context tdif_context; + struct mstorm_iscsi_task_ag_ctx mstorm_ag_context; + struct regpair mstorm_ag_padding[2]; + struct ustorm_iscsi_task_ag_ctx ustorm_ag_context; + struct mstorm_iscsi_task_st_ctx mstorm_st_context; + struct ustorm_iscsi_task_st_ctx ustorm_st_context; + struct rdif_task_context rdif_context; +}; + +enum iscsi_task_type { + ISCSI_TASK_TYPE_INITIATOR_WRITE, + ISCSI_TASK_TYPE_INITIATOR_READ, + ISCSI_TASK_TYPE_MIDPATH, + ISCSI_TASK_TYPE_UNSOLIC, + ISCSI_TASK_TYPE_EXCHCLEANUP, + ISCSI_TASK_TYPE_IRRELEVANT, + ISCSI_TASK_TYPE_TARGET_WRITE, + ISCSI_TASK_TYPE_TARGET_READ, + ISCSI_TASK_TYPE_TARGET_RESPONSE, + ISCSI_TASK_TYPE_LOGIN_RESPONSE, + MAX_ISCSI_TASK_TYPE +}; + +union iscsi_ttt_txlen_union { + __le32 desired_tx_len; + __le32 ttt; +}; + +struct iscsi_uhqe { + __le32 reg1; +#define ISCSI_UHQE_PDU_PAYLOAD_LEN_MASK 0xFFFFF +#define ISCSI_UHQE_PDU_PAYLOAD_LEN_SHIFT 0 +#define ISCSI_UHQE_LOCAL_COMP_MASK 0x1 +#define ISCSI_UHQE_LOCAL_COMP_SHIFT 20 +#define ISCSI_UHQE_TOGGLE_BIT_MASK 0x1 +#define ISCSI_UHQE_TOGGLE_BIT_SHIFT 21 +#define ISCSI_UHQE_PURE_PAYLOAD_MASK 0x1 +#define ISCSI_UHQE_PURE_PAYLOAD_SHIFT 22 +#define ISCSI_UHQE_LOGIN_RESPONSE_PDU_MASK 0x1 +#define ISCSI_UHQE_LOGIN_RESPONSE_PDU_SHIFT 23 +#define ISCSI_UHQE_TASK_ID_HI_MASK 0xFF +#define ISCSI_UHQE_TASK_ID_HI_SHIFT 24 + __le32 reg2; +#define ISCSI_UHQE_BUFFER_OFFSET_MASK 0xFFFFFF +#define ISCSI_UHQE_BUFFER_OFFSET_SHIFT 0 +#define ISCSI_UHQE_TASK_ID_LO_MASK 0xFF +#define ISCSI_UHQE_TASK_ID_LO_SHIFT 24 +}; + +struct iscsi_wqe_field { + __le32 contlen_cdbsize_field; +#define ISCSI_WQE_FIELD_CONT_LEN_MASK 0xFFFFFF +#define ISCSI_WQE_FIELD_CONT_LEN_SHIFT 0 +#define ISCSI_WQE_FIELD_CDB_SIZE_MASK 0xFF +#define ISCSI_WQE_FIELD_CDB_SIZE_SHIFT 24 +}; + +union iscsi_wqe_field_union { + struct iscsi_wqe_field cont_field; + __le32 prev_tid; +}; + +struct iscsi_wqe { + __le16 task_id; + u8 flags; +#define ISCSI_WQE_WQE_TYPE_MASK 0x7 +#define ISCSI_WQE_WQE_TYPE_SHIFT 0 +#define ISCSI_WQE_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_WQE_NUM_FAST_SGES_SHIFT 3 +#define ISCSI_WQE_PTU_INVALIDATE_MASK 0x1 +#define ISCSI_WQE_PTU_INVALIDATE_SHIFT 6 +#define ISCSI_WQE_RESPONSE_MASK 0x1 +#define ISCSI_WQE_RESPONSE_SHIFT 7 + struct iscsi_dif_flags prot_flags; + union iscsi_wqe_field_union 
cont_prevtid_union; +}; + +enum iscsi_wqe_type { + ISCSI_WQE_TYPE_NORMAL, + ISCSI_WQE_TYPE_TASK_CLEANUP, + ISCSI_WQE_TYPE_MIDDLE_PATH, + ISCSI_WQE_TYPE_LOGIN, + ISCSI_WQE_TYPE_FIRST_R2T_CONT, + ISCSI_WQE_TYPE_NONFIRST_R2T_CONT, + ISCSI_WQE_TYPE_RESPONSE, + MAX_ISCSI_WQE_TYPE +}; + +struct iscsi_xhqe { + union iscsi_ttt_txlen_union ttt_or_txlen; + __le32 exp_stat_sn; + struct iscsi_dif_flags prot_flags; + u8 total_ahs_length; + u8 opcode; + u8 flags; +#define ISCSI_XHQE_NUM_FAST_SGES_MASK 0x7 +#define ISCSI_XHQE_NUM_FAST_SGES_SHIFT 0 +#define ISCSI_XHQE_FINAL_MASK 0x1 +#define ISCSI_XHQE_FINAL_SHIFT 3 +#define ISCSI_XHQE_SUPER_IO_MASK 0x1 +#define ISCSI_XHQE_SUPER_IO_SHIFT 4 +#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 +#define ISCSI_XHQE_STATUS_BIT_SHIFT 5 +#define ISCSI_XHQE_RESERVED_MASK 0x3 +#define ISCSI_XHQE_RESERVED_SHIFT 6 + union iscsi_seq_num seq_num_union; + __le16 reserved1; +}; + +struct mstorm_iscsi_stats_drv { + struct regpair iscsi_rx_dropped_pdus_task_not_valid; +}; + +struct ooo_opaque { + __le32 cid; + u8 drop_isle; + u8 drop_size; + u8 ooo_opcode; + u8 ooo_isle; +}; + +struct pstorm_iscsi_stats_drv { + struct regpair iscsi_tx_bytes_cnt; + struct regpair iscsi_tx_packet_cnt; +}; + +struct tstorm_iscsi_stats_drv { + struct regpair iscsi_rx_bytes_cnt; + struct regpair iscsi_rx_packet_cnt; + struct regpair iscsi_rx_new_ooo_isle_events_cnt; + __le32 iscsi_cmdq_threshold_cnt; + __le32 iscsi_rq_threshold_cnt; + __le32 iscsi_immq_threshold_cnt; +}; + +struct ustorm_iscsi_stats_drv { + struct regpair iscsi_rx_data_pdu_cnt; + struct regpair iscsi_rx_r2t_pdu_cnt; + struct regpair iscsi_rx_total_pdu_cnt; +}; + +struct xstorm_iscsi_stats_drv { + struct regpair iscsi_tx_go_to_slow_start_event_cnt; + struct regpair iscsi_tx_fast_retransmit_event_cnt; +}; + +struct ystorm_iscsi_stats_drv { + struct regpair iscsi_tx_data_pdu_cnt; + struct regpair iscsi_tx_r2t_pdu_cnt; + struct regpair iscsi_tx_total_pdu_cnt; +}; + +struct iscsi_db_data { + u8 params; +#define ISCSI_DB_DATA_DEST_MASK 0x3 +#define ISCSI_DB_DATA_DEST_SHIFT 0 +#define ISCSI_DB_DATA_AGG_CMD_MASK 0x3 +#define ISCSI_DB_DATA_AGG_CMD_SHIFT 2 +#define ISCSI_DB_DATA_BYPASS_EN_MASK 0x1 +#define ISCSI_DB_DATA_BYPASS_EN_SHIFT 4 +#define ISCSI_DB_DATA_RESERVED_MASK 0x1 +#define ISCSI_DB_DATA_RESERVED_SHIFT 5 +#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 sq_prod; +}; + +struct tstorm_iscsi_task_ag_ctx { + u8 byte0; + u8 byte1; + __le16 word0; + u8 flags0; +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 6 + u8 flags2; +#define TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define 
TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT 6 + u8 flags3; +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT 7 + u8 flags4; +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 7 + u8 byte2; + __le16 word1; + __le32 reg0; + u8 byte3; + u8 byte4; + __le16 word2; + __le16 word3; + __le16 word4; + __le32 reg1; + __le32 reg2; +}; + +#endif /* __ISCSI_COMMON__ */ diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h new file mode 100644 index 000000000000..187991c1f439 --- /dev/null +++ b/include/linux/qed/rdma_common.h @@ -0,0 +1,44 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __RDMA_COMMON__ +#define __RDMA_COMMON__ +/************************/ +/* RDMA FW CONSTANTS */ +/************************/ + +#define RDMA_RESERVED_LKEY (0) +#define RDMA_RING_PAGE_SIZE (0x1000) + +#define RDMA_MAX_SGE_PER_SQ_WQE (4) +#define RDMA_MAX_SGE_PER_RQ_WQE (4) + +#define RDMA_MAX_DATA_SIZE_IN_WQE (0x7FFFFFFF) + +#define RDMA_REQ_RD_ATOMIC_ELM_SIZE (0x50) +#define RDMA_RESP_RD_ATOMIC_ELM_SIZE (0x20) + +#define RDMA_MAX_CQS (64 * 1024) +#define RDMA_MAX_TIDS (128 * 1024 - 1) +#define RDMA_MAX_PDS (64 * 1024) + +#define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS + +#define RDMA_TASK_TYPE (PROTOCOLID_ROCE) + +struct rdma_srq_id { + __le16 srq_idx; + __le16 opaque_fid; +}; + +struct rdma_srq_producers { + __le32 sge_prod; + __le32 wqe_prod; +}; + +#endif /* __RDMA_COMMON__ */ diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h new file mode 100644 index 000000000000..2eeaf3dc6646 --- /dev/null +++ b/include/linux/qed/roce_common.h @@ -0,0 +1,17 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef __ROCE_COMMON__ +#define __ROCE_COMMON__ + +#define ROCE_REQ_MAX_INLINE_DATA_SIZE (256) +#define ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE (288) + +#define ROCE_MAX_QPS (32 * 1024) + +#endif /* __ROCE_COMMON__ */ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h new file mode 100644 index 000000000000..3b8e1efd9bc2 --- /dev/null +++ b/include/linux/qed/storage_common.h @@ -0,0 +1,91 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __STORAGE_COMMON__ +#define __STORAGE_COMMON__ + +#define NUM_OF_CMDQS_CQS (NUM_OF_GLOBAL_QUEUES / 2) +#define BDQ_NUM_RESOURCES (4) + +#define BDQ_ID_RQ (0) +#define BDQ_ID_IMM_DATA (1) +#define BDQ_NUM_IDS (2) + +#define BDQ_MAX_EXTERNAL_RING_SIZE (1 << 15) + +struct scsi_bd { + struct regpair address; + struct regpair opaque; +}; + +struct scsi_bdq_ram_drv_data { + __le16 external_producer; + __le16 reserved0[3]; +}; + +struct scsi_drv_cmdq { + __le16 cmdq_cons; + __le16 reserved0; + __le32 reserved1; +}; + +struct scsi_init_func_params { + __le16 num_tasks; + u8 log_page_size; + u8 debug_mode; + u8 reserved2[12]; +}; + +struct scsi_init_func_queues { + struct regpair glbl_q_params_addr; + __le16 rq_buffer_size; + __le16 cq_num_entries; + __le16 cmdq_num_entries; + u8 bdq_resource_id; + u8 q_validity; +#define SCSI_INIT_FUNC_QUEUES_RQ_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_RQ_VALID_SHIFT 0 +#define SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID_SHIFT 1 +#define SCSI_INIT_FUNC_QUEUES_CMD_VALID_MASK 0x1 +#define SCSI_INIT_FUNC_QUEUES_CMD_VALID_SHIFT 2 +#define SCSI_INIT_FUNC_QUEUES_RESERVED_VALID_MASK 0x1F +#define SCSI_INIT_FUNC_QUEUES_RESERVED_VALID_SHIFT 3 + u8 num_queues; + u8 queue_relative_offset; + u8 cq_sb_pi; + u8 cmdq_sb_pi; + __le16 cq_cmdq_sb_num_arr[NUM_OF_CMDQS_CQS]; + __le16 reserved0; + u8 bdq_pbl_num_entries[BDQ_NUM_IDS]; + struct regpair bdq_pbl_base_address[BDQ_NUM_IDS]; + __le16 bdq_xoff_threshold[BDQ_NUM_IDS]; + __le16 bdq_xon_threshold[BDQ_NUM_IDS]; + __le16 cmdq_xoff_threshold; + __le16 cmdq_xon_threshold; + __le32 reserved1; +}; + +struct scsi_ram_per_bdq_resource_drv_data { + struct scsi_bdq_ram_drv_data drv_data_per_bdq_id[BDQ_NUM_IDS]; +}; + +struct scsi_sge { + struct regpair sge_addr; + __le16 sge_len; + __le16 reserved0; + __le32 reserved1; +}; + +struct scsi_terminate_extra_params { + __le16 unsolicited_cq_count; + __le16 cmdq_count; + u8 reserved[4]; +}; + +#endif /* __STORAGE_COMMON__ */ diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h new file mode 100644 index 000000000000..accba0e6b704 --- /dev/null +++ b/include/linux/qed/tcp_common.h @@ -0,0 +1,226 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __TCP_COMMON__ +#define __TCP_COMMON__ + +#define TCP_INVALID_TIMEOUT_VAL -1 + +enum tcp_connect_mode { + TCP_CONNECT_ACTIVE, + TCP_CONNECT_PASSIVE, + MAX_TCP_CONNECT_MODE +}; + +struct tcp_init_params { + __le32 max_cwnd; + __le16 dup_ack_threshold; + __le16 tx_sws_timer; + __le16 min_rto; + __le16 min_rto_rt; + __le16 max_rto; + u8 maxfinrt; + u8 reserved[1]; +}; + +enum tcp_ip_version { + TCP_IPV4, + TCP_IPV6, + MAX_TCP_IP_VERSION +}; + +struct tcp_offload_params { + __le16 local_mac_addr_lo; + __le16 local_mac_addr_mid; + __le16 local_mac_addr_hi; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 vlan_id; + u8 flags; +#define TCP_OFFLOAD_PARAMS_TS_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_TS_EN_SHIFT 0 +#define TCP_OFFLOAD_PARAMS_DA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_DA_EN_SHIFT 1 +#define TCP_OFFLOAD_PARAMS_KA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_KA_EN_SHIFT 2 +#define TCP_OFFLOAD_PARAMS_NAGLE_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_NAGLE_EN_SHIFT 3 +#define TCP_OFFLOAD_PARAMS_DA_CNT_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_DA_CNT_EN_SHIFT 4 +#define TCP_OFFLOAD_PARAMS_FIN_SENT_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_FIN_SENT_SHIFT 5 +#define TCP_OFFLOAD_PARAMS_FIN_RECEIVED_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_FIN_RECEIVED_SHIFT 6 +#define TCP_OFFLOAD_PARAMS_RESERVED0_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_RESERVED0_SHIFT 7 + u8 ip_version; + __le32 remote_ip[4]; + __le32 local_ip[4]; + __le32 flow_label; + u8 ttl; + u8 tos_or_tc; + __le16 remote_port; + __le16 local_port; + __le16 mss; + u8 rcv_wnd_scale; + u8 connect_mode; + __le16 srtt; + __le32 cwnd; + __le32 ss_thresh; + __le16 reserved1; + u8 ka_max_probe_cnt; + u8 dup_ack_theshold; + __le32 rcv_next; + __le32 snd_una; + __le32 snd_next; + __le32 snd_max; + __le32 snd_wnd; + __le32 rcv_wnd; + __le32 snd_wl1; + __le32 ts_time; + __le32 ts_recent; + __le32 ts_recent_age; + __le32 total_rt; + __le32 ka_timeout_delta; + __le32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + __le16 rtt_var; + __le16 reserved2; + __le32 ka_timeout; + __le32 ka_interval; + __le32 max_rt_time; + __le32 initial_rcv_wnd; + u8 snd_wnd_scale; + u8 ack_frequency; + __le16 da_timeout_value; + __le32 ts_ticks_per_second; +}; + +struct tcp_offload_params_opt2 { + __le16 local_mac_addr_lo; + __le16 local_mac_addr_mid; + __le16 local_mac_addr_hi; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 vlan_id; + u8 flags; +#define TCP_OFFLOAD_PARAMS_OPT2_TS_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_TS_EN_SHIFT 0 +#define TCP_OFFLOAD_PARAMS_OPT2_DA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_DA_EN_SHIFT 1 +#define TCP_OFFLOAD_PARAMS_OPT2_KA_EN_MASK 0x1 +#define TCP_OFFLOAD_PARAMS_OPT2_KA_EN_SHIFT 2 +#define TCP_OFFLOAD_PARAMS_OPT2_RESERVED0_MASK 0x1F +#define TCP_OFFLOAD_PARAMS_OPT2_RESERVED0_SHIFT 3 + u8 ip_version; + __le32 remote_ip[4]; + __le32 local_ip[4]; + __le32 flow_label; + u8 ttl; + u8 tos_or_tc; + __le16 remote_port; + __le16 local_port; + __le16 mss; + u8 rcv_wnd_scale; + u8 connect_mode; + __le16 syn_ip_payload_length; + __le32 syn_phy_addr_lo; + __le32 syn_phy_addr_hi; + __le32 reserved1[22]; +}; + +enum tcp_seg_placement_event { + TCP_EVENT_ADD_PEN, + TCP_EVENT_ADD_NEW_ISLE, + TCP_EVENT_ADD_ISLE_RIGHT, + TCP_EVENT_ADD_ISLE_LEFT, + TCP_EVENT_JOIN, + TCP_EVENT_NOP, + MAX_TCP_SEG_PLACEMENT_EVENT +}; + +struct tcp_update_params { + __le16 flags; +#define TCP_UPDATE_PARAMS_REMOTE_MAC_ADDR_CHANGED_MASK 
0x1 +#define TCP_UPDATE_PARAMS_REMOTE_MAC_ADDR_CHANGED_SHIFT 0 +#define TCP_UPDATE_PARAMS_MSS_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_MSS_CHANGED_SHIFT 1 +#define TCP_UPDATE_PARAMS_TTL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_TTL_CHANGED_SHIFT 2 +#define TCP_UPDATE_PARAMS_TOS_OR_TC_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_TOS_OR_TC_CHANGED_SHIFT 3 +#define TCP_UPDATE_PARAMS_KA_TIMEOUT_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_TIMEOUT_CHANGED_SHIFT 4 +#define TCP_UPDATE_PARAMS_KA_INTERVAL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_INTERVAL_CHANGED_SHIFT 5 +#define TCP_UPDATE_PARAMS_MAX_RT_TIME_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_MAX_RT_TIME_CHANGED_SHIFT 6 +#define TCP_UPDATE_PARAMS_FLOW_LABEL_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_FLOW_LABEL_CHANGED_SHIFT 7 +#define TCP_UPDATE_PARAMS_INITIAL_RCV_WND_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_INITIAL_RCV_WND_CHANGED_SHIFT 8 +#define TCP_UPDATE_PARAMS_KA_MAX_PROBE_CNT_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_MAX_PROBE_CNT_CHANGED_SHIFT 9 +#define TCP_UPDATE_PARAMS_KA_EN_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_EN_CHANGED_SHIFT 10 +#define TCP_UPDATE_PARAMS_NAGLE_EN_CHANGED_MASK 0x1 +#define TCP_UPDATE_PARAMS_NAGLE_EN_CHANGED_SHIFT 11 +#define TCP_UPDATE_PARAMS_KA_EN_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_EN_SHIFT 12 +#define TCP_UPDATE_PARAMS_NAGLE_EN_MASK 0x1 +#define TCP_UPDATE_PARAMS_NAGLE_EN_SHIFT 13 +#define TCP_UPDATE_PARAMS_KA_RESTART_MASK 0x1 +#define TCP_UPDATE_PARAMS_KA_RESTART_SHIFT 14 +#define TCP_UPDATE_PARAMS_RETRANSMIT_RESTART_MASK 0x1 +#define TCP_UPDATE_PARAMS_RETRANSMIT_RESTART_SHIFT 15 + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + __le16 mss; + u8 ttl; + u8 tos_or_tc; + __le32 ka_timeout; + __le32 ka_interval; + __le32 max_rt_time; + __le32 flow_label; + __le32 initial_rcv_wnd; + u8 ka_max_probe_cnt; + u8 reserved1[7]; +}; + +struct tcp_upload_params { + __le32 rcv_next; + __le32 snd_una; + __le32 snd_next; + __le32 snd_max; + __le32 snd_wnd; + __le32 rcv_wnd; + __le32 snd_wl1; + __le32 cwnd; + __le32 ss_thresh; + __le16 srtt; + __le16 rtt_var; + __le32 ts_time; + __le32 ts_recent; + __le32 ts_recent_age; + __le32 total_rt; + __le32 ka_timeout_delta; + __le32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + __le32 reserved; +}; + +#endif /* __TCP_COMMON__ */ -- cgit From c5ac93191d7e6977c5c3465ac94c73ebb8a8ecba Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 3 Jun 2016 14:35:34 +0300 Subject: qed: Add iscsi/rdma personalities This patch adds in the ecore 2 new personalities in addition to QED_PCI_ETH - QED_PCI_ISCSI and QED_PCI_ETH_ROCE. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/common_hsi.h | 4 +-- include/linux/qed/qed_if.h | 65 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 077a3b6cc80f..40c0ada01806 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -517,9 +517,9 @@ enum mf_mode { /* Per-protocol connection types */ enum protocol_type { - PROTOCOLID_RESERVED1, + PROTOCOLID_ISCSI, PROTOCOLID_RESERVED2, - PROTOCOLID_RESERVED3, + PROTOCOLID_ROCE, PROTOCOLID_CORE, PROTOCOLID_ETH, PROTOCOLID_RESERVED4, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 15efccfdc46e..e8cc49f9688a 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -58,8 +58,70 @@ struct qed_eth_pf_params { u16 num_cons; }; +/* Most of the the parameters below are described in the FW iSCSI / TCP HSI */ +struct qed_iscsi_pf_params { + u64 glbl_q_params_addr; + u64 bdq_pbl_base_addr[2]; + u32 max_cwnd; + u16 cq_num_entries; + u16 cmdq_num_entries; + u16 dup_ack_threshold; + u16 tx_sws_timer; + u16 min_rto; + u16 min_rto_rt; + u16 max_rto; + + /* The following parameters are used during HW-init + * and these parameters need to be passed as arguments + * to update_pf_params routine invoked before slowpath start + */ + u16 num_cons; + u16 num_tasks; + + /* The following parameters are used during protocol-init */ + u16 half_way_close_timeout; + u16 bdq_xoff_threshold[2]; + u16 bdq_xon_threshold[2]; + u16 cmdq_xoff_threshold; + u16 cmdq_xon_threshold; + u16 rq_buffer_size; + + u8 num_sq_pages_in_ring; + u8 num_r2tq_pages_in_ring; + u8 num_uhq_pages_in_ring; + u8 num_queues; + u8 log_page_size; + u8 rqe_log_size; + u8 max_fin_rt; + u8 gl_rq_pi; + u8 gl_cmd_pi; + u8 debug_mode; + u8 ll2_ooo_queue_id; + u8 ooo_enable; + + u8 is_target; + u8 bdq_pbl_num_entries[2]; +}; + +struct qed_rdma_pf_params { + /* Supplied to QED during resource allocation (may affect the ILT and + * the doorbell BAR). + */ + u32 min_dpis; /* number of requested DPIs */ + u32 num_mrs; /* number of requested memory regions */ + u32 num_qps; /* number of requested Queue Pairs */ + u32 num_srqs; /* number of requested SRQ */ + u8 roce_edpm_mode; /* see QED_ROCE_EDPM_MODE_ENABLE */ + u8 gl_pi; /* protocol index */ + + /* Will allocate rate limiters to be used with QPs */ + u8 enable_dcqcn; +}; + struct qed_pf_params { struct qed_eth_pf_params eth_pf_params; + struct qed_iscsi_pf_params iscsi_pf_params; + struct qed_rdma_pf_params rdma_pf_params; }; enum qed_int_mode { @@ -100,6 +162,8 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; + bool rdma_supported; + u32 flash_size; u8 mf_mode; bool tx_switching; @@ -111,6 +175,7 @@ enum qed_sb_type { enum qed_protocol { QED_PROTOCOL_ETH, + QED_PROTOCOL_ISCSI, }; struct qed_link_params { -- cgit From 81b1251d3b761b303955408dd6a49618b5683c2b Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sat, 4 Jun 2016 08:20:16 +0300 Subject: qed: Fix next-ptr chains for BE / 32-bit Commit a91eb52abb50 ("qed: Revisit chain implementation") contains an incorrect implementation for BE platforms, as device's regpairs containing addresses are LE and they're not converted correctly when read back. In addition, it raises a compilation warning for 32-bit platforms where dma_addr_t is a 32-bit variable. Reported-by: kbuild test robot Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_chain.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index eceaa9ed2ae9..7e441bdeabdc 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -25,10 +25,9 @@ } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) #define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) -#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) #define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) +#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) enum qed_chain_mode { /* Each Page contains a next pointer at its end */ -- cgit From c8b098086b4c744084350d2757a637ad756adf34 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:57 +0200 Subject: net: dsa: Add a ports structure and use it in the switch structure There are going to be more per-port members added to the switch structure. So add a port structure and move the netdev into it. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 17c3d37b6779..9aed8572037c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -119,6 +119,10 @@ struct dsa_switch_tree { struct dsa_switch *ds[DSA_MAX_SWITCHES]; }; +struct dsa_port { + struct net_device *netdev; +}; + struct dsa_switch { struct device *dev; @@ -158,8 +162,8 @@ struct dsa_switch { u32 dsa_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; + struct dsa_port ports[DSA_MAX_PORTS]; struct mii_bus *slave_mii_bus; - struct net_device *ports[DSA_MAX_PORTS]; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) @@ -174,7 +178,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { - return ds->enabled_port_mask & (1 << p) && ds->ports[p]; + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; } static inline u8 dsa_upstream_port(struct dsa_switch *ds) -- cgit From 189b0d93ec61e1f991e96d7bc03b03cf929d164c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:58 +0200 Subject: net: dsa: Move port device node into port structure Move the port device node structure into the port structure, from the chip data. This information is needed in the next step of implementing the new binding. The chip data structure is used while parsing the whole old binding, before the individual switch structures exist. With the new bindings, this is reversed, the switches exist first, and the interconnections between the switches is derived from the individual switch bindings. Thus this chip data structure becomes unneeded. Signed-off-by: Andrew Lunn eviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9aed8572037c..8314197d028f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -121,6 +121,7 @@ struct dsa_switch_tree { struct dsa_port { struct net_device *netdev; + struct device_node *dn; }; struct dsa_switch { -- cgit From 4a7704ffa86705b0580b6473c407b7b7618e072d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:16:59 +0200 Subject: net: dsa: Remove dynamic allocate of routing table With a maximum of four switches, the size of the routing table is the same as the pointer to it. Removing it makes the code simpler. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8314197d028f..4e3afa9648ca 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -58,12 +58,11 @@ struct dsa_chip_data { struct device_node *port_dn[DSA_MAX_PORTS]; /* - * An array (with nr_chips elements) of which element [a] - * indicates which port on this switch should be used to - * send packets to that are destined for switch a. Can be - * NULL if there is only one switch chip. + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. */ - s8 *rtable; + s8 rtable[DSA_MAX_SWITCHES]; }; struct dsa_platform_data { -- cgit From 66472fc04e8be62858f29c7798ed17e984c1ab3b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:00 +0200 Subject: net: dsa: Copy the routing table into the switch structure The new binding will not have a chip data structure, it will place the routing directly into the switch structure. To enable backwards compatibility, copy the routing from the chip data into the switch structure. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4e3afa9648ca..b666f27b3daa 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -148,6 +148,13 @@ struct dsa_switch { */ struct dsa_switch_driver *drv; + /* + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. + */ + s8 rtable[DSA_MAX_SWITCHES]; + #ifdef CONFIG_NET_DSA_HWMON /* * Hardware monitoring information @@ -194,7 +201,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) if (dst->cpu_switch == ds->index) return dst->cpu_port; else - return ds->cd->rtable[dst->cpu_switch]; + return ds->rtable[dst->cpu_switch]; } struct switchdev_trans; -- cgit From 39a7f2a4eb496c0c68cc93fcb403190b48605168 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:03 +0200 Subject: net: dsa: Refactor selection of tag ops into a function Replace the two switch statements with an array lookup, and store the result in the dsa tree structure. The drivers no longer need to know the selected tag protocol, so remove it from the dsa switch structure. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b666f27b3daa..bd6ecaa0de96 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_LAST, /* MUST BE LAST */ }; #define DSA_MAX_SWITCHES 4 @@ -99,7 +100,6 @@ struct dsa_switch_tree { struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); - enum dsa_tag_protocol tag_protocol; /* * Original copy of the master netdev ethtool_ops @@ -116,6 +116,12 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; + + /* + * Tagging protocol operations for adding and removing an + * encapsulation tag. + */ + const struct dsa_device_ops *tag_ops; }; struct dsa_port { -- cgit From 83c0afaec7b730b16c518aecc8e6246ec91b265e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jun 2016 21:17:07 +0200 Subject: net: dsa: Add new binding implementation The existing DSA binding has a number of limitations and problems. The main problem is that it cannot represent a switch as a linux device, hanging off some bus. It is limited to one CPU port. The DSA platform device is artificial, and does not really represent hardware. Implement a new binding which can be embedded into any type of node on a bus to represent one switch device, and its links to other switches. Signed-off-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index bd6ecaa0de96..cca7ef230742 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -85,6 +85,17 @@ struct dsa_platform_data { struct packet_type; struct dsa_switch_tree { + struct list_head list; + + /* Tree identifier */ + u32 tree; + + /* Number of switches attached to this tree */ + struct kref refcount; + + /* Has this tree been applied to the hardware? */ + bool applied; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. @@ -169,10 +180,16 @@ struct dsa_switch { struct device *hwmon_dev; #endif + /* + * The lower device this switch uses to talk to the host + */ + struct net_device *master_netdev; + /* * Slave mii_bus and devices for the individual ports. */ u32 dsa_port_mask; + u32 cpu_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; struct dsa_port ports[DSA_MAX_PORTS]; @@ -361,4 +378,7 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; } + +void dsa_unregister_switch(struct dsa_switch *ds); +int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); #endif -- cgit From 6eb17e0df74c036eba7548915b37f009403fe09e Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Fri, 3 Jun 2016 10:55:09 +0800 Subject: ACPI: bus: add stub acpi_dev_found() to linux/acpi.h acpi_dev_found() will be used to detect if a given ACPI device is in the system. It will be compiled in non-ACPI case, but the function is in acpi_bus.h and acpi_bus.h can only be used in ACPI case, so this patch add the stub function to linux/acpi.h to make compiled successfully in non-ACPI cases. Cc: Rafael J. Wysocki Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. 
Miller --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 288fac5294f5..3025d1930f6e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -543,6 +543,11 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); struct fwnode_handle; +static inline bool acpi_dev_found(const char *hid) +{ + return false; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- cgit From 4ae399241adba66ad72e5973a1004f37ffbe67cd Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Fri, 3 Jun 2016 10:55:10 +0800 Subject: ACPI: bus: add stub acpi_evaluate_dsm() to linux/acpi.h acpi_evaluate_dsm() will be used to handle the _DSM method in ACPI case. It will be compiled in non-ACPI case, but the function is in acpi_bus.h and acpi_bus.h can only be used in ACPI case, so this patch add the stub function to linux/acpi.h to make compiled successfully in non-ACPI cases. Cc: Rafael J. Wysocki Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- include/linux/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3025d1930f6e..4d4bb4955682 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -659,6 +659,14 @@ static inline bool acpi_driver_match_device(struct device *dev, return false; } +static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, + const u8 *uuid, + int rev, int func, + union acpi_object *argv4) +{ + return NULL; +} + static inline int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { -- cgit From 14de9d114a82a564b94388c95af79a701dc93134 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Fri, 3 Jun 2016 16:57:12 -0400 Subject: virtio-net: Add initial MTU advice feature This commit adds the feature bit and associated mtu device entry for the virtio network device. When a virtio device comes up, it checks the feature bit for the VIRTIO_NET_F_MTU feature. If such feature bit is enabled, the driver will read the advised MTU and use it as the initial value. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index ec32293a00db..1ab4ea6ec847 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -55,6 +55,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_MTU 25 /* Initial MTU advice */ #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ @@ -73,6 +74,8 @@ struct virtio_net_config { * Legal values are between 1 and 0x8000 */ __u16 max_virtqueue_pairs; + /* Default maximum transmit unit advice */ + __u16 mtu; } __attribute__((packed)); /* -- cgit From 53eb440f4ada034ea43b295891feec3df0fa7a29 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 6 Jun 2016 06:32:54 -0400 Subject: net sched actions: introduce timestamp for firsttime use Useful to know when the action was first used for accounting (and debugging) Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/act_api.h | 2 ++ include/uapi/linux/pkt_cls.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9a9a8edc138f..8389c007076f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -76,6 +76,8 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) if (tm->lastuse != now) tm->lastuse = now; + if (unlikely(!tm->firstuse)) + tm->firstuse = now; } struct tc_action { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index f4297c8a42fe..9ba1410bd21d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -124,6 +124,7 @@ struct tcf_t { __u64 install; __u64 lastuse; __u64 expires; + __u64 firstuse; }; struct tc_cnt { -- cgit From 48d8ee1694dd1ab25614b58f968123a4598f887e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 6 Jun 2016 06:32:55 -0400 Subject: net sched actions: aggregate dumping of actions timeinfo Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8389c007076f..a891978310e9 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -80,6 +80,14 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) tm->firstuse = now; } +static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) +{ + dtm->install = jiffies_to_clock_t(jiffies - stm->install); + dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); + dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse); + dtm->expires = jiffies_to_clock_t(stm->expires); +} + struct tc_action { void *priv; const struct tc_action_ops *ops; -- cgit From 0b0f43fe2e7291aa97b1febeaa5a0de453d007ca Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 5 Jun 2016 10:41:32 -0400 Subject: net sched: indentation and other OCD stylistic fixes Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang --- include/net/act_api.h | 14 ++++++++------ include/net/tc_act/tc_defact.h | 4 ++-- include/uapi/linux/pkt_cls.h | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index a891978310e9..db218a12efb5 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -2,8 +2,8 @@ #define __NET_ACT_API_H /* - * Public police action API for classifiers/qdiscs - */ + * Public action API for classifiers/qdiscs +*/ #include #include @@ -107,7 +107,8 @@ struct tc_action_ops { char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ struct module *owner; - int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); + int (*act)(struct sk_buff *, const struct tc_action *, + struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *, int bind); int (*lookup)(struct net *, struct tc_action *, u32); @@ -125,8 +126,8 @@ struct tc_action_net { }; static inline -int tc_action_net_init(struct tc_action_net *tn, const struct tc_action_ops *ops, - unsigned int mask) +int tc_action_net_init(struct tc_action_net *tn, + const struct tc_action_ops *ops, unsigned int mask) { int err = 0; @@ -169,7 +170,8 @@ static inline int tcf_hash_release(struct tc_action *a, bool bind) } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); +int 
tcf_unregister_action(struct tc_action_ops *a, + struct pernet_operations *ops); int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res); diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index 9763dcbb9bc3..ab9b5d6be67b 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -5,8 +5,8 @@ struct tcf_defact { struct tcf_common common; - u32 tcfd_datalen; - void *tcfd_defdata; + u32 tcfd_datalen; + void *tcfd_defdata; }; #define to_defact(a) \ container_of(a->priv, struct tcf_defact, common) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9ba1410bd21d..5702e933dc07 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -115,8 +115,8 @@ struct tc_police { __u32 mtu; struct tc_ratespec rate; struct tc_ratespec peakrate; - int refcnt; - int bindcnt; + int refcnt; + int bindcnt; __u32 capab; }; @@ -128,7 +128,7 @@ struct tcf_t { }; struct tc_cnt { - int refcnt; + int refcnt; int bindcnt; }; -- cgit From 68f047e3d62ebfac24ff9be551476cf30eafb00e Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:29:58 +0800 Subject: fsl/qe: add rx_sync and tx_sync for TDM mode Rx_sync and tx_sync are used by QE-TDM mode, add them to struct ucc_fast_info. Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- include/soc/fsl/qe/qe.h | 2 ++ include/soc/fsl/qe/ucc_fast.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 33b29ead3d55..f91874582ab3 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -80,6 +80,8 @@ enum qe_clock { QE_CLK22, /* Clock 22 */ QE_CLK23, /* Clock 23 */ QE_CLK24, /* Clock 24 */ + QE_RSYNC_PIN, /* RSYNC from pin */ + QE_TSYNC_PIN, /* TSYNC from pin */ QE_CLK_DUMMY }; diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index df8ea7958c63..31548b7aa50e 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -120,6 +120,8 @@ struct ucc_fast_info { int ucc_num; enum qe_clock rx_clock; enum qe_clock tx_clock; + enum qe_clock rx_sync; + enum qe_clock tx_sync; u32 regs; int irq; u32 uccm_mask; -- cgit From bb8b2062aff321af1fc58781cc07fbbea01cceb3 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:29:59 +0800 Subject: fsl/qe: setup clock source for TDM mode Add tdm clock configuration in both qe clock system and ucc fast controller. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/qe.h | 16 ++++++++++++++++ include/soc/fsl/qe/ucc.h | 4 ++++ include/soc/fsl/qe/ucc_fast.h | 1 + 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index f91874582ab3..c3b1dc8a21a7 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -244,6 +244,22 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset +#define qe_setbits32(_addr, _v) iowrite32be(ioread32be(_addr) | (_v), (_addr)) +#define qe_clrbits32(_addr, _v) iowrite32be(ioread32be(_addr) & ~(_v), (_addr)) + +#define qe_setbits16(_addr, _v) iowrite16be(ioread16be(_addr) | (_v), (_addr)) +#define qe_clrbits16(_addr, _v) iowrite16be(ioread16be(_addr) & ~(_v), (_addr)) + +#define qe_setbits8(_addr, _v) iowrite8(ioread8(_addr) | (_v), (_addr)) +#define qe_clrbits8(_addr, _v) iowrite8(ioread8(_addr) & ~(_v), (_addr)) + +#define qe_clrsetbits32(addr, clear, set) \ + iowrite32be((ioread32be(addr) & ~(clear)) | (set), (addr)) +#define qe_clrsetbits16(addr, clear, set) \ + iowrite16be((ioread16be(addr) & ~(clear)) | (set), (addr)) +#define qe_clrsetbits8(addr, clear, set) \ + iowrite8((ioread8(addr) & ~(clear)) | (set), (addr)) + /* Structure that defines QE firmware binary files. * * See Documentation/powerpc/qe_firmware.txt for a description of these diff --git a/include/soc/fsl/qe/ucc.h b/include/soc/fsl/qe/ucc.h index 894f14cbb044..6bbbb597f2af 100644 --- a/include/soc/fsl/qe/ucc.h +++ b/include/soc/fsl/qe/ucc.h @@ -41,6 +41,10 @@ int ucc_set_qe_mux_mii_mng(unsigned int ucc_num); int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock, enum comm_dir mode); +int ucc_set_tdm_rxtx_clk(unsigned int tdm_num, enum qe_clock clock, + enum comm_dir mode); +int ucc_set_tdm_rxtx_sync(unsigned int tdm_num, enum qe_clock clock, + enum comm_dir mode); int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask); diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index 31548b7aa50e..b2633b7dcc00 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -118,6 +118,7 @@ enum ucc_fast_transparent_tcrc { /* Fast UCC initialization structure */ struct ucc_fast_info { int ucc_num; + int tdm_num; enum qe_clock rx_clock; enum qe_clock tx_clock; enum qe_clock rx_sync; -- cgit From 19163ac3123e7fef8b1ecb2f1d4223f58ed5e884 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:00 +0800 Subject: fsl/qe: Make regs resouce_size_t Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- include/soc/fsl/qe/ucc_fast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index b2633b7dcc00..e898895223f9 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -123,7 +123,7 @@ struct ucc_fast_info { enum qe_clock tx_clock; enum qe_clock rx_sync; enum qe_clock tx_sync; - u32 regs; + resource_size_t regs; int irq; u32 uccm_mask; int bd_mem_part; -- cgit From 35ef1c20fdb26779b6c3c4fd74bbdd5028e70005 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:01 +0800 Subject: fsl/qe: Add QE TDM lib QE has module to support TDM, some other protocols supported by QE are based on TDM. add a qe-tdm lib, this lib provides functions to the protocols using TDM to configurate QE-TDM. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/immap_qe.h | 5 +-- include/soc/fsl/qe/qe_tdm.h | 94 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 include/soc/fsl/qe/qe_tdm.h (limited to 'include') diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h index bedbff891423..c76ef30b05ba 100644 --- a/include/soc/fsl/qe/immap_qe.h +++ b/include/soc/fsl/qe/immap_qe.h @@ -159,10 +159,7 @@ struct spi { /* SI */ struct si1 { - __be16 siamr1; /* SI1 TDMA mode register */ - __be16 sibmr1; /* SI1 TDMB mode register */ - __be16 sicmr1; /* SI1 TDMC mode register */ - __be16 sidmr1; /* SI1 TDMD mode register */ + __be16 sixmr1[4]; /* SI1 TDMx (x = A B C D) mode register */ u8 siglmr1_h; /* SI1 global mode register high */ u8 res0[0x1]; u8 sicmdr1_h; /* SI1 command register high */ diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h new file mode 100644 index 000000000000..4c91498ab117 --- /dev/null +++ b/include/soc/fsl/qe/qe_tdm.h @@ -0,0 +1,94 @@ +/* + * Internal header file for QE TDM mode routines. + * + * Copyright (C) 2016 Freescale Semiconductor, Inc. All rights reserved. + * + * Authors: Zhao Qiang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version + */ + +#ifndef CONFIG_QE_TDM_H +#define CONFIG_QE_TDM_H + +#include +#include + +#include +#include + +#include +#include + +/* SI RAM entries */ +#define SIR_LAST 0x0001 +#define SIR_BYTE 0x0002 +#define SIR_CNT(x) ((x) << 2) +#define SIR_CSEL(x) ((x) << 5) +#define SIR_SGS 0x0200 +#define SIR_SWTR 0x4000 +#define SIR_MCC 0x8000 +#define SIR_IDLE 0 + +/* SIxMR fields */ +#define SIMR_SAD(x) ((x) << 12) +#define SIMR_SDM_NORMAL 0x0000 +#define SIMR_SDM_INTERNAL_LOOPBACK 0x0800 +#define SIMR_SDM_MASK 0x0c00 +#define SIMR_CRT 0x0040 +#define SIMR_SL 0x0020 +#define SIMR_CE 0x0010 +#define SIMR_FE 0x0008 +#define SIMR_GM 0x0004 +#define SIMR_TFSD(n) (n) +#define SIMR_RFSD(n) ((n) << 8) + +enum tdm_ts_t { + TDM_TX_TS, + TDM_RX_TS +}; + +enum tdm_framer_t { + TDM_FRAMER_T1, + TDM_FRAMER_E1 +}; + +enum tdm_mode_t { + TDM_INTERNAL_LOOPBACK, + TDM_NORMAL +}; + +struct si_mode_info { + u8 simr_rfsd; + u8 simr_tfsd; + u8 simr_crt; + u8 simr_sl; + u8 simr_ce; + u8 simr_fe; + u8 simr_gm; +}; + +struct ucc_tdm_info { + struct ucc_fast_info uf_info; + struct si_mode_info si_info; +}; + +struct ucc_tdm { + u16 tdm_port; /* port for this tdm:TDMA,TDMB */ + u32 siram_entry_id; + u16 __iomem *siram; + struct si1 __iomem *si_regs; + enum tdm_framer_t tdm_framer_type; + enum tdm_mode_t tdm_mode; + u8 num_of_ts; /* the number of timeslots in this tdm frame */ + u32 tx_ts_mask; /* tx time slot mask */ + u32 rx_ts_mask; /* rx time slot mask */ +}; + +int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm, + struct ucc_tdm_info *ut_info); +void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info); +#endif -- cgit From c19b6d246a35627c3a69b2fa6bdece212b48214b Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 6 Jun 2016 14:30:02 +0800 Subject: drivers/net: support hdlc function for QE-UCC The driver add hdlc support for Freescale QUICC Engine. It support NMSI and TSA mode. Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- include/soc/fsl/qe/qe.h | 1 + include/soc/fsl/qe/ucc_fast.h | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index c3b1dc8a21a7..70339d7958c0 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -657,6 +657,7 @@ struct ucc_slow_pram { #define UCC_SLOW_GUMR_L_MODE_QMC 0x00000002 /* General UCC FAST Mode Register */ +#define UCC_FAST_GUMR_LOOPBACK 0x40000000 #define UCC_FAST_GUMR_TCI 0x20000000 #define UCC_FAST_GUMR_TRX 0x10000000 #define UCC_FAST_GUMR_TTX 0x08000000 diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index e898895223f9..3ee9e7c1a7d7 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -21,19 +21,37 @@ #include -/* Receive BD's status */ +/* Receive BD's status and length*/ #define R_E 0x80000000 /* buffer empty */ #define R_W 0x20000000 /* wrap bit */ #define R_I 0x10000000 /* interrupt on reception */ #define R_L 0x08000000 /* last */ #define R_F 0x04000000 /* first */ -/* transmit BD's status */ +/* transmit BD's status and length*/ #define T_R 0x80000000 /* ready bit */ #define T_W 0x20000000 /* wrap bit */ #define T_I 0x10000000 /* interrupt on completion */ #define T_L 0x08000000 /* last */ +/* Receive BD's status */ +#define R_E_S 0x8000 /* buffer empty */ +#define R_W_S 0x2000 /* wrap bit */ +#define R_I_S 0x1000 /* interrupt on reception */ +#define R_L_S 0x0800 /* last */ +#define R_F_S 0x0400 /* first */ +#define R_CM_S 0x0200 /* continuous mode */ +#define R_CR_S 0x0004 /* crc */ +#define R_OV_S 0x0002 /* crc */ + +/* transmit BD's status */ +#define T_R_S 0x8000 /* ready bit */ +#define T_W_S 0x2000 /* wrap bit */ +#define T_I_S 0x1000 /* interrupt on completion */ +#define T_L_S 0x0800 /* last */ +#define T_TC_S 0x0400 /* crc */ +#define T_TM_S 0x0200 /* continuous mode */ + /* Rx Data buffer must be 4 bytes aligned in most cases */ #define UCC_FAST_RX_ALIGN 4 #define UCC_FAST_MRBLR_ALIGNMENT 4 -- cgit From f9eb8aea2a1e12fc2f584d1627deeb957435a801 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:15 -0700 Subject: net_sched: transform qdisc running bit into a seqcount Instead of using a single bit (__QDISC___STATE_RUNNING) in sch->__state, use a seqcount. This adds lockdep support, but more importantly it will allow us to sample qdisc/class statistics without having to grab qdisc root lock. Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/net/sch_generic.h | 15 ++++----------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa6df2699532..59d7e06d88d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1862,6 +1862,7 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; + struct lock_class_key *qdisc_running_key; bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..bff8d895ef8a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -29,13 +29,6 @@ enum qdisc_state_t { __QDISC_STATE_THROTTLED, }; -/* - * following bits are only changed while qdisc lock is held - */ -enum qdisc___state_t { - __QDISC___STATE_RUNNING = 1, -}; - struct qdisc_size_table { struct rcu_head rcu; struct list_head list; @@ -93,7 +86,7 @@ struct Qdisc { unsigned long state; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; - unsigned int __state; + seqcount_t running; struct gnet_stats_queue qstats; struct rcu_head rcu_head; int padded; @@ -104,20 +97,20 @@ struct Qdisc { static inline bool qdisc_is_running(const struct Qdisc *qdisc) { - return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false; + return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - qdisc->__state |= __QDISC___STATE_RUNNING; + write_seqcount_begin(&qdisc->running); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { - qdisc->__state &= ~__QDISC___STATE_RUNNING; + write_seqcount_end(&qdisc->running); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) -- cgit From edb09eb17ed89eaa82a52dd306beac93e292b485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:16 -0700 Subject: net: sched: do not acquire qdisc spinlock in qdisc/class stats dump Large tc dumps (tc -s {qdisc|class} sh dev ethX) done by Google BwE host agent [1] are problematic at scale : For each qdisc/class found in the dump, we currently lock the root qdisc spinlock in order to get stats. Sampling stats every 5 seconds from thousands of HTB classes is a challenge when the root qdisc spinlock is under high pressure. Not only the dumps take time, they also slow down the fast path (queue/dequeue packets) by 10 % to 20 % in some cases. An audit of existing qdiscs showed that sch_fq_codel is the only qdisc that might need the qdisc lock in fq_codel_dump_stats() and fq_codel_dump_class_stats() In v2 of this patch, I now use the Qdisc running seqcount to provide consistent reads of packets/bytes counters, regardless of 32/64 bit arches. I also changed rate estimators to use the same infrastructure so that they no longer need to lock root qdisc lock. [1] http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43838.pdf Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kevin Athey Cc: Xiaotian Pei Signed-off-by: David S. 
Miller --- include/net/gen_stats.h | 12 ++++++++---- include/net/sch_generic.h | 8 ++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 610cd397890e..231e121cc7d9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(struct gnet_dump *d, +int gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats, +void __gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, @@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est64 *rate_est); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est64 *rate_est); #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bff8d895ef8a..c4f5749342ec 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -314,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } +static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return &root->running; +} + static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; -- cgit From 6f23d96cfa4fb68c4c9683f161f831057a5a134f Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 8 Jun 2016 11:36:56 +0200 Subject: fsl/qe: Do not prefix header guard with CONFIG_ The CONFIG_ prefix should only be used for options which can be configured through Kconfig and not for guarding headers. Signed-off-by: Andreas Ziegler Signed-off-by: David S. Miller --- include/soc/fsl/qe/qe_tdm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h index 4c91498ab117..a1664b635f1a 100644 --- a/include/soc/fsl/qe/qe_tdm.h +++ b/include/soc/fsl/qe/qe_tdm.h @@ -11,8 +11,8 @@ * option) any later version */ -#ifndef CONFIG_QE_TDM_H -#define CONFIG_QE_TDM_H +#ifndef _QE_TDM_H_ +#define _QE_TDM_H_ #include #include -- cgit From 6ad8c632ee48ae099aa13704ef18a641220fe211 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 8 Jun 2016 06:22:10 -0400 Subject: qed: Add support for query/config dcbx. Query API reads the dcbx data from the device shared memory and return it to the caller. 
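For illustration only, and not as part of the patch, a protocol driver that has obtained a filled-in struct qed_dcbx_get (for example through a hypothetical qed_dcbx_query() helper; only the structure layout comes from the diff below) might summarize the negotiated state roughly as follows, assuming kernel context and a linux/qed/qed_if.h built with CONFIG_DCB:

/* Sketch: walk the operational DCBX block of a filled-in struct
 * qed_dcbx_get.  Illustrative only; the query helper that fills the
 * structure is hypothetical.
 */
static void example_dump_dcbx(const struct qed_dcbx_get *get)
{
	const struct qed_dcbx_operational_params *op = &get->operational;
	int i;

	if (!op->valid || !op->enabled)
		return;		/* nothing negotiated yet */

	pr_info("dcbx: %s mode, pfc %sabled, max ets tc %u\n",
		op->ieee ? "IEEE" : op->cee ? "CEE" : "unknown",
		op->params.pfc.enabled ? "en" : "dis",
		op->params.max_ets_tc);

	for (i = 0; i < op->params.num_app_entries &&
		    i < QED_DCBX_MAX_APP_PROTOCOL; i++) {
		const struct qed_app_entry *app = &op->params.app_entry[i];

		if (app->enabled)
			pr_info("dcbx: app %#x (%s) -> prio %u\n",
				app->proto_id,
				app->ethtype ? "ethtype" : "port",
				app->prio);
	}
}

The remote and local blocks of struct qed_dcbx_get carry the peer's and the locally administered (admin) parameters respectively; a consumer normally acts on the operational block once negotiation has completed.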
The config API configures the user provided dcbx values on the device, and initiates the dcbx negotiation with the peer. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e8cc49f9688a..e1d5122e8a96 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,96 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#ifdef CONFIG_DCB +#define QED_LLDP_CHASSIS_ID_STAT_LEN 4 +#define QED_LLDP_PORT_ID_STAT_LEN 4 +#define QED_DCBX_MAX_APP_PROTOCOL 32 +#define QED_MAX_PFC_PRIORITIES 8 +#define QED_DCBX_DSCP_SIZE 64 + +struct qed_dcbx_lldp_remote { + u32 peer_chassis_id[QED_LLDP_CHASSIS_ID_STAT_LEN]; + u32 peer_port_id[QED_LLDP_PORT_ID_STAT_LEN]; + bool enable_rx; + bool enable_tx; + u32 tx_interval; + u32 max_credit; +}; + +struct qed_dcbx_lldp_local { + u32 local_chassis_id[QED_LLDP_CHASSIS_ID_STAT_LEN]; + u32 local_port_id[QED_LLDP_PORT_ID_STAT_LEN]; +}; + +struct qed_dcbx_app_prio { + u8 roce; + u8 roce_v2; + u8 fcoe; + u8 iscsi; + u8 eth; +}; + +struct qed_dbcx_pfc_params { + bool willing; + bool enabled; + u8 prio[QED_MAX_PFC_PRIORITIES]; + u8 max_tc; +}; + +struct qed_app_entry { + bool ethtype; + bool enabled; + u8 prio; + u16 proto_id; + enum dcbx_protocol_type proto_type; +}; + +struct qed_dcbx_params { + struct qed_app_entry app_entry[QED_DCBX_MAX_APP_PROTOCOL]; + u16 num_app_entries; + bool app_willing; + bool app_valid; + bool app_error; + bool ets_willing; + bool ets_enabled; + bool ets_cbs; + bool valid; + u8 ets_pri_tc_tbl[QED_MAX_PFC_PRIORITIES]; + u8 ets_tc_bw_tbl[QED_MAX_PFC_PRIORITIES]; + u8 ets_tc_tsa_tbl[QED_MAX_PFC_PRIORITIES]; + struct qed_dbcx_pfc_params pfc; + u8 max_ets_tc; +}; + +struct qed_dcbx_admin_params { + struct qed_dcbx_params params; + bool valid; +}; + +struct qed_dcbx_remote_params { + struct qed_dcbx_params params; + bool valid; +}; + +struct qed_dcbx_operational_params { + struct qed_dcbx_app_prio app_prio; + struct qed_dcbx_params params; + bool valid; + bool enabled; + bool ieee; + bool cee; + u32 err; +}; + +struct qed_dcbx_get { + struct qed_dcbx_operational_params operational; + struct qed_dcbx_lldp_remote lldp_remote; + struct qed_dcbx_lldp_local lldp_local; + struct qed_dcbx_remote_params remote; + struct qed_dcbx_admin_params local; +}; +#endif + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, -- cgit From a1d8d8a51e8317269dd127d94b9de14f67d9563f Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 8 Jun 2016 06:22:11 -0400 Subject: qed: Add dcbnl support. This patch adds the implementation for both cee/ieee dcbnl callbacks by using the qed query/config APIs. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index f8ff71126d9e..71d523b4bc54 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -128,11 +128,73 @@ struct qed_eth_cb_ops { void (*force_mac) (void *dev, u8 *mac); }; +#ifdef CONFIG_DCB +/* Prototype declaration of qed_eth_dcbnl_ops should match with the declaration + * of dcbnl_rtnl_ops structure. 
+ */ +struct qed_eth_dcbnl_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_setpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_getets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_setets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_peer_getets)(struct qed_dev *cdev, struct ieee_ets *ets); + int (*ieee_peer_getpfc)(struct qed_dev *cdev, struct ieee_pfc *pfc); + int (*ieee_getapp)(struct qed_dev *cdev, struct dcb_app *app); + int (*ieee_setapp)(struct qed_dev *cdev, struct dcb_app *app); + + /* CEE std */ + u8 (*getstate)(struct qed_dev *cdev); + u8 (*setstate)(struct qed_dev *cdev, u8 state); + void (*getpgtccfgtx)(struct qed_dev *cdev, int prio, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map); + void (*getpgbwgcfgtx)(struct qed_dev *cdev, int pgid, u8 *bw_pct); + void (*getpgtccfgrx)(struct qed_dev *cdev, int prio, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map); + void (*getpgbwgcfgrx)(struct qed_dev *cdev, int pgid, u8 *bw_pct); + void (*getpfccfg)(struct qed_dev *cdev, int prio, u8 *setting); + void (*setpfccfg)(struct qed_dev *cdev, int prio, u8 setting); + u8 (*getcap)(struct qed_dev *cdev, int capid, u8 *cap); + int (*getnumtcs)(struct qed_dev *cdev, int tcid, u8 *num); + u8 (*getpfcstate)(struct qed_dev *cdev); + int (*getapp)(struct qed_dev *cdev, u8 idtype, u16 id); + u8 (*getfeatcfg)(struct qed_dev *cdev, int featid, u8 *flags); + + /* DCBX configuration */ + u8 (*getdcbx)(struct qed_dev *cdev); + void (*setpgtccfgtx)(struct qed_dev *cdev, int prio, + u8 pri_type, u8 pgid, u8 bw_pct, u8 up_map); + void (*setpgtccfgrx)(struct qed_dev *cdev, int prio, + u8 pri_type, u8 pgid, u8 bw_pct, u8 up_map); + void (*setpgbwgcfgtx)(struct qed_dev *cdev, int pgid, u8 bw_pct); + void (*setpgbwgcfgrx)(struct qed_dev *cdev, int pgid, u8 bw_pct); + u8 (*setall)(struct qed_dev *cdev); + int (*setnumtcs)(struct qed_dev *cdev, int tcid, u8 num); + void (*setpfcstate)(struct qed_dev *cdev, u8 state); + int (*setapp)(struct qed_dev *cdev, u8 idtype, u16 idval, u8 up); + u8 (*setdcbx)(struct qed_dev *cdev, u8 state); + u8 (*setfeatcfg)(struct qed_dev *cdev, int featid, u8 flags); + + /* Peer apps */ + int (*peer_getappinfo)(struct qed_dev *cdev, + struct dcb_peer_app_info *info, + u16 *app_count); + int (*peer_getapptable)(struct qed_dev *cdev, struct dcb_app *table); + + /* CEE peer */ + int (*cee_peer_getpfc)(struct qed_dev *cdev, struct cee_pfc *pfc); + int (*cee_peer_getpg)(struct qed_dev *cdev, struct cee_pg *pg); +}; +#endif + struct qed_eth_ops { const struct qed_common_ops *common; #ifdef CONFIG_QED_SRIOV const struct qed_iov_hv_ops *iov; #endif +#ifdef CONFIG_DCB + const struct qed_eth_dcbnl_ops *dcb; +#endif int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); -- cgit From 123b36526592f009bf8eccb7c8833aeda296d9cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 07:22:49 -0700 Subject: net: sched: fix missing doc annotations "make htmldocs" complains otherwise: .//net/core/gen_stats.c:168: warning: No description found for parameter 'running' .//include/linux/netdevice.h:1867: warning: No description found for parameter 'qdisc_running_key' Fixes: f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59d7e06d88d5..541562333ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1594,7 +1594,8 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * - * @qdisc_tx_busylock: XXX: need comments on this one + * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the -- cgit From 0c73c523cf737b5d446705392e0e14ee0411a351 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Jun 2016 16:32:42 -0700 Subject: net: dsa: Initialize CPU port ethtool ops per tree Now that we can properly support multiple distinct trees in the system, using a global variable: dsa_cpu_port_ethtool_ops is getting clobbered as soon as the second switch tree gets probed, and we don't want that. We need to move this to be dynamically allocated, and since we can't really be comparing addresses anymore to determine first time initialization versus any other times, just move this to dsa.c and dsa2.c where the remainder of the dst/ds initialization happens. The operations teardown restores the master netdev's ethtool_ops to its original ethtool_ops pointer (typically within the Ethernet driver) Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index cca7ef230742..20b3087ad193 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,6 +116,7 @@ struct dsa_switch_tree { * Original copy of the master netdev ethtool_ops */ struct ethtool_ops master_ethtool_ops; + const struct ethtool_ops *master_orig_ethtool_ops; /* * The switch and port to which the CPU is attached. -- cgit From 96c63fa7393d0a346acfe5a91e0c7d4c7782641b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 8 Jun 2016 10:55:39 -0700 Subject: net: Add l3mdev rule Currently, VRFs require 1 oif and 1 iif rule per address family per VRF. As the number of VRF devices increases it brings scalability issues with the increasing rule list. All of the VRF rules have the same format with the exception of the specific table id to direct the lookup. Since the table id is available from the oif or iif in the loopup, the VRF rules can be consolidated to a single rule that pulls the table from the VRF device. This patch introduces a new rule attribute l3mdev. The l3mdev rule means the table id used for the lookup is pulled from the L3 master device (e.g., VRF) rather than being statically defined. With the l3mdev rule all of the basic VRF FIB rules are reduced to 1 l3mdev rule per address family (IPv4 and IPv6). If an admin wishes to insert higher priority rules for specific VRFs those rules will co-exist with the l3mdev rule. This capability means current VRF scripts will co-exist with this new simpler implementation. 
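To make the consolidated rule concrete, the sketch below shows what a user-space tool might send to install the single IPv4 l3mdev rule over rtnetlink. It is an editorial illustration, not part of this patch: it assumes libmnl and uapi headers that already define FRA_L3MDEV (iproute2 support was still a prototype at this point), and it omits the netlink ACK handling.

/* Install "from all lookup [l3mdev table]" for IPv4 (sketch only). */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

static int add_ipv4_l3mdev_rule(uint32_t priority)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;
	int err = 0;

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWRULE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;

	frh = mnl_nlmsg_put_extra_header(nlh, sizeof(*frh));
	memset(frh, 0, sizeof(*frh));
	frh->family = AF_INET;
	frh->action = FR_ACT_TO_TBL;	/* table id comes from the VRF, not the rule */

	mnl_attr_put_u32(nlh, FRA_PRIORITY, priority);	/* e.g. 1000 */
	mnl_attr_put_u8(nlh, FRA_L3MDEV, 1);		/* defer table to l3mdev */

	nl = mnl_socket_open(NETLINK_ROUTE);
	if (!nl)
		return -1;
	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0 ||
	    mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		err = -1;
	mnl_socket_close(nl);
	return err;
}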
Currently, the rules list for both ipv4 and ipv6 look like this: $ ip ru ls 1000: from all oif vrf1 lookup 1001 1000: from all iif vrf1 lookup 1001 1000: from all oif vrf2 lookup 1002 1000: from all iif vrf2 lookup 1002 1000: from all oif vrf3 lookup 1003 1000: from all iif vrf3 lookup 1003 1000: from all oif vrf4 lookup 1004 1000: from all iif vrf4 lookup 1004 1000: from all oif vrf5 lookup 1005 1000: from all iif vrf5 lookup 1005 1000: from all oif vrf6 lookup 1006 1000: from all iif vrf6 lookup 1006 1000: from all oif vrf7 lookup 1007 1000: from all iif vrf7 lookup 1007 1000: from all oif vrf8 lookup 1008 1000: from all iif vrf8 lookup 1008 ... 32765: from all lookup local 32766: from all lookup main 32767: from all lookup default With the l3mdev rule the list is just the following regardless of the number of VRFs: $ ip ru ls 1000: from all lookup [l3mdev table] 32765: from all lookup local 32766: from all lookup main 32767: from all lookup default (Note: the above pretty print of the rule is based on an iproute2 prototype. Actual verbage may change) Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/fib_rules.h | 24 ++++++++++++++++++++++-- include/net/l3mdev.h | 12 ++++++++++++ include/uapi/linux/fib_rules.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 59160de702b6..456e4a6006ab 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -17,7 +17,8 @@ struct fib_rule { u32 flags; u32 table; u8 action; - /* 3 bytes hole, try to use */ + u8 l3mdev; + /* 2 bytes hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -36,6 +37,7 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 @@ -89,7 +91,8 @@ struct fib_rules_ops { [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_L3MDEV] = { .type = NLA_U8 } static inline void fib_rule_get(struct fib_rule *rule) { @@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule) kfree_rcu(rule, rcu); } +#ifdef CONFIG_NET_L3_MASTER_DEV +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->l3mdev ? 
arg->table : rule->table; +} +#else +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->table; +} +#endif + static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) @@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); #endif diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 374388dc01c8..34f33eb96a5e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,8 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include + /** * struct l3mdev_ops - l3mdev operations * @@ -41,6 +43,9 @@ struct l3mdev_ops { #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -236,6 +241,13 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return skb; } + +static inline +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg) +{ + return 1; +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 620c8a5ddc00..14404b3ebb89 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -50,6 +50,7 @@ enum { FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ __FRA_MAX }; -- cgit From dd47c1fa776cda48531b651c88341e951140b0a7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:40 +0200 Subject: cbq: remove TCA_CBQ_POLICE support iproute2 doesn't implement any cbq option that results in this attribute being sent to kernel. To make use of it, user would have to - patch iproute2 - add a class - attach a qdisc to the class (default pfifo doesn't work as q->handle is 0 and cbq_set_police() is a no-op in this case) - re-'add' the same class (tc class change ...) again - user must also specifiy a defmap (e.g. 'split 1:0 defmap 3f'), since this 'police' feature relies on its presence - the added qdisc must be one of bfifo, pfifo or netem If all of these conditions are met and _some_ leaf qdiscs, namely p/bfifo, netem, plug or tbf would drop a packet, kernel calls back into cbq, which will attempt to re-queue the skb into a different class as indicated by the parents' defmap entry for TC_PRIO_BESTEFFORT. [ i.e. we behave as if tc_classify returned TC_ACT_RECLASSIFY ]. This feature, which isn't documented or implemented in iproute2, and isn't implemented consistently (most qdiscs like sfq, codel, etc drop right away instead of attempting this reclassification) is the sole reason for the reshape_fail and __parent member in Qdisc struct. So remove TCA_CBQ_POLICE support from the kernel, reject it via EOPNOTSUPP so userspace knows we don't support it, and then remove no-longer needed infrastructure in followup commit. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c4f5749342ec..c069ac1dd75d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -68,10 +68,6 @@ struct Qdisc { void *u32_node; - /* This field is deprecated, but it is still used by CBQ - * and it will live until better solution will be invented. - */ - struct Qdisc *__parent; struct netdev_queue *dev_queue; struct gnet_stats_rate_est64 rate_est; -- cgit From c3a173d7dba2d7c74dd4ab871b8f22bf56ac10b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:41 +0200 Subject: sched: remove qdisc_rehape_fail After the removal of TCA_CBQ_POLICE in cbq scheduler qdisc->reshape_fail is always NULL, i.e. qdisc_rehape_fail is now the same as qdisc_drop. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c069ac1dd75d..a9aec633d467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -63,9 +63,6 @@ struct Qdisc { struct list_head list; u32 handle; u32 parent; - int (*reshape_fail)(struct sk_buff *skb, - struct Qdisc *q); - void *u32_node; struct netdev_queue *dev_queue; @@ -771,22 +768,6 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_DROP; } -static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_qstats_drop(sch); - -#ifdef CONFIG_NET_CLS_ACT - if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) - goto drop; - - return NET_XMIT_SUCCESS; - -drop: -#endif - kfree_skb(skb); - return NET_XMIT_DROP; -} - /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how long it will take to send a packet given its size. */ -- cgit From a09ceb0e08140a1eec05b49b4c232d3481339cb0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:42 +0200 Subject: sched: remove qdisc->drop after removal of TCA_CBQ_OVL_STRATEGY from cbq scheduler, there are no more callers of ->drop() outside of other ->drop functions, i.e. nothing calls them. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a9aec633d467..4dedb7f12ed5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -174,7 +174,6 @@ struct Qdisc_ops { int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - unsigned int (*drop)(struct Qdisc *); int (*init)(struct Qdisc *, struct nlattr *arg); void (*reset)(struct Qdisc *); @@ -658,22 +657,6 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) return __qdisc_queue_drop_head(sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __skb_dequeue_tail(list); - - if (likely(skb != NULL)) - qdisc_qstats_backlog_dec(sch, skb); - - return skb; -} - -static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) -{ - return __qdisc_dequeue_tail(sch, &sch->q); -} - static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { return skb_peek(&sch->q); @@ -741,25 +724,6 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } -static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); - - if (likely(skb != NULL)) { - unsigned int len = qdisc_pkt_len(skb); - kfree_skb(skb); - return len; - } - - return 0; -} - -static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) -{ - return __qdisc_queue_drop(sch, &sch->q); -} - static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { kfree_skb(skb); -- cgit From c8945043cdc687388b7a43fc6f474bddd9607e80 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Jun 2016 00:27:43 +0200 Subject: sched: place state, next_sched and gso_skb in same cacheline again Earlier commits removed two members from struct Qdisc which places next_sched/gso_skb into a different cacheline than ->state. This restores the struct layout to what it was before the removal. Move the two members, then add an annotation so they all reside in the same cacheline. This adds a 16 byte hole after cpu_qstats. The hole could be closed but as it doesn't decrease total struct size just do it this way. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4dedb7f12ed5..49534e28824b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,11 +71,11 @@ struct Qdisc { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; - struct Qdisc *next_sched; - struct sk_buff *gso_skb; /* * For performance sake on SMP, we put highly modified fields at the end */ + struct Qdisc *next_sched ____cacheline_aligned_in_smp; + struct sk_buff *gso_skb; unsigned long state; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; -- cgit From 1dad640b9ef320e8de92c418bcc08448d67590a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 10:14:39 +0200 Subject: wext: reformat struct/union declarations Everytime I need to look for these, my usual strategy fails because it assumes the right formatting. 
Fix the formatting here to make it consistent with the rest of the kernel. Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 63 +++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index c1592e3e4036..d9ecd7c6d691 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -670,8 +670,7 @@ /* * Generic format for most parameters that fit in an int */ -struct iw_param -{ +struct iw_param { __s32 value; /* The value of the parameter itself */ __u8 fixed; /* Hardware should not use auto select */ __u8 disabled; /* Disable the feature */ @@ -682,8 +681,7 @@ struct iw_param * For all data larger than 16 octets, we need to use a * pointer to memory allocated in user space. */ -struct iw_point -{ +struct iw_point { void __user *pointer; /* Pointer to the data (in user space) */ __u16 length; /* number of fields or size in bytes */ __u16 flags; /* Optional params */ @@ -698,8 +696,7 @@ struct iw_point * of 10 to get 'm' lower than 10^9, with 'm'= f / (10^'e')... * The power of 10 is in 'e', the result of the division is in 'm'. */ -struct iw_freq -{ +struct iw_freq { __s32 m; /* Mantissa */ __s16 e; /* Exponent */ __u8 i; /* List index (when in range struct) */ @@ -709,8 +706,7 @@ struct iw_freq /* * Quality of the link */ -struct iw_quality -{ +struct iw_quality { __u8 qual; /* link quality (%retries, SNR, %missed beacons or better...) */ __u8 level; /* signal level (dBm) */ @@ -725,8 +721,7 @@ struct iw_quality * is already pretty exhaustive, and you should use that first. * This is only additional stats... */ -struct iw_discarded -{ +struct iw_discarded { __u32 nwid; /* Rx : Wrong nwid/essid */ __u32 code; /* Rx : Unable to code/decode (WEP) */ __u32 fragment; /* Rx : Can't perform MAC reassembly */ @@ -738,16 +733,14 @@ struct iw_discarded * Packet/Time period missed in the wireless adapter due to * "wireless" specific problems... */ -struct iw_missed -{ +struct iw_missed { __u32 beacon; /* Missed beacons/superframe */ }; /* * Quality range (for spy threshold) */ -struct iw_thrspy -{ +struct iw_thrspy { struct sockaddr addr; /* Source address (hw/mac) */ struct iw_quality qual; /* Quality of the link */ struct iw_quality low; /* Low threshold */ @@ -765,8 +758,7 @@ struct iw_thrspy * Especially, scan results are required to include an entry for the * current BSS if the driver is in Managed mode and associated with an AP. */ -struct iw_scan_req -{ +struct iw_scan_req { __u8 scan_type; /* IW_SCAN_TYPE_{ACTIVE,PASSIVE} */ __u8 essid_len; __u8 num_channels; /* num entries in channel_list; @@ -827,8 +819,7 @@ struct iw_scan_req * RX_SEQ_VALID for SIOCGIWENCODEEXT are optional, but can be useful for * debugging/testing. 
*/ -struct iw_encode_ext -{ +struct iw_encode_ext { __u32 ext_flags; /* IW_ENCODE_EXT_* */ __u8 tx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ __u8 rx_seq[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ @@ -841,8 +832,7 @@ struct iw_encode_ext }; /* SIOCSIWMLME data */ -struct iw_mlme -{ +struct iw_mlme { __u16 cmd; /* IW_MLME_* */ __u16 reason_code; struct sockaddr addr; @@ -855,16 +845,14 @@ struct iw_mlme #define IW_PMKID_LEN 16 -struct iw_pmksa -{ +struct iw_pmksa { __u32 cmd; /* IW_PMKSA_* */ struct sockaddr bssid; __u8 pmkid[IW_PMKID_LEN]; }; /* IWEVMICHAELMICFAILURE data */ -struct iw_michaelmicfailure -{ +struct iw_michaelmicfailure { __u32 flags; struct sockaddr src_addr; __u8 tsc[IW_ENCODE_SEQ_MAX_SIZE]; /* LSB first */ @@ -872,8 +860,7 @@ struct iw_michaelmicfailure /* IWEVPMKIDCAND data */ #define IW_PMKID_CAND_PREAUTH 0x00000001 /* RNS pre-authentication enabled */ -struct iw_pmkid_cand -{ +struct iw_pmkid_cand { __u32 flags; /* IW_PMKID_CAND_* */ __u32 index; /* the smaller the index, the higher the * priority */ @@ -884,8 +871,7 @@ struct iw_pmkid_cand /* * Wireless statistics (used for /proc/net/wireless) */ -struct iw_statistics -{ +struct iw_statistics { __u16 status; /* Status * - device dependent for now */ @@ -897,7 +883,7 @@ struct iw_statistics /* ------------------------ IOCTL REQUEST ------------------------ */ /* - * This structure defines the payload of an ioctl, and is used + * This structure defines the payload of an ioctl, and is used * below. * * Note that this structure should fit on the memory footprint @@ -906,8 +892,7 @@ struct iw_statistics * You should check this when increasing the structures defined * above in this file... */ -union iwreq_data -{ +union iwreq_data { /* Config - generic */ char name[IFNAMSIZ]; /* Name : used to verify the presence of wireless extensions. @@ -944,15 +929,14 @@ union iwreq_data * convenience... * Do I need to remind you about structure size (32 octets) ? */ -struct iwreq -{ +struct iwreq { union { char ifrn_name[IFNAMSIZ]; /* if name, e.g. "eth0" */ } ifr_ifrn; /* Data part (defined just above) */ - union iwreq_data u; + union iwreq_data u; }; /* -------------------------- IOCTL DATA -------------------------- */ @@ -965,8 +949,7 @@ struct iwreq * Range of parameters */ -struct iw_range -{ +struct iw_range { /* Informative stuff (to choose between different interface) */ __u32 throughput; /* To give an idea... */ /* In theory this value should be the maximum benchmarked @@ -1069,9 +1052,8 @@ struct iw_range /* * Private ioctl interface information */ - -struct iw_priv_args -{ + +struct iw_priv_args { __u32 cmd; /* Number of the ioctl to issue */ __u16 set_args; /* Type and number of args */ __u16 get_args; /* Type and number of args */ @@ -1088,8 +1070,7 @@ struct iw_priv_args /* * A Wireless Event. Contains basically the same data as the ioctl... */ -struct iw_event -{ +struct iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ union iwreq_data u; /* IOCTL fixed payload */ -- cgit From 80a83cfc434b1e3afe38974570b460db4898bec6 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 19 May 2016 10:37:48 +0200 Subject: mac80211: skip netdev queue control with software queuing Qdiscs are designed with no regard to 802.11 aggregation requirements and hand out packet-by-packet with no guarantee they are destined to the same tid. This does more bad than good no matter how fairly a given qdisc may behave on an ethernet interface. 
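For context, with software queuing the driver pulls frames per TXQ (per station/tid) itself instead of having qdiscs hand them over packet-by-packet through netdev subqueues; a minimal driver-side sketch, in which everything prefixed drv_ is hypothetical hardware glue rather than mac80211 API, looks roughly like this:

#include <net/mac80211.h>

/* Sketch of a wake_tx_queue() handler: drain the per-tid queue into the
 * hardware for as long as there is room, using ieee80211_tx_dequeue(). */
static void drv_wake_tx_queue(struct ieee80211_hw *hw,
                              struct ieee80211_txq *txq)
{
        struct sk_buff *skb;

        while (drv_hw_has_tx_room(hw)) {        /* hypothetical fill-level check */
                skb = ieee80211_tx_dequeue(hw, txq);
                if (!skb)
                        break;                  /* txq is empty */
                drv_hw_push_frame(hw, txq, skb);        /* hypothetical */
        }
}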
Software queuing used per-AC netdev subqueue congestion control whenever a global AC limit was hit. This meant in practice a single station or tid queue could starve others rather easily. This could resonate with qdiscs in a bad way or could just end up with poor aggregation performance. Increasing the AC limit would increase induced latency which is also bad. Disabling qdiscs by default and performing taildrop instead of netdev subqueue congestion control on the other hand makes it possible for tid queues to fill up "in the meantime" while preventing stations starving each other. This increases aggregation opportunities and should allow software queuing based drivers to achieve better performance by utilizing airtime more efficiently with big aggregates. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index be30b0549b88..a8683aec6dbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2147,9 +2147,6 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. - * - * @txq_ac_max_pending: maximum number of frames per AC pending in all txq * entries for a vif. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2180,7 +2177,6 @@ struct ieee80211_hw { u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; - int txq_ac_max_pending; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, -- cgit From 5caa328e3811b7cfa33fd02c93280ffa622deb0e Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 19 May 2016 10:37:51 +0200 Subject: mac80211: implement codel on fair queuing flows There is no limit other than a global packet count limit when using software queuing. This means a single flow queue can grow insanely long. This is particularly bad for TCP congestion algorithms, which require a little more sophisticated frame dropping scheme than a mere headdrop on limit overflow. Hence apply (a slightly modified, to fit the knobs) CoDel5 on flow queues. This improves TCP convergence and stability when combined with a wireless driver which keeps its own tx queue/fifo at a minimum fill level for given link conditions. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a8683aec6dbe..a52009ffc19f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -21,6 +21,7 @@ #include #include #include +#include #include /** @@ -895,7 +896,18 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; + union { + /* NB: vif can be NULL for injected frames */ + struct ieee80211_vif *vif; + + /* When packets are enqueued on txq it's easy + * to re-construct the vif pointer. There's no + * more space in tx_info so it can be used to + * store the necessary enqueue time for packet * sojourn time computation.
+ */ + codel_time_t enqueue_time; + }; struct ieee80211_key_conf *hw_key; u32 flags; /* 4 bytes free */ -- cgit From 52fbb2907988aa0583c6d9d53a56aee090b2df7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jun 2016 07:45:11 -0700 Subject: net: sched: fix qdisc->running lockdep annotations 1) qdisc_run_begin() is really using the equivalent of a trylock. Instead of using write_seqcount_begin(), use a combination of raw_write_seqcount_begin() and correct lockdep annotation. 2) sch_direct_xmit() should use regular spin_lock(root_lock) Fixes: f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Signed-off-by: Eric Dumazet Reported-by: David Ahern Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 49534e28824b..a4c0f1649e2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -97,7 +97,11 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - write_seqcount_begin(&qdisc->running); + /* Variant of write_seqcount_begin() telling lockdep a trylock + * was attempted. + */ + raw_write_seqcount_begin(&qdisc->running); + seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; } -- cgit From d3fff6c443fe8f8a5ef2bdcea45e2ff39db948c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jun 2016 07:45:12 -0700 Subject: net: add netdev_lockdep_set_classes() helper It is time to add netdev_lockdep_set_classes() helper so that lockdep annotations per device type are easier to manage. This removes a lot of copies and missing annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 541562333ba5..4f234b102892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1946,6 +1946,23 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_running_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + (dev)->qdisc_running_key = &qdisc_running_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); -- cgit From 967dd82ffc52e9d8ea0defde094f9a39a3f4eeed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 9 Jun 2016 18:23:53 -0700 Subject: net: dsa: b53: Add support for Broadcom RoboSwitch This patch adds support for Broadcom's BCM53xx switch family, also known as RoboSwitch. Some of these switches are ubiquitous, found in home routers, Wi-Fi routers, DSL and cable modem gateways and other networking related products. This driver adds the library driver (b53_common.c) as well as a few bus glue drivers for MDIO, SPI, Switch Register Access Block (SRAB) and memory-mapped I/O into a SoC's address space (Broadcom BCM63xx/33xx).
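A hypothetical board-code sketch of how a SoC could describe such a memory-mapped switch through the platform data added below; the device name and chip id value here are placeholders and are not taken from this patch:

#include <linux/platform_device.h>
#include <linux/platform_data/b53.h>

static struct b53_platform_data example_b53_pdata = {
        .chip_id        = 0x25,         /* placeholder: a 5325-class switch id */
        .enabled_ports  = 0x1f,         /* user ports 0-4 */
        .big_endian     = 0,
        /* .regs would be set to the ioremap()ed switch register window */
};

static struct platform_device example_b53_device = {
        .name   = "b53-switch",         /* placeholder glue-driver name */
        .id     = -1,
        .dev    = {
                .platform_data = &example_b53_pdata,
        },
};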
Basic operations are supported to bring the Layer 1/2 up and running, but not much more at this point, subsequent patches add the remaining features. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/b53.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/platform_data/b53.h (limited to 'include') diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h new file mode 100644 index 000000000000..69d279c0da96 --- /dev/null +++ b/include/linux/platform_data/b53.h @@ -0,0 +1,33 @@ +/* + * B53 platform data + * + * Copyright (C) 2013 Jonas Gorski + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __B53_H +#define __B53_H + +#include + +struct b53_platform_data { + u32 chip_id; + u16 enabled_ports; + + /* only used by MMAP'd driver */ + unsigned big_endian:1; + void __iomem *regs; +}; + +#endif -- cgit From 2341e0775747864b684abe8627f3d45b167f2940 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Jun 2016 23:02:51 +0100 Subject: rxrpc: Simplify connect() implementation and simplify sendmsg() op Simplify the RxRPC connect() implementation. It will just note the destination address it is given, and if a sendmsg() comes along with no address, this will be assigned as the address. No transport struct will be held internally, which will allow us to remove this later. Simplify sendmsg() also. Whilst a call is active, userspace refers to it by a private unique user ID specified in a control message. When sendmsg() sees a user ID that doesn't map to an extant call, it creates a new call for that user ID and attempts to add it. If, when we try to add it, the user ID is now registered, we now reject the message with -EEXIST. We should never see this situation unless two threads are racing, trying to create a call with the same ID - which would be an error. It also isn't required to provide sendmsg() with an address - provided the control message data holds a user ID that maps to a currently active call. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/linux/rxrpc.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index a53915cd5581..1e8f216e2cf1 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -40,16 +40,18 @@ struct sockaddr_rxrpc { /* * RxRPC control messages + * - If neither abort or accept are specified, the message is a data message. 
* - terminal messages mean that a user call ID tag can be recycled + * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() */ -#define RXRPC_USER_CALL_ID 1 /* user call ID specifier */ -#define RXRPC_ABORT 2 /* abort request / notification [terminal] */ -#define RXRPC_ACK 3 /* [Server] RPC op final ACK received [terminal] */ -#define RXRPC_NET_ERROR 5 /* network error received [terminal] */ -#define RXRPC_BUSY 6 /* server busy received [terminal] */ -#define RXRPC_LOCAL_ERROR 7 /* local error generated [terminal] */ -#define RXRPC_NEW_CALL 8 /* [Server] new incoming call notification */ -#define RXRPC_ACCEPT 9 /* [Server] accept request */ +#define RXRPC_USER_CALL_ID 1 /* sr: user call ID specifier */ +#define RXRPC_ABORT 2 /* sr: abort request / notification [terminal] */ +#define RXRPC_ACK 3 /* -r: [Service] RPC op final ACK received [terminal] */ +#define RXRPC_NET_ERROR 5 /* -r: network error received [terminal] */ +#define RXRPC_BUSY 6 /* -r: server busy received [terminal] */ +#define RXRPC_LOCAL_ERROR 7 /* -r: local error generated [terminal] */ +#define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ +#define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ /* * RxRPC security levels -- cgit From e434863718d4b99dd0d6e0cefd3c5e79e4fa2083 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 9 Jun 2016 10:21:00 -0700 Subject: net: vrf: Fix crash when IPv6 is disabled at boot time Frank Kellermann reported a kernel crash with 4.5.0 when IPv6 is disabled at boot using the kernel option ipv6.disable=1. Using current net-next with the boot option: $ ip link add red type vrf table 1001 Generates: [12210.919584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000748 [12210.921341] IP: [] fib6_get_table+0x2c/0x5a [12210.922537] PGD b79e3067 PUD bb32b067 PMD 0 [12210.923479] Oops: 0000 [#1] SMP [12210.924001] Modules linked in: ipvlan 8021q garp mrp stp llc [12210.925130] CPU: 3 PID: 1177 Comm: ip Not tainted 4.7.0-rc1+ #235 [12210.926168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [12210.928065] task: ffff8800b9ac4640 ti: ffff8800bacac000 task.ti: ffff8800bacac000 [12210.929328] RIP: 0010:[] [] fib6_get_table+0x2c/0x5a [12210.930697] RSP: 0018:ffff8800bacaf888 EFLAGS: 00010202 [12210.931563] RAX: 0000000000000748 RBX: ffffffff81a9e280 RCX: ffff8800b9ac4e28 [12210.932688] RDX: 00000000000000e9 RSI: 0000000000000002 RDI: 0000000000000286 [12210.933820] RBP: ffff8800bacaf898 R08: ffff8800b9ac4df0 R09: 000000000052001b [12210.934941] R10: 00000000657c0000 R11: 000000000000c649 R12: 00000000000003e9 [12210.936032] R13: 00000000000003e9 R14: ffff8800bace7800 R15: ffff8800bb3ec000 [12210.937103] FS: 00007faa1766c700(0000) GS:ffff88013ac00000(0000) knlGS:0000000000000000 [12210.938321] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [12210.939166] CR2: 0000000000000748 CR3: 00000000b79d6000 CR4: 00000000000406e0 [12210.940278] Stack: [12210.940603] ffff8800bb3ec000 ffffffff81a9e280 ffff8800bacaf8c8 ffffffff814b3135 [12210.941818] ffff8800bb3ec000 ffffffff81a9e280 ffffffff81a9e280 ffff8800bace7800 [12210.943040] ffff8800bacaf8f0 ffffffff81397c88 ffff8800bb3ec000 ffffffff81a9e280 [12210.944288] Call Trace: [12210.944688] [] fib6_new_table+0x24/0x8a [12210.945516] [] vrf_dev_init+0xd4/0x162 [12210.946328] [] register_netdevice+0x100/0x396 [12210.947209] [] vrf_newlink+0x40/0xb3 [12210.948001] [] rtnl_newlink+0x5d3/0x6d5 ... 
The problem above is due to the fact that the fib hash table is not allocated when IPv6 is disabled at boot. As for the VRF driver it should not do any IPv6 initializations if IPv6 is disabled, so it needs to know if IPv6 is disabled at boot. The disable parameter is private to the IPv6 module, so provide an accessor for modules to determine if IPv6 was disabled at boot time. Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5c91b0b055d4..c6dbcd84a2c7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -283,6 +283,8 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) +bool ipv6_mod_enabled(void); + static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; @@ -326,6 +328,11 @@ static inline int inet_v6_ipv6only(const struct sock *sk) #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 +static inline bool ipv6_mod_enabled(void) +{ + return false; +} + static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return NULL; -- cgit From 7d84e37e114215be0a0c80095891c8268a99352b Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 9 Jun 2016 13:41:15 -0400 Subject: virtio_net: Update the feature bit to comply with spec A draft version of the MTU Advice feature bit was specified as 25. This bit is not within the allowed range for network device feature bits, and should be changed to be feature bit 3 to fully comply with the spec. Fixes 14de9d114a82 ('virtio-net: Add initial MTU advice feature') Signed-off-by: Aaron Conole Suggested-by: "Michael S. Tsirkin" Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 1ab4ea6ec847..0da0e3a98f17 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -35,6 +35,7 @@ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -55,7 +56,6 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_MTU 25 /* Initial MTU advice */ #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ -- cgit From 7486216b3a0bd26375b17b2cc168a311106cea70 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 9 Jun 2016 15:11:34 +0300 Subject: {net,IB}/mlx5: mlx5_ifc updates Introducing mlx5_ifc updates for upcoming ConnectX-4 features. 
Needed bits and hardware structures for mlx5e netdev: - MLX5_CQ_PERIOD_NUM_MODES for adaptive moderation support - QoS rate limiting - SQ context rate limiting - Auto negotiation fields in PTYS register - Source SQN field in flow table entry match structure - DCBX parameters Needed bits and hardware structures for IB: - New XRQ opcodes, commands and capabilities layout - Extend q counters definition to support IB. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 275 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 263 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7e5890..209add93159f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -123,6 +123,10 @@ enum { MLX5_CMD_OP_DRAIN_DCT = 0x712, MLX5_CMD_OP_QUERY_DCT = 0x713, MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, + MLX5_CMD_OP_CREATE_XRQ = 0x717, + MLX5_CMD_OP_DESTROY_XRQ = 0x718, + MLX5_CMD_OP_QUERY_XRQ = 0x719, + MLX5_CMD_OP_ARM_XRQ = 0x71a, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -139,6 +143,8 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, + MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -361,7 +367,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { }; struct mlx5_ifc_fte_match_set_misc_bits { - u8 reserved_at_0[0x20]; + u8 reserved_at_0[0x8]; + u8 source_sqn[0x18]; u8 reserved_at_20[0x10]; u8 source_port[0x10]; @@ -505,6 +512,17 @@ struct mlx5_ifc_e_switch_cap_bits { u8 reserved_at_20[0x7e0]; }; +struct mlx5_ifc_qos_cap_bits { + u8 packet_pacing[0x1]; + u8 reserved_0[0x1f]; + u8 reserved_1[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; + u8 reserved_2[0x10]; + u8 packet_pacing_rate_table_size[0x10]; + u8 reserved_3[0x760]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -744,7 +762,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; - u8 reserved_at_182[0x4]; + u8 retransmission_q_counters[0x1]; + u8 reserved_at_183[0x3]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -771,7 +790,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_msg[0x5]; u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1d0[0x6]; + u8 reserved_at_1d0[0x1]; + u8 dcbx[0x1]; + u8 reserved_at_1d2[0x4]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_at_1d8[0x1]; @@ -803,7 +824,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21b[0x1]; + u8 qos[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; @@ -929,7 +950,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; - u8 reserved_at_5e0[0x220]; + u8 reserved_at_5e0[0x10]; + u8 tag_matching[0x1]; + u8 rndv_offload_rc[0x1]; + u8 rndv_offload_dc[0x1]; + u8 log_tag_matching_list_sz[0x5]; + u8 reserved_at_5e8[0x3]; + u8 log_max_xrq[0x5]; + + u8 reserved_at_5f0[0x200]; }; enum mlx5_flow_destination_type { @@ -1967,7 +1996,7 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_560[0x5]; u8 rq_type[0x3]; - u8 srqn_rmpn[0x18]; + u8 srqn_rmpn_xrqn[0x18]; u8 reserved_at_580[0x8]; u8 rmsn[0x18]; @@ -2018,6 +2047,7 @@ 
union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; + struct mlx5_ifc_qos_cap_bits qos_cap; u8 reserved_at_0[0x8000]; }; @@ -2244,8 +2274,9 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_at_60[0xa0]; + u8 reserved_at_60[0x90]; + u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_at_110[0x10]; @@ -2593,7 +2624,7 @@ struct mlx5_ifc_dctc_bits { u8 reserved_at_98[0x8]; u8 reserved_at_a0[0x8]; - u8 srqn[0x18]; + u8 srqn_xrqn[0x18]; u8 reserved_at_c0[0x8]; u8 pd[0x18]; @@ -2645,6 +2676,7 @@ enum { enum { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, + MLX5_CQ_PERIOD_NUM_MODES }; struct mlx5_ifc_cqc_bits { @@ -2722,6 +2754,54 @@ struct mlx5_ifc_query_adapter_param_block_bits { u8 vsd_contd_psid[16][0x8]; }; +enum { + MLX5_XRQC_STATE_GOOD = 0x0, + MLX5_XRQC_STATE_ERROR = 0x1, +}; + +enum { + MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0, + MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1, +}; + +enum { + MLX5_XRQC_OFFLOAD_RNDV = 0x1, +}; + +struct mlx5_ifc_tag_matching_topology_context_bits { + u8 log_matching_list_sz[0x4]; + u8 reserved_at_4[0xc]; + u8 append_next_index[0x10]; + + u8 sw_phase_cnt[0x10]; + u8 hw_phase_cnt[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_xrqc_bits { + u8 state[0x4]; + u8 rlkey[0x1]; + u8 reserved_at_5[0xf]; + u8 topology[0x4]; + u8 reserved_at_18[0x4]; + u8 offload[0x4]; + + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + + u8 reserved_at_40[0x8]; + u8 cqn[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; + + u8 reserved_at_180[0x180]; + + struct mlx5_ifc_wq_bits wq; +}; + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { struct mlx5_ifc_modify_field_select_bits modify_field_select; struct mlx5_ifc_resize_field_select_bits resize_field_select; @@ -3144,6 +3224,30 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 reserved_at_800[0x80]; }; +struct mlx5_ifc_query_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + +struct mlx5_ifc_query_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3547,7 +3651,27 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 out_of_sequence[0x20]; - u8 reserved_at_1e0[0x620]; + u8 reserved_at_1e0[0x20]; + + u8 duplicate_request[0x20]; + + u8 reserved_at_220[0x20]; + + u8 rnr_nak_retry_err[0x20]; + + u8 reserved_at_260[0x20]; + + u8 packet_seq_err[0x20]; + + u8 reserved_at_2a0[0x20]; + + u8 implied_nak_seq_err[0x20]; + + u8 reserved_at_2e0[0x20]; + + u8 local_ack_timeout_err[0x20]; + + u8 reserved_at_320[0x4e0]; }; struct mlx5_ifc_query_q_counter_in_bits { @@ -4998,6 +5122,28 @@ struct mlx5_ifc_detach_from_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_destroy_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; 
+}; + struct mlx5_ifc_destroy_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5583,6 +5729,30 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_create_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6124,6 +6294,29 @@ struct mlx5_ifc_attach_to_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_arm_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_arm_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x10]; + u8 lwm[0x10]; +}; + struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6161,7 +6354,8 @@ struct mlx5_ifc_arm_rq_out_bits { }; enum { - MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_XRQ = 0x2, }; struct mlx5_ifc_arm_rq_in_bits { @@ -6354,6 +6548,30 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; +struct mlx5_ifc_set_rate_limit_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_rate_limit_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 rate_limit_index[0x10]; + + u8 reserved_at_60[0x20]; + + u8 rate_limit[0x20]; +}; + struct mlx5_ifc_access_register_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6478,12 +6696,15 @@ struct mlx5_ifc_pude_reg_bits { }; struct mlx5_ifc_ptys_reg_bits { - u8 reserved_at_0[0x8]; + u8 an_disable_cap[0x1]; + u8 an_disable_admin[0x1]; + u8 reserved_at_2[0x6]; u8 local_port[0x8]; u8 reserved_at_10[0xd]; u8 proto_mask[0x3]; - u8 reserved_at_20[0x40]; + u8 an_status[0x4]; + u8 reserved_at_24[0x3c]; u8 eth_proto_capability[0x20]; @@ -7444,4 +7665,34 @@ struct mlx5_ifc_mcia_reg_bits { u8 dword_11[0x20]; }; +struct mlx5_ifc_dcbx_param_bits { + u8 dcbx_cee_cap[0x1]; + u8 dcbx_ieee_cap[0x1]; + u8 dcbx_standby_cap[0x1]; + u8 reserved_at_0[0x5]; + u8 port_number[0x8]; + u8 reserved_at_10[0xa]; + u8 max_application_table_size[6]; + u8 reserved_at_20[0x15]; + u8 version_oper[0x3]; + u8 reserved_at_38[5]; + u8 version_admin[0x3]; + u8 willing_admin[0x1]; + u8 reserved_at_41[0x3]; + u8 pfc_cap_oper[0x4]; + u8 reserved_at_48[0x4]; + u8 pfc_cap_admin[0x4]; + u8 reserved_at_50[0x4]; + u8 num_of_tc_oper[0x4]; + u8 reserved_at_58[0x4]; + u8 num_of_tc_admin[0x4]; + u8 remote_willing[0x1]; + u8 reserved_at_61[3]; + u8 remote_pfc_cap[4]; + u8 reserved_at_68[0x14]; + u8 remote_num_of_tc[0x4]; + u8 reserved_at_80[0x18]; + u8 error[0x8]; + u8 reserved_at_a0[0x160]; +}; #endif /* MLX5_IFC_H */ -- cgit From f2a4d086ed4c588d32fe9b7aa67fead7280e7bf1 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 10 Jun 2016 11:49:33 -0700 Subject: openvswitch: Add packet truncation support. The patch adds a new OVS action, OVS_ACTION_ATTR_TRUNC, in order to truncate packets. 
A 'max_len' is added for setting up the maximum packet size, and a 'cutlen' field is to record the number of bytes to trim the packet when the packet is outputting to a port, or when the packet is sent to userspace. Signed-off-by: William Tu Cc: Pravin Shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index bb0d515b7654..8274675ba9a3 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -580,6 +580,10 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) +struct ovs_action_trunc { + uint32_t max_len; /* Max packet size in bytes. */ +}; + /** * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. * @mpls_lse: MPLS label stack entry to push. @@ -703,6 +707,7 @@ enum ovs_nat_attr { * enum ovs_action_attr - Action types. * * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_TRUNC: Output packet to port with truncated packet size. * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested * %OVS_USERSPACE_ATTR_* attributes. * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The @@ -756,6 +761,7 @@ enum ovs_action_attr { * The data must be zero for the unmasked * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ + OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit From a70b506efe899dc8d650eafcc0b11fc9ee746627 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Jun 2016 21:19:06 +0200 Subject: bpf: enforce recursion limit on redirects Respect the stack's xmit_recursion limit for calls into dev_queue_xmit(). Currently, they are not handeled by the limiter when attached to clsact's egress parent, for example, and a buggy program redirecting it to the same device again could run into stack overflow eventually. It would be good if we could notify an admin to give him a chance to react. We reuse xmit_recursion instead of having one private to eBPF, so that the stack's current recursion depth will be taken into account as well. Follow-up to commit 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") and 27b29f63058d ("bpf: add bpf_redirect() helper"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4f234b102892..94eef356a65f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2389,6 +2389,8 @@ void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); DECLARE_PER_CPU(int, xmit_recursion); +#define XMIT_RECURSION_LIMIT 10 + static inline int dev_recursion_level(void) { return this_cpu_read(xmit_recursion); -- cgit From 678ece30226ac05e95768d5f70db53a475569d37 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Jun 2016 16:09:17 +0300 Subject: virtio_net: add _UAPI prefix to virtio_net header guards This gives better namespacing and prevents conflicts with no-uapi version of virtio_net header that will be introduced in the following patch. Signed-off-by: Mike Rapoport Signed-off-by: David S. 
Miller --- include/uapi/linux/virtio_net.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 0da0e3a98f17..fc353b518288 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VIRTIO_NET_H -#define _LINUX_VIRTIO_NET_H +#ifndef _UAPI_LINUX_VIRTIO_NET_H +#define _UAPI_LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -245,4 +245,4 @@ struct virtio_net_ctrl_mq { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 -#endif /* _LINUX_VIRTIO_NET_H */ +#endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit From fd2a0437dc33b6425cabf74cc7fc7fdba6d5903b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Jun 2016 16:09:18 +0300 Subject: virtio_net: introduce virtio_net_hdr_{from,to}_skb The code for conversion between virtio_net_hdr and skb GSO info is duplicated at several places. Let's put it to a common place to allow reuse. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 101 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/linux/virtio_net.h (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h new file mode 100644 index 000000000000..1c912f85e041 --- /dev/null +++ b/include/linux/virtio_net.h @@ -0,0 +1,101 @@ +#ifndef _LINUX_VIRTIO_NET_H +#define _LINUX_VIRTIO_NET_H + +#include +#include + +static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +{ + unsigned short gso_type = 0; + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + return -EINVAL; + } + + if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (hdr->gso_size == 0) + return -EINVAL; + } + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start); + u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + + if (!skb_partial_csum_set(skb, start, off)) + return -EINVAL; + } + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + + skb_shinfo(skb)->gso_size = gso_size; + skb_shinfo(skb)->gso_type = gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + memset(hdr, 0, sizeof(*hdr)); + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. 
*/ + hdr->hdr_len = __cpu_to_virtio16(little_endian, + skb_headlen(skb)); + hdr->gso_size = __cpu_to_virtio16(little_endian, + sinfo->gso_size); + if (sinfo->gso_type & SKB_GSO_TCPV4) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + else + return -EINVAL; + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (skb_vlan_tag_present(skb)) + hdr->csum_start = __cpu_to_virtio16(little_endian, + skb_checksum_start_offset(skb) + VLAN_HLEN); + else + hdr->csum_start = __cpu_to_virtio16(little_endian, + skb_checksum_start_offset(skb)); + hdr->csum_offset = __cpu_to_virtio16(little_endian, + skb->csum_offset); + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; +} + +#endif /* _LINUX_VIRTIO_BYTEORDER */ -- cgit From 6f094b9ec680209c5b7314feee983b2f4c910b1b Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Wed, 8 Jun 2016 21:16:44 -0700 Subject: tcp: add in_flight to tcp_skb_cb Add in_flight (bytes in flight when packet was sent) field to tx component of tcp_skb_cb and make it available to congestion modules' pkts_acked() function through the ack_sample function argument. Signed-off-by: Lawrence Brakmo Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0bcc70f4e1fb..a79894b66726 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -767,6 +767,7 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 20 bytes */ + __u32 in_flight;/* Bytes in flight when packet sent */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -859,6 +860,7 @@ union tcp_cc_info; struct ack_sample { u32 pkts_acked; s32 rtt_us; + u32 in_flight; }; struct tcp_congestion_ops { -- cgit From 002245cc6407c8ff65b8024554080eb6de1a8e2c Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Wed, 8 Jun 2016 21:18:47 -0700 Subject: bpf: fix missing header inclusion Commit 0fc174dea545 ("ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs") introduced usage of ERR_PTR() in bpf_prog_get(), however did not include linux/err.h. Without this patch, when compiling arm64 BPF without CONFIG_BPF_SYSCALL: ... 
In file included from arch/arm64/net/bpf_jit_comp.c:21:0: include/linux/bpf.h: In function 'bpf_prog_get': include/linux/bpf.h:235:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] return ERR_PTR(-EOPNOTSUPP); ^ include/linux/bpf.h:235:9: warning: return makes pointer from integer without a cast [-Wint-conversion] In file included from include/linux/rwsem.h:17:0, from include/linux/mm_types.h:10, from include/linux/sched.h:27, from arch/arm64/include/asm/compat.h:25, from arch/arm64/include/asm/stat.h:23, from include/linux/stat.h:5, from include/linux/compat.h:12, from include/linux/filter.h:10, from arch/arm64/net/bpf_jit_comp.c:22: include/linux/err.h: At top level: include/linux/err.h:23:35: error: conflicting types for 'ERR_PTR' static inline void * __must_check ERR_PTR(long error) ^ In file included from arch/arm64/net/bpf_jit_comp.c:21:0: include/linux/bpf.h:235:9: note: previous implicit declaration of 'ERR_PTR' was here return ERR_PTR(-EOPNOTSUPP); ^ ... Fixes: 0fc174dea545 ("ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs") Suggested-by: Daniel Borkmann Signed-off-by: Zi Shen Lim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8ee27b8afe81..1bcae82c6cb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -11,6 +11,7 @@ #include #include #include +#include struct bpf_map; -- cgit From f20e6657a8758fe8d074889a6f1883674f01c7f2 Mon Sep 17 00:00:00 2001 From: Pramod Kumar Date: Fri, 10 Jun 2016 11:03:45 +0530 Subject: mdio: mux: Enhanced MDIO mux framework for integrated multiplexers An integrated multiplexer uses same address space for "muxed bus selection" and "generation of mdio transaction" hence its good to register parent bus from mux driver. Hence added a mechanism where mux driver could register a parent bus and pass it down to framework via mdio_mux_init api. Signed-off-by: Pramod Kumar Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio-mux.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mdio-mux.h b/include/linux/mdio-mux.h index a243dbba8659..61f5b21b31c7 100644 --- a/include/linux/mdio-mux.h +++ b/include/linux/mdio-mux.h @@ -10,11 +10,13 @@ #ifndef __LINUX_MDIO_MUX_H #define __LINUX_MDIO_MUX_H #include +#include int mdio_mux_init(struct device *dev, int (*switch_fn) (int cur, int desired, void *data), void **mux_handle, - void *data); + void *data, + struct mii_bus *mux_bus); void mdio_mux_uninit(void *mux_handle); -- cgit From 45f50bed1d808794e514e9eed0e579a8756ce2ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Jun 2016 16:41:39 -0700 Subject: net_sched: remove generic throttled management __QDISC_STATE_THROTTLED bit manipulation is rather expensive for HTB and few others. I already removed it for sch_fq in commit f2600cf02b5b ("net: sched: avoid costly atomic operation in fq_dequeue()") and so far nobody complained. When one ore more packets are stuck in one or more throttled HTB class, a htb dequeue() performs two atomic operations to clear/set __QDISC_STATE_THROTTLED bit, while root qdisc lock is held. Removing this pair of atomic operations bring me a 8 % performance increase on 200 TCP_RR tests, in presence of throttled classes. 
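To illustrate the calling convention after the change, a shaping qdisc's dequeue path now just re-arms the watchdog and returns NULL; the shaper structure below is a made-up example, only qdisc_priv(), qdisc_dequeue_head() and the two-argument qdisc_watchdog_schedule_ns() from the diff below are real API:

#include <net/pkt_sched.h>
#include <net/sch_generic.h>

struct example_shaper {                 /* hypothetical qdisc private data */
        struct qdisc_watchdog   watchdog;
        u64                     next_tx_ns;
};

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
        struct example_shaper *q = qdisc_priv(sch);
        u64 now = ktime_get_ns();

        if (q->next_tx_ns > now) {
                /* No throttled bit to set any more; just arm the timer. */
                qdisc_watchdog_schedule_ns(&q->watchdog, q->next_tx_ns);
                return NULL;
        }

        return qdisc_dequeue_head(sch);
}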
This patch has no side effect, since nothing actually uses disc_is_throttled() anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++-- include/net/sch_generic.h | 16 ---------------- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fea53f4d92ca..7caa99b482c6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -67,12 +67,12 @@ struct qdisc_watchdog { }; void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle); +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires); static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { - qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires), true); + qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires)); } void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9f3581980c15..9a0d177884c6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -26,7 +26,6 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_THROTTLED, }; struct qdisc_size_table { @@ -125,21 +124,6 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) #endif } -static inline bool qdisc_is_throttled(const struct Qdisc *qdisc) -{ - return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false; -} - -static inline void qdisc_throttled(struct Qdisc *qdisc) -{ - set_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - -static inline void qdisc_unthrottled(struct Qdisc *qdisc) -{ - clear_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); -- cgit From 99860208bc62d8ebd5c57495b84856506fe075bc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 12:46:04 +0200 Subject: sched: remove NET_XMIT_POLICED sch_atm returns this when TC_ACT_SHOT classification occurs. But all other schedulers that use tc_classify (htb, hfsc, drr, fq_codel ...) return NET_XMIT_SUCCESS | __BYPASS in this case so just do that in atm. BATMAN uses it as an intermediate return value to signal forwarding vs. buffering, but it did not return POLICED to callers outside of BATMAN. Reviewed-by: Sven Eckelmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94eef356a65f..d101e4d904ba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -90,7 +90,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_XMIT_SUCCESS 0x00 #define NET_XMIT_DROP 0x01 /* skb dropped */ #define NET_XMIT_CN 0x02 /* congestion notification */ -#define NET_XMIT_POLICED 0x03 /* skb is shot by police */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. 
It -- cgit From cd2a9e62c8a3c5cae7691982667d79a0edc65283 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Jun 2016 13:44:17 -0700 Subject: net: l3mdev: Remove const from flowi6 arg to get_rt6_dst Allow drivers to pass flow arg to functions where the arg is not const and allow the driver to make updates as needed (eg., setting oif). Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 34f33eb96a5e..f8a416ec674c 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -38,7 +38,7 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, - const struct flowi6 *fl6); + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -139,7 +139,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6); +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -225,7 +225,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6) +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) { return NULL; } -- cgit From 9ff74384600aeecba34ebdacbbde0627489ff601 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Jun 2016 13:44:19 -0700 Subject: net: vrf: Handle ipv6 multicast and link-local addresses IPv6 multicast and link-local addresses require special handling by the VRF driver: 1. Rather than using the VRF device index and full FIB lookups, packets to/from these addresses should use direct FIB lookups based on the VRF device table. 2. fail sends/receives on a VRF device to/from a multicast address (e.g, make ping6 ff02::1% fail) 3. move the setting of the flow oif to the first dst lookup and revert the change in icmpv6_echo_reply made in ca254490c8dfd ("net: Add VRF support to IPv6 stack"). Linklocal/mcast addresses require use of the skb->dev. With this change connections into and out of a VRF enslaved device work for multicast and link-local addresses work (icmp, tcp, and udp) e.g., 1. packets into VM with VRF config: ping6 -c3 fe80::e0:f9ff:fe1c:b974%br1 ping6 -c3 ff02::1%br1 ssh -6 fe80::e0:f9ff:fe1c:b974%br1 2. packets going out a VRF enslaved device: ping6 -c3 fe80::18f8:83ff:fe4b:7a2e%eth1 ping6 -c3 ff02::1%eth1 ssh -6 root@fe80::18f8:83ff:fe4b:7a2e%eth1 Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 54c779416eec..f55bf3d294aa 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -76,6 +76,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net, struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int ifindex, struct flowi6 *fl6, int flags); int ip6_route_init(void); void ip6_route_cleanup(void); -- cgit From b2313077ed0db35ee186905d8076a737248edd24 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jun 2016 13:46:28 -0700 Subject: net_sched: make tcf_hash_check() boolean Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index db218a12efb5..fb82b5b5d9e7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -155,8 +155,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tc_action *a); int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index); u32 tcf_hash_new_index(struct tc_action_net *tn); -int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, - int bind); +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, + int bind); int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -- cgit From 2e0ab8ca83c122f275b21ea917d52fee506910bf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:31 +0300 Subject: ptr_ring: array based FIFO for pointers A simple array based FIFO of pointers. Intended for net stack which commonly has a single consumer/producer. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 264 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 include/linux/ptr_ring.h (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h new file mode 100644 index 000000000000..633406f9af8e --- /dev/null +++ b/include/linux/ptr_ring.h @@ -0,0 +1,264 @@ +/* + * Definitions for the 'struct ptr_ring' datastructure. + * + * Author: + * Michael S. Tsirkin + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This is a limited-size FIFO maintaining pointers in FIFO order, with + * one CPU producing entries and another consuming entries from a FIFO. + * + * This implementation tries to minimize cache-contention when there is a + * single producer and a single consumer CPU. 
+ */ + +#ifndef _LINUX_PTR_RING_H +#define _LINUX_PTR_RING_H 1 + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#endif + +struct ptr_ring { + int producer ____cacheline_aligned_in_smp; + spinlock_t producer_lock; + int consumer ____cacheline_aligned_in_smp; + spinlock_t consumer_lock; + /* Shared consumer/producer data */ + /* Read-only by both the producer and the consumer */ + int size ____cacheline_aligned_in_smp; /* max entries in queue */ + void **queue; +}; + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + * Callers don't need to take producer lock - if they don't + * the next call to __ptr_ring_produce may fail. + */ +static inline bool __ptr_ring_full(struct ptr_ring *r) +{ + return r->queue[r->producer]; +} + +static inline bool ptr_ring_full(struct ptr_ring *r) +{ + barrier(); + return __ptr_ring_full(r); +} + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + */ +static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + if (__ptr_ring_full(r)) + return -ENOSPC; + + r->queue[r->producer++] = ptr; + if (unlikely(r->producer >= r->size)) + r->producer = 0; + return 0; +} + +static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_bh(&r->producer_lock); + + return ret; +} + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). Callers must take consumer_lock + * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. + * There's no need for a lock if pointer is merely tested - see e.g. + * ptr_ring_empty. 
+ */ +static inline void *__ptr_ring_peek(struct ptr_ring *r) +{ + return r->queue[r->consumer]; +} + +static inline bool ptr_ring_empty(struct ptr_ring *r) +{ + barrier(); + return !__ptr_ring_peek(r); +} + +/* Must only be called after __ptr_ring_peek returned !NULL */ +static inline void __ptr_ring_discard_one(struct ptr_ring *r) +{ + r->queue[r->consumer++] = NULL; + if (unlikely(r->consumer >= r->size)) + r->consumer = 0; +} + +static inline void *__ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + ptr = __ptr_ring_peek(r); + if (ptr) + __ptr_ring_discard_one(r); + + return ptr; +} + +static inline void *ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + spin_lock(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_irq(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_irq(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_irq(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_any(struct ptr_ring *r) +{ + unsigned long flags; + void *ptr; + + spin_lock_irqsave(&r->consumer_lock, flags); + ptr = __ptr_ring_consume(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ptr; +} + +static inline void *ptr_ring_consume_bh(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_bh(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_bh(&r->consumer_lock); + + return ptr; +} + +/* Cast to structure type and call a function without discarding from FIFO. + * Function must return a value. + * Callers must take consumer_lock. + */ +#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) + +#define PTR_RING_PEEK_CALL(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_BH(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + unsigned long __PTR_RING_PEEK_CALL_f;\ + \ + spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +{ + r->queue = kzalloc(ALIGN(size * sizeof *(r->queue), SMP_CACHE_BYTES), + gfp); + if (!r->queue) + return -ENOMEM; + + r->size = size; + r->producer = r->consumer = 0; + spin_lock_init(&r->producer_lock); + spin_lock_init(&r->consumer_lock); + + return 0; +} + +static inline void ptr_ring_cleanup(struct ptr_ring *r) +{ + kfree(r->queue); +} + +#endif /* _LINUX_PTR_RING_H */ -- cgit From ad69f35d1dc0a79f86627ca56e01f86512602a49 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:41 +0300 Subject: skb_array: array based FIFO for skbs A simple array based FIFO of pointers. 
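Before the skb_array wrapper that follows, a quick orientation on the ptr_ring API just listed: a minimal single-producer/single-consumer user might look like this sketch (the demo_* names and the 256-slot size are invented for illustration; cleanup and error handling are elided):

static struct ptr_ring demo_ring;

static int demo_fifo_init(void)
{
	/* 256 NULL-initialized slots, allocated with kzalloc() by ptr_ring_init() */
	return ptr_ring_init(&demo_ring, 256, GFP_KERNEL);
}

/* producer context: returns -ENOSPC while the target slot is still occupied */
static int demo_fifo_push(void *item)
{
	return ptr_ring_produce(&demo_ring, item);
}

/* consumer context: returns NULL when the ring is empty */
static void *demo_fifo_pop(void)
{
	return ptr_ring_consume(&demo_ring);
}

The _irq/_bh/_any variants shown above differ only in which spinlock flavour they take around the same __ptr_ring_produce()/__ptr_ring_consume() core.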
Intended for net stack so uses skbs for type safety. Implemented as a set of wrappers around ptr_ring. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skb_array.h | 144 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/skb_array.h (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h new file mode 100644 index 000000000000..c4c090223333 --- /dev/null +++ b/include/linux/skb_array.h @@ -0,0 +1,144 @@ +/* + * Definitions for the 'struct skb_array' datastructure. + * + * Author: + * Michael S. Tsirkin + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Limited-size FIFO of skbs. Can be used more or less whenever + * sk_buff_head can be used, except you need to know the queue size in + * advance. + * Implemented as a type-safe wrapper around ptr_ring. + */ + +#ifndef _LINUX_SKB_ARRAY_H +#define _LINUX_SKB_ARRAY_H 1 + +#ifdef __KERNEL__ +#include +#include +#include +#endif + +struct skb_array { + struct ptr_ring ring; +}; + +/* Might be slightly faster than skb_array_full below, but callers invoking + * this in a loop must use a compiler barrier, for example cpu_relax(). + */ +static inline bool __skb_array_full(struct skb_array *a) +{ + return __ptr_ring_full(&a->ring); +} + +static inline bool skb_array_full(struct skb_array *a) +{ + return ptr_ring_full(&a->ring); +} + +static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce(&a->ring, skb); +} + +static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_irq(&a->ring, skb); +} + +static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_bh(&a->ring, skb); +} + +static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb) +{ + return ptr_ring_produce_any(&a->ring, skb); +} + +/* Might be slightly faster than skb_array_empty below, but callers invoking + * this in a loop must take care to use a compiler barrier, for example + * cpu_relax(). 
+ */ +static inline bool __skb_array_empty(struct skb_array *a) +{ + return !__ptr_ring_peek(&a->ring); +} + +static inline bool skb_array_empty(struct skb_array *a) +{ + return ptr_ring_empty(&a->ring); +} + +static inline struct sk_buff *skb_array_consume(struct skb_array *a) +{ + return ptr_ring_consume(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) +{ + return ptr_ring_consume_irq(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) +{ + return ptr_ring_consume_any(&a->ring); +} + +static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) +{ + return ptr_ring_consume_bh(&a->ring); +} + +static inline int __skb_array_len_with_tag(struct sk_buff *skb) +{ + if (likely(skb)) { + int len = skb->len; + + if (skb_vlan_tag_present(skb)) + len += VLAN_HLEN; + + return len; + } else { + return 0; + } +} + +static inline int skb_array_peek_len(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_irq(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_bh(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_peek_len_any(struct skb_array *a) +{ + return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); +} + +static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) +{ + return ptr_ring_init(&a->ring, size, gfp); +} + +static inline void skb_array_cleanup(struct skb_array *a) +{ + ptr_ring_cleanup(&a->ring); +} + +#endif /* _LINUX_SKB_ARRAY_H */ -- cgit From 5d49de532002f02755decd1758aac53063a68625 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:45 +0300 Subject: ptr_ring: resize support This adds ring resize support. Seems to be necessary as users such as tun allow userspace control over queue size. If resize is used, this costs us ability to peek at queue without consumer lock - should not be a big deal as peek and consumer are usually run on the same CPU. If ring is made bigger, ring contents is preserved. If ring is made smaller, extra pointers are passed to an optional destructor callback. Cleanup function also gains destructor callback such that all pointers in queue can be cleaned up. This changes some APIs but we don't have any users yet, so it won't break bisect. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 157 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 633406f9af8e..562a65e8bcc0 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -43,9 +43,9 @@ struct ptr_ring { }; /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). - * Callers don't need to take producer lock - if they don't - * the next call to __ptr_ring_produce may fail. + * for example cpu_relax(). If ring is ever resized, callers must hold + * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold + * producer_lock, the next call to __ptr_ring_produce may fail. 
*/ static inline bool __ptr_ring_full(struct ptr_ring *r) { @@ -54,16 +54,55 @@ static inline bool __ptr_ring_full(struct ptr_ring *r) static inline bool ptr_ring_full(struct ptr_ring *r) { - barrier(); - return __ptr_ring_full(r); + bool ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_full(r); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_full_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_bh(&r->producer_lock); + + return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). + * for example cpu_relax(). Callers must hold producer_lock. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (__ptr_ring_full(r)) + if (r->queue[r->producer]) return -ENOSPC; r->queue[r->producer++] = ptr; @@ -120,20 +159,68 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must take consumer_lock * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. - * There's no need for a lock if pointer is merely tested - see e.g. - * ptr_ring_empty. + * If ring is never resized, and if the pointer is merely + * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { return r->queue[r->consumer]; } -static inline bool ptr_ring_empty(struct ptr_ring *r) +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). Callers must take consumer_lock + * if the ring is ever resized - see e.g. ptr_ring_empty. 
+ */ +static inline bool __ptr_ring_empty(struct ptr_ring *r) { - barrier(); return !__ptr_ring_peek(r); } +static inline bool ptr_ring_empty(struct ptr_ring *r) +{ + bool ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_empty(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_empty_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { @@ -241,10 +328,14 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) __PTR_RING_PEEK_CALL_v; \ }) +static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +{ + return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); +} + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { - r->queue = kzalloc(ALIGN(size * sizeof *(r->queue), SMP_CACHE_BYTES), - gfp); + r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; @@ -256,8 +347,46 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } -static inline void ptr_ring_cleanup(struct ptr_ring *r) +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + int producer = 0; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + void *ptr; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->producer_lock, flags); + + while ((ptr = ptr_ring_consume(r))) + if (producer < size) + queue[producer++] = ptr; + else if (destroy) + destroy(ptr); + + r->size = size; + r->producer = producer; + r->consumer = 0; + old = r->queue; + r->queue = queue; + + spin_unlock_irqrestore(&(r)->producer_lock, flags); + + kfree(old); + + return 0; +} + +static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { + void *ptr; + + if (destroy) + while ((ptr = ptr_ring_consume(r))) + destroy(ptr); kfree(r->queue); } -- cgit From 7d7072e3bad5569f636d3c54a36da40976bfd505 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 13 Jun 2016 23:54:50 +0300 Subject: skb_array: resize support Update skb_array after ptr_ring API changes. Signed-off-by: Michael S. Tsirkin Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skb_array.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index c4c090223333..678bfbf78ac4 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -63,9 +63,9 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb return ptr_ring_produce_any(&a->ring, skb); } -/* Might be slightly faster than skb_array_empty below, but callers invoking - * this in a loop must take care to use a compiler barrier, for example - * cpu_relax(). 
+/* Might be slightly faster than skb_array_empty below, but only safe if the + * array is never resized. Also, callers invoking this in a loop must take care + * to use a compiler barrier, for example cpu_relax(). */ static inline bool __skb_array_empty(struct skb_array *a) { @@ -77,6 +77,21 @@ static inline bool skb_array_empty(struct skb_array *a) return ptr_ring_empty(&a->ring); } +static inline bool skb_array_empty_bh(struct skb_array *a) +{ + return ptr_ring_empty_bh(&a->ring); +} + +static inline bool skb_array_empty_irq(struct skb_array *a) +{ + return ptr_ring_empty_irq(&a->ring); +} + +static inline bool skb_array_empty_any(struct skb_array *a) +{ + return ptr_ring_empty_any(&a->ring); +} + static inline struct sk_buff *skb_array_consume(struct skb_array *a) { return ptr_ring_consume(&a->ring); @@ -136,9 +151,19 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_init(&a->ring, size, gfp); } +void __skb_array_destroy_skb(void *ptr) +{ + kfree_skb(ptr); +} + +int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) +{ + return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); +} + static inline void skb_array_cleanup(struct skb_array *a) { - ptr_ring_cleanup(&a->ring); + ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } #endif /* _LINUX_SKB_ARRAY_H */ -- cgit From 1b5c5493e3e68181be344cb51bf9df192d05ffc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:50 -0700 Subject: net_sched: add the ability to defer skb freeing qdisc are changed under RTNL protection and often while blocking BH and root qdisc spinlock. When lots of skbs need to be dropped, we free them under these locks causing TX/RX freezes, and more generally latency spikes. This commit adds rtnl_kfree_skbs(), used to queue skbs for deferred freeing. Actual freeing happens right after RTNL is released, with appropriate scheduling points. rtnl_qdisc_drop() can also be used in place of disc_drop() when RTNL is held. qdisc_reset_queue() and __qdisc_reset_queue() get the new behavior, so standard qdiscs like pfifo, pfifo_fast... have their ->reset() method automatically handled. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 5 +++-- include/net/sch_generic.h | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c006cc900c44..2daece8979f7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,8 +89,9 @@ void net_inc_egress_queue(void); void net_dec_egress_queue(void); #endif -extern void rtnetlink_init(void); -extern void __rtnl_unlock(void); +void rtnetlink_init(void); +void __rtnl_unlock(void); +void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); #define ASSERT_RTNL() do { \ if (unlikely(!rtnl_is_locked())) { \ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9a0d177884c6..4f7cee8344c4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct Qdisc *sch, - struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct sk_buff_head *list) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - __skb_queue_purge(list); + if (!skb_queue_empty(list)) { + rtnl_kfree_skbs(list->next, list->prev); + __skb_queue_head_init(list); + } } static inline void qdisc_reset_queue(struct Qdisc *sch) { - __qdisc_reset_queue(sch, &sch->q); + __qdisc_reset_queue(&sch->q); sch->qstats.backlog = 0; } @@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } +static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + rtnl_kfree_skbs(skb, skb); + qdisc_qstats_drop(sch); +} + static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { kfree_skb(skb); -- cgit From 8626a0c83b0d471d859bcd908d016874df951fc3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:16 +0200 Subject: 6lowpan: add private neighbour data This patch will introduce a 6lowpan neighbour private data. Like the interface private data we handle private data for generic 6lowpan and for link-layer specific 6lowpan. The current first use case if to save the short address for a 802.15.4 6lowpan neighbour. Cc: David S. Miller Reviewed-by: Stefan Schmidt Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +-- include/net/6lowpan.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d101e4d904ba..36e43bd422f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,8 +1483,7 @@ enum netdev_priv_flags { * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length - * @neigh_priv_len; Used in neigh_alloc(), - * initialized only in atm/clip.c + * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index da84cf920b78..2d9b9d39221e 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -141,6 +141,16 @@ struct lowpan_dev { u8 priv[0] __aligned(sizeof(void *)); }; +struct lowpan_802154_neigh { + __le16 short_addr; +}; + +static inline +struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv) +{ + return neigh_priv; +} + static inline struct lowpan_dev *lowpan_dev(const struct net_device *dev) { -- cgit From 2ad3ed59198c5404c34515cfcfd9a2b3c54d964f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:17 +0200 Subject: 6lowpan: add 802.15.4 short addr slaac This patch adds the autoconfiguration if a valid 802.15.4 short address is available for 802.15.4 6LoWPAN interfaces. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: Hannes Frederic Sowa Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/6lowpan.h | 6 ++++++ include/net/addrconf.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 2d9b9d39221e..5ab4c9901ccc 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -254,6 +254,12 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data, return false; } +static inline bool lowpan_802154_is_valid_src_short_addr(__le16 addr) +{ + /* First bit of addr is multicast, reserved or 802.15.4 specific */ + return !(addr & cpu_to_le16(0x8000)); +} + static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, const size_t len) { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 730d856683e5..b1774eb03f37 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,6 +94,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags); + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) -- cgit From 1e82f961ac8e94c50a933e89ee08071fc81a4bbd Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:19 +0200 Subject: ndisc: add __ndisc_opt_addr_space function This patch adds __ndisc_opt_addr_space as low-level function for ndisc_opt_addr_space which doesn't depend on net_device parameter. Cc: David S. 
Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/ndisc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 2d8edaad29cb..4cee82654fe5 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -127,10 +127,15 @@ static inline int ndisc_addr_option_pad(unsigned short type) } } +static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) +{ + return NDISC_OPT_SPACE(addr_len + pad); +} + static inline int ndisc_opt_addr_space(struct net_device *dev) { - return NDISC_OPT_SPACE(dev->addr_len + - ndisc_addr_option_pad(dev->type)); + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)); } static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, -- cgit From 4f36ce84c54c971cd67c882035d9bde7b1a2ee53 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:20 +0200 Subject: ndisc: add __ndisc_opt_addr_data function This patch adds __ndisc_opt_addr_data as low-level function for ndisc_opt_addr_data which doesn't depend on net_device parameter. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/ndisc.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 4cee82654fe5..c8962adf24d0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -138,17 +138,23 @@ static inline int ndisc_opt_addr_space(struct net_device *dev) ndisc_addr_option_pad(dev->type)); } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) +static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, + unsigned char addr_len, int prepad) { u8 *lladdr = (u8 *)(p + 1); int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != ndisc_opt_addr_space(dev)) + if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad)) return NULL; return lladdr + prepad; } +static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, + struct net_device *dev) +{ + return __ndisc_opt_addr_data(p, dev->addr_len, + ndisc_addr_option_pad(dev->type)); +} + static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; -- cgit From f997c55c1dc8841b3ee4df0493d0ac7966d42165 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:23 +0200 Subject: ipv6: introduce neighbour discovery ops This patch introduces neighbour discovery ops callback structure. The idea is to separate the handling for 6LoWPAN into the 6lowpan module. These callback offers 6lowpan different handling, such as 802.15.4 short address handling or RFC6775 (Neighbor Discovery Optimization for IPv6 over 6LoWPANs). Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 ++ include/net/ndisc.h | 197 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 36e43bd422f8..890158e99159 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations + * @ndisc_ops: Includes callbacks for different IPv6 neighbour + * discovery handling. Necessary for e.g. 6LoWPAN. * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1672,6 +1674,9 @@ struct net_device { #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; #endif +#if IS_ENABLED(CONFIG_IPV6) + const struct ndisc_ops *ndisc_ops; +#endif const struct header_ops *header_ops; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index c8962adf24d0..a5e276703cb3 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -58,6 +58,7 @@ struct inet6_dev; struct net_device; struct net_proto_family; struct sk_buff; +struct prefix_info; extern struct neigh_table nd_tbl; @@ -110,9 +111,182 @@ struct ndisc_options { #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts); +#define NDISC_OPS_REDIRECT_DATA_SPACE 2 + +/* + * This structure defines the hooks for IPv6 neighbour discovery. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * int (*is_useropt)(u8 nd_opt_type): + * This function is called when IPv6 decide RA userspace options. if + * this function returns 1 then the option given by nd_opt_type will + * be handled as userspace option additional to the IPv6 options. + * + * int (*parse_options)(const struct net_device *dev, + * struct nd_opt_hdr *nd_opt, + * struct ndisc_options *ndopts): + * This function is called while parsing ndisc ops and put each position + * as pointer into ndopts. If this function return unequal 0, then this + * function took care about the ndisc option, if 0 then the IPv6 ndisc + * option parser will take care about that option. + * + * void (*update)(const struct net_device *dev, struct neighbour *n, + * u32 flags, u8 icmp6_type, + * const struct ndisc_options *ndopts): + * This function is called when IPv6 ndisc updates the neighbour cache + * entry. Additional options which can be updated may be previously + * parsed by parse_opts callback and accessible over ndopts parameter. + * + * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + * struct neighbour *neigh, u8 *ha_buf, + * u8 **ha): + * This function is called when the necessary option space will be + * calculated before allocating a skb. The parameters neigh, ha_buf + * abd ha are available on NDISC_REDIRECT messages only. + * + * void (*fill_addr_option)(const struct net_device *dev, + * struct sk_buff *skb, u8 icmp6_type, + * const u8 *ha): + * This function is called when the skb will finally fill the option + * fields inside skb. NOTE: this callback should fill the option + * fields to the skb which are previously indicated by opt_space + * parameter. 
That means the decision to add such option should + * not lost between these two callbacks, e.g. protected by interface + * up state. + * + * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + * const struct prefix_info *pinfo, + * struct inet6_dev *in6_dev, + * struct in6_addr *addr, + * int addr_type, u32 addr_flags, + * bool sllao, bool tokenized, + * __u32 valid_lft, u32 prefered_lft, + * bool dev_addr_generated): + * This function is called when a RA messages is received with valid + * PIO option fields and an IPv6 address will be added to the interface + * for autoconfiguration. The parameter dev_addr_generated reports about + * if the address was based on dev->dev_addr or not. This can be used + * to add a second address if link-layer operates with two link layer + * addresses. E.g. 802.15.4 6LoWPAN. + */ +struct ndisc_ops { + int (*is_useropt)(u8 nd_opt_type); + int (*parse_options)(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts); + void (*update)(const struct net_device *dev, struct neighbour *n, + u32 flags, u8 icmp6_type, + const struct ndisc_options *ndopts); + int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + struct neighbour *neigh, u8 *ha_buf, + u8 **ha); + void (*fill_addr_option)(const struct net_device *dev, + struct sk_buff *skb, u8 icmp6_type, + const u8 *ha); + void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft, + bool dev_addr_generated); +}; + +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_ops_is_useropt(const struct net_device *dev, + u8 nd_opt_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) + return dev->ndisc_ops->is_useropt(nd_opt_type); + else + return 0; +} + +static inline int ndisc_ops_parse_options(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->parse_options) + return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts); + else + return 0; +} + +static inline void ndisc_ops_update(const struct net_device *dev, + struct neighbour *n, u32 flags, + u8 icmp6_type, + const struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->update) + dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts); +} + +static inline int ndisc_ops_opt_addr_space(const struct net_device *dev, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space && + icmp6_type != NDISC_REDIRECT) + return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL, + NULL, NULL); + else + return 0; +} + +static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev, + struct neighbour *neigh, + u8 *ha_buf, u8 **ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space) + return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT, + neigh, ha_buf, ha); + else + return 0; +} + +static inline void ndisc_ops_fill_addr_option(const struct net_device *dev, + struct sk_buff *skb, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option && + icmp6_type != NDISC_REDIRECT) + dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL); +} + +static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev, + struct sk_buff *skb, + const u8 *ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option) + 
dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha); +} + +static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net, + struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, + u32 prefered_lft, + bool dev_addr_generated) +{ + if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr) + dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev, + addr, addr_type, + addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); +} +#endif + /* * Return the padding between the option length and the start of the * link addr. Currently only IP-over-InfiniBand needs this, although @@ -132,11 +306,25 @@ static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) return NDISC_OPT_SPACE(addr_len + pad); } -static inline int ndisc_opt_addr_space(struct net_device *dev) +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type) +{ + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_opt_addr_space(dev, icmp6_type); +} + +static inline int ndisc_redirect_opt_addr_space(struct net_device *dev, + struct neighbour *neigh, + u8 *ops_data_buf, + u8 **ops_data) { return __ndisc_opt_addr_space(dev->addr_len, - ndisc_addr_option_pad(dev->type)); + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf, + ops_data); } +#endif static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, unsigned char addr_len, int prepad) @@ -205,6 +393,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts); /* * IGMP -- cgit From cc84b3c6b48ae81748c5e25d3558872385196162 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:24 +0200 Subject: ipv6: export several functions This patch exports some neighbour discovery functions which can be used by 6lowpan neighbour discovery ops functionality then. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 7 +++++++ include/net/ndisc.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b1774eb03f37..9826d3a9464c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -97,6 +97,13 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags); +int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + const struct in6_addr *addr, int addr_type, + u32 addr_flags, bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft); + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index a5e276703cb3..3f0f41ddbeb0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -53,6 +53,15 @@ enum { #include +/* Set to 3 to get tracing... */ +#define ND_DEBUG 1 + +#define ND_PRINTK(val, level, fmt, ...) \ +do { \ + if (val <= ND_DEBUG) \ + net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ +} while (0) + struct ctl_table; struct inet6_dev; struct net_device; @@ -115,6 +124,9 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts); +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, + int data_len, int pad); + #define NDISC_OPS_REDIRECT_DATA_SPACE 2 /* -- cgit From bbe5f5cefe2818eda0392c178de141ffc5734d90 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:25 +0200 Subject: 6lowpan: introduce 6lowpan-nd This patch introduce different 6lowpan handling for receive and transmit NS/NA messages for the ipv6 neighbour discovery. The first use-case is for supporting 802.15.4 short addresses inside the option fields and handling for RFC6775 6CO option field as userspace option. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reviewed-by: Stefan Schmidt Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. 
Miller --- include/net/ndisc.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 3f0f41ddbeb0..be1fe2283254 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,6 +35,7 @@ enum { ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ + ND_OPT_6CO = 34, /* RFC6775 */ __ND_OPT_MAX }; @@ -109,14 +110,19 @@ struct ndisc_options { #endif struct nd_opt_hdr *nd_useropts; struct nd_opt_hdr *nd_useropts_end; +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) + struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1]; +#endif }; -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] +#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] +#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] +#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] +#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -- cgit From 22a59be8b7693eb2d0897a9638f5991f2f8e4ddd Mon Sep 17 00:00:00 2001 From: Philip Prindeville Date: Tue, 14 Jun 2016 15:53:02 -0600 Subject: net: ipv4: Add ability to have GRE ignore DF bit in IPv4 payloads In the presence of firewalls which improperly block ICMP Unreachable (including Fragmentation Required) messages, Path MTU Discovery is prevented from working. A workaround is to handle IPv4 payloads opaquely, ignoring the DF bit--as is done for other payloads like AppleTalk--and doing transparent fragmentation and reassembly. Redux includes the enforcement of mutual exclusion between this feature and Path MTU Discovery as suggested by Alexander Duyck. Cc: Alexander Duyck Reviewed-by: Stephen Hemminger Signed-off-by: Philip Prindeville Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + include/uapi/linux/if_tunnel.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index dbf444428437..9222678426a1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -132,6 +132,7 @@ struct ip_tunnel { int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; + bool ignore_df; }; #define TUNNEL_CSUM __cpu_to_be16(0x01) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index af4de90ba27d..1046f5515174 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -113,6 +113,7 @@ enum { IFLA_GRE_ENCAP_SPORT, IFLA_GRE_ENCAP_DPORT, IFLA_GRE_COLLECT_METADATA, + IFLA_GRE_IGNORE_DF, __IFLA_GRE_MAX, }; -- cgit From 61d1b6a42fec61c5065f54cc62cef02b483c69fb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:12 +0200 Subject: bpf, maps: add release callback Add a release callback for maps that is invoked when the last reference to its struct file is gone and the struct file about to be released by vfs. The handler will be used by fd array maps. 
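As a hedged sketch only (the demo_* names are invented and not part of this patch), a map implementation wires the new callback up through its bpf_map_ops:

static void demo_map_release(struct bpf_map *map, struct file *map_file)
{
	/* runs when the last reference to map_file is dropped, just
	 * before the vfs releases the struct file itself
	 */
}

static const struct bpf_map_ops demo_map_ops = {
	/* .map_alloc, .map_free, .map_get_next_key etc. omitted in this sketch */
	.map_release	= demo_map_release,
};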
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bcae82c6cb1..29b5a1ae22cb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -19,7 +19,8 @@ struct bpf_map; struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ struct bpf_map *(*map_alloc)(union bpf_attr *attr); - void (*map_free)(struct bpf_map *); + void (*map_release)(struct bpf_map *map, struct file *map_file); + void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); /* funcs callable from userspace and from eBPF programs */ -- cgit From d056a788765e67773124f520159185bc89f5d1ad Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:13 +0200 Subject: bpf, maps: extend map_fd_get_ptr arguments This patch extends map_fd_get_ptr() callback that is used by fd array maps, so that struct file pointer from the related map can be passed in. It's safe to remove map_update_elem() callback for the two maps since this is only allowed from syscall side, but not from eBPF programs for these two map types. Like in per-cpu map case, bpf_fd_array_map_update_elem() needs to be called directly here due to the extra argument. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 29b5a1ae22cb..d7b43e73fe87 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -29,8 +29,9 @@ struct bpf_map_ops { int (*map_delete_elem)(struct bpf_map *map, void *key); /* funcs called by prog_array and perf_event_array map */ - void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); - void (*map_fd_put_ptr) (void *ptr); + void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, + int fd); + void (*map_fd_put_ptr)(void *ptr); }; struct bpf_map { @@ -169,7 +170,7 @@ struct bpf_array { u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_fd_array_map_clear(struct bpf_map *map); + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -207,8 +208,13 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); + int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); +int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); +void bpf_fd_array_map_clear(struct bpf_map *map); + /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent -- cgit From 3b1efb196eee45b2f0c4994e0c43edb5e367f620 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jun 2016 22:47:14 +0200 Subject: bpf, maps: flush own entries on perf map release The behavior of perf event arrays are quite different from all others as they are tightly coupled to perf event fds, f.e. 
shown recently by commit e03e7ee34fdd ("perf/bpf: Convert perf_event_array to use struct file") to make refcounting on perf event more robust. A remaining issue that the current code still has is that since additions to the perf event array take a reference on the struct file via perf_event_get() and are only released via fput() (that cleans up the perf event eventually via perf_event_release_kernel()) when the element is either manually removed from the map from user space or automatically when the last reference on the perf event map is dropped. However, this leads us to dangling struct file's when the map gets pinned after the application owning the perf event descriptor exits, and since the struct file reference will in such case only be manually dropped or via pinned file removal, it leads to the perf event living longer than necessary, needlessly consuming resources for that time. Relations between perf event fds and bpf perf event map fds can be rather complex. F.e. maps can act as demuxers among different perf event fds that can possibly be owned by different threads and based on the index selection from the program, events get dispatched to one of the per-cpu fd endpoints. One perf event fd (or, rather a per-cpu set of them) can also live in multiple perf event maps at the same time, listening for events. Also, another requirement is that perf event fds can get closed from application side after they have been attached to the perf event map, so that on exit perf event map will take care of dropping their references eventually. Likewise, when such maps are pinned, the intended behavior is that a user application does bpf_obj_get(), puts its fds in there and on exit when fd is released, they are dropped from the map again, so the map acts rather as connector endpoint. This also makes perf event maps inherently different from program arrays as described in more detail in commit c9da161c6517 ("bpf: fix clearing on persistent program array maps"). To tackle this, map entries are marked by the map struct file that added the element to the map. And when the last reference to that map struct file is released from user space, then the tracked entries are purged from the map. This is okay, because new map struct file instances resp. frontends to the anon inode are provided via bpf_map_new_fd() that is called when we invoke bpf_obj_get_user() for retrieving a pinned map, but also when an initial instance is created via map_create(). The rest is resolved by the vfs layer automatically for us by keeping reference count on the map's struct file. Any concurrent updates on the map slot are fine as well, it just means that perf_event_fd_array_release() needs to delete fewer of its own entries. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d7b43e73fe87..9adfef694a25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -13,6 +13,7 @@ #include #include +struct perf_event; struct bpf_map; /* map is generic key/value storage optionally accesible by eBPF programs */ @@ -166,8 +167,16 @@ struct bpf_array { void __percpu *pptrs[0] __aligned(8); }; }; + #define MAX_TAIL_CALL_CNT 32 +struct bpf_event_entry { + struct perf_event *event; + struct file *perf_file; + struct file *map_file; + struct rcu_head rcu; +}; + u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -- cgit From 5fb384b066d7c320832c4541658e5c655c590ac5 Mon Sep 17 00:00:00 2001 From: Fabien Siron Date: Wed, 15 Jun 2016 15:37:49 +0000 Subject: netlink: Add comment to warn about deprecated netlink rings attribute request Signed-off-by: Fabien Siron Signed-off-by: David S. Miller --- include/uapi/linux/netlink_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index d79399394b46..76b4d87c83a8 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -49,6 +49,7 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ #ifndef __KERNEL__ +/* deprecated since 4.6 */ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ #endif -- cgit From cecbc5563a02289164fa6379130243cbe08b2dd6 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 15 Jun 2016 11:32:21 -0700 Subject: net: stmmac: allow to split suspend/resume from init/exit callbacks Let the stmmac platform drivers provide dedicated suspend and resume callbacks rather than always re-using the init and exits callbacks. If the driver does not provide the suspend or resume callback, we fall back to the old behavior trying to use exit or init. This allows a specific platform to perform only a partial power-down on suspend if Wake-on-Lan is enabled but always perform the full shutdown sequence if the module is unloaded. Signed-off-by: Vincent Palatin Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ffdaca9c01af..0507dbfbf63c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,6 +135,8 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + void (*suspend)(struct platform_device *pdev, void *priv); + void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; -- cgit From 6f3b911d5f29b98752e5da86a295210c0c4f4e14 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 17 Jun 2016 15:35:27 +0200 Subject: can: bcm: add support for CAN FD frames The programming API of the CAN_BCM depends on struct can_frame which is given as array directly behind the bcm_msg_head structure. To follow this schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head flags indicates that the concatenated CAN frame structures behind the bcm_msg_head are defined as struct canfd_frame. 
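A hedged userspace sketch of that layout (the CAN ID, payload length and interval are arbitrary examples; sock is assumed to be a connect()ed CAN_BCM socket, the payload is left zeroed and error handling is omitted):

#include <unistd.h>
#include <linux/can.h>
#include <linux/can/bcm.h>

static ssize_t demo_bcm_fd_tx_setup(int sock)
{
	struct {
		struct bcm_msg_head head;
		struct canfd_frame frame;	/* CAN_FD_FRAME set: canfd_frame, not can_frame */
	} msg = { 0 };

	msg.head.opcode       = TX_SETUP;
	msg.head.flags        = SETTIMER | STARTTIMER | CAN_FD_FRAME;
	msg.head.can_id       = 0x123;
	msg.head.nframes      = 1;
	msg.head.ival2.tv_sec = 1;		/* retransmit cyclically every second */
	msg.frame.can_id      = 0x123;
	msg.frame.len         = 12;		/* CAN FD payloads may exceed 8 bytes */

	return write(sock, &msg, sizeof(msg));
}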
This patch adds the support to handle CAN and CAN FD frames on a per BCM-op base. Main changes: - generally use struct canfd_frames instead if struct can_frames - use canfd_frame.flags instead of can_frame.can_dlc for private BCM flags - make all CAN frame sizes depending on the new CAN_FD_FRAME flags - separate between CAN and CAN FD when sending/receiving frames Due to the dependence of the CAN_FD_FRAME flag the former binary interface for classic CAN frames remains stable. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/bcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 7a291dc1ff15..cefb304414ba 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -99,5 +99,6 @@ enum { #define RX_ANNOUNCE_RESUME 0x0100 #define TX_RESET_MULTI_IDX 0x0200 #define RX_RTR_FRAME 0x0400 +#define CAN_FD_FRAME 0x0800 #endif /* !_UAPI_CAN_BCM_H */ -- cgit From 86a98057256020e75e1be0f88d7617491a06e8f1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:20:44 -0700 Subject: vxlan/geneve: Include udp_tunnel.h in vxlan/geneve.h and fixup includes This patch makes it so that we add udp_tunnel.h to vxlan.h and geneve.h header files. This is useful as I plan to move the generic handlers for the port offloads into the udp_tunnel header file and leave the vxlan and geneve headers to be a bit more protocol specific. I also went through and cleaned out a number of redundant includes that where in the .h and .c files for these drivers. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/geneve.h | 3 --- include/net/udp_tunnel.h | 2 ++ include/net/vxlan.h | 6 +----- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index cb544a530146..f8aff18d6702 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -1,10 +1,7 @@ #ifndef __NET_GENEVE_H #define __NET_GENEVE_H 1 -#ifdef CONFIG_INET #include -#endif - /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 9d14f707e534..59019562d14c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -105,12 +105,14 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); +#ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } +#endif static inline void udp_tunnel_encap_enable(struct socket *sock) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b8803165df91..7d944941f32f 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -1,12 +1,8 @@ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 -#include -#include #include -#include -#include -#include +#include #include /* VXLAN protocol (RFC 7348) header: -- cgit From e7b3db5e60e8f471c3f5ef93b497bafe5863e56a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:20:52 -0700 Subject: net: Combine GENEVE and VXLAN port notifiers into single functions This patch merges the GENEVE and VXLAN code so that both functions pass through a shared code path. 
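To make the shared path concrete, here is a rough driver-side sketch (not from this patch) of the single handler the series converges on, using the struct udp_tunnel_info and UDP_TUNNEL_TYPE_* definitions added below and the ndo_udp_tunnel_add hook introduced in the follow-up patch; struct foo_priv and foo_hw_set_port are invented for illustration:

#include <linux/netdevice.h>
#include <net/udp_tunnel.h>

struct foo_priv {
	void __iomem *regs;	/* illustrative driver state */
};

static void foo_hw_set_port(struct foo_priv *priv, unsigned short type,
			    __be16 port, bool add)
{
	/* program the device's tunnel parser here (illustrative stub) */
}

/* One entry point for every parsable UDP tunnel type */
static void foo_udp_tunnel_add(struct net_device *dev,
			       struct udp_tunnel_info *ti)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (ti->type) {
	case UDP_TUNNEL_TYPE_VXLAN:
	case UDP_TUNNEL_TYPE_GENEVE:
		foo_hw_set_port(priv, ti->type, ti->port, true);
		break;
	default:
		break;
	}
}

A matching foo_udp_tunnel_del would pass false instead, and both would be wired up via the .ndo_udp_tunnel_add/.ndo_udp_tunnel_del fields once the follow-up patch below adds them.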
This way we can start the effort of using a single function on the network device drivers to handle both of these tunnel types. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 59019562d14c..71afbea873a0 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -84,6 +84,39 @@ struct udp_tunnel_sock_cfg { void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); +/* -- List of parsable UDP tunnel types -- + * + * Adding to this list will result in serious debate. The main issue is + * that this list is essentially a list of workarounds for either poorly + * designed tunnels, or poorly designed device offloads. + * + * The parsing supported via these types should really be used for Rx + * traffic only as the network stack will have already inserted offsets for + * the location of the headers in the skb. In addition any ports that are + * pushed should be kept within the namespace without leaking to other + * devices such as VFs or other ports on the same device. + * + * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the + * need to use this for Rx checksum offload. It should not be necessary to + * call this function to perform Tx offloads on outgoing traffic. + */ +enum udp_parsable_tunnel_type { + UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ + UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ +}; + +struct udp_tunnel_info { + unsigned short type; + sa_family_t sa_family; + __be16 port; +}; + +/* Notify network devices of offloadable types */ +void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); +void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); +void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, -- cgit From 7c46a640de6fcc4f35d0702710356a024eadf68f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:21:00 -0700 Subject: net: Merge VXLAN and GENEVE push notifiers into a single notifier This patch merges the notifiers for VXLAN and GENEVE into a single UDP tunnel notifier. The idea is that we will want to only have to make one notifier call to receive the list of ports for VXLAN and GENEVE tunnels that need to be offloaded. In addition we add a new set of ndo functions named ndo_udp_tunnel_add and ndo_udp_tunnel_del that are meant to allow us to track the tunnel meta-data such as port and address family as tunnels are added and removed. The tunnel meta-data is now transported in a structure named udp_tunnel_info which for now carries the type, address family, and port number. In the future this could be updated so that we can include a tuple of values including things such as the destination IP address and other fields. I also ended up going with a naming scheme that consisted of using the prefix udp_tunnel on function names. I applied this to the notifier and ndo ops as well so that it hopefully points to the fact that these are primarily used in the udp_tunnel functions. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- include/net/geneve.h | 3 +-- include/net/udp_tunnel.h | 6 ++++++ include/net/vxlan.h | 4 ++-- 4 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 890158e99159..577d2a1814b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -61,6 +61,8 @@ struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; struct mpls_dev; +/* UDP Tunnel offloads */ +struct udp_tunnel_info; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1050,6 +1052,19 @@ struct tc_to_netdev { * address family that vxlan is not listening to anymore. The operation * is protected by the vxlan_net->sock_lock. * + * void (*ndo_udp_tunnel_add)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify a driver about the UDP port and socket + * address family that a UDP tunnel is listnening to. It is called only + * when a new port starts listening. The operation is protected by the + * RTNL. + * + * void (*ndo_udp_tunnel_del)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify the driver about a UDP port and socket + * address family that the UDP tunnel is not listening to anymore. The + * operation is protected by the RTNL. + * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other @@ -1269,6 +1284,10 @@ struct net_device_ops { void (*ndo_del_geneve_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); + void (*ndo_udp_tunnel_add)(struct net_device *dev, + struct udp_tunnel_info *ti); + void (*ndo_udp_tunnel_del)(struct net_device *dev, + struct udp_tunnel_info *ti); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, @@ -2255,8 +2274,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B -#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C -#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D +#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/geneve.h b/include/net/geneve.h index f8aff18d6702..3410c4b5a382 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -61,8 +61,7 @@ struct genevehdr { static inline void geneve_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_GENEVE, netdev); + udp_tunnel_get_rx_info(netdev); } #ifdef CONFIG_INET diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 71afbea873a0..1c9408a04213 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -117,6 +117,12 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. 
*/ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7d944941f32f..c62e2ed1c3af 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -4,6 +4,7 @@ #include #include #include +#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -390,8 +391,7 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) static inline void vxlan_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev); + udp_tunnel_get_rx_info(netdev); } static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) -- cgit From 1938ee1fd3de74d761a60806b048df652666afec Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:23:12 -0700 Subject: net: Remove deprecated tunnel specific UDP offload functions Now that we have all the drivers using udp_tunnel_get_rx_ports, ndo_add_udp_enc_rx_port, and ndo_del_udp_enc_rx_port we can drop the function calls that were specific to VXLAN and GENEVE. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 -------------------------------------- include/net/geneve.h | 5 ----- include/net/vxlan.h | 5 ----- 3 files changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 577d2a1814b1..e84d9d23c2d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1026,32 +1026,6 @@ struct tc_to_netdev { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * - * void (*ndo_add_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify a driver about the UDP port and socket - * address family that vxlan is listening to. It is called only when - * a new port starts listening. The operation is protected by the - * vxlan_net->sock_lock. - * - * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify a driver about the UDP port and socket - * address family that geneve is listnening to. It is called only when - * a new port starts listening. The operation is protected by the - * geneve_net->sock_lock. - * - * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify the driver about a UDP port and socket - * address family that geneve is not listening to anymore. The operation - * is protected by the geneve_net->sock_lock. - * - * void (*ndo_del_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify the driver about a UDP port and socket - * address family that vxlan is not listening to anymore. The operation - * is protected by the vxlan_net->sock_lock. 
- * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1272,18 +1246,6 @@ struct net_device_ops { struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); - void (*ndo_add_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_add_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); void (*ndo_udp_tunnel_add)(struct net_device *dev, struct udp_tunnel_info *ti); void (*ndo_udp_tunnel_del)(struct net_device *dev, diff --git a/include/net/geneve.h b/include/net/geneve.h index 3410c4b5a382..ec0327d4331b 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -59,11 +59,6 @@ struct genevehdr { struct geneve_opt options[]; }; -static inline void geneve_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c62e2ed1c3af..b96d0360c095 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -389,11 +389,6 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -static inline void vxlan_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; -- cgit From b9adcd69bd7b41625201686b4cfec7ff13357afc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:23:19 -0700 Subject: vxlan: Add new UDP encapsulation offload type for VXLAN-GPE The fact is VXLAN with Generic Protocol Extensions cannot be supported by the same hardware parsers that support VXLAN. The protocol extensions allow for things like a Next Protocol field which in turn allows for things other than Ethernet to be passed over the tunnel. Most existing parsers will not know how to interpret this. To resolve this I am giving VXLAN-GPE its own UDP encapsulation offload type. This way hardware that does support GPE can simply add this type to the switch statement for VXLAN, and if they don't support it then this will fix any issues where headers might be interpreted incorrectly. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1c9408a04213..02c5be037451 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -103,6 +103,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ + UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { -- cgit From a2e2ff560f5113e8ca31432fbd985f5f1889efdc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:24 -0700 Subject: net: ipv6: Move ip6_route_get_saddr to inline VRF driver needs access to ip6_route_get_saddr code. 
Since it does little beyond ipv6_dev_get_saddr and ipv6_dev_get_saddr is already exported for modules move ip6_route_get_saddr to the header as an inline. Code move only; no functional change. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f55bf3d294aa..d97305d0e71f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -18,6 +18,7 @@ struct route_info { __u8 prefix[0]; /* 0,8 or 16 */ }; +#include #include #include #include @@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); -int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, - const struct in6_addr *daddr, unsigned int prefs, - struct in6_addr *saddr); +static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr) +{ + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; + int err = 0; + + if (rt && rt->rt6i_prefsrc.plen) + *saddr = rt->rt6i_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + daddr, prefs, saddr); + + return err; +} struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); -- cgit From 0d240e7811c4ec1965760ee4643b5bbc9cfacbb3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:25 -0700 Subject: net: vrf: Implement get_saddr for IPv6 IPv6 source address selection needs to consider the real egress route. Similar to IPv4 implement a get_saddr6 method which is called if source address has not been set. The get_saddr6 method does a full lookup which means pulling a route from the VRF FIB table and properly considering linklocal/multicast destination addresses. Lookup failures (eg., unreachable) then cause the source address selection to fail which gets propagated back to the caller. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/l3mdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f8a416ec674c..818fd4f100fc 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -39,6 +39,9 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, struct flowi6 *fl6); + int (*l3mdev_get_saddr6)(struct net_device *dev, + const struct sock *sk, + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -140,6 +143,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -230,6 +235,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) return NULL; } +static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6) +{ + return 0; +} + static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { -- cgit From afbac6010aec514998214fb19a1f37732b7a1d77 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Jun 2016 16:24:26 -0700 Subject: net: ipv6: Address selection needs to consider L3 domains IPv6 version of 3f2fb9a834cb ("net: l3mdev: address selection should only consider devices in L3 domain") and the follow up commit, a17b693cdd876 ("net: l3mdev: prefer VRF master for source address selection"). That is, if outbound device is given then the address preference order is an address from that device, an address from the master device if it is enslaved, and then an address from a device in the same L3 domain. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 818fd4f100fc..e90095091aa0 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -79,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return rc; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +{ + /* netdev_master_upper_dev_get_rcu calls + * list_first_or_null_rcu to walk the upper dev list. + * list_first_or_null_rcu does not handle a const arg. We aren't + * making changes, just want the master device from that list so + * typecast to remove the const + */ + struct net_device *dev = (struct net_device *)_dev; + const struct net_device *master; + + if (!dev) + return NULL; + + if (netif_is_l3_master(dev)) + master = dev; + else if (netif_is_l3_slave(dev)) + master = netdev_master_upper_dev_get_rcu(dev); + else + master = NULL; + + return master; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -190,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return 0; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +{ + return NULL; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? 
dev->ifindex : 0; -- cgit From b1cadc1a0949c82ff7fcb15603e3caf2d32ff9f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:02 -0700 Subject: ipv6: icmp: add a force_saddr param to icmp6_send() SIT or GRE tunnels might want to translate an IPV4 address into a v4mapped one when translating ICMP to ICMPv6. This patch adds the parameter to icmp6_send() but does not change icmpv6_send() signature. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 630f45335c73..432611a297fb 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -14,7 +14,8 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); -typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info); +typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -- cgit From 5fbba8ac9358f1e796c8aedcccc3487364643723 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:03 -0700 Subject: ip6: move ipip6_err_gen_icmpv6_unreach() We want to use this helper from GRE as well, so this is the time to move it in net/ipv6/icmp.c Also add a @nhs parameter, since SIT and GRE have different values for the header(s) to skip. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 432611a297fb..9796481edbdb 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,6 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); #else -- cgit From 2d7a3b276be2d032a6c1a48ced87a474327ee3d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:04 -0700 Subject: ipv6: translate ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED For better traceroute/mtr support for SIT and GRE tunnels, we translate IPV4 ICMP ICMP_TIME_EXCEEDED to ICMPV6_TIME_EXCEED We also have to translate the IPv4 source IP address of ICMP message to IPv6 v4mapped. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9796481edbdb..97ae98071a03 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,7 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); #else -- cgit From 9b8c6d7bf2e08a7d3eb6660a2bfaf29b8b49c329 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:05 -0700 Subject: gre: better support for ICMP messages for gre+ipv6 ipgre_err() can call ip6_err_gen_icmpv6_unreach() for proper support of ipv4+gre+icmp+ipv6+... frames, used for example by traceroute/mtr. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9222678426a1..a5e7035fb93f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -157,6 +157,7 @@ struct tnl_ptk_info { __be16 proto; __be32 key; __be32 seq; + int hdr_len; }; #define PACKET_RCVD 0 -- cgit From 20e1954fe238dbe5f8d3a979e593fe352bd703cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2016 21:52:06 -0700 Subject: ipv6: RFC 4884 partial support for SIT/GRE tunnels When receiving an ICMPv4 message containing extensions as defined in RFC 4884, and translating it to ICMPv6 at SIT or GRE tunnel, we need some extra manipulation in order to properly forward the extensions. This patch only takes care of Time Exceeded messages as they are the ones that typically carry information from various routers in a fabric during a traceroute session. It also avoids complex skb logic if the data_len is not a multiple of 8. RFC states : The "original datagram" field MUST contain at least 128 octets. If the original datagram did not contain 128 octets, the "original datagram" field MUST be zero padded to 128 octets. In practice routers use 128 bytes of original datagram, not more. Initial translation was added in commit ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Signed-off-by: Eric Dumazet Cc: Oussama Ghorbel Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 3 ++- include/uapi/linux/icmp.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 97ae98071a03..57086e9fc64c 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -18,7 +18,8 @@ typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, + unsigned int data_len); #else diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index 16fff055f734..fddd9d736284 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -79,6 +79,7 @@ struct icmphdr { __be16 __unused; __be16 mtu; } frag; + __u8 reserved[4]; } un; }; -- cgit From cc8feb8edd92d854be552fe4f5e0eeabca40b9ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 Apr 2016 14:00:37 +0100 Subject: rxrpc: Fix exclusive connection handling "Exclusive connections" are meant to be used for a single client call and then scrapped. The idea is to limit the use of the negotiated security context. The current code, however, isn't doing this: it is instead restricting the socket to a single virtual connection and doing all the calls over that. This is changed such that the socket no longer maintains a special virtual connection over which it will do all the calls, but rather gets a new one each time a new exclusive call is made. Further, using a socket option for this is a poor choice. It should be done on sendmsg with a control message marker instead so that calls can be marked exclusive individually. To that end, add RXRPC_EXCLUSIVE_CALL which, if passed to sendmsg() as a control message element, will cause the call to be done on an single-use connection. The socket option (RXRPC_EXCLUSIVE_CONNECTION) still exists and, if set, will override any lack of RXRPC_EXCLUSIVE_CALL being specified so that programs using the setsockopt() will appear to work the same. Signed-off-by: David Howells --- include/linux/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index 1e8f216e2cf1..c68307bc306f 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -35,7 +35,7 @@ struct sockaddr_rxrpc { */ #define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ #define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ -#define RXRPC_EXCLUSIVE_CONNECTION 3 /* [clnt] use exclusive RxRPC connection */ +#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ /* @@ -52,6 +52,7 @@ struct sockaddr_rxrpc { #define RXRPC_LOCAL_ERROR 7 /* -r: local error generated [terminal] */ #define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ #define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ +#define RXRPC_EXCLUSIVE_CALL 10 /* s-: Call should be on exclusive connection */ /* * RxRPC security levels -- cgit From b95e5928fcc76d156352570858abdea7b2628efd Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 20 Jun 2016 07:26:17 -0700 Subject: openvswitch: Add packet len info to upcall. 
The commit f2a4d086ed4c ("openvswitch: Add packet truncation support.") introduces packet truncation before sending to userspace upcall receiver. This patch passes up the skb->len before truncation so that the upcall receiver knows the original packet size. Potentially this will be used by sFlow, where OVS translates sFlow config header=N to a sample action, truncating packet to N byte in kernel datapath. Thus, only N bytes instead of full-packet size is copied from kernel to userspace, saving the kernel-to-userspace bandwidth. Signed-off-by: William Tu Cc: Pravin Shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8274675ba9a3..d95a3018f6a1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -166,6 +166,7 @@ enum ovs_packet_cmd { * output port is actually a tunnel port. Contains the output tunnel key * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and + * @OVS_PACKET_ATTR_LEN: Packet size before truncation. * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment * size. * @@ -185,6 +186,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, error logging should be suppressed. */ OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ + OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ __OVS_PACKET_ATTR_MAX }; -- cgit From 506e65df52f2bf250aa9b4264efd180d1646bdec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Jun 2016 23:09:01 +0200 Subject: netfilter: make comparision helpers stub functions in ZONES=n case Those comparisions are useless in case of ZONES=n; all conntracks will reside in the same zone by definition. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_zones.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 4e32512cef32..bd4692690914 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -68,22 +68,34 @@ static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_matches_dir(zone, dir) ? zone->id : NF_CT_DEFAULT_ZONE_ID; +#else + return NF_CT_DEFAULT_ZONE_ID; +#endif } static inline bool nf_ct_zone_equal(const struct nf_conn *a, const struct nf_conntrack_zone *b, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_id(nf_ct_zone(a), dir) == nf_ct_zone_id(b, dir); +#else + return true; +#endif } static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, const struct nf_conntrack_zone *b) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone(a)->id == b->id; +#else + return true; +#endif } #endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ -- cgit From 6c8dee9842461e6ee6eb46081478999b3d5cb297 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 21:57:35 +0200 Subject: netfilter: move zone info into struct nf_conn Curently we store zone information as a conntrack extension. 
This has one drawback: for every lookup we need to fetch the zone data from the extension area. This change place the zone data directly into the main conntrack object structure and then removes the zone conntrack extension. The zone data is just 4 bytes, it fits into a padding hole before the tuplehash info, so we do not even increase the nf_conn structure size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 4 ---- include/net/netfilter/nf_conntrack_zones.h | 33 ++++++++++------------------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dd78bea227c8..9c0ed3d7af89 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -85,6 +85,9 @@ struct nf_conn { spinlock_t lock; u16 cpu; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 55d15049ab2f..b925395fa5ed 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -15,9 +15,6 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, #endif -#ifdef CONFIG_NF_CONNTRACK_ZONES - NF_CT_EXT_ZONE, -#endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_CT_EXT_TSTAMP, #endif @@ -38,7 +35,6 @@ enum nf_ct_ext_id { #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index bd4692690914..64a718b60839 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -9,12 +9,11 @@ static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { - const struct nf_conntrack_zone *nf_ct_zone = NULL; - #ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + return &ct->zone; +#else + return &nf_ct_zone_dflt; #endif - return nf_ct_zone ? 
nf_ct_zone : &nf_ct_zone_dflt; } static inline const struct nf_conntrack_zone * @@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { - const struct nf_conntrack_zone *zone; - +#ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; - zone = nf_ct_zone(tmpl); - if (zone->flags & NF_CT_FLAG_MARK) - zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); - - return zone; + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); } -static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, - const struct nf_conntrack_zone *info) +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; - - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - return -ENOMEM; - - nf_ct_zone_init(nf_ct_zone, info->id, info->dir, - info->flags); + ct->zone = *zone; #endif - return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, -- cgit From d1bd330a229fc8a69f0e7532138dfd42b4542fd4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 21 Jun 2016 01:17:17 +0100 Subject: of_mdio: Enable fixed PHY support if driver is a module The fixed_phy driver doesn't have to be built-in, and it's important that of_mdio supports it even if it's a module. Signed-off-by: Ben Hutchings Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 8f2237eb3485..6c8cb9aa4c00 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -69,7 +69,7 @@ static inline int of_mdio_parse_addr(struct device *dev, } #endif /* CONFIG_OF */ -#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) +#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_FIXED_PHY) extern int of_phy_register_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); #else -- cgit From af7d5185263133f859dd4f35d45594deef9db854 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Tue, 21 Jun 2016 12:43:59 +0300 Subject: net/mlx4_en: Add DCB PFC support through CEE netlink commands This patch adds support for reading and updating priority flow control (PFC) attributes in the driver via netlink. Signed-off-by: Rana Shahout Signed-off-by: Eran Ben Elisha Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..4dbc1450bbe0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -535,6 +535,7 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; + int max_tc_eth; u32 *qp0_qkey; u32 *qp0_proxy; u32 *qp1_proxy; @@ -1494,6 +1495,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, u16 offset, u16 size, u8 *data); +int mlx4_max_tc(struct mlx4_dev *dev); /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) -- cgit From 722003ac40c2c397bd5bc2b714125bc82ab27043 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 21 Jun 2016 09:36:21 -0400 Subject: qed: Add support for coalescing config read/update. This patch adds support for configuring the device tx/rx coalescing timeout values in the order of micro seconds. It also adds APIs for upper layer drivers for reading/updating the coalescing values. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e1d5122e8a96..b1e3c57c7117 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -488,6 +488,30 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); +/** + * @brief get_coalesce - Get coalesce parameters in usec + * + * @param cdev + * @param rx_coal - Rx coalesce value in usec + * @param tx_coal - Tx coalesce value in usec + * + */ + void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal); + +/** + * @brief set_coalesce - Configure Rx coalesce value in usec + * + * @param cdev + * @param rx_coal - Rx coalesce value in usec + * @param tx_coal - Tx coalesce value in usec + * @param qid - Queue index + * @param sb_id - Status Block Id + * + * @return 0 on success, error otherwise. + */ + int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, + u8 qid, u16 sb_id); + /** * @brief set_led - Configure LED mode * -- cgit From 7643507fe8b5bd8ab7522f6a81058cc1209d2585 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Tue, 21 Jun 2016 14:58:46 -0400 Subject: netfilter: xt_NFLOG: nflog-range does not truncate packets li->u.ulog.copy_len is currently ignored by the kernel, we should truncate the packet to either li->u.ulog.copy_len (if set) or copy_range before sending it to userspace. 0 is a valid input for copy_len, so add a new flag to indicate whether this was option was specified by the user or not. Add two flags to indicate whether nflog-size/copy_len was set or not. XT_NFLOG_F_COPY_LEN is for XT_NFLOG and NFLOG_F_COPY_LEN for nfnetlink_log On the userspace side, this was initially represented by the option nflog-range, this will be replaced by --nflog-size now. --nflog-range would still exist but does not do anything. 
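A minimal sketch of how a logging backend is expected to honour the new flag when deciding how many packet bytes to copy to user space (the helper is invented for illustration; the actual nfnetlink_log changes are not part of this include/ hunk):

#include <net/netfilter/nf_log.h>

static u32 nflog_copy_len(const struct nf_loginfo *li, u32 pkt_len)
{
	u32 len = pkt_len;

	if (li->type != NF_LOG_TYPE_ULOG)
		return len;

	/* copy_len is only meaningful when NF_LOG_F_COPY_LEN is set,
	 * since 0 is a valid --nflog-size value.
	 */
	if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
	    li->u.ulog.copy_len < len)
		len = li->u.ulog.copy_len;

	return len;
}

On the xtables side, xt_nflog_info.flags would carry XT_NFLOG_F_COPY_LEN when the user gives --nflog-size, and the target would translate that into NF_LOG_F_COPY_LEN before handing the nf_loginfo to the logger.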
Reported-by: Joe Dollard Reviewed-by: Josh Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 7 +++++++ include/uapi/linux/netfilter/xt_NFLOG.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 57639fca223a..83d855ba6af1 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -12,6 +12,9 @@ #define NF_LOG_UID 0x08 /* Log UID owning local socket */ #define NF_LOG_MASK 0x0f +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + enum nf_log_type { NF_LOG_TYPE_LOG = 0, NF_LOG_TYPE_ULOG, @@ -22,9 +25,13 @@ struct nf_loginfo { u_int8_t type; union { struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ u_int32_t copy_len; u_int16_t group; u_int16_t qthreshold; + u_int16_t flags; } ulog; struct { u_int8_t level; diff --git a/include/uapi/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h index 87b58311ce6b..f33070730fc8 100644 --- a/include/uapi/linux/netfilter/xt_NFLOG.h +++ b/include/uapi/linux/netfilter/xt_NFLOG.h @@ -6,9 +6,13 @@ #define XT_NFLOG_DEFAULT_GROUP 0x1 #define XT_NFLOG_DEFAULT_THRESHOLD 0 -#define XT_NFLOG_MASK 0x0 +#define XT_NFLOG_MASK 0x1 + +/* This flag indicates that 'len' field in xt_nflog_info is set*/ +#define XT_NFLOG_F_COPY_LEN 0x1 struct xt_nflog_info { + /* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */ __u32 len; __u16 group; __u16 threshold; -- cgit From 889f7ee7c6e84251215d43cbc856ea116c72d3f2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 18:07:07 +0200 Subject: netfilter: nf_tables: add generic macros to check for generation mask Thus, we can reuse these to check the genmask of any object type, not only rules. This is required now that tables, chain and sets will get a generation mask field too in follow up patches. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..d0778cbaf7f1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -969,6 +969,30 @@ static inline u8 nft_genmask_cur(const struct net *net) #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) +/* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. 
*/ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) + /* * Set element transaction helpers */ -- cgit From f2a6d766765d2794e26e25655d4ffcfe29c3ec2f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Jun 2016 17:29:18 +0200 Subject: netfilter: nf_tables: add generation mask to tables This patch addresses two problems: 1) The netlink dump is inconsistent when interfering with an ongoing transaction update for several reasons: 1.a) We don't honor the internal NFT_TABLE_INACTIVE flag, and we should be skipping these inactive objects in the dump. 1.b) We perform speculative deletion during the preparation phase, that may result in skipping active objects. 1.c) The listing order changes, which generates noise when tracking incremental ruleset update via tools like git or our own testsuite. 2) We don't allow to add and to update the object in the same batch, eg. add table x; add table x { flags dormant\; }. In order to resolve these problems: 1) If the user requests a deletion, the object becomes inactive in the next generation. Then, ignore objects that scheduled to be deleted from the lookup path, as they will be effectively removed in the next generation. 2) From the get/dump path, if the object is not currently active, we skip it. 3) Support 'add X -> update X' sequence from a transaction. After this update, we obtain a consistent list as long as we stay in the same generation. The userspace side can detect interferences through the generation counter so it can restart the dumping. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d0778cbaf7f1..05c9a64b39aa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -838,6 +838,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask * @name: name of the table */ struct nft_table { @@ -846,7 +847,8 @@ struct nft_table { struct list_head sets; u64 hgenerator; u32 use; - u16 flags; + u16 flags:14, + genmask:2; char name[NFT_TABLE_MAXNAMELEN]; }; @@ -992,6 +994,8 @@ static inline u8 nft_genmask_cur(const struct net *net) /* After committing the ruleset, clear the stale generation bit. */ #define nft_clear(__net, __obj) \ (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) /* * Set element transaction helpers -- cgit From 664b0f8cd8c66d02d14168ee7ac6a957cc88177f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 19:21:31 +0200 Subject: netfilter: nf_tables: add generation mask to chains Similar to ("netfilter: nf_tables: add generation mask to tables"). 
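Taken together, the intended use of these helpers for any object type looks roughly like the following sketch (not part of the patches; find_table(), the list argument and the surrounding transaction code are invented for illustration):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/string.h>
#include <net/netfilter/nf_tables.h>

/* Lookup: skip objects that are not active in the generation we care about.
 * Dump/get paths pass nft_genmask_cur(net), the transaction path passes
 * nft_genmask_next(net).
 */
static struct nft_table *find_table(struct list_head *tables,
				    const char *name, u8 genmask)
{
	struct nft_table *table;

	list_for_each_entry(table, tables, list) {
		if (!strcmp(table->name, name) &&
		    nft_active_genmask(table, genmask))
			return table;
	}
	return NULL;
}

static void add_table(struct net *net, struct list_head *tables,
		      struct nft_table *table)
{
	nft_activate_next(net, table);	 /* visible from the next generation on */
	list_add_tail_rcu(&table->list, tables);
}

static void del_table(struct net *net, struct nft_table *table)
{
	nft_deactivate_next(net, table); /* hidden from the next generation on */
}

On commit the generation counter flips and nft_clear() drops the stale bit; on abort the newly added objects are simply unlinked again, which is what makes the 'add X; update X' sequence and consistent dumps possible.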
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05c9a64b39aa..b023e287ea92 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -732,7 +732,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_INACTIVE = 0x2, }; /** @@ -754,7 +753,8 @@ struct nft_chain { u64 handle; u32 use; u16 level; - u8 flags; + u8 flags:6, + genmask:2; char name[NFT_CHAIN_MAXNAMELEN]; }; -- cgit From 37a9cc52552579f22e18cca401cfc4351b6cbc72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 22:52:45 +0200 Subject: netfilter: nf_tables: add generation mask to sets Similar to ("netfilter: nf_tables: add generation mask to tables"). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b023e287ea92..07a5ba47cbda 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -296,6 +296,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @ops: set ops * @pnet: network namespace * @flags: set flags + * @genmask: generation mask * @klen: key length * @dlen: data length * @data: private set data @@ -317,7 +318,8 @@ struct nft_set { /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; - u16 flags; + u16 flags:14, + genmask:2; u8 klen; u8 dlen; unsigned char data[] @@ -335,9 +337,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv) } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { -- cgit From 3183ab8997a477c8d9ad175a1cef70dff77c6dbc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2016 13:26:10 +0200 Subject: netfilter: conntrack: allow increasing bucket size via sysctl too No need to restrict this to module parameter. We export a copy of the real hash size -- when user alters the value we allocate the new table, copy entries etc before we update the real size to the requested one. This is also needed because the real size is used by concurrent readers and cannot be changed without synchronizing the conntrack generation seqcnt. We only allow changing this value from the initial net namespace. Tested using http-client-benchmark vs. 
httpterm with concurrent while true;do echo $RANDOM > /proc/sys/net/netfilter/nf_conntrack_buckets done Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 9c0ed3d7af89..5d3397f34583 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,6 +290,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; -- cgit From 82bec71d46b83f39860e2838ff8394e4fcd6efab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Jun 2016 14:26:33 +0200 Subject: netfilter: nf_tables: get rid of NFT_BASECHAIN_DISABLED This flag was introduced to restore rulesets from the new netdev family, but since 5ebe0b0eec9d6f7 ("netfilter: nf_tables: destroy basechain and rules on netdevice removal") the ruleset is released once the netdev is gone. This also removes nft_register_basechain() and nft_unregister_basechain() since they have no clients anymore after this rework. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 07a5ba47cbda..1ea19a6e72e6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -798,7 +798,6 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 -#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain -- cgit From 0071e184a535e40ce487528cb04f4690cb0da881 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Thu, 23 Jun 2016 12:24:08 +0200 Subject: netfilter: nf_tables: add support for inverted logic in nft_lookup Introduce a new configuration option for this expression, which allows users to invert the logic of set lookups. In _init() we will now return EINVAL if NFT_LOOKUP_F_INV is in anyway related to a map lookup. 
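A sketch of that validation step, consistent with the description above but not copied from the patch (the helper name and its surroundings are invented; the real check lives in nft_lookup's ->init() path):

#include <net/netlink.h>
#include <net/netfilter/nf_tables.h>

static int lookup_parse_flags(const struct nlattr * const tb[],
			      const struct nft_set *set, bool *invert)
{
	u32 flags;

	if (!tb[NFTA_LOOKUP_FLAGS])
		return 0;

	flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
	if (flags & ~NFT_LOOKUP_F_INV)
		return -EINVAL;		/* unknown flag bits */

	if (flags & NFT_LOOKUP_F_INV) {
		if (set->flags & NFT_SET_MAP)
			return -EINVAL;	/* inverted map lookups are rejected */
		*invert = true;
	}
	return 0;
}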
The code in the _eval() function has been untangled and updated to sopport the XOR of options, as we should consider 4 cases: * lookup false, invert false -> NFT_BREAK * lookup false, invert true -> return w/o NFT_BREAK * lookup true, invert false -> return w/o NFT_BREAK * lookup true, invert true -> NFT_BREAK Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6a4dbe04f09e..01751faccaf8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,10 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + /** * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes * @@ -553,6 +557,7 @@ enum nft_cmp_attributes { * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) */ enum nft_lookup_attributes { NFTA_LOOKUP_UNSPEC, @@ -560,6 +565,7 @@ enum nft_lookup_attributes { NFTA_LOOKUP_SREG, NFTA_LOOKUP_DREG, NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, __NFTA_LOOKUP_MAX }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) -- cgit From 520ac30f45519b0a82dd92117c181d1d6144677b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:49 -0700 Subject: net_sched: drop packets after root qdisc lock is released Qdisc performance suffers when packets are dropped at enqueue() time because drops (kfree_skb()) are done while qdisc lock is held, delaying a dequeue() draining the queue. Nominal throughput can be reduced by 50 % when this happens, at a time we would like the dequeue() to proceed as fast as possible. Even FQ is vulnerable to this problem, while one of FQ goals was to provide some flow isolation. This patch adds a 'struct sk_buff **to_free' parameter to all qdisc->enqueue(), and in qdisc_drop() helper. I measured a performance increase of up to 12 %, but this patch is a prereq so that future batches in enqueue() can fly. Signed-off-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4f7cee8344c4..04e84c07c94f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -37,8 +37,10 @@ struct qdisc_size_table { }; struct Qdisc { - int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); - struct sk_buff * (*dequeue)(struct Qdisc *dev); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); + struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 @@ -160,7 +162,9 @@ struct Qdisc_ops { char id[IFNAMSIZ]; int priv_size; - int (*enqueue)(struct sk_buff *, struct Qdisc *); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); @@ -498,10 +502,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch); + return sch->enqueue(skb, sch, to_free); } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) @@ -626,24 +631,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return __qdisc_dequeue_head(sch, &sch->q); } +/* Instead of calling kfree_skb() while root qdisc lock is held, + * queue the skb for future freeing at end of __dev_xmit_skb() + */ +static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) +{ + skb->next = *to_free; + *to_free = skb; +} + static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list) + struct sk_buff_head *list, + struct sk_buff **to_free) { struct sk_buff *skb = __skb_dequeue(list); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); + qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return len; } return 0; } -static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) +static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, + struct sk_buff **to_free) { - return __qdisc_queue_drop_head(sch, &sch->q); + return __qdisc_queue_drop_head(sch, &sch->q, to_free); } static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) @@ -724,9 +741,11 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) qdisc_qstats_drop(sch); } -static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { - kfree_skb(skb); + __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; -- cgit From 008830bc321c0fc22c0db8d5b0b56f854ed90a5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:50 -0700 Subject: net_sched: fq_codel: cache skb->truesize into skb->cb Now we defer skb drops, it makes sense to keep a copy of skb->truesize in struct codel_skb_cb to avoid one cache line miss per dropped skb in fq_codel_drop(), to reduce latencies a bit further. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/codel_qdisc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 8144d9cd2908..098630f83a55 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -52,6 +52,7 @@ /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; + unsigned int mem_usage; }; static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) -- cgit From 4d202a0d31b96ab3324b21e7500d9a2da9ef57dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jun 2016 23:16:52 -0700 Subject: net_sched: generalize bulk dequeue When qdisc bulk dequeue was added in linux-3.18 (commit 5772e9a3463b "qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE"), it was constrained to some specific qdiscs. With some extra care, we can extend this to all qdiscs, so that typical traffic shaping solutions can benefit from small batches (8 packets in this patch). For example, HTB is often used on some multi queue device. And bonding/team are multi queue devices... Idea is to bulk-dequeue packets mapping to the same transmit queue. This brings between 35 and 80 % performance increase in HTB setup under pressure on a bonding setup : 1) NUMA node contention : 610,000 pps -> 1,110,000 pps 2) No node contention : 1,380,000 pps -> 1,930,000 pps Now we should work to add batches on the enqueue() side ;) Signed-off-by: Eric Dumazet Cc: John Fastabend Cc: Jesper Dangaard Brouer Cc: Hannes Frederic Sowa Cc: Florian Westphal Cc: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 04e84c07c94f..909aff2db2b3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -75,13 +75,14 @@ struct Qdisc { /* * For performance sake on SMP, we put highly modified fields at the end */ - struct Qdisc *next_sched ____cacheline_aligned_in_smp; - struct sk_buff *gso_skb; - unsigned long state; + struct sk_buff *gso_skb ____cacheline_aligned_in_smp; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; + unsigned long state; + struct Qdisc *next_sched; + struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; atomic_t refcnt; -- cgit From 1466cc5b23d18e7b6b8f1a45443d595393dbcae7 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 23 Jun 2016 17:02:37 +0300 Subject: net/mlx5: Rate limit tables support Configuring and managing HW rate limit tables. The HW holds a table of rate limits, each rate is associated with an index in that table. Later a Send Queue uses this index to set the rate limit. Multiple Send Queues can have the same rate limit, which is represented by a single entry in this table. Even though a rate can be shared, each queue is being rate limited independently of others. The SW shadow of this table holds the rate itself, the index in the HW table and the refcount (number of queues) working with this rate. The exported functions are mlx5_rl_add_rate and mlx5_rl_remove_rate. Number of different rates and their values are derived from HW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/driver.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 73a48479892d..e0a3ed758287 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1330,6 +1330,7 @@ enum mlx5_cap_type { MLX5_CAP_ESWITCH, MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, + MLX5_CAP_QOS, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1414,6 +1415,9 @@ enum mlx5_cap_type { MLX5_GET(vector_calc_cap, \ mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) +#define MLX5_CAP_QOS(mdev, cap)\ + MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 80776d0c52dc..46260fdc5305 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -481,6 +481,21 @@ struct mlx5_fc_stats { struct mlx5_eswitch; +struct mlx5_rl_entry { + u32 rate; + u16 index; + u16 refcount; +}; + +struct mlx5_rl_table { + /* protect rate limit table */ + struct mutex rl_lock; + u16 max_size; + u32 max_rate; + u32 min_rate; + struct mlx5_rl_entry *rl_entry; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -544,6 +559,7 @@ struct mlx5_priv { struct mlx5_flow_root_namespace *esw_ingress_root_ns; struct mlx5_fc_stats fc_stats; + struct mlx5_rl_table rl_table; }; enum mlx5_device_state { @@ -861,6 +877,12 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); +int mlx5_init_rl_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); + static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; @@ -938,6 +960,11 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) +{ + return !!(dev->priv.rl_table.max_size); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -- cgit From 667daedaecd15b89d0ded7af49519f28d6ea2cf4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:42 +0300 Subject: net/mlx5e: Toggle link only after modifying port parameters Add a dedicated function to toggle port link. It should be called only after setting a port register. Toggle will set port link to down and bring it back up in case that it's admin status was up. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/port.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 9851862c0ec5..4adfac15f0e9 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -67,6 +67,7 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, -- cgit From 89da45b8b5b2187734a11038b8593714f964ffd1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:43 +0300 Subject: ethtool: Add 50G baseSR2 link mode Add ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT bit. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Cc: Ben Hutchings Cc: David Decotigny Acked-By: David Decotigny Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5f030b46cff4..b8f38e84d93a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1362,6 +1362,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1370,7 +1371,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit From 52244d960755936fa9c8ce54d583d0ed46f24fb6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:46 +0300 Subject: net/mlx5e: Report correct auto negotiation and allow toggling Previous to this patch auto negotiation was reported off although it was on by default in hardware. This patch reports the correct information to ethtool and allows the user to toggle it on/off. Added another parameter to set port proto function in order to pass the auto negotiation field to the hardware. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/port.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4adfac15f0e9..e3012cc64b8a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -47,6 +47,14 @@ enum mlx5_module_id { MLX5_MODULE_ID_QSFP28 = 0x11, }; +enum mlx5_an_status { + MLX5_AN_UNAVAILABLE = 0, + MLX5_AN_COMPLETE = 1, + MLX5_AN_FAILED = 2, + MLX5_AN_LINK_UP = 3, + MLX5_AN_LINK_DOWN = 4, +}; + #define MLX5_EEPROM_MAX_BYTES 32 #define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 @@ -65,14 +73,17 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, int proto_mask, u8 local_port); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask); +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); -- cgit From 637c841dd7a5f9bd97b75cbe90b526fa1a52e530 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Jun 2016 18:42:51 -0700 Subject: net: diag: Add support to filter on device index Add support to inet_diag facility to filter sockets based on device index. If an interface index is in the filter only sockets bound to that index (sk_bound_dev_if) are returned. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index a16643705669..abbd1dc5d683 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -72,6 +72,7 @@ enum { INET_DIAG_BC_AUTO, INET_DIAG_BC_S_COND, INET_DIAG_BC_D_COND, + INET_DIAG_BC_DEV_COND, /* u32 ifindex */ }; struct inet_diag_hostcond { -- cgit From a5e4bd991362223346e1d3561e61d7a25797fe25 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Jun 2016 11:24:08 +0200 Subject: of_mdio: select fixed phy support unconditionally Calling the fixed-phy functions when CONFIG_FIXED_PHY=m as a previous change tried cannot work if the caller is in built-in code: drivers/of/built-in.o: In function `of_phy_register_fixed_link': of_reserved_mem.c:(.text+0x85e0): undefined reference to `fixed_phy_register' Making of_mdio depend on 'FIXED_PHY || !FIXED_PHY' would solve this dependency by enforcing that OF_MDIO itself becomes a loadable module when FIXED_PHY=y, but that creates a different dependency as it breaks any built-in ethernet driver that uses of_mdio. Making FIXED_PHY a bool option also cannot work, since it depends on PHYLIB, which again is tristate. This version now uses 'select FIXED_PHY' to ensure that the fixed-phy portion of of_mdio is not optional. 
The main downside of this approach is a small increase in code size for cases that do not need fixed phy support, but it should avoid all of the link-time problems. Signed-off-by: Arnd Bergmann Fixes: d1bd330a229f ("of_mdio: Enable fixed PHY support if driver is a module") Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 6c8cb9aa4c00..4b04587d0441 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,6 +25,8 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); +extern int of_phy_register_fixed_link(struct device_node *np); +extern bool of_phy_is_fixed_link(struct device_node *np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) @@ -67,12 +69,6 @@ static inline int of_mdio_parse_addr(struct device *dev, { return -ENOSYS; } -#endif /* CONFIG_OF */ - -#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_FIXED_PHY) -extern int of_phy_register_fixed_link(struct device_node *np); -extern bool of_phy_is_fixed_link(struct device_node *np); -#else static inline int of_phy_register_fixed_link(struct device_node *np) { return -ENOSYS; -- cgit From 02e57b9d7c8ce9e403f15f48fb91dd6549aaf465 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 24 Jun 2016 15:16:26 +0200 Subject: drivers: net: stmmac: add port selection programming In case of SGMII mode, for example when a MAC2MAC connection is needed, the port selection bits (inside the MAC configuration registers) have to be programmed according to the link selected. So the patch adds a new DT parameter to pass the port selection and to program the related PCS and CORE to use it. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0507dbfbf63c..705840e0438f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -141,5 +141,6 @@ struct plat_stmmacenet_data { struct stmmac_axi *axi; int has_gmac4; bool tso_en; + int mac_port_sel_speed; }; #endif -- cgit From 6816a7ffce32e999601825ddfd887f36d3052932 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:25 +0200 Subject: bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_read Follow-up commit to 1e33759c788c ("bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output") to add the same functionality into bpf_perf_event_read() helper. The split of index into flags and index component is also safe here, since such large maps are rejected during map allocation time. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406459b935a2..58df2da3e9bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -347,7 +347,7 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output flags. */ +/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags.
*/ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK -- cgit From 6578171a7ff0c31dc73258f93da7407510abf085 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:27 +0200 Subject: bpf: add bpf_skb_change_proto helper This patch adds a minimal helper for doing the groundwork of changing the skb->protocol in a controlled way. Currently supported is v4 to v6 and vice versa transitions, which allows f.e. for a minimal, static nat64 implementation where applications in containers that still require IPv4 can be transparently operated in an IPv6-only environment. For example, host facing veth of the container can transparently do the transitions in a programmatic way with the help of clsact qdisc and cls_bpf. Idea is to separate concerns for keeping complexity of the helper lower, which means that the programs utilize bpf_skb_change_proto(), bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done, instead of doing everything in a single helper (and thus partially duplicating helper functionality). Also, bpf_skb_change_proto() shouldn't need to deal with raw packet data as this is done by other helpers. bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to operate on a private one, push or pop additionally required header space and migrate the gso/gro meta data from the shared info. We do mark the gso type as dodgy so that headers are checked and segs recalculated by the gso/gro engine. The gso_size target is adapted as well. The flags argument added is currently reserved and can be used for future extensions. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 58df2da3e9bf..66cd738a937a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -313,6 +313,20 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt, + + /** + * bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is + * v4 -> v6, v6 -> v4 transitions. The helper will also + * resize the skb. eBPF program is expected to fill the + * new headers via skb_store_bytes and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_proto, + __BPF_FUNC_MAX_ID, }; -- cgit From d2485c4242a826fdf493fd3a27b8b792965b9b9e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:28 +0200 Subject: bpf: add bpf_skb_change_type helper This work adds a helper for changing skb->pkt_type in a controlled way. We only allow a subset of possible values and can extend that in future should other use cases come up. Doing this as a helper has the advantage that errors can be handeled gracefully and thus helper kept extensible. It's a write counterpart to pkt_type member we can already read from struct __sk_buff context. Major use case is to change incoming skbs to PACKET_HOST in a programmatic way instead of having to recirculate via redirect(..., BPF_F_INGRESS), for example. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66cd738a937a..be6ac1291680 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -327,6 +327,15 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_proto, + /** + * bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_type, + __BPF_FUNC_MAX_ID, }; -- cgit From e98e915e11ad1efb11147122bd4932ec6b3425da Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Wed, 22 Jun 2016 20:23:03 +0900 Subject: wireless: Use macro instead of number Use IEEE80211_MIN_ACTION_SIZE macro for robust management frame check. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b118744d3382..1daebb307e6e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2464,7 +2464,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { - if (skb->len < 25) + if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } -- cgit From 46f6b06050b736dab4d41494dae27b883cddc365 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Wed, 22 Jun 2016 19:55:20 +0900 Subject: mac80211: Encrypt "Group addressed privacy" action frames Previously, action frames sent to a group address were not encrypted. But [1] "Table 8-38 Category values" indicates "Mesh" and "Multihop" category action frames should be encrypted (Group addressed privacy == yes). And the encryption key should be MGTK ([1] 10.13 Group addressed robust management frame procedures). So this patch modifies the code to conform to the spec.
[1] IEEE Std 802.11-2012 Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1daebb307e6e..a80516fd65c8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -2486,6 +2487,35 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } +/** + * _ieee80211_is_group_privacy_action - check if frame is a group addressed + * privacy action frame + * @hdr: the frame + */ +static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) +{ + struct ieee80211_mgmt *mgmt = (void *)hdr; + + if (!ieee80211_is_action(hdr->frame_control) || + !is_multicast_ether_addr(hdr->addr1)) + return false; + + return mgmt->u.action.category == WLAN_CATEGORY_MESH_ACTION || + mgmt->u.action.category == WLAN_CATEGORY_MULTIHOP_ACTION; +} + +/** + * ieee80211_is_group_privacy_action - check if frame is a group addressed + * privacy action frame + * @skb: the skb containing the frame, length will be checked + */ +static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) +{ + if (skb->len < IEEE80211_MIN_ACTION_SIZE) + return false; + return _ieee80211_is_group_privacy_action((void *)skb->data); +} + /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs -- cgit From 80e73cc563c4359be809a03bcb8e7e28141a813a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Jun 2016 16:57:05 +0200 Subject: net: rtnetlink: add support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute This patch adds support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute which allows to export per-slave statistics if the master device supports the linkxstats callback. The attribute is passed down to the linkxstats callback and it is up to the callback user to use it (an example has been added to the only current user - the bridge). This allows us to query only specific slaves of master devices like bridge ports and export only what we're interested in instead of having to dump all ports and searching only for a single one. This will be used to export per-port IGMP/MLD stats and also per-port vlan stats in the future, possibly other statistics as well. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/rtnetlink.h | 5 +++-- include/uapi/linux/if_link.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 006a7b81d758..4113916cc1bb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -98,10 +98,11 @@ struct rtnl_link_ops { const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); - size_t (*get_linkxstats_size)(const struct net_device *dev); + size_t (*get_linkxstats_size)(const struct net_device *dev, + int attr); int (*fill_linkxstats)(struct sk_buff *skb, const struct net_device *dev, - int *prividx); + int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb36bd5675a7..db2458ade81c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -822,6 +822,7 @@ enum { IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, __IFLA_STATS_MAX, }; -- cgit From 1080ab95e3c7bdd77870e209aff83c763fdcf439 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Jun 2016 16:57:06 +0200 Subject: net: bridge: add support for IGMP/MLD stats and export them via netlink This patch adds stats support for the currently used IGMP/MLD types by the bridge. The stats are per-port (plus one stat per-bridge) and per-direction (RX/TX). The stats are exported via netlink via the new linkxstats API (RTM_GETSTATS). In order to minimize the performance impact, a new option is used to enable/disable the stats - multicast_stats_enabled, similar to the recent vlan stats. Also in order to avoid multiple IGMP/MLD type lookups and checks, we make use of the current "igmp" member of the bridge private skb->cb region to record the type on Rx (both host-generated and external packets pass by multicast_rcv()). We can do that since the igmp member was used as a boolean and all the valid IGMP/MLD types are positive values. The normal bridge fast-path is not affected at all, the only affected paths are the flooding ones and since we make use of the IGMP/MLD type, we can quickly determine if the packet should be counted using cache-hot data (cb's igmp member). We add counters for: * IGMP Queries * IGMP Leaves * IGMP v1/v2/v3 reports * MLD Queries * MLD Leaves * MLD v1/v2 reports These are invaluable when monitoring or debugging complex multicast setups with bridges. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 397d503fdedb..8304fe6f0561 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -247,8 +247,34 @@ enum { enum { BRIDGE_XSTATS_UNSPEC, BRIDGE_XSTATS_VLAN, + BRIDGE_XSTATS_MCAST, + BRIDGE_XSTATS_PAD, __BRIDGE_XSTATS_MAX }; #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) +enum { + BR_MCAST_DIR_RX, + BR_MCAST_DIR_TX, + BR_MCAST_DIR_SIZE +}; + +/* IGMP/MLD statistics */ +struct br_mcast_stats { + __u64 igmp_queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_parse_errors; + + __u64 mld_queries[BR_MCAST_DIR_SIZE]; + __u64 mld_leaves[BR_MCAST_DIR_SIZE]; + __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; + __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; + __u64 mld_parse_errors; + + __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; + __u64 mcast_packets[BR_MCAST_DIR_SIZE]; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index db2458ade81c..4285ac31e865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -273,6 +273,7 @@ enum { IFLA_BR_VLAN_DEFAULT_PVID, IFLA_BR_PAD, IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, __IFLA_BR_MAX, }; -- cgit From b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 27 Jun 2016 15:33:56 -0700 Subject: tcp: add an ability to dump and restore window parameters We found that sometimes a restored tcp socket doesn't work. The reason for this bug is incorrect window parameters; in this case tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The other side drops packets with this seq, because seq is less than tp->rcv_nxt ( tcp_sequence() ). Data from a send queue is sent only if there is enough space in a window, so when we restore unacked data, we need to expand a window to fit this data. This was in a first version of this patch: "tcp: extend window to fit all restored unacked data in a send queue" Then Alexey recommended restoring the window parameters instead of adjusting them according to the data in the send queue. This sounds reasonable. rcv_wnd has to be restored, because it was reported to the other side and the offered window is never shrunk. One of the reasons why we need to restore snd_wnd was described above. Cc: Pavel Emelyanov Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S.
Miller --- include/uapi/linux/tcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 53e8e3fe6b1b..482898fc433a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -115,12 +115,22 @@ enum { #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ struct tcp_repair_opt { __u32 opt_code; __u32 opt_val; }; +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + enum { TCP_NO_QUEUE, TCP_RECV_QUEUE, -- cgit From 2631b79f6cb8b634fe41b77de9c4add0ec6b3cae Mon Sep 17 00:00:00 2001 From: "Seymour, Shane M" Date: Tue, 28 Jun 2016 23:06:48 +0000 Subject: tcp: increase size at which tcp_bound_to_half_wnd bounds to > TCP_MSS_DEFAULT In previous commit 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 the following comments were added: "When peer uses tiny windows, there is no use in packetizing to sub-MSS pieces for the sake of SWS or making sure there are enough packets in the pipe for fast recovery." The test should be > TCP_MSS_DEFAULT not >= 512. This allows low end devices that send an MSS of 536 (TCP_MSS_DEFAULT) to see better network performance by sending it 536 bytes of data at a time instead of bounding to half window size (268). Other network stacks work this way, e.g. HP-UX. Signed-off-by: Shane Seymour Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a79894b66726..d825858fe4f1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -589,7 +589,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) * On the other hand, for extremely large MSS devices, handling * smaller than MSS windows in this way does make sense. */ - if (tp->max_window >= 512) + if (tp->max_window > TCP_MSS_DEFAULT) cutoff = (tp->max_window >> 1); else cutoff = tp->max_window; -- cgit From 19689e38eca5d7b32755182d4e62efd7a5376c45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 18:51:53 +0200 Subject: tcp: md5: use kmalloc() backed scratch areas Some arches have virtually mapped kernel stacks, or will soon have. tcp_md5_hash_header() uses an automatic variable to copy tcp header before mangling th->check and calling crypto function, which might be problematic on such arches. David says that using percpu storage is also problematic on non SMP builds. Just use kmalloc() to allocate scratch areas. Signed-off-by: Eric Dumazet Reported-by: Andy Lutomirski Signed-off-by: David S. 
Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d825858fe4f1..c00e7d51bb18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1384,7 +1384,7 @@ union tcp_md5sum_block { /* - pool: digest algorithm, hash description and scratch buffer */ struct tcp_md5sig_pool { struct ahash_request *md5_req; - union tcp_md5sum_block md5_blk; + void *scratch; }; /* - functions */ @@ -1420,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void) local_bh_enable(); } -int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned int header_len); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, -- cgit From 982fb490c298896d15e9323a882f34a57c11ff56 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:31 +0800 Subject: ptr_ring: support zero length ring Sometimes we need a zero-length ring, but the current code will crash since we don't do any check before accessing the ring. This patch fixes this. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 562a65e8bcc0..d78b8b89c707 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -102,7 +102,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (r->queue[r->producer]) + if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; r->queue[r->producer++] = ptr; @@ -164,7 +164,9 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { - return r->queue[r->consumer]; + if (likely(r->size)) + return r->queue[r->consumer]; + return NULL; } /* Note: callers invoking this in a loop must use a compiler barrier, -- cgit From fd68adec9de3104c236ffbcb3bd829d3e635a444 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:32 +0800 Subject: skb_array: minor tweak Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/skb_array.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 678bfbf78ac4..2dd0d1e4ee7e 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -151,12 +151,12 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_init(&a->ring, size, gfp); } -void __skb_array_destroy_skb(void *ptr) +static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } -int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) +static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } -- cgit From 59e6ae53248a72d83cec77dd704b6990b2394479 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 30 Jun 2016 14:45:33 +0800 Subject: ptr_ring: support resizing multiple queues Sometimes we need to support resizing multiple queues at once, because it is not easy to recover from a partial failure when resizing multiple queues. Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- include/linux/ptr_ring.h | 71 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index d78b8b89c707..2052011bf9fb 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -349,20 +349,14 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } -static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, - void (*destroy)(void *)) +static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, + int size, gfp_t gfp, + void (*destroy)(void *)) { - unsigned long flags; int producer = 0; - void **queue = __ptr_ring_init_queue_alloc(size, gfp); void **old; void *ptr; - if (!queue) - return -ENOMEM; - - spin_lock_irqsave(&(r)->producer_lock, flags); - while ((ptr = ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; @@ -375,6 +369,23 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, old = r->queue; r->queue = queue; + return old; +} + +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->producer_lock, flags); + + old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); + spin_unlock_irqrestore(&(r)->producer_lock, flags); kfree(old); @@ -382,6 +393,48 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, return 0; } +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) +{ + unsigned long flags; + void ***queues; + int i; + + queues = kmalloc(nrings * sizeof *queues, gfp); + if (!queues) + goto noqueues; + + for (i = 0; i < nrings; ++i) { + queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + if (!queues[i]) + goto nomem; + } + + for (i = 0; i < nrings; ++i) { + spin_lock_irqsave(&(rings[i])->producer_lock, flags); + queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], + size, gfp, destroy); + spin_unlock_irqrestore(&(rings[i])->producer_lock, flags); + } + + for (i = 0; i < nrings; ++i) + kfree(queues[i]); + + kfree(queues); + + return 0; + +nomem: + while (--i >= 0) + kfree(queues[i]); + + kfree(queues); + +noqueues: + return -ENOMEM; +} + static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; -- cgit From bf900b3dbefee49855c17aa09fb4245346a78fb3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:34 +0800 Subject: skb_array: add wrappers for resizing Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/skb_array.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 2dd0d1e4ee7e..f4dfade428f0 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -161,6 +161,15 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } +static inline int skb_array_resize_multiple(struct skb_array **rings, + int nrings, int size, gfp_t gfp) +{ + BUILD_BUG_ON(offsetof(struct skb_array, ring)); + return ptr_ring_resize_multiple((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); +} + static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } -- cgit From 08294a26e15d7baf1e14ee569e9f2bc82a7ae768 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:35 +0800 Subject: net: introduce NETDEV_CHANGE_TX_QUEUE_LEN This patch introduces a new event - NETDEV_CHANGE_TX_QUEUE_LEN - which will be triggered when tx_queue_len is changed. It can be used by net devices that want to do some processing at that time. An example is tun, which may want to resize its tx array when tx_queue_len is changed. Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e84d9d23c2d5..7dc2ec74122a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2237,6 +2237,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit From 1576d98605998fb59d121a39581129e134217182 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:36 +0800 Subject: tun: switch to use skb array for tx We used to queue tx packets in sk_receive_queue, which is less efficient since it requires spinlocks to synchronize between producer and consumer. This patch tries to address this by: - switch from sk_receive_queue to a skb_array, and resize it when tx_queue_len was changed. - introduce a new proto_ops peek_len which is used for peeking the skb length. - implement a tun version of peek_len for vhost_net to use and convert vhost_net to use peek_len if possible. Pktgen test shows about 15.3% improvement on guest receiving pps for small buffers: Before: ~1300000pps After : ~1500000pps Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 25aa03b51c4e..b9f0ff4d489c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -185,6 +185,7 @@ struct proto_ops { ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); + int (*peek_len)(struct socket *sock); }; #define DECLARE_SOCKADDR(type, dst, src) \ -- cgit From 4ae89ad92477219b504a49966ee010fe8dcb85af Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 Jun 2016 11:32:26 -0700 Subject: etherdevice.h & bridge: netfilter: Add and use ether_addr_equal_masked There are code duplications of a masked ethernet address comparison here so make it a separate function instead. Miscellanea: o Neaten alignment of FWINV macro uses to make it clearer for the reader Signed-off-by: Joe Perches Acked-by: David S. Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/etherdevice.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 37ff4a6faa9a..6fec9e81bd70 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -373,6 +373,29 @@ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) #endif } +/** + * ether_addr_equal_masked - Compare two Ethernet addresses with a mask + * @addr1: Pointer to a six-byte array containing the 1st Ethernet address + * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address + * @mask: Pointer to a six-byte array containing the Ethernet address bitmask + * + * Compare two Ethernet addresses with a mask, returns true if for every bit + * set in the bitmask the equivalent bits in the ethernet addresses are equal. + * Using a mask with all bits set is a slower ether_addr_equal. + */ +static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, + const u8 *mask) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + if ((addr1[i] ^ addr2[i]) & mask[i]) + return false; + } + + return true; +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. 
* @dev: Pointer to a device structure -- cgit From 1aacde3d22c42281236155c1ef6d7a5aa32a826b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:43 +0200 Subject: bpf: generally move prog destruction to RCU deferral Jann Horn reported following analysis that could potentially result in a very hard to trigger (if not impossible) UAF race, to quote his event timeline: - Set up a process with threads T1, T2 and T3 - Let T1 set up a socket filter F1 that invokes another filter F2 through a BPF map [tail call] - Let T1 trigger the socket filter via a unix domain socket write, don't wait for completion - Let T2 call PERF_EVENT_IOC_SET_BPF with F2, don't wait for completion - Now T2 should be behind bpf_prog_get(), but before bpf_prog_put() - Let T3 close the file descriptor for F2, dropping the reference count of F2 to 2 - At this point, T1 should have looked up F2 from the map, but not finished executing it - Let T3 remove F2 from the BPF map, dropping the reference count of F2 to 1 - Now T2 should call bpf_prog_put() (wrong BPF program type), dropping the reference count of F2 to 0 and scheduling bpf_prog_free_deferred() via schedule_work() - At this point, the BPF program could be freed - BPF execution is still running in a freed BPF program While at PERF_EVENT_IOC_SET_BPF time it's only guaranteed that the perf event fd we're doing the syscall on doesn't disappear from underneath us for whole syscall time, it may not be the case for the bpf fd used as an argument only after we did the put. It needs to be a valid fd pointing to a BPF program at the time of the call to make the bpf_prog_get() and while T2 gets preempted, F2 must have dropped reference to 1 on the other CPU. The fput() from the close() in T3 should also add additionally delay to the reference drop via exit_task_work() when bpf_prog_release() gets called as well as scheduling bpf_prog_free_deferred(). That said, it makes nevertheless sense to move the BPF prog destruction generally after RCU grace period to guarantee that such scenario above, but also others as recently fixed in ceb56070359b ("bpf, perf: delay release of BPF prog after grace period") with regards to tail calls won't happen. Integrating bpf_prog_free_deferred() directly into the RCU callback is not allowed since the invocation might happen from either softirq or process context, so we're not permitted to block. Reviewing all bpf_prog_put() invocations from eBPF side (note, cBPF -> eBPF progs don't use this for their destruction) with call_rcu() look good to me. Since we don't know whether at the time of attaching the program, we're already part of a tail call map, we need to use RCU variant. However, due to this, there won't be severely more stress on the RCU callback queue: situations with above bpf_prog_get() and bpf_prog_put() combo in practice normally won't lead to releases, but even if they would, enough effort/ cycles have to be put into loading a BPF program into the kernel already. Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8411032ac90d..749549888b86 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -220,7 +220,6 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -281,10 +280,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } - -static inline void bpf_prog_put_rcu(struct bpf_prog *prog) -{ -} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ -- cgit From 113214be7f6c98dd6d0435e4765aea8dea91662c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:44 +0200 Subject: bpf: refactor bpf_prog_get and type check into helper Since bpf_prog_get() and program type check is used in a couple of places, refactor this into a small helper function that we can make use of. Since the non RO prog->aux part is not used in performance critical paths and a program destruction via RCU is rather very unlikley when doing the put, we shouldn't have an issue just doing the bpf_prog_get() + prog->type != type check, but actually not taking the ref at all (due to being in fdget() / fdput() section of the bpf fd) is even cleaner and makes the diff smaller as well, so just go for that. Callsites are changed to make use of the new helper where possible. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 749549888b86..b3336b4f5d04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -218,6 +218,7 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -277,6 +278,12 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } -- cgit From 1f3fe7ebf6136c341012db9f554d4caa566fcbaa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:42 -0700 Subject: cgroup: Add cgroup_get_from_fd Add a helper function to get a cgroup2 from a fd. It will be stored in a bpf array (BPF_MAP_TYPE_CGROUP_ARRAY) which will be introduced in the later patch. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Signed-off-by: David S. 
Miller --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a20320c666fd..984f73b719a9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); +struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -- cgit From 4ed8ec521ed57c4e207ad464ca0388776de74d4b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:43 -0700 Subject: cgroup: bpf: Add BPF_MAP_TYPE_CGROUP_ARRAY Add a BPF_MAP_TYPE_CGROUP_ARRAY and its bpf_map_ops's implementations. To update an element, the caller is expected to obtain a cgroup2 backed fd by open(cgroup2_dir) and then update the array with that fd. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index be6ac1291680..26c04be32003 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { -- cgit From 4a482f34afcc162d8456f449b137ec2a95be60d8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:44 -0700 Subject: cgroup: bpf: Add bpf_skb_in_cgroup_proto Adds a bpf helper, bpf_skb_in_cgroup, to decide if a skb->sk belongs to a descendant of a cgroup2. It is similar to the feature added in netfilter: commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY which will be used by the bpf_skb_in_cgroup. Modifications to the bpf verifier is to ensure BPF_MAP_TYPE_CGROUP_ARRAY and bpf_skb_in_cgroup() are always used together. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 26c04be32003..f44504d875e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -337,6 +337,17 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_type, + /** + * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_in_cgroup, __BPF_FUNC_MAX_ID, }; -- cgit From acbc2004d7129a1ecf02414c1da8808bdc06d5a2 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:58 +0300 Subject: net/mlx5: Introduce offloads steering namespace Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated with flow steering rules that deal with rules that have have to be executed before the EN NIC steering rules are matched. 
The namespace is located after the bypass name-space and before the kernel name-space. Therefore, it precedes the HW processing done for rules set for the kernel NIC name-space. Under SRIOV, it would allow us to match on e-switch missed packet and forward them to the relevant VF representor TIR. Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107d9c19..6ad111938709 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- cgit From 08f4b5918b2d6b491f0403cc1886f5cdccef89bb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:01 +0300 Subject: net/devlink: Add E-Switch mode control Add the commands to set and show the mode of SRIOV E-Switch, two modes are supported: * legacy: operating in the "old" L2 based mode (DMAC --> VF vport) * switchdev: the E-Switch is referred to as whitebox switch configured using standard tools such as tc, bridge, openvswitch etc. To allow working with the tools, for each VF, a VF representor netdevice is created by the E-Switch manager vendor device driver instance (e.g PF). Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +++ include/uapi/linux/devlink.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61cb320..c99ffe8cef3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b26fa6..915bfa74458c 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit From b50d292b4399f4eb11e82d0430aacf62dd5d5365 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:04 +0300 Subject: net/mlx5e: Create NIC global resources only once To allow creating more than one netdev over the same PCI function, we change the driver such that global NIC resources are created once and later be shared amongst all the mlx5e netdevs running over that port. Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from being kept in the mlx5e priv part to a new resources structure (mlx5e_resources) placed under the mlx5_core device. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46260fdc5305..e22b3456b2ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -578,6 +578,18 @@ enum mlx5_pci_status { MLX5_PCI_STATUS_ENABLED, }; +struct mlx5_td { + struct list_head tirs_list; + u32 tdn; +}; + +struct mlx5e_resources { + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -602,6 +614,7 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; + struct mlx5e_resources mlx5e_res; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif -- cgit From c37a2dfa67f7920b14ea77dc9f9f9660f7a1f6dd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 Jun 2016 13:25:22 -0700 Subject: netfilter: Convert FWINV<[foo]> macros and uses to NF_INVF netfilter uses multiple FWINV #defines with identical form that hide a specific structure variable and dereference it with a invflags member. $ git grep "#define FWINV" include/linux/netfilter_bridge/ebtables.h:#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) net/bridge/netfilter/ebtables.c:#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) net/ipv4/netfilter/arp_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) net/ipv4/netfilter/ip_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) net/ipv6/netfilter/ip6_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) net/netfilter/xt_tcpudp.c:#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) Consolidate these macros into a single NF_INVF macro. 
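As an editorial illustration of the conversion just described (not taken from this patch), a match function that used the old per-file FWINV macro now spells the structure out explicitly. The sketch below is modelled on the input-interface check in ip_tables.c; the function name is hypothetical, while NF_INVF(), ifname_compare_aligned(), struct ipt_ip and IPT_INV_VIA_IN are existing kernel symbols.

#include <linux/netfilter/x_tables.h>		/* NF_INVF(), ifname_compare_aligned() */
#include <linux/netfilter_ipv4/ip_tables.h>	/* struct ipt_ip, IPT_INV_VIA_IN */

/* Sketch: does the input interface fail to match, honouring the invert flag? */
static bool iniface_mismatch(const struct ipt_ip *ipinfo, const char *indev)
{
	unsigned long ret = ifname_compare_aligned(indev, ipinfo->iniface,
						   ipinfo->iniface_mask);

	/* old:  return FWINV(ret != 0, IPT_INV_VIA_IN);   (FWINV hides "ipinfo")
	 * new:  the structure holding ->invflags is passed explicitly
	 */
	return NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0);
}
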
Miscellanea: o Neaten the alignment around these uses o A few lines are > 80 columns for intelligibility Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ include/linux/netfilter_bridge/ebtables.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dc4f58a3cdcc..e94e81ab2b58 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -6,6 +6,10 @@ #include #include +/* Test a struct->invflags and a boolean for inequality */ +#define NF_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->invflags & (flag))) + /** * struct xt_action_param - parameters for matches/targets * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..984b2112c77b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -115,8 +115,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -/* Used in the kernel match() functions */ -#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) -- cgit From f86dec94e3a86c992a637df1c301a4df25a85801 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 15 Apr 2016 18:14:25 +0200 Subject: NFC: hci: delete unused nfc_llc_get_rx_head_tail_room() It used to be EXPORTed, but then EXPORT usage was cleaned up (in 2012), without noticing that the function has no users at all (and curiously, never had any users). Delete it. While at it, remove non-static "inline" hints on nearby functions: these hints don't work across compilation units anyway, and these functions are not used in their .c file, thus they are never inlined. IOW: "inline" here does not help in any way. Signed-off-by: Denys Vlasenko CC: Samuel Ortiz CC: Christophe Ricard CC: linux-wireless@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Samuel Ortiz --- include/net/nfc/llc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h index c25fbdee0d61..7ecb45757897 100644 --- a/include/net/nfc/llc.h +++ b/include/net/nfc/llc.h @@ -37,10 +37,6 @@ struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, int tx_tailroom, llc_failure_t llc_failure); void nfc_llc_free(struct nfc_llc *llc); -void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, - int *rx_tailroom); - - int nfc_llc_start(struct nfc_llc *llc); int nfc_llc_stop(struct nfc_llc *llc); void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb); -- cgit From 7854a44526de84142e367f08288c9f3a33c4c8ee Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 7 Jun 2016 16:21:52 +0200 Subject: NFC: digital: Add a delay between poll cycles This replaces the polling work struct with a delayed work struct and add a 10 ms delay between 2 poll cycles. This avoids to flood the device with 'switch off'/'switch on' commands. 
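A minimal sketch of what this change means for the code that kicks off the next poll cycle; the helper name below is invented for illustration, and whether the driver calls schedule_delayed_work() directly or queues the work on its own workqueue is an assumption, not something this hunk shows.

#include <linux/jiffies.h>
#include <linux/workqueue.h>
#include <net/nfc/digital.h>	/* struct nfc_digital_dev, poll_work is now a delayed_work */

static void digital_kick_poll(struct nfc_digital_dev *ddev)
{
	/* before: schedule_work(&ddev->poll_work);
	 * after:  wait 10 ms between two poll cycles so the device is not
	 *         flooded with 'switch off'/'switch on' commands
	 */
	schedule_delayed_work(&ddev->poll_work, msecs_to_jiffies(10));
}
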
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 0ae101eef0f4..506e3f6eabef 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -220,7 +220,7 @@ struct nfc_digital_dev { struct list_head cmd_queue; struct mutex cmd_lock; - struct work_struct poll_work; + struct delayed_work poll_work; u8 curr_protocol; u8 curr_rf_tech; -- cgit From 09748a22f4ab7b0ab5a83c432f6e18f65f18e09b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 9 May 2016 18:41:08 +0200 Subject: batman-adv: add generic netlink family for batman-adv debugfs is currently severely broken virtually everywhere in the kernel where files are dynamically added and removed (see http://lkml.iu.edu/hypermail/linux/kernel/1506.1/02196.html for some details). In addition to that, debugfs is not namespace-aware. Instead of adding new debugfs entries, the whole infrastructure should be moved to netlink. This will fix the long standing problem of large buffers for debug tables and hard to parse text files. Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Strip down patch to only add genl family, add missing kerneldoc] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/uapi/linux/batman_adv.h (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h new file mode 100644 index 000000000000..79f797281b87 --- /dev/null +++ b/include/uapi/linux/batman_adv.h @@ -0,0 +1,53 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _UAPI_LINUX_BATMAN_ADV_H_ +#define _UAPI_LINUX_BATMAN_ADV_H_ + +#define BATADV_NL_NAME "batadv" + +/** + * enum batadv_nl_attrs - batman-adv netlink attributes + * + * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + * @__BATADV_ATTR_AFTER_LAST: internal use + * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available + * @BATADV_ATTR_MAX: highest attribute number currently defined + */ +enum batadv_nl_attrs { + BATADV_ATTR_UNSPEC, + /* add attributes above here, update the policy in netlink.c */ + __BATADV_ATTR_AFTER_LAST, + NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, + BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1 +}; + +/** + * enum batadv_nl_commands - supported batman-adv netlink commands + * + * @BATADV_CMD_UNSPEC: unspecified command to catch errors + * @__BATADV_CMD_AFTER_LAST: internal use + * @BATADV_CMD_MAX: highest used command number + */ +enum batadv_nl_commands { + BATADV_CMD_UNSPEC, + /* add new commands above here */ + __BATADV_CMD_AFTER_LAST, + BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 +}; + +#endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ -- cgit From 5da0aef5e93591b373010c10f374c4161b37728c Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 9 May 2016 18:41:09 +0200 Subject: batman-adv: add netlink command to query generic mesh information files BATADV_CMD_GET_MESH_INFO is used to query basic information about a batman-adv softif (name, index and MAC address for both the softif and the primary hardif; routing algorithm; batman-adv version). Signed-off-by: Matthias Schiffer Signed-off-by: Andrew Lunn [sven.eckelmann@open-mesh.com: Reduce the number of changes to BATADV_CMD_GET_MESH_INFO, add missing kerneldoc, add policy for attributes] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 79f797281b87..c39623c7109e 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -24,12 +24,28 @@ * enum batadv_nl_attrs - batman-adv netlink attributes * * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + * @BATADV_ATTR_VERSION: batman-adv version string + * @BATADV_ATTR_ALGO_NAME: name of routing algorithm + * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface + * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface + * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface + * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface + * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface + * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined */ enum batadv_nl_attrs { BATADV_ATTR_UNSPEC, + BATADV_ATTR_VERSION, + BATADV_ATTR_ALGO_NAME, + BATADV_ATTR_MESH_IFINDEX, + BATADV_ATTR_MESH_IFNAME, + BATADV_ATTR_MESH_ADDRESS, + BATADV_ATTR_HARD_IFINDEX, + BATADV_ATTR_HARD_IFNAME, + BATADV_ATTR_HARD_ADDRESS, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -40,11 +56,13 @@ enum batadv_nl_attrs { * enum batadv_nl_commands - supported batman-adv netlink commands * * @BATADV_CMD_UNSPEC: unspecified command to catch errors + * @BATADV_CMD_GET_MESH_INFO: Query basic 
information about batman-adv device * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ enum batadv_nl_commands { BATADV_CMD_UNSPEC, + BATADV_CMD_GET_MESH_INFO, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 -- cgit From 33a3bb4a3345bb511f9c69c913da95d4693e2a4e Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 5 May 2016 13:09:43 +0200 Subject: batman-adv: throughput meter implementation The throughput meter module is a simple, kernel-space replacement for throughput measurement tools like iperf and netperf. It is intended to approximate TCP behaviour. It is invoked through batctl: the protocol is connection oriented, with cumulative acknowledgment and a dynamic-size sliding window. The test *can* be interrupted by batctl. A receiver side timeout avoids unlimited waiting for sender packets: after one second of inactivity, the receiver aborts the ongoing test. Based on a prototype from Edo Monticelli Signed-off-by: Antonio Quartulli Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index c39623c7109e..0fbf6fd4711b 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -20,6 +20,8 @@ #define BATADV_NL_NAME "batadv" +#define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" + /** * enum batadv_nl_attrs - batman-adv netlink attributes * * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors @@ -32,6 +34,12 @@ * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface + * @BATADV_ATTR_ORIG_ADDRESS: originator mac address + * @BATADV_ATTR_TPMETER_RESULT: result of run (see batadv_tp_meter_status) + * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took + * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run + * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session + * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -46,6 +54,12 @@ enum batadv_nl_attrs { BATADV_ATTR_UNSPEC, BATADV_ATTR_VERSION, BATADV_ATTR_ALGO_NAME, BATADV_ATTR_MESH_IFINDEX, BATADV_ATTR_MESH_IFNAME, BATADV_ATTR_MESH_ADDRESS, BATADV_ATTR_HARD_IFINDEX, BATADV_ATTR_HARD_IFNAME, BATADV_ATTR_HARD_ADDRESS, + BATADV_ATTR_ORIG_ADDRESS, + BATADV_ATTR_TPMETER_RESULT, + BATADV_ATTR_TPMETER_TEST_TIME, + BATADV_ATTR_TPMETER_BYTES, + BATADV_ATTR_TPMETER_COOKIE, + BATADV_ATTR_PAD, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -57,15 +71,44 @@ enum batadv_nl_attrs { * * @BATADV_CMD_UNSPEC: unspecified command to catch errors * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device + * @BATADV_CMD_TP_METER: Start a tp meter session + * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ enum batadv_nl_commands { BATADV_CMD_UNSPEC, BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_TP_METER, + BATADV_CMD_TP_METER_CANCEL, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 }; +/** + * enum batadv_tp_meter_reason - reason of a tp meter test run stop + * 
@BATADV_TP_REASON_COMPLETE: sender finished tp run + * @BATADV_TP_REASON_CANCEL: sender was stopped during run + * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or didn't + * answer + * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit + * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node already + * ongoing + * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory + * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface + * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions + */ +enum batadv_tp_meter_reason { + BATADV_TP_REASON_COMPLETE = 3, + BATADV_TP_REASON_CANCEL = 4, + /* error status >= 128 */ + BATADV_TP_REASON_DST_UNREACHABLE = 128, + BATADV_TP_REASON_RESEND_LIMIT = 129, + BATADV_TP_REASON_ALREADY_ONGOING = 130, + BATADV_TP_REASON_MEMORY_ERROR = 131, + BATADV_TP_REASON_CANT_SEND = 132, + BATADV_TP_REASON_TOO_MANY = 133, +}; + #endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ -- cgit From 8b10cab64c134ffbffac96edd1899d303d3afcac Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 2 Jul 2016 06:43:14 -0400 Subject: net: simplify and make pkt_type_ok() available for other users Suggested-by: Daniel Borkmann Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dc0fca747c5e..638b0e004310 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include #include #include +#include #include /* The interface for checksum offload between the stack and networking drivers @@ -881,6 +882,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } +/* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +*/ +static inline bool skb_pkt_type_ok(u32 ptype) +{ + return ptype <= PACKET_OTHERHOST; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); -- cgit From ff202ee1ed8f032f05b80b541664cf02e75d7080 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 2 Jul 2016 06:43:15 -0400 Subject: net sched actions: skbedit add support for mod-ing skb pkt_type Extremely useful for setting packet type to host so i dont have to modify the dst mac address using pedit (which requires that i know the mac address) Example usage: tc filter add dev eth0 parent ffff: protocol ip pref 9 u32 \ match ip src 5.5.5.5/32 \ flowid 1:5 action skbedit ptype host This will tag all packets incoming from 5.5.5.5 with type PACKET_HOST Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbedit.h | 10 +++++----- include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b496d5ad7d42..d01a5d40cfb5 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -24,11 +24,11 @@ struct tcf_skbedit { struct tcf_common common; - u32 flags; - u32 priority; - u32 mark; - u16 queue_mapping; - /* XXX: 16-bit pad here? 
*/ + u32 flags; + u32 priority; + u32 mark; + u16 queue_mapping; + u16 ptype; }; #define to_skbedit(a) \ container_of(a->priv, struct tcf_skbedit, common) diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index fecb5cc48c40..a4d00c608d8f 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 +#define SKBEDIT_F_PTYPE 0x8 struct tc_skbedit { tc_gen; @@ -40,6 +41,7 @@ enum { TCA_SKBEDIT_QUEUE_MAPPING, TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, + TCA_SKBEDIT_PTYPE, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) -- cgit From 13c5c240f789bbd2bcacb14a23771491485ae61f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 3 Jul 2016 01:28:47 +0200 Subject: bpf: add bpf_get_hash_recalc helper If skb_clear_hash() was invoked due to mangling of relevant headers and BPF program needs skb->hash later on, we can add a helper to trigger hash recalculation via bpf_get_hash_recalc(). The helper will return the newly retrieved hash directly, but later access can also be done via skb context again through skb->hash directly (inline) without needing to call the helper once more. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f44504d875e2..c14ca1cd6297 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -348,6 +348,15 @@ enum bpf_func_id { * < 0 error */ BPF_FUNC_skb_in_cgroup, + + /** + * bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + */ + BPF_FUNC_get_hash_recalc, + __BPF_FUNC_MAX_ID, }; -- cgit From 7ce856aaaf13a5dc969ac5f998e5daaf1abe4cd2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 4 Jul 2016 08:23:12 +0200 Subject: mlxsw: spectrum: Add couple of lower device helper functions Add functions that iterate over lower devices and find port device. As a dependency add netdev_for_each_all_lower_dev and netdev_for_each_all_lower_dev_rcu macro with netdev_all_lower_get_next and netdev_all_lower_get_next_rcu shelpers. Also, add functions to return mlxsw struct according to lower device found and mlxsw_port struct with a reference to lower device. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dc2ec74122a..0c6ee2c5099f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3804,12 +3804,30 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); + #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) +struct net_device *netdev_all_lower_get_next(struct net_device *dev, + struct list_head **iter); +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter); + +#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next(dev, &(iter))) + +#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); -- cgit From c5bb17302e734967822be559cf661704b707b4ed Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:05 +0300 Subject: net/mlx5: Refactor mlx5_add_flow_rule Reduce the set of arguments passed to mlx5_add_flow_rule by introducing flow_spec structure. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6ad111938709..d22fe7e5a39a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -68,6 +68,12 @@ struct mlx5_flow_group; struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_spec { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -116,9 +122,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); */ struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest); -- cgit From fba53f7b571925b8a0d59d460ad6de1fda928a3e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:06 +0300 Subject: net/mlx5: Introduce mlx5_flow_steering structure Instead of having all steering private name spaces and steering module fields flat in mlx5_core_priv, we wrap them in mlx5_flow_steering for better modularity and API exposure. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e22b3456b2ee..f21c45941887 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,14 +550,10 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; - struct mlx5_flow_root_namespace *root_ns; - struct mlx5_flow_root_namespace *fdb_root_ns; - struct mlx5_flow_root_namespace *esw_egress_root_ns; - struct mlx5_flow_root_namespace *esw_ingress_root_ns; - struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; }; -- cgit From 6dc6071cfcde6cf687f8d288c9cef9ee6ee24dc7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 Jul 2016 17:23:08 +0300 Subject: net/mlx5e: Add ethtool flow steering support Implement etrhtool set_rxnfc callback to support ethtool flow spec direct steering. This patch adds only the support of ether flow type spec. L3/L4 flow specs support will be added in downstream patches. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index d22fe7e5a39a..e036d6030867 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -55,6 +55,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_OFFLOADS, + MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- cgit From 503eebc265dcf5c512454fd5a6b6673ea4f1d7f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:37 +0200 Subject: net: add dev arg to ndo_neigh_construct/destroy As the following patch will allow upper devices to follow the call down lower devices, we need to add dev here and not rely on n->dev. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c6ee2c5099f..91af73c9dd51 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1209,8 +1209,10 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); - int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); + int (*ndo_neigh_construct)(struct net_device *dev, + struct neighbour *n); + void (*ndo_neigh_destroy)(struct net_device *dev, + struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], -- cgit From 18bfb924f0005a728caadd90ba755b2a660bf441 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:38 +0200 Subject: net: introduce default neigh_construct/destroy ndo calls for L2 upper devices L2 upper device needs to propagate neigh_construct/destroy calls down to lower devices. Do this by defining default ndo functions and use them in team, bond, bridge and vlan. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91af73c9dd51..49736a31acaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3845,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n); +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -- cgit From 2a4501ae18b52fcdf553404286e6cefabd1d17ec Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 5 Jul 2016 11:27:42 +0200 Subject: neigh: Send a notification when DELAY_PROBE_TIME changes When the data plane is offloaded the traffic doesn't go through the networking stack. Therefore, after first resolving a neighbour the NUD state machine will transition it from REACHABLE to STALE until it's finally deleted by the garbage collector. To prevent such situations the offloading driver should notify the NUD state machine on any neighbours that were recently used. The driver's polling interval should be set so that the NUD state machine can function as if the traffic wasn't offloaded. Currently, there are no in-tree drivers that can report confirmation for a neighbour, but only 'used' indication. Therefore, the polling interval should be set according to DELAY_FIRST_PROBE_TIME, as a neighbour will transition from REACHABLE state to DELAY (instead of STALE) if "a packet was sent within the last DELAY_FIRST_PROBE_TIME seconds" (RFC 4861). Send a netevent whenever the DELAY_FIRST_PROBE_TIME changes - either via netlink or sysctl - so that offloading drivers can correctly set their polling interval. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/netevent.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netevent.h b/include/net/netevent.h index d8bbb38584b6..f440df172b56 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -24,6 +24,7 @@ struct netevent_redirect { enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ + NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ }; int register_netevent_notifier(struct notifier_block *nb); -- cgit From c1adf20052d80f776849fa2c1acb472cdeb7786c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Jul 2016 07:53:51 +0100 Subject: Introduce rb_replace_node_rcu() Implement an RCU-safe variant of rb_replace_node() and rearrange rb_replace_node() to do things in the same order. 
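A hedged sketch of the intended use of rb_replace_node_rcu(): a writer, serialized by its own lock, swaps one node for an equally-keyed replacement while readers keep traversing the tree under RCU. The struct foo container, its spinlock and the key field are invented for illustration; only rb_replace_node_rcu() and kfree_rcu() come from the kernel API.

#include <linux/rbtree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct rb_node node;
	int key;			/* the replacement must sort identically */
	struct rcu_head rcu;
};

static void foo_replace(struct rb_root *root, spinlock_t *lock,
			struct foo *victim, struct foo *new)
{
	spin_lock(lock);		/* writers are serialized by the caller's lock */
	new->key = victim->key;
	rb_replace_node_rcu(&victim->node, &new->node, root);
	spin_unlock(lock);
	kfree_rcu(victim, rcu);		/* RCU readers may still hold "victim" */
}
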
Signed-off-by: David Howells Acked-by: Peter Zijlstra (Intel) --- include/linux/rbtree.h | 2 ++ include/linux/rbtree_augmented.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index b6900099ea81..e585018498d5 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -76,6 +76,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 14d7b831b63a..d076183e49be 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -130,6 +130,19 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, WRITE_ONCE(root->rb_node, new); } +static inline void +__rb_change_child_rcu(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + rcu_assign_pointer(parent->rb_left, new); + else + rcu_assign_pointer(parent->rb_right, new); + } else + rcu_assign_pointer(root->rb_node, new); +} + extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); -- cgit From 995f1405610bd8446c5be37d2ffc031a7729e406 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Jul 2016 13:44:53 -0700 Subject: rcu: Suppress sparse warnings for rcu_dereference_raw() Data structures that are used both with and without RCU protection are difficult to write in a sparse-clean manner. If you mark the relevant pointers with __rcu, sparse will complain about all non-RCU uses, but if you don't mark those pointers, sparse will complain about all RCU uses. This commit therefore suppresses sparse warnings for rcu_dereference_raw(), allowing mixed-protection data structures to avoid these warnings. Reported-by: David Howells Signed-off-by: Paul E. McKenney Signed-off-by: David Howells --- include/linux/rcupdate.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5f1533e3d032..85830e6c797b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -611,6 +611,12 @@ static inline void rcu_preempt_sleep_check(void) rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) +#define rcu_dereference_raw(p) \ +({ \ + /* Dependency order vs. p above. */ \ + typeof(p) ________p1 = lockless_dereference(p); \ + ((typeof(*p) __force __kernel *)(________p1)); \ +}) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -729,8 +735,6 @@ static inline void rcu_preempt_sleep_check(void) __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) -#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ - /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. 
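A short sketch of the "mixed protection" case this entry is about: a pointer that is read both under rcu_read_lock() and under the update-side lock is left without the __rcu annotation, and rcu_dereference_raw() now reads it without a sparse warning in either context. The structure and function names below are illustrative only.

#include <linux/rcupdate.h>

struct bar {
	struct bar *next;	/* deliberately not marked __rcu */
};

static struct bar *bar_next(struct bar *b)
{
	/* valid from both RCU readers and lock-holding updaters, provided
	 * the caller guarantees "b" itself cannot go away underneath it
	 */
	return rcu_dereference_raw(b->next);
}
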
-- cgit From c6e6a0c8be575c830a97b1942dabeab70f423fe0 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Tue, 5 Jul 2016 15:23:08 +0300 Subject: nl80211: Add API to support VHT MU-MIMO air sniffer add API to support VHT MU-MIMO air sniffer. in MU-MIMO there are parallel frames on the air while the HW has only one RX. add the capability to sniff one of the MU-MIMO parallel frames by giving the sniffer additional information so it'll know which of the parallel frames it shall follow. Add attribute - NL80211_ATTR_MU_MIMO_GROUP_DATA - for getting a MU-MIMO groupID in order to monitor packets from that group using VHT MU-MIMO. And add attribute -NL80211_ATTR_MU_MIMO_FOLLOW_ADDR - for passing MAC address to monitor mode. that option will be used by VHT MU-MIMO air sniffer to follow a station according to it's MAC address using VHT MU-MIMO. Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++++-- include/uapi/linux/nl80211.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bbb00d8b2cd..fa4f0f793817 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -330,6 +330,9 @@ struct ieee80211_supported_band { * in a separate chapter. */ +#define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\ + WLAN_USER_POSITION_LEN) + /** * struct vif_params - describes virtual interface parameters * @use_4addr: use 4-address frames @@ -339,10 +342,13 @@ struct ieee80211_supported_band { * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. + * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only + * packets belonging to that MU-MIMO groupID. */ struct vif_params { - int use_4addr; - u8 macaddr[ETH_ALEN]; + int use_4addr; + u8 macaddr[ETH_ALEN]; + u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 53c8278827a0..1d7da7888dcf 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1829,6 +1829,25 @@ enum nl80211_commands { * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per * interface type. * + * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO + * groupID for monitor mode. + * The first 8 bytes are a mask that defines the membership in each + * group (there are 64 groups, group 0 and 63 are reserved), + * each bit represents a group and set to 1 for being a member in + * that group and 0 for not being a member. + * The remaining 16 bytes define the position in each group: 2 bits for + * each group. + * (smaller group numbers represented on most significant bits and bigger + * group numbers on least significant bits.) + * This attribute is used only if all interfaces are in monitor mode. + * Set this attribute in order to monitor packets using the given MU-MIMO + * groupID data. + * to turn off that feature set all the bits of the groupID to zero. + * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow + * when using MU-MIMO air sniffer. + * to turn that feature off set an invalid mac address + * (e.g. 
FF:FF:FF:FF:FF:FF) + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2213,6 +2232,9 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_EXT_CAPA, + NL80211_ATTR_MU_MIMO_GROUP_DATA, + NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4479,6 +4501,12 @@ enum nl80211_feature_flags { * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set * the ASSOC_REQ_USE_RRM flag in the association request even if * NL80211_FEATURE_QUIET is not advertized. + * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air + * sniffer which means that it can be configured to hear packets from + * certain groups which can be configured by the + * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, + * or can be configured to follow a station by configuring the + * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4486,6 +4514,7 @@ enum nl80211_feature_flags { enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VHT_IBSS, NL80211_EXT_FEATURE_RRM, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit From 1d76250bd34af86c6498fc51e50cab3bfbbeceaa Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 5 Jul 2016 17:10:13 +0300 Subject: nl80211: support beacon report scanning Beacon report radio measurement requires reporting observed BSSs on the channels specified in the beacon request. If the measurement mode is set to passive or active, it requires actually performing a scan (passive or active, accordingly), and reporting the time that the scan was started and the time each beacon/probe was received (both in terms of TSF of the BSS of the requesting AP). If the request mode is table, this information is optional. In addition, the radio measurement request specifies the channel dwell time for the measurement. In order to use scan for beacon report when the mode is active or passive, add a parameter to scan request that specifies the channel dwell time, and add scan start time and beacon received time to scan results information. Supporting beacon report is required for Multi Band Operation (MBO). Signed-off-by: Assaf Krauss Signed-off-by: David Spinadel Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 40 +++++++++++++++++++++++++++++++++++----- include/uapi/linux/nl80211.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa4f0f793817..e2658e392a1f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1423,6 +1423,21 @@ struct cfg80211_ssid { u8 ssid_len; }; +/** + * struct cfg80211_scan_info - information about completed scan + * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the + * wireless device that requested the scan is connected to. If this + * information is not available, this field is left zero. + * @tsf_bssid: the BSSID according to which %scan_start_tsf is set. 
+ * @aborted: set to true if the scan was aborted for any reason, + * userspace will be notified of that + */ +struct cfg80211_scan_info { + u64 scan_start_tsf; + u8 tsf_bssid[ETH_ALEN] __aligned(2); + bool aborted; +}; + /** * struct cfg80211_scan_request - scan request description * @@ -1433,12 +1448,17 @@ struct cfg80211_ssid { * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @duration: how long to listen on each channel, in TUs. If + * %duration_mandatory is not set, this is the maximum dwell time and + * the actual dwell time may be shorter. + * @duration_mandatory: if set, the scan duration must be as specified by the + * %duration field. * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for - * @aborted: (internal) scan request was notified as aborted + * @info: (internal) information about completed scan * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation @@ -1454,6 +1474,8 @@ struct cfg80211_scan_request { enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; + u16 duration; + bool duration_mandatory; u32 flags; u32 rates[NUM_NL80211_BANDS]; @@ -1467,7 +1489,8 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; unsigned long scan_start; - bool aborted, notified; + struct cfg80211_scan_info info; + bool notified; bool no_cck; /* keep last */ @@ -1600,12 +1623,19 @@ enum cfg80211_signal_type { * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of * ktime_get_boot_ns() is likely appropriate. + * @parent_tsf: the time at the start of reception of the first octet of the + * timestamp field of the frame. The time is the TSF of the BSS specified + * by %parent_bssid. + * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to + * the BSS that requested the scan in which the beacon/probe was received. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; + u64 parent_tsf; + u8 parent_bssid[ETH_ALEN] __aligned(2); }; /** @@ -4067,10 +4097,10 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * cfg80211_scan_done - notify that scan finished * * @request: the corresponding scan request - * @aborted: set to true if the scan was aborted for any reason, - * userspace will be notified of that + * @info: information about the completed scan */ -void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted); +void cfg80211_scan_done(struct cfg80211_scan_request *request, + struct cfg80211_scan_info *info); /** * cfg80211_sched_scan_results - notify that new scan results are available diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1d7da7888dcf..b39ccab45333 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1848,6 +1848,22 @@ enum nl80211_commands { * to turn that feature off set an invalid mac address * (e.g. FF:FF:FF:FF:FF:FF) * + * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually + * started (u64). 
The time is the TSF of the BSS the interface that + * requested the scan is connected to (if available, otherwise this + * attribute must not be included). + * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which + * %NL80211_ATTR_SCAN_START_TIME_TSF is set. + * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If + * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the + * maximum measurement duration allowed. This attribute is used with + * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN + * if the scan is used for beacon report radio measurement. + * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates + * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is + * mandatory. If this flag is not set, the duration is the maximum duration + * and the actual measurement duration may be shorter. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2235,6 +2251,11 @@ enum nl80211_attrs { NL80211_ATTR_MU_MIMO_GROUP_DATA, NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + NL80211_ATTR_SCAN_START_TIME_TSF, + NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, + NL80211_ATTR_MEASUREMENT_DURATION, + NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3496,6 +3517,12 @@ enum nl80211_bss_scan_width { * was last updated by a received frame. The value is expected to be * accurate to about 10ms. (u64, nanoseconds) * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first + * octet of the timestamp field of the last beacon/probe received for + * this BSS. The time is the TSF of the BSS specified by + * @NL80211_BSS_PARENT_BSSID. (u64). + * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF + * is set. * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -3517,6 +3544,8 @@ enum nl80211_bss { NL80211_BSS_PRESP_DATA, NL80211_BSS_LAST_SEEN_BOOTTIME, NL80211_BSS_PAD, + NL80211_BSS_PARENT_TSF, + NL80211_BSS_PARENT_BSSID, /* keep last */ __NL80211_BSS_AFTER_LAST, @@ -4507,6 +4536,16 @@ enum nl80211_feature_flags { * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, * or can be configured to follow a station by configuring the * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. + * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual + * time the scan started in scan results event. The time is the TSF of + * the BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the + * time the last beacon/probe was received. The time is the TSF of the + * BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of + * channel dwell time. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4515,6 +4554,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VHT_IBSS, NL80211_EXT_FEATURE_RRM, NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, + NL80211_EXT_FEATURE_SCAN_START_TIME, + NL80211_EXT_FEATURE_BSS_PARENT_TSF, + NL80211_EXT_FEATURE_SET_SCAN_DWELL, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit From 7947d3e075cde1a18e538f2dafbc850aa356ff79 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 5 Jul 2016 15:23:12 +0300 Subject: mac80211: Add support for beacon report radio measurement Add the following to support beacon report radio measurement with the measurement mode field set to passive or active: 1. Propagate the required scan duration to the device 2. Report the scan start time (in terms of TSF) 3. Report each BSS's detection time (also in terms of TSF) TSF times refer to the BSS that the interface that requested the scan is connected to. Signed-off-by: Assaf Krauss Signed-off-by: Avraham Stern [changed ath9k/10k, at76c59x-usb, iwlegacy, wl1251 and wlcore to match the new API] Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a52009ffc19f..b4faadbb4e01 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4697,9 +4697,10 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * any context, including hardirq context. * * @hw: the hardware that finished the scan - * @aborted: set to true if scan was aborted + * @info: information about the completed scan */ -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); +void ieee80211_scan_completed(struct ieee80211_hw *hw, + struct cfg80211_scan_info *info); /** * ieee80211_sched_scan_results - got results from scheduled scan -- cgit From 7d27a0ba7adc8ef30c2aae7592fce4c162aee4df Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 1 Jul 2016 10:19:34 +0900 Subject: cfg80211: Add mesh peer AID setting API Previously, mesh power management functionality works only with kernel MPM. Because user space MPM did not report mesh peer AID to kernel, the kernel could not identify the bit in TIM element. So this patch adds mesh peer AID setting API. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e2658e392a1f..9c23f4d33e06 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -780,6 +780,7 @@ enum station_parameters_apply_mask { * (bitmask of BIT(NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change + * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station @@ -811,6 +812,7 @@ struct station_parameters { u32 sta_modify_mask; int listen_interval; u16 aid; + u16 peer_aid; u8 supported_rates_len; u8 plink_action; u8 plink_state; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b39ccab45333..220694151434 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1864,6 +1864,9 @@ enum nl80211_commands { * mandatory. If this flag is not set, the duration is the maximum duration * and the actual measurement duration may be shorter. 
* + * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is + * used to pull the stored data for mesh peer in power save state. + * + * @NUM_NL80211_ATTR: total number of nl80211_attrs available + * @NL80211_ATTR_MAX: highest attribute number currently defined + * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2256,6 +2259,8 @@ enum nl80211_attrs { NL80211_ATTR_MEASUREMENT_DURATION, NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + NL80211_ATTR_MESH_PEER_AID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit From aece0c3fe1f06962a591268b8c236df0ae5d9e4e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 18 Jun 2016 10:45:33 +0200 Subject: nl802154: move PAD to right position The PAD define should be above the experimental support. We don't care if we break userspace in experimental stuff, but PAD is part of the existing UAPI. Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index fcab4de49951..7aad2fdfd16a 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -124,6 +124,8 @@ enum nl802154_attrs { NL802154_ATTR_ACKREQ_DEFAULT, + NL802154_ATTR_PAD, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -138,8 +140,6 @@ enum nl802154_attrs { NL802154_ATTR_SEC_KEY, #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ - NL802154_ATTR_PAD, - __NL802154_ATTR_AFTER_LAST, NL802154_ATTR_MAX = __NL802154_ATTR_AFTER_LAST - 1 }; -- cgit From 66e5c2672cd11b9310008099faf6a4ffb9dfb6d0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 18 Jun 2016 10:45:34 +0200 Subject: ieee802154: add netns support This patch adds netns support for the 802.15.4 subsystem. Most parts are copy&pasted from the wireless subsystem; it has an identical userspace API. 
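An editor's sketch (not part of the patch) of how the wpan_phy_net() helper added to cfg802154.h below gets used once lookups have to be namespace aware; the function name here is hypothetical.

#include <net/cfg802154.h>
#include <net/net_namespace.h>

/* skip phys that live in a different network namespace */
static bool wpan_phy_visible_in(struct wpan_phy *wpan_phy, struct net *net)
{
	/* wpan_phy_net_set(wpan_phy, new_net) is the writer-side counterpart,
	 * used when a phy is moved to another namespace (see the new
	 * NL802154_CMD_SET_WPAN_PHY_NETNS command)
	 */
	return net_eq(wpan_phy_net(wpan_phy), net);
}
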
Cc: Nicolas Dichtel Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 13 +++++++++++++ include/net/nl802154.h | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 171cd76558fb..795ca4008f72 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -219,9 +219,22 @@ struct wpan_phy { struct device dev; + /* the network namespace this phy lives in currently */ + possible_net_t _net; + char priv[0] __aligned(NETDEV_ALIGN); }; +static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy) +{ + return read_pnet(&wpan_phy->_net); +} + +static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) +{ + write_pnet(&wpan_phy->_net, net); +} + struct ieee802154_addr { u8 mode; __le16 pan_id; diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 7aad2fdfd16a..ddcee128f5d9 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -54,6 +54,8 @@ enum nl802154_commands { NL802154_CMD_SET_ACKREQ_DEFAULT, + NL802154_CMD_SET_WPAN_PHY_NETNS, + /* add new commands above here */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -126,6 +128,9 @@ enum nl802154_attrs { NL802154_ATTR_PAD, + NL802154_ATTR_PID, + NL802154_ATTR_NETNS_FD, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL -- cgit From 9cc577dd25b9762df7f353658426bb2e048c480a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:24 +0200 Subject: ieee802154: add ieee802154_skb_dst_pan helper This patch adds ieee802154_skb_dst_pan function to get the pointer address of the destination pan id at skb mac pointer. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 16 ++++++++++++++++ include/net/mac802154.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index acedbb68a5a3..91f4665fea63 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,8 @@ #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 #define IEEE802154_MAX_AUTH_TAG_LEN 16 +#define IEEE802154_FC_LEN 2 +#define IEEE802154_SEQ_LEN 1 /* General MAC frame format: * 2 bytes: Frame Control @@ -221,9 +223,14 @@ enum { #define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 +#define IEEE802154_FCTL_DADDR 0x0c00 #define IEEE802154_FTYPE_DATA 0x0001 +#define IEEE802154_FCTL_ADDR_NONE 0x0000 +#define IEEE802154_FCTL_DADDR_SHORT 0x0800 +#define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 + /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder @@ -261,6 +268,15 @@ static inline bool ieee802154_is_intra_pan(__le16 fc) return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); } +/* + * ieee802154_daddr_mode - get daddr mode from fc + * @fc: frame control bytes in little-endian byteorder + */ +static inline __le16 ieee802154_daddr_mode(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: diff --git a/include/net/mac802154.h b/include/net/mac802154.h index e465c8551ac3..b3f7cd868fe9 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -257,6 +257,35 @@ static inline __le16 ieee802154_get_fc_from_skb(const 
struct sk_buff *skb) return get_unaligned_le16(skb_mac_header(skb)); } +/** + * ieee802154_skb_dst_pan - get the pointer to destination pan field + * @fc: mac header frame control field + * @skb: skb where the destination pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan; + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + dst_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + dst_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + dst_pan = NULL; + break; + } + + return dst_pan; +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From 19580cc1ed299c736b56b45c7576b477f185f8f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:25 +0200 Subject: ieee802154: add ieee802154_skb_src_pan helper This patch adds ieee802154_skb_src_pan function to get the pointer address of the source pan id at skb mac pointer. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 13 ++++++++++ include/net/mac802154.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 91f4665fea63..ddb890174a0e 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -50,6 +50,7 @@ #define IEEE802154_EXTENDED_ADDR_LEN 8 #define IEEE802154_SHORT_ADDR_LEN 2 +#define IEEE802154_PAN_ID_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 @@ -224,12 +225,15 @@ enum { #define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FCTL_DADDR 0x0c00 +#define IEEE802154_FCTL_SADDR 0xc000 #define IEEE802154_FTYPE_DATA 0x0001 #define IEEE802154_FCTL_ADDR_NONE 0x0000 #define IEEE802154_FCTL_DADDR_SHORT 0x0800 #define IEEE802154_FCTL_DADDR_EXTENDED 0x0c00 +#define IEEE802154_FCTL_SADDR_SHORT 0x8000 +#define IEEE802154_FCTL_SADDR_EXTENDED 0xc000 /* * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA @@ -277,6 +281,15 @@ static inline __le16 ieee802154_daddr_mode(__le16 fc) return fc & cpu_to_le16(IEEE802154_FCTL_DADDR); } +/* + * ieee802154_saddr_mode - get saddr mode from fc + * @fc: frame control bytes in little-endian byteorder + */ +static inline __le16 ieee802154_saddr_mode(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_SADDR); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: diff --git a/include/net/mac802154.h b/include/net/mac802154.h index b3f7cd868fe9..ec01b35bc969 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -286,6 +286,65 @@ static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc, return dst_pan; } +/** + * ieee802154_skb_src_pan - get the pointer to source pan field + * @fc: mac header frame control field + * @skb: skb where the source pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_src_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *src_pan; + + switch (ieee802154_saddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_SADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_SADDR_EXTENDED): + /* if 
intra-pan and source addr mode is non none, + * then source pan id is equal destination pan id. + */ + if (ieee802154_is_intra_pan(fc)) { + src_pan = ieee802154_skb_dst_pan(fc, skb); + break; + } + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_SHORT_ADDR_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_EXTENDED_ADDR_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + + return src_pan; +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From 0ea0b9af9b7599ada307258dc841f4300873e8a1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:26 +0200 Subject: ieee802154: 6lowpan: fix intra pan id check The RIOT-OS stack does send intra-pan frames but don't set the intra pan flag inside the mac header. It seems this is valid frame addressing but inefficient. Anyway this patch adds a new function for intra pan addressing, doesn't matter if intra pan flag or source and destination are the same. The newly introduction function will be used to check on intra pan addressing for 6lowpan. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index ec01b35bc969..d757edd0b0b7 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -345,6 +345,25 @@ static inline unsigned char *ieee802154_skb_src_pan(__le16 fc, return src_pan; } +/** + * ieee802154_skb_is_intra_pan_addressing - checks whenever the mac addressing + * is an intra pan communication + * @fc: mac header frame control field + * @skb: skb where the source and destination pan should be get from + */ +static inline bool ieee802154_skb_is_intra_pan_addressing(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb), + *src_pan = ieee802154_skb_src_pan(fc, skb); + + /* if one is NULL is no intra pan addressing */ + if (!dst_pan || !src_pan) + return false; + + return !memcmp(dst_pan, src_pan, IEEE802154_PAN_ID_LEN); +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer -- cgit From aaa7088eb29a0ffe6cf8d8a443695df41e5a62f3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:27 +0200 Subject: ieee802154: fix skb get fc on big endian This patch fixes ieee802154_get_fc_from_skb function on big endian machines. The function get_unaligned_le16 converts the byte order to host byte order but we want to keep the byte order like in mac header. 
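To make the byte-order requirement concrete, here is a small editorial sketch (not taken from any of the patches above) showing how the new helpers compose once the frame control field is kept in on-wire __le16 order; example_same_dst_pan() and its pan_id parameter are invented names for illustration only:

static bool example_same_dst_pan(const struct sk_buff *skb, __le16 pan_id)
{
        /* fc keeps the two MAC header bytes exactly as received, so the
         * cpu_to_le16(IEEE802154_FCTL_*) comparisons inside
         * ieee802154_skb_dst_pan() behave the same on little and big
         * endian hosts.
         */
        __le16 fc = ieee802154_get_fc_from_skb(skb);
        unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb);

        return dst_pan && !memcmp(dst_pan, &pan_id, IEEE802154_PAN_ID_LEN);
}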
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d757edd0b0b7..bb7bfecc5ab3 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -247,6 +247,8 @@ struct ieee802154_ops { */ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) { + __le16 fc; + /* check if we can fc at skb_mac_header of sk buffer */ if (unlikely(!skb_mac_header_was_set(skb) || (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) { @@ -254,7 +256,8 @@ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) return cpu_to_le16(0); } - return get_unaligned_le16(skb_mac_header(skb)); + memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN); + return fc; } /** -- cgit From 048e7f7e66a8693fe1707997bffd19d08cde08f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Jul 2016 23:32:30 +0200 Subject: ieee802154: cleanup WARN_ON for fc fetch This patch cleanups the WARN_ON which occurs when the sk buffer has insufficient buffer space by moving the WARN_ON into if condition. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index bb7bfecc5ab3..286824acd008 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -250,11 +250,10 @@ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) __le16 fc; /* check if we can fc at skb_mac_header of sk buffer */ - if (unlikely(!skb_mac_header_was_set(skb) || - (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) { - WARN_ON(1); + if (WARN_ON(!skb_mac_header_was_set(skb) || + (skb_tail_pointer(skb) - + skb_mac_header(skb)) < IEEE802154_FC_LEN)) return cpu_to_le16(0); - } memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN); return fc; -- cgit From 3467f0d433016c45d1851f3587d32816b7b2ffb0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 23 Jun 2016 16:57:09 +0200 Subject: ath9k: Allow configuration of LED polarity in platform data. Some devices running OpenWrt need this and it makes sense to add this to ath9k_platform_data as the next patches will add a devicetree (boolean) property for it as well. Suggested-by: Vittorio Gambaletta Signed-off-by: Martin Blumenstingl Signed-off-by: Kalle Valo --- include/linux/ath9k_platform.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index e66153d60bd5..76860a461ed2 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -40,6 +40,7 @@ struct ath9k_platform_data { bool tx_gain_buffalo; bool disable_2ghz; bool disable_5ghz; + bool led_active_high; int (*get_mac_revision)(void); int (*external_reset)(void); -- cgit From d390238c4fba7c87a3bcd859ce3373c864eb7b02 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 6 Jul 2016 20:03:54 -0400 Subject: net: dsa: initialize the routing table The routing table of every switch in a tree is currently initialized to all zeros. This is an issue since 0 is a valid port number. Add a DSA_RTABLE_NONE=-1 constant to initialize the signed values of the routing table pointing to other switches. 
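Concretely, the intended setup step becomes something like the following editorial sketch; the routing table member itself lives outside the 'include' hunk below, so example_init_rtable() and the s8 array are illustrative only:

static void example_init_rtable(s8 rtable[DSA_MAX_SWITCHES])
{
        int i;

        /* Mark every peer switch as unreachable first, so that 0 can
         * later be used as a real routing entry instead of acting as
         * an ambiguous default value.
         */
        for (i = 0; i < DSA_MAX_SWITCHES; i++)
                rtable[i] = DSA_RTABLE_NONE;
}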
This fixes the device mapping of the mv88e6xxx driver where the port pointing to the switch itself and to non-existent switches was wrongly configured to be 0. It is now set to the expected 0xf value. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 20b3087ad193..52ab18bc2b0d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -32,6 +32,8 @@ enum dsa_tag_protocol { #define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 +#define DSA_RTABLE_NONE -1 + struct dsa_chip_data { /* * How to access the switch configuration registers. -- cgit From 606274c5abd8e245add01bc7145a8cbb92b69ba8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Jul 2016 22:38:36 -0700 Subject: bpf: introduce bpf_get_current_task() helper over time there were multiple requests to access different data structures and fields of task_struct current, so finally add the helper to access 'current' as-is. Tracing bpf programs will do the rest of walking the pointers via bpf_probe_read(). Note that current can be null and bpf program has to deal it with, but even dumb passing null into bpf_probe_read() is still safe. Suggested-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c14ca1cd6297..262a7e883b19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -357,6 +357,13 @@ enum bpf_func_id { */ BPF_FUNC_get_hash_recalc, + /** + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + */ + BPF_FUNC_get_current_task, + __BPF_FUNC_MAX_ID, }; -- cgit From ca8bee5dde1f02c2dbe8c8453dce27f2dfafb21c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 5 Jul 2016 14:30:14 +0200 Subject: Bluetooth: Rename HCI_BREDR into HCI_PRIMARY The HCI_BREDR naming is confusing since it actually stands for Primary Bluetooth Controller. Which is a term that has been used in the latest standard. However from a legacy point of view there only really have been Basic Rate (BR) and Enhanced Data Rate (EDR). Recent versions of Bluetooth introduced Low Energy (LE) and made this terminology a little bit confused since Dual Mode Controllers include BR/EDR and LE. To simplify this the name HCI_PRIMARY stands for the Primary Controller which can be a single mode or dual mode controller. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index eefcf3e96421..a3f86de6f100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -65,7 +65,7 @@ #define HCI_I2C 8 /* HCI controller types */ -#define HCI_BREDR 0x00 +#define HCI_PRIMARY 0x00 #define HCI_AMP 0x01 /* First BR/EDR Controller shall have ID = 0 */ -- cgit From a65056ecf4b48be0d0284a7b6a57b6dace10b843 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 6 Jul 2016 12:12:21 -0700 Subject: net: bridge: extend MLD/IGMP query stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As was suggested this patch adds support for the different versions of MLD and IGMP query types. 
Since the user visible structure is still in net-next we can augment it instead of adding netlink attributes. The distinction between the different IGMP/MLD query types is done as suggested in Section 7.1, RFC 3376 [1] and Section 8.1, RFC 3810 [2] based on query payload size and code for IGMP. Since all IGMP packets go through multicast_rcv() and it uses ip_mc_check_igmp/ipv6_mc_check_mld we can be sure that at least the ip/ipv6 header can be directly used. [1] https://tools.ietf.org/html/rfc3376#section-7 [2] https://tools.ietf.org/html/rfc3810#section-8.1 Suggested-by: Linus Lüssing Signed-off-by: Nikolay Aleksandrov Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 8304fe6f0561..c186f64fffca 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -261,14 +261,17 @@ enum { /* IGMP/MLD statistics */ struct br_mcast_stats { - __u64 igmp_queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3queries[BR_MCAST_DIR_SIZE]; __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; __u64 igmp_parse_errors; - __u64 mld_queries[BR_MCAST_DIR_SIZE]; + __u64 mld_v1queries[BR_MCAST_DIR_SIZE]; + __u64 mld_v2queries[BR_MCAST_DIR_SIZE]; __u64 mld_leaves[BR_MCAST_DIR_SIZE]; __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; -- cgit From 1db19db7f5ff4ddd3b1b6dd2092a87298ee5bd0b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 7 Jul 2016 18:01:32 +0200 Subject: net: tracepoint napi:napi_poll add work and budget An important information for the napi_poll tracepoint is knowing the work done (packets processed) by the napi_poll() call. Add both the work done and budget, as they are related. Handle trace_napi_poll() param change in dropwatch/drop_monitor and in python perf script netdev-times.py in backward compat way, as python fortunately supports optional parameter handling. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/napi.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 8fe1e93f531d..118ed7767639 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -12,22 +12,27 @@ TRACE_EVENT(napi_poll, - TP_PROTO(struct napi_struct *napi), + TP_PROTO(struct napi_struct *napi, int work, int budget), - TP_ARGS(napi), + TP_ARGS(napi, work, budget), TP_STRUCT__entry( __field( struct napi_struct *, napi) + __field( int, work) + __field( int, budget) __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) ), TP_fast_assign( __entry->napi = napi; + __entry->work = work; + __entry->budget = budget; __assign_str(dev_name, napi->dev ? 
napi->dev->name : NO_DEV); ), - TP_printk("napi poll on napi struct %p for device %s", - __entry->napi, __get_str(dev_name)) + TP_printk("napi poll on napi struct %p for device %s work %d budget %d", + __entry->napi, __get_str(dev_name), + __entry->work, __entry->budget) ); #undef NO_DEV -- cgit From 1d984c2e03c1fb21539a9f50627e312788512013 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Jul 2016 15:52:39 +0200 Subject: NFC: digital: Fix handling of saved PDU sk_buff pointers This patch fixes the way an I-PDU is saved in case it needs to be sent again. It is now copied using pskb_copy() and not simply referenced using skb_get() since it could be modified by the driver. digital_in_send_saved_skb() and digital_tg_send_saved_skb() still get a reference on the saved skb which is re-sent but release it if the send operation fails. That way the caller doesn't have to take care about skb ref in case of error. RTOX supervisor PDU must not be saved as this can override a previously saved I-PDU that should be re-sent later on. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 506e3f6eabef..f9a4e4771861 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -237,7 +237,6 @@ struct nfc_digital_dev { int nack_count; struct sk_buff *saved_skb; - unsigned int saved_skb_len; u16 target_fsc; -- cgit From 1a09c56f545c8ff8d338a38c7c40d79f4165a94c Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Jul 2016 15:52:45 +0200 Subject: NFC: digital: Add support for NFC DEP Response Waiting Time When sending an ATR_REQ, the initiator must wait for the ATR_RES at least 'RWT(nfcdep,activation) + dRWT(nfcdep)' and no more than 'RWT(nfcdep,activation) + dRWT(nfcdep) + dT(nfcdep,initiator)'. This gives a timeout value between 1237 ms and 1337 ms. This patch defines DIGITAL_ATR_RES_RWT to 1337 used for the timeout value of ATR_REQ command. For other DEP PDUs, the initiator must wait between 'RWT + dRWT(nfcdep)' and 'RWT + dRWT(nfcdep) + dT(nfcdep,initiator)' where RWT is given by the following formula: '(256 * 16 / f(c)) * 2^wt' where wt is the value of the TO field in the ATR_RES response and is in the range between 0 and 14. This patch declares a mapping table for wt values and gives RWT max values between 100 ms and 5049 ms. This patch also defines DIGITAL_ATR_RES_TO_WT, the maximum wt value in target mode, to 8. 
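As a rough sanity check of those bounds (editorial, not part of the patch; the driver declares a precomputed mapping table rather than evaluating this formula), the maximum RWT in milliseconds for a given wt can be approximated as:

static unsigned int example_rwt_ms(unsigned int wt)
{
        /* (256 * 16 / 13.56 MHz) * 2^wt is about 0.302 ms * 2^wt; the
         * added 100 ms stands in for dRWT(nfcdep).  wt = 0 then yields
         * roughly 100 ms and wt = 14 roughly 4949 + 100 = 5049 ms,
         * matching the range quoted above.
         */
        return (4096u * (1u << wt)) / 13560u + 100u;
}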
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index f9a4e4771861..74fa7eb94e72 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -226,6 +226,7 @@ struct nfc_digital_dev { u8 curr_rf_tech; u8 curr_nfc_dep_pni; u8 did; + u16 dep_rwt; u8 local_payload_max; u8 remote_payload_max; -- cgit From 64b87639c9cbeb03e26bc65528416c961b1dde96 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:43 +0800 Subject: netfilter: conntrack: fix race between nf_conntrack proc read and hash resize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, race will happen, because reader can observe a newly allocated hash but the old size (or vice versa). So oops will happen like follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000017 IP: [] seq_print_acct+0x11/0x50 [nf_conntrack] Call Trace: [] ? ct_seq_show+0x14e/0x340 [nf_conntrack] [] seq_read+0x2cc/0x390 [] proc_reg_read+0x42/0x70 [] __vfs_read+0x37/0x130 [] ? security_file_permission+0xa0/0xc0 [] vfs_read+0x95/0x140 [] SyS_read+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 It is very easy to reproduce this kernel crash. 1. open one shell and input the following cmds: while : ; do echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize done 2. open more shells and input the following cmds: while : ; do cat /proc/net/nf_conntrack done 3. just wait a monent, oops will happen soon. The solution in this patch is based on Florian's Commit 5e3c61f98175 ("netfilter: conntrack: fix lookup race during hash resize"). And add a wrapper function nf_conntrack_get_ht to get hash and hsize suggested by Florian Westphal. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3e2f3328945c..79d7ac5c9740 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); + /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, -- cgit From 242922a027176cd260c5adce4ba6bbfa3a05190c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Jul 2016 20:44:01 +0200 Subject: netfilter: conntrack: simplify early_drop We don't need to acquire the bucket lock during early drop, we can use lockless traveral just like ____nf_conntrack_find. The timer deletion serves as synchronization point, if another cpu attempts to evict same entry, only one will succeed with timer deletion. 
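For reference, a minimal reader-side sketch of the pattern both conntrack changes rely on (editorial, not from the patches; only the nf_conntrack_get_ht() declaration comes from the hunk above): fetch the hash pointer and its length together under RCU so a concurrent resize can never pair a new table with an old size:

static void example_walk_conntrack(void)
{
        struct hlist_nulls_head *hash;
        unsigned int hsize, i;

        rcu_read_lock();
        nf_conntrack_get_ht(&hash, &hsize);
        for (i = 0; i < hsize; i++) {
                /* walk hash[i] here, e.g. with
                 * hlist_nulls_for_each_entry_rcu()
                 */
        }
        rcu_read_unlock();
}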
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d3397f34583..2a5133e214c9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) -- cgit From 7c9664351980aaa6a4b8837a314360b3a4ad382a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:23 +0200 Subject: netfilter: move nat hlist_head to nf_conn The nat extension structure is 32bytes in size on x86_64: struct nf_conn_nat { struct hlist_node bysource; /* 0 16 */ struct nf_conn * ct; /* 16 8 */ union nf_conntrack_nat_help help; /* 24 4 */ int masq_index; /* 28 4 */ /* size: 32, cachelines: 1, members: 4 */ /* last cacheline: 32 bytes */ }; The hlist is needed to quickly check for possible tuple collisions when installing a new nat binding. Storing this in the extension area has two drawbacks: 1. We need ct backpointer to get the conntrack struct from the extension. 2. When reallocation of extension area occurs we need to fixup the bysource hash head via hlist_replace_rcu. We can avoid both by placing the hlist_head in nf_conn and place nf_conn in the bysource hash rather than the extenstion. We can also remove the ->move support; no other extension needs it. Moving the entire nat extension into nf_conn would be possible as well but then we have to add yet another callback for deletion from the bysource hash table rather than just using nat extension ->destroy hook for this. nf_conn size doesn't increase due to aligment, followup patch replaces hlist_node with single pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 3 --- include/net/netfilter/nf_nat.h | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a5133e214c9..e5135d8728b4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,6 +117,9 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; +#if IS_ENABLED(CONFIG_NF_NAT) + struct hlist_node nat_bysource; +#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index b925395fa5ed..1c3035dda31f 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); - /* Called when realloacted (can be NULL). - Contents has already been moved. 
*/ - void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 344b1ab19220..02515f7ed4cc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -29,8 +29,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct hlist_node bysource; - struct nf_conn *ct; union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) -- cgit From 870190a9ec9075205c0fa795a09fa931694a3ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:24 +0200 Subject: netfilter: nat: convert nat bysrc hash to rhashtable It did use a fixed-size bucket list plus single lock to protect add/del. Unlike the main conntrack table we only need to add and remove keys. Convert it to rhashtable to get table autosizing and per-bucket locking. The maximum number of entries is -- as before -- tied to the number of conntracks so we do not need another upperlimit. The change does not handle rhashtable_remove_fast error, only possible "error" is -ENOENT, and that is something that can happen legitimetely, e.g. because nat module was inserted at a later time and no src manip took place yet. Tested with http-client-benchmark + httpterm with DNAT and SNAT rules in place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 ++- include/net/netfilter/nf_nat.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e5135d8728b4..a08825b7e955 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct hlist_node nat_bysource; + struct rhash_head nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 02515f7ed4cc..c327a431a6f3 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,5 +1,6 @@ #ifndef _NF_NAT_H #define _NF_NAT_H +#include #include #include #include -- cgit From d51ed8367bcbbb06f4f4986d1ef7dc2480bed1ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Jul 2016 13:08:50 +0200 Subject: netfilter: constify arg to is_dying/confirmed Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a08825b7e955..1e04911b78ea 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -270,12 +270,12 @@ static inline int nf_ct_is_template(const struct nf_conn *ct) } /* It's confirmed if it is, or has been in the hash table. 
*/ -static inline int nf_ct_is_confirmed(struct nf_conn *ct) +static inline int nf_ct_is_confirmed(const struct nf_conn *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } -static inline int nf_ct_is_dying(struct nf_conn *ct) +static inline int nf_ct_is_dying(const struct nf_conn *ct) { return test_bit(IPS_DYING_BIT, &ct->status); } -- cgit From 42a55769132fdf4f44bac1471b371d7f80bcde35 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Jul 2016 14:41:49 +0200 Subject: netfilter: nf_tables: get rid of possible_net_t from set and basechain We can pass the netns pointer as parameter to the functions that need to gain access to it. From basechains, I didn't find any client for this field anymore so let's remove this too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 30c1d9489ae2..f2f13399ce44 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -236,7 +236,8 @@ struct nft_expr; * @features: features supported by the implementation */ struct nft_set_ops { - bool (*lookup)(const struct nft_set *set, + bool (*lookup)(const struct net *net, + const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, @@ -248,11 +249,14 @@ struct nft_set_ops { struct nft_regs *regs, const struct nft_set_ext **ext); - int (*insert)(const struct nft_set *set, + int (*insert)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void (*activate)(const struct nft_set *set, + void (*activate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct nft_set *set, + void * (*deactivate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); @@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops); * @udlen: user data length * @udata: user data * @ops: set ops - * @pnet: network namespace * @flags: set flags * @genmask: generation mask * @klen: key length @@ -318,7 +321,6 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - possible_net_t pnet; u16 flags:14, genmask:2; u8 klen; @@ -804,7 +806,6 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops - * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -813,7 +814,6 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - possible_net_t pnet; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, return !(ext->genmask & genmask); } -static inline void nft_set_elem_change_active(const struct nft_set *set, +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, struct nft_set_ext *ext) { - ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); + ext->genmask ^= nft_genmask_next(net); } /* -- cgit From 28aa4c26fce2202db8d42ae76b639ca1d9a23d25 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:40 +0800 Subject: sctp: add SCTP_PR_SUPPORTED on sctp sockopt According to 
section 4.5 of rfc7496, prsctp_enable should be per asoc. We will add prsctp_enable to both asoc and ep, and replace the places where it used net.sctp->prsctp_enable with asoc->prsctp_enable. ep->prsctp_enable will be initialized with net.sctp->prsctp_enable, and asoc->prsctp_enable will be initialized with ep->prsctp_enable. We can also modify it's value through sockopt SCTP_PR_SUPPORTED. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ++++-- include/uapi/linux/sctp.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 83c5ec58b93a..07115ca9de4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1256,7 +1256,8 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable; + __u8 auth_enable:1, + prsctp_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1848,7 +1849,8 @@ struct sctp_association { __u16 active_key_id; __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ - temp:1; /* Is it a temporary association? */ + temp:1, /* Is it a temporary association? */ + prsctp_enable:1; struct sctp_priv_assoc_stats stats; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ce70fe6b45df..aa08906f292d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -112,6 +112,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ +#define SCTP_PR_SUPPORTED 113 /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ -- cgit From f959fb442c35f4b61fea341401b8463dd0a1b959 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:41 +0800 Subject: sctp: add SCTP_DEFAULT_PRINFO into sctp sockopt This patch adds SCTP_DEFAULT_PRINFO to sctp sockopt. It is used to set/get sctp Partially Reliable Policies' default params, which includes 3 policies (ttl, rtx, prio) and their values. Still, if we set policy params in sndinfo, we will use the params of sndinfo against chunks, instead of the default params. In this patch, we will use 5-8bit of sp/asoc->default_flags to store prsctp policies, and reuse asoc->default_timetolive to store their values. It means if we enable and set prsctp policy, prior ttl timeout in sctp will not work any more. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/uapi/linux/sctp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index aa08906f292d..984cf2e9a61d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -113,6 +113,29 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 +#define SCTP_DEFAULT_PRINFO 114 + +/* PR-SCTP policies */ +#define SCTP_PR_SCTP_NONE 0x0000 +#define SCTP_PR_SCTP_TTL 0x0010 +#define SCTP_PR_SCTP_RTX 0x0020 +#define SCTP_PR_SCTP_PRIO 0x0030 +#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_PRIO +#define SCTP_PR_SCTP_MASK 0x0030 + +#define __SCTP_PR_INDEX(x) ((x >> 4) - 1) +#define SCTP_PR_INDEX(x) __SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x) + +#define SCTP_PR_POLICY(x) ((x) & SCTP_PR_SCTP_MASK) +#define SCTP_PR_SET_POLICY(flags, x) \ + do { \ + flags &= ~SCTP_PR_SCTP_MASK; \ + flags |= x; \ + } while (0) + +#define SCTP_PR_TTL_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL) +#define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX) +#define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO) /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ @@ -903,4 +926,10 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +struct sctp_default_prinfo { + sctp_assoc_t pr_assoc_id; + __u32 pr_value; + __u16 pr_policy; +}; + #endif /* _UAPI_SCTP_H */ -- cgit From 826d253d57b11f69add81c8086d2e7f1dce5ec77 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:42 +0800 Subject: sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt This patch adds SCTP_PR_ASSOC_STATUS to sctp sockopt, which is used to dump the prsctp statistics info from the asoc. The prsctp statistics includes abandoned_sent/unsent from the asoc. abandoned_sent is the count of the packets we drop packets from retransmit/transmited queue, and abandoned_unsent is the count of the packets we drop from out_queue according to the policy. Note: another option for prsctp statistics dump described in rfc is SCTP_PR_STREAM_STATUS, which is used to dump the prsctp statistics info from each stream. But by now, linux doesn't yet have per stream statistics info, it needs rfc6525 to be implemented. As the prsctp statistics for each stream has to be based on per stream statistics, we will delay it until rfc6525 is done in linux. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 3 +++ include/uapi/linux/sctp.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07115ca9de4d..d8e464aacb20 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1853,6 +1853,9 @@ struct sctp_association { prsctp_enable:1; struct sctp_priv_assoc_stats stats; + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 984cf2e9a61d..d304f4c9792c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -114,6 +114,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 +#define SCTP_PR_ASSOC_STATUS 115 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -926,6 +927,17 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +/* + * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status + */ +struct sctp_prstatus { + sctp_assoc_t sprstat_assoc_id; + __u16 sprstat_sid; + __u16 sprstat_policy; + __u64 sprstat_abandoned_unsent; + __u64 sprstat_abandoned_sent; +}; + struct sctp_default_prinfo { sctp_assoc_t pr_assoc_id; __u32 pr_value; -- cgit From a6c2f792873aff332a4689717c3cd6104f46684c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:43 +0800 Subject: sctp: implement prsctp TTL policy prsctp TTL policy is a policy to abandon chunks when they expire at the specific time in local stack. It's similar with expires_at in struct sctp_datamsg. This patch uses sinfo->sinfo_timetolive to set the specific time for TTL policy. sinfo->sinfo_timetolive is also used for msg->expires_at. So if prsctp_enable or TTL policy is not enabled, msg->expires_at still works as before. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d8e464aacb20..6bcda715008e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -602,6 +602,16 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; + /* We use this field to record param for prsctp policies, + * for TTL policy, it is the time_to_drop of this chunk, + * for RTX policy, it is the max_sent_count of this chunk, + * for PRIO policy, it is the priority of this chunk. + */ + unsigned long prsctp_param; + + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* Which association does this belong to? */ struct sctp_association *asoc; -- cgit From 8dbdf1f5b09cb22560e7c7173b52fe3c631046bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:45 +0800 Subject: sctp: implement prsctp PRIO policy prsctp PRIO policy is a policy to abandon lower priority chunks when asoc doesn't have enough snd buffer, so that the current chunk with higher priority can be queued successfully. Similar to TTL/RTX policy, we will set the priority of the chunk to prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy(). So if PRIO policy is enabled, msg->expire_at won't work. asoc->sent_cnt_removable will record how many chunks can be checked to remove. If priority policy is enabled, when the chunk is queued into the out_queue, we will increase sent_cnt_removable. 
When the chunk is moved to abandon_queue or dequeue and free, we will decrease sent_cnt_removable. In sctp_sendmsg, we will check if there is enough snd buffer for current msg and if sent_cnt_removable is not 0. Then try to abandon chunks in sctp_prune_prsctp when sendmsg from the retransmit/transmited queue, and free chunks from out_queue in right order until the abandon+free size > msg_len - sctp_wfree. For the abandon size, we have to wait until it sends FORWARD TSN, receives the sack and the chunks are really freed. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6bcda715008e..8626bdd3249a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1084,6 +1084,8 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1864,6 +1866,8 @@ struct sctp_association { struct sctp_priv_assoc_stats stats; + int sent_cnt_removable; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; -- cgit From e5224f0fe2acddbc2fa9b419d8867ced7f5381fc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Jul 2016 18:05:03 +0200 Subject: devlink: add hardware messages tracing facility Define a tracepoint and allow user to trace messages going to and from hardware associated with devlink instance. Signed-off-by: Jiri Pirko Acked-by: Steven Rostedt Signed-off-by: David S. 
Miller --- include/trace/events/devlink.h | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 include/trace/events/devlink.h (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h new file mode 100644 index 000000000000..333c32ac9bfa --- /dev/null +++ b/include/trace/events/devlink.h @@ -0,0 +1,68 @@ +#if IS_ENABLED(CONFIG_NET_DEVLINK) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devlink + +#if !defined(_TRACE_DEVLINK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVLINK_H + +#include +#include +#include + +/* + * Tracepoint for devlink hardware message: + */ +TRACE_EVENT(devlink_hwmsg, + TP_PROTO(const struct devlink *devlink, bool incoming, + unsigned long type, const u8 *buf, size_t len), + + TP_ARGS(devlink, incoming, type, buf, len), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(owner_name, devlink->dev->driver->owner->name) + __field(bool, incoming) + __field(unsigned long, type) + __dynamic_array(u8, buf, len) + __field(size_t, len) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(owner_name, devlink->dev->driver->owner->name); + __entry->incoming = incoming; + __entry->type = type; + memcpy(__get_dynamic_array(buf), buf, len); + __entry->len = len; + ), + + TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + __get_str(bus_name), __get_str(dev_name), + __get_str(owner_name), __entry->incoming, __entry->type, + (int) __entry->len, __get_dynamic_array(buf), __entry->len) +); + +#endif /* _TRACE_DEVLINK_H */ + +/* This part must be outside protection */ +#include + +#else /* CONFIG_NET_DEVLINK */ + +#if !defined(_TRACE_DEVLINK_H) +#define _TRACE_DEVLINK_H + +#include + +static inline void trace_devlink_hwmsg(const struct devlink *devlink, + bool incoming, unsigned long type, + const u8 *buf, size_t len) +{ +} + +#endif /* _TRACE_DEVLINK_H */ + +#endif -- cgit From 160b925163c0aabc2c2fbb7d58a75e38b7cd6a17 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 12 Jul 2016 02:12:16 +0200 Subject: Bluetooth: Add Authentication Failed reason to Disconnected Mgmt event If link is disconnected due to Authentication Failure (PIN or Key Missing status) userspace will be notified about this with proper error code. Many LE profiles define "PIN or Key Missing" status as indication of remote lost bond so this allows userspace to take action on this. 
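Schematically, the translation this enables looks like the following editorial sketch; the constants are the ones added in the hunks below, but example_disconn_reason() and its handling of the non-authentication cases are illustrative, since the real logic lives in net/bluetooth and is outside the 'include' filter:

static u8 example_disconn_reason(u8 hci_status)
{
        switch (hci_status) {
        case HCI_ERROR_AUTH_FAILURE:
        case HCI_ERROR_PIN_OR_KEY_MISSING:
                /* lost bond or other authentication problem */
                return MGMT_DEV_DISCONN_AUTH_FAILURE;
        case HCI_ERROR_CONNECTION_TIMEOUT:
                return MGMT_DEV_DISCONN_TIMEOUT;
        default:
                return MGMT_DEV_DISCONN_REMOTE;
        }
}

The btmon traces below show this failure on both an LE link (Encryption Change with PIN or Key Missing) and a BR/EDR link (Auth Complete with PIN or Key Missing).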
@ Device Connected: 88:63:DF:88:0E:83 (1) flags 0x0000 02 01 1a 05 03 0a 18 0d 18 0b 09 48 65 61 72 74 ...........Heart 20 52 61 74 65 Rate > HCI Event: Command Status (0x0f) plen 4 LE Read Remote Used Features (0x08|0x0016) ncmd 1 Status: Success (0x00) > ACL Data RX: Handle 3585 flags 0x02 dlen 11 ATT: Read By Group Type Request (0x10) len 6 Handle range: 0x0001-0xffff Attribute group type: Primary Service (0x2800) > HCI Event: LE Meta Event (0x3e) plen 12 LE Read Remote Used Features (0x04) Status: Success (0x00) Handle: 3585 Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 LE Encryption < HCI Command: LE Start Encryption (0x08|0x0019) plen 28 Handle: 3585 Random number: 0x0000000000000000 Encrypted diversifier: 0x0000 Long term key: 26201cd479a0921b6f949f0b1fa8dc82 > HCI Event: Command Status (0x0f) plen 4 LE Start Encryption (0x08|0x0019) ncmd 1 Status: Success (0x00) > HCI Event: Encryption Change (0x08) plen 4 Status: PIN or Key Missing (0x06) Handle: 3585 Encryption: Disabled (0x00) < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 3585 Reason: Authentication Failure (0x05) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Success (0x00) > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 3585 Reason: Connection Terminated By Local Host (0x16) @ Device Disconnected: 88:63:DF:88:0E:83 (1) reason 4 @ Device Connected: C4:43:8F:A3:4D:83 (0) flags 0x0000 08 09 4e 65 78 75 73 20 35 ..Nexus 5 > HCI Event: Command Status (0x0f) plen 4 Authentication Requested (0x01|0x0011) ncmd 1 Status: Success (0x00) > HCI Event: Link Key Request (0x17) plen 6 Address: C4:43:8F:A3:4D:83 (LG Electronics) < HCI Command: Link Key Request Reply (0x01|0x000b) plen 22 Address: C4:43:8F:A3:4D:83 (LG Electronics) Link key: 080812e4aa97a863d11826f71f65a933 > HCI Event: Command Complete (0x0e) plen 10 Link Key Request Reply (0x01|0x000b) ncmd 1 Status: Success (0x00) Address: C4:43:8F:A3:4D:83 (LG Electronics) > HCI Event: Auth Complete (0x06) plen 3 Status: PIN or Key Missing (0x06) Handle: 75 @ Authentication Failed: C4:43:8F:A3:4D:83 (0) status 0x05 < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 75 Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Success (0x00) > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 75 Reason: Connection Terminated By Local Host (0x16) @ Device Disconnected: C4:43:8F:A3:4D:83 (0) reason 4 Signed-off-by: Szymon Janc Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a3f86de6f100..003b25283407 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -445,6 +445,7 @@ enum { /* ---- HCI Error Codes ---- */ #define HCI_ERROR_UNKNOWN_CONN_ID 0x02 #define HCI_ERROR_AUTH_FAILURE 0x05 +#define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dc71473462ac..77d7fe115a0d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -654,6 +654,7 @@ enum { HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, + HCI_CONN_AUTH_FAILURE, }; 
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index ea73e0826aa7..7647964b1efa 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -645,6 +645,7 @@ struct mgmt_ev_device_connected { #define MGMT_DEV_DISCONN_TIMEOUT 0x01 #define MGMT_DEV_DISCONN_LOCAL_HOST 0x02 #define MGMT_DEV_DISCONN_REMOTE 0x03 +#define MGMT_DEV_DISCONN_AUTH_FAILURE 0x04 #define MGMT_EV_DEVICE_DISCONNECTED 0x000C struct mgmt_ev_device_disconnected { -- cgit From 9e238323799fb8c2add2b1de9a22edd4d4e51e30 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:55 -0300 Subject: sctp: allow others to use sctp_input_cb We process input path in other files too and having access to it is nice, so move it to a header where it's shared. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8626bdd3249a..966c3a40039c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -59,6 +59,7 @@ #include /* We need tq_struct. */ #include /* We need sctp* header structs. */ #include /* We need auth specific structs */ +#include /* For inet_skb_parm */ /* A convenience structure for handling sockaddr structures. * We should wean ourselves off this. @@ -1092,6 +1093,20 @@ static inline void sctp_outq_cork(struct sctp_outq *q) q->cork = 1; } +/* SCTP skb control block. + * sctp_input_cb is currently used on rx and sock rx queue + */ +struct sctp_input_cb { + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; + struct sctp_chunk *chunk; +}; +#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { -- cgit From f5d258e60722142e88cb6f0f337d78bca67cf973 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:56 -0300 Subject: sctp: reorder sctp_ulpevent and shrink msg_flags The next patch needs 8 bytes in there. sctp_ulpevent has a hole due to bad alignment; msg_flags is using 4 bytes while it actually uses only 2, so we shrink it, and iif member (4 bytes) which can be easily fetched from another place once the next patch is there, so we remove it and thus creating space for 8 bytes. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index cccdcfd14973..aa342645dbce 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,15 +48,15 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; - __u16 stream; - __u16 ssn; - __u16 flags; + unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int msg_flags; int iif; - unsigned int rmem_len; + __u16 stream; + __u16 ssn; + __u16 flags; + __u16 msg_flags; }; /* Retrieve the skb this event sits inside of. */ -- cgit From 1f45f78f8e511203f03138f2ccde3d2cf90d2cbf Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:57 -0300 Subject: sctp: allow GSO frags to access the chunk too SCTP will try to access original IP headers on sctp_recvmsg in order to copy the addresses used. 
There are also other places that do similar access to IP or even SCTP headers. But after 90017accff61 ("sctp: Add GSO support") they aren't always there because they are only present in the header skb. SCTP handles the queueing of incoming data by cloning the incoming skb and limiting to only the relevant payload. This clone has its cb updated to something different and it's then queued on socket rx queue. Thus we need to fix this in two moments. For rx path, not related to socket queue yet, this patch uses a partially copied sctp_input_cb to such GSO frags. This restores the ability to access the headers for this part of the code. Regarding the socket rx queue, it removes iif member from sctp_event and also add a chunk pointer on it. With these changes we're always able to reach the headers again. The biggest change here is that now the sctp_chunk struct and the original skb are only freed after the application consumed the buffer. Note however that the original payload was already like this due to the skb cloning. For iif, SCTP's IPv4 code doesn't use it, so no change is necessary. IPv6 now can fetch it directly from original's IPv6 CB as the original skb is still accessible. In the future we probably can simplify sctp_v*_skb_iif() stuff, as sctp_v4_skb_iif() was called but it's return value not used, and now it's not even called, but such cleanup is out of scope for this change. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 +++++++ include/net/sctp/ulpevent.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 966c3a40039c..f6f201de6fa4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1107,6 +1107,13 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) +{ + const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + + return chunk->head_skb ? : skb; +} + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index aa342645dbce..2c098cd7e7e2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,11 +48,11 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; + struct sctp_chunk *chunk; unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int iif; __u16 stream; __u16 ssn; __u16 flags; -- cgit From e7487c86dc5c4a528a7dbd9dc14f453a0de61a84 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:58 -0300 Subject: sctp: avoid identifying address family many times for a chunk Identifying address family operations during rx path is not something expensive but it's ugly to the eye to have it done multiple times, specially when we already validated it during initial rx processing. This patch takes advantage of the now shared sctp_input_cb and make the pointer to the operations readily available. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f6f201de6fa4..ce93c4b10d26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1104,6 +1104,7 @@ struct sctp_input_cb { #endif } header; struct sctp_chunk *chunk; + struct sctp_af *af; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) -- cgit From 29cc6679076a00a6ce193004dcf2d14ae7c428a5 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:37 +0300 Subject: net/mlx5: Store counters in rbtree instead of list In order to use bulk counters, we need to have counters sorted by id. Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 81e8396574f4..a041b99fceac 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -469,7 +469,7 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { - struct list_head list; + struct rb_root counters; struct list_head addlist; /* protect addlist add/splice operations */ spinlock_t addlist_lock; -- cgit From a351a1b03bf169f77891060be30036ef71cbe618 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:38 +0300 Subject: net/mlx5: Introduce bulk reading of flow counters This commit utilize the ability of ConnectX-4 to bulk read flow counters. Few bulk counter queries could be done instead of issuing thousands of firmware commands per second to get statistics of all flows set to HW, such as those programmed when we offload tc filters. Counters are stored sorted by hardware id, and queried in blocks (id + number of counters). Due to hardware requirement, start of block and number of counters in a block must be four aligned. Reviewed-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 152421cc6f44..d671e4e8e7db 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -893,7 +893,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_340[0x20]; + u8 reserved_at_340[0x8]; + u8 log_max_flow_counter_bulk[0x8]; + u8 max_flow_counter[0x10]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; @@ -980,7 +983,8 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; + u8 clear[0x1]; + u8 num_of_counters[0xf]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; -- cgit From 8438884d4ab423161b974854ebb90c08219dd678 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:43 +0300 Subject: net/switchdev: Export the same parent ID service function This helper serves to know if two switchdev port netdevices belong to the same HW ASIC, e.g to figure out if forwarding offload is possible between them. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 985619a59323..9023e3e3be0b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b); #else static inline void switchdev_deferred_process(void) @@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev, { } +static inline bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + return false; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ -- cgit From 0e1824c98a0ffd7fd9ffb2a3da01ec49ff1348a2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 14 Jul 2016 11:37:28 +0200 Subject: tracing: change owner name to driver name for devlink hwmsg tracepoint Turned on that driver->owner which is struct module is not available when modules are disabled. Better to depend on a driver name which is always available. Reported-by: Randy Dunlap Fixes: e5224f0fe2 ("devlink: add hardware messages tracing facility") Signed-off-by: Jiri Pirko Acked-by: Randy Dunlap Acked-by: Steven Rostedt Signed-off-by: David S. Miller --- include/trace/events/devlink.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 333c32ac9bfa..77dce71df42a 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -22,7 +22,7 @@ TRACE_EVENT(devlink_hwmsg, TP_STRUCT__entry( __string(bus_name, devlink->dev->bus->name) __string(dev_name, dev_name(devlink->dev)) - __string(owner_name, devlink->dev->driver->owner->name) + __string(driver_name, devlink->dev->driver->name) __field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -32,16 +32,16 @@ TRACE_EVENT(devlink_hwmsg, TP_fast_assign( __assign_str(bus_name, devlink->dev->bus->name); __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(owner_name, devlink->dev->driver->owner->name); + __assign_str(driver_name, devlink->dev->driver->name); __entry->incoming = incoming; __entry->type = type; memcpy(__get_dynamic_array(buf), buf, len); __entry->len = len; ), - TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", __get_str(bus_name), __get_str(dev_name), - __get_str(owner_name), __entry->incoming, __entry->type, + __get_str(driver_name), __entry->incoming, __entry->type, (int) __entry->len, __get_dynamic_array(buf), __entry->len) ); -- cgit From caeccd5180930eb8586771bb1935f4f2e456a8e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Jul 2016 11:37:29 +0200 Subject: devlink: fix trace format string Including devlink.h on ARM and probably other 32-bit architectures results in a harmless warning: In file included from ../include/trace/define_trace.h:95:0, from ../include/trace/events/devlink.h:51, from ../net/core/devlink.c:30: include/trace/events/devlink.h: In function 'trace_raw_output_devlink_hwmsg': include/trace/events/devlink.h:42:12: error: format '%lu' expects argument of type 'long unsigned int', but argument 10 has type 'size_t {aka unsigned int}' [-Werror=format=] The correct format string for 'size_t' is %zu, not %lu, this 
works on all architectures. Signed-off-by: Arnd Bergmann Fixes: e5224f0fe2ac ("devlink: add hardware messages tracing facility") Signed-off-by: Jiri Pirko Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/trace/events/devlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 77dce71df42a..09f1df228f2c 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -39,7 +39,7 @@ TRACE_EVENT(devlink_hwmsg, __entry->len = len; ), - TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu", + TP_printk("bus_name=%s dev_name=%s driver_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%zu", __get_str(bus_name), __get_str(dev_name), __get_str(driver_name), __entry->incoming, __entry->type, (int) __entry->len, __get_dynamic_array(buf), __entry->len) -- cgit From 77501a79cec40eac65c59ee7af3f786c703ead9c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 14 Jul 2016 16:29:43 +0200 Subject: net: phy: micrel: Add KSZ8041FTL fiber mode support We can't detect the FXEN (fiber mode) bootstrap pin, so configure it via a boolean device tree property "micrel,fiber-mode". If it is enabled, auto-negotiation is not supported. The only available modes are 100base-fx (full duplex and half duplex). Signed-off-by: Philipp Zabel Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 2e5b194b9b19..257173e0095e 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -37,6 +37,7 @@ /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 +#define MICREL_PHY_FXEN 0x00000002 #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit From 7e3f977edd0bd9ea6104156feba95bb5ae9bdd38 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Jul 2016 18:08:03 +0200 Subject: perf, events: add non-linear data support for raw records This patch adds support for non-linear data on raw records. It extends raw records to have one or multiple fragments that will be written linearly into the ring slot, where each fragment can optionally have a custom callback handler to walk and extract complex, possibly non-linear data. If a callback handler is provided for a fragment, then the new __output_custom() will be used instead of __output_copy() for the perf_output_sample() part. perf_prepare_sample() does all the size calculation only once, so perf_output_sample() doesn't need to redo the same work anymore, meaning real_size and padding will be cached in the raw record. The raw record becomes 32 bytes in size without holes; to not increase it further and to avoid doing unnecessary recalculations in fast-path, we can reuse next pointer of the last fragment, idea here is borrowed from ZERO_OR_NULL_PTR(), which should keep the perf_output_sample() path for PERF_SAMPLE_RAW minimal. This facility is needed for BPF's event output helper as a first user that will, in a follow-up, add an additional perf_raw_frag to its perf_raw_record in order to be able to more efficiently dump skb context after a linear head meta data related to it. skbs can be non-linear and thus need a custom output function to dump buffers. Currently, the skb data needs to be copied twice; with the help of __output_custom() this work only needs to be done once. 
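For illustration only, a caller-side sketch of the new layout; the names (my_copy, meta, ctx) are placeholders rather than anything added by this patch, and the structure mirrors in spirit how the BPF event output helper later in this series chains a linear meta data fragment with a single non-linear fragment:

#include <linux/perf_event.h>
#include <linux/string.h>

/* Placeholder copy callback: a real user would walk a non-linear object
 * here; returning 0 signals that everything was copied.
 */
static unsigned long my_copy(void *dst, const void *src, unsigned long len)
{
        memcpy(dst, src, len);
        return 0;
}

static void example_build_raw(void *meta, u32 meta_size, void *ctx, u32 ctx_size)
{
        struct perf_raw_frag frag = {
                .copy   = my_copy,
                .data   = ctx,
                .size   = ctx_size,
        };
        struct perf_raw_record raw = {
                .frag = {
                        {
                                .next   = ctx_size ? &frag : NULL,
                        },
                        .data   = meta,
                        .size   = meta_size,
                },
        };

        /* 'raw' would then be hooked into perf_sample_data before calling
         * perf_event_output(); the size/padding caching described above is
         * done by perf_prepare_sample().
         */
        (void)raw;
}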
Future users could be things like XDP/BPF programs that work on different context though and would thus also have a different callback function. The few users of raw records are adapted to initialize their frag data from the raw record itself, no change in behavior for them. The code is based upon a PoC diff provided by Peter Zijlstra [1]. [1] http://thread.gmane.org/gmane.linux.network/421294 Suggested-by: Peter Zijlstra Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/perf_event.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a827cecd62f..e79e6c6fed89 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx { bool contexts_maxed; }; +typedef unsigned long (*perf_copy_f)(void *dst, const void *src, + unsigned long len); + +struct perf_raw_frag { + union { + struct perf_raw_frag *next; + unsigned long pad; + }; + perf_copy_f copy; + void *data; + u32 size; +} __packed; + struct perf_raw_record { + struct perf_raw_frag frag; u32 size; - void *data; }; /* @@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) +{ + return frag->pad < sizeof(u64); +} + #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* -- cgit From 555c8a8623a3a87b3c990ba30b7fd2e5914e41d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Jul 2016 18:08:05 +0200 Subject: bpf: avoid stack copy and use skb ctx for event output This work addresses a couple of issues bpf_skb_event_output() helper currently has: i) We need two copies instead of just a single one for the skb data when it should be part of a sample. The data can be non-linear and thus needs to be extracted via bpf_skb_load_bytes() helper first, and then copied once again into the ring buffer slot. ii) Since bpf_skb_load_bytes() currently needs to be used first, the helper needs to see a constant size on the passed stack buffer to make sure BPF verifier can do sanity checks on it during verification time. Thus, just passing skb->len (or any other non-constant value) wouldn't work, but changing bpf_skb_load_bytes() is also not the proper solution, since the two copies are generally still needed. iii) bpf_skb_load_bytes() is just for rather small buffers like headers, since they need to sit on the limited BPF stack anyway. Instead of working around in bpf_skb_load_bytes(), this work improves the bpf_skb_event_output() helper to address all 3 at once. We can make use of the passed in skb context that we have in the helper anyway, and use some of the reserved flag bits as a length argument. The helper will use the new __output_custom() facility from perf side with bpf_skb_copy() as callback helper to walk and extract the data. It will pass the data for setup to bpf_event_output(), which generates and pushes the raw record with an additional frag part. The linear data used in the first frag of the record serves as programmatically defined meta data passed along with the appended sample. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 7 ++++++- include/uapi/linux/bpf.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b3336b4f5d04..c13e92b00bf5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); -const struct bpf_func_proto *bpf_get_event_output_proto(void); + +typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, + unsigned long len); + +u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 262a7e883b19..c4d922439d20 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -401,6 +401,8 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure -- cgit From 02a198777e646a12a8aabae5639f1d33d81d79ef Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 15 Jul 2016 23:55:20 +0200 Subject: net: fixup for tracepoint napi:napi_poll The recent change to tracepoint napi:napi_poll changed the order of the parameters that perf scripts sees, the printk was correct. The problem was that the new parameters (work and budget) were pushed in front of dev_name. The new parameters obviously need to be appended to keep backward compatible. Fixes: 1db19db7f5ff ("net: tracepoint napi:napi_poll add work and budget") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/napi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 118ed7767639..0b9e5136a2a3 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -18,16 +18,16 @@ TRACE_EVENT(napi_poll, TP_STRUCT__entry( __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) __field( int, work) __field( int, budget) - __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) ), TP_fast_assign( __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); __entry->work = work; __entry->budget = budget; - __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); ), TP_printk("napi poll on napi struct %p for device %s work %d budget %d", -- cgit From 43b9e127406079d187794a5140a2411fbc6df2df Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 14 Jul 2016 19:28:27 +0300 Subject: net: ipmr/ip6mr: add support for keeping an entry age In preparation for hardware offloading of ipmr/ip6mr we need an interface that allows to check (and later update) the age of entries. 
Relying on stats alone can show activity but not actual age of the entry, furthermore when there're tens of thousands of entries a lot of the hardware implementations only support "hit" bits which are cleared on read to denote that the entry was active and shouldn't be aged out, these can then be naturally translated into age timestamp and will be compatible with the software forwarding age. Using a lastuse entry doesn't affect performance because the members in that cache line are written to along with the age. Since all new users are encouraged to use ipmr via netlink, this is exported via the RTA_EXPIRES attribute. Also do a minor local variable declaration style adjustment - arrange them longest to shortest. Signed-off-by: Nikolay Aleksandrov CC: Roopa Prabhu CC: Shrijeet Mukherjee CC: Satish Ashok CC: Donald Sharp CC: David S. Miller CC: Alexey Kuznetsov CC: James Morris CC: Hideaki YOSHIFUJI CC: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + include/linux/mroute6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index bf9b322cb0b0..d351fd3e1049 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -104,6 +104,7 @@ struct mfc_cache { unsigned long bytes; unsigned long pkt; unsigned long wrong_if; + unsigned long lastuse; unsigned char ttls[MAXVIFS]; /* TTL thresholds */ } res; } mfc_un; diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 66982e764051..3987b64040c5 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -92,6 +92,7 @@ struct mfc6_cache { unsigned long bytes; unsigned long pkt; unsigned long wrong_if; + unsigned long lastuse; unsigned char ttls[MAXMIFS]; /* TTL thresholds */ } res; } mfc_un; -- cgit From b786241253041c13d94309ca4dace301833f63d1 Mon Sep 17 00:00:00 2001 From: Dongpo Li Date: Fri, 15 Jul 2016 16:26:34 +0800 Subject: of_mdio: Abstract a general interface for phy connect Abstract a general interface "of_phy_get_and_connect" for PHY connect. User will have no bother with getting "phy-mode" and "phy-handle" any more. Suggested-by: Arnd Bergmann Signed-off-by: Dongpo Li Reviewed-by: Jiancheng Xue Signed-off-by: David S. 
Miller --- include/linux/of_mdio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 4b04587d0441..2ab233661ae5 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -19,6 +19,9 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct device_node *phy_np, void (*hndlr)(struct net_device *), u32 flags, phy_interface_t iface); +extern struct phy_device * +of_phy_get_and_connect(struct net_device *dev, struct device_node *np, + void (*hndlr)(struct net_device *)); struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface); @@ -52,6 +55,13 @@ static inline struct phy_device *of_phy_connect(struct net_device *dev, return NULL; } +static inline struct phy_device * +of_phy_get_and_connect(struct net_device *dev, struct device_node *np, + void (*hndlr)(struct net_device *)) +{ + return NULL; +} + static inline struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface) -- cgit From 4360fa22ad5b48a1d1e10e31ffb383ed8c977435 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Jun 2016 11:02:03 +0200 Subject: drivers: misc: ti-st: Use int instead of fuzzy char for callback status On mips and parisc: drivers/bluetooth/btwilink.c: In function 'ti_st_open': drivers/bluetooth/btwilink.c:174:21: warning: overflow in implicit constant conversion [-Woverflow] hst->reg_status = -EINPROGRESS; drivers/nfc/nfcwilink.c: In function 'nfcwilink_open': drivers/nfc/nfcwilink.c:396:31: warning: overflow in implicit constant conversion [-Woverflow] drv->st_register_cb_status = -EINPROGRESS; There are actually two issues: 1. Whether "char" is signed or unsigned depends on the architecture. As the completion callback data is used to pass a (negative) error code, it should always be signed. 2. EINPROGRESS is 150 on mips, 245 on parisc. Hence -EINPROGRESS doesn't fit in a signed 8-bit number. Change the callback status from "char" to "int" to fix these. Signed-off-by: Geert Uytterhoeven Acked-by: Mauro Carvalho Chehab Acked-by: Samuel Ortiz Signed-off-by: Marcel Holtmann --- include/linux/ti_wilink_st.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 0a0d56834c8e..f2293028ab9d 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -71,7 +71,7 @@ struct st_proto_s { enum proto_type type; long (*recv) (void *, struct sk_buff *); unsigned char (*match_packet) (const unsigned char *data); - void (*reg_complete_cb) (void *, char data); + void (*reg_complete_cb) (void *, int data); long (*write) (struct sk_buff *skb); void *priv_data; -- cgit From f962fe32f2f85769cd835ddcecbff8c1d34cf561 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 17 Jul 2016 19:55:15 +0200 Subject: Bluetooth: Move hci_recv_frame and hci_recv_diag prototypes The protoypes for hci_recv_frame and hci_recv_diag are in the wrong location in the header file. Move them close to all the other hci_dev related exported functions. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 77d7fe115a0d..84d0273d826a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1022,6 +1022,8 @@ void hci_unregister_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); +int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); +int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); @@ -1098,9 +1100,6 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); - void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); -- cgit From 5177a83827cd0b8cf6ce0391b00dd4417352d2f1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 17 Jul 2016 19:55:16 +0200 Subject: Bluetooth: Add debugfs fields for hardware and firmware info Some Bluetooth controllers allow for reading hardware and firmware related vendor specific infos. If they are available, then they can be exposed via debugfs now. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 84d0273d826a..ee7fc47680a1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -372,6 +372,8 @@ struct hci_dev { atomic_t promisc; + const char *hw_info; + const char *fw_info; struct dentry *debugfs; struct device dev; @@ -1024,6 +1026,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); +void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); -- cgit From f4dc77713f8016d2e8a3295e1c9c53a21f296def Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 14 Jul 2016 17:51:26 +0200 Subject: netfilter: x_tables: speed up jump target validation The dummy ruleset I used to test the original validation change was broken, most rules were unreachable and were not tested by mark_source_chains(). In some cases rulesets that used to load in a few seconds now require several minutes. sample ruleset that shows the behaviour: echo "*filter" for i in $(seq 0 100000);do printf ":chain_%06x - [0:0]\n" $i done for i in $(seq 0 100000);do printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i done echo COMMIT [ pipe result into iptables-restore ] This ruleset will be about 74mbyte in size, with ~500k searches though all 500k[1] rule entries. 
iptables-restore will take forever (gave up after 10 minutes) Instead of always searching the entire blob for a match, fill an array with the start offsets of every single ipt_entry struct, then do a binary search to check if the jump target is present or not. After this change ruleset restore times get again close to what one gets when reverting 36472341017529e (~3 seconds on my workstation). [1] every user-defined rule gets an implicit RETURN, so we get 300k jumps + 100k userchains + 100k returns -> 500k rule entries Fixes: 36472341017529e ("netfilter: x_tables: validate targets of jumps") Reported-by: Jeff Wu Tested-by: Jeff Wu Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e94e81ab2b58..2ad1a2b289b5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +unsigned int *xt_alloc_entry_offsets(unsigned int size); +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, -- cgit From cc2d1de06f0572a51437d1f31633d81afea5eb47 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Jul 2016 17:14:18 +0200 Subject: bcma: define ChipCommon B MII registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't have access to datasheets to document all the bits but we can name these registers at least. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index a5ac2cad5cb7..b20e3d56253f 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -504,6 +504,9 @@ #define BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_MASK 0x1ff00000 #define BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_SHIFT 20 +#define BCMA_CCB_MII_MNG_CTL 0x0000 +#define BCMA_CCB_MII_MNG_CMD_DATA 0x0004 + /* BCM4331 ChipControl numbers. */ #define BCMA_CHIPCTL_4331_BT_COEXIST BIT(0) /* 0 disable */ #define BCMA_CHIPCTL_4331_SECI BIT(1) /* 0 SECI is disabled (JATG functional) */ -- cgit From 359ebda25aa06fe3a1d028f7e338a849165e661b Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 18 Jul 2016 14:49:33 +0300 Subject: net/ipv4: Introduce IPSKB_FRAG_SEGS bit to inet_skb_parm.flags This flag indicates whether fragmentation of segments is allowed. Formerly this policy was hardcoded according to IPSKB_FORWARDED (set by either ip_forward or ipmr_forward). Cc: Hannes Frederic Sowa Cc: Florian Westphal Signed-off-by: Shmulik Ladkani Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 08f36cd2b874..9742b92dc933 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,6 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) +#define IPSKB_FRAG_SEGS BIT(7) u16 frag_max_size; }; -- cgit From 34a79f63bbe49c888f95e75dd759685a238556b6 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 18 Jul 2016 20:45:38 -0400 Subject: net: dsa: support switchdev ageing time attr Add a new function for DSA drivers to handle the switchdev SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME attribute. The ageing time is passed as milliseconds. Also because we can have multiple logical bridges on top of a physical switch and ageing time are switch-wide, call the driver function with the fastest ageing time in use on the chip instead of the requested one. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 52ab18bc2b0d..2217a3f817f8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -141,6 +141,7 @@ struct dsa_switch_tree { struct dsa_port { struct net_device *netdev; struct device_node *dn; + unsigned int ageing_time; }; struct dsa_switch { @@ -329,6 +330,7 @@ struct dsa_switch_driver { /* * Bridge integration */ + int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port); -- cgit From 2d283bdd079c0ad4da020bbc9e9c2a4280823098 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:16 +1000 Subject: net/ncsi: Resource management NCSI spec (DSP0222) defines several objects: package, channel, mode, filter, version and statistics etc. This introduces the data structs to represent those objects and implement functions to manage them. Also, this introduces CONFIG_NET_NCSI for the newly implemented NCSI stack. * The user (e.g. netdev driver) dereference NCSI device by "struct ncsi_dev", which is embedded to "struct ncsi_dev_priv". The later one is used by NCSI stack internally. * Every NCSI device can have multiple packages simultaneously, up to 8 packages. It's represented by "struct ncsi_package" and identified by 3-bits ID. * Every NCSI package can have multiple channels, up to 32. It's represented by "struct ncsi_channel" and identified by 5-bits ID. * Every NCSI channel has version, statistics, various modes and filters. They are represented by "struct ncsi_channel_version", "struct ncsi_channel_stats", "struct ncsi_channel_mode" and "struct ncsi_channel_filter" separately. * Apart from AEN (Asynchronous Event Notification), the NCSI stack works in terms of command and response. This introduces "struct ncsi_req" to represent a complete NCSI transaction made of NCSI request and response. link: https://www.dmtf.org/sites/default/files/standards/documents/DSP0222_1.1.0.pdf Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. 
Miller --- include/net/ncsi.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/net/ncsi.h (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h new file mode 100644 index 000000000000..70d14ee1ef84 --- /dev/null +++ b/include/net/ncsi.h @@ -0,0 +1,46 @@ +#ifndef __NET_NCSI_H +#define __NET_NCSI_H + +/* + * The NCSI device states seen from external. More NCSI device states are + * only visible internally (in net/ncsi/internal.h). When the NCSI device + * is registered, it's in ncsi_dev_state_registered state. The state + * ncsi_dev_state_start is used to drive to choose active package and + * channel. After that, its state is changed to ncsi_dev_state_functional. + * + * The state ncsi_dev_state_stop helps to shut down the currently active + * package and channel while ncsi_dev_state_config helps to reconfigure + * them. + */ +enum { + ncsi_dev_state_registered = 0x0000, + ncsi_dev_state_functional = 0x0100, + ncsi_dev_state_probe = 0x0200, + ncsi_dev_state_config = 0x0300, + ncsi_dev_state_suspend = 0x0400, +}; + +struct ncsi_dev { + int state; + int link_up; + struct net_device *dev; + void (*handler)(struct ncsi_dev *ndev); +}; + +#ifdef CONFIG_NET_NCSI +struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)); +void ncsi_unregister_dev(struct ncsi_dev *nd); +#else /* !CONFIG_NET_NCSI */ +static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)) +{ + return NULL; +} + +static inline void ncsi_unregister_dev(struct ncsi_dev *nd) +{ +} +#endif /* CONFIG_NET_NCSI */ + +#endif /* __NET_NCSI_H */ -- cgit From 6389eaa7fa9c3ee6c7d39f6087b86660d17236ac Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:17 +1000 Subject: net/ncsi: NCSI command packet handler The NCSI command packets are sent from MC (Management Controller) to remote end. They are used for multiple purposes: probe existing NCSI package/channel, retrieve NCSI channel's capability, configure NCSI channel etc. This defines struct to represent NCSI command packets and introduces function ncsi_xmit_cmd(), which will be used to transmit NCSI command packet according to the request. The request is represented by struct ncsi_cmd_arg. Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index cec849a239f6..117d02e0fc31 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -87,6 +87,7 @@ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_NCSI 0x88F8 /* NCSI protocol */ #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ -- cgit From e6f44ed6d04d3185dcd8e8e98af8742d87bdffcc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2016 11:54:19 +1000 Subject: net/ncsi: Package and channel management This manages NCSI packages and channels: * The available packages and channels are enumerated in the first time of calling ncsi_start_dev(). The channels' capabilities are probed in the meanwhile. The NCSI network topology won't change until the NCSI device is destroyed. 
* There is a queue in every NCSI device. Each element in the queue is a channel waiting for configuration (bringup) or suspending (teardown). The channel's state (inactive/active) indicates the further action (configuration or suspending) that will be applied to the channel. Another channel state (invisible) means the requested action is being applied. * Hardware arbitration will be enabled if all available packages and channels support it. All available channels try to provide service when hardware arbitration is enabled. Otherwise, only one channel is selected as the active one at a time. * When a channel is in the active state, meaning it's providing service, a timer is started to retrieve the channel's link status. If the channel's link status fails to be updated within the determined period, the channel is reconfigured. This is the error handling implementation defined in the NCSI spec. Signed-off-by: Gavin Shan Acked-by: Joel Stanley Signed-off-by: David S. Miller --- include/net/ncsi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 70d14ee1ef84..1dbf42f79750 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -30,6 +30,7 @@ struct ncsi_dev { #ifdef CONFIG_NET_NCSI struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); +int ncsi_start_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -38,6 +39,11 @@ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, return NULL; } +static inline int ncsi_start_dev(struct ncsi_dev *nd) +{ + return -ENOTTY; +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } -- cgit From 59d3656d5bf504f771fc44fdbc7a9a8590795f22 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:46 -0700 Subject: bpf: add bpf_prog_add api for bulk prog refcnt A subsystem may need to store many copies of a bpf program, each deserving its own reference. Rather than requiring the caller to loop one by one (with possible mid-loop failure), add a bulk bpf_prog_add api. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c13e92b00bf5..75a5ae6bee07 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -224,6 +224,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -- cgit From 6a773a15a1e8874e5eccd2f29190c31085912c95 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:47 -0700 Subject: bpf: add XDP prog type for early driver filter Add a new bpf prog type that is intended to run in early stages of the packet rx path. Only minimal packet metadata will be available, hence a new context type, struct xdp_md, is exposed to userspace. So far it only exposes the packet start and end pointers, and only in read mode. An XDP program must return one of the well known enum values, all other return codes are reserved for future use.
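For illustration only, a minimal program of this type, written in restricted C for the clang BPF target, might look like the sketch below; the ELF section name and the Ethernet-header bounds check are conventions of the example, not something this patch mandates:

#include <linux/bpf.h>
#include <linux/if_ether.h>

/* Drop frames too short to carry an Ethernet header, pass everything else. */
__attribute__((section("xdp"), used))
int xdp_drop_runts(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;

        if (data + sizeof(struct ethhdr) > data_end)
                return XDP_DROP;

        return XDP_PASS;
}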
Unfortunately, this restriction is hard to enforce at verification time, so take the approach of warning at runtime when such programs are encountered. Out of bounds return codes should alias to XDP_ABORTED. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 18 ++++++++++++++++++ include/uapi/linux/bpf.h | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6fc31ef1da2d..15d816a8b755 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -368,6 +368,11 @@ struct bpf_skb_data_end { void *data_end; }; +struct xdp_buff { + void *data; + void *data_end; +}; + /* compute the linear packet data range [data, data_end) which * will be accessed by cls_bpf and act_bpf programs */ @@ -429,6 +434,18 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, skb); } +static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret; + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, (void *)xdp); + rcu_read_unlock(); + + return ret; +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), @@ -509,6 +526,7 @@ bool bpf_helper_changes_skb_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4d922439d20..a51786566c2f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, }; #define BPF_PSEUDO_MAP_FD 1 @@ -439,4 +440,23 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will result + * in packet drop. + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit From a7862b45849fe2f8610a2bec89235580f55d337f Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:48 -0700 Subject: net: add ndo to setup/query xdp prog in adapter rx Add one new netdev op for drivers implementing the BPF_PROG_TYPE_XDP filter. The single op is used for both setup/query of the xdp program, modelled after ndo_setup_tc. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/linux/netdevice.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49736a31acaa..fab9a1c2a2ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct bpf_prog; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -799,6 +800,33 @@ struct tc_to_netdev { }; }; +/* These structures hold the attributes of xdp state that are being passed + * to the netdevice through the xdp op. 
+ */ +enum xdp_netdev_command { + /* Set or clear a bpf program used in the earliest stages of packet + * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee + * is responsible for calling bpf_prog_put on any old progs that are + * stored. In case of error, the callee need not release the new prog + * reference, but on success it takes ownership and must bpf_prog_put + * when it is no longer used. + */ + XDP_SETUP_PROG, + /* Check if a bpf program is set on the device. The callee should + * return true if a program is currently attached and running. + */ + XDP_QUERY_PROG, +}; + +struct netdev_xdp { + enum xdp_netdev_command command; + union { + /* XDP_SETUP_PROG */ + struct bpf_prog *prog; + /* XDP_QUERY_PROG */ + bool prog_attached; + }; +}; /* * This structure defines the management hooks for network devices. @@ -1087,6 +1115,9 @@ struct tc_to_netdev { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. + * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * This function is used to set or query state related to XDP on the + * netdevice. See definition of enum xdp_netdev_command for details. * */ struct net_device_ops { @@ -1271,6 +1302,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + int (*ndo_xdp)(struct net_device *dev, + struct netdev_xdp *xdp); }; /** @@ -3257,6 +3290,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); +int dev_change_xdp_fd(struct net_device *dev, int fd); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit From d1fdd9138682e0f272beee0cb08b6328c5478b26 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:49 -0700 Subject: rtnl: add option for setting link xdp prog Sets the bpf program represented by fd as an early filter in the rx path of the netdev. The fd must have been created as BPF_PROG_TYPE_XDP. Providing a negative value as fd clears the program. Getting the fd back via rtnl is not possible, therefore reading of this value merely provides a bool whether the program is valid on the link or not. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4285ac31e865..a1b5202c5f6b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -156,6 +156,7 @@ enum { IFLA_GSO_MAX_SEGS, IFLA_GSO_MAX_SIZE, IFLA_PAD, + IFLA_XDP, __IFLA_MAX }; @@ -843,4 +844,15 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* XDP section */ + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit From 6ce96ca348a9e949f8c43f4d3e98db367d93cffd Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:53 -0700 Subject: bpf: add XDP_TX xdp_action for direct forwarding XDP enabled drivers must transmit received packets back out on the same port they were received on when a program returns this action. 
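As a purely illustrative sketch (not part of the patch), a program could use the new action to reflect traffic, swapping the Ethernet source and destination addresses before handing the frame back to the driver:

#include <linux/bpf.h>
#include <linux/if_ether.h>

/* Bounce each frame back out the ingress port with the MAC addresses swapped. */
__attribute__((section("xdp"), used))
int xdp_reflect(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;
        unsigned char tmp[ETH_ALEN];

        if (data + sizeof(*eth) > data_end)
                return XDP_ABORTED;

        __builtin_memcpy(tmp, eth->h_dest, ETH_ALEN);
        __builtin_memcpy(eth->h_dest, eth->h_source, ETH_ALEN);
        __builtin_memcpy(eth->h_source, tmp, ETH_ALEN);

        return XDP_TX;
}

The bounds check is what lets the verifier prove that the header accesses stay within the received frame.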
Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a51786566c2f..2b7076f5b5ad 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -449,6 +449,7 @@ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, + XDP_TX, }; /* user accessible metadata for XDP packet hook -- cgit From 224e92e02a769b8028ca2450443586af8b4f1715 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:54 -0700 Subject: net/mlx4_en: break out tx_desc write into separate function In preparation for writing the tx descriptor from multiple functions, create a helper for both normal and blueflame access. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 587cdf943b52..deaa2217214d 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -291,16 +291,18 @@ enum { MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; +union mlx4_wqe_qpn_vlan { + struct { + __be16 vlan_tag; + u8 ins_vlan; + u8 fence_size; + }; + __be32 bf_qpn; +}; + struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; - union { - struct { - __be16 vlan_tag; - u8 ins_vlan; - u8 fence_size; - }; - __be32 bf_qpn; - }; + union mlx4_wqe_qpn_vlan qpn_vlan; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) -- cgit From cc2e0b3fbcdd9667d7b7ecdf36d7b4d3647681d6 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Wed, 20 Jul 2016 07:55:52 -0700 Subject: bpf: fix implicit declaration of bpf_prog_add For the ifndef case of CONFIG_BPF_SYSCALL, an inline version of bpf_prog_add needs to exist otherwise the build breaks on some configs. drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2544:10: error: implicit declaration of function 'bpf_prog_add' prog = bpf_prog_add(prog, priv->rx_ring_num - 1); The function is introduced in 59d3656d5bf50 ("bpf: add bpf_prog_add api for bulk prog refcnt") and first used in 47f1afdba2b87 ("net/mlx4_en: add support for fast rx drop bpf program"). Fixes: 47f1afdba2b87 ("net/mlx4_en: add support for fast rx drop bpf program") Reported-by: kbuild test robot Reported-by: Tariq Toukan Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75a5ae6bee07..36da0749205a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -289,6 +289,10 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void bpf_prog_put(struct bpf_prog *prog) { -- cgit From b02b94b331be5097d248d49242bd1fc0649a8092 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Jul 2016 20:17:47 +0200 Subject: bpf, elf: add official ELF machine define for eBPF Add the official BPF ELF e_machine value that was assigned recently [1,2] and will be propagated to glibc, et al. LLVM is switching to it in 3.9 release. 
[1] https://github.com/llvm-mirror/llvm/commit/36b9c09330bfb5e771914cfe307588f30d5510d2 [2] http://lists.iovisor.org/pipermail/iovisor-dev/2016-June/000266.html Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index c3fdfe79e5cc..cb5d1a519202 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -40,6 +40,7 @@ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ +#define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ -- cgit From 82de0be6862cdca2e6802267bda57cfc8844d3a7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 18 Jul 2016 11:39:23 +0800 Subject: netfilter: Add helper array register/unregister functions Add nf_ct_helper_init(), nf_conntrack_helpers_register() and nf_conntrack_helpers_unregister() functions to avoid repetitive opencoded initialization in helpers. This patch keeps an id parameter for nf_ct_helper_init() not to break helper matching by name that has been inconsistently exposed to userspace through ports, eg. ftp-2121, and through an incremental id, eg. tftp-1. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 6cf614bc0029..1eaac1f4cd6a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); int nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); -- cgit From 5f652bb2eb3eb38f97194ce5c41da1fa12e914b8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jul 2016 18:11:31 +0200 Subject: gro_cells: gro_cells_receive now return error code so that the caller can update stats accordingly, if needed Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/gro_cells.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index cf6c74550baa..d15214d673b2 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -14,27 +14,26 @@ struct gro_cells { struct gro_cell __percpu *cells; }; -static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) +static inline int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct gro_cell *cell; struct net_device *dev = skb->dev; - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { - netif_rx(skb); - return; - } + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + return netif_rx(skb); cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return; + return NET_RX_DROP; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + return NET_RX_SUCCESS; } /* called under BH context */ -- cgit From 23014011ba4209a086931ff402eac1c41abbe456 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:16 +0200 Subject: netfilter: conntrack: support a fixed size of 128 distinct labels The conntrack label extension is currently variable-sized, e.g. if only 2 labels are used by iptables rules then the labels->bits[] array will only contain one element. We track size of each label storage area in the 'words' member. But in nftables and openvswitch we always have to ask for worst-case since we don't know what bit will be used at configuration time. As most arches are 64bit we need to allocate 24 bytes in this case: struct nf_conn_labels { u8 words; /* 0 1 */ /* XXX 7 bytes hole, try to pack */ long unsigned bits[2]; /* 8 24 */ Make bits a fixed size and drop the words member, it simplifies the code and only increases memory requirements on x86 when less than 64bit labels are required. We still only allocate the extension if its needed. 
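As a hypothetical caller-side sketch (not part of the patch), setting a label bit with the fixed-size layout only needs a bounds check against the array itself, since the per-extension 'words' field is gone:

#include <linux/bitops.h>
#include <net/netfilter/nf_conntrack_labels.h>

/* Hypothetical helper: set one connlabel bit on a conntrack entry. */
static int example_label_set(struct nf_conn *ct, u16 bit)
{
        struct nf_conn_labels *labels = nf_ct_labels_find(ct);

        if (!labels || BIT_WORD(bit) >= ARRAY_SIZE(labels->bits))
                return -ENOSPC;

        if (!test_bit(bit, labels->bits))
                set_bit(bit, labels->bits);

        return 0;
}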
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c5f8fc736b3d..0fd4989de836 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -10,8 +10,7 @@ #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) struct nf_conn_labels { - u8 words; - unsigned long bits[]; + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) @@ -26,20 +25,13 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - struct nf_conn_labels *cl_ext; struct net *net = nf_ct_net(ct); - u8 words; - words = ACCESS_ONCE(net->ct.label_words); - if (words == 0) + if (net->ct.labels_used == 0) return NULL; - cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - words * sizeof(long), GFP_ATOMIC); - if (cl_ext != NULL) - cl_ext->words = words; - - return cl_ext; + return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + sizeof(struct nf_conn_labels), GFP_ATOMIC); #else return NULL; #endif -- cgit From 857ed310c013fe0d0059f955048dab589fa7a57a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:17 +0200 Subject: netfilter: connlabels: move set helper to xt_connlabel xt_connlabel is the only user so move it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 0fd4989de836..498814626e28 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -37,8 +37,6 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #endif } -int nf_connlabel_set(struct nf_conn *ct, u16 bit); - int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words); -- cgit From bf3994d2ed310813da28362d87bfe9f0e1c3e37f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 21 Jul 2016 12:03:11 +0200 Subject: net/sched: introduce Match-all classifier The matchall classifier matches every packet and allows the user to apply actions on it. This filter is very useful in usecases where every packet should be matched, for example, packet mirroring (SPAN) can be setup very easily using that filter. Signed-off-by: Jiri Pirko Signed-off-by: Yotam Gigi Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 5702e933dc07..a32494887e01 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -433,6 +433,17 @@ enum { #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) +/* Match-all classifier */ + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { -- cgit From b87f7936a93246804cf70e7e2e0568799c948bb1 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Thu, 21 Jul 2016 12:03:12 +0200 Subject: net/sched: Add match-all classifier hw offloading. Following the work that have been done on offloading classifiers like u32 and flower, now the match-all classifier hw offloading is possible. if the interface supports tc offloading. To control the offloading, two tc flags have been introduced: skip_sw and skip_hw. Typical usage: tc filter add dev eth25 parent ffff: \ matchall skip_sw \ action mirred egress mirror \ dev eth27 Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 11 +++++++++++ include/uapi/linux/pkt_cls.h | 1 + 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43c749b1b619..076df5360ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -787,6 +787,7 @@ enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, + TC_SETUP_MATCHALL, }; struct tc_cls_u32_offload; @@ -797,6 +798,7 @@ struct tc_to_netdev { u8 tc; struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; + struct tc_cls_matchall_offload *cls_mall; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 3722dda0199d..6f8d65342d3a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -442,4 +442,15 @@ struct tc_cls_flower_offload { struct tcf_exts *exts; }; +enum tc_matchall_command { + TC_CLSMATCHALL_REPLACE, + TC_CLSMATCHALL_DESTROY, +}; + +struct tc_cls_matchall_offload { + enum tc_matchall_command command; + struct tcf_exts *exts; + unsigned long cookie; +}; + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index a32494887e01..d1c1ccaba787 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -439,6 +439,7 @@ enum { TCA_MATCHALL_UNSPEC, TCA_MATCHALL_CLASSID, TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, __TCA_MATCHALL_MAX, }; -- cgit From 56a20680f70393d199fc5e8ecc7859549ca5a0c0 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Thu, 21 Jul 2016 12:03:16 +0200 Subject: net/sched: act_mirred: Add helper inlines to access tcf_mirred info. The helper function is_tcf_mirred_mirror helps finding whether an action struct is of type mirred and is configured to be of type mirror. Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_mirred.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index e891835eb74e..6a13a7c74e0c 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -24,6 +24,15 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } +static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; +#endif + return false; +} + static inline int tcf_mirred_ifindex(const struct tc_action *a) { return to_mirred(a)->tcfm_ifindex; -- cgit From aa7145c16d6bf086538ad7eb20c807513bfa5efc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Jul 2016 01:19:42 +0200 Subject: bpf, events: fix offset in skb copy handler This patch fixes the __output_custom() routine we currently use with bpf_skb_copy(). I missed that when len is larger than the size of the current handle, we can issue multiple invocations of copy_func, and __output_custom() advances destination but also source buffer by the written amount of bytes. When we have __output_custom(), this is actually wrong since in that case the source buffer points to a non-linear object, in our case an skb, which the copy_func helper is supposed to walk. Therefore, since this is non-linear we thus need to pass the offset into the helper, so that copy_func can use it for extracting the data from the source object. Therefore, adjust the callback signatures properly and pass offset into the skb_header_pointer() invoked from bpf_skb_copy() callback. The __DEFINE_OUTPUT_COPY_BODY() is adjusted to accommodate for two things: i) to pass in whether we should advance source buffer or not; this is a compile-time constant condition, ii) to pass in the offset for __output_custom(), which we do with help of __VA_ARGS__, so everything can stay inlined as is currently. Both changes allow for adapting the __output_* fast-path helpers w/o extra overhead. Fixes: 555c8a8623a3 ("bpf: avoid stack copy and use skb ctx for event output") Fixes: 7e3f977edd0b ("perf, events: add non-linear data support for raw records") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 2 +- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 36da0749205a..11134238417d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -211,7 +211,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, - unsigned long len); + unsigned long off, unsigned long len); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e79e6c6fed89..15e55b7ee096 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -70,7 +70,7 @@ struct perf_callchain_entry_ctx { }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, - unsigned long len); + unsigned long off, unsigned long len); struct perf_raw_frag { union { -- cgit From 2ccbe2cb79f2f74ab739252299b6f9ff27586f2c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 22 Jul 2016 15:07:56 +0200 Subject: macsec: limit ICV length to 16 octets IEEE 802.1AE-2006 standard recommends that the ICV element in a MACsec frame should not exceed 16 octets: add MACSEC_STD_ICV_LEN in uapi definitions accordingly, and avoid accepting configurations where the ICV length exceeds the standard value. Leave definition of MACSEC_MAX_ICV_LEN unchanged for backwards compatibility with userspace programs. Fixes: dece8d2b78d1 ("uapi: add MACsec bits") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/uapi/linux/if_macsec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index f7d4831a2cc7..02fc49cb72d8 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -26,6 +26,8 @@ #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 +/* upper limit for ICV length as recommended by IEEE802.1AE-2006 */ +#define MACSEC_STD_ICV_LEN 16 enum macsec_attrs { MACSEC_ATTR_UNSPEC, -- cgit From ae76715d153e33c249b6850361e4d8d775388b5a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Sun, 24 Jul 2016 16:12:39 +0300 Subject: net/mlx5e: Check the minimum inline header mode before xmit Each send queue (SQ) has inline mode that defines the minimal required inline headers in the SQ WQE. Before sending each packet check that the minimum required headers on the WQE are copied. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e0a3ed758287..0b6d15cddb2f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -129,6 +129,13 @@ __mlx5_mask(typ, fld)) tmp; \ }) +enum mlx5_inline_modes { + MLX5_INLINE_MODE_NONE, + MLX5_INLINE_MODE_L2, + MLX5_INLINE_MODE_IP, + MLX5_INLINE_MODE_TCP_UDP, +}; + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, -- cgit From cff92d7c7ebd7ceddd4def6b39e0302585b1eb14 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Sun, 24 Jul 2016 16:12:40 +0300 Subject: net/mlx5e: Query minimum required header copy during xmit Add support for query the minimum inline mode from the Firmware. 
It is required for correct TX steering according to L3/L4 packet headers. Each send queue (SQ) has inline mode that defines the minimal required headers that needs to be copied into the SQ WQE. The driver asks the Firmware for the wqe_inline_mode device capability value. In case the device capability defined as "vport context" the driver must check the reported min inline mode from the vport context before creating its SQs. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++--- include/linux/mlx5/vport.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d671e4e8e7db..21bc4557b67a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -536,7 +536,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 self_lb_en_modifiable[0x1]; u8 reserved_at_9[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_at_10[0x4]; + u8 reserved_at_10[0x2]; + u8 wqe_inline_mode[0x2]; u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; @@ -2270,7 +2271,8 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x1]; + u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; u8 reserved_at_d[0x13]; @@ -2367,7 +2369,9 @@ struct mlx5_ifc_rmpc_bits { }; struct mlx5_ifc_nic_vport_context_bits { - u8 reserved_at_0[0x1f]; + u8 reserved_at_0[0x5]; + u8 min_wqe_inline_mode[0x3]; + u8 reserved_at_8[0x17]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 6c16c198f680..e087b7d047ac 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -43,6 +43,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); +void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); -- cgit From 9b8ac4f9dd60ac7375fb0e221dbf596db4c4e622 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 24 Jul 2016 19:24:09 +0100 Subject: gtp: #define #define _GTP_H_ and not #define _GTP_H Fix clang build warning: ./include/net/gtp.h:1:9: warning: '_GTP_H_' is used as a header guard here, followed by #define of a different macro [-Wheader-guard] fix by defining _GTP_H_ and not _GTP_H Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- include/net/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gtp.h b/include/net/gtp.h index 894a37b87d63..6398891b99ba 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -1,5 +1,5 @@ #ifndef _GTP_H_ -#define _GTP_H +#define _GTP_H_ /* General GTP protocol related definitions. */ -- cgit From 86cb13e4ec5060d94069a8418fd4f3ccb38edee2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Jul 2016 13:12:33 +0300 Subject: mlxsw: spectrum: Fix compilation error when CLS_ACT isn't set When CONFIG_NET_CLS_ACT isn't set 'struct tcf_exts' has no member named 'actions' and we therefore must not access it. Otherwise compilation fails. Fix this by introducing a new macro similar to tc_no_actions(), which always returns 'false' if CONFIG_NET_CLS_ACT isn't set. 
Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Reported-by: kbuild test robot Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index fb82b5b5d9e7..0bb210635e5f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -192,6 +192,9 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); #define tc_for_each_action(_a, _exts) \ list_for_each_entry(a, &(_exts)->actions, list) +#define tc_single_action(_exts) \ + (list_is_singular(&(_exts)->actions)) + static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 lastuse) { @@ -205,6 +208,7 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #define tc_no_actions(_exts) true #define tc_for_each_action(_a, _exts) while ((void)(_a), 0) +#define tc_single_action(_exts) false #define tcf_action_stats_update(a, bytes, packets, lastuse) #endif /* CONFIG_NET_CLS_ACT */ -- cgit From 96ae52279594470622ff0585621a13e96b700600 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jul 2016 05:54:46 -0700 Subject: bpf: Add bpf_probe_write_user BPF helper to be called in tracers This allows user memory to be written to during the course of a kprobe. It shouldn't be used to implement any kind of security mechanism because of TOC-TOU attacks, but rather to debug, divert, and manipulate execution of semi-cooperative processes. Although it uses probe_kernel_write, we limit the address space the probe can write into by checking the space with access_ok. We do this as opposed to calling copy_to_user directly, in order to avoid sleeping. In addition we ensure the threads's current fs / segment is USER_DS and the thread isn't exiting nor a kernel thread. Given this feature is meant for experiments, and it has a risk of crashing the system, and running programs, we print a warning on when a proglet that attempts to use this helper is installed, along with the pid and process name. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b7076f5b5ad..da218fec6056 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -365,6 +365,16 @@ enum bpf_func_id { */ BPF_FUNC_get_current_task, + /** + * bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + */ + BPF_FUNC_probe_write_user, + __BPF_FUNC_MAX_ID, }; -- cgit From a85a970af265f156740977168b542234511b28a8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2016 16:09:41 -0700 Subject: net_sched: move tc_action into tcf_common struct tc_action is confusing, currently we use it for two purposes: 1) Pass in arguments and carry out results from helper functions 2) A generic representation for tc actions The first one is error-prone, since we need to make sure we don't miss anything. This patch aims to get rid of this use, by moving tc_action into tcf_common, so that they are allocated together in hashtable and can be cast'ed easily. 
And together with the following patch, we could really make tc_action a generic representation for all tc actions and each type of action can inherit from it. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 52 ++++++++++++++++++++++------------------ include/net/tc_act/tc_bpf.h | 3 +-- include/net/tc_act/tc_connmark.h | 3 +-- include/net/tc_act/tc_csum.h | 3 +-- include/net/tc_act/tc_defact.h | 3 +-- include/net/tc_act/tc_gact.h | 5 ++-- include/net/tc_act/tc_ife.h | 3 +-- include/net/tc_act/tc_ipt.h | 3 +-- include/net/tc_act/tc_mirred.h | 3 +-- include/net/tc_act/tc_nat.h | 5 +--- include/net/tc_act/tc_pedit.h | 3 +-- include/net/tc_act/tc_skbedit.h | 3 +-- include/net/tc_act/tc_vlan.h | 3 +-- 13 files changed, 42 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 0bb210635e5f..8b199095ea51 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,7 +10,26 @@ #include #include + +struct tcf_hashinfo { + struct hlist_head *htab; + unsigned int hmask; + spinlock_t lock; + u32 index; +}; + +struct tc_action_ops; + +struct tc_action { + const struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct list_head list; + struct tcf_hashinfo *hinfo; +}; + struct tcf_common { + struct tc_action tcfc_act; struct hlist_node tcfc_head; u32 tcfc_index; int tcfc_refcnt; @@ -26,6 +45,7 @@ struct tcf_common { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; +#define tcf_act common.tcfc_act #define tcf_head common.tcfc_head #define tcf_index common.tcfc_index #define tcf_refcnt common.tcfc_refcnt @@ -39,13 +59,6 @@ struct tcf_common { #define tcf_lock common.tcfc_lock #define tcf_rcu common.tcfc_rcu -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; -}; - static inline unsigned int tcf_hash(u32 index, unsigned int hmask) { return index & hmask; @@ -88,15 +101,6 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } -struct tc_action { - void *priv; - const struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct list_head list; - struct tcf_hashinfo *hinfo; -}; - #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -106,17 +110,18 @@ struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ + size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct net *, struct tc_action *, u32); + int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *act, int ovr, + struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, struct tc_action *); + struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); }; @@ -152,13 +157,14 @@ static inline void tc_action_net_exit(struct tc_action_net *tn) int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - struct tc_action *a); -int tcf_hash_search(struct 
tc_action_net *tn, struct tc_action *a, u32 index); + const struct tc_action_ops *ops); +int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action *a, int size, int bind, bool cpustats); + struct tc_action **a, const struct tc_action_ops *ops, int bind, + bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 958d69cfb19c..80a4d6f49773 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -23,7 +23,6 @@ struct tcf_bpf { struct sock_filter *bpf_ops; const char *bpf_name; }; -#define to_bpf(a) \ - container_of(a->priv, struct tcf_bpf, common) +#define to_bpf(a) ((struct tcf_bpf *)a) #endif /* __NET_TC_BPF_H */ diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 02caa406611b..8a661135f4ac 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -9,7 +9,6 @@ struct tcf_connmark_info { u16 zone; }; -#define to_connmark(a) \ - container_of(a->priv, struct tcf_connmark_info, common) +#define to_connmark(a) ((struct tcf_connmark_info *)a) #endif /* __NET_TC_CONNMARK_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index fa8f5fac65e9..1a9ef15d573b 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -9,7 +9,6 @@ struct tcf_csum { u32 update_flags; }; -#define to_tcf_csum(a) \ - container_of(a->priv,struct tcf_csum,common) +#define to_tcf_csum(a) ((struct tcf_csum *)a) #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index ab9b5d6be67b..e25b4eb4fc66 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -8,7 +8,6 @@ struct tcf_defact { u32 tcfd_datalen; void *tcfd_defdata; }; -#define to_defact(a) \ - container_of(a->priv, struct tcf_defact, common) +#define to_defact(a) ((struct tcf_defact *)a) #endif /* __NET_TC_DEF_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 93c520b83d10..119cdb418c23 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -13,8 +13,7 @@ struct tcf_gact { atomic_t packets; #endif }; -#define to_gact(a) \ - container_of(a->priv, struct tcf_gact, common) +#define to_gact(a) ((struct tcf_gact *)a) static inline bool is_tcf_gact_shot(const struct tc_action *a) { @@ -24,7 +23,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) if (a->ops && a->ops->type != TCA_ACT_GACT) return false; - gact = a->priv; + gact = to_gact(a); if (gact->tcf_action == TC_ACT_SHOT) return true; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c55facd17b7e..7921abe42adc 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -16,8 +16,7 @@ struct tcf_ife_info { /* list of metaids allowed */ struct list_head metalist; }; -#define to_ife(a) \ - container_of(a->priv, struct tcf_ife_info, common) +#define to_ife(a) ((struct tcf_ife_info *)a) struct tcf_meta_info { const struct tcf_meta_ops *ops; diff --git a/include/net/tc_act/tc_ipt.h 
b/include/net/tc_act/tc_ipt.h index c0f4193f432c..c22ae7ab66ed 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -11,7 +11,6 @@ struct tcf_ipt { char *tcfi_tname; struct xt_entry_target *tcfi_t; }; -#define to_ipt(a) \ - container_of(a->priv, struct tcf_ipt, common) +#define to_ipt(a) ((struct tcf_ipt *)a) #endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 6a13a7c74e0c..89aebd22cd79 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -12,8 +12,7 @@ struct tcf_mirred { struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; -#define to_mirred(a) \ - container_of(a->priv, struct tcf_mirred, common) +#define to_mirred(a) ((struct tcf_mirred *)a) static inline bool is_tcf_mirred_redirect(const struct tc_action *a) { diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index 63d8e9ca9d99..a91ad3ad565e 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -13,9 +13,6 @@ struct tcf_nat { u32 flags; }; -static inline struct tcf_nat *to_tcf_nat(struct tc_action *a) -{ - return container_of(a->priv, struct tcf_nat, common); -} +#define to_tcf_nat(a) ((struct tcf_nat *)a) #endif /* __NET_TC_NAT_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 5b80998879c7..2cccfbaae800 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -9,7 +9,6 @@ struct tcf_pedit { unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; }; -#define to_pedit(a) \ - container_of(a->priv, struct tcf_pedit, common) +#define to_pedit(a) ((struct tcf_pedit *)a) #endif /* __NET_TC_PED_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index d01a5d40cfb5..9e0548998327 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -30,8 +30,7 @@ struct tcf_skbedit { u16 queue_mapping; u16 ptype; }; -#define to_skbedit(a) \ - container_of(a->priv, struct tcf_skbedit, common) +#define to_skbedit(a) ((struct tcf_skbedit *)a) /* Return true iff action is mark */ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 93b70ade1ff3..584b80788d52 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -21,7 +21,6 @@ struct tcf_vlan { u16 tcfv_push_vid; __be16 tcfv_push_proto; }; -#define to_vlan(a) \ - container_of(a->priv, struct tcf_vlan, common) +#define to_vlan(a) ((struct tcf_vlan *)a) #endif /* __NET_TC_VLAN_H */ -- cgit From ec0595cc4495be579309b4bfd5e997af0f2ae6f9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2016 16:09:42 -0700 Subject: net_sched: get rid of struct tcf_common After the previous patch, struct tc_action should be enough to represent the generic tc action, tcf_common is not necessary any more. This patch gets rid of it to make tc action code more readable. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/act_api.h | 63 +++++++++++++++++++--------------------- include/net/tc_act/tc_bpf.h | 2 +- include/net/tc_act/tc_connmark.h | 2 +- include/net/tc_act/tc_csum.h | 2 +- include/net/tc_act/tc_defact.h | 2 +- include/net/tc_act/tc_gact.h | 2 +- include/net/tc_act/tc_ife.h | 2 +- include/net/tc_act/tc_ipt.h | 2 +- include/net/tc_act/tc_mirred.h | 2 +- include/net/tc_act/tc_nat.h | 2 +- include/net/tc_act/tc_pedit.h | 2 +- include/net/tc_act/tc_skbedit.h | 2 +- include/net/tc_act/tc_vlan.h | 2 +- 13 files changed, 42 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8b199095ea51..41e6a24a44b9 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -22,42 +22,39 @@ struct tc_action_ops; struct tc_action { const struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct list_head list; - struct tcf_hashinfo *hinfo; -}; - -struct tcf_common { - struct tc_action tcfc_act; - struct hlist_node tcfc_head; - u32 tcfc_index; - int tcfc_refcnt; - int tcfc_bindcnt; - u32 tcfc_capab; - int tcfc_action; - struct tcf_t tcfc_tm; - struct gnet_stats_basic_packed tcfc_bstats; - struct gnet_stats_queue tcfc_qstats; - struct gnet_stats_rate_est64 tcfc_rate_est; - spinlock_t tcfc_lock; - struct rcu_head tcfc_rcu; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct list_head list; + struct tcf_hashinfo *hinfo; + + struct hlist_node tcfa_head; + u32 tcfa_index; + int tcfa_refcnt; + int tcfa_bindcnt; + u32 tcfa_capab; + int tcfa_action; + struct tcf_t tcfa_tm; + struct gnet_stats_basic_packed tcfa_bstats; + struct gnet_stats_queue tcfa_qstats; + struct gnet_stats_rate_est64 tcfa_rate_est; + spinlock_t tcfa_lock; + struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfc_act -#define tcf_head common.tcfc_head -#define tcf_index common.tcfc_index -#define tcf_refcnt common.tcfc_refcnt -#define tcf_bindcnt common.tcfc_bindcnt -#define tcf_capab common.tcfc_capab -#define tcf_action common.tcfc_action -#define tcf_tm common.tcfc_tm -#define tcf_bstats common.tcfc_bstats -#define tcf_qstats common.tcfc_qstats -#define tcf_rate_est common.tcfc_rate_est -#define tcf_lock common.tcfc_lock -#define tcf_rcu common.tcfc_rcu +#define tcf_act common.tcfa_act +#define tcf_head common.tcfa_head +#define tcf_index common.tcfa_index +#define tcf_refcnt common.tcfa_refcnt +#define tcf_bindcnt common.tcfa_bindcnt +#define tcf_capab common.tcfa_capab +#define tcf_action common.tcfa_action +#define tcf_tm common.tcfa_tm +#define tcf_bstats common.tcfa_bstats +#define tcf_qstats common.tcfa_qstats +#define tcf_rate_est common.tcfa_rate_est +#define tcf_lock common.tcfa_lock +#define tcf_rcu common.tcfa_rcu static inline unsigned int tcf_hash(u32 index, unsigned int hmask) { diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 80a4d6f49773..2b94673a3dbc 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -14,7 +14,7 @@ #include struct tcf_bpf { - struct tcf_common common; + struct tc_action common; struct bpf_prog __rcu *filter; union { u32 bpf_fd; diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 8a661135f4ac..59b515d32bb4 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -4,7 +4,7 @@ #include struct tcf_connmark_info { - struct tcf_common common; 
+ struct tc_action common; struct net *net; u16 zone; }; diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 1a9ef15d573b..f31fb6331a53 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -5,7 +5,7 @@ #include struct tcf_csum { - struct tcf_common common; + struct tc_action common; u32 update_flags; }; diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index e25b4eb4fc66..d47f040a3bdf 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -4,7 +4,7 @@ #include struct tcf_defact { - struct tcf_common common; + struct tc_action common; u32 tcfd_datalen; void *tcfd_defdata; }; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 119cdb418c23..b6f173910226 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -5,7 +5,7 @@ #include struct tcf_gact { - struct tcf_common common; + struct tc_action common; #ifdef CONFIG_GACT_PROB u16 tcfg_ptype; u16 tcfg_pval; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 7921abe42adc..5164bd7a38fb 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -8,7 +8,7 @@ #define IFE_METAHDRLEN 2 struct tcf_ife_info { - struct tcf_common common; + struct tc_action common; u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index c22ae7ab66ed..31309766e379 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -6,7 +6,7 @@ struct xt_entry_target; struct tcf_ipt { - struct tcf_common common; + struct tc_action common; u32 tcfi_hook; char *tcfi_tname; struct xt_entry_target *tcfi_t; diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 89aebd22cd79..62770add15bd 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -5,7 +5,7 @@ #include struct tcf_mirred { - struct tcf_common common; + struct tc_action common; int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index a91ad3ad565e..56681a320612 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -5,7 +5,7 @@ #include struct tcf_nat { - struct tcf_common common; + struct tc_action common; __be32 old_addr; __be32 new_addr; diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 2cccfbaae800..29e38d6823df 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -4,7 +4,7 @@ #include struct tcf_pedit { - struct tcf_common common; + struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 9e0548998327..5767e9dbcf92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -23,7 +23,7 @@ #include struct tcf_skbedit { - struct tcf_common common; + struct tc_action common; u32 flags; u32 priority; u32 mark; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 584b80788d52..e29f52e8bdf1 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -16,7 +16,7 @@ #define VLAN_F_PUSH 0x2 struct tcf_vlan { - struct tcf_common common; + struct tc_action common; int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; -- cgit From 9ff26e9fabaf52f28fb5e875c0b9ffc2d1512039 Mon Sep 17 00:00:00 2001 From: Parthasarathy 
Bhuvaragan Date: Tue, 26 Jul 2016 08:47:18 +0200 Subject: tipc: introduce constants for tipc address validation In this commit, we introduce defines for tipc address size, offset and mask specification for Zone.Cluster.Node. There is no functional change in this commit. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 6f71b9b41595..bf049e8fe31b 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -60,26 +60,48 @@ struct tipc_name_seq { __u32 upper; }; +/* TIPC Address Size, Offset, Mask specification for Z.C.N + */ +#define TIPC_NODE_BITS 12 +#define TIPC_CLUSTER_BITS 12 +#define TIPC_ZONE_BITS 8 + +#define TIPC_NODE_OFFSET 0 +#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) + +#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) + +#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) + +#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) + static inline __u32 tipc_addr(unsigned int zone, unsigned int cluster, unsigned int node) { - return (zone << 24) | (cluster << 12) | node; + return (zone << TIPC_ZONE_OFFSET) | + (cluster << TIPC_CLUSTER_OFFSET) | + node; } static inline unsigned int tipc_zone(__u32 addr) { - return addr >> 24; + return addr >> TIPC_ZONE_OFFSET; } static inline unsigned int tipc_cluster(__u32 addr) { - return (addr >> 12) & 0xfff; + return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; } static inline unsigned int tipc_node(__u32 addr) { - return addr & 0xfff; + return addr & TIPC_NODE_MASK; } /* -- cgit From 7b3f52296493656015f0c0deddb6e90e36b9cda2 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:19 +0200 Subject: tipc: make cluster size threshold for monitoring configurable In this commit, we introduce support to configure the minimum threshold to activate the new link monitoring algorithm. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. 
Miller --- include/uapi/linux/tipc_netlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d4c8f142ba63..d387b65a0d97 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -56,6 +56,7 @@ enum { TIPC_NL_NET_GET, TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, + TIPC_NL_MON_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -72,6 +73,7 @@ enum { TIPC_NLA_NODE, /* nest */ TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE, /* nest */ + TIPC_NLA_MON, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -166,6 +168,15 @@ enum { TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 }; +/* Monitor info */ +enum { + TIPC_NLA_MON_UNSPEC, + TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + + __TIPC_NLA_MON_MAX, + TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, -- cgit From bf1035b2ff5296c7c49e262152253ce29d87e82d Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:20 +0200 Subject: tipc: get monitor threshold for the cluster In this commit, we add support to fetch the configured cluster monitoring threshold. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d387b65a0d97..d07c6ec76062 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -57,6 +57,7 @@ enum { TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, + TIPC_NL_MON_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 -- cgit From cf6f7e1d51090772d5ff7355aaf0fcff17f20d1a Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 26 Jul 2016 08:47:22 +0200 Subject: tipc: dump monitor attributes In this commit, we dump the monitor attributes when queried. The link monitor attributes are separated into two kinds: 1. general attributes per bearer 2. specific attributes per node/peer This style resembles the socket attributes and the nametable publications per socket. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. 
Miller --- include/uapi/linux/tipc_netlink.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d07c6ec76062..5f3f6d09fb79 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -58,6 +58,7 @@ enum { TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, TIPC_NL_MON_GET, + TIPC_NL_MON_PEER_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -75,6 +76,7 @@ enum { TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE, /* nest */ TIPC_NLA_MON, /* nest */ + TIPC_NLA_MON_PEER, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -173,6 +175,11 @@ enum { enum { TIPC_NLA_MON_UNSPEC, TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + TIPC_NLA_MON_REF, /* u32 */ + TIPC_NLA_MON_ACTIVE, /* flag */ + TIPC_NLA_MON_BEARER_NAME, /* string */ + TIPC_NLA_MON_PEERCNT, /* u32 */ + TIPC_NLA_MON_LISTGEN, /* u32 */ __TIPC_NLA_MON_MAX, TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 @@ -194,6 +201,24 @@ enum { TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 }; +/* Monitor peer info */ +enum { + TIPC_NLA_MON_PEER_UNSPEC, + + TIPC_NLA_MON_PEER_ADDR, /* u32 */ + TIPC_NLA_MON_PEER_DOMGEN, /* u32 */ + TIPC_NLA_MON_PEER_APPLIED, /* u32 */ + TIPC_NLA_MON_PEER_UPMAP, /* u64 */ + TIPC_NLA_MON_PEER_MEMBERS, /* tlv */ + TIPC_NLA_MON_PEER_UP, /* flag */ + TIPC_NLA_MON_PEER_HEAD, /* flag */ + TIPC_NLA_MON_PEER_LOCAL, /* flag */ + TIPC_NLA_MON_PEER_PAD, /* flag */ + + __TIPC_NLA_MON_PEER_MAX, + TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1 +}; + /* Nest, connection info */ enum { TIPC_NLA_CON_UNSPEC, -- cgit
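
The matchall, mirred and act_api changes above are meant to be consumed together from a driver's ndo_setup_tc() callback. Below is a minimal, illustrative sketch of that glue, loosely modelled on the mlxsw mirroring offload referenced in the Fixes tag; the foo_* names and struct foo_port are hypothetical driver internals, while TC_SETUP_MATCHALL, struct tc_cls_matchall_offload, tc_single_action(), tc_for_each_action(), is_tcf_mirred_mirror() and tcf_mirred_ifindex() are the symbols introduced or touched by the patches in this series.

```c
#include <linux/netdevice.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/tc_act/tc_mirred.h>

/* Hypothetical driver private state and mirroring primitives. */
struct foo_port {
	int span_ifindex;
};

static int foo_port_mirror_add(struct foo_port *port, int to_ifindex,
			       unsigned long cookie);
static void foo_port_mirror_del(struct foo_port *port, unsigned long cookie);

static int foo_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
			struct tc_to_netdev *tc)
{
	struct foo_port *port = netdev_priv(dev);
	struct tc_cls_matchall_offload *mall;
	/* tc_for_each_action() expands over a variable literally named 'a' */
	const struct tc_action *a;

	if (tc->type != TC_SETUP_MATCHALL)
		return -EOPNOTSUPP;

	mall = tc->cls_mall;
	switch (mall->command) {
	case TC_CLSMATCHALL_REPLACE:
		/* Offload only a single mirred "mirror" action (SPAN). */
		if (!tc_single_action(mall->exts))
			return -EOPNOTSUPP;

		tc_for_each_action(a, mall->exts) {
			if (!is_tcf_mirred_mirror(a))
				return -EOPNOTSUPP;
			/* mall->cookie identifies the filter on destroy. */
			return foo_port_mirror_add(port, tcf_mirred_ifindex(a),
						   mall->cookie);
		}
		return -EOPNOTSUPP;
	case TC_CLSMATCHALL_DESTROY:
		foo_port_mirror_del(port, mall->cookie);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
```

The tc invocation shown in the offloading commit ("matchall skip_sw ... action mirred egress mirror dev ...") is what ultimately arrives at such a callback as a TC_CLSMATCHALL_REPLACE command.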
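
For the MACsec ICV change, the practical effect for userspace is that 16 octets is now the upper bound a configuration should request, even though MACSEC_MAX_ICV_LEN stays at 32 for ABI compatibility. A small userspace sketch of that check, assuming a uapi header that already carries the patch:

```c
#include <stdbool.h>
#include <stdio.h>
#include <linux/if_macsec.h>

/* Kernels carrying this patch reject ICV lengths above 16 octets even
 * though MACSEC_MAX_ICV_LEN remains 32 in the uapi header. */
static bool macsec_icv_len_valid(unsigned int icv_len)
{
	return icv_len >= MACSEC_MIN_ICV_LEN && icv_len <= MACSEC_STD_ICV_LEN;
}

int main(void)
{
	printf("16 octets: %s\n", macsec_icv_len_valid(16) ? "ok" : "rejected");
	printf("32 octets: %s\n", macsec_icv_len_valid(32) ? "ok" : "rejected");
	return 0;
}
```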
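
The bpf_probe_write_user() helper is only described by its uapi comment in the patch above; the following is a hedged sketch of how a tracing proglet might call it, written in the samples/bpf style of this period. The SEC() and PT_REGS_PARM2() macros and the helper-pointer declaration are assumptions modelled on samples/bpf/bpf_helpers.h, and the kprobe on sys_write plus the overwritten buffer are purely illustrative; as the commit message stresses, this is a debugging aid, not a security mechanism.

```c
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>

/* Assumed scaffolding in the style of samples/bpf/bpf_helpers.h. */
#define SEC(NAME) __attribute__((section(NAME), used))
#define PT_REGS_PARM2(x) ((x)->si)	/* x86_64 second syscall argument */

static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
	(void *) BPF_FUNC_probe_write_user;

/* Replace the buffer a traced process passes to write(2).  Debug aid only. */
SEC("kprobe/sys_write")
int kprobe_sys_write(struct pt_regs *ctx)
{
	char marker[] = "bpf was here";
	void *ubuf = (void *)PT_REGS_PARM2(ctx);

	bpf_probe_write_user(ubuf, marker, sizeof(marker));
	return 0;
}

char _license[] SEC("license") = "GPL";
```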
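
The TIPC address constants replace open-coded shifts and masks but, as the commit message notes, change no behaviour. A small userspace program against the updated <linux/tipc.h> shows the Z.C.N packing the new defines describe; the printed values are identical before and after the patch:

```c
#include <stdio.h>
#include <linux/tipc.h>

int main(void)
{
	__u32 addr = tipc_addr(1, 1, 10);		/* <1.1.10> */

	printf("addr    : 0x%08x\n", addr);		/* 0x0100100a */
	printf("zone    : %u\n", tipc_zone(addr));	/* 1 */
	printf("cluster : %u\n", tipc_cluster(addr));	/* 1 */
	printf("node    : %u\n", tipc_node(addr));	/* 10 */
	return 0;
}
```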