From 09ea0dfbf972c63dfea8cf46f2fb67f39e4d833b Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 29 May 2018 15:59:18 +0200 Subject: dma-buf: make map_atomic and map function pointers optional So drivers don't need dummy functions just returning NULL. Cc: Daniel Vetter Signed-off-by: Gerd Hoffmann Reviewed-by: Daniel Vetter Reviewed-by: Oleksandr Andrushchenko Link: http://patchwork.freedesktop.org/patch/msgid/20180529135918.19729-1-kraxel@redhat.com --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 085db2fee2d7..88917fa796e4 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,12 +39,12 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @map_atomic: maps a page from the buffer into kernel address + * @map_atomic: [optional] maps a page from the buffer into kernel address * space, users may not block until the subsequent unmap call. * This callback must not sleep. * @unmap_atomic: [optional] unmaps a atomically mapped page from the buffer. * This Callback must not sleep. - * @map: maps a page from the buffer into kernel address space. + * @map: [optional] maps a page from the buffer into kernel address space. * @unmap: [optional] unmaps a page from the buffer. * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. -- cgit From 2d4f545cb14fa2d9865124183f3100d46dc9b3b5 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Mon, 4 Jun 2018 23:34:06 +0200 Subject: rfkill: Fixes and cleanup of kernel-doc in the header file Fixes kerneldoc parameter names to match implementation, rfkill_set_hw_state(), rfkill_set_sw_state(). Fix description of rfkill_resume_polling(). Fix typos in documentation of rfkill_find_type(). Consistently start kerneldoc description with uppercase letter. Signed-off-by: Peter Meerwald-Stadler Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e6a0031d1b1f..8ad2487a86d5 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -66,7 +66,7 @@ struct rfkill_ops { #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) /** - * rfkill_alloc - allocate rfkill structure + * rfkill_alloc - Allocate rfkill structure * @name: name of the struct -- the string is not copied internally * @parent: device that has rf switch on it * @type: type of the switch (RFKILL_TYPE_*) @@ -112,7 +112,7 @@ void rfkill_pause_polling(struct rfkill *rfkill); /** * rfkill_resume_polling(struct rfkill *rfkill) * - * Pause polling -- say transmitter is off for other reasons. + * Resume polling * NOTE: not necessary for suspend/resume -- in that case the * core stops polling anyway */ @@ -130,7 +130,7 @@ void rfkill_resume_polling(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); /** - * rfkill_destroy - free rfkill structure + * rfkill_destroy - Free rfkill structure * @rfkill: rfkill structure to be destroyed * * Destroys the rfkill structure. @@ -140,7 +140,7 @@ void rfkill_destroy(struct rfkill *rfkill); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. - * @state: the current hardware block state to set + * @blocked: the current hardware block state to set * * rfkill drivers that get events when the hard-blocked state changes * use this function to notify the rfkill core (and through that also @@ -161,7 +161,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); /** * rfkill_set_sw_state - Set the internal rfkill software block state * @rfkill: pointer to the rfkill class to modify. - * @state: the current software block state to set + * @blocked: the current software block state to set * * rfkill drivers that get events when the soft-blocked state changes * (yes, some platforms directly act on input but allow changing again) @@ -183,7 +183,7 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked); /** * rfkill_init_sw_state - Initialize persistent software block state * @rfkill: pointer to the rfkill class to modify. - * @state: the current software block state to set + * @blocked: the current software block state to set * * rfkill drivers that preserve their software block state over power off * use this function to notify the rfkill core (and through that also @@ -208,17 +208,17 @@ void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked); void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); /** - * rfkill_blocked - query rfkill block + * rfkill_blocked - Query rfkill block state * * @rfkill: rfkill struct to query */ bool rfkill_blocked(struct rfkill *rfkill); /** - * rfkill_find_type - Helpper for finding rfkill type by name + * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * - * Returns enum rfkill_type that conrresponds the name. + * Returns enum rfkill_type that corresponds to the name. */ enum rfkill_type rfkill_find_type(const char *name); @@ -296,7 +296,7 @@ static inline enum rfkill_type rfkill_find_type(const char *name) const char *rfkill_get_led_trigger_name(struct rfkill *rfkill); /** - * rfkill_set_led_trigger_name -- set the LED trigger name + * rfkill_set_led_trigger_name - Set the LED trigger name * @rfkill: rfkill struct * @name: LED trigger name * -- cgit From c4cbaf7973a794839af080f13748335976cf3f3f Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 9 Jun 2018 09:14:42 +0300 Subject: cfg80211: Add support for HE Add support for the HE in cfg80211 and also add userspace API to nl80211 to send rate information out, conforming with P802.11ax_D2.0. Signed-off-by: Liad Kaufman Signed-off-by: Johannes Berg Signed-off-by: Ilan Peer Signed-off-by: Ido Yariv Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 427 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8fe7e4306816..e6a6503bfa33 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1539,6 +1539,106 @@ struct ieee80211_vht_operation { __le16 basic_mcs_set; } __packed; +/** + * struct ieee80211_he_cap_elem - HE capabilities element + * + * This structure is the "HE capabilities element" fixed fields as + * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3 + */ +struct ieee80211_he_cap_elem { + u8 mac_cap_info[5]; + u8 phy_cap_info[9]; +} __packed; + +#define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN 5 + +/** + * enum ieee80211_he_mcs_support - HE MCS support definitions + * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the + * number of streams + * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported + * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported + * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported + * + * These definitions are used in each 2-bit subfield of the rx_mcs_* + * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are + * both split into 8 subfields by number of streams. These values indicate + * which MCSes are supported for the number of streams the value appears + * for. + */ +enum ieee80211_he_mcs_support { + IEEE80211_HE_MCS_SUPPORT_0_7 = 0, + IEEE80211_HE_MCS_SUPPORT_0_9 = 1, + IEEE80211_HE_MCS_SUPPORT_0_11 = 2, + IEEE80211_HE_MCS_NOT_SUPPORTED = 3, +}; + +/** + * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field + * + * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field + * described in P802.11ax_D2.0 section 9.4.2.237.4 + * + * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel + * widths less than 80MHz. + * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel + * widths less than 80MHz. + * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel + * width 160MHz. + * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel + * width 160MHz. + * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for + * channel width 80p80MHz. + * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for + * channel width 80p80MHz. + */ +struct ieee80211_he_mcs_nss_supp { + __le16 rx_mcs_80; + __le16 tx_mcs_80; + __le16 rx_mcs_160; + __le16 tx_mcs_160; + __le16 rx_mcs_80p80; + __le16 tx_mcs_80p80; +} __packed; + +/** + * struct ieee80211_he_operation - HE capabilities element + * + * This structure is the "HE operation element" fields as + * described in P802.11ax_D2.0 section 9.4.2.238 + */ +struct ieee80211_he_operation { + __le32 he_oper_params; + __le16 he_mcs_nss_set; + /* Optional 0,1,3 or 4 bytes: depends on @he_oper_params */ + u8 optional[0]; +} __packed; + +/** + * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field + * + * This structure is the "MU AC Parameter Record" fields as + * described in P802.11ax_D2.0 section 9.4.2.240 + */ +struct ieee80211_he_mu_edca_param_ac_rec { + u8 aifsn; + u8 ecw_min_max; + u8 mu_edca_timer; +} __packed; + +/** + * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element + * + * This structure is the "MU EDCA Parameter Set element" fields as + * described in P802.11ax_D2.0 section 9.4.2.240 + */ +struct ieee80211_mu_edca_param_set { + u8 mu_qos_info; + struct ieee80211_he_mu_edca_param_ac_rec ac_be; + struct ieee80211_he_mu_edca_param_ac_rec ac_bk; + struct ieee80211_he_mu_edca_param_ac_rec ac_vi; + struct ieee80211_he_mu_edca_param_ac_rec ac_vo; +} __packed; /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 @@ -1577,6 +1677,328 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN 0x10000000 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN 0x20000000 +/* 802.11ax HE MAC capabilities */ +#define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 +#define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 +#define IEEE80211_HE_MAC_CAP0_TWT_RES 0x04 +#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP 0x00 +#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1 0x08 +#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2 0x10 +#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3 0x18 +#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK 0x18 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1 0x00 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2 0x20 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4 0x40 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8 0x60 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16 0x80 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32 0xa0 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64 0xc0 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED 0xe0 +#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK 0xe0 + +#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED 0x00 +#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128 0x01 +#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256 0x02 +#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512 0x03 +#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK 0x03 +#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US 0x00 +#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US 0x04 +#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US 0x08 +#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK 0x0c +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1 0x00 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2 0x10 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3 0x20 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4 0x30 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5 0x40 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6 0x50 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7 0x60 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8 0x70 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK 0x70 + +/* Link adaptation is split between byte HE_MAC_CAP1 and + * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE + * in which case the following values apply: + * 0 = No feedback. + * 1 = reserved. + * 2 = Unsolicited feedback. + * 3 = both + */ +#define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION 0x80 + +#define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION 0x01 +#define IEEE80211_HE_MAC_CAP2_ALL_ACK 0x02 +#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED 0x04 +#define IEEE80211_HE_MAC_CAP2_BSR 0x08 +#define IEEE80211_HE_MAC_CAP2_BCAST_TWT 0x10 +#define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP 0x20 +#define IEEE80211_HE_MAC_CAP2_MU_CASCADING 0x40 +#define IEEE80211_HE_MAC_CAP2_ACK_EN 0x80 + +#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU 0x01 +#define IEEE80211_HE_MAC_CAP3_OMI_CONTROL 0x02 +#define IEEE80211_HE_MAC_CAP3_OFDMA_RA 0x04 + +/* The maximum length of an A-MDPU is defined by the combination of the Maximum + * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the + * same field in the HE capabilities. + */ +#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT 0x00 +#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1 0x08 +#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2 0x10 +#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED 0x18 +#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK 0x18 +#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG 0x20 +#define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 +#define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 + +#define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 +#define IEEE80211_HE_MAC_CAP4_QTP 0x02 +#define IEEE80211_HE_MAC_CAP4_BQR 0x04 +#define IEEE80211_HE_MAC_CAP4_SR_RESP 0x08 +#define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 +#define IEEE80211_HE_MAC_CAP4_OPS 0x20 +#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU 0x40 + +/* 802.11ax HE PHY capabilities */ +#define IEEE80211_HE_PHY_CAP0_DUAL_BAND 0x01 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G 0x10 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G 0x20 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G 0x40 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK 0xfe + +#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ 0x01 +#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ 0x02 +#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ 0x04 +#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ 0x08 +#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK 0x0f +#define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A 0x10 +#define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD 0x20 +#define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US 0x40 +/* Midamble RX Max NSTS is split between byte #2 and byte #3 */ +#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS 0x80 + +#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS 0x01 +#define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US 0x02 +#define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ 0x04 +#define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ 0x08 +#define IEEE80211_HE_PHY_CAP2_DOPPLER_TX 0x10 +#define IEEE80211_HE_PHY_CAP2_DOPPLER_RX 0x20 + +/* Note that the meaning of UL MU below is different between an AP and a non-AP + * sta, where in the AP case it indicates support for Rx and in the non-AP sta + * case it indicates support for Tx. + */ +#define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO 0x40 +#define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO 0x80 + +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM 0x00 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK 0x01 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK 0x02 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM 0x03 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK 0x03 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 0x00 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2 0x04 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM 0x00 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK 0x08 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK 0x10 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM 0x18 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK 0x18 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1 0x00 +#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2 0x20 +#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA 0x40 +#define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER 0x80 + +#define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE 0x01 +#define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER 0x02 + +/* Minimal allowed value of Max STS under 80MHz is 3 */ +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 0x0c +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5 0x10 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6 0x14 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7 0x18 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8 0x1c +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK 0x1c + +/* Minimal allowed value of Max STS above 80MHz is 3 */ +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 0x60 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5 0x80 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6 0xa0 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7 0xc0 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8 0xe0 +#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK 0xe0 + +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1 0x00 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 0x01 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3 0x02 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4 0x03 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5 0x04 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6 0x05 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7 0x06 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8 0x07 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK 0x07 + +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1 0x00 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 0x08 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3 0x10 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4 0x18 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5 0x20 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6 0x28 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7 0x30 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8 0x38 +#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK 0x38 + +#define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK 0x40 +#define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK 0x80 + +#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU 0x01 +#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU 0x02 +#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB 0x04 +#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB 0x08 +#define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB 0x10 +#define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE 0x20 +#define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO 0x40 +#define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT 0x80 + +#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR 0x01 +#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR 0x02 +#define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI 0x04 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_1 0x08 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_2 0x10 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_3 0x18 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_4 0x20 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_5 0x28 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_6 0x30 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_7 0x38 +#define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK 0x38 +#define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ 0x40 +#define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ 0x80 + +#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI 0x01 +#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G 0x02 +#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU 0x04 +#define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU 0x08 +#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI 0x10 +#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF 0x20 + +/* 802.11ax HE TX/RX MCS NSS Support */ +#define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS (3) +#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS (6) +#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS (11) +#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK 0x07c0 +#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK 0xf800 + +/* TX/RX HE MCS Support field Highest MCS subfield encoding */ +enum ieee80211_he_highest_mcs_supported_subfield_enc { + HIGHEST_MCS_SUPPORTED_MCS7 = 0, + HIGHEST_MCS_SUPPORTED_MCS8, + HIGHEST_MCS_SUPPORTED_MCS9, + HIGHEST_MCS_SUPPORTED_MCS10, + HIGHEST_MCS_SUPPORTED_MCS11, +}; + +/* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */ +static inline u8 +ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap) +{ + u8 count = 4; + + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) + count += 4; + + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) + count += 4; + + return count; +} + +/* 802.11ax HE PPE Thresholds */ +#define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS (1) +#define IEEE80211_PPE_THRES_NSS_POS (0) +#define IEEE80211_PPE_THRES_NSS_MASK (7) +#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU \ + (BIT(5) | BIT(6)) +#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK 0x78 +#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS (3) +#define IEEE80211_PPE_THRES_INFO_PPET_SIZE (3) + +/* + * Calculate 802.11ax HE capabilities IE PPE field size + * Input: Header byte of ppe_thres (first byte), and HE capa IE's PHY cap u8* + */ +static inline u8 +ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) +{ + u8 n; + + if ((phy_cap_info[6] & + IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0) + return 0; + + n = hweight8(ppe_thres_hdr & + IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK); + n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >> + IEEE80211_PPE_THRES_NSS_POS)); + + /* + * Each pair is 6 bits, and we need to add the 7 "header" bits to the + * total size. + */ + n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7; + n = DIV_ROUND_UP(n, 8); + + return n; +} + +/* HE Operation defines */ +#define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x0000003f +#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x000001c0 +#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET 6 +#define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000200 +#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x000ffc00 +#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 10 +#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x000100000 +#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x000200000 +#define IEEE80211_HE_OPERATION_MULTI_BSSID_AP 0x10000000 +#define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR 0x20000000 +#define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x40000000 + +/* + * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size + * @he_oper_ie: byte data of the He Operations IE, stating from the the byte + * after the ext ID byte. It is assumed that he_oper_ie has at least + * sizeof(struct ieee80211_he_operation) bytes, checked already in + * ieee802_11_parse_elems_crc() + * @return the actual size of the IE data (not including header), or 0 on error + */ +static inline u8 +ieee80211_he_oper_size(const u8 *he_oper_ie) +{ + struct ieee80211_he_operation *he_oper = (void *)he_oper_ie; + u8 oper_len = sizeof(struct ieee80211_he_operation); + u32 he_oper_params; + + /* Make sure the input is not NULL */ + if (!he_oper_ie) + return 0; + + /* Calc required length */ + he_oper_params = le32_to_cpu(he_oper->he_oper_params); + if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) + oper_len += 3; + if (he_oper_params & IEEE80211_HE_OPERATION_MULTI_BSSID_AP) + oper_len++; + + /* Add the first byte (extension ID) to the total length */ + oper_len++; + + return oper_len; +} + /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 @@ -1992,6 +2414,11 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, WLAN_EID_EXT_FILS_NONCE = 13, + WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14, + WLAN_EID_EXT_HE_CAPABILITY = 35, + WLAN_EID_EXT_HE_OPERATION = 36, + WLAN_EID_EXT_UORA = 37, + WLAN_EID_EXT_HE_MU_EDCA = 38, }; /* Action category code */ -- cgit From eb3744a2dd01cb07ce9f556d56d6fe451f0c313a Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 13 Jun 2018 09:10:37 +0530 Subject: gpio: davinci: Do not assume continuous IRQ numbering Currently the driver assumes that the interrupts are continuous and does platform_get_irq only once and assumes the rest are continuous, instead call platform_get_irq for all the interrupts and store them in an array for later use. Signed-off-by: Keerthy Reviewed-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-davinci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index 90ae19ca828f..57a5a35e0073 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -22,6 +22,7 @@ #include #define MAX_REGS_BANKS 5 +#define MAX_INT_PER_BANK 32 struct davinci_gpio_platform_data { u32 ngpio; @@ -41,7 +42,7 @@ struct davinci_gpio_controller { spinlock_t lock; void __iomem *regs[MAX_REGS_BANKS]; int gpio_unbanked; - unsigned int base_irq; + int irqs[MAX_INT_PER_BANK]; unsigned int base; }; -- cgit From 90b39402e9f31c4aab48dc1a43d85a724065793f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 1 Jun 2018 13:21:27 +0200 Subject: gpio: Add API to explicitly name a consumer The GPIO (descriptor) API registers a "label" naming what is currently using the GPIO line. Typically this is taken from things like the device tree node, so "reset-gpios" will result in he line being labeled "reset". The technical effect is pretty much zero: the use is for debug and introspection, such as "lsgpio" and debugfs files. However sometimes the user want this cuddly feeling of listing all GPIO lines and seeing exactly what they are for and it gives a very fulfilling sense of control. Especially in the cases when the device tree node doesn't provide a good name, or anonymous GPIO lines assigned just to "gpios" in the device tree because the usage is implicit. For these cases it may be nice to be able to label the line directly and explicitly. Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 243112c7fa7d..e8aaf34dd65d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -145,6 +145,7 @@ int gpiod_is_active_low(const struct gpio_desc *desc); int gpiod_cansleep(const struct gpio_desc *desc); int gpiod_to_irq(const struct gpio_desc *desc); +void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); @@ -467,6 +468,12 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc) return -EINVAL; } +static inline void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) +{ + /* GPIO can never have been requested */ + WARN_ON(1); +} + static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { return ERR_PTR(-EINVAL); -- cgit From 297101ab85841319aac2c7843ca755d650c1964f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 15 Jun 2018 13:44:53 +0200 Subject: regulator: pfuze100: add pfuze3001 support This extends the pfuze100 driver with pfuze3001 support. Latest datasheet: https://www.nxp.com/docs/en/data-sheet/PF3001.pdf Signed-off-by: Robin Gong Signed-off-by: Stefan Wahren Signed-off-by: Mark Brown --- include/linux/regulator/pfuze100.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index e0ccf46f66cf..cb5aecd40f07 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -64,6 +64,17 @@ #define PFUZE3000_VLDO3 11 #define PFUZE3000_VLDO4 12 +#define PFUZE3001_SW1 0 +#define PFUZE3001_SW2 1 +#define PFUZE3001_SW3 2 +#define PFUZE3001_VSNVS 3 +#define PFUZE3001_VLDO1 4 +#define PFUZE3001_VLDO2 5 +#define PFUZE3001_VCCSD 6 +#define PFUZE3001_V33 7 +#define PFUZE3001_VLDO3 8 +#define PFUZE3001_VLDO4 9 + struct regulator_init_data; struct pfuze_regulator_platform_data { -- cgit From 7bd0c7ba62e8a9840f15fc4ff0122b29fe1b6413 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 7 Jun 2018 16:51:38 -0700 Subject: regulator: Fix typo in comment of struct regulator_linear_range regulator_map_linar_range() => regulator_map_linear_range() Signed-off-by: Matthias Kaehlcke Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index fc2dc8df476f..dea96ee39fdc 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -46,7 +46,7 @@ enum regulator_status { /** * struct regulator_linear_range - specify linear voltage ranges * - * Specify a range of voltages for regulator_map_linar_range() and + * Specify a range of voltages for regulator_map_linear_range() and * regulator_list_linear_range(). * * @min_uV: Lowest voltage in range -- cgit From 3e247b0fb540a9a503a71ef6b918a3443e49655f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 7 Jun 2018 19:40:32 +0000 Subject: spi: remove unused adi_spi3.h header include/linux/spi/adi_spi3.h is unused since commit 47838669de9d ("spi: remove blackfin related host drivers") Finish the cleaning by removing it. Signed-off-by: Corentin Labbe Signed-off-by: Mark Brown --- include/linux/spi/adi_spi3.h | 254 ------------------------------------------- 1 file changed, 254 deletions(-) delete mode 100644 include/linux/spi/adi_spi3.h (limited to 'include/linux') diff --git a/include/linux/spi/adi_spi3.h b/include/linux/spi/adi_spi3.h deleted file mode 100644 index c84123aa1d06..000000000000 --- a/include/linux/spi/adi_spi3.h +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Analog Devices SPI3 controller driver - * - * Copyright (c) 2014 Analog Devices Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _ADI_SPI3_H_ -#define _ADI_SPI3_H_ - -#include - -/* SPI_CONTROL */ -#define SPI_CTL_EN 0x00000001 /* Enable */ -#define SPI_CTL_MSTR 0x00000002 /* Master/Slave */ -#define SPI_CTL_PSSE 0x00000004 /* controls modf error in master mode */ -#define SPI_CTL_ODM 0x00000008 /* Open Drain Mode */ -#define SPI_CTL_CPHA 0x00000010 /* Clock Phase */ -#define SPI_CTL_CPOL 0x00000020 /* Clock Polarity */ -#define SPI_CTL_ASSEL 0x00000040 /* Slave Select Pin Control */ -#define SPI_CTL_SELST 0x00000080 /* Slave Select Polarity in-between transfers */ -#define SPI_CTL_EMISO 0x00000100 /* Enable MISO */ -#define SPI_CTL_SIZE 0x00000600 /* Word Transfer Size */ -#define SPI_CTL_SIZE08 0x00000000 /* SIZE: 8 bits */ -#define SPI_CTL_SIZE16 0x00000200 /* SIZE: 16 bits */ -#define SPI_CTL_SIZE32 0x00000400 /* SIZE: 32 bits */ -#define SPI_CTL_LSBF 0x00001000 /* LSB First */ -#define SPI_CTL_FCEN 0x00002000 /* Flow-Control Enable */ -#define SPI_CTL_FCCH 0x00004000 /* Flow-Control Channel Selection */ -#define SPI_CTL_FCPL 0x00008000 /* Flow-Control Polarity */ -#define SPI_CTL_FCWM 0x00030000 /* Flow-Control Water-Mark */ -#define SPI_CTL_FIFO0 0x00000000 /* FCWM: TFIFO empty or RFIFO Full */ -#define SPI_CTL_FIFO1 0x00010000 /* FCWM: TFIFO 75% or more empty or RFIFO 75% or more full */ -#define SPI_CTL_FIFO2 0x00020000 /* FCWM: TFIFO 50% or more empty or RFIFO 50% or more full */ -#define SPI_CTL_FMODE 0x00040000 /* Fast-mode Enable */ -#define SPI_CTL_MIOM 0x00300000 /* Multiple I/O Mode */ -#define SPI_CTL_MIO_DIS 0x00000000 /* MIOM: Disable */ -#define SPI_CTL_MIO_DUAL 0x00100000 /* MIOM: Enable DIOM (Dual I/O Mode) */ -#define SPI_CTL_MIO_QUAD 0x00200000 /* MIOM: Enable QUAD (Quad SPI Mode) */ -#define SPI_CTL_SOSI 0x00400000 /* Start on MOSI */ -/* SPI_RX_CONTROL */ -#define SPI_RXCTL_REN 0x00000001 /* Receive Channel Enable */ -#define SPI_RXCTL_RTI 0x00000004 /* Receive Transfer Initiate */ -#define SPI_RXCTL_RWCEN 0x00000008 /* Receive Word Counter Enable */ -#define SPI_RXCTL_RDR 0x00000070 /* Receive Data Request */ -#define SPI_RXCTL_RDR_DIS 0x00000000 /* RDR: Disabled */ -#define SPI_RXCTL_RDR_NE 0x00000010 /* RDR: RFIFO not empty */ -#define SPI_RXCTL_RDR_25 0x00000020 /* RDR: RFIFO 25% full */ -#define SPI_RXCTL_RDR_50 0x00000030 /* RDR: RFIFO 50% full */ -#define SPI_RXCTL_RDR_75 0x00000040 /* RDR: RFIFO 75% full */ -#define SPI_RXCTL_RDR_FULL 0x00000050 /* RDR: RFIFO full */ -#define SPI_RXCTL_RDO 0x00000100 /* Receive Data Over-Run */ -#define SPI_RXCTL_RRWM 0x00003000 /* FIFO Regular Water-Mark */ -#define SPI_RXCTL_RWM_0 0x00000000 /* RRWM: RFIFO Empty */ -#define SPI_RXCTL_RWM_25 0x00001000 /* RRWM: RFIFO 25% full */ -#define SPI_RXCTL_RWM_50 0x00002000 /* RRWM: RFIFO 50% full */ -#define SPI_RXCTL_RWM_75 0x00003000 /* RRWM: RFIFO 75% full */ -#define SPI_RXCTL_RUWM 0x00070000 /* FIFO Urgent Water-Mark */ -#define SPI_RXCTL_UWM_DIS 0x00000000 /* RUWM: Disabled */ -#define SPI_RXCTL_UWM_25 0x00010000 /* RUWM: RFIFO 25% full */ -#define SPI_RXCTL_UWM_50 0x00020000 /* RUWM: RFIFO 50% full */ -#define SPI_RXCTL_UWM_75 0x00030000 /* RUWM: RFIFO 75% full */ -#define SPI_RXCTL_UWM_FULL 0x00040000 /* RUWM: RFIFO full */ -/* SPI_TX_CONTROL */ -#define SPI_TXCTL_TEN 0x00000001 /* Transmit Channel Enable */ -#define SPI_TXCTL_TTI 0x00000004 /* Transmit Transfer Initiate */ -#define SPI_TXCTL_TWCEN 0x00000008 /* Transmit Word Counter Enable */ -#define SPI_TXCTL_TDR 0x00000070 /* Transmit Data Request */ -#define SPI_TXCTL_TDR_DIS 0x00000000 /* TDR: Disabled */ -#define SPI_TXCTL_TDR_NF 0x00000010 /* TDR: TFIFO not full */ -#define SPI_TXCTL_TDR_25 0x00000020 /* TDR: TFIFO 25% empty */ -#define SPI_TXCTL_TDR_50 0x00000030 /* TDR: TFIFO 50% empty */ -#define SPI_TXCTL_TDR_75 0x00000040 /* TDR: TFIFO 75% empty */ -#define SPI_TXCTL_TDR_EMPTY 0x00000050 /* TDR: TFIFO empty */ -#define SPI_TXCTL_TDU 0x00000100 /* Transmit Data Under-Run */ -#define SPI_TXCTL_TRWM 0x00003000 /* FIFO Regular Water-Mark */ -#define SPI_TXCTL_RWM_FULL 0x00000000 /* TRWM: TFIFO full */ -#define SPI_TXCTL_RWM_25 0x00001000 /* TRWM: TFIFO 25% empty */ -#define SPI_TXCTL_RWM_50 0x00002000 /* TRWM: TFIFO 50% empty */ -#define SPI_TXCTL_RWM_75 0x00003000 /* TRWM: TFIFO 75% empty */ -#define SPI_TXCTL_TUWM 0x00070000 /* FIFO Urgent Water-Mark */ -#define SPI_TXCTL_UWM_DIS 0x00000000 /* TUWM: Disabled */ -#define SPI_TXCTL_UWM_25 0x00010000 /* TUWM: TFIFO 25% empty */ -#define SPI_TXCTL_UWM_50 0x00020000 /* TUWM: TFIFO 50% empty */ -#define SPI_TXCTL_UWM_75 0x00030000 /* TUWM: TFIFO 75% empty */ -#define SPI_TXCTL_UWM_EMPTY 0x00040000 /* TUWM: TFIFO empty */ -/* SPI_CLOCK */ -#define SPI_CLK_BAUD 0x0000FFFF /* Baud Rate */ -/* SPI_DELAY */ -#define SPI_DLY_STOP 0x000000FF /* Transfer delay time in multiples of SCK period */ -#define SPI_DLY_LEADX 0x00000100 /* Extended (1 SCK) LEAD Control */ -#define SPI_DLY_LAGX 0x00000200 /* Extended (1 SCK) LAG control */ -/* SPI_SSEL */ -#define SPI_SLVSEL_SSE1 0x00000002 /* SPISSEL1 Enable */ -#define SPI_SLVSEL_SSE2 0x00000004 /* SPISSEL2 Enable */ -#define SPI_SLVSEL_SSE3 0x00000008 /* SPISSEL3 Enable */ -#define SPI_SLVSEL_SSE4 0x00000010 /* SPISSEL4 Enable */ -#define SPI_SLVSEL_SSE5 0x00000020 /* SPISSEL5 Enable */ -#define SPI_SLVSEL_SSE6 0x00000040 /* SPISSEL6 Enable */ -#define SPI_SLVSEL_SSE7 0x00000080 /* SPISSEL7 Enable */ -#define SPI_SLVSEL_SSEL1 0x00000200 /* SPISSEL1 Value */ -#define SPI_SLVSEL_SSEL2 0x00000400 /* SPISSEL2 Value */ -#define SPI_SLVSEL_SSEL3 0x00000800 /* SPISSEL3 Value */ -#define SPI_SLVSEL_SSEL4 0x00001000 /* SPISSEL4 Value */ -#define SPI_SLVSEL_SSEL5 0x00002000 /* SPISSEL5 Value */ -#define SPI_SLVSEL_SSEL6 0x00004000 /* SPISSEL6 Value */ -#define SPI_SLVSEL_SSEL7 0x00008000 /* SPISSEL7 Value */ -/* SPI_RWC */ -#define SPI_RWC_VALUE 0x0000FFFF /* Received Word-Count */ -/* SPI_RWCR */ -#define SPI_RWCR_VALUE 0x0000FFFF /* Received Word-Count Reload */ -/* SPI_TWC */ -#define SPI_TWC_VALUE 0x0000FFFF /* Transmitted Word-Count */ -/* SPI_TWCR */ -#define SPI_TWCR_VALUE 0x0000FFFF /* Transmitted Word-Count Reload */ -/* SPI_IMASK */ -#define SPI_IMSK_RUWM 0x00000002 /* Receive Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_TUWM 0x00000004 /* Transmit Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_ROM 0x00000010 /* Receive Over-Run Error Interrupt Mask */ -#define SPI_IMSK_TUM 0x00000020 /* Transmit Under-Run Error Interrupt Mask */ -#define SPI_IMSK_TCM 0x00000040 /* Transmit Collision Error Interrupt Mask */ -#define SPI_IMSK_MFM 0x00000080 /* Mode Fault Error Interrupt Mask */ -#define SPI_IMSK_RSM 0x00000100 /* Receive Start Interrupt Mask */ -#define SPI_IMSK_TSM 0x00000200 /* Transmit Start Interrupt Mask */ -#define SPI_IMSK_RFM 0x00000400 /* Receive Finish Interrupt Mask */ -#define SPI_IMSK_TFM 0x00000800 /* Transmit Finish Interrupt Mask */ -/* SPI_IMASKCL */ -#define SPI_IMSK_CLR_RUW 0x00000002 /* Receive Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_CLR_TUWM 0x00000004 /* Transmit Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_CLR_ROM 0x00000010 /* Receive Over-Run Error Interrupt Mask */ -#define SPI_IMSK_CLR_TUM 0x00000020 /* Transmit Under-Run Error Interrupt Mask */ -#define SPI_IMSK_CLR_TCM 0x00000040 /* Transmit Collision Error Interrupt Mask */ -#define SPI_IMSK_CLR_MFM 0x00000080 /* Mode Fault Error Interrupt Mask */ -#define SPI_IMSK_CLR_RSM 0x00000100 /* Receive Start Interrupt Mask */ -#define SPI_IMSK_CLR_TSM 0x00000200 /* Transmit Start Interrupt Mask */ -#define SPI_IMSK_CLR_RFM 0x00000400 /* Receive Finish Interrupt Mask */ -#define SPI_IMSK_CLR_TFM 0x00000800 /* Transmit Finish Interrupt Mask */ -/* SPI_IMASKST */ -#define SPI_IMSK_SET_RUWM 0x00000002 /* Receive Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_SET_TUWM 0x00000004 /* Transmit Urgent Water-Mark Interrupt Mask */ -#define SPI_IMSK_SET_ROM 0x00000010 /* Receive Over-Run Error Interrupt Mask */ -#define SPI_IMSK_SET_TUM 0x00000020 /* Transmit Under-Run Error Interrupt Mask */ -#define SPI_IMSK_SET_TCM 0x00000040 /* Transmit Collision Error Interrupt Mask */ -#define SPI_IMSK_SET_MFM 0x00000080 /* Mode Fault Error Interrupt Mask */ -#define SPI_IMSK_SET_RSM 0x00000100 /* Receive Start Interrupt Mask */ -#define SPI_IMSK_SET_TSM 0x00000200 /* Transmit Start Interrupt Mask */ -#define SPI_IMSK_SET_RFM 0x00000400 /* Receive Finish Interrupt Mask */ -#define SPI_IMSK_SET_TFM 0x00000800 /* Transmit Finish Interrupt Mask */ -/* SPI_STATUS */ -#define SPI_STAT_SPIF 0x00000001 /* SPI Finished */ -#define SPI_STAT_RUWM 0x00000002 /* Receive Urgent Water-Mark Breached */ -#define SPI_STAT_TUWM 0x00000004 /* Transmit Urgent Water-Mark Breached */ -#define SPI_STAT_ROE 0x00000010 /* Receive Over-Run Error Indication */ -#define SPI_STAT_TUE 0x00000020 /* Transmit Under-Run Error Indication */ -#define SPI_STAT_TCE 0x00000040 /* Transmit Collision Error Indication */ -#define SPI_STAT_MODF 0x00000080 /* Mode Fault Error Indication */ -#define SPI_STAT_RS 0x00000100 /* Receive Start Indication */ -#define SPI_STAT_TS 0x00000200 /* Transmit Start Indication */ -#define SPI_STAT_RF 0x00000400 /* Receive Finish Indication */ -#define SPI_STAT_TF 0x00000800 /* Transmit Finish Indication */ -#define SPI_STAT_RFS 0x00007000 /* SPI_RFIFO status */ -#define SPI_STAT_RFIFO_EMPTY 0x00000000 /* RFS: RFIFO Empty */ -#define SPI_STAT_RFIFO_25 0x00001000 /* RFS: RFIFO 25% Full */ -#define SPI_STAT_RFIFO_50 0x00002000 /* RFS: RFIFO 50% Full */ -#define SPI_STAT_RFIFO_75 0x00003000 /* RFS: RFIFO 75% Full */ -#define SPI_STAT_RFIFO_FULL 0x00004000 /* RFS: RFIFO Full */ -#define SPI_STAT_TFS 0x00070000 /* SPI_TFIFO status */ -#define SPI_STAT_TFIFO_FULL 0x00000000 /* TFS: TFIFO full */ -#define SPI_STAT_TFIFO_25 0x00010000 /* TFS: TFIFO 25% empty */ -#define SPI_STAT_TFIFO_50 0x00020000 /* TFS: TFIFO 50% empty */ -#define SPI_STAT_TFIFO_75 0x00030000 /* TFS: TFIFO 75% empty */ -#define SPI_STAT_TFIFO_EMPTY 0x00040000 /* TFS: TFIFO empty */ -#define SPI_STAT_FCS 0x00100000 /* Flow-Control Stall Indication */ -#define SPI_STAT_RFE 0x00400000 /* SPI_RFIFO Empty */ -#define SPI_STAT_TFF 0x00800000 /* SPI_TFIFO Full */ -/* SPI_ILAT */ -#define SPI_ILAT_RUWMI 0x00000002 /* Receive Urgent Water Mark Interrupt */ -#define SPI_ILAT_TUWMI 0x00000004 /* Transmit Urgent Water Mark Interrupt */ -#define SPI_ILAT_ROI 0x00000010 /* Receive Over-Run Error Indication */ -#define SPI_ILAT_TUI 0x00000020 /* Transmit Under-Run Error Indication */ -#define SPI_ILAT_TCI 0x00000040 /* Transmit Collision Error Indication */ -#define SPI_ILAT_MFI 0x00000080 /* Mode Fault Error Indication */ -#define SPI_ILAT_RSI 0x00000100 /* Receive Start Indication */ -#define SPI_ILAT_TSI 0x00000200 /* Transmit Start Indication */ -#define SPI_ILAT_RFI 0x00000400 /* Receive Finish Indication */ -#define SPI_ILAT_TFI 0x00000800 /* Transmit Finish Indication */ -/* SPI_ILATCL */ -#define SPI_ILAT_CLR_RUWMI 0x00000002 /* Receive Urgent Water Mark Interrupt */ -#define SPI_ILAT_CLR_TUWMI 0x00000004 /* Transmit Urgent Water Mark Interrupt */ -#define SPI_ILAT_CLR_ROI 0x00000010 /* Receive Over-Run Error Indication */ -#define SPI_ILAT_CLR_TUI 0x00000020 /* Transmit Under-Run Error Indication */ -#define SPI_ILAT_CLR_TCI 0x00000040 /* Transmit Collision Error Indication */ -#define SPI_ILAT_CLR_MFI 0x00000080 /* Mode Fault Error Indication */ -#define SPI_ILAT_CLR_RSI 0x00000100 /* Receive Start Indication */ -#define SPI_ILAT_CLR_TSI 0x00000200 /* Transmit Start Indication */ -#define SPI_ILAT_CLR_RFI 0x00000400 /* Receive Finish Indication */ -#define SPI_ILAT_CLR_TFI 0x00000800 /* Transmit Finish Indication */ - -/* - * adi spi3 registers layout - */ -struct adi_spi_regs { - u32 revid; - u32 control; - u32 rx_control; - u32 tx_control; - u32 clock; - u32 delay; - u32 ssel; - u32 rwc; - u32 rwcr; - u32 twc; - u32 twcr; - u32 reserved0; - u32 emask; - u32 emaskcl; - u32 emaskst; - u32 reserved1; - u32 status; - u32 elat; - u32 elatcl; - u32 reserved2; - u32 rfifo; - u32 reserved3; - u32 tfifo; -}; - -#define MAX_CTRL_CS 8 /* cs in spi controller */ - -/* device.platform_data for SSP controller devices */ -struct adi_spi3_master { - u16 num_chipselect; - u16 pin_req[7]; -}; - -/* spi_board_info.controller_data for SPI slave devices, - * copied to spi_device.platform_data ... mostly for dma tuning - */ -struct adi_spi3_chip { - u32 control; - u16 cs_chg_udelay; /* Some devices require 16-bit delays */ - u32 tx_dummy_val; /* tx value for rx only transfer */ - bool enable_dma; -}; - -#endif /* _ADI_SPI3_H_ */ -- cgit From cf85ec88701610f0de2e959ac447d9613462fbc7 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 9 Jun 2018 16:18:16 +0300 Subject: removed extra extern file_fdatawait_range Jeff added this extern twice in commit a823e4589e68 Fixes: a823e4589e68 ("mm: add file_fdatawait_range and file_write_and_wait") Signed-off-by: Vasily Averin Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..f140c11d35dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2623,8 +2623,6 @@ static inline int filemap_fdatawait(struct address_space *mapping) extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); -extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, - loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); -- cgit From 420c0117db25db38b72b6230223f7a976d3070ea Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 17 Jun 2018 19:02:04 +0200 Subject: dmaengine: pxa: use a dma slave map In order to remove the specific knowledge of the dma mapping from PXA drivers, add a default slave map for pxa architectures. This won't impact MMP architecture, but is aimed only at all PXA boards. This is the first step, and once all drivers are converted, pxad_filter_fn() will be made static, and the DMA resources removed from device.c. Signed-off-by: Robert Jarzmik Reported-by: Arnd Bergmann Acked-by: Vinod Koul --- include/linux/platform_data/mmp_dma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h index d1397c8ed94e..6397b9c8149a 100644 --- a/include/linux/platform_data/mmp_dma.h +++ b/include/linux/platform_data/mmp_dma.h @@ -12,9 +12,13 @@ #ifndef MMP_DMA_H #define MMP_DMA_H +struct dma_slave_map; + struct mmp_dma_platdata { int dma_channels; int nb_requestors; + int slave_map_cnt; + const struct dma_slave_map *slave_map; }; #endif /* MMP_DMA_H */ -- cgit From b6d1a17f4729e4fda5740a855da91d202db2c118 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 17 Jun 2018 19:02:14 +0200 Subject: dmaengine: pxa: document pxad_param Add some documentation for the pxad_param structure, and describe the contract behind the minimal required priority of a DMA channel. Signed-off-by: Robert Jarzmik Acked-by: Vinod Koul --- include/linux/dma/pxa-dma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h index e56ec7af4fd7..9fc594f69eff 100644 --- a/include/linux/dma/pxa-dma.h +++ b/include/linux/dma/pxa-dma.h @@ -9,6 +9,15 @@ enum pxad_chan_prio { PXAD_PRIO_LOWEST, }; +/** + * struct pxad_param - dma channel request parameters + * @drcmr: requestor line number + * @prio: minimal mandatory priority of the channel + * + * If a requested channel is granted, its priority will be at least @prio, + * ie. if PXAD_PRIO_LOW is required, the requested channel will be either + * PXAD_PRIO_LOW, PXAD_PRIO_NORMAL or PXAD_PRIO_HIGHEST. + */ struct pxad_param { unsigned int drcmr; enum pxad_chan_prio prio; -- cgit From cd31b80736852d34bc1072f3e579a6fd73a244e7 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 17 Jun 2018 19:02:17 +0200 Subject: ARM: pxa: change SSP DMA channels allocation Now the dma_slave_map is available for PXA architecture, switch the SSP device to it. This specifically means that : - for platform data based machines, the DMA requestor channels are extracted from the slave map, where pxa-ssp-dai. is a 1-1 match to ssp., and the channels are either "rx" or "tx". - for device tree platforms, the dma node should be hooked into the pxa2xx-ac97 or pxa-ssp-dai node. Signed-off-by: Robert Jarzmik Acked-by: Daniel Mack --- include/linux/pxa2xx_ssp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 8461b18e4608..03a7ca46735b 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -212,8 +212,6 @@ struct ssp_device { int type; int use_count; int irq; - int drcmr_rx; - int drcmr_tx; struct device_node *of_node; }; -- cgit From b8042b3da925f390c1482bf9dc0898dc0b3ea7b5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jun 2018 22:39:29 +0200 Subject: ieee80211: bump IEEE80211_MAX_AMPDU_BUF to support HE Bump the IEEE80211_MAX_AMPDU_BUF size to 0x100 for HE support and - for now - use IEEE80211_MAX_AMPDU_BUF_HT everywhere. This is derived from my internal patch, parts of which Luca had sent upstream. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e6a6503bfa33..9c03a7d5e400 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1433,11 +1433,13 @@ struct ieee80211_ht_operation { #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 /* - * A-PMDU buffer sizes - * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) + * A-MPDU buffer sizes + * According to HT size varies from 8 to 64 frames + * HE adds the ability to have up to 256 frames. */ -#define IEEE80211_MIN_AMPDU_BUF 0x8 -#define IEEE80211_MAX_AMPDU_BUF 0x40 +#define IEEE80211_MIN_AMPDU_BUF 0x8 +#define IEEE80211_MAX_AMPDU_BUF_HT 0x40 +#define IEEE80211_MAX_AMPDU_BUF 0x100 /* Spatial Multiplexing Power Save Modes (for capability) */ -- cgit From f7859590d97614815b35a755c8213dfb8f2766bd Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 5 Jun 2018 19:20:39 -0400 Subject: audit: eliminate audit_enabled magic number comparison Remove comparison of audit_enabled to magic numbers outside of audit. Related: https://github.com/linux-audit/audit-kernel/issues/86 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 69c78477590b..9334fbef7bae 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -117,6 +117,9 @@ struct filename; extern void audit_log_session_info(struct audit_buffer *ab); +#define AUDIT_OFF 0 +#define AUDIT_ON 1 +#define AUDIT_LOCKED 2 #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ @@ -202,7 +205,7 @@ static inline int audit_log_task_context(struct audit_buffer *ab) static inline void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { } -#define audit_enabled 0 +#define audit_enabled AUDIT_OFF #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC -- cgit From 9fba738a53dda20e748d6ee240b6c017c8146b4b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 19 Jun 2018 16:41:41 +0200 Subject: clk: add duty cycle support Add the possibility to apply and query the clock signal duty cycle ratio. This is useful when the duty cycle of the clock signal depends on some other parameters controlled by the clock framework. For example, the duty cycle of a divider may depends on the raw divider setting (ratio = N / div) , which is controlled by the CCF. In such case, going through the pwm framework to control the duty cycle ratio of this clock would be a burden. A clock provider is not required to implement the operation to set and get the duty cycle. If it does not implement .get_duty_cycle(), the ratio is assumed to be 50%. This change also adds a new flag, CLK_DUTY_CYCLE_PARENT. This flag should be used to indicate that a clock, such as gates and muxes, may inherit the duty cycle ratio of its parent clock. If a clock does not provide a get_duty_cycle() callback and has CLK_DUTY_CYCLE_PARENT, then the call will be directly forwarded to its parent clock, if any. For set_duty_cycle(), the clock should also have CLK_SET_RATE_PARENT for the call to be forwarded Signed-off-by: Jerome Brunet Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/20180619144141.8506-1-jbrunet@baylibre.com --- include/linux/clk-provider.h | 26 ++++++++++++++++++++++++++ include/linux/clk.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index b7cfa037e593..08b1aa70a38d 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -38,6 +38,8 @@ #define CLK_IS_CRITICAL BIT(11) /* do not gate, ever */ /* parents need enable during gate/ungate, set rate and re-parent */ #define CLK_OPS_PARENT_ENABLE BIT(12) +/* duty cycle call may be forwarded to the parent clock */ +#define CLK_DUTY_CYCLE_PARENT BIT(13) struct clk; struct clk_hw; @@ -66,6 +68,17 @@ struct clk_rate_request { struct clk_hw *best_parent_hw; }; +/** + * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * + * @num: Numerator of the duty cycle ratio + * @den: Denominator of the duty cycle ratio + */ +struct clk_duty { + unsigned int num; + unsigned int den; +}; + /** * struct clk_ops - Callback operations for hardware clocks; these are to * be provided by the clock implementation, and will be called by drivers @@ -169,6 +182,15 @@ struct clk_rate_request { * by the second argument. Valid values for degrees are * 0-359. Return 0 on success, otherwise -EERROR. * + * @get_duty_cycle: Queries the hardware to get the current duty cycle ratio + * of a clock. Returned values denominator cannot be 0 and must be + * superior or equal to the numerator. + * + * @set_duty_cycle: Apply the duty cycle ratio to this clock signal specified by + * the numerator (2nd argurment) and denominator (3rd argument). + * Argument must be a valid ratio (denominator > 0 + * and >= numerator) Return 0 on success, otherwise -EERROR. + * * @init: Perform platform-specific initialization magic. * This is not not used by any of the basic clock types. * Please consider other ways of solving initialization problems @@ -218,6 +240,10 @@ struct clk_ops { unsigned long parent_accuracy); int (*get_phase)(struct clk_hw *hw); int (*set_phase)(struct clk_hw *hw, int degrees); + int (*get_duty_cycle)(struct clk_hw *hw, + struct clk_duty *duty); + int (*set_duty_cycle)(struct clk_hw *hw, + struct clk_duty *duty); void (*init)(struct clk_hw *hw); void (*debug_init)(struct clk_hw *hw, struct dentry *dentry); }; diff --git a/include/linux/clk.h b/include/linux/clk.h index 0dbd0885b2c2..4f750c481b82 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -141,6 +141,27 @@ int clk_set_phase(struct clk *clk, int degrees); */ int clk_get_phase(struct clk *clk); +/** + * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal + * @clk: clock signal source + * @num: numerator of the duty cycle ratio to be applied + * @den: denominator of the duty cycle ratio to be applied + * + * Adjust the duty cycle of a clock signal by the specified ratio. Returns 0 on + * success, -EERROR otherwise. + */ +int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den); + +/** + * clk_get_duty_cycle - return the duty cycle ratio of a clock signal + * @clk: clock signal source + * @scale: scaling factor to be applied to represent the ratio as an integer + * + * Returns the duty cycle ratio multiplied by the scale provided, otherwise + * returns -EERROR. + */ +int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale); + /** * clk_is_match - check if two clk's point to the same hardware clock * @p: clk compared against q @@ -183,6 +204,18 @@ static inline long clk_get_phase(struct clk *clk) return -ENOTSUPP; } +static inline int clk_set_duty_cycle(struct clk *clk, unsigned int num, + unsigned int den) +{ + return -ENOTSUPP; +} + +static inline unsigned int clk_get_scaled_duty_cycle(struct clk *clk, + unsigned int scale) +{ + return 0; +} + static inline bool clk_is_match(const struct clk *p, const struct clk *q) { return p == q; -- cgit From 19e0c58f6552638c86395f0717210326fdf14fd2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Jun 2018 15:10:56 -0700 Subject: iomap: generic inline data handling Add generic inline data handling by adding a pointer to the inline data region to struct iomap. When handling a buffered IOMAP_INLINE write, iomap_write_begin will copy the current inline data from the inline data region into the page cache, and iomap_write_end will copy the changes in the page cache back to the inline data region. This doesn't cover inline data reads and direct I/O yet because so far, we have no users. Signed-off-by: Andreas Gruenbacher [hch: small cleanups to better fit in with other iomap work] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index a044a824da85..10d6cff7f69a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -55,6 +55,7 @@ struct iomap { u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ + void *inline_data; }; /* -- cgit From 63899c6f8851c32214b19390254fa1ae90b582df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jun 2018 15:10:56 -0700 Subject: iomap: add a page_done callback This will be used by gfs2 to attach data to transactions for the journaled data mode. But the concept is generic enough that we might be able to use it for other purposes like encryption/integrity post-processing in the future. Based on a patch from Andreas Gruenbacher. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 10d6cff7f69a..45f43865b0f0 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -9,6 +9,7 @@ struct fiemap_extent_info; struct inode; struct iov_iter; struct kiocb; +struct page; struct vm_area_struct; struct vm_fault; @@ -56,6 +57,14 @@ struct iomap { struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; + + /* + * Called when finished processing a page in the mapping returned in + * this iomap. At least for now this is only supported in the buffered + * write path. + */ + void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, + struct page *page, struct iomap *iomap); }; /* -- cgit From e184fde6f3f5353040dd4b5637f823fc87436e55 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Jun 2018 15:10:57 -0700 Subject: iomap: add private pointer to struct iomap Add a private pointer to struct iomap to allow filesystems to pass data from iomap_begin to iomap_end. Will be used by gfs2 for passing on the on-disk inode buffer head. Signed-off-by: Andreas Gruenbacher Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 45f43865b0f0..1f36523d448a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -57,6 +57,7 @@ struct iomap { struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; + void *private; /* filesystem private */ /* * Called when finished processing a page in the mapping returned in -- cgit From 72b4daa241295440f98e80ae21294a67b27ca091 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jun 2018 15:10:57 -0700 Subject: iomap: add an iomap-based readpage and readpages implementation Simply use iomap_apply to iterate over the file and a submit a bio for each non-uptodate but mapped region and zero everything else. Note that as-is this can not be used for file systems with a blocksize smaller than the page size, but that support will be added later. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1f36523d448a..30d314407f66 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -99,6 +99,9 @@ struct iomap_ops { ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_readpage(struct page *page, const struct iomap_ops *ops); +int iomap_readpages(struct address_space *mapping, struct list_head *pages, + unsigned nr_pages, const struct iomap_ops *ops); int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, -- cgit From 693ba15c9202fe0283404abe4066e1b986e284eb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 12 Jun 2018 12:05:45 -0700 Subject: scsi: Remove percpu_ida With its one user gone, remove the library code. Signed-off-by: Matthew Wilcox Reviewed-by: Jens Axboe Signed-off-by: Martin K. Petersen --- include/linux/percpu_ida.h | 83 ---------------------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 include/linux/percpu_ida.h (limited to 'include/linux') diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h deleted file mode 100644 index 07d78e4653bc..000000000000 --- a/include/linux/percpu_ida.h +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERCPU_IDA_H__ -#define __PERCPU_IDA_H__ - -#include -#include -#include -#include -#include -#include -#include - -struct percpu_ida_cpu; - -struct percpu_ida { - /* - * number of tags available to be allocated, as passed to - * percpu_ida_init() - */ - unsigned nr_tags; - unsigned percpu_max_size; - unsigned percpu_batch_size; - - struct percpu_ida_cpu __percpu *tag_cpu; - - /* - * Bitmap of cpus that (may) have tags on their percpu freelists: - * steal_tags() uses this to decide when to steal tags, and which cpus - * to try stealing from. - * - * It's ok for a freelist to be empty when its bit is set - steal_tags() - * will just keep looking - but the bitmap _must_ be set whenever a - * percpu freelist does have tags. - */ - cpumask_t cpus_have_tags; - - struct { - spinlock_t lock; - /* - * When we go to steal tags from another cpu (see steal_tags()), - * we want to pick a cpu at random. Cycling through them every - * time we steal is a bit easier and more or less equivalent: - */ - unsigned cpu_last_stolen; - - /* For sleeping on allocation failure */ - wait_queue_head_t wait; - - /* - * Global freelist - it's a stack where nr_free points to the - * top - */ - unsigned nr_free; - unsigned *freelist; - } ____cacheline_aligned_in_smp; -}; - -/* - * Number of tags we move between the percpu freelist and the global freelist at - * a time - */ -#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U -/* Max size of percpu freelist, */ -#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2) - -int percpu_ida_alloc(struct percpu_ida *pool, int state); -void percpu_ida_free(struct percpu_ida *pool, unsigned tag); - -void percpu_ida_destroy(struct percpu_ida *pool); -int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, - unsigned long max_size, unsigned long batch_size); -static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) -{ - return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE, - IDA_DEFAULT_PCPU_BATCH_MOVE); -} - -typedef int (*percpu_ida_cb)(unsigned, void *); -int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, - void *data); - -unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu); -#endif /* __PERCPU_IDA_H__ */ -- cgit From 2fa4a32613c9182b00e46872755b0662374424a7 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 10 May 2018 11:05:16 +0800 Subject: scsi: libsas: dynamically allocate and free ata host Commit 2623c7a5f2 ("libata: add refcounting to ata_host") v4.17+ introduced refcounting to ata_host and will increase or decrease the refcount when adding or deleting transport ATA port. Now the ata host for libsas is embedded in domain_device, and the ->kref member is not initialized. Afer we add ata transport class, ata_host_get() will be called when adding transport ATA port and a warning will be triggered as below: refcount_t: increment on 0; use-after-free. WARNING: CPU: 2 PID: 103 at lib/refcount.c:153 refcount_inc+0x40/0x48 ...... Call trace: refcount_inc+0x40/0x48 ata_host_get+0x10/0x18 ata_tport_add+0x40/0x120 ata_sas_tport_add+0xc/0x14 sas_ata_init+0x7c/0xc8 sas_discover_domain+0x380/0x53c process_one_work+0x12c/0x288 worker_thread+0x58/0x3f0 kthread+0xfc/0x128 ret_from_fork+0x10/0x18 And also when removing transport ATA port ata_host_put() will be called and another similar warning will be triggered. If the refcount decreased to zero, the ata host will be freed. But this ata host is only part of domain_device, it cannot be freed directly. So we have to change this embedded static ata host to a dynamically allocated ata host and initialize the ->kref member. To use ata_host_get() and ata_host_put() in libsas, we need to move the declaration of these functions to the public libata.h and export them. Fixes: b6240a4df018 ("scsi: libsas: add transport class for ATA devices") Signed-off-by: Jason Yan CC: John Garry CC: Taras Kondratiuk CC: Tejun Heo Acked-by: Tejun Heo Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8b8946dd63b9..33e9718397e2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1110,6 +1110,8 @@ extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); extern int ata_slave_link_init(struct ata_port *ap); +extern void ata_host_get(struct ata_host *host); +extern void ata_host_put(struct ata_host *host); extern int ata_host_start(struct ata_host *host); extern int ata_host_register(struct ata_host *host, struct scsi_host_template *sht); -- cgit From 6519750210d9d3330e85d12e0128652edde448d2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jun 2018 10:34:50 +0200 Subject: sched/swait: Remove __prepare_to_swait There is no public user of this API, remove it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Cc: bigeasy@linutronix.de Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180612083909.157076812@infradead.org --- include/linux/swait.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swait.h b/include/linux/swait.h index bf8cb0dee23c..d6a5e949e4ca 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -161,7 +161,6 @@ extern void swake_up(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); -extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); -- cgit From 0abf17bc7790dd0467ed0e38522242f23c5da1c4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jun 2018 10:34:51 +0200 Subject: sched/swait: Switch to full exclusive mode Linus noted that swait basically implements exclusive mode -- because swake_up() only wakes a single waiter. And because of that it should take care to properly deal with the interruptible case. In short, the problem is that swake_up() can race with a signal. In this this case it is possible the swake_up() 'wakes' the waiter that is already on the way out because it just got a signal and the wakeup gets lost. The normal wait code is very careful and avoids this situation, make sure we do too. Copy the exact exclusive semantics from wait. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Cc: bigeasy@linutronix.de Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180612083909.209762413@infradead.org --- include/linux/swait.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swait.h b/include/linux/swait.h index d6a5e949e4ca..dd032061112d 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -38,8 +38,8 @@ * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right * sleeper state. * - * - the exclusive mode; because this requires preserving the list order - * and this is hard. + * - the !exclusive mode; because that leads to O(n) wakeups, everything is + * exclusive. * * - custom wake callback functions; because you cannot give any guarantees * about random code. This also allows swait to be used in RT, such that @@ -167,9 +167,10 @@ extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queu extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); -/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ +/* as per ___wait_event() but for swait, therefore "exclusive == 1" */ #define ___swait_event(wq, condition, state, ret, cmd) \ ({ \ + __label__ __out; \ struct swait_queue __wait; \ long __ret = ret; \ \ @@ -182,13 +183,13 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); \ if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ - break; \ + goto __out; \ } \ \ cmd; \ } \ finish_swait(&wq, &__wait); \ - __ret; \ +__out: __ret; \ }) #define __swait_event(wq, condition) \ -- cgit From b3dae109fa89d67334bf3349babab3ad9b6f233f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jun 2018 10:34:52 +0200 Subject: sched/swait: Rename to exclusive Since swait basically implemented exclusive waits only, make sure the API reflects that. $ git grep -l -e "\" -e "\" | while read file; do sed -i -e 's/\/&_one/g' -e 's/\/&_exclusive/g' $file; done With a few manual touch-ups. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Cc: bigeasy@linutronix.de Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180612083909.261946548@infradead.org --- include/linux/swait.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swait.h b/include/linux/swait.h index dd032061112d..73e06e9986d4 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -16,7 +16,7 @@ * wait-queues, but the semantics are actually completely different, and * every single user we have ever had has been buggy (or pointless). * - * A "swake_up()" only wakes up _one_ waiter, which is not at all what + * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what * "wake_up()" does, and has led to problems. In other cases, it has * been fine, because there's only ever one waiter (kvm), but in that * case gthe whole "simple" wait-queue is just pointless to begin with, @@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name * CPU0 - waker CPU1 - waiter * * for (;;) { - * @cond = true; prepare_to_swait(&wq_head, &wait, state); + * @cond = true; prepare_to_swait_exclusive(&wq_head, &wait, state); * smp_mb(); // smp_mb() from set_current_state() * if (swait_active(wq_head)) if (@cond) * wake_up(wq_head); break; @@ -157,11 +157,11 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq) return swait_active(wq); } -extern void swake_up(struct swait_queue_head *q); +extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); -extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); +extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); @@ -196,7 +196,7 @@ __out: __ret; \ (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ schedule()) -#define swait_event(wq, condition) \ +#define swait_event_exclusive(wq, condition) \ do { \ if (condition) \ break; \ @@ -208,7 +208,7 @@ do { \ TASK_UNINTERRUPTIBLE, timeout, \ __ret = schedule_timeout(__ret)) -#define swait_event_timeout(wq, condition, timeout) \ +#define swait_event_timeout_exclusive(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ @@ -220,7 +220,7 @@ do { \ ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ schedule()) -#define swait_event_interruptible(wq, condition) \ +#define swait_event_interruptible_exclusive(wq, condition) \ ({ \ int __ret = 0; \ if (!(condition)) \ @@ -233,7 +233,7 @@ do { \ TASK_INTERRUPTIBLE, timeout, \ __ret = schedule_timeout(__ret)) -#define swait_event_interruptible_timeout(wq, condition, timeout) \ +#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ @@ -246,7 +246,7 @@ do { \ (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) /** - * swait_event_idle - wait without system load contribution + * swait_event_idle_exclusive - wait without system load contribution * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @@ -257,7 +257,7 @@ do { \ * condition and doesn't want to contribute to system load. Signals are * ignored. */ -#define swait_event_idle(wq, condition) \ +#define swait_event_idle_exclusive(wq, condition) \ do { \ if (condition) \ break; \ @@ -270,7 +270,7 @@ do { \ __ret = schedule_timeout(__ret)) /** - * swait_event_idle_timeout - wait up to timeout without load contribution + * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout at which we'll give up in jiffies @@ -288,7 +288,7 @@ do { \ * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. */ -#define swait_event_idle_timeout(wq, condition, timeout) \ +#define swait_event_idle_timeout_exclusive(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ -- cgit From a19741e5e5a9f1f02f8e3c037bde7d73d4bfae9c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 28 May 2018 11:47:52 +0200 Subject: dma_buf: remove device parameter from attach callback v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device parameter is completely unused because it is available in the attachment structure as well. v2: fix kerneldoc as well Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/226643/ --- include/linux/dma-buf.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 88917fa796e4..c0ad5bf61188 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -55,11 +55,11 @@ struct dma_buf_ops { * @attach: * * This is called from dma_buf_attach() to make sure that a given - * &device can access the provided &dma_buf. Exporters which support - * buffer objects in special locations like VRAM or device-specific - * carveout areas should check whether the buffer could be move to - * system memory (or directly accessed by the provided device), and - * otherwise need to fail the attach operation. + * &dma_buf_attachment.dev can access the provided &dma_buf. Exporters + * which support buffer objects in special locations like VRAM or + * device-specific carveout areas should check whether the buffer could + * be move to system memory (or directly accessed by the provided + * device), and otherwise need to fail the attach operation. * * The exporter should also in general check whether the current * allocation fullfills the DMA constraints of the new device. If this @@ -77,8 +77,7 @@ struct dma_buf_ops { * to signal that backing storage is already allocated and incompatible * with the requirements of requesting device. */ - int (*attach)(struct dma_buf *, struct device *, - struct dma_buf_attachment *); + int (*attach)(struct dma_buf *, struct dma_buf_attachment *); /** * @detach: -- cgit From f664a52695429b68afb4e130a0f69cd5fd1fec86 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 28 May 2018 13:34:01 +0200 Subject: dma-buf: remove kmap_atomic interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Neither used nor correctly implemented anywhere. Just completely remove the interface. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/226645/ --- include/linux/dma-buf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index c0ad5bf61188..58725f890b5b 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -205,8 +205,6 @@ struct dma_buf_ops { * to be restarted. */ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); - void *(*map_atomic)(struct dma_buf *, unsigned long); - void (*unmap_atomic)(struct dma_buf *, unsigned long, void *); void *(*map)(struct dma_buf *, unsigned long); void (*unmap)(struct dma_buf *, unsigned long, void *); @@ -394,8 +392,6 @@ int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); -void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long); -void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *); void *dma_buf_kmap(struct dma_buf *, unsigned long); void dma_buf_kunmap(struct dma_buf *, unsigned long, void *); -- cgit From c03cea42149de56fbae2301d7123daaa2cfe80e2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jun 2018 15:10:58 -0700 Subject: iomap: add initial support for writes without buffer heads For now just limited to blocksize == PAGE_SIZE, where we can simply read in the full page in write begin, and just set the whole page dirty after copying data into it. This code is enabled by default and XFS will now be feed pages without buffer heads in ->writepage and ->writepages. If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old path will still be used, this both helps the transition in XFS and prepares for the gfs2 migration to the iomap infrastructure. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 30d314407f66..5eb9ca8d7ce5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -30,6 +30,7 @@ struct vm_fault; */ #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ #define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ +#define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */ /* * Flags that only need to be reported for IOMAP_REPORT requests: @@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, int iomap_readpage(struct page *page, const struct iomap_ops *ops); int iomap_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, const struct iomap_ops *ops); +int iomap_set_page_dirty(struct page *page); int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, -- cgit From 17dbca119312b4e8173d4e25ff64262119fcef38 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:26 -0700 Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf L1TF core kernel workarounds are cheap and normally always enabled, However they still should be reported in sysfs if the system is vulnerable or mitigated. Add the necessary CPU feature/bug bits. - Extend the existing checks for Meltdowns to determine if the system is vulnerable. All CPUs which are not vulnerable to Meltdown are also not vulnerable to L1TF - Check for 32bit non PAE and emit a warning as there is no practical way for mitigation due to the limited physical address bits - If the system has more than MAX_PA/2 physical memory the invert page workarounds don't protect the system against the L1TF attack anymore, because an inverted physical address will also point to valid memory. Print a warning in this case and report that the system is vulnerable. Add a function which returns the PFN limit for the L1TF mitigation, which will be used in follow up patches for sanity and range checks. [ tglx: Renamed the CPU feature bit to L1TF_PTEINV ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a97a63eef59f..d3da5184aa1c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -55,6 +55,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit From 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:28 -0700 Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2 For the L1TF workaround its necessary to limit the swap file size to below MAX_PA/2, so that the higher bits of the swap offset inverted never point to valid memory. Add a mechanism for the architecture to override the swap file size check in swapfile.c and add a x86 specific max swapfile check function that enforces that limit. The check is only enabled if the CPU is vulnerable to L1TF. In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system with 46bit PA it is 32TB. The limit is only per individual swap file, so it's always possible to exceed these limits with multiple swap files or partitions. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Dave Hansen --- include/linux/swapfile.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 06bd7b096167..e06febf62978 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -10,5 +10,7 @@ extern spinlock_t swap_lock; extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern int try_to_unuse(unsigned int, bool, unsigned long); +extern unsigned long generic_max_swapfile_size(void); +extern unsigned long max_swapfile_size(void); #endif /* _LINUX_SWAPFILE_H */ -- cgit From 5a6cf77f5e35e7af35d36a1e7dc21a42f6412e4f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:05:07 +0900 Subject: kprobes: Remove jprobe API implementation Remove functionally empty jprobe API implementations and test cases. Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/lkml/152942430705.15209.2307050500995264322.stgit@devbox Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 9440a2fc8893..b520baa65682 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -389,9 +389,6 @@ int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); -int setjmp_pre_handler(struct kprobe *, struct pt_regs *); -int longjmp_break_handler(struct kprobe *, struct pt_regs *); -void jprobe_return(void); unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); -- cgit From 4de58696de076d9bd2745d1cbe0930635c3f5ac9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:17:15 +0900 Subject: kprobes: Remove jprobe stub API Remove jprobe stub APIs from linux/kprobes.h since the jprobe implementation was completely gone. Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/lkml/152942503572.15209.1652552217914694917.stgit@devbox Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 50 ------------------------------------------------- 1 file changed, 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b520baa65682..e909413e4e38 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -63,7 +63,6 @@ struct pt_regs; struct kretprobe; struct kretprobe_instance; typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *); -typedef int (*kprobe_break_handler_t) (struct kprobe *, struct pt_regs *); typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *, unsigned long flags); typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *, @@ -101,12 +100,6 @@ struct kprobe { */ kprobe_fault_handler_t fault_handler; - /* - * ... called if breakpoint trap occurs in probe handler. - * Return 1 if it handled break, otherwise kernel will see it. - */ - kprobe_break_handler_t break_handler; - /* Saved opcode (which has been replaced with breakpoint) */ kprobe_opcode_t opcode; @@ -154,24 +147,6 @@ static inline int kprobe_ftrace(struct kprobe *p) return p->flags & KPROBE_FLAG_FTRACE; } -/* - * Special probe type that uses setjmp-longjmp type tricks to resume - * execution at a specified entry with a matching prototype corresponding - * to the probed function - a trick to enable arguments to become - * accessible seamlessly by probe handling logic. - * Note: - * Because of the way compilers allocate stack space for local variables - * etc upfront, regardless of sub-scopes within a function, this mirroring - * principle currently works only for probes placed on function entry points. - */ -struct jprobe { - struct kprobe kp; - void *entry; /* probe handling code to jump to */ -}; - -/* For backward compatibility with old code using JPROBE_ENTRY() */ -#define JPROBE_ENTRY(handler) (handler) - /* * Function-return probe - * Note: @@ -436,9 +411,6 @@ static inline void unregister_kprobe(struct kprobe *p) static inline void unregister_kprobes(struct kprobe **kps, int num) { } -static inline void jprobe_return(void) -{ -} static inline int register_kretprobe(struct kretprobe *rp) { return -ENOSYS; @@ -465,20 +437,6 @@ static inline int enable_kprobe(struct kprobe *kp) return -ENOSYS; } #endif /* CONFIG_KPROBES */ -static inline int register_jprobe(struct jprobe *p) -{ - return -ENOSYS; -} -static inline int register_jprobes(struct jprobe **jps, int num) -{ - return -ENOSYS; -} -static inline void unregister_jprobe(struct jprobe *p) -{ -} -static inline void unregister_jprobes(struct jprobe **jps, int num) -{ -} static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); @@ -487,14 +445,6 @@ static inline int enable_kretprobe(struct kretprobe *rp) { return enable_kprobe(&rp->kp); } -static inline int disable_jprobe(struct jprobe *jp) -{ - return -ENOSYS; -} -static inline int enable_jprobe(struct jprobe *jp) -{ - return -ENOSYS; -} #ifndef CONFIG_KPROBES static inline bool is_kprobe_insn_slot(unsigned long addr) -- cgit From 8bd9cb51daac89337295b6f037b0486911e1b408 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 13:53:08 +0100 Subject: locking/atomics, asm-generic: Move some macros from to a new file In preparation for implementing the asm-generic atomic bitops in terms of atomic_long_*(), we need to prevent implementations from pulling in . A common reason for this include is for the BITS_PER_BYTE definition, so move this and some other BIT() and masking macros into a new header file, . Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: yamada.masahiro@socionext.com Link: https://lore.kernel.org/lkml/1529412794-17720-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 22 +--------------------- include/linux/bits.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 include/linux/bits.h (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 4cac4e1a72ff..af419012d77d 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -2,29 +2,9 @@ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include +#include -#ifdef __KERNEL__ -#define BIT(nr) (1UL << (nr)) -#define BIT_ULL(nr) (1ULL << (nr)) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) -#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) -#define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#endif - -/* - * Create a contiguous bitmask starting at bit position @l and ending at - * position @h. For example - * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. - */ -#define GENMASK(h, l) \ - (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) - -#define GENMASK_ULL(h, l) \ - (((~0ULL) - (1ULL << (l)) + 1) & \ - (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/bits.h b/include/linux/bits.h new file mode 100644 index 000000000000..2b7b532c1d51 --- /dev/null +++ b/include/linux/bits.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITS_H +#define __LINUX_BITS_H +#include + +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) +#define BITS_PER_BYTE 8 + +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) \ + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* __LINUX_BITS_H */ -- cgit From 05736e4ac13c08a4a9b1ef2de26dd31a32cbee57 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 17:48:27 +0200 Subject: cpu/hotplug: Provide knobs to control SMT Provide a command line and a sysfs knob to control SMT. The command line options are: 'nosmt': Enumerate secondary threads, but do not online them 'nosmt=force': Ignore secondary threads completely during enumeration via MP table and ACPI/MADT. The sysfs control file has the following states (read/write): 'on': SMT is enabled. Secondary threads can be freely onlined 'off': SMT is disabled. Secondary threads, even if enumerated cannot be onlined 'forceoff': SMT is permanentely disabled. Writes to the control file are rejected. 'notsupported': SMT is not supported by the CPU The command line option 'nosmt' sets the sysfs control to 'off'. This can be changed to 'on' to reenable SMT during runtime. The command line option 'nosmt=force' sets the sysfs control to 'forceoff'. This cannot be changed during runtime. When SMT is 'on' and the control file is changed to 'off' then all online secondary threads are offlined and attempts to online a secondary thread later on are rejected. When SMT is 'off' and the control file is changed to 'on' then secondary threads can be onlined again. The 'off' -> 'on' transition does not automatically online the secondary threads. When the control file is set to 'forceoff', the behaviour is the same as setting it to 'off', but the operation is irreversible and later writes to the control file are rejected. When the control status is 'notsupported' then writes to the control file are rejected. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- include/linux/cpu.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d3da5184aa1c..7532cbf27b1d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -168,4 +168,17 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +#else +# define cpu_smt_control (CPU_SMT_ENABLED) +#endif + #endif /* _LINUX_CPU_H_ */ -- cgit From bfc18e389c7a09fbbbed6bf4032396685b14246e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:04 +0100 Subject: atomics/treewide: Rename __atomic_add_unless() => atomic_fetch_add_unless() While __atomic_add_unless() was originally intended as a building-block for atomic_add_unless(), it's now used in a number of places around the kernel. It's the only common atomic operation named __atomic*(), rather than atomic_*(), and for consistency it would be better named atomic_fetch_add_unless(). This lack of consistency is slightly confusing, and gets in the way of scripting atomics. Given that, let's clean things up and promote it to an official part of the atomics API, in the form of atomic_fetch_add_unless(). This patch converts definitions and invocations over to the new name, including the instrumented version, using the following script: ---- git grep -w __atomic_add_unless | while read line; do sed -i '{s/\<__atomic_add_unless\>/atomic_fetch_add_unless/}' "${line%%:*}"; done git grep -w __arch_atomic_add_unless | while read line; do sed -i '{s/\<__arch_atomic_add_unless\>/arch_atomic_fetch_add_unless/}' "${line%%:*}"; done ---- Note that we do not have atomic{64,_long}_fetch_add_unless(), which will be introduced by later patches. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-2-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 01ce3997cb42..9cc982936675 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -530,7 +530,7 @@ */ static inline int atomic_add_unless(atomic_t *v, int a, int u) { - return __atomic_add_unless(v, a, u) != u; + return atomic_fetch_add_unless(v, a, u) != u; } /** -- cgit From f74445b6dd6b8f33ed34e005d19ecbb49171dabf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:06 +0100 Subject: atomics/treewide: Remove atomic_inc_not_zero_hint() While documentation suggests atomic_inc_not_zero_hint() will perform better than atomic_inc_not_zero(), this is unlikely to be the case. No architectures implement atomic_inc_not_zero_hint() directly, and thus it either falls back to atomic_inc_not_zero(), or a loop using atomic_cmpxchg(). Whenever the hint does not match the value in memory, the repeated use of atomic_cmpxchg() will be more expensive than the read that atomic_inc_not_zero_hint() attempts to avoid. For architectures with LL/SC atomics, a read cannot be avoided, and it would always be better to use atomic_inc_not_zero() directly. For other architectures, their own atomic_inc_not_zero() is likely to be more optimal than an atomic_cmpxchg() loop regardless. Generally, atomic_inc_not_zero_hint() is liable to perform worse than atomic_inc_not_zero(). Further, atomic_inc_not_zero_hint() only exists for atomic_t, and not atomic64_t or atomic_long_t, and there is only one user in the kernel tree. Given all this, let's remove atomic_inc_not_zero_hint(), and migrate the existing user over to atomic_inc_not_zero(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-4-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 9cc982936675..5c5620ae5a35 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -571,38 +571,6 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) } #endif -/** - * atomic_inc_not_zero_hint - increment if not null - * @v: pointer of type atomic_t - * @hint: probable value of the atomic before the increment - * - * This version of atomic_inc_not_zero() gives a hint of probable - * value of the atomic. This helps processor to not read the memory - * before doing the atomic read/modify/write cycle, lowering - * number of bus transactions on some arches. - * - * Returns: 0 if increment was not done, 1 otherwise. - */ -#ifndef atomic_inc_not_zero_hint -static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) -{ - int val, c = hint; - - /* sanity test, should be removed by compiler if hint is a constant */ - if (!hint) - return atomic_inc_not_zero(v); - - do { - val = atomic_cmpxchg(v, c, c + 1); - if (val == c) - return 1; - c = val; - } while (c); - - return 0; -} -#endif - #ifndef atomic_inc_unless_negative static inline int atomic_inc_unless_negative(atomic_t *p) { -- cgit From ade5ef9280c33993099199c51e2e27c2c4013afd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:07 +0100 Subject: atomics: Make conditional ops return 'bool' Some of the atomics return a status value, which is a boolean value describing whether the operation was performed. To make it clear that this is a boolean value, let's update the common fallbacks to return bool, fixing up the return values and comments likewise. At the same time, let's simplify the description of the operations in their respective comments. The instrumented atomics and generic atomic64 implementation are updated accordingly. Note that atomic64_dec_if_positive() doesn't follow the usual test op pattern, and returns the would-be decremented value. This is not changed. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Michael Ellerman Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-5-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 5c5620ae5a35..307a7f6d619a 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -2,6 +2,8 @@ /* Atomic operations usable in machine independent code */ #ifndef _LINUX_ATOMIC_H #define _LINUX_ATOMIC_H +#include + #include #include @@ -525,10 +527,10 @@ * @a: the amount to add to v... * @u: ...unless v is equal to u. * - * Atomically adds @a to @v, so long as @v was not already @u. - * Returns non-zero if @v was not @u, and zero otherwise. + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) +static inline bool atomic_add_unless(atomic_t *v, int a, int u) { return atomic_fetch_add_unless(v, a, u) != u; } @@ -537,8 +539,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) * atomic_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic_t * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. */ #ifndef atomic_inc_not_zero #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -572,28 +574,28 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline int atomic_inc_unless_negative(atomic_t *p) +static inline bool atomic_inc_unless_negative(atomic_t *p) { int v, v1; for (v = 0; v >= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v + 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif #ifndef atomic_dec_unless_positive -static inline int atomic_dec_unless_positive(atomic_t *p) +static inline bool atomic_dec_unless_positive(atomic_t *p) { int v, v1; for (v = 0; v <= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v - 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif -- cgit From bef828204a1bc7a0fd3a24551c4265e9c2ab95ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:08 +0100 Subject: atomics/treewide: Make atomic64_inc_not_zero() optional We define a trivial fallback for atomic_inc_not_zero(), but don't do the same for atomic64_inc_not_zero(), leading most architectures to define the same boilerplate. Let's add a fallback in , and remove the redundant implementations. Note that atomic64_add_unless() is always defined in , and promotes its arguments to the requisite types, so we need not do this explicitly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-6-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 307a7f6d619a..ae3f30923d05 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1019,6 +1019,17 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +#ifndef atomic64_inc_not_zero +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit From eccc2da8c03f316bba202e15af2be4615f461900 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:09 +0100 Subject: atomics/treewide: Make atomic_fetch_add_unless() optional Several architectures these have a near-identical implementation based on atomic_read() and atomic_cmpxchg() which we can instead define in , so let's do so, using something close to the existing x86 implementation with try_cmpxchg(). Where an architecture provides its own atomic_fetch_add_unless(), it must define a preprocessor symbol for it. The instrumented atomics are updated accordingly. Note that arch/arc's existing atomic_fetch_add_unless() had redundant barriers, as these are already present in its atomic_cmpxchg() implementation. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Geert Uytterhoeven Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vineet Gupta Link: https://lore.kernel.org/lkml/20180621121321.4761-7-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index ae3f30923d05..b89ba36cab94 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -521,6 +521,29 @@ #endif #endif /* xchg_relaxed */ +/** + * atomic_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns the original value of @v. + */ +#ifndef atomic_fetch_add_unless +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c = atomic_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic_try_cmpxchg(v, &c, c + a)); + + return c; +} +#endif + /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t -- cgit From 0ae1d994020d75ac065fd42ac4cbf5ac6ce9b255 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:10 +0100 Subject: atomics: Prepare for atomic64_fetch_add_unless() Currently all architectures must implement atomic_fetch_add_unless(), with common code providing atomic_add_unless(). Architectures must also implement atomic64_add_unless() directly, with no corresponding atomic64_fetch_add_unless(). This divergence is unfortunate, and means that the APIs for atomic_t, atomic64_t, and atomic_long_t differ. In preparation for unifying things, with architectures providing atomic64_fetch_add_unless, this patch adds a generic atomic64_add_unless() which will use atomic64_fetch_add_unless(). The instrumented atomics are updated to take this case into account. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Albert Ou Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Richard Henderson Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Link: https://lore.kernel.org/lkml/20180621121321.4761-8-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index b89ba36cab94..3c03de648007 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1042,6 +1042,22 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +#ifdef atomic64_fetch_add_unless +static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + return atomic64_fetch_add_unless(v, a, u) != u; +} +#endif + /** * atomic64_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic64_t -- cgit From 356701329fb391184618eda7b7fb68cb35271506 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:17 +0100 Subject: atomics/treewide: Make atomic64_fetch_add_unless() optional Architectures with atomic64_fetch_add_unless() provide a preprocessor symbol if they do so, and all other architectures have trivial C implementations of atomic64_add_unless() which are near-identical. Let's unify the trivial definitions of atomic64_fetch_add_unless() in , so that we always have both atomic64_fetch_add_unless() and atomic64_add_unless() with less boilerplate code. This means that atomic64_add_unless() is always implemented in core code, and the instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-15-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3c03de648007..530562ac7909 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1042,6 +1042,30 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns the original value of @v. + */ +#ifndef atomic64_fetch_add_unless +static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, + long long u) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic64_try_cmpxchg(v, &c, c + a)); + + return c; +} +#endif + /** * atomic64_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t @@ -1051,12 +1075,10 @@ static inline int atomic_dec_if_positive(atomic_t *v) * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ -#ifdef atomic64_fetch_add_unless static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) { return atomic64_fetch_add_unless(v, a, u) != u; } -#endif /** * atomic64_inc_not_zero - increment unless the number is zero -- cgit From 18cc1814d4e7560412c9c8c6d28f9d6782c8b402 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:18 +0100 Subject: atomics/treewide: Make test ops optional Some of the atomics return the result of a test applied after the atomic operation, and almost all architectures implement these as trivial wrappers around the underlying atomic. Specifically: * _inc_and_test(v) is (_inc_return(v) == 0) * _dec_and_test(v) is (_dec_return(v) == 0) * _sub_and_test(i, v) is (_sub_return(i, v) == 0) * _add_negative(i, v) is (_add_return(i, v) < 0) Rather than have these definitions duplicated in all architectures, with minor inconsistencies in formatting and documentation, let's make these operations optional, with default fallbacks as above. Implementations must now provide a preprocessor symbol. The instrumented atomics are updated accordingly. Both x86 and m68k have custom implementations, which are left as-is, given preprocessor symbols to avoid being overridden. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-16-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 530562ac7909..3ee8da9023cd 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -569,6 +569,68 @@ static inline bool atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #endif +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic_inc_and_test +static inline bool atomic_inc_and_test(atomic_t *v) +{ + return atomic_inc_return(v) == 0; +} +#endif + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +#ifndef atomic_dec_and_test +static inline bool atomic_dec_and_test(atomic_t *v) +{ + return atomic_dec_return(v) == 0; +} +#endif + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic_sub_and_test +static inline bool atomic_sub_and_test(int i, atomic_t *v) +{ + return atomic_sub_return(i, v) == 0; +} +#endif + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +#ifndef atomic_add_negative +static inline bool atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} +#endif + #ifndef atomic_andnot static inline void atomic_andnot(int i, atomic_t *v) { @@ -1091,6 +1153,68 @@ static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #endif +/** + * atomic64_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic64_inc_and_test +static inline bool atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} +#endif + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer of type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +#ifndef atomic64_dec_and_test +static inline bool atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} +#endif + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic64_sub_and_test +static inline bool atomic64_sub_and_test(long long i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} +#endif + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +#ifndef atomic64_add_negative +static inline bool atomic64_add_negative(long long i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit From 9837559d8eb01ce834e56fc9a567c1d94ebd3698 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:19 +0100 Subject: atomics/treewide: Make unconditional inc/dec ops optional Many of the inc/dec ops are mandatory, but for most architectures inc/dec are simply trivial wrappers around their corresponding add/sub ops. Let's make all the inc/dec ops optional, so that we can get rid of these boilerplate wrappers. The instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-17-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3ee8da9023cd..24f345df7ba6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -97,11 +97,23 @@ #endif #endif /* atomic_add_return_relaxed */ +#ifndef atomic_inc +#define atomic_inc(v) atomic_add(1, (v)) +#endif + /* atomic_inc_return_relaxed */ #ifndef atomic_inc_return_relaxed + +#ifndef atomic_inc_return +#define atomic_inc_return(v) atomic_add_return(1, (v)) +#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) +#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) +#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) +#else /* atomic_inc_return */ #define atomic_inc_return_relaxed atomic_inc_return #define atomic_inc_return_acquire atomic_inc_return #define atomic_inc_return_release atomic_inc_return +#endif /* atomic_inc_return */ #else /* atomic_inc_return_relaxed */ @@ -145,11 +157,23 @@ #endif #endif /* atomic_sub_return_relaxed */ +#ifndef atomic_dec +#define atomic_dec(v) atomic_sub(1, (v)) +#endif + /* atomic_dec_return_relaxed */ #ifndef atomic_dec_return_relaxed + +#ifndef atomic_dec_return +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) +#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) +#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) +#else /* atomic_dec_return */ #define atomic_dec_return_relaxed atomic_dec_return #define atomic_dec_return_acquire atomic_dec_return #define atomic_dec_return_release atomic_dec_return +#endif /* atomic_dec_return */ #else /* atomic_dec_return_relaxed */ @@ -748,11 +772,23 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_add_return_relaxed */ +#ifndef atomic64_inc +#define atomic64_inc(v) atomic64_add(1, (v)) +#endif + /* atomic64_inc_return_relaxed */ #ifndef atomic64_inc_return_relaxed + +#ifndef atomic64_inc_return +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) +#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) +#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) +#else /* atomic64_inc_return */ #define atomic64_inc_return_relaxed atomic64_inc_return #define atomic64_inc_return_acquire atomic64_inc_return #define atomic64_inc_return_release atomic64_inc_return +#endif /* atomic64_inc_return */ #else /* atomic64_inc_return_relaxed */ @@ -797,11 +833,23 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_sub_return_relaxed */ +#ifndef atomic64_dec +#define atomic64_dec(v) atomic64_sub(1, (v)) +#endif + /* atomic64_dec_return_relaxed */ #ifndef atomic64_dec_return_relaxed + +#ifndef atomic64_dec_return +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) +#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) +#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) +#else /* atomic64_dec_return */ #define atomic64_dec_return_relaxed atomic64_dec_return #define atomic64_dec_return_acquire atomic64_dec_return #define atomic64_dec_return_release atomic64_dec_return +#endif /* atomic64_dec_return */ #else /* atomic64_dec_return_relaxed */ -- cgit From b3a2a05f9111de0b79312e577608a27b0318c0a1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:20 +0100 Subject: atomics/treewide: Make conditional inc/dec ops optional The conditional inc/dec ops differ for atomic_t and atomic64_t: - atomic_inc_unless_positive() is optional for atomic_t, and doesn't exist for atomic64_t. - atomic_dec_unless_negative() is optional for atomic_t, and doesn't exist for atomic64_t. - atomic_dec_if_positive is optional for atomic_t, and is mandatory for atomic64_t. Let's make these consistently optional for both. At the same time, let's clean up the existing fallbacks to use atomic_try_cmpxchg(). The instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-18-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 97 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 24f345df7ba6..93fe5b4041e1 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -683,28 +683,30 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline bool atomic_inc_unless_negative(atomic_t *p) +static inline bool atomic_inc_unless_negative(atomic_t *v) { - int v, v1; - for (v = 0; v >= 0; v = v1) { - v1 = atomic_cmpxchg(p, v, v + 1); - if (likely(v1 == v)) - return true; - } - return false; + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; } #endif #ifndef atomic_dec_unless_positive -static inline bool atomic_dec_unless_positive(atomic_t *p) +static inline bool atomic_dec_unless_positive(atomic_t *v) { - int v, v1; - for (v = 0; v <= 0; v = v1) { - v1 = atomic_cmpxchg(p, v, v - 1); - if (likely(v1 == v)) - return true; - } - return false; + int c = atomic_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c - 1)); + + return true; } #endif @@ -718,17 +720,14 @@ static inline bool atomic_dec_unless_positive(atomic_t *p) #ifndef atomic_dec_if_positive static inline int atomic_dec_if_positive(atomic_t *v) { - int c, old, dec; - c = atomic_read(v); - for (;;) { + int dec, c = atomic_read(v); + + do { dec = c - 1; if (unlikely(dec < 0)) break; - old = atomic_cmpxchg((v), c, dec); - if (likely(old == c)) - break; - c = old; - } + } while (!atomic_try_cmpxchg(v, &c, dec)); + return dec; } #endif @@ -1290,6 +1289,56 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v } #endif +#ifndef atomic64_inc_unless_negative +static inline bool atomic64_inc_unless_negative(atomic64_t *v) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#endif + +#ifndef atomic64_dec_unless_positive +static inline bool atomic64_dec_unless_positive(atomic64_t *v) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#endif + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic64 variable, v, was not decremented. + */ +#ifndef atomic64_dec_if_positive +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long dec, c = atomic64_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic64_try_cmpxchg(v, &c, dec)); + + return dec; +} +#endif + #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) #define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) -- cgit From 7cc7eaad49c30ac165ecf84d95b26f7e0d53bd97 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:21 +0100 Subject: atomics/treewide: Clean up '*_andnot()' ifdeffery The ifdeffery for atomic*_{fetch_,}andnot() is unlike that for all the other atomics. If atomic*_andnot() is not defined, the corresponding atomic*_fetch_andnot() is assumed to not be defined. Additionally, the fallbacks for the various ordering cases are written much later in atomic.h as static inlines. This isn't problematic today, but gets in the way of scripting the generation of atomics. To prepare for scripting, this patch: * Switches to separate ifdefs for atomic*_andnot() and atomic*_fetch_andnot(), updating implementations as appropriate. * Moves the fallbacks into the standards ifdefs, as macro expansions rather than static inlines. * Removes trivial andnot implementations from architectures, where these are superseded by core code. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-19-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 96 ++++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 93fe5b4041e1..8e04f1f69bd9 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -354,12 +354,22 @@ #endif #endif /* atomic_fetch_and_relaxed */ -#ifdef atomic_andnot -/* atomic_fetch_andnot_relaxed */ +#ifndef atomic_andnot +#define atomic_andnot(i, v) atomic_and(~(int)(i), (v)) +#endif + #ifndef atomic_fetch_andnot_relaxed -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot -#define atomic_fetch_andnot_acquire atomic_fetch_andnot -#define atomic_fetch_andnot_release atomic_fetch_andnot + +#ifndef atomic_fetch_andnot +#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v)) +#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v)) +#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v)) +#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v)) +#else /* atomic_fetch_andnot */ +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot +#endif /* atomic_fetch_andnot */ #else /* atomic_fetch_andnot_relaxed */ @@ -378,7 +388,6 @@ __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) #endif #endif /* atomic_fetch_andnot_relaxed */ -#endif /* atomic_andnot */ /* atomic_fetch_xor_relaxed */ #ifndef atomic_fetch_xor_relaxed @@ -655,33 +664,6 @@ static inline bool atomic_add_negative(int i, atomic_t *v) } #endif -#ifndef atomic_andnot -static inline void atomic_andnot(int i, atomic_t *v) -{ - atomic_and(~i, v); -} - -static inline int atomic_fetch_andnot(int i, atomic_t *v) -{ - return atomic_fetch_and(~i, v); -} - -static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) -{ - return atomic_fetch_and_relaxed(~i, v); -} - -static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) -{ - return atomic_fetch_and_acquire(~i, v); -} - -static inline int atomic_fetch_andnot_release(int i, atomic_t *v) -{ - return atomic_fetch_and_release(~i, v); -} -#endif - #ifndef atomic_inc_unless_negative static inline bool atomic_inc_unless_negative(atomic_t *v) { @@ -1029,12 +1011,22 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_fetch_and_relaxed */ -#ifdef atomic64_andnot -/* atomic64_fetch_andnot_relaxed */ +#ifndef atomic64_andnot +#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v)) +#endif + #ifndef atomic64_fetch_andnot_relaxed -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot -#define atomic64_fetch_andnot_release atomic64_fetch_andnot + +#ifndef atomic64_fetch_andnot +#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v)) +#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v)) +#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v)) +#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v)) +#else /* atomic64_fetch_andnot */ +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot +#endif /* atomic64_fetch_andnot */ #else /* atomic64_fetch_andnot_relaxed */ @@ -1053,7 +1045,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) #endif #endif /* atomic64_fetch_andnot_relaxed */ -#endif /* atomic64_andnot */ /* atomic64_fetch_xor_relaxed */ #ifndef atomic64_fetch_xor_relaxed @@ -1262,33 +1253,6 @@ static inline bool atomic64_add_negative(long long i, atomic64_t *v) } #endif -#ifndef atomic64_andnot -static inline void atomic64_andnot(long long i, atomic64_t *v) -{ - atomic64_and(~i, v); -} - -static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v) -{ - return atomic64_fetch_and(~i, v); -} - -static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_relaxed(~i, v); -} - -static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_acquire(~i, v); -} - -static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_release(~i, v); -} -#endif - #ifndef atomic64_inc_unless_negative static inline bool atomic64_inc_unless_negative(atomic64_t *v) { -- cgit From 75a040ff14d9a99fc041f5e1d8f09541cab13ba4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 1 Apr 2018 01:00:36 +0300 Subject: locking/refcounts: Include fewer headers in Debloat 's dependencies: - is not needed, but is. - is not needed, only a forward declaration of "struct mutex". - is not needed, is enough. Signed-off-by: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180331220036.GA7676@avx2 Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4193c41e383a..c36addd27dd5 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -3,9 +3,10 @@ #define _LINUX_REFCOUNT_H #include -#include -#include -#include +#include +#include + +struct mutex; /** * struct refcount_t - variant of atomic_t specialized for reference counts -- cgit From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: split rhashtable.h Due to the use of rhashtables in net namespaces, rhashtable.h is included in lots of the kernel, so a small changes can required a large recompilation. This makes development painful. This patch splits out rhashtable-types.h which just includes the major type declarations, and does not include (non-trivial) inline code. rhashtable.h is no longer included by anything in the include/ directory. Common include files only include rhashtable-types.h so a large recompilation is only triggered when that changes. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/ipc.h | 2 +- include/linux/ipc_namespace.h | 2 +- include/linux/mroute_base.h | 2 +- include/linux/rhashtable-types.h | 139 +++++++++++++++++++++++++++++++++++++++ include/linux/rhashtable.h | 127 +---------------------------------- 5 files changed, 144 insertions(+), 128 deletions(-) create mode 100644 include/linux/rhashtable-types.h (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 6cc2df7f7ac9..e1c9eea6015b 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index b5630c8eb2f3..6cea726612b7 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct user_namespace; diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index d633f737b3c6..fd436cdd4725 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -2,7 +2,7 @@ #define __LINUX_MROUTE_BASE_H #include -#include +#include #include #include #include diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h new file mode 100644 index 000000000000..9740063ff13b --- /dev/null +++ b/include/linux/rhashtable-types.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Simple structures that might be needed in include + * files. + */ + +#ifndef _LINUX_RHASHTABLE_TYPES_H +#define _LINUX_RHASHTABLE_TYPES_H + +#include +#include +#include +#include + +struct rhash_head { + struct rhash_head __rcu *next; +}; + +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + +struct bucket_table; + +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) + * @automatic_shrinking: Enable automatic shrinking of tables + * @nulls_base: Base value to generate nulls marker + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) + * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object + */ +struct rhashtable_params { + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; + unsigned int max_size; + u16 min_size; + bool automatic_shrinking; + u8 locks_mul; + u32 nulls_base; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @key_len: Key length for hashfn + * @max_elems: Maximum number of elements in table + * @p: Configuration parameters + * @rhlist: True if this is an rhltable + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + unsigned int key_len; + unsigned int max_elems; + struct rhashtable_params p; + bool rhlist; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; + atomic_t nelems; +}; + +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @tbl: The table that we were walking over + */ +struct rhashtable_walker { + struct list_head list; + struct bucket_table *tbl; +}; + +/** + * struct rhashtable_iter - Hash table iterator + * @ht: Table to iterate through + * @p: Current pointer + * @list: Current hash list pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhlist_head *list; + struct rhashtable_walker walker; + unsigned int slot; + unsigned int skip; + bool end_of_table; +}; + +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); + +#endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4e1f535c2034..48754ab07cdf 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * @@ -17,16 +18,14 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include -#include #include #include #include #include #include -#include #include +#include /* * The end of the chain is marked with a special nulls marks which has * the following format: @@ -64,15 +63,6 @@ */ #define RHT_ELASTICITY 16u -struct rhash_head { - struct rhash_head __rcu *next; -}; - -struct rhlist_head { - struct rhash_head rhead; - struct rhlist_head __rcu *next; -}; - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -102,114 +92,6 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -/** - * struct rhashtable_compare_arg - Key for the function rhashtable_compare - * @ht: Hash table - * @key: Key to compare against - */ -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; -}; - -typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); -typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, - const void *obj); - -struct rhashtable; - -/** - * struct rhashtable_params - Hash table construction parameters - * @nelem_hint: Hint on number of elements, should be 75% of desired size - * @key_len: Length of key - * @key_offset: Offset of key in struct to be hashed - * @head_offset: Offset of rhash_head in struct to be hashed - * @max_size: Maximum size while expanding - * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) - * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker - * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) - * @obj_hashfn: Function to hash object - * @obj_cmpfn: Function to compare key with object - */ -struct rhashtable_params { - u16 nelem_hint; - u16 key_len; - u16 key_offset; - u16 head_offset; - unsigned int max_size; - u16 min_size; - bool automatic_shrinking; - u8 locks_mul; - u32 nulls_base; - rht_hashfn_t hashfn; - rht_obj_hashfn_t obj_hashfn; - rht_obj_cmpfn_t obj_cmpfn; -}; - -/** - * struct rhashtable - Hash table handle - * @tbl: Bucket table - * @key_len: Key length for hashfn - * @max_elems: Maximum number of elements in table - * @p: Configuration parameters - * @rhlist: True if this is an rhltable - * @run_work: Deferred worker to expand/shrink asynchronously - * @mutex: Mutex to protect current/future table swapping - * @lock: Spin lock to protect walker list - * @nelems: Number of elements in table - */ -struct rhashtable { - struct bucket_table __rcu *tbl; - unsigned int key_len; - unsigned int max_elems; - struct rhashtable_params p; - bool rhlist; - struct work_struct run_work; - struct mutex mutex; - spinlock_t lock; - atomic_t nelems; -}; - -/** - * struct rhltable - Hash table with duplicate objects in a list - * @ht: Underlying rhtable - */ -struct rhltable { - struct rhashtable ht; -}; - -/** - * struct rhashtable_walker - Hash table walker - * @list: List entry on list of walkers - * @tbl: The table that we were walking over - */ -struct rhashtable_walker { - struct list_head list; - struct bucket_table *tbl; -}; - -/** - * struct rhashtable_iter - Hash table iterator - * @ht: Table to iterate through - * @p: Current pointer - * @list: Current hash list pointer - * @walker: Associated rhashtable walker - * @slot: Current slot - * @skip: Number of entries to skip in slot - */ -struct rhashtable_iter { - struct rhashtable *ht; - struct rhash_head *p; - struct rhlist_head *list; - struct rhashtable_walker walker; - unsigned int slot; - unsigned int skip; - bool end_of_table; -}; - static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -376,11 +258,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, - const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, - const struct rhashtable_params *params); - void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); -- cgit From 9f9a707738aa7a8b9f78a641b83927ada256a626 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: remove nulls_base and related code. This "feature" is unused, undocumented, and untested and so doesn't really belong. A patch is under development to properly implement support for detecting when a search gets diverted down a different chain, which the common purpose of nulls markers. This patch actually fixes a bug too. The table resizing allows a table to grow to 2^31 buckets, but the hash is truncated to 27 bits - any growth beyond 2^27 is wasteful an ineffective. This patch results in NULLS_MARKER(0) being used for all chains, and leaves the use of rht_is_a_null() to test for it. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable-types.h | 2 -- include/linux/rhashtable.h | 33 +++------------------------------ 2 files changed, 3 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 9740063ff13b..763d613ce2c2 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -50,7 +50,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @min_size: Minimum size while shrinking * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object @@ -64,7 +63,6 @@ struct rhashtable_params { u16 min_size; bool automatic_shrinking; u8 locks_mul; - u32 nulls_base; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 48754ab07cdf..d9f719af7936 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -28,25 +28,8 @@ #include /* * The end of the chain is marked with a special nulls marks which has - * the following format: - * - * +-------+-----------------------------------------------------+-+ - * | Base | Hash |1| - * +-------+-----------------------------------------------------+-+ - * - * Base (4 bits) : Reserved to distinguish between multiple tables. - * Specified via &struct rhashtable_params.nulls_base. - * Hash (27 bits): Full hash (unmasked) of first element added to bucket - * 1 (1 bit) : Nulls marker (always set) - * - * The remaining bits of the next pointer remain unused for now. + * the least significant bit set. */ -#define RHT_BASE_BITS 4 -#define RHT_HASH_BITS 27 -#define RHT_BASE_SHIFT RHT_HASH_BITS - -/* Base bits plus 1 bit for nulls marker */ -#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) /* Maximum chain length before rehash * @@ -92,24 +75,14 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) -{ - return NULLS_MARKER(ht->p.nulls_base + hash); -} - #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ - ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } -static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) -{ - return ((unsigned long) ptr) >> 1; -} - static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { @@ -119,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht, static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { - return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); + return hash & (tbl->size - 1); } static inline unsigned int rht_key_get_hash(struct rhashtable *ht, -- cgit From 9b4f64a227b6f462482a8cc68c7134dc6e26f1c1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: simplify INIT_RHT_NULLS_HEAD() The 'ht' and 'hash' arguments to INIT_RHT_NULLS_HEAD() are no longer used - so drop them. This allows us to also remove the nhash argument from nested_table_alloc(). Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d9f719af7936..3f3a182bd0b4 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -75,7 +75,7 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ +#define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) -- cgit From c0690016a73fe6bd456887bbbe6e10c7f0096554 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: clean up dereference of ->future_tbl. Using rht_dereference_bucket() to dereference ->future_tbl looks like a type error, and could be confusing. Using rht_dereference_rcu() to test a pointer for NULL adds an unnecessary barrier - rcu_access_pointer() is preferred for NULL tests when no lock is held. This uses 3 different ways to access ->future_tbl. - if we know the mutex is held, use rht_dereference() - if we don't hold the mutex, and are only testing for NULL, use rcu_access_pointer() - otherwise (using RCU protection for true dereference), use rht_dereference_rcu(). Note that this includes a simplification of the call to rhashtable_last_table() - we don't do an extra dereference before the call any more. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3f3a182bd0b4..eb7111039247 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -595,7 +595,7 @@ static inline void *__rhashtable_insert_fast( lock = rht_bucket_lock(tbl, hash); spin_lock_bh(lock); - if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { + if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: spin_unlock_bh(lock); rcu_read_unlock(); -- cgit From 3f6c65d6255a872846c44182c82c78d3dc6239f5 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 19 Jun 2018 21:42:50 -0700 Subject: tcp: ignore rcv_rtt sample with old ts ecr value When receiving multiple packets with the same ts ecr value, only try to compute rcv_rtt sample with the earliest received packet. This is because the rcv_rtt calculated by later received packets could possibly include long idle time or other types of delay. For example: (1) server sends last packet of reply with TS val V1 (2) client ACKs last packet of reply with TS ecr V1 (3) long idle time passes (4) client sends next request data packet with TS ecr V1 (again!) At this time, the rcv_rtt computed on server with TS ecr V1 will be inflated with the idle time and should get ignored. Signed-off-by: Wei Wang Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 72705eaf4b84..3dbea6610304 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -350,6 +350,7 @@ struct tcp_sock { #endif /* Receiver side RTT estimation */ + u32 rcv_rtt_last_tsecr; struct { u32 rtt_us; u32 seq; -- cgit From 63a67a926e214dac94e29147c0f3d11499f655a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2018 17:16:44 -0400 Subject: kill dentry_update_name_case() the last user is gone Spotted-by: Richard Weinberger Signed-off-by: Al Viro --- include/linux/dcache.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 66c6e17e61e5..cee70bf207fc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -271,8 +271,6 @@ extern void d_rehash(struct dentry *); extern void d_add(struct dentry *, struct inode *); -extern void dentry_update_name_case(struct dentry *, const struct qstr *); - /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); -- cgit From d0dd63a8aee1ef89f2e48e554b796b9f9e4fcadb Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 16 Jun 2018 22:11:42 -0700 Subject: time: Introduce struct __kernel_itimerspec struct itimerspec is not y2038-safe. Introduce a new struct __kernel_itimerspec based on the kernel internal y2038-safe struct itimerspec64. The definition of struct __kernel_itimerspec includes two struct __kernel_timespec. Since struct __kernel_timespec has the same representation in native and compat modes, so does struct __kernel_itimerspec. This helps have a common entry point for syscalls using struct __kernel_itimerspec. New y2038-safe syscalls will use this new type. Since most of the new syscalls are just an update to the native syscalls with the type update, place the new definition under CONFIG_64BIT_TIME. This helps architectures that do not support the above config to keep using the old definition of struct itimerspec. Also change the get/put_itimerspec64 to use struct__kernel_itimerspec. This will help 32 bit architectures to use the new syscalls when architectures select CONFIG_64BIT_TIME. Signed-off-by: Deepa Dinamani Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: viro@zeniv.linux.org.uk Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: y2038@lists.linaro.org Link: https://lkml.kernel.org/r/20180617051144.29756-2-deepa.kernel@gmail.com --- include/linux/time.h | 4 ++-- include/linux/time64.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index aed74463592d..27d83fd2ae61 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -14,9 +14,9 @@ int get_timespec64(struct timespec64 *ts, int put_timespec64(const struct timespec64 *ts, struct __kernel_timespec __user *uts); int get_itimerspec64(struct itimerspec64 *it, - const struct itimerspec __user *uit); + const struct __kernel_itimerspec __user *uit); int put_itimerspec64(const struct itimerspec64 *it, - struct itimerspec __user *uit); + struct __kernel_itimerspec __user *uit); extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, diff --git a/include/linux/time64.h b/include/linux/time64.h index 0a7b2f79cec7..05634afba0db 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -12,6 +12,7 @@ typedef __u64 timeu64_t; */ #ifndef CONFIG_64BIT_TIME #define __kernel_timespec timespec +#define __kernel_itimerspec itimerspec #endif #include -- cgit From afef05cf238cfcecdc5ea9b06f31027b13ce6214 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 16 Jun 2018 22:11:43 -0700 Subject: time: Enable get/put_compat_itimerspec64 always This will aid in enabling the compat syscalls on 32-bit architectures later on. Also move compat_itimerspec and related defines to compat_time.h. The compat_time.h file will eventually be deleted. Signed-off-by: Deepa Dinamani Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: viro@zeniv.linux.org.uk Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: y2038@lists.linaro.org Link: https://lkml.kernel.org/r/20180617051144.29756-3-deepa.kernel@gmail.com --- include/linux/compat.h | 9 --------- include/linux/compat_time.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index b1a5562b3215..18a13f2df14b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -109,11 +109,6 @@ typedef compat_ulong_t compat_aio_context_t; struct compat_sel_arg_struct; struct rusage; -struct compat_itimerspec { - struct compat_timespec it_interval; - struct compat_timespec it_value; -}; - struct compat_utimbuf { compat_time_t actime; compat_time_t modtime; @@ -294,10 +289,6 @@ extern int compat_get_timespec(struct timespec *, const void __user *); extern int compat_put_timespec(const struct timespec *, void __user *); extern int compat_get_timeval(struct timeval *, const void __user *); extern int compat_put_timeval(const struct timeval *, void __user *); -extern int get_compat_itimerspec64(struct itimerspec64 *its, - const struct compat_itimerspec __user *uits); -extern int put_compat_itimerspec64(const struct itimerspec64 *its, - struct compat_itimerspec __user *uits); struct compat_iovec { compat_uptr_t iov_base; diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h index 31f2774f1994..e70bfd1d2c3f 100644 --- a/include/linux/compat_time.h +++ b/include/linux/compat_time.h @@ -17,7 +17,16 @@ struct compat_timeval { s32 tv_usec; }; +struct compat_itimerspec { + struct compat_timespec it_interval; + struct compat_timespec it_value; +}; + extern int compat_get_timespec64(struct timespec64 *, const void __user *); extern int compat_put_timespec64(const struct timespec64 *, void __user *); +extern int get_compat_itimerspec64(struct itimerspec64 *its, + const struct compat_itimerspec __user *uits); +extern int put_compat_itimerspec64(const struct itimerspec64 *its, + struct compat_itimerspec __user *uits); #endif /* _LINUX_COMPAT_TIME_H */ -- cgit From 6ff84735070276d72af716e21c3214ee20d60e70 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 16 Jun 2018 22:11:44 -0700 Subject: time: Change types to new y2038 safe __kernel_itimerspec timer_set/gettime and timerfd_set/get apis use struct itimerspec at the user interface layer. struct itimerspec is not y2038-safe. Change these interfaces to use y2038-safe struct __kernel_itimerspec instead. This will help define new syscalls when 32bit architectures select CONFIG_64BIT_TIME. Signed-off-by: Deepa Dinamani Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: viro@zeniv.linux.org.uk Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: y2038@lists.linaro.org Link: https://lkml.kernel.org/r/20180617051144.29756-4-deepa.kernel@gmail.com --- include/linux/syscalls.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 73810808cdf2..38b9ec152024 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -501,9 +501,9 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, /* fs/timerfd.c */ asmlinkage long sys_timerfd_create(int clockid, int flags); asmlinkage long sys_timerfd_settime(int ufd, int flags, - const struct itimerspec __user *utmr, - struct itimerspec __user *otmr); -asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); + const struct __kernel_itimerspec __user *utmr, + struct __kernel_itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr); /* fs/utimes.c */ asmlinkage long sys_utimensat(int dfd, const char __user *filename, @@ -568,10 +568,10 @@ asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, timer_t __user * created_timer_id); asmlinkage long sys_timer_gettime(timer_t timer_id, - struct itimerspec __user *setting); + struct __kernel_itimerspec __user *setting); asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, + const struct __kernel_itimerspec __user *new_setting, struct itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, -- cgit From faef87723ace12e5f685437c1e68cbecb69a7a89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Jun 2018 13:08:51 +0200 Subject: dma-noncoherent: add a arch_sync_dma_for_cpu_all hook The MIPS bmips platform needs a global flush when transferring ownership back to the CPU. Add a hook for that to the dma-noncoherent implementation. Signed-off-by: Christoph Hellwig Patchwork: https://patchwork.linux-mips.org/patch/19549/ Signed-off-by: Paul Burton Cc: Florian Fainelli Cc: David Daney Cc: Kevin Cernekee Cc: Jiaxun Yang Cc: Tom Bogendoerfer Cc: Huacai Chen Cc: iommu@lists.linux-foundation.org Cc: linux-mips@linux-mips.org --- include/linux/dma-noncoherent.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 10b2654d549b..a0aa00cc909d 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -44,4 +44,12 @@ static inline void arch_sync_dma_for_cpu(struct device *dev, } #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL +void arch_sync_dma_for_cpu_all(struct device *dev); +#else +static inline void arch_sync_dma_for_cpu_all(struct device *dev) +{ +} +#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ + #endif /* _LINUX_DMA_NONCOHERENT_H */ -- cgit From fd55a281adf6c9b9723d369203fdd92e5ea62a09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Apr 2018 10:06:03 -0700 Subject: srcu: Introduce srcu_read_{un,}lock_notrace() Joel Fernandes is using SRCU to protect from-idle tracepoints, which requires notrace variants of srcu_read_lock() and srcu_read_unlock() in order to avoid problems with tracepoints in lockdep. This commit therefore adds srcu_read_lock_notrace() and srcu_read_unlock_notrace(). [1] http://lkml.kernel.org/r/20180427042656.190746-1-joelaf@google.com Reported-by: Joel Fernandes Intermittently-reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 91494d7e8e41..3e72a291c401 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) return retval; } +/* Used by tracing, cannot be traced and cannot invoke lockdep. */ +static inline notrace int +srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +{ + int retval; + + retval = __srcu_read_lock(sp); + return retval; +} + /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. * @sp: srcu_struct in which to unregister the old reader. @@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/* Used by tracing, cannot be traced and cannot call lockdep. */ +static inline notrace void +srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +{ + __srcu_read_unlock(sp, idx); +} + /** * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock * -- cgit From 90127d605f403d814f4986436871210bf8ceb335 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2018 10:29:18 -0700 Subject: torture: Make online/offline messages appear only for verbose=2 Some bugs reproduce quickly only at high CPU-hotplug rates, so the rcutorture TREE03 scenario now has only 200 milliseconds spacing between CPU-hotplug operations. At this rate, the torture-test pair of console messages per operation becomes a bit voluminous. This commit therefore converts the torture-test set of "verbose" kernel-boot arguments from bool to int, and prints the extra console messages only when verbose=2. The default is still verbose=1. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 66272862070b..a55e80817dae 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -79,7 +79,7 @@ void stutter_wait(const char *title); int torture_stutter_init(int s); /* Initialization and cleanup. */ -bool torture_init_begin(char *ttype, bool v); +bool torture_init_begin(char *ttype, int v); void torture_init_end(void); bool torture_cleanup_begin(void); void torture_cleanup_end(void); -- cgit From d4546c2509b1e9cd082e3682dcec98472e37ee5a Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 24 Jun 2018 14:13:49 +0900 Subject: net: Convert GRO SKB handling to list_head. Manage pending per-NAPI GRO packets via list_head. Return an SKB pointer from the GRO receive handlers. When GRO receive handlers return non-NULL, it means that this SKB needs to be completed at this time and removed from the NAPI queue. Several operations are greatly simplified by this transformation, especially timing out the oldest SKB in the list when gro_count exceeds MAX_GRO_SKBS, and napi_gro_flush() which walks the queue in reverse order. Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 3 +-- include/linux/netdevice.h | 32 ++++++++++++++++---------------- include/linux/skbuff.h | 3 ++- include/linux/udp.h | 4 ++-- 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 79563840c295..572e11bb8696 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -59,8 +59,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, unsigned int rxqs); #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1) -struct sk_buff **eth_gro_receive(struct sk_buff **head, - struct sk_buff *skb); +struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb); int eth_gro_complete(struct sk_buff *skb, int nhoff); /* Reserved Ethernet Addresses per IEEE 802.1Q */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec9850c7936..f176d9873910 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -322,7 +322,7 @@ struct napi_struct { int poll_owner; #endif struct net_device *dev; - struct sk_buff *gro_list; + struct list_head gro_list; struct sk_buff *skb; struct hrtimer timer; struct list_head dev_list; @@ -2255,10 +2255,10 @@ static inline int gro_recursion_inc_test(struct sk_buff *skb) return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; } -typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); -static inline struct sk_buff **call_gro_receive(gro_receive_t cb, - struct sk_buff **head, - struct sk_buff *skb) +typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); +static inline struct sk_buff *call_gro_receive(gro_receive_t cb, + struct list_head *head, + struct sk_buff *skb) { if (unlikely(gro_recursion_inc_test(skb))) { NAPI_GRO_CB(skb)->flush |= 1; @@ -2268,12 +2268,12 @@ static inline struct sk_buff **call_gro_receive(gro_receive_t cb, return cb(head, skb); } -typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **, - struct sk_buff *); -static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb, - struct sock *sk, - struct sk_buff **head, - struct sk_buff *skb) +typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, + struct sk_buff *); +static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct list_head *head, + struct sk_buff *skb) { if (unlikely(gro_recursion_inc_test(skb))) { NAPI_GRO_CB(skb)->flush |= 1; @@ -2299,8 +2299,8 @@ struct packet_type { struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); + struct sk_buff *(*gro_receive)(struct list_head *head, + struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; @@ -2568,7 +2568,7 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { @@ -2784,13 +2784,13 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, } #ifdef CONFIG_XFRM_OFFLOAD -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) { if (PTR_ERR(pp) != -EINPROGRESS) NAPI_GRO_CB(skb)->flush |= flush; } #else -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..7ccc601b55d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -677,7 +677,8 @@ struct sk_buff { int ip_defrag_offset; }; }; - struct rb_node rbnode; /* used in netem & tcp stack */ + struct rb_node rbnode; /* used in netem & tcp stack */ + struct list_head list; }; struct sock *sk; diff --git a/include/linux/udp.h b/include/linux/udp.h index ca840345571b..320d49d85484 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -74,8 +74,8 @@ struct udp_sock { void (*encap_destroy)(struct sock *sk); /* GRO functions for UDP socket */ - struct sk_buff ** (*gro_receive)(struct sock *sk, - struct sk_buff **head, + struct sk_buff * (*gro_receive)(struct sock *sk, + struct list_head *head, struct sk_buff *skb); int (*gro_complete)(struct sock *sk, struct sk_buff *skb, -- cgit From 07d78363dcffd9cb1bf6f06a6cac0e0847f3c1de Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 24 Jun 2018 14:14:02 +0900 Subject: net: Convert NAPI gro list into a small hash table. Improve the performance of GRO receive by splitting flows into multiple hash chains. Suggested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f176d9873910..c6b377a15869 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str); /* * Structure for NAPI scheduling similar to tasklet but with weighting */ +#define GRO_HASH_BUCKETS 8 struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means @@ -322,7 +323,7 @@ struct napi_struct { int poll_owner; #endif struct net_device *dev; - struct list_head gro_list; + struct list_head gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; struct hrtimer timer; struct list_head dev_list; -- cgit From a8f62d0c6fe533e07cd1acce7588278f9d6e7720 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 25 Jun 2018 20:37:08 +0800 Subject: ptp: support DPAA FMan 1588 timer in ptp_qoriq This patch is to support DPAA (Data Path Acceleration Architecture) 1588 timer by adding "fsl,fman-ptp-timer" compatible, sharing interrupt with FMan, adding FSL_DPAA_ETH dependency, and fixing up register offset. Signed-off-by: Yangbo Lu Acked-by: Richard Cochran Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- include/linux/fsl/ptp_qoriq.h | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index b462d9ea8007..dc3dac40f069 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -11,9 +11,8 @@ /* * qoriq ptp registers - * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010 */ -struct qoriq_ptp_registers { +struct ctrl_regs { u32 tmr_ctrl; /* Timer control register */ u32 tmr_tevent; /* Timestamp event register */ u32 tmr_temask; /* Timer event mask register */ @@ -28,22 +27,47 @@ struct qoriq_ptp_registers { u8 res1[4]; u32 tmroff_h; /* Timer offset high */ u32 tmroff_l; /* Timer offset low */ - u8 res2[8]; +}; + +struct alarm_regs { u32 tmr_alarm1_h; /* Timer alarm 1 high register */ u32 tmr_alarm1_l; /* Timer alarm 1 high register */ u32 tmr_alarm2_h; /* Timer alarm 2 high register */ u32 tmr_alarm2_l; /* Timer alarm 2 high register */ - u8 res3[48]; +}; + +struct fiper_regs { u32 tmr_fiper1; /* Timer fixed period interval */ u32 tmr_fiper2; /* Timer fixed period interval */ u32 tmr_fiper3; /* Timer fixed period interval */ - u8 res4[20]; +}; + +struct etts_regs { u32 tmr_etts1_h; /* Timestamp of general purpose external trigger */ u32 tmr_etts1_l; /* Timestamp of general purpose external trigger */ u32 tmr_etts2_h; /* Timestamp of general purpose external trigger */ u32 tmr_etts2_l; /* Timestamp of general purpose external trigger */ }; +struct qoriq_ptp_registers { + struct ctrl_regs __iomem *ctrl_regs; + struct alarm_regs __iomem *alarm_regs; + struct fiper_regs __iomem *fiper_regs; + struct etts_regs __iomem *etts_regs; +}; + +/* Offset definitions for the four register groups */ +#define CTRL_REGS_OFFSET 0x0 +#define ALARM_REGS_OFFSET 0x40 +#define FIPER_REGS_OFFSET 0x80 +#define ETTS_REGS_OFFSET 0xa0 + +#define FMAN_CTRL_REGS_OFFSET 0x80 +#define FMAN_ALARM_REGS_OFFSET 0xb8 +#define FMAN_FIPER_REGS_OFFSET 0xd0 +#define FMAN_ETTS_REGS_OFFSET 0xe0 + + /* Bit definitions for the TMR_CTRL register */ #define ALM1P (1<<31) /* Alarm1 output polarity */ #define ALM2P (1<<30) /* Alarm2 output polarity */ @@ -105,10 +129,10 @@ struct qoriq_ptp_registers { #define DRIVER "ptp_qoriq" #define DEFAULT_CKSEL 1 #define N_EXT_TS 2 -#define REG_SIZE sizeof(struct qoriq_ptp_registers) struct qoriq_ptp { - struct qoriq_ptp_registers __iomem *regs; + void __iomem *base; + struct qoriq_ptp_registers regs; spinlock_t lock; /* protects regs */ struct ptp_clock *clock; struct ptp_clock_info caps; -- cgit From b03799b0cb35dbc39e89602b1203863e2a6e06bf Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Jun 2018 16:49:06 -0500 Subject: PCI: shpchp: Separate existence of SHPC and permission to use it The shpchp driver registers for all PCI bridge devices. Its probe method should fail if either (1) the bridge doesn't have an SHPC or (2) the OS isn't allowed to use it (the platform firmware may be operating the SHPC itself). Separate these two tests into: - A new shpc_capable() that looks for the SHPC hardware and is applicable on all systems (ACPI and non-ACPI), and - A simplified acpi_get_hp_hw_control_from_firmware() that we call only when we already know an SHPC exists and there may be ACPI methods to either request permission to use it (_OSC) or transfer control to the OS (OSHP). acpi_get_hp_hw_control_from_firmware() is implemented when CONFIG_ACPI=y, but does nothing if the current platform doesn't support ACPI. Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..f776a1cce120 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -388,6 +388,7 @@ struct pci_dev { unsigned int is_virtfn:1; unsigned int reset_fn:1; unsigned int is_hotplug_bridge:1; + unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; -- cgit From 22eceb8bf3e8f1f9b2f566062d06b25807725d7f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 19 Jun 2018 13:57:26 +0200 Subject: printk: Make CONSOLE_LOGLEVEL_QUIET configurable The goal of passing the "quiet" option to the kernel is for the kernel to be quiet unless something really is wrong. Sofar passing quiet has been (mostly) equivalent to passing loglevel=4 on the kernel commandline. Which means to show any messages with a level of KERN_ERR or higher severity on the console. In practice this often does not result in a quiet boot though, since there are many false-positive or otherwise harmless error messages printed, defeating the purpose of the quiet option. Esp. the ACPICA code is really bad wrt this, but there are plenty of others too. This commit makes CONSOLE_LOGLEVEL_QUIET configurable. This for example will allow distros which want quiet to really mean quiet to set CONSOLE_LOGLEVEL_QUIET so that only messages with a higher severity then KERN_ERR (CRIT, ALERT, EMERG) get printed, avoiding an endless game of whack-a-mole silencing harmless error messages. Link: http://lkml.kernel.org/r/20180619115726.3098-1-hdegoede@redhat.com To: Petr Mladek To: Sergey Senozhatsky Cc: Hans de Goede Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Signed-off-by: Hans de Goede Acked-by: Steven Rostedt (VMware) Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 6d7e800affd8..18602bb3eca8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -50,15 +50,15 @@ static inline const char *printk_skip_headers(const char *buffer) /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ -#define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* - * Default used to be hard-coded at 7, we're now allowing it to be set from - * kernel config. + * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, + * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT +#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET extern int console_printk[]; -- cgit From e7d4a95da86e0b048702765bbdcdc968aaf312e7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jun 2018 08:58:28 +0200 Subject: bitfield: fix *_encode_bits() There's a bug in *_encode_bits() in using ~field_multiplier() for the check whether or not the constant value fits into the field, this is wrong and clearly ~field_mask() was intended. This was triggering for me for both constant and non-constant values. Additionally, make this case actually into an compile error. Declaring the extern function that will never exist with just a warning is pointless as then later we'll just get a link error. While at it, also fix the indentation in those lines I'm touching. Finally, as suggested by Andy Shevchenko, add some tests and for that introduce also u8 helpers. The tests don't compile without the fix, showing that it's necessary. Fixes: 00b0c9b82663 ("Add primitives for manipulating bitfields both in host- and fixed-endian.") Reviewed-by: Andy Shevchenko Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- include/linux/bitfield.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index cf2588d81148..147a7bb341dd 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -104,7 +104,7 @@ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) -extern void __compiletime_warning("value doesn't fit into mask") +extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") __bad_mask(void); @@ -121,8 +121,8 @@ static __always_inline u64 field_mask(u64 field) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ - if (__builtin_constant_p(v) && (v & ~field_multiplier(field))) \ - __field_overflow(); \ + if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ + __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ static __always_inline __##type type##_replace_bits(__##type old, \ -- cgit From 37a3862e1238262e9866d5d66e5f5f9069cee3a1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jun 2018 08:58:29 +0200 Subject: bitfield: add u8 helpers There's no reason why we shouldn't pack/unpack bits into/from u8 values/registers/etc., so add u8 helpers. Use the ____MAKE_OP() macro directly to avoid having nonsense le8_encode_bits() and similar functions. Reviewed-by: Andy Shevchenko Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- include/linux/bitfield.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 147a7bb341dd..65a6981eef7b 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -143,6 +143,7 @@ static __always_inline base type##_get_bits(__##type v, base field) \ ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ ____MAKE_OP(u##size,u##size,,) +____MAKE_OP(u8,u8,,) __MAKE_OP(16) __MAKE_OP(32) __MAKE_OP(64) -- cgit From 83d83bebf40132e2d55ec58af666713cc76f9764 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jun 2018 15:20:30 +0200 Subject: console/fbcon: Add support for deferred console takeover Currently fbcon claims fbdevs as soon as they are registered and takes over the console as soon as the first fbdev gets registered. This behavior is undesirable in cases where a smooth graphical bootup is desired, in such cases we typically want the contents of the framebuffer (typically a vendor logo) to stay in place as is. The current solution for this problem (on embedded systems) is to not enable fbcon. This commit adds a new FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER config option, which when enabled defers fbcon taking over the console from the dummy console until the first text is displayed on the console. Together with the "quiet" kernel commandline option, this allows fbcon to still be used together with a smooth graphical bootup, having it take over the console as soon as e.g. an error message is logged. Note the choice to detect the first console output in the dummycon driver, rather then handling this entirely inside the fbcon code, was made after 2 failed attempts to handle this entirely inside the fbcon code. The fbcon code is woven quite tightly into the console code, making this to only feasible option. Reviewed-by: Daniel Vetter Signed-off-by: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/console.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index dfd6b0e97855..f59f3dbca65c 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -21,6 +21,7 @@ struct console_font_op; struct console_font; struct module; struct tty_struct; +struct notifier_block; /* * this is what the terminal answers to a ESC-Z or csi0c query. @@ -220,4 +221,8 @@ static inline bool vgacon_text_force(void) { return false; } extern void console_init(void); +/* For deferred console takeover */ +void dummycon_register_output_notifier(struct notifier_block *nb); +void dummycon_unregister_output_notifier(struct notifier_block *nb); + #endif /* _LINUX_CONSOLE_H */ -- cgit From 4b09791ba059cc5a5ec7d69049f5d05da65b6418 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnáček Date: Tue, 26 Jun 2018 13:04:42 +0200 Subject: cred: conditionally declare groups-related functions The groups-related functions declared in include/linux/cred.h are defined in kernel/groups.c, which is compiled only when CONFIG_MULTIUSER=y. Move all these function declarations under #ifdef CONFIG_MULTIUSER to help avoid accidental usage in contexts where CONFIG_MULTIUSER might be disabled. This patch also adds a fallback for groups_search(). Currently this function is only called from kernel/groups.c itself and security/keys/permissions.c, where the call is (by coincidence) optimized away in case CONFIG_MULTIUSER=n. However, the audit subsystem (which does not depend on CONFIG_MULTIUSER) calls this function in -next, so the fallback will be needed to avoid compilation errors or ugly workarounds. See also: https://lkml.org/lkml/2018/6/20/670 https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git/commit/?h=next&id=af85d1772e31fed34165a1b3decef340cf4080c0 Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- include/linux/cred.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 631286535d0f..7eed6101c791 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -65,6 +65,12 @@ extern void groups_free(struct group_info *); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); +extern int groups_search(const struct group_info *, kgid_t); + +extern int set_current_groups(struct group_info *); +extern void set_groups(struct cred *, struct group_info *); +extern bool may_setgroups(void); +extern void groups_sort(struct group_info *); #else static inline void groups_free(struct group_info *group_info) { @@ -78,12 +84,11 @@ static inline int in_egroup_p(kgid_t grp) { return 1; } +static inline int groups_search(const struct group_info *group_info, kgid_t grp) +{ + return 1; +} #endif -extern int set_current_groups(struct group_info *); -extern void set_groups(struct cred *, struct group_info *); -extern int groups_search(const struct group_info *, kgid_t); -extern bool may_setgroups(void); -extern void groups_sort(struct group_info *); /* * The security context of a task -- cgit From cfdfc14e7fb8ae77290e9d5afaeecc0a234a3846 Mon Sep 17 00:00:00 2001 From: Doug Meyer Date: Wed, 23 May 2018 13:18:05 -0700 Subject: switchtec: Use generic PCI Vendor ID and Class Code Move the Microsemi Switchtec PCI Vendor ID (same as PCI_VENDOR_ID_PMC_Sierra) to pci_ids.h. Also, replace Microsemi class constants with the standard PCI definitions. Signed-off-by: Doug Meyer [bhelgaas: restore SPDX (I assume it was removed by mistake), remove device ID definitions] Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- include/linux/pci_ids.h | 1 + include/linux/switchtec.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 29502238e510..80aec5b9a6c1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1668,6 +1668,7 @@ #define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112 #define PCI_VENDOR_ID_PMC_Sierra 0x11f8 +#define PCI_VENDOR_ID_MICROSEMI 0x11f8 #define PCI_VENDOR_ID_RP 0x11fe #define PCI_DEVICE_ID_RP32INTF 0x0001 diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index ec93e93371fa..ab400af6f0ce 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -19,10 +19,6 @@ #include #include -#define MICROSEMI_VENDOR_ID 0x11f8 -#define MICROSEMI_NTB_CLASSCODE 0x068000 -#define MICROSEMI_MGMT_CLASSCODE 0x058000 - #define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024 #define SWITCHTEC_MAX_PFF_CSR 48 -- cgit From 783e84961b1d7a75045383586b8c49e02b7704cd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 29 Jun 2018 21:17:26 -0500 Subject: PCI: Make pci_get_rom_size() static pci_get_rom_size() is called only from pci_map_rom(), so it can be static. Make it static and remove the declaration from include/linux/pci.h. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..f40d3e05ccd1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1122,7 +1122,6 @@ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); -size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size); /* Power management related routines */ -- cgit From 7ce3f912ae0a79e5d738a3ae1f158b281973e849 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sat, 30 Jun 2018 11:24:24 -0400 Subject: PCI: Enable PASID only if entire path supports End-End TLP prefixes A PCIe endpoint carries the process address space identifier (PASID) in the TLP prefix as part of the memory read/write transaction. The address information in the TLP is relevant only for a given PASID context. An IOMMU takes PASID value and the address information from the TLP to look up the physical address in the system. PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20). Sec 2.2.10.2 says It is an error to receive a TLP with an End-End TLP Prefix by a Receiver that does not support End-End TLP Prefixes. A TLP in violation of this rule is handled as a Malformed TLP. This is a reported error associated with the Receiving Port (see Section 6.2). Prevent error condition by proactively requiring End-End TLP prefix to be supported on the entire data path between the endpoint and the root port before enabling PASID. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..6ba818449095 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -350,6 +350,7 @@ struct pci_dev { unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif + unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ pci_channel_state_t error_state; /* Current connectivity state */ struct device dev; /* Generic device interface */ -- cgit From 80d19669ecd34423e85ca04f2210b0e42a47cb16 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 29 Jun 2018 21:26:41 -0700 Subject: net: Refactor XPS for CPUs and Rx queues Refactor XPS code to support Tx queue selection based on CPU(s) map or Rx queue(s) map. Signed-off-by: Amritha Nambiar Signed-off-by: David S. Miller --- include/linux/cpumask.h | 11 ++++-- include/linux/netdevice.h | 98 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bf53d893ad02..57f20a0a7794 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask; #define cpu_active(cpu) ((cpu) == 0) #endif -/* verify cpu argument to cpumask_* operators */ -static inline unsigned int cpumask_check(unsigned int cpu) +static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS - WARN_ON_ONCE(cpu >= nr_cpumask_bits); + WARN_ON_ONCE(cpu >= bits); #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ +} + +/* verify cpu argument to cpumask_* operators */ +static inline unsigned int cpumask_check(unsigned int cpu) +{ + cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c6b377a15869..8bf8d6149f79 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -731,10 +731,15 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; - struct xps_map __rcu *cpu_map[0]; + struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */ }; -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) + +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\ + (_rxqs * (_tcs) * sizeof(struct xps_map *))) + #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 @@ -1910,7 +1915,8 @@ struct net_device { int watchdog_timeo; #ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_maps; + struct xps_dev_maps __rcu *xps_cpus_map; + struct xps_dev_maps __rcu *xps_rxqs_map; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@ -3259,6 +3265,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, + u16 index, bool is_rxqs_map); + +/** + * netif_attr_test_mask - Test a CPU or Rx queue set in a mask + * @j: CPU/Rx queue index + * @mask: bitmask of all cpus/rx queues + * @nr_bits: number of bits in the bitmask + * + * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues. + */ +static inline bool netif_attr_test_mask(unsigned long j, + const unsigned long *mask, + unsigned int nr_bits) +{ + cpu_max_bits_warn(j, nr_bits); + return test_bit(j, mask); +} + +/** + * netif_attr_test_online - Test for online CPU/Rx queue + * @j: CPU/Rx queue index + * @online_mask: bitmask for CPUs/Rx queues that are online + * @nr_bits: number of bits in the bitmask + * + * Returns true if a CPU/Rx queue is online. + */ +static inline bool netif_attr_test_online(unsigned long j, + const unsigned long *online_mask, + unsigned int nr_bits) +{ + cpu_max_bits_warn(j, nr_bits); + + if (online_mask) + return test_bit(j, online_mask); + + return (j < nr_bits); +} + +/** + * netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask + * @n: CPU/Rx queue index + * @srcp: the cpumask/Rx queue mask pointer + * @nr_bits: number of bits in the bitmask + * + * Returns >= nr_bits if no further CPUs/Rx queues set. + */ +static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, + unsigned int nr_bits) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); + + if (srcp) + return find_next_bit(srcp, nr_bits, n + 1); + + return n + 1; +} + +/** + * netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p + * @n: CPU/Rx queue index + * @src1p: the first CPUs/Rx queues mask pointer + * @src2p: the second CPUs/Rx queues mask pointer + * @nr_bits: number of bits in the bitmask + * + * Returns >= nr_bits if no further CPUs/Rx queues set in both. + */ +static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, + const unsigned long *src2p, + unsigned int nr_bits) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); + + if (src1p && src2p) + return find_next_and_bit(src1p, src2p, nr_bits, n + 1); + else if (src1p) + return find_next_bit(src1p, nr_bits, n + 1); + else if (src2p) + return find_next_bit(src2p, nr_bits, n + 1); + + return n + 1; +} #else static inline int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, -- cgit From f308d7353d1f5c3ddedc784a367edc72fc51bbaa Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 24 Jun 2018 23:27:22 +0200 Subject: mtd: rawnand: add Reed-Solomon error correction algorithm Add Reed-Solomon (RS) to the enumeration of ECC algorithms. Signed-off-by: Stefan Agner Reviewed-by: Boris Brezillon Acked-by: Rob Herring Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 3e8ec3b8a39c..2d9cb7acbc3d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -121,6 +121,7 @@ enum nand_ecc_algo { NAND_ECC_UNKNOWN, NAND_ECC_HAMMING, NAND_ECC_BCH, + NAND_ECC_RS, }; /* -- cgit From f922bd798bb94e0adcce26ddd811245443173268 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 24 Jun 2018 23:27:23 +0200 Subject: mtd: rawnand: add an option to specify NAND chip as a boot device Allow to define a NAND chip as a boot device. This can be helpful for the selection of the ECC algorithm and strength in case the boot ROM supports only a subset of controller provided options. Signed-off-by: Stefan Agner Reviewed-by: Boris Brezillon Acked-by: Rob Herring Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2d9cb7acbc3d..80aeeca03f36 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -219,6 +219,12 @@ enum nand_ecc_algo { */ #define NAND_WAIT_TCCS 0x00200000 +/* + * Whether the NAND chip is a boot medium. Drivers might use this information + * to select ECC algorithms supported by the boot ROM or similar restrictions. + */ +#define NAND_IS_BOOT_MEDIUM 0x00400000 + /* Options set by nand scan */ /* Nand scan has allocated controller struct */ #define NAND_CONTROLLER_ALLOC 0x80000000 -- cgit From 00ce4e039ad5bded462931606c3063ff691964b7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 25 Jun 2018 10:44:44 +1200 Subject: mtd: rawnand: add manufacturer fixup for ONFI parameter page This is called after the ONFI parameter page checksum is verified and allows us to override the contents of the parameter page. Suggested-by: Boris Brezillon Signed-off-by: Chris Packham Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 80aeeca03f36..dc4538b58b84 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -785,11 +785,15 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * implementation) if any. * @cleanup: the ->init() function may have allocated resources, ->cleanup() * is here to let vendor specific code release those resources. + * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter + * page. This is called after the checksum is verified. */ struct nand_manufacturer_ops { void (*detect)(struct nand_chip *chip); int (*init)(struct nand_chip *chip); void (*cleanup)(struct nand_chip *chip); + void (*fixup_onfi_param_page)(struct nand_chip *chip, + struct nand_onfi_params *p); }; /** -- cgit From 872b71ff084ab125c68073d9e69acfd7095f2015 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 25 Jun 2018 10:44:45 +1200 Subject: mtd: rawnand: add defines for ONFI version bits Add defines for the ONFI version bits and use them in nand_flash_detect_onfi(). Signed-off-by: Chris Packham Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index dc4538b58b84..fbac4741da52 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -237,6 +237,17 @@ enum nand_ecc_algo { /* Keep gcc happy */ struct nand_chip; +/* ONFI version bits */ +#define ONFI_VERSION_1_0 BIT(1) +#define ONFI_VERSION_2_0 BIT(2) +#define ONFI_VERSION_2_1 BIT(3) +#define ONFI_VERSION_2_2 BIT(4) +#define ONFI_VERSION_2_3 BIT(5) +#define ONFI_VERSION_3_0 BIT(6) +#define ONFI_VERSION_3_1 BIT(7) +#define ONFI_VERSION_3_2 BIT(8) +#define ONFI_VERSION_4_0 BIT(9) + /* ONFI features */ #define ONFI_FEATURE_16_BIT_BUS (1 << 0) #define ONFI_FEATURE_EXT_PARAM_PAGE (1 << 7) -- cgit From 1b48b7202c43e41c9f05d39901567dbcad5dc307 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 3 May 2018 16:25:49 +0200 Subject: dma-fence: remove fill_driver_data callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noticed while I was typing docs. Entirely unused. v2: Remove reference in @timeline_value_str too. While at it clarify why timeline_value_str has a fence parameter - we don't have an explicit timeline structure unfortunately. Cc: Eric Anholt Reviewed-by: Eric Anholt Reviewed-by: Christian König (v1) Cc: Christian König (v1) Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180503142603.28513-2-daniel.vetter@ffwll.ch --- include/linux/dma-fence.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index eb9b05aa5aea..111aefe1c956 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -217,17 +217,6 @@ struct dma_fence_ops { */ void (*release)(struct dma_fence *fence); - /** - * @fill_driver_data: - * - * Callback to fill in free-form debug info. - * - * Returns amount of bytes filled, or negative error on failure. - * - * This callback is optional. - */ - int (*fill_driver_data)(struct dma_fence *fence, void *data, int size); - /** * @fence_value_str: * @@ -242,8 +231,9 @@ struct dma_fence_ops { * @timeline_value_str: * * Fills in the current value of the timeline as a string, like the - * sequence number. This should match what @fill_driver_data prints for - * the most recently signalled fence (assuming no delayed signalling). + * sequence number. Note that the specific fence passed to this function + * should not matter, drivers should only use it to look up the + * corresponding timeline structures. */ void (*timeline_value_str)(struct dma_fence *fence, char *str, int size); -- cgit From c701317a3eb8c012364a8d468f20eabf6df1ad77 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 4 May 2018 16:10:34 +0200 Subject: dma-fence: Make ->enable_signaling optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many drivers have a trivial implementation for ->enable_signaling. Let's make it optional by assuming that signalling is already available when the callback isn't present. v2: Don't do the trick to set the ENABLE_SIGNAL_BIT unconditionally, it results in an expensive spinlock take for everyone. Instead just check if the callback is present. Suggested by Maarten. Also move misplaced kerneldoc hunk to the right patch. Cc: Maarten Lankhorst Reviewed-by: Christian König (v1) Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Sumit Semwal Cc: Gustavo Padovan Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20180504141034.27727-1-daniel.vetter@ffwll.ch --- include/linux/dma-fence.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 111aefe1c956..c053d19e1e24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -166,7 +166,8 @@ struct dma_fence_ops { * released when the fence is signalled (through e.g. the interrupt * handler). * - * This callback is mandatory. + * This callback is optional. If this callback is not present, then the + * driver must always have signaling enabled. */ bool (*enable_signaling)(struct dma_fence *fence); -- cgit From 78c9c4dfbf8c04883941445a195276bb4bb92c76 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Jun 2018 15:21:32 +0200 Subject: posix-timers: Sanitize overrun handling The posix timer overrun handling is broken because the forwarding functions can return a huge number of overruns which does not fit in an int. As a consequence timer_getoverrun(2) and siginfo::si_overrun can turn into random number generators. The k_clock::timer_forward() callbacks return a 64 bit value now. Make k_itimer::ti_overrun[_last] 64bit as well, so the kernel internal accounting is correct. 3Remove the temporary (int) casts. Add a helper function which clamps the overrun value returned to user space via timer_getoverrun(2) or siginfo::si_overrun limited to a positive value between 0 and INT_MAX. INT_MAX is an indicator for user space that the overrun value has been clamped. Reported-by: Team OWL337 Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Peter Zijlstra Cc: Michael Kerrisk Link: https://lkml.kernel.org/r/20180626132705.018623573@linutronix.de --- include/linux/posix-timers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index c85704fcdbd2..ee7e987ea1b4 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -95,8 +95,8 @@ struct k_itimer { clockid_t it_clock; timer_t it_id; int it_active; - int it_overrun; - int it_overrun_last; + s64 it_overrun; + s64 it_overrun_last; int it_requeue_pending; int it_sigev_notify; ktime_t it_interval; -- cgit From 05739375f1c0a1048fea8b9c4cb54d9e4a891938 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 29 Jun 2018 14:59:40 +0200 Subject: ASoC: pxa-ssp: remove .set_pll() and .set_clkdiv() callbacks The .set_pll() and .set_clkdiv() callbacks are considered legacy and should not be used anymore. In order to support PXA boards on DT platforms, remove them and let the code figure out the correct dividers and PLL base frequencies itself. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 03a7ca46735b..13b4244d44c1 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -171,6 +171,14 @@ #define SSACD_SCDB (1 << 3) /* SSPSYSCLK Divider Bypass */ #define SSACD_ACPS(x) ((x) << 4) /* Audio clock PLL select */ #define SSACD_ACDS(x) ((x) << 0) /* Audio clock divider select */ +#define SSACD_ACDS_1 (0) +#define SSACD_ACDS_2 (1) +#define SSACD_ACDS_4 (2) +#define SSACD_ACDS_8 (3) +#define SSACD_ACDS_16 (4) +#define SSACD_ACDS_32 (5) +#define SSACD_SCDB_4X (0) +#define SSACD_SCDB_1X (1) #define SSACD_SCDX8 (1 << 7) /* SYSCLK division ratio select */ /* LPSS SSP */ -- cgit From 88763a5cf80ca59a7c3bea32681ce8f697d9995f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 26 Jun 2018 12:53:29 +0200 Subject: powercap / idle_inject: Add an idle injection framework Initially, the cpu_cooling device for ARM was changed by adding a new policy inserting idle cycles. The intel_powerclamp driver does a similar action. Instead of implementing idle injections privately in the cpu_cooling device, move the idle injection code in a dedicated framework and give the opportunity to other frameworks to make use of it. The framework relies on the smpboot kthreads which handles via its main loop the common code for hotplugging and [un]parking. This code was previously tested with the cpu cooling device and went through several iterations. It results now in split code and API exported in the header file. It was tested with the cpu cooling device with success. Signed-off-by: Daniel Lezcano Reviewed-by: Viresh Kumar [ rjw: Rewrite of all comments ] Signed-off-by: Rafael J. Wysocki --- include/linux/idle_inject.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/idle_inject.h (limited to 'include/linux') diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h new file mode 100644 index 000000000000..bdc0293fb6cb --- /dev/null +++ b/include/linux/idle_inject.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Linaro Ltd + * + * Author: Daniel Lezcano + * + */ +#ifndef __IDLE_INJECT_H__ +#define __IDLE_INJECT_H__ + +/* private idle injection device structure */ +struct idle_inject_device; + +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask); + +void idle_inject_unregister(struct idle_inject_device *ii_dev); + +int idle_inject_start(struct idle_inject_device *ii_dev); + +void idle_inject_stop(struct idle_inject_device *ii_dev); + +void idle_inject_set_duration(struct idle_inject_device *ii_dev, + unsigned int run_duration_ms, + unsigned int idle_duration_ms); + +void idle_inject_get_duration(struct idle_inject_device *ii_dev, + unsigned int *run_duration_ms, + unsigned int *idle_duration_ms); +#endif /* __IDLE_INJECT_H__ */ -- cgit From a7ca13826e478f9b201eb2f9f20de0b978a82ad9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 29 Jun 2018 14:11:19 +1000 Subject: gpio: aspeed: Add interfaces for co-processor to grab GPIOs On the Aspeed chip, the GPIOs can be under control of the ARM chip or of the ColdFire coprocessor. (There's a third command source, the LPC bus, which we don't use or support yet). The control of which master is allowed to modify a given GPIO is per-bank (8 GPIOs). Unfortunately, systems already exist for which we want to use GPIOs of both sources in the same bank. This provides an API exported by the gpio-aspeed driver that an aspeed coprocessor driver can use to "grab" some GPIOs for use by the coprocessor, and allow the coprocessor driver to provide callbacks for arbitrating access. Once at least one GPIO of a given bank has been "grabbed" by the coprocessor, the entire bank is marked as being under coprocessor control. It's command source is switched to the coprocessor. If the ARM then tries to write to a GPIO in such a marked bank, the provided callbacks are used to request access from the coprocessor driver, which is responsible to doing whatever is necessary to "pause" the coprocessor or prevent it from trying to use the GPIOs while the ARM is doing its accesses. During that time, the command source for the bank is temporarily switched back to the ARM. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Joel Stanley Reviewed-by: Andrew Jeffery Signed-off-by: Linus Walleij --- include/linux/gpio/aspeed.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/linux/gpio/aspeed.h (limited to 'include/linux') diff --git a/include/linux/gpio/aspeed.h b/include/linux/gpio/aspeed.h new file mode 100644 index 000000000000..1bfb3cdc86d0 --- /dev/null +++ b/include/linux/gpio/aspeed.h @@ -0,0 +1,15 @@ +#ifndef __GPIO_ASPEED_H +#define __GPIO_ASPEED_H + +struct aspeed_gpio_copro_ops { + int (*request_access)(void *data); + int (*release_access)(void *data); +}; + +int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc, + u16 *vreg_offset, u16 *dreg_offset, u8 *bit); +int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc); +int aspeed_gpio_copro_set_ops(const struct aspeed_gpio_copro_ops *ops, void *data); + + +#endif /* __GPIO_ASPEED_H */ -- cgit From 60a866685006bae0c3db20e4bae31887eb452ff1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 26 Jun 2018 06:49:08 -0300 Subject: gpio.h: fix location of gpio legacy documentation The location of this doc file was moved. Change its reference accordingly. Fixes: 7ee2c13080c9 ("Documentation: gpio: Move legacy documentation to driver-api") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- include/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 91ed23468530..39745b8bdd65 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -14,7 +14,7 @@ #include -/* see Documentation/gpio/gpio-legacy.txt */ +/* see Documentation/driver-api/gpio/legacy.rst */ /* make these flag values available regardless of GPIO kconfig options */ #define GPIOF_DIR_OUT (0 << 0) -- cgit From 9cf57731b63e37ed995b46690adc604891a9a28f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 10:52:03 +0200 Subject: watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work Oleg suggested to replace the "watchdog/%u" threads with cpu_stop_work. That removes one thread per CPU while at the same time fixes softlockup vs SCHED_DEADLINE. But more importantly, it does away with the single smpboot_update_cpumask_percpu_thread() user, which allows cleanups/shrinkage of the smpboot interface. Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpuhotplug.h | 1 + include/linux/nmi.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8796ba387152..4cf06a64bc02 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -164,6 +164,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, + CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b8d868d23e79..80664bbeca43 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_hardlockup_all_cpu_backtrace 0 #endif /* !CONFIG_SMP */ +extern int lockup_detector_online_cpu(unsigned int cpu); +extern int lockup_detector_offline_cpu(unsigned int cpu); + #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } +#define lockup_detector_online_cpu NULL +#define lockup_detector_offline_cpu NULL #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR -- cgit From 167a88677b05d6a810f23b871cfb2b5db1808e60 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 10:53:01 +0200 Subject: smpboot: Remove cpumask from the API Now that the sole use of the whole smpboot_*cpumask() API is gone, remove it. Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index c174844cf663..d0884b525001 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -25,8 +25,6 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) - * @cpumask: Internal state. To update which threads are unparked, - * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -40,23 +38,12 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); - cpumask_var_t cpumask; bool selfparking; const char *thread_comm; }; -int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, - const struct cpumask *cpumask); - -static inline int -smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) -{ - return smpboot_register_percpu_thread_cpumask(plug_thread, - cpu_possible_mask); -} +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *); #endif -- cgit From 55f036ca7e74b85e34958af3d22121c656796413 Mon Sep 17 00:00:00 2001 From: Peter Ziljstra Date: Fri, 15 Jun 2018 10:07:12 +0200 Subject: locking: WW mutex cleanup Make the WW mutex code more readable by adding comments, splitting up functions and pointing out that we're actually using the Wait-Die algorithm. Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Gustavo Padovan Cc: Maarten Lankhorst Cc: Sean Paul Cc: David Airlie Cc: Davidlohr Bueso Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: linux-doc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Co-authored-by: Thomas Hellstrom Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Hellstrom Acked-by: Ingo Molnar --- include/linux/ww_mutex.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 39fda195bf78..f82fce2229c8 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -6,7 +6,7 @@ * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar * - * Wound/wait implementation: + * Wait/Die implementation: * Copyright (C) 2013 Canonical Ltd. * * This file contains the main data structure and API definitions. @@ -28,9 +28,9 @@ struct ww_class { struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; - unsigned acquired; + unsigned int acquired; #ifdef CONFIG_DEBUG_MUTEXES - unsigned done_acquire; + unsigned int done_acquire; struct ww_class *ww_class; struct ww_mutex *contending_lock; #endif @@ -38,8 +38,8 @@ struct ww_acquire_ctx { struct lockdep_map dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH - unsigned deadlock_inject_interval; - unsigned deadlock_inject_countdown; + unsigned int deadlock_inject_interval; + unsigned int deadlock_inject_countdown; #endif }; @@ -102,7 +102,7 @@ static inline void ww_mutex_init(struct ww_mutex *lock, * * Context-based w/w mutex acquiring can be done in any order whatsoever within * a given lock class. Deadlocks will be detected and handled with the - * wait/wound logic. + * wait/die logic. * * Mixing of context-based w/w mutex acquiring and single w/w mutex locking can * result in undetected deadlocks and is so forbidden. Mixing different contexts @@ -195,13 +195,13 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the - * wait/wound algorithm. If the lock isn't immediately avaiable this function + * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context - * for backing off by returning -EDEADLK (wound case). Trying to acquire the + * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. * - * In the wound case the caller must release all currently held w/w mutexes for + * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this * lock and proceed with trying to acquire further w/w mutexes (e.g. when @@ -226,14 +226,14 @@ extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acq * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the - * wait/wound algorithm. If the lock isn't immediately avaiable this function + * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context - * for backing off by returning -EDEADLK (wound case). Trying to acquire the + * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a * signal arrives while waiting for the lock then this function returns -EINTR. * - * In the wound case the caller must release all currently held w/w mutexes for + * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to * not acquire this lock and proceed with trying to acquire further w/w mutexes @@ -256,7 +256,7 @@ extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, * @lock: the mutex to be acquired * @ctx: w/w acquire context * - * Acquires a w/w mutex with the given context after a wound case. This function + * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available. * * The caller must have released all w/w mutexes already acquired with the @@ -290,7 +290,7 @@ ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * @lock: the mutex to be acquired * @ctx: w/w acquire context * - * Acquires a w/w mutex with the given context after a wound case. This function + * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available and returns 0 when the lock has * been acquired. If a signal arrives while waiting for the lock then this * function returns -EINTR. -- cgit From 08295b3b5beec9aac0f7a9db86f0fc3792039da3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 15 Jun 2018 10:17:38 +0200 Subject: locking: Implement an algorithm choice for Wound-Wait mutexes The current Wound-Wait mutex algorithm is actually not Wound-Wait but Wait-Die. Implement also Wound-Wait as a per-ww-class choice. Wound-Wait is, contrary to Wait-Die a preemptive algorithm and is known to generate fewer backoffs. Testing reveals that this is true if the number of simultaneous contending transactions is small. As the number of simultaneous contending threads increases, Wait-Wound becomes inferior to Wait-Die in terms of elapsed time. Possibly due to the larger number of held locks of sleeping transactions. Update documentation and callers. Timings using git://people.freedesktop.org/~thomash/ww_mutex_test tag patch-18-06-15 Each thread runs 100000 batches of lock / unlock 800 ww mutexes randomly chosen out of 100000. Four core Intel x86_64: Algorithm #threads Rollbacks time Wound-Wait 4 ~100 ~17s. Wait-Die 4 ~150000 ~19s. Wound-Wait 16 ~360000 ~109s. Wait-Die 16 ~450000 ~82s. Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Gustavo Padovan Cc: Maarten Lankhorst Cc: Sean Paul Cc: David Airlie Cc: Davidlohr Bueso Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: linux-doc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Co-authored-by: Peter Zijlstra Signed-off-by: Thomas Hellstrom Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- include/linux/ww_mutex.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index f82fce2229c8..3af7c0e03be5 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -8,6 +8,8 @@ * * Wait/Die implementation: * Copyright (C) 2013 Canonical Ltd. + * Choice of algorithm: + * Copyright (C) 2018 WMWare Inc. * * This file contains the main data structure and API definitions. */ @@ -23,12 +25,15 @@ struct ww_class { struct lock_class_key mutex_key; const char *acquire_name; const char *mutex_name; + unsigned int is_wait_die; }; struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; unsigned int acquired; + unsigned short wounded; + unsigned short is_wait_die; #ifdef CONFIG_DEBUG_MUTEXES unsigned int done_acquire; struct ww_class *ww_class; @@ -58,17 +63,21 @@ struct ww_mutex { # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif -#define __WW_CLASS_INITIALIZER(ww_class) \ +#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \ { .stamp = ATOMIC_LONG_INIT(0) \ , .acquire_name = #ww_class "_acquire" \ - , .mutex_name = #ww_class "_mutex" } + , .mutex_name = #ww_class "_mutex" \ + , .is_wait_die = _is_wait_die } #define __WW_MUTEX_INITIALIZER(lockname, class) \ { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } +#define DEFINE_WD_CLASS(classname) \ + struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1) + #define DEFINE_WW_CLASS(classname) \ - struct ww_class classname = __WW_CLASS_INITIALIZER(classname) + struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0) #define DEFINE_WW_MUTEX(mutexname, ww_class) \ struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class) @@ -123,6 +132,8 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, ctx->task = current; ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; + ctx->wounded = false; + ctx->is_wait_die = ww_class->is_wait_die; #ifdef CONFIG_DEBUG_MUTEXES ctx->ww_class = ww_class; ctx->done_acquire = 0; -- cgit From 418cc6ca06071e7b7d75c0d525d80a6f49a763d6 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 3 May 2018 16:25:52 +0200 Subject: dma-fence: Make ->wait callback optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Almost everyone uses dma_fence_default_wait. v2: Also remove the BUG_ON(!ops->wait) (Chris). Reviewed-by: Christian König (v1) Signed-off-by: Daniel Vetter Cc: Chris Wilson Cc: Sumit Semwal Cc: Gustavo Padovan Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20180503142603.28513-5-daniel.vetter@ffwll.ch --- include/linux/dma-fence.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index c053d19e1e24..02dba8cd033d 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -191,11 +191,14 @@ struct dma_fence_ops { /** * @wait: * - * Custom wait implementation, or dma_fence_default_wait. + * Custom wait implementation, defaults to dma_fence_default_wait() if + * not set. * - * Must not be NULL, set to dma_fence_default_wait for default implementation. - * the dma_fence_default_wait implementation should work for any fence, as long - * as enable_signaling works correctly. + * The dma_fence_default_wait implementation should work for any fence, as long + * as @enable_signaling works correctly. This hook allows drivers to + * have an optimized version for the case where a process context is + * already available, e.g. if @enable_signaling for the general case + * needs to set up a worker thread. * * Must return -ERESTARTSYS if the wait is intr = true and the wait was * interrupted, and remaining jiffies if fence has signaled, or 0 if wait @@ -203,7 +206,7 @@ struct dma_fence_ops { * which should be treated as if the fence is signaled. For example a hardware * lockup could be reported like that. * - * This callback is mandatory. + * This callback is optional. */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); -- cgit From 8a9c58d28d0f66569737a3295116710ed24573cd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 2 Jul 2018 18:21:12 +0800 Subject: sctp: add support for dscp and flowlabel per transport Like some other per transport params, flowlabel and dscp are added in transport, asoc and sctp_sock. By default, transport sets its value from asoc's, and asoc does it from sctp_sock. flowlabel only works for ipv6 transport. Other than that they need to be passed down in sctp_xmit, flow4/6 also needs to set them before looking up route in get_dst. Note that it uses '& 0x100000' to check if flowlabel is set and '& 0x1' (tos 1st bit is unused) to check if dscp is set by users, so that they could be set to 0 by sockopt in next patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b36c76635f18..83d94341e003 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -801,4 +801,11 @@ struct sctp_strreset_resptsn { __be32 receivers_next_tsn; }; +enum { + SCTP_DSCP_SET_MASK = 0x1, + SCTP_DSCP_VAL_MASK = 0xfc, + SCTP_FLOWLABEL_SET_MASK = 0x100000, + SCTP_FLOWLABEL_VAL_MASK = 0xfffff +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit From f6ad8c1bcdf014272d08c55b9469536952a0a771 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 2 Jul 2018 16:12:45 +0100 Subject: net: core: trivial netif_receive_skb_list() entry point Just calls netif_receive_skb() in a loop. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 64480a0f2c16..f67258f057ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3477,6 +3477,7 @@ int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); +void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -- cgit From 4ce0017a373afaaa9ef17614d8fa4f6fde261d18 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 2 Jul 2018 16:13:40 +0100 Subject: net: core: another layer of lists, around PF_MEMALLOC skb handling First example of a layer splitting the list (rather than merely taking individual packets off it). Involves new list.h function, list_cut_before(), like list_cut_position() but cuts on the other side of the given entry. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/list.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 4b129df4d46b..de04cc5ed536 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -285,6 +285,36 @@ static inline void list_cut_position(struct list_head *list, __list_cut_position(list, head, entry); } +/** + * list_cut_before - cut a list into two, before given entry + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * + * This helper moves the initial part of @head, up to but + * excluding @entry, from @head to @list. You should pass + * in @entry an element you know is on @head. @list should + * be an empty list or a list you do not care about losing + * its data. + * If @entry == @head, all entries on @head are moved to + * @list. + */ +static inline void list_cut_before(struct list_head *list, + struct list_head *head, + struct list_head *entry) +{ + if (head->next == entry) { + INIT_LIST_HEAD(list); + return; + } + list->next = head->next; + list->next->prev = list; + list->prev = entry->prev; + list->prev->next = list; + head->next = entry; + entry->prev = head; +} + static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) -- cgit From 17266ee939849cb095ed7dd9edbec4162172226b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 2 Jul 2018 16:14:12 +0100 Subject: net: ipv4: listified version of ip_rcv Also involved adding a way to run a netfilter hook over a list of packets. Rather than attempting to make netfilter know about lists (which would be a major project in itself) we just let it call the regular okfn (in this case ip_rcv_finish()) for any packets it steals, and have it give us back a list of packets it's synchronously accepted (which normally NF_HOOK would automatically call okfn() on, but we want to be able to potentially pass the list to a listified version of okfn().) The netfilter hooks themselves are indirect calls that still happen per- packet (see nf_hook_entry_hookfn()), but again, changing that can be left for future work. There is potential for out-of-order receives if the netfilter hook ends up synchronously stealing packets, as they will be processed before any accepts earlier in the list. However, it was already possible for an asynchronous accept to cause out-of-order receives, so presumably this is considered OK. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/netfilter.h | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f67258f057ca..c1ef749b6f9f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2297,6 +2297,9 @@ struct packet_type { struct net_device *, struct packet_type *, struct net_device *); + void (*list_func) (struct list_head *, + struct packet_type *, + struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); void *af_packet_priv; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index dd2052f0efb7..5a5e0a2ab2a3 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -288,6 +288,20 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct return ret; } +static inline void +NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct list_head *head, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) +{ + struct sk_buff *skb, *next; + + list_for_each_entry_safe(skb, next, head, list) { + int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); + if (ret != 1) + list_del(&skb->list); + } +} + /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); @@ -369,6 +383,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, return okfn(net, sk, skb); } +static inline void +NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct list_head *head, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) +{ + /* nothing to do */ +} + static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, -- cgit From 25db26a91364db00f5a30da2fea8e9afe14a163c Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 3 Jul 2018 15:42:53 -0700 Subject: net/sched: Introduce the ETF Qdisc The ETF (Earliest TxTime First) qdisc uses the information added earlier in this series (the socket option SO_TXTIME and the new role of sk_buff->tstamp) to schedule packets transmission based on absolute time. For some workloads, just bandwidth enforcement is not enough, and precise control of the transmission of packets is necessary. Example: $ tc qdisc replace dev enp2s0 parent root handle 100 mqprio num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0 $ tc qdisc add dev enp2s0 parent 100:1 etf delta 100000 \ clockid CLOCK_TAI In this example, the Qdisc will provide SW best-effort for the control of the transmission time to the network adapter, the time stamp in the socket will be in reference to the clockid CLOCK_TAI and packets will leave the qdisc "delta" (100000) nanoseconds before its transmission time. The ETF qdisc will buffer packets sorted by their txtime. It will drop packets on enqueue() if their skbuff clockid does not match the clock reference of the Qdisc. Moreover, on dequeue(), a packet will be dropped if it expires while being enqueued. The qdisc also supports the SO_TXTIME deadline mode. For this mode, it will dequeue a packet as soon as possible and change the skb timestamp to 'now' during etf_dequeue(). Note that both the qdisc's and the SO_TXTIME ABIs allow for a clockid to be configured, but it's been decided that usage of CLOCK_TAI should be enforced until we decide to allow for other clockids to be used. The rationale here is that PTP times are usually in the TAI scale, thus no other clocks should be necessary. For now, the qdisc will return EINVAL if any clocks other than CLOCK_TAI are used. Signed-off-by: Jesus Sanchez-Palencia Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c1ef749b6f9f..f06ee8f91e74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -798,6 +798,7 @@ enum tc_setup_type { TC_SETUP_QDISC_RED, TC_SETUP_QDISC_PRIO, TC_SETUP_QDISC_MQ, + TC_SETUP_QDISC_ETF, }; /* These structures hold the attributes of bpf state that are being passed -- cgit From b5cb15d9372abc9adc4e844c0c1bf594ca6a7695 Mon Sep 17 00:00:00 2001 From: Chris von Recklinghausen Date: Tue, 3 Jul 2018 15:43:08 -0400 Subject: usercopy: Allow boot cmdline disabling of hardening Enabling HARDENED_USERCOPY may cause measurable regressions in networking performance: up to 8% under UDP flood. I ran a small packet UDP flood using pktgen vs. a host b2b connected. On the receiver side the UDP packets are processed by a simple user space process that just reads and drops them: https://github.com/netoptimizer/network-testing/blob/master/src/udp_sink.c Not very useful from a functional PoV, but it helps to pin-point bottlenecks in the networking stack. When running a kernel with CONFIG_HARDENED_USERCOPY=y, I see a 5-8% regression in the receive tput, compared to the same kernel without this option enabled. With CONFIG_HARDENED_USERCOPY=y, perf shows ~6% of CPU time spent cumulatively in __check_object_size (~4%) and __virt_addr_valid (~2%). The call-chain is: __GI___libc_recvfrom entry_SYSCALL_64_after_hwframe do_syscall_64 __x64_sys_recvfrom __sys_recvfrom inet_recvmsg udp_recvmsg __check_object_size udp_recvmsg() actually calls copy_to_iter() (inlined) and the latters calls check_copy_size() (again, inlined). A generic distro may want to enable HARDENED_USERCOPY in their default kernel config, but at the same time, such distro may want to be able to avoid the performance penalties in with the default configuration and disable the stricter check on a per-boot basis. This change adds a boot parameter that conditionally disables HARDENED_USERCOPY via "hardened_usercopy=off". Signed-off-by: Chris von Recklinghausen Signed-off-by: Kees Cook --- include/linux/jump_label.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b46b541c67c4..1a0b6f17a5d6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -299,12 +299,18 @@ struct static_key_false { #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT +#define DEFINE_STATIC_KEY_TRUE_RO(name) \ + struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT + #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DEFINE_STATIC_KEY_FALSE_RO(name) \ + struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT + #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name -- cgit From 6312fe77751f57d4fa2b28abeef84c6a95c28136 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 5 Jul 2018 14:34:32 +0800 Subject: net: limit each hash list length to MAX_GRO_SKBS After commit 07d78363dcff ("net: Convert NAPI gro list into a small hash table.")' there is 8 hash buckets, which allows more flows to be held for merging. but MAX_GRO_SKBS, the total held skb for merging, is 8 skb still, limit the hash table performance. keep MAX_GRO_SKBS as 8 skb, but limit each hash list length to 8 skb, not the total 8 skb Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f06ee8f91e74..b683971e500d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -302,6 +302,11 @@ struct netdev_boot_setup { int __init netdev_boot_setup(char *str); +struct gro_list { + struct list_head list; + int count; +}; + /* * Structure for NAPI scheduling similar to tasklet but with weighting */ @@ -323,7 +328,7 @@ struct napi_struct { int poll_owner; #endif struct net_device *dev; - struct list_head gro_hash[GRO_HASH_BUCKETS]; + struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; struct hrtimer timer; struct list_head dev_list; -- cgit From 0380cf7dbaca75c524e34b30979f0806124fa8e6 Mon Sep 17 00:00:00 2001 From: pascal paillet Date: Thu, 5 Jul 2018 14:25:57 +0000 Subject: regulator: core: Change suspend_late to suspend Change suspend_late ops to suspend normal ops. The goal is to avoid requesting all the regulator drivers to be operational in suspend late phase. Signed-off-by: pascal paillet Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index dea96ee39fdc..0fd8fbb74763 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -220,7 +220,7 @@ struct regulator_ops { /* set regulator suspend operating mode (defined in consumer.h) */ int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode); - int (*resume_early)(struct regulator_dev *rdev); + int (*resume)(struct regulator_dev *rdev); int (*set_pull_down) (struct regulator_dev *); }; -- cgit From d8842211b6f63d3f069df973d137de0a85964965 Mon Sep 17 00:00:00 2001 From: pascal paillet Date: Thu, 5 Jul 2018 14:25:56 +0000 Subject: driver core: Add device_link_remove function Device_link_remove uses the same arguments than device_link_add. The Goal is to avoid storing the link pointer. Signed-off-by: pascal paillet Acked-by: Greg Kroah-Hartman Signed-off-by: Mark Brown --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 055a69dbcd18..9c1c3b1d5e11 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1316,6 +1316,7 @@ extern const char *dev_driver_string(const struct device *dev); struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags); void device_link_del(struct device_link *link); +void device_link_remove(void *consumer, struct device *supplier); #ifdef CONFIG_PRINTK -- cgit From 2282e125a406e09331c5a785e3df29035c99a607 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 2 Jul 2018 22:05:21 +0200 Subject: leds: triggers: let struct led_trigger::activate() return an error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that activating a trigger can fail, let the callback return an indication. This prevents to have a trigger active according to the "trigger" sysfs attribute but not functional. All users are changed accordingly to return 0 for now. There is no intended change in behaviour. Signed-off-by: Uwe Kleine-König Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index b7e82550e655..ba5baaaa43bf 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -253,7 +253,7 @@ static inline bool led_sysfs_is_disabled(struct led_classdev *led_cdev) struct led_trigger { /* Trigger Properties */ const char *name; - void (*activate)(struct led_classdev *led_cdev); + int (*activate)(struct led_classdev *led_cdev); void (*deactivate)(struct led_classdev *led_cdev); /* LEDs under control by this trigger (for simple triggers) */ @@ -288,8 +288,8 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger, unsigned long *delay_off, int invert); extern void led_trigger_set_default(struct led_classdev *led_cdev); -extern void led_trigger_set(struct led_classdev *led_cdev, - struct led_trigger *trigger); +extern int led_trigger_set(struct led_classdev *led_cdev, + struct led_trigger *trigger); extern void led_trigger_remove(struct led_classdev *led_cdev); static inline void *led_get_trigger_data(struct led_classdev *led_cdev) @@ -334,8 +334,12 @@ static inline void led_trigger_blink_oneshot(struct led_trigger *trigger, unsigned long *delay_off, int invert) {} static inline void led_trigger_set_default(struct led_classdev *led_cdev) {} -static inline void led_trigger_set(struct led_classdev *led_cdev, - struct led_trigger *trigger) {} +static inline int led_trigger_set(struct led_classdev *led_cdev, + struct led_trigger *trigger) +{ + return 0; +} + static inline void led_trigger_remove(struct led_classdev *led_cdev) {} static inline void *led_get_trigger_data(struct led_classdev *led_cdev) { -- cgit From a7e7a3156300a7e1982b03cc9cb8fb0c86434c49 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 2 Jul 2018 22:05:22 +0200 Subject: leds: triggers: add device attribute support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As many triggers use device attributes, add support for these in led_trigger_set which allows simplifying the drivers accordingly. Signed-off-by: Uwe Kleine-König Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index ba5baaaa43bf..33484a5c7478 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -262,8 +262,19 @@ struct led_trigger { /* Link to next registered trigger */ struct list_head next_trig; + + const struct attribute_group **groups; }; +/* + * Currently the attributes in struct led_trigger::groups are added directly to + * the LED device. As this might change in the future, the following + * macros abstract getting the LED device and its trigger_data from the dev + * parameter passed to the attribute accessor functions. + */ +#define led_trigger_get_led(dev) ((struct led_classdev *)dev_get_drvdata((dev))) +#define led_trigger_get_drvdata(dev) (led_get_trigger_data(led_trigger_get_led(dev))) + ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr, -- cgit From a0b750768371e410d77b60bcf49c18bd45078d55 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 2 Jul 2018 22:05:24 +0200 Subject: leds: triggers: define module_led_trigger helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps to simplify modules that provide a simple led_trigger. It's inspired by module_platform_driver, module_i2c_driver et al. Signed-off-by: Uwe Kleine-König Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 33484a5c7478..a3ee10846a4b 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -326,6 +326,10 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) extern void led_trigger_rename_static(const char *name, struct led_trigger *trig); +#define module_led_trigger(__led_trigger) \ + module_driver(__led_trigger, led_trigger_register, \ + led_trigger_unregister) + #else /* Trigger has no members */ -- cgit From 9acc560de2aac73ef99c54f0fdfb86b4684296b5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 2 Jul 2018 22:05:25 +0200 Subject: leds: triggers: new function led_set_trigger_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the natural counter part to the already existing led_get_trigger_data(). Signed-off-by: Uwe Kleine-König Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index a3ee10846a4b..834683d603f9 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -303,6 +303,12 @@ extern int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger); extern void led_trigger_remove(struct led_classdev *led_cdev); +static inline void led_set_trigger_data(struct led_classdev *led_cdev, + void *trigger_data) +{ + led_cdev->trigger_data = trigger_data; +} + static inline void *led_get_trigger_data(struct led_classdev *led_cdev) { return led_cdev->trigger_data; @@ -356,6 +362,7 @@ static inline int led_trigger_set(struct led_classdev *led_cdev, } static inline void led_trigger_remove(struct led_classdev *led_cdev) {} +static inline void led_set_trigger_data(struct led_classdev *led_cdev) {} static inline void *led_get_trigger_data(struct led_classdev *led_cdev) { return NULL; -- cgit From 576c7218a1546e0153480b208b125509cec71470 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Jun 2018 13:17:41 -0500 Subject: PCI: Export pcie_get_speed_cap and pcie_get_width_cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So drivers can use them. This can be used to replace duplicate code in the drm subsystem. Acked-by: Christian König Acked-by: Bjorn Helgaas Signed-off-by: Alex Deucher --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..6e0c0803b241 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -261,6 +261,9 @@ enum pci_bus_speed { PCI_SPEED_UNKNOWN = 0xff, }; +enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); +enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); + struct pci_cap_saved_data { u16 cap_nr; bool cap_extended; -- cgit From 1b0707a781eeaeeaacb464e18bb3f049b99b496b Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 7 Jun 2018 19:50:38 +0000 Subject: Bluetooth: remove unused bt-nokia-h4p.h header Nothing in tree use this header which seems a remains of a staging driver. This patch remove it. Signed-off-by: Corentin Labbe Reviewed-by: Sebastian Reichel Signed-off-by: Marcel Holtmann --- include/linux/platform_data/bt-nokia-h4p.h | 38 ------------------------------ 1 file changed, 38 deletions(-) delete mode 100644 include/linux/platform_data/bt-nokia-h4p.h (limited to 'include/linux') diff --git a/include/linux/platform_data/bt-nokia-h4p.h b/include/linux/platform_data/bt-nokia-h4p.h deleted file mode 100644 index 30d169dfadf3..000000000000 --- a/include/linux/platform_data/bt-nokia-h4p.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This file is part of Nokia H4P bluetooth driver - * - * Copyright (C) 2010 Nokia Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - - -/** - * struct hci_h4p_platform data - hci_h4p Platform data structure - */ -struct hci_h4p_platform_data { - int chip_type; - int bt_sysclk; - unsigned int bt_wakeup_gpio; - unsigned int host_wakeup_gpio; - unsigned int reset_gpio; - int reset_gpio_shared; - unsigned int uart_irq; - phys_addr_t uart_base; - const char *uart_iclk; - const char *uart_fclk; - void (*set_pm_limits)(struct device *dev, bool set); -}; -- cgit From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:18 -0700 Subject: lib: reciprocal_div: implement the improved algorithm on the paper mentioned The new added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely waive our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets. struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32) exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (imm == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index e031e9f2f9d8..585ce89c0f33 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -25,6 +25,9 @@ struct reciprocal_value { u8 sh1, sh2; }; +/* "reciprocal_value" and "reciprocal_divide" together implement the basic + * version of the algorithm described in Figure 4.1 of the paper. + */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) @@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) return (t + ((a - t) >> R.sh1)) >> R.sh2; } +struct reciprocal_value_adv { + u32 m; + u8 sh, exp; + bool is_wide_m; +}; + +/* "reciprocal_value_adv" implements the advanced version of the algorithm + * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose + * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The + * exception case could be easily handled before calling "reciprocal_value_adv". + * + * The advanced version requires more complex calculation to get the reciprocal + * multiplier and other control variables, but then could reduce the required + * emulation operations. + * + * It makes no sense to use this advanced version for host divide emulation, + * those extra complexities for calculating multiplier etc could completely + * waive our saving on emulation operations. + * + * However, it makes sense to use it for JIT divide code generation for which + * we are willing to trade performance of JITed code with that of host. As shown + * by the following pseudo code, the required emulation operations could go down + * from 6 (the basic version) to 3 or 4. + * + * To use the result of "reciprocal_value_adv", suppose we want to calculate + * n/d, the pseudo C code will be: + * + * struct reciprocal_value_adv rvalue; + * u8 pre_shift, exp; + * + * // handle exception case. + * if (d >= (1U << 31)) { + * result = n >= d; + * return; + * } + * + * rvalue = reciprocal_value_adv(d, 32) + * exp = rvalue.exp; + * if (rvalue.is_wide_m && !(d & 1)) { + * // floor(log2(d & (2^32 -d))) + * pre_shift = fls(d & -d) - 1; + * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); + * } else { + * pre_shift = 0; + * } + * + * // code generation starts. + * if (imm == 1U << exp) { + * result = n >> exp; + * } else if (rvalue.is_wide_m) { + * // pre_shift must be zero when reached here. + * t = (n * rvalue.m) >> 32; + * result = n - t; + * result >>= 1; + * result += t; + * result >>= rvalue.sh - 1; + * } else { + * if (pre_shift) + * result = n >> pre_shift; + * result = ((u64)result * rvalue.m) >> 32; + * result >>= rvalue.sh; + * } + */ +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); + #endif /* _LINUX_RECIPROCAL_DIV_H */ -- cgit From b233504033dbd65740e59681820ccfd0a2a8ec53 Mon Sep 17 00:00:00 2001 From: Yifeng Sun Date: Mon, 2 Jul 2018 08:18:03 -0700 Subject: openvswitch: kernel datapath clone action Add 'clone' action to kernel datapath by using existing functions. When actions within clone don't modify the current flow, the flow key is not cloned before executing clone actions. This is a follow up patch for this incomplete work: https://patchwork.ozlabs.org/patch/722096/ v1 -> v2: Refactor as advised by reviewer. Signed-off-by: Yifeng Sun Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/openvswitch.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index e6b240b6196c..379affc63e24 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -21,4 +21,9 @@ #include +#define OVS_CLONE_ATTR_EXEC 0 /* Specify an u32 value. When nonzero, + * actions in clone will not change flow + * keys. False otherwise. + */ + #endif /* _LINUX_OPENVSWITCH_H */ -- cgit From 69448867abcb231afaa7891ff9d9fd04b2b94a0d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Jul 2018 09:25:43 +0300 Subject: fs: shave 8 bytes off of struct inode Here is a link to Linus' reply to Jan's concern about making i_blkbibts byte addressable: https://marc.info/?l=linux-fsdevel&m=152882624707975&w=2 Here is a link to an lkp.org report about potential performance improvement in some workload, which could(?) be related to packing i_blkbits closer to i_bytes/i_lock: https://marc.info/?l=linux-fsdevel&m=153077048108198&w=2 Changes since v1: - Add links to relevant discussions Signed-off-by: Amir Goldstein Signed-off-by: Al Viro --- include/linux/fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f140c11d35dd..9b00676f5d9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -275,6 +275,7 @@ struct writeback_control; /* * Write life time hint values. + * Stored in struct inode as u8. */ enum rw_hint { WRITE_LIFE_NOT_SET = 0, @@ -609,8 +610,8 @@ struct inode { struct timespec64 i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; - unsigned int i_blkbits; - enum rw_hint i_write_hint; + u8 i_blkbits; + u8 i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED -- cgit From 5d6be70add65e3f236642ab0029e356261617cd0 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 29 Jun 2018 13:04:31 +0200 Subject: PM / Domains: Introduce option to attach a device by name to genpd For the multiple PM domain case, let's introduce a new function called genpd_dev_pm_attach_by_name(). This allows a device to be associated with its PM domain through genpd, by using a name based lookup. Note that, genpd_dev_pm_attach_by_name() shall only be called by the driver core / PM core, similar to how the existing dev_pm_domain_attach_by_id() makes use of genpd_dev_pm_attach_by_id(). However, this is implemented by following changes on top. Signed-off-by: Ulf Hansson Tested-by: Rajendra Nayak Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index cb8d84090cfb..03e14a38462d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -239,6 +239,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev, int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, unsigned int index); +struct device *genpd_dev_pm_attach_by_name(struct device *dev, + char *name); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ static inline int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) @@ -290,6 +292,12 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev, return NULL; } +static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev, + char *name) +{ + return NULL; +} + static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { -- cgit From 27dceb81f445c58b1d10d27d26eaf4ac2e9e0b00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 29 Jun 2018 13:04:32 +0200 Subject: PM / Domains: Introduce dev_pm_domain_attach_by_name() For the multiple PM domain case, let's introduce a new API called dev_pm_domain_attach_by_name(). This allows a consumer driver to associate its device with one of its PM domains, by using a name based lookup. Do note that, currently it's only genpd that supports multiple PM domains per device, but dev_pm_domain_attach_by_name() can easily by extended to cover other PM domain types, if/when needed. Signed-off-by: Ulf Hansson Tested-by: Rajendra Nayak Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 03e14a38462d..776c546d581a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -309,6 +309,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) int dev_pm_domain_attach(struct device *dev, bool power_on); struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index); +struct device *dev_pm_domain_attach_by_name(struct device *dev, + char *name); void dev_pm_domain_detach(struct device *dev, bool power_off); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); #else @@ -321,6 +323,11 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev, { return NULL; } +static inline struct device *dev_pm_domain_attach_by_name(struct device *dev, + char *name) +{ + return NULL; +} static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} -- cgit From e88728f46cfbb59cc7e7acf1d230c05ec093764e Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 27 Jun 2018 18:20:55 +0530 Subject: driver core: Rename flag AUTOREMOVE to AUTOREMOVE_CONSUMER Now that we want to add another flag to autoremove the device link on supplier unbind, it's fair to rename the existing flag from DL_FLAG_AUTOREMOVE to DL_FLAG_AUTOREMOVE_CONSUMER so that we can add similar flag for supplier later. And, while we are touching device.h, fix a doc build warning. Signed-off-by: Vivek Gautam Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 055a69dbcd18..3929805cdd59 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @num_vf: Called to find out how many virtual functions a device on this * bus supports. * @dma_configure: Called to setup DMA configuration on a device on - this bus. + * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -784,14 +784,14 @@ enum device_link_state { * Device link flags. * * STATELESS: The core won't track the presence of supplier/consumer drivers. - * AUTOREMOVE: Remove this link automatically on consumer driver unbind. + * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind. * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. */ -#define DL_FLAG_STATELESS BIT(0) -#define DL_FLAG_AUTOREMOVE BIT(1) -#define DL_FLAG_PM_RUNTIME BIT(2) -#define DL_FLAG_RPM_ACTIVE BIT(3) +#define DL_FLAG_STATELESS BIT(0) +#define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) +#define DL_FLAG_PM_RUNTIME BIT(2) +#define DL_FLAG_RPM_ACTIVE BIT(3) /** * struct device_link - Device link representation. -- cgit From 1689cac5b32a6db6f812e8063ea418a7cf023d03 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 27 Jun 2018 18:20:56 +0530 Subject: driver core: Add flag to autoremove device link on supplier unbind Add a flag to autoremove the device links on supplier driver unbind. This obviates the need to explicitly delete the link in the remove path. We remove these links only when the supplier's link to its consumers has gone to DL_STATE_SUPPLIER_UNBIND state. Signed-off-by: Vivek Gautam Suggested-by: Lukas Wunner Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 3929805cdd59..e80920452b49 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -787,11 +787,13 @@ enum device_link_state { * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind. * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. + * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) +#define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) /** * struct device_link - Device link representation. -- cgit From 63f3fb8d7ccb813e0a1f2ceedb9a3ebe2685b620 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 2 Jul 2018 15:59:37 -0700 Subject: pinctrl: Document pin_config_group_get() return codes like pin_config_get() The pinconf_generic_dump_one() function makes the assumption that pin_config_group_get() should return -EINVAL and -ENOTSUPP just like pin_config_get() does. Document that so it's more obvious. Signed-off-by: Douglas Anderson Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 09eb80f2574a..8dd85d302b90 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -28,7 +28,8 @@ struct seq_file; * is not available on this controller this should return -ENOTSUPP * and if it is available but disabled it should return -EINVAL * @pin_config_set: configure an individual pin - * @pin_config_group_get: get configurations for an entire pin group + * @pin_config_group_get: get configurations for an entire pin group; should + * return -ENOTSUPP and -EINVAL using the same rules as pin_config_get. * @pin_config_group_set: configure all pins in a group * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration * @pin_config_dbg_show: optional debugfs display hook that will provide -- cgit From 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 27 Jun 2018 16:20:28 +0200 Subject: printk/nmi: Prevent deadlock when accessing the main log buffer in NMI The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") brought back the possible deadlocks in printk() and NMI. The check of logbuf_lock is done only in printk_nmi_enter() to prevent mixed output. But another CPU might take the lock later, enter NMI, and: + Both NMIs might be serialized by yet another lock, for example, the one in nmi_cpu_backtrace(). + The other CPU might get stopped in NMI, see smp_send_stop() in panic(). The only safe solution is to use trylock when storing the message into the main log-buffer. It might cause reordering when some lines go to the main lock buffer directly and others are delayed via the per-CPU buffer. It means that it is not useful in general. This patch replaces the problematic NMI deferred context with NMI direct context. It can be used to mark a code that might produce many messages in NMI and the risk of losing them is more critical than problems with eventual reordering. The context is then used when dumping trace buffers on oops. It was the primary motivation for the original fix. Also the reordering is even smaller issue there because some traces have their own time stamps. Finally, nmi_cpu_backtrace() need not longer be serialized because it will always us the per-CPU buffers again. Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 6d7e800affd8..3ede9f46a494 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { } #ifdef CONFIG_PRINTK_NMI extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); +extern void printk_nmi_direct_enter(void); +extern void printk_nmi_direct_exit(void); #else static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_direct_enter(void) { } +static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK -- cgit From 6b1d83d274486615cc341e410467a98fd9c27c0a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:19 -0700 Subject: block: Remove bdev_nr_zones() Remove this function since it has no callers. This function was introduced in commit 6cc77e9cb080 ("block: introduce zoned block devices zone write locking"). Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Christoph Hellwig Cc: Matias Bjorling Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79226ca8f80f..49a400afb146 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1639,15 +1639,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_nr_zones(q); - return 0; -} - static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit From 7c8542b7982264226cf94102950343185869b584 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:20 -0700 Subject: block: Inline blk_queue_nr_zones() Since the implementation of blk_queue_nr_zones() is trivial and since it only has a single caller, inline this function. Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49a400afb146..905daa7c647e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -800,11 +800,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -{ - return q->nr_zones; -} - static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { -- cgit From 6a5ac9846508ad7d1d23881d9d5add35f2e6ae71 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jun 2018 14:55:21 -0700 Subject: block: Make struct request_queue smaller for CONFIG_BLK_DEV_ZONED=n Exclude zoned block device members from struct request_queue for CONFIG_BLK_DEV_ZONED == n. Avoid breaking the build by only building the code that uses these struct request_queue members if CONFIG_BLK_DEV_ZONED != n. Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Cc: Matias Bjorling Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 905daa7c647e..ca5a8b046894 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -592,6 +592,7 @@ struct request_queue { struct queue_limits limits; +#ifdef CONFIG_BLK_DEV_ZONED /* * Zoned block device information for request dispatch control. * nr_zones is the total number of zones of the device. This is always @@ -612,6 +613,7 @@ struct request_queue { unsigned int nr_zones; unsigned long *seq_zones_bitmap; unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ /* * sg stuff @@ -800,6 +802,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_zone_no(struct request_queue *q, sector_t sector) { @@ -815,6 +818,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return false; return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); } +#endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) { @@ -1065,6 +1069,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } +#ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); @@ -1074,6 +1079,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); } +#endif /* CONFIG_BLK_DEV_ZONED */ /* * Some commands like WRITE SAME have a payload or data transfer size which -- cgit From 5815839b3ca16bb1d45939270871169f6803a121 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Jun 2018 19:31:47 +0800 Subject: blk-mq: introduce new lock for protecting hctx->dispatch_wait Now hctx->lock is only acquired when adding hctx->dispatch_wait to one wait queue, but not held when removing it from the wait queue. IO hang can be observed easily if SCHED RESTART is disabled, that means now RESTART exits just for fixing the issue in blk_mq_mark_tag_wait(). This patch fixes the issue by introducing hctx->dispatch_wait_lock and holding it for removing hctx->dispatch_wait in blk_mq_dispatch_wake(), since we need to avoid acquiring hctx->lock in irq context. Fixes: eb619fdb2d4cb8b3d3419 ("blk-mq: fix issue with shared tag queue re-running") Cc: Christoph Hellwig Cc: Omar Sandoval Cc: Bart Van Assche Tested-by: Andrew Jones Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e3147eb74222..ea690254dab7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -39,6 +39,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; + spinlock_t dispatch_wait_lock; wait_queue_entry_t dispatch_wait; atomic_t wait_index; -- cgit From 97889f9ac24f8d2fc8e703ea7f80c162bab10d4d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Jun 2018 19:31:48 +0800 Subject: blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set() We have to remove synchronize_rcu() from blk_queue_cleanup(), otherwise long delay can be caused during lun probe. For removing it, we have to avoid to iterate the set->tag_list in IO path, eg, blk_mq_sched_restart(). This patch reverts 5b79413946d (Revert "blk-mq: don't handle TAG_SHARED in restart"). Given we have fixed enough IO hang issue, and there isn't any reason to restart all queues in one tags any more, see the following reasons: 1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(), which can wake up queues waiting for driver tag. 2) SCSI is a bit special because it may return BLK_STS_RESOURCE if queue, target or host is ready, but SCSI built-in restart can cover all these well, see scsi_end_request(), queue will be rerun after any request initiated from this host/target is completed. In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30% when running I/O on these 8 luns concurrently. Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") Cc: Omar Sandoval Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: linux-scsi@vger.kernel.org Reported-by: Andrew Jones Tested-by: Andrew Jones Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca5a8b046894..9d05646d5059 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,8 +442,6 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ - atomic_t shared_hctx_restart; - struct blk_queue_stats *stats; struct rq_wb *rq_wb; -- cgit From 6e768717304bdbe8d2897ca8298f6b58863fdc41 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 3 Jul 2018 09:03:16 -0600 Subject: blk-mq: dequeue request one by one from sw queue if hctx is busy It won't be efficient to dequeue request one by one from sw queue, but we have to do that when queue is busy for better merge performance. This patch takes the Exponential Weighted Moving Average(EWMA) to figure out if queue is busy, then only dequeue request one by one from sw queue when queue is busy. Fixes: b347689ffbca ("blk-mq-sched: improve dispatching from sw queue") Cc: Kashyap Desai Cc: Laurence Oberman Cc: Omar Sandoval Cc: Christoph Hellwig Cc: Bart Van Assche Cc: Hannes Reinecke Reported-by: Kashyap Desai Tested-by: Kashyap Desai Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ea690254dab7..d710e92874cc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -35,9 +35,10 @@ struct blk_mq_hw_ctx { struct sbitmap ctx_map; struct blk_mq_ctx *dispatch_from; + unsigned int dispatch_busy; - struct blk_mq_ctx **ctxs; unsigned int nr_ctx; + struct blk_mq_ctx **ctxs; spinlock_t dispatch_wait_lock; wait_queue_entry_t dispatch_wait; -- cgit From 08e18eab0c579ad84399c1899c11899734854eb2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:14:50 -0400 Subject: block: add bi_blkg to the bio for cgroups Currently io.low uses a bi_cg_private to stash its private data for the blkg, however other blkcg policies may want to use this as well. Since we can get the private data out of the blkg, move this to bi_blkg in the bio and make it generic, then we can use bio_associate_blkg() to attach the blkg to the bio. Theoretically we could simply replace the bi_css with this since we can get to all the same information from the blkg, however you have to lookup the blkg, so for example wbc_init_bio() would have to lookup and possibly allocate the blkg for the css it was trying to attach to the bio. This could be problematic and result in us either not attaching the css at all to the bio, or falling back to the root blkcg if we are unable to allocate the corresponding blkg. So for now do this, and in the future if possible we could just replace the bi_css with bi_blkg and update the helpers to do the correct translation. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + include/linux/blk_types.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f08f5fe7bd08..a279ba384da9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -555,6 +555,7 @@ do { \ #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3c4f390aea4b..3364d42ebe08 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -179,8 +179,8 @@ struct bio { */ struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; + struct blkcg_gq *bi_blkg; #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - void *bi_cg_private; struct bio_issue bi_issue; #endif #endif -- cgit From c7c98fd37653955d3a17dd4f1fa67aba070096a9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:14:51 -0400 Subject: block: introduce bio_issue_as_root_blkg Instead of forcing all file systems to get the right context on their bio's, simply check for REQ_META to see if we need to issue as the root blkg. We don't want to force all bio's to have the root blkg associated with them if REQ_META is set, as some controllers (blk-iolatency) need to know who the originating cgroup is so it can backcharge them for the work they are doing. This helper will make sure that the controllers do the proper thing wrt the IO priority and backcharging. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6c666fd7de3c..69aa71dc0c04 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -238,6 +238,22 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return css_to_blkcg(task_css(current, io_cgrp_id)); } +/** + * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg + * @return: true if this bio needs to be submitted with the root blkg context. + * + * In order to avoid priority inversions we sometimes need to issue a bio as if + * it were attached to the root blkg, and then backcharge to the actual owning + * blkg. The idea is we do bio_blkcg() to look up the actual context for the + * bio and attach the appropriate blkg to the bio. Then we call this helper and + * if it is true run with the root blkg for that queue and then do any + * backcharging to the originating cgroup once the io is complete. + */ +static inline bool bio_issue_as_root_blkg(struct bio *bio) +{ + return (bio->bi_opf & REQ_META); +} + /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest -- cgit From 903d23f0a354f226fa78f1c1c34b60aaf992e812 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:14:52 -0400 Subject: blk-cgroup: allow controllers to output their own stats blk-iolatency has a few stats that it would like to print out, and instead of adding a bunch of crap to the generic code just provide a helper so that controllers can add stuff to the stat line if they want to. Hide it behind a boot option since it changes the output of io.stat from normal, and these stats are only interesting to developers. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 69aa71dc0c04..b41292726c0f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -148,6 +148,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); +typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf, + size_t size); struct blkcg_policy { int plid; @@ -167,6 +169,7 @@ struct blkcg_policy { blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_free_pd_fn *pd_free_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; + blkcg_pol_stat_pd_fn *pd_stat_fn; }; extern struct blkcg blkcg_root; -- cgit From 0d1e0c7cd5909d6c6aa0957179318e13fcca971a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:14:53 -0400 Subject: blk: introduce REQ_SWAP Just like REQ_META, it's important to know the IO coming down is swap in order to guard against potential IO priority inversion issues with cgroups. Add REQ_SWAP and use it for all swap IO, and add it to our bio_issue_as_root_blkg helper. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- include/linux/blk_types.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b41292726c0f..a8f9ba8f33a4 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -254,7 +254,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) */ static inline bool bio_issue_as_root_blkg(struct bio *bio) { - return (bio->bi_opf & REQ_META); + return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; } /** diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3364d42ebe08..0ffc34c5cc83 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -329,7 +329,7 @@ enum req_flag_bits { /* for driver use */ __REQ_DRV, - + __REQ_SWAP, /* swapping request. */ __REQ_NR_BITS, /* stops here */ }; @@ -351,6 +351,7 @@ enum req_flag_bits { #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) #define REQ_DRV (1ULL << __REQ_DRV) +#define REQ_SWAP (1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit From 0d3bd88d54f513723602b361dccfc71639f50779 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Jul 2018 11:14:54 -0400 Subject: swap,blkcg: issue swap io with the appropriate context For backcharging we need to know who the page belongs to when swapping it out. We don't worry about things that do ->rw_page (zram etc) at the moment, we're only worried about pages that actually go to a block device. Signed-off-by: Tejun Heo Signed-off-by: Josef Bacik Acked-by: Johannes Weiner Acked-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a279ba384da9..a00dfff51aa5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -553,6 +553,13 @@ do { \ #define bio_dev(bio) \ disk_devt((bio)->bi_disk) +#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +#else +static inline int bio_associate_blkcg_from_page(struct bio *bio, + struct page *page) { return 0; } +#endif + #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); -- cgit From d09d8df3a29403693d9d20cc34ed101f2c558e2b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:14:55 -0400 Subject: blkcg: add generic throttling mechanism Since IO can be issued from literally anywhere it's almost impossible to do throttling without having some sort of adverse effect somewhere else in the system because of locking or other dependencies. The best way to solve this is to do the throttling when we know we aren't holding any other kernel resources. Do this by tracking throttling in a per-blkg basis, and if we require throttling flag the task that it needs to check before it returns to user space and possibly sleep there. This is to address the case where a process is doing work that is generating IO that can't be throttled, whether that is directly with a lot of REQ_META IO, or indirectly by allocating so much memory that it is swamping the disk with REQ_SWAP. We can't use task_add_work as we don't want to induce a memory allocation in the IO path, so simply saving the request queue in the task and flagging it to do the notify_resume thing achieves the same result without the overhead of a memory allocation. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 99 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cgroup-defs.h | 3 ++ include/linux/sched.h | 8 ++++ include/linux/tracehook.h | 2 + 4 files changed, 112 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a8f9ba8f33a4..de57de4831d5 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -136,6 +136,12 @@ struct blkcg_gq { struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; + + atomic_t use_delay; + atomic64_t delay_nsec; + atomic64_t delay_start; + u64 last_delay; + int last_use; }; typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); @@ -241,6 +247,26 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return css_to_blkcg(task_css(current, io_cgrp_id)); } +static inline bool blk_cgroup_congested(void) +{ + struct cgroup_subsys_state *css; + bool ret = false; + + rcu_read_lock(); + css = kthread_blkcg(); + if (!css) + css = task_css(current, io_cgrp_id); + while (css) { + if (atomic_read(&css->cgroup->congestion_count)) { + ret = true; + break; + } + css = css->parent; + } + rcu_read_unlock(); + return ret; +} + /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg * @return: true if this bio needs to be submitted with the root blkg context. @@ -374,6 +400,21 @@ static inline void blkg_get(struct blkcg_gq *blkg) atomic_inc(&blkg->refcnt); } +/** + * blkg_try_get - try and get a blkg reference + * @blkg: blkg to get + * + * This is for use when doing an RCU lookup of the blkg. We may be in the midst + * of freeing this blkg, so we can only use it if the refcnt is not zero. + */ +static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +{ + if (atomic_inc_not_zero(&blkg->refcnt)) + return blkg; + return NULL; +} + + void __blkg_release_rcu(struct rcu_head *rcu); /** @@ -734,6 +775,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, return !throtl; } +static inline void blkcg_use_delay(struct blkcg_gq *blkg) +{ + if (atomic_add_return(1, &blkg->use_delay) == 1) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); +} + +static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + + if (old == 0) + return 0; + + /* + * We do this song and dance because we can race with somebody else + * adding or removing delay. If we just did an atomic_dec we'd end up + * negative and we'd already be in trouble. We need to subtract 1 and + * then check to see if we were the last delay so we can drop the + * congestion count on the cgroup. + */ + while (old) { + int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); + if (cur == old) + break; + old = cur; + } + + if (old == 0) + return 0; + if (old == 1) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + return 1; +} + +static inline void blkcg_clear_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + if (!old) + return; + /* We only want 1 person clearing the congestion count for this blkg. */ + while (old) { + int cur = atomic_cmpxchg(&blkg->use_delay, old, 0); + if (cur == old) { + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + break; + } + old = cur; + } +} + +void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { @@ -753,8 +847,13 @@ struct blkcg_policy { #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) +static inline void blkcg_maybe_throttle_current(void) { } +static inline bool blk_cgroup_congested(void) { return false; } + #ifdef CONFIG_BLOCK +static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } + static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c0e68f903011..ff20b677fb9f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -438,6 +438,9 @@ struct cgroup { /* used to store eBPF programs */ struct cgroup_bpf bpf; + /* If there is block congestion on this cgroup. */ + atomic_t congestion_count; + /* ids of the ancestors at each level including self */ int ancestor_ids[]; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 43731fe51c97..c2e993de67ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,6 +734,10 @@ struct task_struct { /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; #endif +#ifdef CONFIG_BLK_CGROUP + /* to be used once the psi infrastructure lands upstream. */ + unsigned use_memdelay:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1151,6 +1155,10 @@ struct task_struct { unsigned int memcg_nr_pages_over_high; #endif +#ifdef CONFIG_BLK_CGROUP + struct request_queue *throttle_queue; +#endif + #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 4a8841963c2e..05589a3e37f4 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -51,6 +51,7 @@ #include #include #include +#include struct linux_binprm; /* @@ -192,6 +193,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) task_work_run(); mem_cgroup_handle_over_high(); + blkcg_maybe_throttle_current(); } #endif /* */ -- cgit From 2cf855837b89d92996cf264713f3bed2bf9b0b4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Jul 2018 11:14:56 -0400 Subject: memcontrol: schedule throttling if we are congested Memory allocations can induce swapping via kswapd or direct reclaim. If we are having IO done for us by kswapd and don't actually go into direct reclaim we may never get scheduled for throttling. So instead check to see if our cgroup is congested, and if so schedule the throttling. Before we return to user space the throttling stuff will only throttle if we actually required it. Signed-off-by: Tejun Heo Signed-off-by: Josef Bacik Acked-by: Johannes Weiner Acked-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 13 +++++++++++++ include/linux/swap.h | 11 ++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c6fb116e925..680d3395fc83 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp, bool compound); +int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp, + bool compound); void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare, bool compound); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, @@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, return 0; } +static inline int mem_cgroup_try_charge_delay(struct page *page, + struct mm_struct *mm, + gfp_t gfp_mask, + struct mem_cgroup **memcgp, + bool compound) +{ + *memcgp = NULL; + return 0; +} + static inline void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare, bool compound) diff --git a/include/linux/swap.h b/include/linux/swap.h index c063443d8638..1a8bd05a335e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) return memcg->swappiness; } - #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { @@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif +#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node, + gfp_t gfp_mask); +#else +static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, + int node, gfp_t gfp_mask) +{ +} +#endif + #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -- cgit From a79050434b45959f397042080fd1d70ffa9bd9df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 09:32:35 -0600 Subject: blk-rq-qos: refactor out common elements of blk-wbt blkcg-qos is going to do essentially what wbt does, only on a cgroup basis. Break out the common code that will be shared between blkcg-qos and wbt into blk-rq-qos.* so they can both utilize the same infrastructure. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9d05646d5059..137759862f07 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -42,7 +42,7 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; -struct rq_wb; +struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; @@ -443,7 +443,7 @@ struct request_queue { int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; - struct rq_wb *rq_wb; + struct rq_qos *rq_qos; /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg -- cgit From d70675121546c35feaceebf7ed9caed8716640f3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Jul 2018 11:15:01 -0400 Subject: block: introduce blk-iolatency io controller Current IO controllers for the block layer are less than ideal for our use case. The io.max controller is great at hard limiting, but it is not work conserving. This patch introduces io.latency. You provide a latency target for your group and we monitor the io in short windows to make sure we are not exceeding those latency targets. This makes use of the rq-qos infrastructure and works much like the wbt stuff. There are a few differences from wbt - It's bio based, so the latency covers the whole block layer in addition to the actual io. - We will throttle all IO types that comes in here if we need to. - We use the mean latency over the 100ms window. This is because writes can be particularly fast, which could give us a false sense of the impact of other workloads on our protected workload. - By default there's no throttling, we set the queue_depth to INT_MAX so that we can have as many outstanding bio's as we're allowed to. Only at throttle time do we pay attention to the actual queue depth. - We backcharge cgroups for root cg issued IO and induce artificial delays in order to deal with cases like metadata only or swap heavy workloads. In testing this has worked out relatively well. Protected workloads will throttle noisy workloads down to 1 io at time if they are doing normal IO on their own, or induce up to a 1 second delay per syscall if they are doing a lot of root issued IO (metadata/swap IO). Our testing has revolved mostly around our production web servers where we have hhvm (the web server application) in a protected group and everything else in another group. We see slightly higher requests per second (RPS) on the test tier vs the control tier, and much more stable RPS across all machines in the test tier vs the control tier. Another test we run is a slow memory allocator in the unprotected group. Before this would eventually push us into swap and cause the whole box to die and not recover at all. With these patches we see slight RPS drops (usually 10-15%) before the memory consumer is properly killed and things recover within seconds. Signed-off-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0ffc34c5cc83..e13449a379a1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -180,9 +180,7 @@ struct bio { struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW struct bio_issue bi_issue; -#endif #endif union { #if defined(CONFIG_BLK_DEV_INTEGRITY) -- cgit From ffcfe25bb50f27395e15fa999f1a7eb769f55360 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 9 Jul 2018 12:19:38 -0400 Subject: net: Add support for subordinate device traffic classes This patch is meant to provide the basic tools needed to allow us to create subordinate device traffic classes. The general idea here is to allow subdividing the queues of a device into queue groups accessible through an upper device such as a macvlan. The idea here is to enforce the idea that an upper device has to be a single queue device, ideally with IFF_NO_QUQUE set. With that being the case we can pretty much guarantee that the tc_to_txq mappings and XPS maps for the upper device are unused. As such we could reuse those in order to support subdividing the lower device and distributing those queues between the subordinate devices. In order to distinguish between a regular set of traffic classes and if a device is carrying subordinate traffic classes I changed num_tc from a u8 to a s16 value and use the negative values to represent the subordinate pool values. So starting at -1 and running to -32768 we can encode those as pool values, and the existing values of 0 to 15 can be maintained. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b683971e500d..b1ff77276bc4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -575,6 +575,9 @@ struct netdev_queue { * (/sys/class/net/DEV/Q/trans_timeout) */ unsigned long trans_timeout; + + /* Subordinate device that the queue has been assigned to */ + struct net_device *sb_dev; /* * write-mostly part */ @@ -1991,7 +1994,7 @@ struct net_device { #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - u8 num_tc; + s16 num_tc; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; @@ -2045,6 +2048,17 @@ int netdev_get_num_tc(struct net_device *dev) return dev->num_tc; } +void netdev_unbind_sb_channel(struct net_device *dev, + struct net_device *sb_dev); +int netdev_bind_sb_channel_queue(struct net_device *dev, + struct net_device *sb_dev, + u8 tc, u16 count, u16 offset); +int netdev_set_sb_channel(struct net_device *dev, u16 channel); +static inline int netdev_get_sb_channel(struct net_device *dev) +{ + return max_t(int, -dev->num_tc, 0); +} + static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) -- cgit From eadec877ce9ca46a94e9036b5a44e7941d4fc501 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 9 Jul 2018 12:19:48 -0400 Subject: net: Add support for subordinate traffic classes to netdev_pick_tx This change makes it so that we can support the concept of subordinate device traffic classes to the core networking code. In doing this we can start pulling out the driver specific bits needed to support selecting a queue based on an upper device. The solution at is currently stands is only partially implemented. I have the start of some XPS bits in here, but I would still need to allow for configuration of the XPS maps on the queues reserved for the subordinate devices. For now I am using the reference to the sb_dev XPS map as just a way to skip the lookup of the lower device XPS map for now as that would result in the wrong queue being picked. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b1ff77276bc4..fda0bcda7a42 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2103,7 +2103,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, - void *accel_priv); + struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev @@ -2568,7 +2568,7 @@ void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); -int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); +int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); -- cgit From a4ea8a3dacc312c3402c78f6e4843afdda9b43a0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 9 Jul 2018 12:19:54 -0400 Subject: net: Add generic ndo_select_queue functions This patch adds a generic version of the ndo_select_queue functions for either returning 0 or selecting a queue based on the processor ID. This is generally meant to just reduce the number of functions we have to change in the future when we have to deal with ndo_select_queue changes. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fda0bcda7a42..46f4c44ce3e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2567,6 +2567,10 @@ void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); +u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback); +u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); -- cgit From 4f49dec9075aa0277b8c9c657ec31e6361f88724 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 9 Jul 2018 12:19:59 -0400 Subject: net: allow ndo_select_queue to pass netdev This patch makes it so that instead of passing a void pointer as the accel_priv we instead pass a net_device pointer as sb_dev. Making this change allows us to pass the subordinate device through to the fallback function eventually so that we can keep the actual code in the ndo_select_queue call as focused on possible on the exception cases. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46f4c44ce3e4..bbf062c1ca8a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -957,7 +957,8 @@ struct dev_ifalias { * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - * void *accel_priv, select_queue_fallback_t fallback); + * struct net_device *sb_dev, + * select_queue_fallback_t fallback); * Called to decide which queue to use when device supports multiple * transmit queues. * @@ -1229,7 +1230,7 @@ struct net_device_ops { netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, + struct net_device *sb_dev, select_queue_fallback_t fallback); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -2568,9 +2569,11 @@ void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback); + struct net_device *sb_dev, + select_queue_fallback_t fallback); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback); + struct net_device *sb_dev, + select_queue_fallback_t fallback); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); -- cgit From 8ec56fc3c5ee6f9700adac190e9ce5b8859a58b6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 9 Jul 2018 12:20:04 -0400 Subject: net: allow fallback function to pass netdev For most of these calls we can just pass NULL through to the fallback function as the sb_dev. The only cases where we cannot are the cases where we might be dealing with either an upper device or a driver that would have configured things to support an sb_dev itself. The only driver that has any significant change in this patch set should be ixgbe as we can drop the redundant functionality that existed in both the ndo_select_queue function and the fallback function that was passed through to us. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bbf062c1ca8a..2daf2fa6554f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -793,7 +793,8 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, } typedef u16 (*select_queue_fallback_t)(struct net_device *dev, - struct sk_buff *skb); + struct sk_buff *skb, + struct net_device *sb_dev); enum tc_setup_type { TC_SETUP_QDISC_MQPRIO, -- cgit From 9f17dbf04ddf55ae48f5bbafea4c4920ea943215 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 9 Jul 2018 18:10:02 +0100 Subject: netfilter: fix use-after-free in NF_HOOK_LIST nf_hook() can free the skb, so we need to remove it from the list before calling, and add passed skbs to a sublist afterwards. Fixes: 17266ee93984 ("net: ipv4: listified version of ip_rcv") Reported-by: Dan Carpenter Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/netfilter.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 5a5e0a2ab2a3..23b48de8c2e2 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *skb, *next; + struct list_head sublist; + INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); - if (ret != 1) - list_del(&skb->list); + list_del(&skb->list); + if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1) + list_add_tail(&skb->list, &sublist); } + /* Put passed packets back on main list */ + list_splice(&sublist, head); } /* Call setsockopt() */ -- cgit From aef92a8bed25d03b8f03ce499a56e8e8e5e2c05e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Jul 2018 13:42:10 +0200 Subject: watchdog/softlockup: Fix the SOFTLOCKUP_DETECTOR=n build I got confused by all the various CONFIG options here about and conflated CONFIG_LOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR. This results in a build failure for: CONFIG_LOCKUP_DETECTOR=y && CONFIG_SOFTLOCKUP_DETECTOR=n As reported by Abdul. Reported-and-tested-by: Abdul Haleem Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-next Cc: linuxppc-dev Cc: mpe Cc: sachinp Cc: stephen Rothwell Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work") Link: http://lkml.kernel.org/r/20180710114210.GI2476@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 80664bbeca43..08f9247e9827 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -33,15 +33,10 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_hardlockup_all_cpu_backtrace 0 #endif /* !CONFIG_SMP */ -extern int lockup_detector_online_cpu(unsigned int cpu); -extern int lockup_detector_offline_cpu(unsigned int cpu); - #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } -#define lockup_detector_online_cpu NULL -#define lockup_detector_offline_cpu NULL #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -50,12 +45,18 @@ extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -#else + +extern int lockup_detector_online_cpu(unsigned int cpu); +extern int lockup_detector_offline_cpu(unsigned int cpu); +#else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline void touch_softlockup_watchdog_sched(void) { } static inline void touch_softlockup_watchdog(void) { } static inline void touch_softlockup_watchdog_sync(void) { } static inline void touch_all_softlockup_watchdogs(void) { } -#endif + +#define lockup_detector_online_cpu NULL +#define lockup_detector_offline_cpu NULL +#endif /* CONFIG_SOFTLOCKUP_DETECTOR */ #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); -- cgit From 8d3e994241e6bcc7ead2b918c4f15b7683afa90a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:57:58 +0100 Subject: arm_pmu: Clean up maximum period handling Each PMU defines their max_period of the counter as the maximum value that can be counted. Since all the PMU backends support 32bit counters by default, let us remove the redundant field. No functional changes. Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ad5444491975..12c30a22fc8d 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -94,7 +94,6 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); int num_events; - u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); -- cgit From 3a95200d3f89afd8b67f39d88d36cc7ec96ce385 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:57:59 +0100 Subject: arm_pmu: Change API to support 64bit counter values Convert the {read/write}_counter APIs to handle 64bit values to enable supporting chained event counters. The backends still use 32bit values and we pass them 32bit values only. So in effect there are no functional changes. Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 12c30a22fc8d..f7126a21df30 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -87,8 +87,8 @@ struct arm_pmu { struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(struct perf_event *event); - void (*write_counter)(struct perf_event *event, u32 val); + u64 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u64 val); void (*start)(struct arm_pmu *); void (*stop)(struct arm_pmu *); void (*reset)(void *); -- cgit From e2da97d328d4951d25f6634eda7213f7257417b6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:00 +0100 Subject: arm_pmu: Add support for 64bit event counters Each PMU has a set of 32bit event counters. But in some special cases, the events could be counted using counters which are effectively 64bit wide. e.g, Arm V8 PMUv3 has a 64 bit cycle counter which can count only the CPU cycles. Also, the PMU can chain the event counters to effectively count as a 64bit counter. Add support for tracking the events that uses 64bit counters. This only affects the periods set for each counter in the core driver. Cc: Will Deacon Reviewed-by: Julien Thierry Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index f7126a21df30..10f92e1d8e7b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -25,6 +25,12 @@ */ #define ARMPMU_MAX_HWEVENTS 32 +/* + * ARM PMU hw_event flags + */ +/* Event uses a 64bit counter */ +#define ARMPMU_EVT_64BIT 1 + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF -- cgit From 14b470b56840dbb093abd71f214e9d63770c87b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Jul 2018 22:19:07 +0200 Subject: scsi: target: sbitmap: add seq_file forward declaration The target core runs into a warning in the linux/sbitmap.h file in some configurations: In file included from include/target/target_core_base.h:7, from drivers/target/target_core_fabric_lib.c:41: include/linux/sbitmap.h:331:46: error: 'struct seq_file' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] void sbitmap_show(struct sbitmap *sb, struct seq_file *m); ^~~~~~~~ In general, headers should not depend on others being included first, so this fixes it with a forward declaration for that struct name, but we probably want to merge the patch through the scsi tree to help bisection. Fixes: 10e9cbb6b531 ("scsi: target: Convert target drivers to use sbitmap") Signed-off-by: Arnd Bergmann Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Reviewed-by: Jens Axboe Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e6539536dea9..804a50983ec5 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -23,6 +23,8 @@ #include #include +struct seq_file; + /** * struct sbitmap_word - Word in a &struct sbitmap. */ -- cgit From b60a60405fb95a688eb2ef4ef20f5fcaa7b64f68 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 6 Jul 2018 17:37:19 +0200 Subject: netfilter: Add nf_ct_get_tuple_skb global lookup function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a global netfilter function to extract a conntrack tuple from an skb. The function uses a new function added to nf_ct_hook, which will try to get the tuple from skb->_nfct, and do a full lookup if that fails. This makes it possible to use the lookup function before the skb has passed through the conntrack init hooks (e.g., in an ingress qdisc). The tuple is copied to the caller to avoid issues with reference counting. The function returns false if conntrack is not loaded, allowing it to be used without incurring a module dependency on conntrack. This is used by the NAT mode in sch_cake. Cc: netfilter-devel@vger.kernel.org Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 23b48de8c2e2..07efffd0c759 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -414,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +struct nf_conntrack_tuple; +bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +struct nf_conntrack_tuple; +static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +{ + return false; +} #endif struct nf_conn; @@ -424,6 +433,8 @@ enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, + const struct sk_buff *); }; extern struct nf_ct_hook __rcu *nf_ct_hook; -- cgit From 19f391eb05b8b005f2907ddc8f284487b446abf3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 11:19:32 -0400 Subject: turn filp_clone_open() into inline wrapper for dentry_open() it's exactly the same thing as dentry_open(&file->f_path, file->f_flags, file->f_cred) ... and rename it to file_clone_open(), while we are at it. 'filp' naming convention is bogus; sure, it's "file pointer", but we generally don't do that kind of Hungarian notation. Some of the instances have too many callers to touch, but this one has only two, so let's sanitize it while we can... Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index aa9b4c169ed2..c4ca4c9c1130 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2422,7 +2422,10 @@ extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int, umode_t); extern struct file * dentry_open(const struct path *, int, const struct cred *); -extern struct file *filp_clone_open(struct file *); +static inline struct file *file_clone_open(struct file *file) +{ + return dentry_open(&file->f_path, file->f_flags, file->f_cred); +} extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int, int *); -- cgit From 9dc55f1389f9569acf9659e58dd836a9c70df217 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jul 2018 22:26:05 -0700 Subject: iomap: add support for sub-pagesize buffered I/O without buffer heads After already supporting a simple implementation of buffered writes for the blocksize == PAGE_SIZE case in the last commit this adds full support even for smaller block sizes. There are three bits of per-block information in the buffer_head structure that really matter for the iomap read and write path: - uptodate status (BH_uptodate) - marked as currently under read I/O (BH_Async_Read) - marked as currently under write I/O (BH_Async_Write) Instead of having new per-block structures this now adds a per-page structure called struct iomap_page to track this information in a slightly different form: - a bitmap for the per-block uptodate status. For worst case of a 64k page size system this bitmap needs to contain 128 bits. For the typical 4k page size case it only needs 8 bits, although we still need a full unsigned long due to the way the atomic bitmap API works. - two atomic_t counters are used to track the outstanding read and write counts There is quite a bit of boilerplate code as the buffered I/O path uses various helper methods, but the actual code is very straight forward. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 5eb9ca8d7ce5..3555d54bf79a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -2,6 +2,9 @@ #ifndef LINUX_IOMAP_H #define LINUX_IOMAP_H 1 +#include +#include +#include #include struct address_space; @@ -98,12 +101,40 @@ struct iomap_ops { ssize_t written, unsigned flags, struct iomap *iomap); }; +/* + * Structure allocate for each page when block size < PAGE_SIZE to track + * sub-page uptodate status and I/O completions. + */ +struct iomap_page { + atomic_t read_count; + atomic_t write_count; + DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); +}; + +static inline struct iomap_page *to_iomap_page(struct page *page) +{ + if (page_has_private(page)) + return (struct iomap_page *)page_private(page); + return NULL; +} + ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); int iomap_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, const struct iomap_ops *ops); int iomap_set_page_dirty(struct page *page); +int iomap_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count); +int iomap_releasepage(struct page *page, gfp_t gfp_mask); +void iomap_invalidatepage(struct page *page, unsigned int offset, + unsigned int len); +#ifdef CONFIG_MIGRATION +int iomap_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page, enum migrate_mode mode); +#else +#define iomap_migrate_page NULL +#endif int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, -- cgit From 3443b00e07eed5798605ba6524de37e1d3f1a4bf Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 10 Jul 2018 10:02:57 +0300 Subject: team: Publish team_port_get_rcu() A follow-up patch adds a new entry point, team_port_dev_txable(). Making it an ordinary exported function would mean that any module that may need the service in one of the supported configurations also unconditionally needs to pull in the team module, whether or not the user actually intends to create team interfaces. To prevent that, team_port_dev_txable() is defined in if_team.h, and therefore all dependencies of that function also need to be publicly-visible. Therefore move team_port_get_rcu() from team.c to if_team.h. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/if_team.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index d95cae09dea0..0d07c6655cce 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -74,6 +74,11 @@ struct team_port { long mode_priv[0]; }; +static inline struct team_port *team_port_get_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + static inline bool team_port_enabled(struct team_port *port) { return port->index != -1; -- cgit From eeed992b776c54af6108187c87ac60d028e69d37 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 10 Jul 2018 10:02:58 +0300 Subject: net: Add lag.h, net_lag_port_dev_txable() LAG devices (team or bond) recognize for each one of their slave devices whether LAG traffic is going to be sent through that device. Bond calls such devices "active", team calls them "txable". When this state changes, a NETDEV_CHANGELOWERSTATE notification is distributed, together with a netdev_notifier_changelowerstate_info structure that for LAG devices includes a tx_enabled flag that refers to the new state. The notification thus makes it possible to react to the changes in txability in drivers. However there's no way to query txability from the outside on demand. That is problematic namely for mlxsw, which when resolving ERSPAN packet path, may encounter a LAG device, and needs to determine which of the slaves it should choose. To that end, introduce a new function, net_lag_port_dev_txable(), which determines whether a given slave device is "active" or "txable" (depending on the flavor of the LAG device). That function then dispatches to per-LAG-flavor helpers, bond_is_active_slave_dev() resp. team_port_dev_txable(). Because there currently is no good place where net_lag_port_dev_txable() should be added, introduce a new header file, lag.h, which should from now on hold any logic common to both team and bond. (But keep netif_is_lag_master() together with the rest of netif_is_*_master() functions). Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/if_team.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 0d07c6655cce..ac42da56f7a2 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -89,6 +89,19 @@ static inline bool team_port_txable(struct team_port *port) return port->linkup && team_port_enabled(port); } +static inline bool team_port_dev_txable(const struct net_device *port_dev) +{ + struct team_port *port; + bool txable; + + rcu_read_lock(); + port = team_port_get_rcu(port_dev); + txable = port ? team_port_txable(port) : false; + rcu_read_unlock(); + + return txable; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) -- cgit From bf1c77b4644f46d2986b7ca5e43e012f0fc8984b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:50 +0100 Subject: kernel: add ksys_personality() Using this helper allows us to avoid the in-kernel call to the sys_personality() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_personality(). Since ksys_personality is trivial, it is implemented directly in , as we do for ksys_close() and friends. This helper is necessary to enable conversion of arm64's syscall handling to use pt_regs wrappers. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Cc: Al Viro Cc: Christoph Hellwig Cc: Dave Martin Signed-off-by: Will Deacon --- include/linux/syscalls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a368a68cb667..abfe12d8a9c5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -80,6 +80,7 @@ union bpf_attr; #include #include #include +#include #include #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER @@ -1281,4 +1282,14 @@ static inline long ksys_truncate(const char __user *pathname, loff_t length) return do_sys_truncate(pathname, length); } +static inline unsigned int ksys_personality(unsigned int personality) +{ + unsigned int old = current->personality; + + if (personality != 0xffffffff) + set_personality(personality); + + return old; +} + #endif -- cgit From 9b54bf9d6a5b30e2cc22b18793e9a4158c5b4882 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:51 +0100 Subject: kernel: add kcompat_sys_{f,}statfs64() Using this helper allows us to avoid the in-kernel calls to the compat_sys_{f,}statfs64() sycalls, as are necessary for parameter mangling in arm64's compat handling. Following the example of ksys_* functions, kcompat_sys_* functions are intended to be a drop-in replacement for their compat_sys_* counterparts, with the same calling convention. This is necessary to enable conversion of arm64's syscall handling to use pt_regs wrappers. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Will Deacon --- include/linux/compat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index c68acc47da57..43f4ed44c5d5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -1028,6 +1028,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec) return ctv; } +/* + * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz()) + * directly. Instead, use one of the functions which work equivalently, such + * as the kcompat_sys_xyzyyz() functions prototyped below. + */ + +int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, + struct compat_statfs64 __user * buf); +int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, + struct compat_statfs64 __user * buf); + #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) -- cgit From c9c554f21490bbc96cc554f80024d27d09670480 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Jul 2018 14:19:04 -0400 Subject: alloc_file(): switch to passing O_... flags instead of FMODE_... mode ... so that it could set both ->f_flags and ->f_mode, without callers having to set ->f_flags manually. Signed-off-by: Al Viro --- include/linux/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 279720db984a..6d34a1262b31 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -18,7 +18,7 @@ struct file_operations; struct vfsmount; struct dentry; struct path; -extern struct file *alloc_file(const struct path *, fmode_t mode, +extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); static inline void fput_light(struct file *file, int fput_needed) -- cgit From e3f20ae21079ecac282df65d83865c5771f4bca0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jul 2018 13:25:29 -0400 Subject: security_file_open(): lose cred argument Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/security.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 63030c85ee19..88d30fc975e7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -309,7 +309,7 @@ void security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); -int security_file_open(struct file *file, const struct cred *cred); +int security_file_open(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); @@ -858,8 +858,7 @@ static inline int security_file_receive(struct file *file) return 0; } -static inline int security_file_open(struct file *file, - const struct cred *cred) +static inline int security_file_open(struct file *file) { return 0; } -- cgit From 9481769208b5e39b871ae4e89f5328c776ec38dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jul 2018 14:13:18 -0400 Subject: ->file_open(): lose cred argument Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 8f1131c8dd54..a8ee106b865d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1569,7 +1569,7 @@ union security_list_options { int (*file_send_sigiotask)(struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive)(struct file *file); - int (*file_open)(struct file *file, const struct cred *cred); + int (*file_open)(struct file *file); int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); void (*task_free)(struct task_struct *task); -- cgit From f5d11409e61dadf1f9af91b22bbedc28a60a2e2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jul 2018 02:35:08 -0400 Subject: introduce FMODE_OPENED basically, "is that instance set up enough for regular fput(), or do we want put_filp() for that one". NOTE: the only alloc_file() caller that could be followed by put_filp() is in arch/ia64/kernel/perfmon.c, which is (Kconfig-level) broken. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c4ca4c9c1130..05f34726e29c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) +#define FMODE_OPENED ((__force fmode_t)0x80000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) -- cgit From 4d27f3266f14e4d1d13125ce32cb49a40f3122c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jul 2018 11:14:39 -0400 Subject: fold put_filp() into fput() Just check FMODE_OPENED in __fput() and be done with that... Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/file.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 6d34a1262b31..aed45d69811e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -78,7 +78,6 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); -extern void put_filp(struct file *); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); -- cgit From 73a09dd94377e4b186b300bd5461920710c7c3d5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 13:22:02 -0400 Subject: introduce FMODE_CREATED and switch to it Parallel to FILE_CREATED, goes into ->f_mode instead of *opened. NFS is a bit of a wart here - it doesn't have file at the point where FILE_CREATED used to be set, so we need to propagate it there (for now). IMA is another one (here and everywhere)... Note that this needs do_dentry_open() to leave old bits in ->f_mode alone - we want it to preserve FMODE_CREATED if it had been already set (no other bit can be there). Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 05f34726e29c..ca668c7e48a7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -149,6 +149,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) #define FMODE_OPENED ((__force fmode_t)0x80000) +#define FMODE_CREATED ((__force fmode_t)0x100000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) -- cgit From 6035a27b25ab9dadc8c3d5c5df5eae3fca62fc95 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 13:40:10 -0400 Subject: IMA: don't propagate opened through the entire thing just check ->f_mode in ima_appraise_measurement() Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/ima.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e4647e0eb60..d9ba3fc363b7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -16,7 +16,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask, int opened); +extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_read_file(struct file *file, enum kernel_read_file_id id); @@ -34,7 +34,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm) return 0; } -static inline int ima_file_check(struct file *file, int mask, int opened) +static inline int ima_file_check(struct file *file, int mask) { return 0; } -- cgit From be12af3ef5e61ebc44d065e121424ac605d7bb8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 11:44:56 -0400 Subject: getting rid of 'opened' argument of ->atomic_open() - part 1 'opened' argument of finish_open() is unused. Kill it. Signed-off-by Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ca668c7e48a7..70be3e4c26ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2441,8 +2441,7 @@ enum { FILE_OPENED = 2 }; extern int finish_open(struct file *file, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened); + int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -- cgit From 44907d79002466049fdbb8ef15730d185e0808b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 13:32:02 -0400 Subject: get rid of 'opened' argument of ->atomic_open() - part 3 now it can be done... Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 70be3e4c26ac..c25896b30e9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1781,7 +1781,7 @@ struct inode_operations { int (*update_time)(struct inode *, struct timespec64 *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, - umode_t create_mode, int *opened); + umode_t create_mode); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; -- cgit From dbae8f2ca2f0586f4b80201c78ff0aed2a012ab5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2018 17:51:47 -0400 Subject: kill FILE_{CREATED,OPENED} no users left Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c25896b30e9f..bd904c496878 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2436,10 +2436,6 @@ extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); extern void putname(struct filename *name); -enum { - FILE_CREATED = 1, - FILE_OPENED = 2 -}; extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); -- cgit From d93aa9d82aea80b80f225dbf9c7986df444d8106 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2018 09:40:05 -0400 Subject: new wrapper: alloc_file_pseudo() takes inode, vfsmount, name, O_... flags and file_operations and either returns a new struct file (in which case inode reference we held is consumed) or returns ERR_PTR(), in which case no refcounts are altered. converted aio_private_file() and sock_alloc_file() to it Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index aed45d69811e..5b25388f2f79 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -17,9 +17,12 @@ extern void fput(struct file *); struct file_operations; struct vfsmount; struct dentry; +struct inode; struct path; extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); +extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { -- cgit From 183266f26f45a47958afb5c9aa1b3d4651e2eb8c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jun 2018 14:15:10 -0400 Subject: new helper: alloc_file_clone() alloc_file_clone(old_file, mode, ops): create a new struct file with ->f_path equal to that of old_file. pipe converted. Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/file.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 5b25388f2f79..60914843c737 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -23,6 +23,8 @@ extern struct file *alloc_file(const struct path *, int flags, const struct file_operations *fop); extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_clone(struct file *, int flags, + const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { -- cgit From ee1904ba44bd4a242b453e8fe179b374906da173 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jun 2018 14:21:27 -0400 Subject: make alloc_file() static Acked-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/file.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 60914843c737..6b2fb032416c 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -19,8 +19,6 @@ struct vfsmount; struct dentry; struct inode; struct path; -extern struct file *alloc_file(const struct path *, int flags, - const struct file_operations *fop); extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, -- cgit From a8802d97e73346bc81609df9dfba7d3306f40d87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jul 2018 11:16:41 -0700 Subject: ktime: Provide typesafe ktime_to_ns() Using ktime_to_ns() is nice to help backports to stable kernels. Having a typesafe function instead of a macro avoid stupid typos and waste of time tracking these typos. Reported-by: Willem de Bruijn Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Cc: Eric Dumazet Cc: John Stultz Cc: Peter Zijlstra Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20180711181641.10369-1-edumazet@google.com --- include/linux/ktime.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5b9fddbaac41..b2bb44f87f5a 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -93,8 +93,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) /* Map the ktime_t to timeval conversion to ns_to_timeval function */ #define ktime_to_timeval(kt) ns_to_timeval((kt)) -/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ -#define ktime_to_ns(kt) (kt) +/* Convert ktime_t to nanoseconds */ +static inline s64 ktime_to_ns(const ktime_t kt) +{ + return kt; +} /** * ktime_compare - Compares two ktime_t variables for less, greater or equal -- cgit From cca9bab1b72cd2296097c75f59ef11ef80461279 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jul 2018 12:16:12 +0200 Subject: tcp: use monotonic timestamps for PAWS Using get_seconds() for timestamps is deprecated since it can lead to overflows on 32-bit systems. While the interface generally doesn't overflow until year 2106, the specific implementation of the TCP PAWS algorithm breaks in 2038 when the intermediate signed 32-bit timestamps overflow. A related problem is that the local timestamps in CLOCK_REALTIME form lead to unexpected behavior when settimeofday is called to set the system clock backwards or forwards by more than 24 days. While the first problem could be solved by using an overflow-safe method of comparing the timestamps, a nicer solution is to use a monotonic clocksource with ktime_get_seconds() that simply doesn't overflow (at least not until 136 years after boot) and that doesn't change during settimeofday(). To make 32-bit and 64-bit architectures behave the same way here, and also save a few bytes in the tcp_options_received structure, I'm changing the type to a 32-bit integer, which is now safe on all architectures. Finally, the ts_recent_stamp field also (confusingly) gets used to store a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow(). This is currently safe, but changing the type to 32-bit requires some small changes there to keep it working. Signed-off-by: Arnd Bergmann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3dbea6610304..58a8d7d71354 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -89,7 +89,7 @@ struct tcp_sack_block { struct tcp_options_received { /* PAWS/RTTM data */ - long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ + int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ u32 ts_recent; /* Time stamp to echo next */ u32 rcv_tsval; /* Time stamp value */ u32 rcv_tsecr; /* Time stamp echo reply */ @@ -426,7 +426,7 @@ struct tcp_timewait_sock { /* The time we sent the last out-of-window ACK: */ u32 tw_last_oow_ack_time; - long tw_ts_recent_stamp; + int tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif -- cgit From 3949fa9bac090ad217534c30bc3b6572289abf21 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 May 2018 15:29:10 -0700 Subject: rcu: Make rcu_read_unlock_special() static Because rcu_read_unlock_special() is no longer used outside of kernel/rcu/tree_plugin.h, this commit makes it static. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 65163aa0bb04..67ec077c7ee5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -64,7 +64,6 @@ void rcu_barrier_tasks(void); void __rcu_read_lock(void); void __rcu_read_unlock(void); -void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* -- cgit From 07f27570dcd148a5f4de7dc3513c1d1cd069b362 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Fri, 11 May 2018 17:30:34 +0900 Subject: rcu: Improve rcu_note_voluntary_context_switch() reporting We expect a quiescent state of TASKS_RCU when cond_resched_tasks_rcu_qs() is called, no matter whether it actually be scheduled or not. However, it currently doesn't report the quiescent state when the task enters into __schedule() as it's called with preempt = true. So make it report the quiescent state unconditionally when cond_resched_tasks_rcu_qs() is called. And in TINY_RCU, even though the quiescent state of rcu_bh also should be reported when the tick interrupt comes from user, it doesn't. So make it reported. Lastly in TREE_RCU, rcu_note_voluntary_context_switch() should be reported when the tick interrupt comes from not only user but also idle, as an extended quiescent state. Signed-off-by: Byungchul Park Signed-off-by: Paul E. McKenney [ paulmck: Simplify rcutiny portion given no RCU-tasks for !PREEMPT. ] --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 67ec077c7ee5..5cab15e7ec83 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -194,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { } */ #define cond_resched_tasks_rcu_qs() \ do { \ - if (!cond_resched()) \ - rcu_note_voluntary_context_switch_lite(current); \ + rcu_note_voluntary_context_switch_lite(current); \ + cond_resched(); \ } while (0) /* -- cgit From 1445e9175bead409bb9930f3c745246a09f22cf6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 7 May 2018 06:35:46 -0300 Subject: rcu: rcupdate.h: Get rid of Sphinx warnings at rcu_pointer_handoff() The code example at rcupdate.h currently produce lots of warnings: ./include/linux/rcupdate.h:572: WARNING: Unexpected indentation. ./include/linux/rcupdate.h:576: WARNING: Unexpected indentation. ./include/linux/rcupdate.h:580: WARNING: Block quote ends without a blank line; unexpected unindent. ./include/linux/rcupdate.h:582: WARNING: Block quote ends without a blank line; unexpected unindent. ./include/linux/rcupdate.h:582: WARNING: Inline literal start-string without end-string. This commit therefore changes it to a code-block. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5cab15e7ec83..dacc90358b33 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -566,8 +566,8 @@ static inline void rcu_preempt_sleep_check(void) { } * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to - * kill_dependency(). It could be used as follows: - * `` + * kill_dependency(). It could be used as follows:: + * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -578,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); - *`` */ #define rcu_pointer_handoff(p) (p) -- cgit From 6f56f714db067056c80f5d71510118f82872e34c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 May 2018 13:52:27 -0700 Subject: rcu: Improve RCU-tasks naming and comments The naming and comments associated with some RCU-tasks code make the faulty assumption that context switches due to cond_resched() are voluntary. As several people pointed out, this is not the case. This commit therefore updates function names and comments to better reflect current reality. Reported-by: Byungchul Park Reported-by: Joel Fernandes Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 12 ++++++------ include/linux/rcutiny.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dacc90358b33..75e5b393cf44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -158,11 +158,11 @@ static inline void rcu_init_nohz(void) { } } while (0) /* - * Note a voluntary context switch for RCU-tasks benefit. This is a - * macro rather than an inline function to avoid #include hell. + * Note a quasi-voluntary context switch for RCU-tasks's benefit. + * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU -#define rcu_note_voluntary_context_switch_lite(t) \ +#define rcu_tasks_qs(t) \ do { \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ @@ -170,14 +170,14 @@ static inline void rcu_init_nohz(void) { } #define rcu_note_voluntary_context_switch(t) \ do { \ rcu_all_qs(); \ - rcu_note_voluntary_context_switch_lite(t); \ + rcu_tasks_qs(t); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void exit_tasks_rcu_start(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU */ -#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) +#define rcu_tasks_qs(t) do { } while (0) #define rcu_note_voluntary_context_switch(t) rcu_all_qs() #define call_rcu_tasks call_rcu_sched #define synchronize_rcu_tasks synchronize_sched @@ -194,7 +194,7 @@ static inline void exit_tasks_rcu_finish(void) { } */ #define cond_resched_tasks_rcu_qs() \ do { \ - rcu_note_voluntary_context_switch_lite(current); \ + rcu_tasks_qs(current); \ cond_resched(); \ } while (0) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7b3c82e8a625..8d9a0ea8f0b5 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head, #define rcu_note_context_switch(preempt) \ do { \ rcu_sched_qs(); \ - rcu_note_voluntary_context_switch_lite(current); \ + rcu_tasks_qs(current); \ } while (0) static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) -- cgit From b7b6f94cf6e6d961f78064315b6f5de5d9c6414b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 14:22:40 +1000 Subject: rculist: Improve documentation for list_for_each_entry_from_rcu() Unfortunately the patch for adding list_for_each_entry_from_rcu() wasn't the final patch after all review. It is functionally correct but the documentation was incomplete. This patch adds this missing documentation which includes an update to the documentation for list_for_each_entry_continue_rcu() to match the documentation for the new list_for_each_entry_from_rcu(), and adds list_for_each_entry_from_rcu() and the already existing hlist_for_each_entry_from_rcu() to section 7 of whatisRCU.txt. Reviewed-by: Paul E. McKenney Signed-off-by: NeilBrown Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 36df6ccbc874..4786c2235b98 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @member: the name of the list_head within the struct. * * Continue to iterate over list of given type, continuing after - * the current position. + * the current position which must have been in the list when the RCU read + * lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + * + * This iterator is similar to list_for_each_entry_from_rcu() except + * this starts after the given position and that one starts at the given + * position. */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ @@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * * Iterate over the tail of a list starting from a given position, * which must have been in the list when the RCU read lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + * + * This iterator is similar to list_for_each_entry_continue_rcu() except + * this starts from the given position and that one starts from the position + * after the given position. */ #define list_for_each_entry_from_rcu(pos, head, member) \ for (; &(pos)->member != (head); \ -- cgit From 3025520ec424df8b0fd5cdc319ad6b83406d9954 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 May 2018 11:38:47 -0700 Subject: rcutorture: Use per-CPU random state for rcu_torture_timer() Currently, the rcu_torture_timer() function uses a single global torture_random_state structure protected by a single global lock. This conflicts to some extent with performance and scalability, but even more with the goal of consolidating read-side testing with rcu_torture_reader(). This commit therefore creates a per-CPU torture_random_state structure for use by rcu_torture_timer() and eliminates the lock. Signed-off-by: Paul E. McKenney [ paulmck: Make rcu_torture_timer_rand static, per 0day Test Robot report. ] --- include/linux/torture.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index a55e80817dae..61dfd93b6ee4 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -64,6 +64,8 @@ struct torture_random_state { long trs_count; }; #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } +#define DEFINE_TORTURE_RANDOM_PERCPU(name) \ + DEFINE_PER_CPU(struct torture_random_state, name) unsigned long torture_random(struct torture_random_state *trsp); /* Task shuffler, which causes CPUs to occasionally go idle. */ -- cgit From 523f9eb1ef25aab4aaf9aeb5356160e8039411ef Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 12 Jul 2018 15:13:15 +0300 Subject: net/mlx4_core: Add health buffer address capability Health buffer address is a 32 bit PCI address offset provided by the FW. This offset is used for reading FW health debug data located on the shared CR space. Cr space is accessible in both driver and FW and allows for different queries and configurations. Health buffer size is always 64B of readable data followed by a lock which is used to block volatile CR space access. Signed-off-by: Alex Vesker Signed-off-by: Tariq Toukan Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 122e7e9d3091..e3bfe76aea98 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -630,6 +630,7 @@ struct mlx4_caps { u32 vf_caps; bool wol_port[MLX4_MAX_PORTS + 1]; struct mlx4_rate_limit_caps rl_caps; + u32 health_buffer_addrs; }; struct mlx4_buf_list { -- cgit From bedc989b0c98285b277ff8a08ff9514e580913f4 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 12 Jul 2018 15:13:16 +0300 Subject: net/mlx4_core: Add Crdump FW snapshot support Crdump allows the driver to create a snapshot of the FW PCI crspace and health buffer during a critical FW issue. In case of a FW command timeout, FW getting stuck or a non zero value on the catastrophic buffer, a snapshot will be taken. The snapshot is exposed using devlink, cr-space, fw-health address regions are registered on init and snapshots are attached once a new snapshot is collected by the driver. Signed-off-by: Alex Vesker Signed-off-by: Tariq Toukan Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e3bfe76aea98..300b944e6e1e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -852,6 +852,11 @@ struct mlx4_vf_dev { u8 n_ports; }; +struct mlx4_fw_crdump { + struct devlink_region *region_crspace; + struct devlink_region *region_fw_health; +}; + enum mlx4_pci_status { MLX4_PCI_STATUS_DISABLED, MLX4_PCI_STATUS_ENABLED, @@ -872,6 +877,7 @@ struct mlx4_dev_persistent { u8 interface_state; struct mutex pci_status_mutex; /* sync pci state */ enum mlx4_pci_status pci_status; + struct mlx4_fw_crdump crdump; }; struct mlx4_dev { -- cgit From 3c641ba4a852cf4e90e3d7f29c5df08e24213c5d Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 12 Jul 2018 15:13:18 +0300 Subject: net/mlx4_core: Use devlink region_snapshot parameter This parameter enables capturing region snapshot of the crspace during critical errors. The default value of this parameter is disabled, it can be enabled using devlink param commands. It is possible to configure during runtime and also driver init. Signed-off-by: Alex Vesker Signed-off-by: Jiri Pirko Reviewed-by: Moshe Shemesh Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 300b944e6e1e..dca6ab4eaa99 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -853,6 +853,7 @@ struct mlx4_vf_dev { }; struct mlx4_fw_crdump { + bool snapshot_enable; struct devlink_region *region_crspace; struct devlink_region *region_fw_health; }; -- cgit From afed7bcf9487bb28e2e2b016a195085c07416c0b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 10:36:07 +0100 Subject: locking/refcount: Always allow checked forms In many cases, it would be useful to be able to use the full sanity-checked refcount helpers regardless of CONFIG_REFCOUNT_FULL, as this would help to avoid duplicate warnings where callers try to sanity-check refcount manipulation. This patch refactors things such that the full refcount helpers were always built, as refcount_${op}_checked(), such that they can be used regardless of CONFIG_REFCOUNT_FULL. This will allow code which *always* wants a checked refcount to opt-in, avoiding the need to duplicate the logic for warnings. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: David Sterba Acked-by: Kees Cook Acked-by: Will Deacon Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180711093607.1644-1-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index c36addd27dd5..53c5eca24d83 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -43,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } +extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); +extern void refcount_add_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); +extern void refcount_inc_checked(refcount_t *r); + +extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); +extern void refcount_dec_checked(refcount_t *r); + #ifdef CONFIG_REFCOUNT_FULL -extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); -extern void refcount_add(unsigned int i, refcount_t *r); -extern __must_check bool refcount_inc_not_zero(refcount_t *r); -extern void refcount_inc(refcount_t *r); +#define refcount_add_not_zero refcount_add_not_zero_checked +#define refcount_add refcount_add_checked + +#define refcount_inc_not_zero refcount_inc_not_zero_checked +#define refcount_inc refcount_inc_checked + +#define refcount_sub_and_test refcount_sub_and_test_checked -extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); +#define refcount_dec_and_test refcount_dec_and_test_checked +#define refcount_dec refcount_dec_checked -extern __must_check bool refcount_dec_and_test(refcount_t *r); -extern void refcount_dec(refcount_t *r); #else # ifdef CONFIG_ARCH_HAS_REFCOUNT # include -- cgit From 05814a10370b3252fe2b0898b6adac3cdd531096 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 13 Jul 2018 17:07:26 +0300 Subject: block: remove blkdev_entry_to_request() macro Remove blkdev_entry_to_request() macro, which remained unused through the observable history, also note that it repeats list_entry_rq() macro verbatim. Signed-off-by: Vladimir Zapolskiy Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 137759862f07..1939ed95f936 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1436,8 +1436,6 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) - static inline unsigned long queue_segment_boundary(struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit From 8e1b706b6e819bed215c0db16345568864660393 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Jul 2018 16:23:23 +0200 Subject: cpu/hotplug: Expose SMT control init function The L1TF mitigation will gain a commend line parameter which allows to set a combination of hypervisor mitigation and SMT control. Expose cpu_smt_disable() so the command line parser can tweak SMT settings. [ tglx: Split out of larger patch and made it preserve an already existing force off state ] Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.039715135@linutronix.de --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7532cbf27b1d..c9b23ad27b38 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -177,8 +177,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; +extern void cpu_smt_disable(bool force); #else # define cpu_smt_control (CPU_SMT_ENABLED) +static inline void cpu_smt_disable(bool force) { } #endif #endif /* _LINUX_CPU_H_ */ -- cgit From fee0aede6f4739c87179eca76136f83210953b86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:24 +0200 Subject: cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support SMT) when the sysfs SMT control file is initialized. That was fine so far as this was only required to make the output of the control file correct and to prevent writes in that case. With the upcoming l1tf command line parameter, this needs to be set up before the L1TF mitigation selection and command line parsing happens. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c9b23ad27b38..ccdf3a67ce56 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -178,9 +178,11 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology(void) { } #endif #endif /* _LINUX_CPU_H_ */ -- cgit From 6b8675897338f874c41612655a85d8e10cdb23d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:39 -0700 Subject: xdp: don't make drivers report attachment mode prog_attached of struct netdev_bpf should have been superseded by simply setting prog_id long time ago, but we kept it around to allow offloading drivers to communicate attachment mode (drv vs hw). Subsequently drivers were also allowed to report back attachment flags (prog_flags), and since nowadays only programs attached will XDP_FLAGS_HW_MODE can get offloaded, we can tell the attachment mode from the flags driver reports. Remove prog_attached member. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b683971e500d..69a664789b33 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -819,10 +819,6 @@ enum bpf_netdev_command { */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, - /* Check if a bpf program is set on the device. The callee should - * set @prog_attached to one of XDP_ATTACHED_* values, note that "true" - * is equivalent to XDP_ATTACHED_DRV. - */ XDP_QUERY_PROG, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, @@ -849,7 +845,6 @@ struct netdev_bpf { }; /* XDP_QUERY_PROG */ struct { - u8 prog_attached; u32 prog_id; /* flags with which program was installed */ u32 prog_flags; -- cgit From a25717d2b604347d9af8da81deea7b08e8c94220 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:41 -0700 Subject: xdp: support simultaneous driver and hw XDP attachment Split the query of HW-attached program from the software one. Introduce new .ndo_bpf command to query HW-attached program. This will allow drivers to install different programs in HW and SW at the same time. Netlink can now also carry multiple programs on dump (in which case mode will be set to XDP_ATTACHED_MULTI and user has to check per-attachment point attributes, IFLA_XDP_PROG_ID will not be present). We reuse IFLA_XDP_PROG_ID skb space for second mode, so rtnl_xdp_size() doesn't need to be updated. Note that the installation side is still not there, since all drivers currently reject installing more than one program at the time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69a664789b33..2422c0e88f5c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,7 @@ enum bpf_netdev_command { XDP_SETUP_PROG, XDP_SETUP_PROG_HW, XDP_QUERY_PROG, + XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, BPF_OFFLOAD_TRANSLATE, @@ -843,7 +844,7 @@ struct netdev_bpf { struct bpf_prog *prog; struct netlink_ext_ack *extack; }; - /* XDP_QUERY_PROG */ + /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */ struct { u32 prog_id; /* flags with which program was installed */ @@ -3533,8 +3534,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, - struct netdev_bpf *xdp); +u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, + enum bpf_netdev_command cmd); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit From c921c2077b32081617789a645120148bc8b60c98 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Jul 2018 12:16:43 +0300 Subject: net: ipmr: add support for passing full packet on wrong vif This patch adds support for IGMPMSG_WRVIFWHOLE which is used to pass full packet and real vif id when the incoming interface is wrong. While the RP and FHR are setting up state we need to be sending the registers encapsulated with all the data inside otherwise we lose it. The RP then decapsulates it and forwards it to the interested parties. Currently with WRONGVIF we can only be sending empty register packets and will lose that data. This behaviour can be enabled by using MRT_PIM with val == IGMPMSG_WRVIFWHOLE. This doesn't prevent IGMPMSG_WRONGVIF from happening, it happens in addition to it, also it is controlled by the same throttling parameters as WRONGVIF (i.e. 1 packet per 3 seconds currently). Both messages are generated to keep backwards compatibily and avoid breaking someone who was enabling MRT_PIM with val == 4, since any positive val is accepted and treated the same. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index fd436cdd4725..6675b9f81979 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -254,6 +254,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; + bool mroute_do_wrvifwhole; int mroute_reg_vif_num; }; -- cgit From 5fd778915ad29184a5ff8eb82d1118f6916b79e4 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 28 Jun 2018 17:45:14 +0200 Subject: sched/sysctl: Remove unused sched_time_avg_ms sysctl /proc/sys/kernel/sched_time_avg_ms entry is not used anywhere, remove it. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Luis R. Rodriguez Cc: Kees Cook Cc: Linus Torvalds Cc: Morten.Rasmussen@arm.com Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: claudio@evidence.eu.com Cc: daniel.lezcano@linaro.org Cc: dietmar.eggemann@arm.com Cc: joel@joelfernandes.org Cc: juri.lelli@redhat.com Cc: luca.abeni@santannapisa.it Cc: patrick.bellasi@arm.com Cc: quentin.perret@arm.com Cc: rjw@rjwysocki.net Cc: valentin.schneider@arm.com Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/1530200714-4504-12-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 1c1a1512ec55..913488d828cb 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -40,7 +40,6 @@ extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; -extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, -- cgit From 788faab70d5a882693286b8d5022779559c79904 Mon Sep 17 00:00:00 2001 From: Tobias Tefke Date: Mon, 9 Jul 2018 12:57:15 +0200 Subject: perf, tools: Use correct articles in comments Some of the comments in the perf events code use articles incorrectly, using 'a' for words beginning with a vowel sound, where 'an' should be used. Signed-off-by: Tobias Tefke Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: jolsa@redhat.com Cc: namhyung@kernel.org Link: http://lkml.kernel.org/r/20180709105715.22938-1-tobias.tefke@tutanota.com [ Fix a few more perf related 'a event' typo fixes from all around the kernel and tooling tree. ] Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1fa12887ec02..e6dd3a2f8ec4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -490,7 +490,7 @@ struct perf_addr_filters_head { }; /** - * enum perf_event_state - the states of a event + * enum perf_event_state - the states of an event: */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -4, -- cgit From 3eb420e70d879ce0e6bf752accf5cdedb0a59de8 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 11 Jul 2018 11:40:35 +0200 Subject: efi: Use a work queue to invoke EFI Runtime Services Presently, when a user process requests the kernel to execute any UEFI runtime service, the kernel temporarily switches to a separate set of page tables that describe the virtual mapping of the UEFI runtime services regions in memory. Since UEFI runtime services are typically invoked with interrupts enabled, any code that may be called during this time, will have an incorrect view of the process's address space. Although it is unusual for code running in interrupt context to make assumptions about the process context it runs in, there are cases (such as the perf subsystem taking samples) where this causes problems. So let's set up a work queue for calling UEFI runtime services, so that the actual calls are made when the work queue items are dispatched by a work queue worker running in a separate kernel thread. Such threads are not expected to have userland mappings in the first place, and so the additional mappings created for the UEFI runtime services can never clash with any. The ResetSystem() runtime service is not covered by the work queue handling, since it is not expected to return, and may be called at a time when the kernel is torn down to the point where we cannot expect work queues to still be operational. The non-blocking variants of SetVariable() and QueryVariableInfo() are also excluded: these are intended to be used from atomic context, which obviously rules out waiting for a completion to be signalled by another thread. Note that these variants are currently only used for UEFI runtime services calls that occur very early in the boot, and for ones that occur in critical conditions, e.g., to flush kernel logs to UEFI variables via efi-pstore. Suggested-by: Andy Lutomirski Signed-off-by: Sai Praneeth Prakhya [ardb: exclude ResetSystem() from the workqueue treatment merge from 2 separate patches and rewrite commit log] Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180711094040.12506-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 56add823f190..8ba0cdd244b2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1651,4 +1651,7 @@ struct linux_efi_tpm_eventlog { extern int efi_tpm_eventlog_init(void); +/* Workqueue to queue EFI Runtime Services */ +extern struct workqueue_struct *efi_rts_wq; + #endif /* _LINUX_EFI_H */ -- cgit From f5dcc214aae29a68b37b2b4183f7171724e7b02d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 11 Jul 2018 11:40:37 +0200 Subject: efi: Remove the declaration of efi_late_init() as the function is unused The following commit: 7b0a911478c74 ("efi/x86: Move the EFI BGRT init code to early init code") ... removed the implementation and all the references to efi_late_init() but the function is still declared at include/linux/efi.h. Hence, remove the unnecessary declaration. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180711094040.12506-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8ba0cdd244b2..e190652f5ef9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -988,14 +988,12 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec64 *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #ifdef CONFIG_X86 -extern void efi_late_init(void); extern void efi_free_boot_services(void); extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, bool nonblocking); extern void efi_find_mirror(void); #else -static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} static inline efi_status_t efi_query_variable_store(u32 attributes, -- cgit From 784abe24c903b093af04cf1a043140faa556cad7 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Fri, 13 Jul 2018 14:33:35 +0300 Subject: net: Add decrypted field to skb The decrypted bit is propogated to cloned/copied skbs. This will be used later by the inline crypto receive side offload of tls. Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7601838c2513..3ceb8dcc54da 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t; * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @decrypted: Decrypted SKB * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -736,7 +737,11 @@ struct sk_buff { peeked:1, head_frag:1, xmit_more:1, - __unused:1; /* one bit hole */ +#ifdef CONFIG_TLS_DEVICE + decrypted:1; +#else + __unused:1; +#endif /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() -- cgit From 14136564c8ee94566945e85014019cbdb1716dca Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Fri, 13 Jul 2018 14:33:36 +0300 Subject: net: Add TLS RX offload feature This patch adds a netdev feature to configure TLS RX inline crypto offload. Signed-off-by: Ilya Lesokhin Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 623bb8ced060..2b2a6dce1630 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -79,6 +79,7 @@ enum { NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ NETIF_F_HW_TLS_TX_BIT, /* Hardware TLS TX offload */ + NETIF_F_HW_TLS_RX_BIT, /* Hardware TLS RX offload */ NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */ NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ @@ -151,6 +152,7 @@ enum { #define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD) #define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4) #define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) +#define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) -- cgit From 16e4edc297ffc9b643b8dd3da6b0d579753ea2b3 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Fri, 13 Jul 2018 14:33:37 +0300 Subject: net: Add TLS rx resync NDO Add new netdev tls op for resynchronizing HW tls context Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4fa7f7a3f8b3..3514d67112b3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -903,6 +903,8 @@ struct tlsdev_ops { void (*tls_dev_del)(struct net_device *netdev, struct tls_context *ctx, enum tls_offload_ctx_dir direction); + void (*tls_dev_resync_rx)(struct net_device *netdev, + struct sock *sk, u32 seq, u64 rcd_sn); }; #endif -- cgit From ab412e1dd7db132c2abeb9385b4bf0dc8e6c5a65 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Fri, 13 Jul 2018 14:33:46 +0300 Subject: net/mlx5: Accel, add TLS rx offload routines In Innova TLS, TLS contexts are added or deleted via a command message over the SBU connection. The HW then sends a response message over the same connection. Complete the implementation for Innova TLS (FPGA-based) hardware by adding support for rx inline crypto offload. Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc_fpga.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 64d0f40d4cc3..37e065a80a43 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -576,6 +576,7 @@ struct mlx5_ifc_fpga_ipsec_sa { enum fpga_tls_cmds { CMD_SETUP_STREAM = 0x1001, CMD_TEARDOWN_STREAM = 0x1002, + CMD_RESYNC_RX = 0x1003, }; #define MLX5_TLS_1_2 (0) -- cgit From a4f9edb29d9c19f9f8dcd2df7ddfe4eb7ad58996 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 May 2018 17:29:52 +0100 Subject: irqchip/gic-v3: Expose GICD_TYPER in the rdist structure Instead of exposing the GIC distributor IntID field in the rdist structure that is passed to the ITS, let's replace it with a copy of the whole GICD_TYPER register. We are going to need some of this information at a later time. No functionnal change. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index cbb872c1b607..396cd99af02f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -576,8 +576,8 @@ struct rdists { phys_addr_t phys_base; } __percpu *rdist; struct page *prop_page; - int id_bits; u64 flags; + u32 gicd_typer; bool has_vlpis; bool has_direct_lpi; }; -- cgit From 12b2905af183c931bedcab4292c81d3a415e080f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 May 2018 09:01:59 +0100 Subject: irqchip/gic-v3-its: Honor hypervisor enforced LPI range A recent extension to the GIC architecture allows a hypervisor to arbitrarily reduce the number of LPIs available to a guest, no matter what the GIC says about the valid range of IntIDs. Let's factor in this information when computing the number of available LPIs Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 396cd99af02f..9d2ea3e907d0 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -73,6 +73,7 @@ #define GICD_TYPER_MBIS (1U << 16) #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) #define GICD_IROUTER_SPI_MODE_ONE (0U << 31) -- cgit From 0f292f023ffcc67ec49d63dcb7fe388711cbb83a Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Date: Wed, 11 Jul 2018 13:37:53 +0100 Subject: ALSA: usb-audio: Add support for Processing Units in UAC3 This patch adds support for the Processig Units defined in the UAC3 spec. The main difference with the previous specs is the lack of on/off switches in the controls for these units and the addiction of the new Multi Function Processing Unit. The current version of the UAC3 spec doesn't define any useful controls for the new Multi Function Processing Unit so no control will get created once this unit is parsed. Signed-off-by: Jorge Sanjuan Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v3.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h index a710e28b5215..334bfa6dfb47 100644 --- a/include/linux/usb/audio-v3.h +++ b/include/linux/usb/audio-v3.h @@ -387,6 +387,12 @@ struct uac3_interrupt_data_msg { #define UAC3_CONNECTORS 0x0f #define UAC3_POWER_DOMAIN 0x10 +/* A.20 PROCESSING UNIT PROCESS TYPES */ +#define UAC3_PROCESS_UNDEFINED 0x00 +#define UAC3_PROCESS_UP_DOWNMIX 0x01 +#define UAC3_PROCESS_STEREO_EXTENDER 0x02 +#define UAC3_PROCESS_MULTI_FUNCTION 0x03 + /* A.22 AUDIO CLASS-SPECIFIC REQUEST CODES */ /* see audio-v2.h for the rest, which is identical to v2 */ #define UAC3_CS_REQ_INTEN 0x04 @@ -406,6 +412,15 @@ struct uac3_interrupt_data_msg { #define UAC3_TE_OVERFLOW 0x04 #define UAC3_TE_LATENCY 0x05 +/* A.23.10 PROCESSING UNITS CONTROL SELECTROS */ + +/* Up/Down Mixer */ +#define UAC3_UD_MODE_SELECT 0x01 + +/* Stereo Extender */ +#define UAC3_EXT_WIDTH_CONTROL 0x01 + + /* BADD predefined Unit/Terminal values */ #define UAC3_BADD_IT_ID1 1 /* Input Terminal ID1: bTerminalID = 1 */ #define UAC3_BADD_FU_ID2 2 /* Feature Unit ID2: bUnitID = 2 */ -- cgit From d7e5a9a50245b91f016c814b0f076f7e55cbb980 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Jun 2018 17:49:43 +0200 Subject: netfilter: utils: move nf_ip_checksum* from ipv4 to utils allows to make nf_ip_checksum_partial static, it no longer has an external caller. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index b31dabfdb453..95ab5cc64422 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -23,9 +23,6 @@ struct nf_queue_entry; #ifdef CONFIG_INET __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); -__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, unsigned int len, - u_int8_t protocol); int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); @@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, { return 0; } -static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb, - unsigned int hook, - unsigned int dataoff, - unsigned int len, - u_int8_t protocol) -{ - return 0; -} static inline int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { -- cgit From ebee5a50d0b7cdc576aa8081f05b86971880054d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Jun 2018 17:49:59 +0200 Subject: netfilter: utils: move nf_ip6_checksum* from ipv6 to utils similar to previous change, this also allows to remove it from nf_ipv6_ops and avoid the indirection. It also removes the bogus dependency of nf_conntrack_ipv6 on ipv6 module: ipv6 checksum functions are built into kernel even if CONFIG_IPV6=m, but ipv6/netfilter.o isn't. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 288c597e75b3..c0dc4dd78887 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -30,11 +30,6 @@ struct nf_ipv6_ops { void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); - __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); - __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, unsigned int len, - u_int8_t protocol); int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -- cgit From 377179cd28cd417dcfb4396edb824533431e607e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 13 Jul 2018 14:05:56 -0400 Subject: security: define new LSM hook named security_kernel_load_data Differentiate between the kernel reading a file specified by userspace from the kernel loading a buffer containing data provided by userspace. This patch defines a new LSM hook named security_kernel_load_data(). Signed-off-by: Mimi Zohar Cc: Eric Biederman Cc: Luis R. Rodriguez Cc: Kees Cook Cc: Casey Schaufler Acked-by: Serge Hallyn Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 6 ++++++ include/linux/security.h | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 8f1131c8dd54..a08bc2587b96 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -576,6 +576,10 @@ * userspace to load a kernel module with the given name. * @kmod_name name of the module requested by the kernel * Return 0 if successful. + * @kernel_load_data: + * Load data provided by userspace. + * @id kernel load data identifier + * Return 0 if permission is granted. * @kernel_read_file: * Read a file specified by userspace. * @file contains the file structure pointing to the file being read @@ -1582,6 +1586,7 @@ union security_list_options { int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); + int (*kernel_load_data)(enum kernel_load_data_id id); int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id); int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); @@ -1872,6 +1877,7 @@ struct security_hook_heads { struct hlist_head cred_getsecid; struct hlist_head kernel_act_as; struct hlist_head kernel_create_files_as; + struct hlist_head kernel_load_data; struct hlist_head kernel_read_file; struct hlist_head kernel_post_read_file; struct hlist_head kernel_module_request; diff --git a/include/linux/security.h b/include/linux/security.h index 63030c85ee19..3410acfe139c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -159,6 +159,27 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, typedef int (*initxattrs) (struct inode *inode, const struct xattr *xattr_array, void *fs_data); + +/* Keep the kernel_load_data_id enum in sync with kernel_read_file_id */ +#define __data_id_enumify(ENUM, dummy) LOADING_ ## ENUM, +#define __data_id_stringify(dummy, str) #str, + +enum kernel_load_data_id { + __kernel_read_file_id(__data_id_enumify) +}; + +static const char * const kernel_load_data_str[] = { + __kernel_read_file_id(__data_id_stringify) +}; + +static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) +{ + if ((unsigned)id >= LOADING_MAX_ID) + return kernel_load_data_str[LOADING_UNKNOWN]; + + return kernel_load_data_str[id]; +} + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -320,6 +341,7 @@ void security_cred_getsecid(const struct cred *c, u32 *secid); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); +int security_kernel_load_data(enum kernel_load_data_id id); int security_kernel_read_file(struct file *file, enum kernel_read_file_id id); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); @@ -909,6 +931,11 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } +static inline int security_kernel_load_data(enum kernel_load_data_id id) +{ + return 0; +} + static inline int security_kernel_read_file(struct file *file, enum kernel_read_file_id id) { -- cgit From 16c267aac86b463b1fcccd43c89f4c8e5c5c86fa Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 13 Jul 2018 14:05:58 -0400 Subject: ima: based on policy require signed kexec kernel images The original kexec_load syscall can not verify file signatures, nor can the kexec image be measured. Based on policy, deny the kexec_load syscall. Signed-off-by: Mimi Zohar Cc: Eric Biederman Cc: Kees Cook Reviewed-by: Kees Cook Signed-off-by: James Morris --- include/linux/ima.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e4647e0eb60..84806b54b50a 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -11,6 +11,7 @@ #define _LINUX_IMA_H #include +#include #include struct linux_binprm; @@ -19,6 +20,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); +extern int ima_load_data(enum kernel_load_data_id id); extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); @@ -49,6 +51,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } +static inline int ima_load_data(enum kernel_load_data_id id) +{ + return 0; +} + static inline int ima_read_file(struct file *file, enum kernel_read_file_id id) { return 0; -- cgit From 2b9672ddb6f347467d7b33b86c5dfc4d5c0501a8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 12 Jul 2018 21:32:53 +0200 Subject: net: phy: add phy_speed_down and phy_speed_up Some network drivers include functionality to speed down the PHY when suspending and just waiting for a WoL packet because this saves energy. This functionality is quite generic, therefore let's factor it out to phylib. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6cd09098427c..075c2f770d3e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -942,6 +942,8 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); +int phy_speed_down(struct phy_device *phydev, bool sync); +int phy_speed_up(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); -- cgit From d9f37d01e294e5338aa3e9d3b2eda61b59b619df Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 13 Jul 2018 14:41:36 +0800 Subject: net: convert gro_count to bitmask gro_hash size is 192 bytes, and uses 3 cache lines, if there is few flows, gro_hash may be not fully used, so it is unnecessary to iterate all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline. convert gro_count to a bitmask, and rename it as gro_bitmask, each bit represents a element of gro_hash, only flush a gro_hash element if the related bit is set, to speed up napi_gro_flush(). and update gro_bitmask only if it will be changed, to reduce cache update Suggested-by: Eric Dumazet Signed-off-by: Li RongQing Cc: Stefano Brivio Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3514d67112b3..c1295c7a452e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -308,9 +308,14 @@ struct gro_list { }; /* - * Structure for NAPI scheduling similar to tasklet but with weighting + * size of gro hash buckets, must less than bit number of + * napi_struct::gro_bitmask */ #define GRO_HASH_BUCKETS 8 + +/* + * Structure for NAPI scheduling similar to tasklet but with weighting + */ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means @@ -322,7 +327,7 @@ struct napi_struct { unsigned long state; int weight; - unsigned int gro_count; + unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL int poll_owner; -- cgit From 3d85b2703783636366560c94842affd8608ec9d1 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 16 Jul 2018 11:06:02 -0700 Subject: locking/spinlock, sched/core: Clarify requirements for smp_mb__after_spinlock() There are 11 interpretations of the requirements described in the header comment for smp_mb__after_spinlock(): one for each LKMM maintainer, and one currently encoded in the Cat file. Stick to the latter (until a more satisfactory solution is available). This also reworks some snippets related to the barrier to illustrate the requirements and to link them to the idioms which are relied upon at its call sites. Suggested-by: Boqun Feng Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Will Deacon Cc: akiyks@gmail.com Cc: dhowells@redhat.com Cc: j.alglave@ucl.ac.uk Cc: linux-arch@vger.kernel.org Cc: luc.maranget@inria.fr Cc: npiggin@gmail.com Cc: parri.andrea@gmail.com Cc: stern@rowland.harvard.edu Link: http://lkml.kernel.org/r/20180716180605.16115-11-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index fd57888d4942..3190997df9ca 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -114,29 +114,48 @@ do { \ #endif /*arch_spin_is_contended*/ /* - * This barrier must provide two things: + * smp_mb__after_spinlock() provides the equivalent of a full memory barrier + * between program-order earlier lock acquisitions and program-order later + * memory accesses. * - * - it must guarantee a STORE before the spin_lock() is ordered against a - * LOAD after it, see the comments at its two usage sites. + * This guarantees that the following two properties hold: * - * - it must ensure the critical section is RCsc. + * 1) Given the snippet: * - * The latter is important for cases where we observe values written by other - * CPUs in spin-loops, without barriers, while being subject to scheduling. + * { X = 0; Y = 0; } * - * CPU0 CPU1 CPU2 + * CPU0 CPU1 * - * for (;;) { - * if (READ_ONCE(X)) - * break; - * } - * X=1 - * - * - * r = X; + * WRITE_ONCE(X, 1); WRITE_ONCE(Y, 1); + * spin_lock(S); smp_mb(); + * smp_mb__after_spinlock(); r1 = READ_ONCE(X); + * r0 = READ_ONCE(Y); + * spin_unlock(S); * - * without transitivity it could be that CPU1 observes X!=0 breaks the loop, - * we get migrated and CPU2 sees X==0. + * it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0) + * and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments + * preceding the call to smp_mb__after_spinlock() in __schedule() and in + * try_to_wake_up(). + * + * 2) Given the snippet: + * + * { X = 0; Y = 0; } + * + * CPU0 CPU1 CPU2 + * + * spin_lock(S); spin_lock(S); r1 = READ_ONCE(Y); + * WRITE_ONCE(X, 1); smp_mb__after_spinlock(); smp_rmb(); + * spin_unlock(S); r0 = READ_ONCE(X); r2 = READ_ONCE(X); + * WRITE_ONCE(Y, 1); + * spin_unlock(S); + * + * it is forbidden that CPU0's critical section executes before CPU1's + * critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1) + * and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments + * preceding the calls to smp_rmb() in try_to_wake_up() for similar + * snippets but "projected" onto two CPUs. + * + * Property (2) upgrades the lock to an RCsc lock. * * Since most load-store architectures implement ACQUIRE with an smp_mb() after * the LL/SC loop, they need no further barriers. Similarly all our TSO -- cgit From 7696f9910a9a40b8a952f57d3428515fabd2d889 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 16 Jul 2018 11:06:03 -0700 Subject: sched/Documentation: Update wake_up() & co. memory-barrier guarantees Both the implementation and the users' expectation [1] for the various wakeup primitives have evolved over time, but the documentation has not kept up with these changes: brings it into 2018. [1] http://lkml.kernel.org/r/20180424091510.GB4064@hirez.programming.kicks-ass.net Also applied feedback from Alan Stern. Suggested-by: Peter Zijlstra Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Cc: Akira Yokosawa Cc: Alan Stern Cc: Boqun Feng Cc: Daniel Lustig Cc: David Howells Cc: Jade Alglave Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Luc Maranget Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: parri.andrea@gmail.com Link: http://lkml.kernel.org/r/20180716180605.16115-12-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 43731fe51c97..05cd419f962d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,8 +167,8 @@ struct task_group; * need_sleep = false; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * - * Where wake_up_state() (and all other wakeup primitives) imply enough - * barriers to order the store of the variable against wakeup. + * where wake_up_state() executes a full memory barrier before accessing the + * task state. * * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a -- cgit From c1a2f7f0c06454387c2cd7b93ff1491c715a8c69 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 16 Jul 2018 15:03:31 -0400 Subject: mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the CPU bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Tested-by: Song Liu Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel Acked-by: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Cc: luto@kernel.org Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 241 ++++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap; /* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap; /* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base; /* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base; /* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size; /* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** - * @mm_users: The number of users including userspace. - * - * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops - * to 0 (i.e. when the task exits and there are no other temporary - * reference holders), we also release a reference on @mm_count - * (which may then free the &struct mm_struct if @mm_count also - * drops to 0). - */ - atomic_t mm_users; - - /** - * @mm_count: The number of references to &struct mm_struct - * (@mm_users count as 1). - * - * Use mmgrab()/mmdrop() to modify. When this drops to 0, the - * &struct mm_struct is freed. - */ - atomic_t mm_count; + unsigned long task_size; /* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** + * @mm_users: The number of users including userspace. + * + * Use mmget()/mmget_not_zero()/mmput() to modify. When this + * drops to 0 (i.e. when the task exits and there are no other + * temporary reference holders), we also release a reference on + * @mm_count (which may then free the &struct mm_struct if + * @mm_count also drops to 0). + */ + atomic_t mm_users; + + /** + * @mm_count: The number of references to &struct mm_struct + * (@mm_users count as 1). + * + * Use mmgrab()/mmdrop() to modify. When this drops to 0, the + * &struct mm_struct is freed. + */ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects page tables and some + * counters + */ + struct rw_semaphore mmap_sem; - struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung - * together off init_mm.mmlist, and are protected - * by mmlist_lock - */ + struct list_head mmlist; /* List of maybe swapped mm's. These + * are globally strung together off + * init_mm.mmlist, and are protected + * by mmlist_lock + */ - unsigned long hiwater_rss; /* High-watermark of RSS usage */ - unsigned long hiwater_vm; /* High-water virtual memory usage */ + unsigned long hiwater_rss; /* High-watermark of RSS usage */ + unsigned long hiwater_vm; /* High-water virtual memory usage */ - unsigned long total_vm; /* Total pages mapped */ - unsigned long locked_vm; /* Pages that have PG_mlocked set */ - unsigned long pinned_vm; /* Refcount permanently increased */ - unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ - unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ - unsigned long stack_vm; /* VM_STACK */ - unsigned long def_flags; + unsigned long total_vm; /* Total pages mapped */ + unsigned long locked_vm; /* Pages that have PG_mlocked set */ + unsigned long pinned_vm; /* Refcount permanently increased */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ + unsigned long stack_vm; /* VM_STACK */ + unsigned long def_flags; - spinlock_t arg_lock; /* protect the below fields */ - unsigned long start_code, end_code, start_data, end_data; - unsigned long start_brk, brk, start_stack; - unsigned long arg_start, arg_end, env_start, env_end; + spinlock_t arg_lock; /* protect the below fields */ + unsigned long start_code, end_code, start_data, end_data; + unsigned long start_brk, brk, start_stack; + unsigned long arg_start, arg_end, env_start, env_end; - unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - /* - * Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. - */ - struct mm_rss_stat rss_stat; - - struct linux_binfmt *binfmt; + /* + * Special counters, in some configurations protected by the + * page_table_lock, in other configurations by being atomic. + */ + struct mm_rss_stat rss_stat; - cpumask_var_t cpu_vm_mask_var; + struct linux_binfmt *binfmt; - /* Architecture-specific MM context */ - mm_context_t context; + /* Architecture-specific MM context */ + mm_context_t context; - unsigned long flags; /* Must use atomic bitops to access the bits */ + unsigned long flags; /* Must use atomic bitops to access */ - struct core_state *core_state; /* coredumping support */ + struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_MEMBARRIER - atomic_t membarrier_state; + atomic_t membarrier_state; #endif #ifdef CONFIG_AIO - spinlock_t ioctx_lock; - struct kioctx_table __rcu *ioctx_table; + spinlock_t ioctx_lock; + struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG - /* - * "owner" points to a task that is regarded as the canonical - * user/owner of this mm. All of the following must be true in - * order for it to be changed: - * - * current == mm->owner - * current->mm != mm - * new_owner->mm == mm - * new_owner->alloc_lock is held - */ - struct task_struct __rcu *owner; + /* + * "owner" points to a task that is regarded as the canonical + * user/owner of this mm. All of the following must be true in + * order for it to be changed: + * + * current == mm->owner + * current->mm != mm + * new_owner->mm == mm + * new_owner->alloc_lock is held + */ + struct task_struct __rcu *owner; #endif - struct user_namespace *user_ns; + struct user_namespace *user_ns; - /* store ref to file /proc//exe symlink points to */ - struct file __rcu *exe_file; + /* store ref to file /proc//exe symlink points to */ + struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER - struct mmu_notifier_mm *mmu_notifier_mm; + struct mmu_notifier_mm *mmu_notifier_mm; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page_table_lock */ -#endif -#ifdef CONFIG_CPUMASK_OFFSTACK - struct cpumask cpumask_allocation; + pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING - /* - * numa_next_scan is the next time that the PTEs will be marked - * pte_numa. NUMA hinting faults will gather statistics and migrate - * pages to new nodes if necessary. - */ - unsigned long numa_next_scan; + /* + * numa_next_scan is the next time that the PTEs will be marked + * pte_numa. NUMA hinting faults will gather statistics and + * migrate pages to new nodes if necessary. + */ + unsigned long numa_next_scan; - /* Restart point for scanning and setting pte_numa */ - unsigned long numa_scan_offset; + /* Restart point for scanning and setting pte_numa */ + unsigned long numa_scan_offset; - /* numa_scan_seq prevents two threads setting pte_numa */ - int numa_scan_seq; + /* numa_scan_seq prevents two threads setting pte_numa */ + int numa_scan_seq; #endif - /* - * An operation with batched TLB flushing is going on. Anything that - * can move process memory needs to flush the TLB when moving a - * PROT_NONE or PROT_NUMA mapped page. - */ - atomic_t tlb_flush_pending; + /* + * An operation with batched TLB flushing is going on. Anything + * that can move process memory needs to flush the TLB when + * moving a PROT_NONE or PROT_NUMA mapped page. + */ + atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - /* See flush_tlb_batched_pending() */ - bool tlb_flush_batched; + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif - struct uprobes_state uprobes_state; + struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE - atomic_long_t hugetlb_usage; + atomic_long_t hugetlb_usage; #endif - struct work_struct async_put_work; + struct work_struct async_put_work; #if IS_ENABLED(CONFIG_HMM) - /* HMM needs to track a few things per mm */ - struct hmm *hmm; + /* HMM needs to track a few things per mm */ + struct hmm *hmm; #endif -} __randomize_layout; + } __randomize_layout; + + /* + * The mm_cpumask needs to be at the end of mm_struct, because it + * is dynamically sized based on nr_cpu_ids. + */ + unsigned long cpu_bitmap[]; +}; extern struct mm_struct init_mm; +/* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { -#ifdef CONFIG_CPUMASK_OFFSTACK - mm->cpu_vm_mask_var = &mm->cpumask_allocation; -#endif - cpumask_clear(mm->cpu_vm_mask_var); + unsigned long cpu_bitmap = (unsigned long)mm; + + cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); + cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { - return mm->cpu_vm_mask_var; + return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; -- cgit From 4aaf448fa9754e2d5ee188d32327b24ffc15ca4d Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Tue, 17 Jul 2018 16:20:50 +0800 Subject: vga_switcheroo: set audio client id according to bound GPU id On modern laptop, there are more and more platforms have two GPUs, and each of them maybe have audio codec for HDMP/DP output. For some dGPU which is no output, audio codec usually is disabled. In currect HDA audio driver, it will set all codec as VGA_SWITCHEROO_DIS, the audio which is binded to UMA will be suspended if user use debugfs to contorl power In HDA driver side, it is difficult to know which GPU the audio has binded to. So set the bound gpu pci dev to vga_switcheroo. if the audio client is not the third registration, audio id will set in vga_switcheroo enable function. if the audio client is the last registration when vga_switcheroo _ready() get true, we should get audio client id from bound GPU directly. Signed-off-by: Jim Qu Reviewed-by: Lukas Wunner Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 77f0f0af3a71..a34539b7f750 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -84,8 +84,8 @@ enum vga_switcheroo_state { * Client identifier. Audio clients use the same identifier & 0x100. */ enum vga_switcheroo_client_id { - VGA_SWITCHEROO_UNKNOWN_ID = -1, - VGA_SWITCHEROO_IGD, + VGA_SWITCHEROO_UNKNOWN_ID = 0x1000, + VGA_SWITCHEROO_IGD = 0, VGA_SWITCHEROO_DIS, VGA_SWITCHEROO_MAX_CLIENTS, }; @@ -151,7 +151,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev, bool driver_power_control); int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - enum vga_switcheroo_client_id id); + struct pci_dev *vga_dev); void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); @@ -180,7 +180,7 @@ static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_ha enum vga_switcheroo_handler_flags_t handler_flags) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - enum vga_switcheroo_client_id id) { return 0; } + struct pci_dev *vga_dev) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) { return 0; } static inline int vga_switcheroo_lock_ddc(struct pci_dev *pdev) { return -ENODEV; } -- cgit From 9afb719e7046c4f2462278862ab3db2961cc141c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 5 Jul 2018 17:49:37 -0700 Subject: kbuild: Add build salt to the kernel and modules In Fedora, the debug information is packaged separately (foo-debuginfo) and can be installed separately. There's been a long standing issue where only one version of a debuginfo info package can be installed at a time. There's been an effort for Fedora for parallel debuginfo to rectify this problem. Part of the requirement to allow parallel debuginfo to work is that build ids are unique between builds. The existing upstream rpm implementation ensures this by re-calculating the build-id using the version and release as a seed. This doesn't work 100% for the kernel because of the vDSO which is its own binary and doesn't get updated when embedded. Fix this by adding some data in an ELF note for both the kernel and modules. The data is controlled via a Kconfig option so distributions can set it to an appropriate value to ensure uniqueness between builds. Suggested-by: Masahiro Yamada Signed-off-by: Laura Abbott Signed-off-by: Masahiro Yamada --- include/linux/build-salt.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/build-salt.h (limited to 'include/linux') diff --git a/include/linux/build-salt.h b/include/linux/build-salt.h new file mode 100644 index 000000000000..bb007bd05e7a --- /dev/null +++ b/include/linux/build-salt.h @@ -0,0 +1,20 @@ +#ifndef __BUILD_SALT_H +#define __BUILD_SALT_H + +#include + +#define LINUX_ELFNOTE_BUILD_SALT 0x100 + +#ifdef __ASSEMBLER__ + +#define BUILD_SALT \ + ELFNOTE(Linux, LINUX_ELFNOTE_BUILD_SALT, .asciz CONFIG_BUILD_SALT) + +#else + +#define BUILD_SALT \ + ELFNOTE32("Linux", LINUX_ELFNOTE_BUILD_SALT, CONFIG_BUILD_SALT) + +#endif + +#endif /* __BUILD_SALT_H */ -- cgit From 2fe31e43124040a67495fa81f7ac67bc4c09ebc8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 17 Jul 2018 21:48:10 +0200 Subject: hwmon: Add missing HWMON_T_LCRIT_ALARM define The enum hwmon_temp_lcrit_alarm exists, but the BIT definition is missing. Signed-off-by: Andrew Lunn Acked-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index e5fd2707b6df..1b74ad11a5a4 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -93,6 +93,7 @@ enum hwmon_temp_attributes { #define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) #define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) #define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) +#define HWMON_T_LCRIT_ALARM BIT(hwmon_temp_lcrit_alarm) #define HWMON_T_EMERGENCY_ALARM BIT(hwmon_temp_emergency_alarm) #define HWMON_T_FAULT BIT(hwmon_temp_fault) #define HWMON_T_OFFSET BIT(hwmon_temp_offset) -- cgit From aa7f29b07c8702127124d0522e3cd46850cdbc41 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 17 Jul 2018 21:48:11 +0200 Subject: hwmon: Add support for power min, lcrit, min_alarm and lcrit_alarm Some sensors support reporting minimal and lower critical power, as well as alarms when these thresholds are reached. Add support for these attributes to the hwmon core. Signed-off-by: Andrew Lunn Acked-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/hwmon.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 1b74ad11a5a4..b217101ca76e 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -188,12 +188,16 @@ enum hwmon_power_attributes { hwmon_power_cap_hyst, hwmon_power_cap_max, hwmon_power_cap_min, + hwmon_power_min, hwmon_power_max, hwmon_power_crit, + hwmon_power_lcrit, hwmon_power_label, hwmon_power_alarm, hwmon_power_cap_alarm, + hwmon_power_min_alarm, hwmon_power_max_alarm, + hwmon_power_lcrit_alarm, hwmon_power_crit_alarm, }; @@ -214,12 +218,16 @@ enum hwmon_power_attributes { #define HWMON_P_CAP_HYST BIT(hwmon_power_cap_hyst) #define HWMON_P_CAP_MAX BIT(hwmon_power_cap_max) #define HWMON_P_CAP_MIN BIT(hwmon_power_cap_min) +#define HWMON_P_MIN BIT(hwmon_power_min) #define HWMON_P_MAX BIT(hwmon_power_max) +#define HWMON_P_LCRIT BIT(hwmon_power_lcrit) #define HWMON_P_CRIT BIT(hwmon_power_crit) #define HWMON_P_LABEL BIT(hwmon_power_label) #define HWMON_P_ALARM BIT(hwmon_power_alarm) #define HWMON_P_CAP_ALARM BIT(hwmon_power_cap_alarm) +#define HWMON_P_MIN_ALARM BIT(hwmon_power_max_alarm) #define HWMON_P_MAX_ALARM BIT(hwmon_power_max_alarm) +#define HWMON_P_LCRIT_ALARM BIT(hwmon_power_lcrit_alarm) #define HWMON_P_CRIT_ALARM BIT(hwmon_power_crit_alarm) enum hwmon_energy_attributes { -- cgit From dcb5d0fcaa1ddd0af9c18ef51e6b05e38a6cec71 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 17 Jul 2018 21:48:12 +0200 Subject: hwmon: Add helper to tell if a char is invalid in a name HWMON device names are not allowed to contain "-* \t\n". Add a helper which will return true if passed an invalid character. It can be used to massage a string into a hwmon compatible name by replacing invalid characters with '_'. Signed-off-by: Andrew Lunn Acked-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/hwmon.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index b217101ca76e..9493d4a388db 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -398,4 +398,27 @@ devm_hwmon_device_register_with_info(struct device *dev, void hwmon_device_unregister(struct device *dev); void devm_hwmon_device_unregister(struct device *dev); +/** + * hwmon_is_bad_char - Is the char invalid in a hwmon name + * @ch: the char to be considered + * + * hwmon_is_bad_char() can be used to determine if the given character + * may not be used in a hwmon name. + * + * Returns true if the char is invalid, false otherwise. + */ +static inline bool hwmon_is_bad_char(const char ch) +{ + switch (ch) { + case '-': + case '*': + case ' ': + case '\t': + case '\n': + return true; + default: + return false; + } +} + #endif -- cgit From 1323061a018a7514287894a552c4ec2a5f0cb0cd Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 17 Jul 2018 21:48:13 +0200 Subject: net: phy: sfp: Add HWMON support for module sensors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFP modules can contain a number of sensors. The EEPROM also contains recommended alarm and critical values for each sensor, and indications of if these have been exceeded. Export this information via HWMON. Currently temperature, VCC, bias current, transmit power, and possibly receiver power is supported. The sensors in the modules can either return calibrate or uncalibrated values. Uncalibrated values need to be manipulated, using coefficients provided in the SFP EEPROM. Uncalibrated receive power values require floating point maths in order to calibrate them. Performing this in the kernel is hard. So if the SFP module indicates it uses uncalibrated values, RX power is not made available. With this hwmon device, it is possible to view the sensor values using lm-sensors programs: in0: +3.29 V (crit min = +2.90 V, min = +3.00 V) (max = +3.60 V, crit max = +3.70 V) temp1: +33.0°C (low = -5.0°C, high = +80.0°C) (crit low = -10.0°C, crit = +85.0°C) power1: 1000.00 nW (max = 794.00 uW, min = 50.00 uW) ALARM (LCRIT) (lcrit = 40.00 uW, crit = 1000.00 uW) curr1: +0.00 A (crit min = +0.00 A, min = +0.00 A) ALARM (LCRIT, MIN) (max = +0.01 A, crit max = +0.01 A) The scaling sensors performs on the bias current is not particularly good. The raw values are more useful: curr1: curr1_input: 0.000 curr1_min: 0.002 curr1_max: 0.010 curr1_lcrit: 0.000 curr1_crit: 0.011 curr1_min_alarm: 1.000 curr1_max_alarm: 0.000 curr1_lcrit_alarm: 1.000 curr1_crit_alarm: 0.000 In order to keep the I2C overhead to a minimum, the constant values, such as limits and calibration coefficients are read once at module insertion time. Thus only reading *_input and *_alarm properties requires i2c read operations. Signed-off-by: Andrew Lunn Acked-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/sfp.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index ebce9e24906a..d37518e89db2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -231,6 +231,50 @@ struct sfp_eeprom_id { struct sfp_eeprom_ext ext; } __packed; +struct sfp_diag { + __be16 temp_high_alarm; + __be16 temp_low_alarm; + __be16 temp_high_warn; + __be16 temp_low_warn; + __be16 volt_high_alarm; + __be16 volt_low_alarm; + __be16 volt_high_warn; + __be16 volt_low_warn; + __be16 bias_high_alarm; + __be16 bias_low_alarm; + __be16 bias_high_warn; + __be16 bias_low_warn; + __be16 txpwr_high_alarm; + __be16 txpwr_low_alarm; + __be16 txpwr_high_warn; + __be16 txpwr_low_warn; + __be16 rxpwr_high_alarm; + __be16 rxpwr_low_alarm; + __be16 rxpwr_high_warn; + __be16 rxpwr_low_warn; + __be16 laser_temp_high_alarm; + __be16 laser_temp_low_alarm; + __be16 laser_temp_high_warn; + __be16 laser_temp_low_warn; + __be16 tec_cur_high_alarm; + __be16 tec_cur_low_alarm; + __be16 tec_cur_high_warn; + __be16 tec_cur_low_warn; + __be32 cal_rxpwr4; + __be32 cal_rxpwr3; + __be32 cal_rxpwr2; + __be32 cal_rxpwr1; + __be32 cal_rxpwr0; + __be16 cal_txi_slope; + __be16 cal_txi_offset; + __be16 cal_txpwr_slope; + __be16 cal_txpwr_offset; + __be16 cal_t_slope; + __be16 cal_t_offset; + __be16 cal_v_slope; + __be16 cal_v_offset; +} __packed; + /* SFP EEPROM registers */ enum { SFP_PHYS_ID = 0x00, @@ -384,7 +428,33 @@ enum { SFP_TEC_CUR = 0x6c, SFP_STATUS = 0x6e, - SFP_ALARM = 0x70, + SFP_ALARM0 = 0x70, + SFP_ALARM0_TEMP_HIGH = BIT(7), + SFP_ALARM0_TEMP_LOW = BIT(6), + SFP_ALARM0_VCC_HIGH = BIT(5), + SFP_ALARM0_VCC_LOW = BIT(4), + SFP_ALARM0_TX_BIAS_HIGH = BIT(3), + SFP_ALARM0_TX_BIAS_LOW = BIT(2), + SFP_ALARM0_TXPWR_HIGH = BIT(1), + SFP_ALARM0_TXPWR_LOW = BIT(0), + + SFP_ALARM1 = 0x71, + SFP_ALARM1_RXPWR_HIGH = BIT(7), + SFP_ALARM1_RXPWR_LOW = BIT(6), + + SFP_WARN0 = 0x74, + SFP_WARN0_TEMP_HIGH = BIT(7), + SFP_WARN0_TEMP_LOW = BIT(6), + SFP_WARN0_VCC_HIGH = BIT(5), + SFP_WARN0_VCC_LOW = BIT(4), + SFP_WARN0_TX_BIAS_HIGH = BIT(3), + SFP_WARN0_TX_BIAS_LOW = BIT(2), + SFP_WARN0_TXPWR_HIGH = BIT(1), + SFP_WARN0_TXPWR_LOW = BIT(0), + + SFP_WARN1 = 0x75, + SFP_WARN1_RXPWR_HIGH = BIT(7), + SFP_WARN1_RXPWR_LOW = BIT(6), SFP_EXT_STATUS = 0x76, SFP_VSL = 0x78, -- cgit From 753d433b586d1d43c487e3d660f5778c7c8d58ea Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 09:15:32 +1000 Subject: random: Return nbytes filled from hw RNG Currently the function get_random_bytes_arch() has return value 'void'. If the hw RNG fails we currently fall back to using get_random_bytes(). This defeats the purpose of requesting random material from the hw RNG in the first place. There are currently no intree users of get_random_bytes_arch(). Only get random bytes from the hw RNG, make function return the number of bytes retrieved from the hw RNG. Acked-by: Theodore Ts'o Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o --- include/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 2ddf13b4281e..f1c9bc5cd231 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -38,7 +38,7 @@ extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern void get_random_bytes_arch(void *buf, int nbytes); +extern int __must_check get_random_bytes_arch(void *buf, int nbytes); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; -- cgit From 181ace9e1b2e1c5c2be590bc9603baa65f3a16d8 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 20 Jun 2018 12:57:28 +0530 Subject: mtd: rawnand: helper function for setting up ECC configuration commit 2c8f8afa7f92 ("mtd: nand: add generic helpers to check, match, maximize ECC settings") provides generic helpers which drivers can use for setting up ECC parameters. Since same board can have different ECC strength nand chips so following is the logic for setting up ECC strength and ECC step size, which can be used by most of the drivers. 1. If both ECC step size and ECC strength are already set (usually by DT) then just check whether this setting is supported by NAND controller. 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength supported by NAND controller. 3. Otherwise, try to match the ECC step size and ECC strength closest to the chip's requirement. If available OOB size can't fit the chip requirement then select maximum ECC strength which can be fit with available OOB size. This patch introduces nand_ecc_choose_conf function which calls the required helper functions for the above logic. The drivers can use this single function instead of calling the 3 helper functions individually. Signed-off-by: Abhishek Sahu Reviewed-by: Masahiro Yamada Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fbac4741da52..2ce305388471 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1672,6 +1672,9 @@ int nand_match_ecc_req(struct nand_chip *chip, int nand_maximize_ecc(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); +int nand_ecc_choose_conf(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + /* Default write_oob implementation */ int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); -- cgit From 0cf5c7dbaa392d11cfae532cedeaad0ac48b2165 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 20 Jun 2018 12:57:42 +0530 Subject: mtd: rawnand: provide only single helper function for ECC conf Function nand_ecc_choose_conf() will be help for all the cases, so other helper functions can be made static. nand_check_ecc_caps(): Invoke nand_ecc_choose_conf() with both chip->ecc.size and chip->ecc.strength value set. nand_maximize_ecc(): Invoke nand_ecc_choose_conf() with NAND_ECC_MAXIMIZE flag. nand_match_ecc_req(): Invoke nand_ecc_choose_conf() with either chip->ecc.size or chip->ecc.strength value set and without NAND_ECC_MAXIMIZE flag. CC: Masahiro Yamada Signed-off-by: Abhishek Sahu Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2ce305388471..0c6fb316b409 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1663,15 +1663,6 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *extraoob, int extraooblen, int threshold); -int nand_check_ecc_caps(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_match_ecc_req(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_maximize_ecc(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); -- cgit From 7529df4652482c33ae1a99ee8189401146f13cb7 Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Fri, 22 Jun 2018 14:28:23 +0200 Subject: mtd: nand: Add core infrastructure to support SPI NANDs Add a SPI NAND framework based on the generic NAND framework and the spi-mem infrastructure. In its current state, this framework supports the following features: - single/dual/quad IO modes - on-die ECC Signed-off-by: Peter Pan Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 416 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi-mem.h | 4 +- 2 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 include/linux/mtd/spinand.h (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h new file mode 100644 index 000000000000..d3efe62dc9de --- /dev/null +++ b/include/linux/mtd/spinand.h @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016-2017 Micron Technology, Inc. + * + * Authors: + * Peter Pan + */ +#ifndef __LINUX_MTD_SPINAND_H +#define __LINUX_MTD_SPINAND_H + +#include +#include +#include +#include +#include +#include +#include + +/** + * Standard SPI NAND flash operations + */ + +#define SPINAND_RESET_OP \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_WR_EN_DIS_OP(enable) \ + SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_READID_OP(ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_SET_FEATURE_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1), \ + SPI_MEM_OP_ADDR(1, reg, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(1, valptr, 1)) + +#define SPINAND_GET_FEATURE_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1), \ + SPI_MEM_OP_ADDR(1, reg, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_IN(1, valptr, 1)) + +#define SPINAND_BLK_ERASE_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 4)) + +#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 4), \ + SPI_MEM_OP_DUMMY(ndummy, 4), \ + SPI_MEM_OP_DATA_IN(len, buf, 4)) + +#define SPINAND_PROG_EXEC_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PROG_LOAD(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 1)) + +#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 4)) + +/** + * Standard SPI NAND flash commands + */ +#define SPINAND_CMD_PROG_LOAD_X4 0x32 +#define SPINAND_CMD_PROG_LOAD_RDM_DATA_X4 0x34 + +/* feature register */ +#define REG_BLOCK_LOCK 0xa0 +#define BL_ALL_UNLOCKED 0x00 + +/* configuration register */ +#define REG_CFG 0xb0 +#define CFG_OTP_ENABLE BIT(6) +#define CFG_ECC_ENABLE BIT(4) +#define CFG_QUAD_ENABLE BIT(0) + +/* status register */ +#define REG_STATUS 0xc0 +#define STATUS_BUSY BIT(0) +#define STATUS_ERASE_FAILED BIT(2) +#define STATUS_PROG_FAILED BIT(3) +#define STATUS_ECC_MASK GENMASK(5, 4) +#define STATUS_ECC_NO_BITFLIPS (0 << 4) +#define STATUS_ECC_HAS_BITFLIPS (1 << 4) +#define STATUS_ECC_UNCOR_ERROR (2 << 4) + +struct spinand_op; +struct spinand_device; + +#define SPINAND_MAX_ID_LEN 4 + +/** + * struct spinand_id - SPI NAND id structure + * @data: buffer containing the id bytes. Currently 4 bytes large, but can + * be extended if required + * @len: ID length + * + * struct_spinand_id->data contains all bytes returned after a READ_ID command, + * including dummy bytes if the chip does not emit ID bytes right after the + * READ_ID command. The responsibility to extract real ID bytes is left to + * struct_manufacurer_ops->detect(). + */ +struct spinand_id { + u8 data[SPINAND_MAX_ID_LEN]; + int len; +}; + +/** + * struct manufacurer_ops - SPI NAND manufacturer specific operations + * @detect: detect a SPI NAND device. Every time a SPI NAND device is probed + * the core calls the struct_manufacurer_ops->detect() hook of each + * registered manufacturer until one of them return 1. Note that + * the first thing to check in this hook is that the manufacturer ID + * in struct_spinand_device->id matches the manufacturer whose + * ->detect() hook has been called. Should return 1 if there's a + * match, 0 if the manufacturer ID does not match and a negative + * error code otherwise. When true is returned, the core assumes + * that properties of the NAND chip (spinand->base.memorg and + * spinand->base.eccreq) have been filled + * @init: initialize a SPI NAND device + * @cleanup: cleanup a SPI NAND device + * + * Each SPI NAND manufacturer driver should implement this interface so that + * NAND chips coming from this vendor can be detected and initialized properly. + */ +struct spinand_manufacturer_ops { + int (*detect)(struct spinand_device *spinand); + int (*init)(struct spinand_device *spinand); + void (*cleanup)(struct spinand_device *spinand); +}; + +/** + * struct spinand_manufacturer - SPI NAND manufacturer instance + * @id: manufacturer ID + * @name: manufacturer name + * @ops: manufacturer operations + */ +struct spinand_manufacturer { + u8 id; + char *name; + const struct spinand_manufacturer_ops *ops; +}; + +/** + * struct spinand_op_variants - SPI NAND operation variants + * @ops: the list of variants for a given operation + * @nops: the number of variants + * + * Some operations like read-from-cache/write-to-cache have several variants + * depending on the number of IO lines you use to transfer data or address + * cycles. This structure is a way to describe the different variants supported + * by a chip and let the core pick the best one based on the SPI mem controller + * capabilities. + */ +struct spinand_op_variants { + const struct spi_mem_op *ops; + unsigned int nops; +}; + +#define SPINAND_OP_VARIANTS(name, ...) \ + const struct spinand_op_variants name = { \ + .ops = (struct spi_mem_op[]) { __VA_ARGS__ }, \ + .nops = sizeof((struct spi_mem_op[]){ __VA_ARGS__ }) / \ + sizeof(struct spi_mem_op), \ + } + +/** + * spinand_ecc_info - description of the on-die ECC implemented by a SPI NAND + * chip + * @get_status: get the ECC status. Should return a positive number encoding + * the number of corrected bitflips if correction was possible or + * -EBADMSG if there are uncorrectable errors. I can also return + * other negative error codes if the error is not caused by + * uncorrectable bitflips + * @ooblayout: the OOB layout used by the on-die ECC implementation + */ +struct spinand_ecc_info { + int (*get_status)(struct spinand_device *spinand, u8 status); + const struct mtd_ooblayout_ops *ooblayout; +}; + +#define SPINAND_HAS_QE_BIT BIT(0) + +/** + * struct spinand_info - Structure used to describe SPI NAND chips + * @model: model name + * @devid: device ID + * @flags: OR-ing of the SPINAND_XXX flags + * @memorg: memory organization + * @eccreq: ECC requirements + * @eccinfo: on-die ECC info + * @op_variants: operations variants + * @op_variants.read_cache: variants of the read-cache operation + * @op_variants.write_cache: variants of the write-cache operation + * @op_variants.update_cache: variants of the update-cache operation + * @select_target: function used to select a target/die. Required only for + * multi-die chips + * + * Each SPI NAND manufacturer driver should have a spinand_info table + * describing all the chips supported by the driver. + */ +struct spinand_info { + const char *model; + u8 devid; + u32 flags; + struct nand_memory_organization memorg; + struct nand_ecc_req eccreq; + struct spinand_ecc_info eccinfo; + struct { + const struct spinand_op_variants *read_cache; + const struct spinand_op_variants *write_cache; + const struct spinand_op_variants *update_cache; + } op_variants; + int (*select_target)(struct spinand_device *spinand, + unsigned int target); +}; + +#define SPINAND_INFO_OP_VARIANTS(__read, __write, __update) \ + { \ + .read_cache = __read, \ + .write_cache = __write, \ + .update_cache = __update, \ + } + +#define SPINAND_ECCINFO(__ooblayout, __get_status) \ + .eccinfo = { \ + .ooblayout = __ooblayout, \ + .get_status = __get_status, \ + } + +#define SPINAND_SELECT_TARGET(__func) \ + .select_target = __func, + +#define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ + __flags, ...) \ + { \ + .model = __model, \ + .devid = __id, \ + .memorg = __memorg, \ + .eccreq = __eccreq, \ + .op_variants = __op_variants, \ + .flags = __flags, \ + __VA_ARGS__ \ + } + +/** + * struct spinand_device - SPI NAND device instance + * @base: NAND device instance + * @spimem: pointer to the SPI mem object + * @lock: lock used to serialize accesses to the NAND + * @id: NAND ID as returned by READ_ID + * @flags: NAND flags + * @op_templates: various SPI mem op templates + * @op_templates.read_cache: read cache op template + * @op_templates.write_cache: write cache op template + * @op_templates.update_cache: update cache op template + * @select_target: select a specific target/die. Usually called before sending + * a command addressing a page or an eraseblock embedded in + * this die. Only required if your chip exposes several dies + * @cur_target: currently selected target/die + * @eccinfo: on-die ECC information + * @cfg_cache: config register cache. One entry per die + * @databuf: bounce buffer for data + * @oobbuf: bounce buffer for OOB data + * @scratchbuf: buffer used for everything but page accesses. This is needed + * because the spi-mem interface explicitly requests that buffers + * passed in spi_mem_op be DMA-able, so we can't based the bufs on + * the stack + * @manufacturer: SPI NAND manufacturer information + * @priv: manufacturer private data + */ +struct spinand_device { + struct nand_device base; + struct spi_mem *spimem; + struct mutex lock; + struct spinand_id id; + u32 flags; + + struct { + const struct spi_mem_op *read_cache; + const struct spi_mem_op *write_cache; + const struct spi_mem_op *update_cache; + } op_templates; + + int (*select_target)(struct spinand_device *spinand, + unsigned int target); + unsigned int cur_target; + + struct spinand_ecc_info eccinfo; + + u8 *cfg_cache; + u8 *databuf; + u8 *oobbuf; + u8 *scratchbuf; + const struct spinand_manufacturer *manufacturer; + void *priv; +}; + +/** + * mtd_to_spinand() - Get the SPI NAND device attached to an MTD instance + * @mtd: MTD instance + * + * Return: the SPI NAND device attached to @mtd. + */ +static inline struct spinand_device *mtd_to_spinand(struct mtd_info *mtd) +{ + return container_of(mtd_to_nanddev(mtd), struct spinand_device, base); +} + +/** + * spinand_to_mtd() - Get the MTD device embedded in a SPI NAND device + * @spinand: SPI NAND device + * + * Return: the MTD device embedded in @spinand. + */ +static inline struct mtd_info *spinand_to_mtd(struct spinand_device *spinand) +{ + return nanddev_to_mtd(&spinand->base); +} + +/** + * nand_to_spinand() - Get the SPI NAND device embedding an NAND object + * @nand: NAND object + * + * Return: the SPI NAND device embedding @nand. + */ +static inline struct spinand_device *nand_to_spinand(struct nand_device *nand) +{ + return container_of(nand, struct spinand_device, base); +} + +/** + * spinand_to_nand() - Get the NAND device embedded in a SPI NAND object + * @spinand: SPI NAND device + * + * Return: the NAND device embedded in @spinand. + */ +static inline struct nand_device * +spinand_to_nand(struct spinand_device *spinand) +{ + return &spinand->base; +} + +/** + * spinand_set_of_node - Attach a DT node to a SPI NAND device + * @spinand: SPI NAND device + * @np: DT node + * + * Attach a DT node to a SPI NAND device. + */ +static inline void spinand_set_of_node(struct spinand_device *spinand, + struct device_node *np) +{ + nanddev_set_of_node(&spinand->base, np); +} + +int spinand_match_and_init(struct spinand_device *dev, + const struct spinand_info *table, + unsigned int table_size, u8 devid); + +int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_select_target(struct spinand_device *spinand, unsigned int target); + +#endif /* __LINUX_MTD_SPINAND_H */ diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index bb4bd15ae1f6..4fa34a227a0f 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -3,7 +3,9 @@ * Copyright (C) 2018 Exceet Electronics GmbH * Copyright (C) 2018 Bootlin * - * Author: Boris Brezillon + * Author: + * Peter Pan + * Boris Brezillon */ #ifndef __LINUX_SPI_MEM_H -- cgit From a508e8875e135d7a1df26d8131b5443cb07005ff Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Fri, 22 Jun 2018 14:28:25 +0200 Subject: mtd: spinand: Add initial support for Micron MT29F2G01ABAGD Add a basic driver for Micron SPI NANDs. Only one device is supported right now, but the driver will be extended to support more devices afterwards. Signed-off-by: Peter Pan Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index d3efe62dc9de..717b272940f1 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -193,6 +193,9 @@ struct spinand_manufacturer { const struct spinand_manufacturer_ops *ops; }; +/* SPI NAND manufacturers */ +extern const struct spinand_manufacturer micron_spinand_manufacturer; + /** * struct spinand_op_variants - SPI NAND operation variants * @ops: the list of variants for a given operation -- cgit From 1075492bb9e26312bc8ddeec1a93e2de5f9c76b4 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 22 Jun 2018 14:28:26 +0200 Subject: mtd: spinand: Add initial support for Winbond W25M02GV Add support for the W25M02GV chip. Signed-off-by: Frieder Schrempf Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 717b272940f1..f0f16b9029e7 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -195,6 +195,7 @@ struct spinand_manufacturer { /* SPI NAND manufacturers */ extern const struct spinand_manufacturer micron_spinand_manufacturer; +extern const struct spinand_manufacturer winbond_spinand_manufacturer; /** * struct spinand_op_variants - SPI NAND operation variants -- cgit From b02308af05e62c7d995f4fc75b0bc2ae3c3026f7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 22 Jun 2018 14:28:27 +0200 Subject: mtd: spinand: Add initial support for the MX35LF1GE4AB chip Add minimal support for the MX35LF1GE4AB SPI NAND chip. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index f0f16b9029e7..088ff96c3eb6 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -194,6 +194,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; -- cgit From a4c025372d9d4756e7be98a98f5dde302c6df562 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:27 +0200 Subject: mtd: rawnand: Remove nand_do_read() prototype from rawnand.h nand_do_read() is a static function implemented in nand_base.c. There's no good reason to expose its prototype in rawnand.h. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 0c6fb316b409..4d7a46c42900 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1545,8 +1545,6 @@ int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, int allowbbt); -int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, uint8_t *buf); /** * struct platform_nand_chip - chip level device structure -- cgit From 707329aca6e0cf5781ca7f62c39bdcb5f87e9c23 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:28 +0200 Subject: mtd: rawnand: Remove forward declaration of mtd_info struct mtd_info is defined in linux/mtd/mtd.h which is included at the beginning of nand_base.c, there's thus no need for the forward declaration of mtd_info. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 4d7a46c42900..32145b302585 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -23,7 +23,6 @@ #include #include -struct mtd_info; struct nand_flash_dev; struct device_node; -- cgit From 1c3ab61ebcf8cfb5308d2e68e03fd4b4df484e62 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:29 +0200 Subject: mtd: rawnand: Remove forward declaration of device_node struct device_node is defined in linux/of.h. Let's include this file instead of having a forward declaration of this struct. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 32145b302585..83ab6779144e 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -21,10 +21,10 @@ #include #include #include +#include #include struct nand_flash_dev; -struct device_node; /* Scan and identify a NAND device */ int nand_scan_with_ids(struct mtd_info *mtd, int max_chips, -- cgit From 44b07b921dea5ec997fb929ceaa82582a7415816 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:30 +0200 Subject: mtd: rawnand: Rename nand_default_bbt() into nand_create_bbt() Rename nand_default_bbt() into nand_create_bbt() and pass it a nand_chip object to prepare removal of the chip->scan_bbt() hook. We add a temporary nand_default_bbt() wrapper which will be dropped after the removal of ->scan_bbt(). Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 83ab6779144e..186f9fb1e7eb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1538,7 +1538,7 @@ extern const struct nand_manufacturer_ops micron_nand_manuf_ops; extern const struct nand_manufacturer_ops amd_nand_manuf_ops; extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; -int nand_default_bbt(struct mtd_info *mtd); +int nand_create_bbt(struct nand_chip *chip); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); -- cgit From e80eba7581512625083ba540f120479b935425de Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:31 +0200 Subject: mtd: rawnand: Kill the chip->scan_bbt() hook None of the existing drivers are overloading the ->scan_bbt() method, let's get rid of it and replace calls to ->scan_bbt() by nand_create_bbt() ones. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 186f9fb1e7eb..ac0007ddadf2 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1199,7 +1199,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * @buf_align: minimum buffer alignment required by a platform * @hwcontrol: platform-specific hardware control structure * @erase: [REPLACEABLE] erase function - * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring * data from array to read regs (tR). * @state: [INTERN] the current state of the NAND device @@ -1292,7 +1291,6 @@ struct nand_chip { const struct nand_operation *op, bool check_only); int (*erase)(struct mtd_info *mtd, int page); - int (*scan_bbt)(struct mtd_info *mtd); int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit From f0f01838f76420988b50b23f315704c9a5cd2fa9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:32 +0200 Subject: mtd: rawnand: orion_nand: Kill orion_nand_data.dev_ready() None of the boards seem to overload the ->dev_ready() hook, just drop this field from orion_nand_data. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/platform_data/mtd-orion_nand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-orion_nand.h b/include/linux/platform_data/mtd-orion_nand.h index a7ce77c7c1a8..34828eb85982 100644 --- a/include/linux/platform_data/mtd-orion_nand.h +++ b/include/linux/platform_data/mtd-orion_nand.h @@ -12,7 +12,6 @@ */ struct orion_nand_data { struct mtd_partition *parts; - int (*dev_ready)(struct mtd_info *mtd); u32 nr_parts; u8 ale; /* address line number connected to ALE */ u8 cle; /* address line number connected to CLE */ -- cgit From dc2d8856a758627d4f126d17bc113eedd72b2d60 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:33 +0200 Subject: mtd: rawnand: plat_nand: Kill pdata->ctrl.{hwcontrol, read_byte}() None of the board files are overloading those hooks, so let's drop them from struct platform_nand_ctrl. Signed-off-by: Boris Brezillon Acked-by: Robert Jarzmik Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index ac0007ddadf2..11c2426fc363 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1572,14 +1572,12 @@ struct platform_device; * struct platform_nand_ctrl - controller level device structure * @probe: platform specific function to probe/setup hardware * @remove: platform specific function to remove/teardown hardware - * @hwcontrol: platform specific hardware control structure * @dev_ready: platform specific function to read ready/busy pin * @select_chip: platform specific chip select function * @cmd_ctrl: platform specific function for controlling * ALE/CLE/nCE. Also used to write command and address * @write_buf: platform specific function for write buffer * @read_buf: platform specific function for read buffer - * @read_byte: platform specific function to read one byte from chip * @priv: private data to transport driver specific settings * * All fields are optional and depend on the hardware driver requirements @@ -1587,13 +1585,11 @@ struct platform_device; struct platform_nand_ctrl { int (*probe)(struct platform_device *pdev); void (*remove)(struct platform_device *pdev); - void (*hwcontrol)(struct mtd_info *mtd, int cmd); int (*dev_ready)(struct mtd_info *mtd); void (*select_chip)(struct mtd_info *mtd, int chip); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); - unsigned char (*read_byte)(struct mtd_info *mtd); void *priv; }; -- cgit From dc2865ac3527d76fe04ee1f1a3ffc101a60faba0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 9 Jul 2018 22:09:40 +0200 Subject: MIPS: txx9: Move the ndfc.h header to include/linux/platform_data/txx9 This way we will be able to compile the ndfmc driver when COMPILE_TEST=y. Signed-off-by: Boris Brezillon Acked-by: Paul Burton Signed-off-by: Miquel Raynal --- include/linux/platform_data/txx9/ndfmc.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/platform_data/txx9/ndfmc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/txx9/ndfmc.h b/include/linux/platform_data/txx9/ndfmc.h new file mode 100644 index 000000000000..fc172627d54e --- /dev/null +++ b/include/linux/platform_data/txx9/ndfmc.h @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * (C) Copyright TOSHIBA CORPORATION 2007 + */ +#ifndef __TXX9_NDFMC_H +#define __TXX9_NDFMC_H + +#define NDFMC_PLAT_FLAG_USE_BSPRT 0x01 +#define NDFMC_PLAT_FLAG_NO_RSTR 0x02 +#define NDFMC_PLAT_FLAG_HOLDADD 0x04 +#define NDFMC_PLAT_FLAG_DUMMYWRITE 0x08 + +struct txx9ndfmc_platform_data { + unsigned int shift; + unsigned int gbus_clock; + unsigned int hold; /* hold time in nanosecond */ + unsigned int spw; /* strobe pulse width in nanosecond */ + unsigned int flags; + unsigned char ch_mask; /* available channel bitmask */ + unsigned char wp_mask; /* write-protect bitmask */ + unsigned char wide_mask; /* 16bit-nand bitmask */ +}; + +void txx9_ndfmc_init(unsigned long baseaddr, + const struct txx9ndfmc_platform_data *plat_data); + +#endif /* __TXX9_NDFMC_H */ -- cgit From e65e3a50702f4e7a01c0b36ff08d6ed8dde7ee7b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 9 Jul 2018 22:09:42 +0200 Subject: MIPS: jz4740: Move jz4740_nand.h header to include/linux/platform_data/jz4740 This way we will be able to compile the jz4740_nand driver when COMPILE_TEST=y. Signed-off-by: Boris Brezillon Acked-by: Paul Burton Signed-off-by: Miquel Raynal --- include/linux/platform_data/jz4740/jz4740_nand.h | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h new file mode 100644 index 000000000000..bc571f6d5ced --- /dev/null +++ b/include/linux/platform_data/jz4740/jz4740_nand.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009-2010, Lars-Peter Clausen + * JZ4740 SoC NAND controller driver + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __JZ4740_NAND_H__ +#define __JZ4740_NAND_H__ + +#include +#include + +#define JZ_NAND_NUM_BANKS 4 + +struct jz_nand_platform_data { + int num_partitions; + struct mtd_partition *partitions; + + unsigned char banks[JZ_NAND_NUM_BANKS]; + + void (*ident_callback)(struct platform_device *, struct mtd_info *, + struct mtd_partition **, int *num_partitions); +}; + +#endif -- cgit From be2ab5b4d5c0bf041a34ec2e1397d50afbfb095e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Jul 2018 13:45:12 +0200 Subject: netfilter: nf_tables: take module reference when starting a batch Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 3ecc3050be0e..4a520d3304a2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -29,6 +29,7 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ + struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb); void (*cleanup)(struct net *net); -- cgit From e2861fa71641c6414831d628a1f4f793b6562580 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 8 Jun 2018 14:57:42 -0700 Subject: evm: Don't deadlock if a crypto algorithm is unavailable When EVM attempts to appraise a file signed with a crypto algorithm the kernel doesn't have support for, it will cause the kernel to trigger a module load. If the EVM policy includes appraisal of kernel modules this will in turn call back into EVM - since EVM is holding a lock until the crypto initialisation is complete, this triggers a deadlock. Add a CRYPTO_NOLOAD flag and skip module loading if it's set, and add that flag in the EVM case in order to fail gracefully with an error message instead of deadlocking. Signed-off-by: Matthew Garrett Acked-by: Herbert Xu Signed-off-by: Mimi Zohar --- include/linux/crypto.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6eb06101089f..e8839d3a7559 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,11 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 +/* + * Don't trigger module loading + */ +#define CRYPTO_NOLOAD 0x00008000 + /* * Transform masks and values (for crt_flags). */ -- cgit From 6eb864c1d9dd1ef32b88e03c3f49d8be0dab7dcf Mon Sep 17 00:00:00 2001 From: Mikhail Kurinnoi Date: Wed, 27 Jun 2018 16:33:42 +0300 Subject: integrity: prevent deadlock during digsig verification. This patch aimed to prevent deadlock during digsig verification.The point of issue - user space utility modprobe and/or it's dependencies (ld-*.so, libz.so.*, libc-*.so and /lib/modules/ files) that could be used for kernel modules load during digsig verification and could be signed by digsig in the same time. First at all, look at crypto_alloc_tfm() work algorithm: crypto_alloc_tfm() will first attempt to locate an already loaded algorithm. If that fails and the kernel supports dynamically loadable modules, it will then attempt to load a module of the same name or alias. If that fails it will send a query to any loaded crypto manager to construct an algorithm on the fly. We have situation, when public_key_verify_signature() in case of RSA algorithm use alg_name to store internal information in order to construct an algorithm on the fly, but crypto_larval_lookup() will try to use alg_name in order to load kernel module with same name. 1) we can't do anything with crypto module work, since it designed to work exactly in this way; 2) we can't globally filter module requests for modprobe, since it designed to work with any requests. In this patch, I propose add an exception for "crypto-pkcs1pad(rsa,*)" module requests only in case of enabled integrity asymmetric keys support. Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules for sure, we are safe to fail such module request from crypto_larval_lookup(). In this way we prevent modprobe execution during digsig verification and avoid possible deadlock if modprobe and/or it's dependencies also signed with digsig. Requested "crypto-pkcs1pad(rsa,*)" kernel module name formed by: 1) "pkcs1pad(rsa,%s)" in public_key_verify_signature(); 2) "crypto-%s" / "crypto-%s-all" in crypto_larval_lookup(). "crypto-pkcs1pad(rsa," part of request is a constant and unique and could be used as filter. Signed-off-by: Mikhail Kurinnoi Signed-off-by: Mimi Zohar include/linux/integrity.h | 13 +++++++++++++ security/integrity/digsig_asymmetric.c | 23 +++++++++++++++++++++++ security/security.c | 7 ++++++- 3 files changed, 42 insertions(+), 1 deletion(-) --- include/linux/integrity.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/integrity.h b/include/linux/integrity.h index 858d3f4a2241..54c853ec2fd1 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -44,4 +44,17 @@ static inline void integrity_load_keys(void) } #endif /* CONFIG_INTEGRITY */ +#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS + +extern int integrity_kernel_module_request(char *kmod_name); + +#else + +static inline int integrity_kernel_module_request(char *kmod_name) +{ + return 0; +} + +#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ + #endif /* _LINUX_INTEGRITY_H */ -- cgit From d29ab6e1fa21ebc2a8a771015dd9e0e5d4e28dc1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 13 Jul 2018 12:41:10 -0700 Subject: bpf: bpf_prog_array_alloc() should return a generic non-rcu pointer Currently the return type of the bpf_prog_array_alloc() is struct bpf_prog_array __rcu *, which is not quite correct. Obviously, the returned pointer is a generic pointer, which is valid for an indefinite amount of time and it's not shared with anyone else, so there is no sense in marking it as __rcu. This change eliminate the following sparse warnings: kernel/bpf/core.c:1544:31: warning: incorrect type in return expression (different address spaces) kernel/bpf/core.c:1544:31: expected struct bpf_prog_array [noderef] * kernel/bpf/core.c:1544:31: got void * kernel/bpf/core.c:1548:17: warning: incorrect type in return expression (different address spaces) kernel/bpf/core.c:1548:17: expected struct bpf_prog_array [noderef] * kernel/bpf/core.c:1548:17: got struct bpf_prog_array * kernel/bpf/core.c:1681:15: warning: incorrect type in assignment (different address spaces) kernel/bpf/core.c:1681:15: expected struct bpf_prog_array *array kernel/bpf/core.c:1681:15: got struct bpf_prog_array [noderef] * Fixes: 324bda9e6c5a ("bpf: multi program support for cgroup+bpf") Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8827e797ff97..943fb08d8287 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -352,7 +352,7 @@ struct bpf_prog_array { struct bpf_prog *progs[0]; }; -struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, -- cgit From 09728266b6f99ab57cd4f84f3eead65b7b65dbf7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jul 2018 10:53:23 -0700 Subject: bpf: offload: rename bpf_offload_dev_match() to bpf_offload_prog_map_match() A set of new API functions exported for the drivers will soon use 'bpf_offload_dev_' as a prefix. Rename the bpf_offload_dev_match() which is internal to the core (used by the verifier) to avoid any confusion. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 943fb08d8287..9b010d9129f3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -648,7 +648,7 @@ int bpf_map_offload_delete_elem(struct bpf_map *map, void *key); int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key); -bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map); +bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit From 9fd7c5559165f4c679b40c5e6ad442955832dfad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jul 2018 10:53:24 -0700 Subject: bpf: offload: aggregate offloads per-device Currently we have two lists of offloaded objects - programs and maps. Netdevice unregister notifier scans those lists to orphan objects associated with device being unregistered. This puts unnecessary (even if negligible) burden on all netdev unregister calls in BPF- -enabled kernel. The lists of objects may potentially get long making the linear scan even more problematic. There haven't been complaints about this mechanisms so far, but it is suboptimal. Instead of relying on notifiers, make the few BPF-capable drivers register explicitly for BPF offloads. The programs and maps will now be collected per-device not on a global list, and only scanned for removal when driver unregisters from BPF offloads. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9b010d9129f3..aa2e834a122b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -650,6 +650,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); +int bpf_offload_dev_netdev_register(struct net_device *netdev); +void bpf_offload_dev_netdev_unregister(struct net_device *netdev); + #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit From 602144c224604f1cbff02ee2d1cf46825269ecbd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jul 2018 10:53:25 -0700 Subject: bpf: offload: keep the offload state per-ASIC Create a higher-level entity to represent a device/ASIC to allow programs and maps to be shared between device ports. The extra work is required to make sure we don't destroy BPF objects as soon as the netdev for which they were loaded gets destroyed, as other ports may still be using them. When netdev goes away all of its BPF objects will be moved to other netdevs of the device, and only destroyed when last netdev is unregistered. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aa2e834a122b..913465e45062 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -85,6 +85,7 @@ struct bpf_map { char name[BPF_OBJ_NAME_LEN]; }; +struct bpf_offload_dev; struct bpf_offloaded_map; struct bpf_map_dev_ops { @@ -650,8 +651,12 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); -int bpf_offload_dev_netdev_register(struct net_device *netdev); -void bpf_offload_dev_netdev_unregister(struct net_device *netdev); +struct bpf_offload_dev *bpf_offload_dev_create(void); +void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); +int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, + struct net_device *netdev); +void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, + struct net_device *netdev); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit From fd4f227dea0f24d89f52f7c4eb3207f84ddcbcbd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jul 2018 10:53:26 -0700 Subject: bpf: offload: allow program and map sharing per-ASIC Allow programs and maps to be re-used across different netdevs, as long as they belong to the same struct bpf_offload_dev. Update the bpf_offload_prog_map_match() helper for the verifier and export a new helper for the drivers to use when checking programs at attachment time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 913465e45062..5b5ad95cf339 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -657,6 +657,7 @@ int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev); +bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jul 2018 04:47:36 -0700 Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for read/write") replaced @op with boolean @is_write, which limited the amount of information going into ->rw_page() and more importantly page_endio(), which removed the need to expose block internals to mm. Unfortunately, we want to track discards separately and @is_write isn't enough information. This patch updates bdev_ops->rw_page() to take REQ_OP instead but leaves page_endio() to take bool @is_write. This allows the block part of operations to have enough information while not leaking it to mm. Signed-off-by: Tejun Heo Cc: Mike Christie Cc: Minchan Kim Cc: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1939ed95f936..331a6cb8805f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, bool); + int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit From 59767fbd49d794b4499d30b314df6c0d4aca584b Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:37 -0700 Subject: block: Add part_stat_read_accum to read across field entries. Add a part_stat_read_accum macro to genhd.h to read and sum across field entries. For example to sum up the number read and write sectors completed. In addition to being ar reasonable cleanup by itself this will make it easier to add new stat fields in the future. tj: Refreshed on top of v4.17. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6cb8a5789668..19f36fa10995 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -353,6 +353,10 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ +#define part_stat_read_accum(part, field) \ + (part_stat_read(part, field[0]) + \ + part_stat_read(part, field[1])) + #define part_stat_add(cpu, part, field, addnd) do { \ __part_stat_add((cpu), (part), field, addnd); \ if ((part)->partno) \ -- cgit From dbae2c551377b6533a00c11fc7ede370100ab404 Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:38 -0700 Subject: block: Define and use STAT_READ and STAT_WRITE Add defines for STAT_READ and STAT_WRITE for indexing the partition stat entries. This clarifies some fs/ code which has hardcoded 1 for STAT_WRITE and will make it easier to extend the stats with additional fields. tj: Refreshed on top of v4.17. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Jaegeuk Kim Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 +++++++ include/linux/genhd.h | 13 +++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e13449a379a1..d2b44de56bc1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -357,6 +357,13 @@ enum req_flag_bits { #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) +enum stat_group { + STAT_READ, + STAT_WRITE, + + NR_STAT_GROUPS +}; + #define bio_op(bio) \ ((bio)->bi_opf & REQ_OP_MASK) #define req_op(req) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 19f36fa10995..a75445446974 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -82,10 +83,10 @@ struct partition { } __attribute__((packed)); struct disk_stats { - unsigned long sectors[2]; /* READs and WRITEs */ - unsigned long ios[2]; - unsigned long merges[2]; - unsigned long ticks[2]; + unsigned long sectors[NR_STAT_GROUPS]; + unsigned long ios[NR_STAT_GROUPS]; + unsigned long merges[NR_STAT_GROUPS]; + unsigned long ticks[NR_STAT_GROUPS]; unsigned long io_ticks; unsigned long time_in_queue; }; @@ -354,8 +355,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ #define part_stat_read_accum(part, field) \ - (part_stat_read(part, field[0]) + \ - part_stat_read(part, field[1])) + (part_stat_read(part, field[STAT_READ]) + \ + part_stat_read(part, field[STAT_WRITE])) #define part_stat_add(cpu, part, field, addnd) do { \ __part_stat_add((cpu), (part), field, addnd); \ -- cgit From ddcf35d397976421a4ec1d0d00fbcc027a8cb034 Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:39 -0700 Subject: block: Add and use op_stat_group() for indexing disk_stat fields. Add and use a new op_stat_group() function for indexing partition stat fields rather than indexing them by rq_data_dir() or bio_data_dir(). This function works similarly to op_is_sync() in that it takes the request::cmd_flags or bio::bi_opf flags and determines which stats should et updated. In addition, the second parameter to generic_start_io_acct() and generic_end_io_acct() is now a REQ_OP rather than simply a read or write bit and it uses op_stat_group() on the parameter to determine the stat group. Note that the partition in_flight counts are not part of the per-cpu statistics and as such are not indexed via this function. It's now indexed by op_is_write(). tj: Refreshed on top of v4.17. Updated to pass around REQ_OP. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Cc: Minchan Kim Cc: Dan Williams Cc: Joshua Morris Cc: Philipp Reisner Cc: Matias Bjorling Cc: Kent Overstreet Cc: Alasdair Kergon Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- include/linux/blk_types.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a00dfff51aa5..ab221c517f4e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,9 +496,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -void generic_start_io_acct(struct request_queue *q, int rw, +void generic_start_io_acct(struct request_queue *q, int op, unsigned long sectors, struct hd_struct *part); -void generic_end_io_acct(struct request_queue *q, int rw, +void generic_end_io_acct(struct request_queue *q, int op, struct hd_struct *part, unsigned long start_time); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d2b44de56bc1..2960a96d833c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -401,6 +401,11 @@ static inline bool op_is_sync(unsigned int op) (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } +static inline int op_stat_group(unsigned int op) +{ + return op_is_write(op); +} + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U #define BLK_QC_T_SHIFT 16 -- cgit From bdca3c87fb7ad1cc61d231d37eb0d8f90d001e0c Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:40 -0700 Subject: block: Track DISCARD statistics and output them in stat and diskstat Add tracking of REQ_OP_DISCARD ios to the partition statistics and append them to the various stat files in /sys as well as /proc/diskstats. These are tracked with the same four stats as reads and writes: Number of discard ios completed. Number of discard ios merged Number of discard sectors completed Milliseconds spent on discard requests This is done via adding a new STAT_DISCARD define to genhd.h and then using it to index that stat field for discard requests. tj: Refreshed on top of v4.17 and other previous updates. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Cc: Andy Newell Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++++++ include/linux/genhd.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2960a96d833c..f6dfb30737d8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -360,6 +360,7 @@ enum req_flag_bits { enum stat_group { STAT_READ, STAT_WRITE, + STAT_DISCARD, NR_STAT_GROUPS }; @@ -401,8 +402,15 @@ static inline bool op_is_sync(unsigned int op) (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } +static inline bool op_is_discard(unsigned int op) +{ + return (op & REQ_OP_MASK) == REQ_OP_DISCARD; +} + static inline int op_stat_group(unsigned int op) { + if (op_is_discard(op)) + return STAT_DISCARD; return op_is_write(op); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a75445446974..57864422a2c8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -356,7 +356,8 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ - part_stat_read(part, field[STAT_WRITE])) + part_stat_read(part, field[STAT_WRITE]) + \ + part_stat_read(part, field[STAT_DISCARD])) #define part_stat_add(cpu, part, field, addnd) do { \ __part_stat_add((cpu), (part), field, addnd); \ -- cgit From 636620b66d5d4012c4a9c86206013964d3986c4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jul 2018 04:47:41 -0700 Subject: blkcg: Track DISCARD statistics and output them in cgroup io.stat Add tracking of REQ_OP_DISCARD ios to the per-cgroup io.stat. Two fields, dbytes and dios, to respectively count the total bytes and number of discards are added. Signed-off-by: Tejun Heo Cc: Andy Newell Cc: Michael Callahan Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index de57de4831d5..3bed5e02a873 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -35,6 +35,7 @@ enum blkg_rwstat_type { BLKG_RWSTAT_WRITE, BLKG_RWSTAT_SYNC, BLKG_RWSTAT_ASYNC, + BLKG_RWSTAT_DISCARD, BLKG_RWSTAT_NR, BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, @@ -649,7 +650,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, { struct percpu_counter *cnt; - if (op_is_write(op)) + if (op_is_discard(op)) + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; + else if (op_is_write(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; -- cgit From bcf7eac3d97f49d8400ba52c71bee5934bf20093 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 17 Jul 2018 00:47:48 +0900 Subject: regmap: add SCCB support This adds Serial Camera Control Bus (SCCB) support for regmap API that is intended to be used by some of Omnivision sensor drivers. The ov772x and ov9650 drivers are going to use this SCCB regmap API. The ov772x driver was previously only worked with the i2c controller drivers that support I2C_FUNC_PROTOCOL_MANGLING, because the ov772x device doesn't support repeated starts. After commit 0b964d183cbf ("media: ov772x: allow i2c controllers without I2C_FUNC_PROTOCOL_MANGLING"), reading ov772x register is replaced with issuing two separated i2c messages in order to avoid repeated start. Using this SCCB regmap hides the implementation detail. The ov9650 driver also issues two separated i2c messages to read the registers as the device doesn't support repeated start. So it can make use of this SCCB regmap. Cc: Mark Brown Cc: Peter Rosin Cc: Sebastian Reichel Cc: Wolfram Sang Cc: Sylwester Nawrocki Cc: Jacopo Mondi Cc: Laurent Pinchart Cc: Hans Verkuil Cc: Sakari Ailus Cc: Mauro Carvalho Chehab Signed-off-by: Akinobu Mita Signed-off-by: Mark Brown --- include/linux/regmap.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4f38068ffb71..52bf358e2c73 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -514,6 +514,10 @@ struct regmap *__regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_sccb(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__regmap_init_slimbus(struct slim_device *slimbus, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -558,6 +562,10 @@ struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init_spi(struct spi_device *dev, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -645,6 +653,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, __regmap_lockdep_wrapper(__regmap_init_i2c, #config, \ i2c, config) +/** + * regmap_init_sccb() - Initialise register map + * + * @i2c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_sccb(i2c, config) \ + __regmap_lockdep_wrapper(__regmap_init_sccb, #config, \ + i2c, config) + /** * regmap_init_slimbus() - Initialise register map * @@ -797,6 +818,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config, \ i2c, config) +/** + * devm_regmap_init_sccb() - Initialise managed register map + * + * @i2c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sccb(i2c, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sccb, #config, \ + i2c, config) + /** * devm_regmap_init_spi() - Initialise register map * -- cgit From 7c201ead1bfd19935f689fca69100c335028c047 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 16 Jul 2018 22:20:49 -0500 Subject: spi: spi-bitbang: change flags from u8 to u16 This changes the data type of the flags field in struct spi_bitbang from u8 to u16. This matches the size of the mode field of struct spi_device where these flags are also used. This is done in preparation of adding a new SPI mode flag that will be used with this field that would otherwise not fit in 8 bits. Signed-off-by: David Lechner Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 51d8c060e513..c1e61641a7f1 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -8,7 +8,7 @@ struct spi_bitbang { struct mutex lock; u8 busy; u8 use_dma; - u8 flags; /* extra spi->mode support */ + u16 flags; /* extra spi->mode support */ struct spi_master *master; -- cgit From a48d189ef53146a8df132a327a637c4182f50a16 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 17 Jul 2018 11:52:57 +0200 Subject: net: Move skb decrypted field, avoid explicity copy Commit 784abe24c903 ("net: Add decrypted field to skb") introduced a 'decrypted' field that is explicitly copied on skb copy and clone. Move it between headers_start[0] and headers_end[0], so that we don't need to copy it explicitly as it's copied by the memcpy() in __copy_skb_header(). While at it, drop the assignment in __skb_clone(), it was already redundant. This doesn't change the size of sk_buff or cacheline boundaries. The 15-bits hole before tc_index becomes a 14-bits hole, and will be again a 15-bits hole when this change is merged with commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()"). v2: as reported by kbuild test robot (oops, I forgot to build with CONFIG_TLS_DEVICE it seems), we can't use CHECK_SKB_FIELD() on a bit-field member. Just drop the check for the moment being, perhaps we could think of some magic to also check bit-field members one day. Fixes: 784abe24c903 ("net: Add decrypted field to skb") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ceb8dcc54da..14bc9ebe30f2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -630,7 +630,6 @@ typedef unsigned char *sk_buff_data_t; * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue - * @decrypted: Decrypted SKB * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -641,6 +640,7 @@ typedef unsigned char *sk_buff_data_t; * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @dst_pending_confirm: need to confirm neighbour + * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -737,11 +737,7 @@ struct sk_buff { peeked:1, head_frag:1, xmit_more:1, -#ifdef CONFIG_TLS_DEVICE - decrypted:1; -#else __unused:1; -#endif /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() @@ -797,6 +793,9 @@ struct sk_buff { __u8 tc_redirected:1; __u8 tc_from_ingress:1; #endif +#ifdef CONFIG_TLS_DEVICE + __u8 decrypted:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -- cgit From 8963106eabdc56911e9b65258eb5e9a6b7b3dfda Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:12 +0200 Subject: PCI: endpoint: Add MSI-X interfaces Add PCI_EPC_IRQ_MSIX type. Add MSI-X callbacks signatures to the ops structure. Add sysfs interface for set/get MSI-X capability maximum number. Update documentation accordingly. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 9 +++++++++ include/linux/pci-epf.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 243eaa5a66ff..89f079f582df 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -17,6 +17,7 @@ enum pci_epc_irq_type { PCI_EPC_IRQ_UNKNOWN, PCI_EPC_IRQ_LEGACY, PCI_EPC_IRQ_MSI, + PCI_EPC_IRQ_MSIX, }; /** @@ -30,6 +31,10 @@ enum pci_epc_irq_type { * capability register * @get_msi: ops to get the number of MSI interrupts allocated by the RC from * the MSI capability register + * @set_msix: ops to set the requested number of MSI-X interrupts in the + * MSI-X capability register + * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC + * from the MSI-X capability register * @raise_irq: ops to raise a legacy or MSI interrupt * @start: ops to start the PCI link * @stop: ops to stop the PCI link @@ -48,6 +53,8 @@ struct pci_epc_ops { phys_addr_t addr); int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts); int (*get_msi)(struct pci_epc *epc, u8 func_no); + int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts); + int (*get_msix)(struct pci_epc *epc, u8 func_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, enum pci_epc_irq_type type, u8 interrupt_num); int (*start)(struct pci_epc *epc); @@ -144,6 +151,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr); int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no); +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts); +int pci_epc_get_msix(struct pci_epc *epc, u8 func_no); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, enum pci_epc_irq_type type, u8 interrupt_num); int pci_epc_start(struct pci_epc *epc); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 4e7764935fa8..ec02f58758c8 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -119,6 +119,7 @@ struct pci_epf { struct pci_epf_header *header; struct pci_epf_bar bar[6]; u8 msi_interrupts; + u16 msix_interrupts; u8 func_no; struct pci_epc *epc; -- cgit From d3c70a98d7d63cae02d50ebfafea04264a767401 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:13 +0200 Subject: PCI: Update xxx_pcie_ep_raise_irq() and pci_epc_raise_irq() signatures Change {cdns, dra7xx, artpec6, dw, rockchip}_pcie_ep_raise_irq() and pci_epc_raise_irq() signature, namely the interrupt_num variable type from u8 to u16 to accommodate 2048 maximum MSI-X interrupts. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Alan Douglas Acked-by: Shawn Lin Acked-by: Jesper Nilsson Acked-by: Joao Pinto Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 89f079f582df..bb2395b56f13 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -35,7 +35,7 @@ enum pci_epc_irq_type { * MSI-X capability register * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC * from the MSI-X capability register - * @raise_irq: ops to raise a legacy or MSI interrupt + * @raise_irq: ops to raise a legacy, MSI or MSI-X interrupt * @start: ops to start the PCI link * @stop: ops to stop the PCI link * @owner: the module owner containing the ops @@ -56,7 +56,7 @@ struct pci_epc_ops { int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts); int (*get_msix)(struct pci_epc *epc, u8 func_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, - enum pci_epc_irq_type type, u8 interrupt_num); + enum pci_epc_irq_type type, u16 interrupt_num); int (*start)(struct pci_epc *epc); void (*stop)(struct pci_epc *epc); struct module *owner; @@ -154,7 +154,7 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no); int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts); int pci_epc_get_msix(struct pci_epc *epc, u8 func_no); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, - enum pci_epc_irq_type type, u8 interrupt_num); + enum pci_epc_irq_type type, u16 interrupt_num); int pci_epc_start(struct pci_epc *epc); void pci_epc_stop(struct pci_epc *epc); struct pci_epc *pci_epc_get(const char *epc_name); -- cgit From c2e00e31087e58f6c49b90b4702fc3df4fad6a83 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:19 +0200 Subject: pci-epf-test/pci_endpoint_test: Add MSI-X support Add MSI-X support and update driver documentation accordingly. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index bb2395b56f13..37dab8116901 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -102,6 +102,7 @@ struct pci_epc { #define EPC_FEATURE_NO_LINKUP_NOTIFIER BIT(0) #define EPC_FEATURE_BAR_MASK (BIT(1) | BIT(2) | BIT(3)) +#define EPC_FEATURE_MSIX_AVAILABLE BIT(4) #define EPC_FEATURE_SET_BAR(features, bar) \ (features |= (EPC_FEATURE_BAR_MASK & (bar << 1))) #define EPC_FEATURE_GET_BAR(features) \ -- cgit From d70420bcd447e5400a0116b20b9b6088bb32448e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jul 2018 22:33:02 +0200 Subject: mtd: adapt misleading comment in mtd_oob_ops structure A comment in the kernel doc of the mtd_oob_ops structure tells that it is not possible to write more than one page with OOB. This is actually true for only a few MTD devices like 'onenand' but it is definitely not a general limitation. While this would benefit to be handled elsewhere either by the MTD layer or by the limited drivers, let's update this comment to reflect the reality. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a86c4fa93115..cd0be91bdefa 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -67,9 +67,11 @@ struct mtd_erase_region_info { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more than one OOB area at one go, but not write. - * The interface assumes that the OOB write requests program only one page's - * OOB area. + * Note, some MTD drivers do not allow you to write more than one OOB area at + * one go. If you try to do that on such an MTD device, -EINVAL will be + * returned. If you want to make your implementation portable on all kind of MTD + * devices you should split the write request into several sub-requests when the + * request crosses a page boundary. */ struct mtd_oob_ops { unsigned int mode; -- cgit From 0d6030ac041f6835974deb88a1a9c299b4adc3ad Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 18 Jul 2018 10:42:17 +0200 Subject: mtd: rawnand: Expose _notsupp() helpers for raw page accessors Some implementations simply can't disable their ECC engine. Expose helpers returning -ENOTSUPP so that the caller knows that raw accesses are not supported instead of silently falling back to non-raw accessors. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 11c2426fc363..f60fad29eae6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1681,10 +1681,14 @@ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip, /* Default read_page_raw implementation */ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); +int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + u8 *buf, int oob_required, int page); /* Default write_page_raw implementation */ int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page); +int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int oob_required, int page); /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); -- cgit From 60ed982a4e78ff938824a750dbac8a10e5b472ef Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Thu, 21 Jun 2018 16:48:26 -0700 Subject: PCI/AER: Move internal declarations to drivers/pci/pci.h Since pci_aer_init() and pci_no_aer() are used only internally, move their declarations to the PCI internal header file. Also, no one cares about return value of pci_aer_init(), so make it void. Signed-off-by: Rajat Jain Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..b4ffea05c999 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1468,13 +1468,9 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } #endif #ifdef CONFIG_PCIEAER -void pci_no_aer(void); bool pci_aer_available(void); -int pci_aer_init(struct pci_dev *dev); #else -static inline void pci_no_aer(void) { } static inline bool pci_aer_available(void) { return false; } -static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; } #endif #ifdef CONFIG_PCIE_ECRC -- cgit From db89ccbe52c7885644ba578c7771e57620f879b1 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Sat, 30 Jun 2018 15:07:17 -0500 Subject: PCI/AER: Define aer_stats structure for AER capable devices Define a structure to hold the AER statistics. There are 2 groups of statistics: dev_* counters that are to be collected for all AER capable devices and rootport_* counters that are collected for all (AER capable) rootports only. Allocate and free this structure when device is added or released (thus counters survive the lifetime of the device). Signed-off-by: Rajat Jain Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index b4ffea05c999..6bc0aa0fc33f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -299,6 +299,7 @@ struct pci_dev { u8 hdr_type; /* PCI header type (`multi' flag masked out) */ #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ + struct aer_stats *aer_stats; /* AER stats for this device */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit From 3eca993740b8eb40f514b90b1877a4dbcf0a6710 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:34 -0400 Subject: timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset() If architecture does not support exact boot time, it is challenging to estimate boot time without having a reference to the current persistent clock value. Yet, it cannot read the persistent clock time again, because this may lead to math discrepancies with the caller of read_boot_clock64() who have read the persistent clock at a different time. This is why it is better to provide two values simultaneously: the persistent clock value, and the boot time. Replace read_boot_clock64() with: read_persistent_wall_and_boot_offset(wall_time, boot_offset) Where wall_time is returned by read_persistent_clock() And boot_offset is wall_time - boot time, which defaults to 0. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-16-pasha.tatashin@oracle.com --- include/linux/timekeeping.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 86bc2026efce..686bc27acef0 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); -extern void read_boot_clock64(struct timespec64 *ts); +void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock, + struct timespec64 *boot_offset); extern int update_persistent_clock64(struct timespec64 now); /* -- cgit From 5d2a4e91a541cb04d20d11602f0f9340291322ac Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:41 -0400 Subject: sched/clock: Move sched clock initialization and merge with generic clock sched_clock_postinit() initializes a generic clock on systems where no other clock is provided. This function may be called only after timekeeping_init(). Rename sched_clock_postinit to generic_clock_inti() and call it from sched_clock_init(). Move the call for sched_clock_init() until after time_init(). Suggested-by: Peter Zijlstra Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-23-pasha.tatashin@oracle.com --- include/linux/sched_clock.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 411b52e424e1..abe28d5cb3f4 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -9,17 +9,16 @@ #define LINUX_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK -extern void sched_clock_postinit(void); +extern void generic_sched_clock_init(void); extern void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate); #else -static inline void sched_clock_postinit(void) { } +static inline void generic_sched_clock_init(void) { } static inline void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { - ; } #endif -- cgit From 381634cad15b711e033a2638d558232b60f753f6 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 19 Jul 2018 18:04:11 -0500 Subject: PCI: Hide pci_reset_bridge_secondary_bus() from drivers Rename pci_reset_bridge_secondary_bus() to pci_bridge_secondary_bus_reset() and move the declaration from linux/pci.h to drivers/pci.h to be used internally in PCI directory only. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6ba818449095..03520ff23d73 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1103,7 +1103,6 @@ int pci_reset_bus(struct pci_bus *bus); int pci_try_reset_bus(struct pci_bus *bus); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); -int pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); -- cgit From 811c5cb37df46b0cd714dbd053d19cdb97d08cff Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 19 Jul 2018 18:04:12 -0500 Subject: PCI: Unify try slot and bus reset API Drivers are expected to call pci_try_reset_slot() or pci_try_reset_bus() by querying if a system supports hotplug or not. A survey showed that most drivers don't do this and we are leaking hotplug capability to the user. Hide pci_try_slot_reset() from drivers and embed into pci_try_bus_reset(). Change pci_try_reset_bus() parameter from struct pci_bus to struct pci_dev. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 03520ff23d73..19fb82559145 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1097,10 +1097,9 @@ int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_reset_slot(struct pci_slot *slot); -int pci_try_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); int pci_reset_bus(struct pci_bus *bus); -int pci_try_reset_bus(struct pci_bus *bus); +int pci_try_reset_bus(struct pci_dev *dev); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit From fe32e2fa656c29d5d25f959f8e6168ac405d9ab4 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 19 Jul 2018 18:04:14 -0500 Subject: PCI: Deprecate pci_reset_bus() and pci_reset_slot() functions pci_reset_bus() and pci_reset_slot() functions are not being used by any code. Remove them from the kernel in favor of pci_try_reset_bus() and pci_try_reset_slot() functions. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 19fb82559145..cf53ecb50789 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1096,9 +1096,7 @@ int pci_reset_function(struct pci_dev *dev); int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); -int pci_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); -int pci_reset_bus(struct pci_bus *bus); int pci_try_reset_bus(struct pci_dev *dev); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); -- cgit From c6a44ba950d147e15fe6dab6455a52f91d8fe625 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 19 Jul 2018 18:04:15 -0500 Subject: PCI: Rename pci_try_reset_bus() to pci_reset_bus() Now that the old implementation of pci_reset_bus() is gone, replace pci_try_reset_bus() with pci_reset_bus(). Compared to the old implementation, new code will fail immmediately with -EAGAIN if object lock cannot be obtained. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index cf53ecb50789..307b336496f6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1097,7 +1097,7 @@ int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); -int pci_try_reset_bus(struct pci_dev *dev); +int pci_reset_bus(struct pci_dev *dev); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit From b976690f5db26fbc7c2be413bfa0fbd270547a94 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 18 Jul 2018 11:41:06 +0200 Subject: x86/mm/pti: Introduce pti_finalize() Introduce a new function to finalize the kernel mappings for the userspace page-table after all ro/nx protections have been applied to the kernel mappings. Also move the call to pti_clone_kernel_text() to that function so that it will run on 32 bit kernels too. Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Tested-by: Pavel Machek Cc: "H . Peter Anvin" Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Jiri Kosina Cc: Boris Ostrovsky Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: Andrea Arcangeli Cc: Waiman Long Cc: "David H . Gutteridge" Cc: joro@8bytes.org Link: https://lkml.kernel.org/r/1531906876-13451-30-git-send-email-joro@8bytes.org --- include/linux/pti.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pti.h b/include/linux/pti.h index 0174883a935a..1a941efcaa62 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -6,6 +6,7 @@ #include #else static inline void pti_init(void) { } +static inline void pti_finalize(void) { } #endif #endif -- cgit From 985e695074d35768cb04d65f58bca45f7bf1a99d Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 13 Jul 2018 14:06:42 +0200 Subject: timekeeping/ntp: Constify some function arguments Add 'const' to some function arguments and variables to make it easier to read the code. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Ondrej Mosnacek [jstultz: Also fixup pre-existing checkpatch warnings for prototype arguments with no variable name] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 86bc2026efce..edace6b656e9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -177,7 +177,7 @@ static inline time64_t ktime_get_clocktai_seconds(void) extern bool timekeeping_rtc_skipsuspend(void); extern bool timekeeping_rtc_skipresume(void); -extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); +extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /* * struct system_time_snapshot - simultaneous raw/real time capture with -- cgit From 39232ed5a1793f67b11430c43ed8a9ed6e96c6eb Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 17 Jul 2018 15:55:16 +0800 Subject: time: Introduce one suspend clocksource to compensate the suspend time On some hardware with multiple clocksources, we have coarse grained clocksources that support the CLOCK_SOURCE_SUSPEND_NONSTOP flag, but which are less than ideal for timekeeping whereas other clocksources can be better candidates but halt on suspend. Currently, the timekeeping core only supports timing suspend using CLOCK_SOURCE_SUSPEND_NONSTOP clocksources if that clocksource is the current clocksource for timekeeping. As a result, some architectures try to implement read_persistent_clock64() using those non-stop clocksources, but isn't really ideal, which will introduce more duplicate code. To fix this, provide logic to allow a registered SUSPEND_NONSTOP clocksource, which isn't the current clocksource, to be used to calculate the suspend time. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Cc: Daniel Lezcano Reviewed-by: Thomas Gleixner Reviewed-by: Daniel Lezcano Suggested-by: Thomas Gleixner Signed-off-by: Baolin Wang [jstultz: minor tweaks to merge with previous resume changes] Signed-off-by: John Stultz --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7dff1963c185..308918928767 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -194,6 +194,9 @@ extern void clocksource_suspend(void); extern void clocksource_resume(void); extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); +extern void +clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles); +extern u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 now); extern u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); -- cgit From b51dab46c6adfbb7e80cd0f59ae17b8a30d94b1a Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 18 Jul 2018 06:27:22 -0700 Subject: qed: Add qed APIs for PHY module query. This patch adds qed APIs for reading the PHY module. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b4040023cbfb..8cd34645e892 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -759,6 +759,9 @@ struct qed_generic_tlvs { u8 mac[QED_TLV_MAC_COUNT][ETH_ALEN]; }; +#define QED_I2C_DEV_ADDR_A0 0xA0 +#define QED_I2C_DEV_ADDR_A2 0xA2 + #define QED_NVM_SIGNATURE 0x12435687 enum qed_nvm_flash_cmd { @@ -1026,6 +1029,18 @@ struct qed_common_ops { * @param enabled - true iff WoL should be enabled. */ int (*update_wol) (struct qed_dev *cdev, bool enabled); + +/** + * @brief read_module_eeprom + * + * @param cdev + * @param buf - buffer + * @param dev_addr - PHY device memory region + * @param offset - offset into eeprom contents to be read + * @param len - buffer length, i.e., max bytes to be read + */ + int (*read_module_eeprom)(struct qed_dev *cdev, + char *buf, u8 dev_addr, u32 offset, u32 len); }; #define MASK_FIELD(_name, _value) \ -- cgit From eef5ba1aa148ca5e6deb1e0aa1de797fa4e12cb7 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Jun 2018 10:51:53 +0200 Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer, then adds a new i2c_smbus_xfer function that simply grabs the lock while calling the unlocked variant. This is not perfectly equivalent, since i2c_smbus_xfer was callable from atomic/irq context if you happened to end up emulating SMBus with an I2C transfer, and that is no longer the case with this patch. It is unknown (to me) if anything depends on that quirk, but it seems fragile enough to simply break those cases and require them to call i2c_transfer directly instead. While at it, for consistency rename the 2nd to last argument (size) of the i2c_smbus_xfer declaration to protocol and remove the surplus extern marker. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 254cd34eeae2..465afb092fa7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, and probably just as fast. Note that we use i2c_adapter here, because you do not need a specific smbus adapter to call this function. */ -extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, - unsigned short flags, char read_write, u8 command, - int size, union i2c_smbus_data *data); +s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, + unsigned short flags, char read_write, u8 command, + int protocol, union i2c_smbus_data *data); + +/* Unlocked flavor */ +s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, + unsigned short flags, char read_write, u8 command, + int protocol, union i2c_smbus_data *data); /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. */ -- cgit From 488dee96bb62f0b3d9e678cf42574034d5b033a5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:47 +0000 Subject: kernfs: allow creating kernfs objects with arbitrary uid/gid This change allows creating kernfs files and directories with arbitrary uid/gid instead of always using GLOBAL_ROOT_UID/GID by extending kernfs_create_dir_ns() and kernfs_create_file_ns() with uid/gid arguments. The "simple" kernfs_create_file() and kernfs_create_dir() are left alone and always create objects belonging to the global root. When creating symlinks ownership (uid/gid) is taken from the target kernfs object. Co-Developed-by: Tyler Hicks Signed-off-by: Dmitry Torokhov Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller --- include/linux/kernfs.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index ab25c8b6d9e3..814643f7ee52 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -15,6 +15,7 @@ #include #include #include +#include #include struct file; @@ -325,12 +326,14 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, + kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, - const char *name, - umode_t mode, loff_t size, + const char *name, umode_t mode, + kuid_t uid, kgid_t gid, + loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key); @@ -415,12 +418,14 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, - umode_t mode, void *priv, const void *ns) + umode_t mode, kuid_t uid, kgid_t gid, + void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, - umode_t mode, loff_t size, const struct kernfs_ops *ops, + umode_t mode, kuid_t uid, kgid_t gid, + loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } @@ -498,12 +503,15 @@ static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { - return kernfs_create_dir_ns(parent, name, mode, priv, NULL); + return kernfs_create_dir_ns(parent, name, mode, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + priv, NULL); } static inline struct kernfs_node * kernfs_create_file_ns(struct kernfs_node *parent, const char *name, - umode_t mode, loff_t size, const struct kernfs_ops *ops, + umode_t mode, kuid_t uid, kgid_t gid, + loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns) { struct lock_class_key *key = NULL; @@ -511,15 +519,17 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name, #ifdef CONFIG_DEBUG_LOCK_ALLOC key = (struct lock_class_key *)&ops->lockdep_key; #endif - return __kernfs_create_file(parent, name, mode, size, ops, priv, ns, - key); + return __kernfs_create_file(parent, name, mode, uid, gid, + size, ops, priv, ns, key); } static inline struct kernfs_node * kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv) { - return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL); + return kernfs_create_file_ns(parent, name, mode, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + size, ops, priv, NULL); } static inline int kernfs_remove_by_name(struct kernfs_node *parent, -- cgit From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:48 +0000 Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users Normally kobjects and their sysfs representation belong to global root, however it is not necessarily the case for objects in separate namespaces. For example, objects in separate network namespace logically belong to the container's root and not global root. This change lays groundwork for allowing network namespace objects ownership to be transferred to container's root user by defining get_ownership() callback in ktype structure and using it in sysfs code to retrieve desired uid/gid when creating sysfs objects for given kobject. Co-Developed-by: Tyler Hicks Signed-off-by: Dmitry Torokhov Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller --- include/linux/kobject.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 7f6f93c3df9c..b49ff230beba 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -26,6 +26,7 @@ #include #include #include +#include #define UEVENT_HELPER_PATH_LEN 256 #define UEVENT_NUM_ENVP 32 /* number of env pointers */ @@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero( extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); +extern void kobject_get_ownership(struct kobject *kobj, + kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { @@ -122,6 +125,7 @@ struct kobj_type { struct attribute **default_attrs; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); + void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { -- cgit From 9944e894c1266dc8515c82d1ff752d681215526b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:50 +0000 Subject: driver core: set up ownership of class devices in sysfs Plumb in get_ownership() callback for devices belonging to a class so that they can be created with uid/gid different from global root. This will allow network devices in a container to belong to container's root and not global root. Signed-off-by: Dmitry Torokhov Reviewed-by: Tyler Hicks Signed-off-by: David S. Miller --- include/linux/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 055a69dbcd18..fe6ccb6dc119 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -384,6 +384,9 @@ int subsys_virtual_register(struct bus_type *subsys, * @shutdown_pre: Called at shut-down time before driver shutdown. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. + * @get_ownership: Allows class to specify uid/gid of the sysfs directories + * for the devices belonging to the class. Usually tied to + * device's namespace. * @pm: The default device power management operations of this class. * @p: The private data of the driver core, no one other than the * driver core can touch this. @@ -413,6 +416,8 @@ struct class { const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); + void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid); + const struct dev_pm_ops *pm; struct subsys_private *p; -- cgit From a3134fb09e0bc5bee76e13bf863173b86f21cf87 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 23 May 2018 17:35:21 -0700 Subject: drivers: soc: Add LLCC driver LLCC (Last Level Cache Controller) provides additional cache memory in the system. LLCC is partitioned into multiple slices and each slice gets its own priority, size, ID and other config parameters. LLCC driver programs these parameters for each slice. Clients that are assigned to use LLCC need to get information such size & ID of the slice they get and activate or deactivate the slice as needed. LLCC driver provides API for the clients to perform these operations. Signed-off-by: Channagoud Kadabi Signed-off-by: Rishabh Bhatnagar Reviewed-by: Evan Green Reviewed-by: Rob Herring Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/llcc-qcom.h | 180 +++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 include/linux/soc/qcom/llcc-qcom.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h new file mode 100644 index 000000000000..7e3b9c605ab2 --- /dev/null +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. + * + */ + +#include +#ifndef __LLCC_QCOM__ +#define __LLCC_QCOM__ + +#define LLCC_CPUSS 1 +#define LLCC_VIDSC0 2 +#define LLCC_VIDSC1 3 +#define LLCC_ROTATOR 4 +#define LLCC_VOICE 5 +#define LLCC_AUDIO 6 +#define LLCC_MDMHPGRW 7 +#define LLCC_MDM 8 +#define LLCC_CMPT 10 +#define LLCC_GPUHTW 11 +#define LLCC_GPU 12 +#define LLCC_MMUHWT 13 +#define LLCC_CMPTDMA 15 +#define LLCC_DISP 16 +#define LLCC_VIDFW 17 +#define LLCC_MDMHPFX 20 +#define LLCC_MDMPNG 21 +#define LLCC_AUDHW 22 + +/** + * llcc_slice_desc - Cache slice descriptor + * @slice_id: llcc slice id + * @slice_size: Size allocated for the llcc slice + */ +struct llcc_slice_desc { + u32 slice_id; + size_t slice_size; +}; + +/** + * llcc_slice_config - Data associated with the llcc slice + * @usecase_id: usecase id for which the llcc slice is used + * @slice_id: llcc slice id assigned to each slice + * @max_cap: maximum capacity of the llcc slice + * @priority: priority of the llcc slice + * @fixed_size: whether the llcc slice can grow beyond its size + * @bonus_ways: bonus ways associated with llcc slice + * @res_ways: reserved ways associated with llcc slice + * @cache_mode: mode of the llcc slice + * @probe_target_ways: Probe only reserved and bonus ways on a cache miss + * @dis_cap_alloc: Disable capacity based allocation + * @retain_on_pc: Retain through power collapse + * @activate_on_init: activate the slice on init + */ +struct llcc_slice_config { + u32 usecase_id; + u32 slice_id; + u32 max_cap; + u32 priority; + bool fixed_size; + u32 bonus_ways; + u32 res_ways; + u32 cache_mode; + u32 probe_target_ways; + bool dis_cap_alloc; + bool retain_on_pc; + bool activate_on_init; +}; + +/** + * llcc_drv_data - Data associated with the llcc driver + * @regmap: regmap associated with the llcc device + * @cfg: pointer to the data structure for slice configuration + * @lock: mutex associated with each slice + * @cfg_size: size of the config data table + * @max_slices: max slices as read from device tree + * @bcast_off: Offset of the broadcast bank + * @num_banks: Number of llcc banks + * @bitmap: Bit map to track the active slice ids + * @offsets: Pointer to the bank offsets array + */ +struct llcc_drv_data { + struct regmap *regmap; + const struct llcc_slice_config *cfg; + struct mutex lock; + u32 cfg_size; + u32 max_slices; + u32 bcast_off; + u32 num_banks; + unsigned long *bitmap; + u32 *offsets; +}; + +#if IS_ENABLED(CONFIG_QCOM_LLCC) +/** + * llcc_slice_getd - get llcc slice descriptor + * @uid: usecase_id of the client + */ +struct llcc_slice_desc *llcc_slice_getd(u32 uid); + +/** + * llcc_slice_putd - llcc slice descritpor + * @desc: Pointer to llcc slice descriptor + */ +void llcc_slice_putd(struct llcc_slice_desc *desc); + +/** + * llcc_get_slice_id - get slice id + * @desc: Pointer to llcc slice descriptor + */ +int llcc_get_slice_id(struct llcc_slice_desc *desc); + +/** + * llcc_get_slice_size - llcc slice size + * @desc: Pointer to llcc slice descriptor + */ +size_t llcc_get_slice_size(struct llcc_slice_desc *desc); + +/** + * llcc_slice_activate - Activate the llcc slice + * @desc: Pointer to llcc slice descriptor + */ +int llcc_slice_activate(struct llcc_slice_desc *desc); + +/** + * llcc_slice_deactivate - Deactivate the llcc slice + * @desc: Pointer to llcc slice descriptor + */ +int llcc_slice_deactivate(struct llcc_slice_desc *desc); + +/** + * qcom_llcc_probe - program the sct table + * @pdev: platform device pointer + * @table: soc sct table + * @sz: Size of the config table + */ +int qcom_llcc_probe(struct platform_device *pdev, + const struct llcc_slice_config *table, u32 sz); +#else +static inline struct llcc_slice_desc *llcc_slice_getd(u32 uid) +{ + return NULL; +} + +static inline void llcc_slice_putd(struct llcc_slice_desc *desc) +{ + +}; + +static inline int llcc_get_slice_id(struct llcc_slice_desc *desc) +{ + return -EINVAL; +} + +static inline size_t llcc_get_slice_size(struct llcc_slice_desc *desc) +{ + return 0; +} +static inline int llcc_slice_activate(struct llcc_slice_desc *desc) +{ + return -EINVAL; +} + +static inline int llcc_slice_deactivate(struct llcc_slice_desc *desc) +{ + return -EINVAL; +} +static inline int qcom_llcc_probe(struct platform_device *pdev, + const struct llcc_slice_config *table, u32 sz) +{ + return -ENODEV; +} + +static inline int qcom_llcc_remove(struct platform_device *pdev) +{ + return -ENODEV; +} +#endif + +#endif -- cgit From a0b1561f846191a1967e8f5a26d8ef59c1b9162a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 12 Jun 2018 15:23:13 +0200 Subject: firmware: qcom: scm: add a dummy qcom_scm_assign_mem() Add a dummy qcom_scm_assign_mem() to enable building drivers when CONFIG_COMPILE_TEST=y && CONFIG_QCOM_SCM=n. All other qcom_scm_* functions already have a dummy version. Signed-off-by: Niklas Cassel Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index b401b962afff..5d65521260b3 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -87,6 +87,10 @@ static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, static inline int qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; } static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } +static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, + unsigned int *src, + struct qcom_scm_vmperm *newvm, + int dest_cnt) { return -ENODEV; } static inline void qcom_scm_cpu_power_down(u32 flags) {} static inline u32 qcom_scm_get_version(void) { return 0; } static inline u32 -- cgit From c4db9c1e8c70bc60e392da8a485bcfb035d559c2 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 20 Jul 2018 10:47:23 +0900 Subject: efi: Deduplicate efi_open_volume() There's one ARM, one x86_32 and one x86_64 version of efi_open_volume() which can be folded into a single shared version by masking their differences with the efi_call_proto() macro introduced by commit: 3552fdf29f01 ("efi: Allow bitness-agnostic protocol calls"). To be able to dereference the device_handle attribute from the efi_loaded_image_t table in an arch- and bitness-agnostic manner, introduce the efi_table_attr() macro (which already exists for x86) to arm and arm64. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Ard Biesheuvel Cc: Andy Shevchenko Cc: Hans de Goede Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180720014726.24031-7-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e190652f5ef9..401e4b254e30 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -894,6 +894,16 @@ typedef struct _efi_file_handle { void *flush; } efi_file_handle_t; +typedef struct { + u64 revision; + u32 open_volume; +} efi_file_io_interface_32_t; + +typedef struct { + u64 revision; + u64 open_volume; +} efi_file_io_interface_64_t; + typedef struct _efi_file_io_interface { u64 revision; int (*open_volume)(struct _efi_file_io_interface *, -- cgit From 40c6f9c28ef03f2f2c3ee58c2447a6e6b9a713f2 Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Fri, 15 Jun 2018 12:39:27 -0600 Subject: nvme.h: resync with nvme-cli Added some feature ids present in nvme-cli but not kernel. Signed-off-by: Revanth Rajashekar Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2950ce957656..80dfedcf0bf7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -749,6 +749,11 @@ enum { NVME_FEAT_HOST_MEM_BUF = 0x0d, NVME_FEAT_TIMESTAMP = 0x0e, NVME_FEAT_KATO = 0x0f, + NVME_FEAT_HCTM = 0x10, + NVME_FEAT_NOPSC = 0x11, + NVME_FEAT_RRL = 0x12, + NVME_FEAT_PLM_CONFIG = 0x13, + NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, -- cgit From 977d5ad39f3ea12ac0bd51d75020cea5ecdca235 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Jul 2018 17:19:11 +0300 Subject: ACPI: Convert ACPI reference args to generic fwnode reference args Convert all users of struct acpi_reference_args to more generic fwnode_reference_args. This will 1) avoid an ACPI specific references to device nodes with integer arguments as well as 2) allow making references to nodes other than device nodes in ACPI. As a by-product, convert the fwnode interger arguments to u64. The arguments were 64-bit integers on ACPI but the fwnode arguments were just 32-bit. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 17 +++++------------ include/linux/fwnode.h | 2 +- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e54f40974eb0..11cf39719fd8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1058,27 +1058,20 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) /* Device properties */ -#define MAX_ACPI_REFERENCE_ARGS 8 -struct acpi_reference_args { - struct acpi_device *adev; - size_t nargs; - u64 args[MAX_ACPI_REFERENCE_ARGS]; -}; - #ifdef CONFIG_ACPI int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, - struct acpi_reference_args *args); + struct fwnode_reference_args *args); static inline int acpi_node_get_property_reference( const struct fwnode_handle *fwnode, const char *name, size_t index, - struct acpi_reference_args *args) + struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, name, index, - MAX_ACPI_REFERENCE_ARGS, args); + NR_FWNODE_REFERENCE_ARGS, args); } int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, @@ -1169,7 +1162,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, static inline int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, - struct acpi_reference_args *args) + struct fwnode_reference_args *args) { return -ENXIO; } @@ -1177,7 +1170,7 @@ __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, static inline int acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, - struct acpi_reference_args *args) + struct fwnode_reference_args *args) { return -ENXIO; } diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 4fe8f289b3f6..faebf0ca0686 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -45,7 +45,7 @@ struct fwnode_endpoint { struct fwnode_reference_args { struct fwnode_handle *fwnode; unsigned int nargs; - unsigned int args[NR_FWNODE_REFERENCE_ARGS]; + u64 args[NR_FWNODE_REFERENCE_ARGS]; }; /** -- cgit From 0ef7478639c5165a672f01b4024aacfffa951813 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Jul 2018 17:19:14 +0300 Subject: ACPI: property: Make the ACPI graph API private The fwnode graph API is preferred over the ACPI graph API. Therefore make the ACPI graph API private, and use it as a back-end for the fwnode graph API only. Unused functionality is removed while the functionality actually used remains the same. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 11cf39719fd8..de8d3d3fa651 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1089,14 +1089,6 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); -struct fwnode_handle * -acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, - struct fwnode_handle *prev); -int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, - struct fwnode_handle **remote, - struct fwnode_handle **port, - struct fwnode_handle **endpoint); - struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); -- cgit From 0e3fd810c4f41dbd63fb7caddc11684959176727 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jul 2018 16:46:41 +0200 Subject: Documentation: document ktime_get_*() APIs As Dave Chinner points out, we don't have a proper documentation for the ktime_get() family of interfaces, making it rather unclear which of the over 30 (!) interfaces one should actually use in a driver or elsewhere in the kernel. I wrote up an explanation from how I personally see the interfaces, documenting what each of the functions do and hopefully making it a bit clearer which should be used where. This is the first time I tried writing .rst format documentation, so in addition to any mistakes in the content, I probably also introduce nonstandard formatting ;-) I first tried to add an extra section to Documentation/timers/timekeeping.txt, but this is currently not included in the generated API, and it seems useful to have the API docs as part of what gets generated in https://www.kernel.org/doc/html/latest/core-api/index.html#core-utilities instead, so I started a new file there. I also considered adding the documentation inline in the include/linux/timekeeping.h header, but couldn't figure out how to do that in a way that would result both in helpful inline comments as well as readable html output, so I settled for the latter, with a small note pointing to it from the header. Cc: Dave Chinner Cc: John Stultz Cc: Thomas Gleixner Cc: Stephen Boyd Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Corbet --- include/linux/timekeeping.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 86bc2026efce..947b1b8d2d01 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -20,6 +20,21 @@ extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); +/* + * ktime_get() family: read the current time in a multitude of ways, + * + * The default time reference is CLOCK_MONOTONIC, starting at + * boot time but not counting the time spent in suspend. + * For other references, use the functions with "real", "clocktai", + * "boottime" and "raw" suffixes. + * + * To get the time in a different format, use the ones wit + * "ns", "ts64" and "seconds" suffix. + * + * See Documentation/core-api/timekeeping.rst for more details. + */ + + /* * timespec64 based interfaces */ -- cgit From f53aaa31cce7b543e407da7e97690a700206f7b9 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 16 Jul 2018 15:22:01 -0700 Subject: net/mlx5: FW tracer, implement tracer logic Implement FW tracer logic and registers access, initialization and cleanup flows. Initializing the tracer will be part of load one flow, as multiple PFs will try to acquire ownership but only one will succeed and will be the tracer owner. Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 957199c20a0f..86cb0ebf92fa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -816,6 +816,8 @@ struct mlx5_clock { struct mlx5_pps pps_info; }; +struct mlx5_fw_tracer; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -860,6 +862,7 @@ struct mlx5_core_dev { struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct page *clock_info_page; + struct mlx5_fw_tracer *tracer; }; struct mlx5_db { -- cgit From c71ad41ccb0c29fce95149b74786574b354c9dda Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 7 Feb 2018 11:08:56 +0200 Subject: net/mlx5: FW tracer, events handling The tracer has one event, event 0x26, with two subtypes: - Subtype 0: Ownership change - Subtype 1: Traces available An ownership change occurs in the following cases: 1- Owner releases his ownership, in this case, an event will be sent to inform others to reattempt acquire ownership. 2- Ownership was taken by a higher priority tool, in this case the owner should understand that it lost ownership, and go through tear down flow. The second subtype indicates that there are traces in the trace buffer, in this case, the driver polls the tracer buffer for new traces, parse them and prepares the messages for printing. The HW starts tracing from the first address in the tracer buffer. Driver receives an event notifying that new trace block exists. HW posts a timestamp event at the last 8B of every 256B block. Comparing the timestamp to the last handled timestamp would indicate that this is a new trace block. Once the new timestamp is detected, the entire block is considered valid. Block validation and parsing, should be done after copying the current block to a different location, in order to avoid block overwritten during processing. Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0566c6a94805..d489494b0a84 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -332,6 +332,13 @@ enum mlx5_event { MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21, + + MLX5_EVENT_TYPE_DEVICE_TRACER = 0x26, +}; + +enum { + MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0, + MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1, }; enum { -- cgit From 51bbf9bee34ff5d4006d266f24a54dc9c1669eb5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:43 -0500 Subject: PCI: hotplug: Demidlayer registration with the core When a hotplug driver calls pci_hp_register(), all steps necessary for registration are carried out in one go, including creation of a kobject and addition to sysfs. That's a problem for pciehp once it's converted to enable/disable the slot exclusively from the IRQ thread: The thread needs to be spawned after creation of the kobject (because it uses the kobject's name), but before addition to sysfs (because it will handle enable/disable requests submitted via sysfs). pci_hp_deregister() does offer a ->release callback that's invoked after deletion from sysfs and before destruction of the kobject. But because pci_hp_register() doesn't offer a counterpart, hotplug drivers' ->probe and ->remove code becomes asymmetric, which is error prone as recently discovered use-after-free bugs in pciehp's ->remove hook have shown. In a sense, this appears to be a case of the midlayer antipattern: "The core thesis of the "midlayer mistake" is that midlayers are bad and should not exist. That common functionality which it is so tempting to put in a midlayer should instead be provided as library routines which can [be] used, augmented, or ignored by each bottom level driver independently. Thus every subsystem that supports multiple implementations (or drivers) should provide a very thin top layer which calls directly into the bottom layer drivers, and a rich library of support code that eases the implementation of those drivers. This library is available to, but not forced upon, those drivers." -- Neil Brown (2009), https://lwn.net/Articles/336262/ The presence of midlayer traits in the PCI hotplug core might be ascribed to its age: When it was introduced in February 2002, the blessings of a library approach might not have been well known: https://git.kernel.org/tglx/history/c/a8a2069f432c For comparison, the driver core does offer split functions for creating a kobject (device_initialize()) and addition to sysfs (device_add()) as an alternative to carrying out everything at once (device_register()). This was introduced in October 2002: https://git.kernel.org/tglx/history/c/8b290eb19962 The odd ->release callback in the PCI hotplug core was added in 2003: https://git.kernel.org/tglx/history/c/69f8d663b595 Clearly, a library approach would not force every hotplug driver to implement a ->release callback, but rather allow the driver to remove the sysfs files, release its data structures and finally destroy the kobject. Alternatively, a driver may choose to remove everything with pci_hp_deregister(), then release its data structures. To this end, offer drivers pci_hp_initialize() and pci_hp_add() as a split-up version of pci_hp_register(). Likewise, offer pci_hp_del() and pci_hp_destroy() as a split-up version of pci_hp_deregister(). Eliminate the ->release callback and move its code into each driver's teardown routine. Declare pci_hp_deregister() void, in keeping with the usual kernel pattern that enablement can fail, but disablement cannot. It only returned an error if the caller passed in a NULL pointer or a slot which has never or is no longer registered or is sharing its name with another slot. Those would be bugs, so WARN about them. Few hotplug drivers actually checked the return value and those that did only printed a useless error message to dmesg. Remove that. For most drivers the conversion was straightforward since it doesn't matter whether the code in the ->release callback is executed before or after destruction of the kobject. But in the case of ibmphp, it was unclear to me whether setting slot_cur->ctrl and slot_cur->bus_on to NULL needs to happen before the kobject is destroyed, so I erred on the side of caution and ensured that the order stays the same. Another nontrivial case is pnv_php, I've found the list and kref logic difficult to understand, however my impression was that it is safe to delete the list element and drop the references until after the kobject is destroyed. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Andy Shevchenko # drivers/platform/x86 Cc: Rafael J. Wysocki Cc: Len Brown Cc: Scott Murray Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Gavin Shan Cc: Sebastian Ott Cc: Gerald Schaefer Cc: Corentin Chary Cc: Darren Hart Cc: Andy Shevchenko --- include/linux/pci_hotplug.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index cf5e22103f68..a6d6650a0490 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -80,15 +80,12 @@ struct hotplug_slot_info { * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot * @info: pointer to the &struct hotplug_slot_info for the initial values for * this slot. - * @release: called during pci_hp_deregister to free memory allocated in a - * hotplug_slot structure. * @private: used by the hotplug pci controller driver to store whatever it * needs. */ struct hotplug_slot { struct hotplug_slot_ops *ops; struct hotplug_slot_info *info; - void (*release) (struct hotplug_slot *slot); void *private; /* Variables below this are for use only by the hotplug pci core. */ @@ -104,13 +101,23 @@ static inline const char *hotplug_slot_name(const struct hotplug_slot *slot) int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *pbus, int nr, const char *name, struct module *owner, const char *mod_name); -int pci_hp_deregister(struct hotplug_slot *slot); +int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus, int nr, + const char *name, struct module *owner, + const char *mod_name); +int pci_hp_add(struct hotplug_slot *slot); + +void pci_hp_del(struct hotplug_slot *slot); +void pci_hp_destroy(struct hotplug_slot *slot); +void pci_hp_deregister(struct hotplug_slot *slot); + int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot, struct hotplug_slot_info *info); /* use a define to avoid include chaining to get THIS_MODULE & friends */ #define pci_hp_register(slot, pbus, devnr, name) \ __pci_hp_register(slot, pbus, devnr, name, THIS_MODULE, KBUILD_MODNAME) +#define pci_hp_initialize(slot, bus, nr, name) \ + __pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME) /* PCI Setting Record (Type 0) */ struct hpp_type0 { -- cgit From 3730cf4dd70b6a36e48d58a862120311411b77f5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Jul 2018 12:47:56 +0200 Subject: netlink: do not store start function in netlink_cb ->start() is called once when dump is being initialized, there is no need to store it in netlink_cb. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index f3075d6c7e82..71f121b66ca8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -170,7 +170,6 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; - int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); -- cgit From e5cda42c16d89720c29678f51d95a119490ef7d8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 23 Jul 2018 19:52:57 +0200 Subject: ARM: exynos: Define EINT_WAKEUP_MASK registers for S5Pv210 and Exynos5433 S5Pv210 and Exynos5433/Exynos7 have different address of EINT_WAKEUP_MASK register. Rename existing S5P_EINT_WAKEUP_MASK to avoid confusion and add new ones. Signed-off-by: Krzysztof Kozlowski Cc: Tomasz Figa Cc: Sylwester Nawrocki Acked-by: Tomasz Figa Tested-by: Marek Szyprowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 66dcb9ec273a..eb0d240df7a7 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -42,7 +42,7 @@ #define EXYNOS_SWRESET 0x0400 #define S5P_WAKEUP_STAT 0x0600 -#define S5P_EINT_WAKEUP_MASK 0x0604 +#define EXYNOS_EINT_WAKEUP_MASK 0x0604 #define S5P_WAKEUP_MASK 0x0608 #define S5P_WAKEUP_MASK2 0x0614 @@ -180,6 +180,9 @@ #define S5P_CORE_WAKEUP_FROM_LOCAL_CFG (0x3 << 8) #define S5P_CORE_AUTOWAKEUP_EN (1 << 31) +/* Only for S5Pv210 */ +#define S5PV210_EINT_WAKEUP_MASK 0xC004 + /* Only for EXYNOS4210 */ #define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154 #define S5P_CMU_RESET_LCD1_LOWPWR 0x1174 @@ -641,6 +644,7 @@ | EXYNOS5420_KFC_USE_STANDBY_WFI3) /* For EXYNOS5433 */ +#define EXYNOS5433_EINT_WAKEUP_MASK (0x060C) #define EXYNOS5433_USBHOST30_PHY_CONTROL (0x0728) #define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) #define EXYNOS5433_PAD_RETENTION_MMC2_OPTION (0x30C8) -- cgit From a8be2af0218cf037704dc2e733bf56d6560fa324 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 23 Jul 2018 19:52:58 +0200 Subject: pinctrl: samsung: Write external wakeup interrupt mask The pinctrl driver defines an IRQ chip which handles external wakeup interrupts, therefore from logical point of view, it is the owner of external interrupt mask. The register controlling the mask belongs to Power Management Unit address space so it has to be accessed with PMU syscon regmap handle. This mask should be written to hardware during system suspend. Till now ARMv7 machine code was responsible for this which created a dependency between pin controller driver and arch/arm/mach code. Try to rework this dependency so the pinctrl driver will write external wakeup interrupt mask during late suspend. Impact on ARMv7 designs (S5Pv210 and Exynos) ============================================ This duplicates setting mask with existing machine code arch/arm/mach-exynos/suspend.c and arch/arm/mach-s5pv210/pm.c but it is not a problem - the wakeup mask register will be written twice. The machine code will be cleaned up later. The difference between implementation here and ARMv7 machine code (arch/arm/mach-*) is the time of writing the mask: 1. The machine code is writing the mask quite late during system suspend path, after offlining secondary CPUs and just before doing actual suspend. 2. The implementation in pinctrl driver uses late suspend ops, therefore it will write the mask much earlier. Hopefully late enough, after all drivers will enable or disable their interrupt wakeups (enable_irq_wake() etc). Impact on ARMv8 designs (Exynos5433 and Exynos7) ================================================ The Suspend to RAM was not supported and external wakeup interrupt mask was not written to HW. This change brings us one step closer to supporting Suspend to RAM. Signed-off-by: Krzysztof Kozlowski Cc: Tomasz Figa Cc: Sylwester Nawrocki Acked-by: Tomasz Figa Tested-by: Marek Szyprowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index eb0d240df7a7..5addaf5ccbce 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -42,6 +42,8 @@ #define EXYNOS_SWRESET 0x0400 #define S5P_WAKEUP_STAT 0x0600 +/* Value for EXYNOS_EINT_WAKEUP_MASK disabling all external wakeup interrupts */ +#define EXYNOS_EINT_WAKEUP_MASK_DISABLED 0xffffffff #define EXYNOS_EINT_WAKEUP_MASK 0x0604 #define S5P_WAKEUP_MASK 0x0608 #define S5P_WAKEUP_MASK2 0x0614 -- cgit From 071f52fbce6161706d070ceada5accb81630bf02 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Jul 2018 09:52:32 +0200 Subject: block: remove bio_clone_kmalloc Unused now. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ab221c517f4e..b861baa59454 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -443,12 +443,6 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); } -static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) -{ - return bio_clone_bioset(bio, gfp_mask, NULL); - -} - extern blk_qc_t submit_bio(struct bio *); extern void bio_endio(struct bio *); -- cgit From c55183c9aaa00d2bbb578169a480e31aff3d397c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Jul 2018 09:52:34 +0200 Subject: block: unexport bio_clone_bioset Now only used by the bounce code, so move it there and mark the function static. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b861baa59454..51371740d2a8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -429,7 +429,6 @@ extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); -extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio_set fs_bio_set; -- cgit From 6e30396767508101eacec8b93b068e8905e660dc Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 20 Jun 2018 22:32:42 +0530 Subject: sched/numa: Remove redundant field 'numa_entry' is a struct list_head defined in task_struct, but never used. No functional change. Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1529514181-9842-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 43731fe51c97..e0f4f56c9310 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1017,7 +1017,6 @@ struct task_struct { u64 last_sum_exec_runtime; struct callback_head numa_work; - struct list_head numa_entry; struct numa_group *numa_group; /* -- cgit From fd2efaa4eb5317c3a86357a83a7d456a1b86a0ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Jul 2018 12:30:11 +0100 Subject: locking/atomics: Rework ordering barriers Currently architectures can override __atomic_op_*() to define the barriers used before/after a relaxed atomic when used to build acquire/release/fence variants. This has the unfortunate property of requiring the architecture to define the full wrapper for the atomics, rather than just the barriers they care about, and gets in the way of generating atomics which can be easily read. Instead, this patch has architectures define an optional set of barriers: * __atomic_acquire_fence() * __atomic_release_fence() * __atomic_pre_full_fence() * __atomic_post_full_fence() ... which uses to build the wrappers. It would be nice if we could undef these, along with the __atomic_op_*() wrappers, but that would break the cmpxchg() wrappers, which are written in preprocessor. Undefs would have been nice, but alas. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrea Parri Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: dvyukov@google.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-7-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 8e04f1f69bd9..1e8e88bdaf09 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -38,40 +38,46 @@ * barriers on top of the relaxed variant. In the case where the relaxed * variant is already fully ordered, no additional barriers are needed. * - * Besides, if an arch has a special barrier for acquire/release, it could - * implement its own __atomic_op_* and use the same framework for building - * variants - * - * If an architecture overrides __atomic_op_acquire() it will probably want - * to define smp_mb__after_spinlock(). + * If an architecture overrides __atomic_acquire_fence() it will probably + * want to define smp_mb__after_spinlock(). */ -#ifndef __atomic_op_acquire +#ifndef __atomic_acquire_fence +#define __atomic_acquire_fence smp_mb__after_atomic +#endif + +#ifndef __atomic_release_fence +#define __atomic_release_fence smp_mb__before_atomic +#endif + +#ifndef __atomic_pre_full_fence +#define __atomic_pre_full_fence smp_mb__before_atomic +#endif + +#ifndef __atomic_post_full_fence +#define __atomic_post_full_fence smp_mb__after_atomic +#endif + #define __atomic_op_acquire(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ - smp_mb__after_atomic(); \ + __atomic_acquire_fence(); \ __ret; \ }) -#endif -#ifndef __atomic_op_release #define __atomic_op_release(op, args...) \ ({ \ - smp_mb__before_atomic(); \ + __atomic_release_fence(); \ op##_relaxed(args); \ }) -#endif -#ifndef __atomic_op_fence #define __atomic_op_fence(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret; \ - smp_mb__before_atomic(); \ + __atomic_pre_full_fence(); \ __ret = op##_relaxed(args); \ - smp_mb__after_atomic(); \ + __atomic_post_full_fence(); \ __ret; \ }) -#endif /* atomic_add_return_relaxed */ #ifndef atomic_add_return_relaxed -- cgit From d27fb99f62af7b79c542d161aa5155ed57271ddc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 23 Jul 2018 22:42:48 +0100 Subject: dma-mapping: relax warning for per-device areas The reasons why dma_free_attrs() should not be called from IRQ context are not necessarily obvious and somewhat buried in the development history, so let's start by documenting the warning itself to help anyone who does happen to hit it and wonder what the deal is. However, this check turns out to be slightly over-restrictive for the way that per-device memory has been spliced into the general API, since for that case we know that dma_declare_coherent_memory() has created an appropriate CPU mapping for the entire area and nothing dynamic should be happening. Given that the usage model for per-device memory is often more akin to streaming DMA than 'real' coherent DMA (e.g. allocating and freeing space to copy short-lived packets in and out), it is also somewhat more reasonable for those operations to happen in IRQ handlers for such devices. Therefore, let's move the irqs_disabled() check down past the per-device area hook, so that that gets a chance to resolve the request before we reach definite "you're doing it wrong" territory. Reported-by: Fredrik Noring Tested-by: Fredrik Noring Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f9cc309507d9..1db6a6b46d0d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -538,10 +538,17 @@ static inline void dma_free_attrs(struct device *dev, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); - WARN_ON(irqs_disabled()); if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; + /* + * On non-coherent platforms which implement DMA-coherent buffers via + * non-cacheable remaps, ops->free() may call vunmap(). Thus getting + * this far in IRQ context is a) at risk of a BUG_ON() or trying to + * sleep on some machines, and b) an indication that the driver is + * probably misusing the coherent API anyway. + */ + WARN_ON(irqs_disabled()); if (!ops->free || !cpu_addr) return; -- cgit From 3c507b8af638c67d4a80d70091d2057ecb01e8a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 23 Jul 2018 21:40:07 +0200 Subject: net: phy: add helper phy_polling_mode Add a helper for checking whether polling is used to detect PHY status changes. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 075c2f770d3e..cd6f637cbbfb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -824,6 +824,16 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev) return phydev->irq != PHY_POLL && phydev->irq != PHY_IGNORE_INTERRUPT; } +/** + * phy_polling_mode - Convenience function for testing whether polling is + * used to detect PHY status changes + * @phydev: the phy_device struct + */ +static inline bool phy_polling_mode(struct phy_device *phydev) +{ + return phydev->irq == PHY_POLL; +} + /** * phy_is_internal - Convenience function for testing if a PHY is internal * @phydev: the phy_device struct -- cgit From 6f4ceee9305dc3fe74099159b460f4b56b506f1d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 24 Jul 2018 14:26:04 -0400 Subject: cpu/hotplug: Add a cpus_read_trylock() function There are use cases where it can be useful to have a cpus_read_trylock() function to work around circular lock dependency problem involving the cpu_hotplug_lock. Signed-off-by: Waiman Long Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a97a63eef59f..e850bfea3e84 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -103,6 +103,7 @@ extern void cpus_write_lock(void); extern void cpus_write_unlock(void); extern void cpus_read_lock(void); extern void cpus_read_unlock(void); +extern int cpus_read_trylock(void); extern void lockdep_assert_cpus_held(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -115,6 +116,7 @@ static inline void cpus_write_lock(void) { } static inline void cpus_write_unlock(void) { } static inline void cpus_read_lock(void) { } static inline void cpus_read_unlock(void) { } +static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } -- cgit From 359f642700f2ff05d9c94cd9216c97af7b8e9553 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 25 Jul 2018 10:22:58 -0400 Subject: block: move bio_integrity_{intervals,bytes} into blkdev.h This allows bio_integrity_bytes() to be called from drivers instead of open coding it. Acked-by: Martin K. Petersen Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 331a6cb8805f..050d599f5ea9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1865,6 +1865,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req, bip_next->bip_vec[0].bv_offset); } +/** + * bio_integrity_intervals - Return number of integrity intervals for a bio + * @bi: blk_integrity profile for device + * @sectors: Size of the bio in 512-byte sectors + * + * Description: The block layer calculates everything in 512 byte + * sectors but integrity metadata is done in terms of the data integrity + * interval size of the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. + */ +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return sectors >> (bi->interval_exp - 9); +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1938,6 +1960,18 @@ static inline bool integrity_req_gap_front_merge(struct request *req, return false; } +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ struct block_device_operations { -- cgit From 038709071328ff68a936c6b8c33a24a805eea3c5 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Wed, 13 Jun 2018 16:37:17 +0200 Subject: can: dev: enable multi-queue for SocketCAN devices The existing SocketCAN implementation provides alloc_candev() to allocate a CAN device using a single Tx and Rx queue. This can lead to priority inversion in case the single Tx queue is already full with low priority messages and a high priority message needs to be sent while the bus is fully loaded with medium priority messages. This problem can be solved by using the existing multi-queue support of the network subsytem. The commit makes it possible to use multi-queue in the CAN subsystem in the same way it is used in the Ethernet subsystem by adding an alloc_candev_mqs() call and accompanying macros. With this support a CAN device can use multi-queue qdisc (e.g. mqprio) to avoid the aforementioned priority inversion. The exisiting functionality of alloc_candev() is the same as before. CAN devices need to have prioritized multiple hardware queues or are able to abort waiting for arbitration to make sensible use of multi-queues. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Reviewed-by: Heiko Schocher Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 055aaf5ed9af..a83e1f632eb7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -143,7 +143,12 @@ u8 can_dlc2len(u8 can_dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_len2dlc(u8 len); -struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); +struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, + unsigned int txqs, unsigned int rxqs); +#define alloc_candev(sizeof_priv, echo_skb_max) \ + alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1) +#define alloc_candev_mq(sizeof_priv, echo_skb_max, count) \ + alloc_candev_mqs(sizeof_priv, echo_skb_max, count, count) void free_candev(struct net_device *dev); /* a candev safe wrapper around netdev_priv */ -- cgit From 1fb2e3f276ddafee81073d884f599cd2574c31e2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Jul 2018 18:05:36 +0200 Subject: lib/crc: Move polynomial definition to separate header Allow other drivers and parts of kernel to use the same define for CRC32 polynomial, instead of duplicating it in many places. This code does not bring any functional changes, except moving existing code. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- include/linux/crc32poly.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/crc32poly.h (limited to 'include/linux') diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h new file mode 100644 index 000000000000..7ad5aa92d3c7 --- /dev/null +++ b/include/linux/crc32poly.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CRC32_POLY_H +#define _LINUX_CRC32_POLY_H + +/* + * There are multiple 16-bit CRC polynomials in common use, but this is + * *the* standard CRC-32 polynomial, first popularized by Ethernet. + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 + */ +#define CRCPOLY_LE 0xedb88320 +#define CRCPOLY_BE 0x04c11db7 + +/* + * This is the CRC32c polynomial, as outlined by Castagnoli. + * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ + * x^8+x^6+x^0 + */ +#define CRC32C_POLY_LE 0x82F63B78 + +#endif /* _LINUX_CRC32_POLY_H */ -- cgit From e37f2f93afe594682702439ca34eb8130598cdf2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Jul 2018 18:05:37 +0200 Subject: lib/crc: Use consistent naming for CRC-32 polynomials Header was defining CRCPOLY_LE/BE and CRC32C_POLY_LE but in fact all of them are CRC-32 polynomials so use consistent naming. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- include/linux/crc32poly.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h index 7ad5aa92d3c7..62c4b7790a28 100644 --- a/include/linux/crc32poly.h +++ b/include/linux/crc32poly.h @@ -7,8 +7,8 @@ * *the* standard CRC-32 polynomial, first popularized by Ethernet. * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ -#define CRCPOLY_LE 0xedb88320 -#define CRCPOLY_BE 0x04c11db7 +#define CRC32_POLY_LE 0xedb88320 +#define CRC32_POLY_BE 0x04c11db7 /* * This is the CRC32c polynomial, as outlined by Castagnoli. -- cgit From f07d141fe9430cdf9f8a65a87c4136bd83b8ab2e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 23 Jul 2018 23:16:07 +0100 Subject: dma-mapping: Generalise dma_32bit_limit flag Whilst the notion of an upstream DMA restriction is most commonly seen in PCI host bridges saddled with a 32-bit native interface, a more general version of the same issue can exist on complex SoCs where a bus or point-to-point interconnect link from a device's DMA master interface to another component along the path to memory (often an IOMMU) may carry fewer address bits than the interfaces at both ends nominally support. In order to properly deal with this, the first step is to expand the dma_32bit_limit flag into an arbitrary mask. To minimise the impact on existing code, we'll make sure to only consider this new mask valid if set. That makes sense anyway, since a mask of zero would represent DMA not being wired up at all, and that would be better handled by not providing valid ops in the first place. Signed-off-by: Robin Murphy Acked-by: Ard Biesheuvel Signed-off-by: Christoph Hellwig --- include/linux/device.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 055a69dbcd18..6d3b000be57e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -886,6 +886,8 @@ struct dev_links_info { * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all * hardware supports 64-bit addresses for consistent allocations * such descriptors. + * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA + * limit than the device itself supports. * @dma_pfn_offset: offset of DMA memory range relatively of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. @@ -912,8 +914,6 @@ struct dev_links_info { * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. - * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself - * indicates support for a higher limit in the dma_mask field. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -967,6 +967,7 @@ struct device { not all hardware supports 64 bit addresses for consistent allocations such descriptors. */ + u64 bus_dma_mask; /* upstream dma_mask constraint */ unsigned long dma_pfn_offset; struct device_dma_parameters *dma_parms; @@ -1002,7 +1003,6 @@ struct device { bool offline_disabled:1; bool offline:1; bool of_node_reused:1; - bool dma_32bit_limit:1; }; static inline struct device *kobj_to_dev(struct kobject *kobj) -- cgit From 9b89bc3857a6c0dfda18ddae2a42c114ecc32753 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 12 May 2018 18:18:12 +0200 Subject: nvme.h: add support for the log specific field NVMe 1.3 added a new log specific field to the get log page CQ defintion, add it to our get_log_page SQ structure. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 80dfedcf0bf7..39f05b0b8c7f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -885,7 +885,7 @@ struct nvme_get_log_page_command { __u64 rsvd2[2]; union nvme_data_ptr dptr; __u8 lid; - __u8 rsvd10; + __u8 lsp; /* upper 4 bits reserved */ __le16 numdl; __le16 numdu; __u16 rsvd11; -- cgit From 1a37621658fe06b10cf8bac02c32304d2a1c888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 May 2018 18:53:57 +0200 Subject: nvme.h: add ANA definitions Add various defintions from NVMe 1.3 TP 4004. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- include/linux/nvme.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 39f05b0b8c7f..64c9175723de 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -242,7 +242,12 @@ struct nvme_id_ctrl { __le32 sanicap; __le32 hmminds; __le16 hmmaxd; - __u8 rsvd338[174]; + __u8 rsvd338[4]; + __u8 anatt; + __u8 anacap; + __le32 anagrpmax; + __le32 nanagrpid; + __u8 rsvd352[160]; __u8 sqes; __u8 cqes; __le16 maxcmd; @@ -258,7 +263,8 @@ struct nvme_id_ctrl { __le16 acwu; __u8 rsvd534[2]; __le32 sgls; - __u8 rsvd540[228]; + __le32 mnan; + __u8 rsvd544[224]; char subnqn[256]; __u8 rsvd1024[768]; __le32 ioccsz; @@ -312,7 +318,9 @@ struct nvme_id_ns { __le16 nabspf; __le16 noiob; __u8 nvmcap[16]; - __u8 rsvd64[40]; + __u8 rsvd64[28]; + __le32 anagrpid; + __u8 rsvd96[8]; __u8 nguid[16]; __u8 eui64[8]; struct nvme_lbaf lbaf[16]; @@ -425,6 +433,32 @@ struct nvme_effects_log { __u8 resv[2048]; }; +enum nvme_ana_state { + NVME_ANA_OPTIMIZED = 0x01, + NVME_ANA_NONOPTIMIZED = 0x02, + NVME_ANA_INACCESSIBLE = 0x03, + NVME_ANA_PERSISTENT_LOSS = 0x04, + NVME_ANA_CHANGE = 0x0f, +}; + +struct nvme_ana_group_desc { + __le32 grpid; + __le32 nnsids; + __le64 chgcnt; + __u8 state; + __u8 rsvd17[7]; + __le32 nsids[]; +}; + +/* flag for the log specific field of the ANA log */ +#define NVME_ANA_LOG_RGO (1 << 0) + +struct nvme_ana_rsp_hdr { + __le64 chgcnt; + __le16 ngrps; + __le16 rsvd10[3]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -444,11 +478,13 @@ enum { enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, + NVME_AER_NOTICE_ANA = 0x03, }; enum { NVME_AEN_CFG_NS_ATTR = 1 << 8, NVME_AEN_CFG_FW_ACT = 1 << 9, + NVME_AEN_CFG_ANA_CHANGE = 1 << 11, }; struct nvme_lba_range_type { @@ -763,6 +799,7 @@ enum { NVME_LOG_FW_SLOT = 0x03, NVME_LOG_CHANGED_NS = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_ANA = 0x0c, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1185,6 +1222,13 @@ enum { NVME_SC_ACCESS_DENIED = 0x286, NVME_SC_UNWRITTEN_BLOCK = 0x287, + /* + * Path-related Errors: + */ + NVME_SC_ANA_PERSISTENT_LOSS = 0x301, + NVME_SC_ANA_INACCESSIBLE = 0x302, + NVME_SC_ANA_TRANSITION = 0x303, + NVME_SC_DNR = 0x4000, }; -- cgit From 22a65aa8b1a84ca429c0fc8415dee5681ab36eb3 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 25 Dec 2017 18:40:52 +0200 Subject: net/mlx5e: Vxlan, check maximum number of UDP ports The NIC has a limited number of offloaded VXLAN UDP ports (usually 4). Instead of letting the firmware fail when trying to add more ports than it can handle, let the driver check it on its own. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 22f54bedfaae..60c2308fe062 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -668,7 +668,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xd]; + u8 max_vxlan_udp_ports[0x8]; + u8 reserved_at_38[0x6]; u8 max_geneve_opt_len[0x1]; u8 tunnel_stateless_geneve_rx[0x1]; -- cgit From 358aa5ce288aa1085f0f3ef9f315119563fa6541 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 9 May 2018 13:28:00 -0700 Subject: net/mlx5e: Vxlan, move vxlan logic to core driver Move vxlan logic and objects to mlx5 core dirver. Since it going to be used from different mlx5 interfaces. e.g. mlx5e PF NIC netdev and mlx5e E-Switch representors. Signed-off-by: Saeed Mahameed Reviewed-by: Or Gerlitz --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fd0aaa5568fe..54f385cc8811 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -818,6 +818,7 @@ struct mlx5_clock { }; struct mlx5_fw_tracer; +struct mlx5_vxlan; struct mlx5_core_dev { struct pci_dev *pdev; @@ -850,6 +851,7 @@ struct mlx5_core_dev { atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; + struct mlx5_vxlan *vxlan; struct { struct mlx5_rsvd_gids reserved_gids; u32 roce_en; -- cgit From 627448e85c766587f6fdde1ea3886d6615081c77 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 28 Jun 2018 18:13:33 +0300 Subject: tpm: separate cmd_ready/go_idle from runtime_pm Fix tpm ptt initialization error: tpm tpm0: A TPM error (378) occurred get tpm pcr allocation. We cannot use go_idle cmd_ready commands via runtime_pm handles as with the introduction of localities this is no longer an optional feature, while runtime pm can be not enabled. Though cmd_ready/go_idle provides a power saving, it's also a part of TPM2 protocol and should be called explicitly. This patch exposes cmd_read/go_idle via tpm class ops and removes runtime pm support as it is not used by any driver. When calling from nested context always use both flags: TPM_TRANSMIT_UNLOCKED and TPM_TRANSMIT_RAW. Both are needed to resolve tpm spaces and locality request recursive calls to tpm_transmit(). TPM_TRANSMIT_RAW should never be used standalone as it will fail on double locking. While TPM_TRANSMIT_UNLOCKED standalone should be called from non-recursive locked contexts. New wrappers are added tpm_cmd_ready() and tpm_go_idle() to streamline tpm_try_transmit code. tpm_crb no longer needs own power saving functions and can drop using tpm_pm_suspend/resume. This patch cannot be really separated from the locality fix. Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Cc: stable@vger.kernel.org Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Signed-off-by: Tomas Winkler Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 06639fb6ab85..8eb5e5ebe136 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -43,6 +43,8 @@ struct tpm_class_ops { u8 (*status) (struct tpm_chip *chip); bool (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); + int (*go_idle)(struct tpm_chip *chip); + int (*cmd_ready)(struct tpm_chip *chip); int (*request_locality)(struct tpm_chip *chip, int loc); int (*relinquish_locality)(struct tpm_chip *chip, int loc); void (*clk_enable)(struct tpm_chip *chip, bool value); -- cgit From aaae81536351f831fe6719e5b4e6afbadc5e5f85 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 26 Jun 2018 15:09:30 -0400 Subject: tpm: Implement tpm_default_chip() to find a TPM chip Implement tpm_default_chip() to find the first TPM chip and return it to the caller while increasing the reference count on its device. This function can be used by other subsystems, such as IMA, to find the system's default TPM chip and use it for all subsequent TPM operations. Signed-off-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8eb5e5ebe136..4609b94142d4 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -63,6 +63,7 @@ extern int tpm_seal_trusted(struct tpm_chip *chip, extern int tpm_unseal_trusted(struct tpm_chip *chip, struct trusted_key_payload *payload, struct trusted_key_options *options); +extern struct tpm_chip *tpm_default_chip(void); #else static inline int tpm_is_tpm2(struct tpm_chip *chip) { @@ -98,5 +99,9 @@ static inline int tpm_unseal_trusted(struct tpm_chip *chip, { return -ENODEV; } +static inline struct tpm_chip *tpm_default_chip(void) +{ + return NULL; +} #endif #endif -- cgit From 5cbf777cfdf6e5a7b7149006e4881a255da78fdd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 27 Jul 2018 16:37:28 +0800 Subject: route: add support for directed broadcast forwarding This patch implements the feature described in rfc1812#section-5.3.5.2 and rfc2644. It allows the router to forward directed broadcast when sysctl bc_forwarding is enabled. Note that this feature could be done by iptables -j TEE, but it would cause some problems: - target TEE's gateway param has to be set with a specific address, and it's not flexible especially when the route wants forward all directed broadcasts. - this duplicates the directed broadcasts so this may cause side effects to applications. Besides, to keep consistent with other os router like BSD, it's also necessary to implement it in the route rx path. Note that route cache needs to be flushed when bc_forwarding is changed. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 27650f1bff3d..c759d1cbcedd 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -93,6 +93,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) +#define IN_DEV_BFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), BC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ -- cgit From 7a4c53bee3324ac00bf964aa2f82d15d279e86e4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 27 Jul 2018 13:43:23 -0700 Subject: net: report invalid mtu value via netlink extack If an invalid MTU value is set through rtnetlink return extra error information instead of putting message in kernel log. For other cases where there is no visible API, keep the error report in the log. Example: # ip li set dev enp12s0 mtu 10000 Error: mtu greater than device maximum. # ifconfig enp12s0 mtu 10000 SIOCSIFMTU: Invalid argument # dmesg | tail -1 [ 2047.795467] enp12s0: mtu greater than device maximum Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c1295c7a452e..9c917467a2c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3546,6 +3546,8 @@ int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); +int dev_set_mtu_ext(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); int dev_change_tx_queue_len(struct net_device *, unsigned long); void dev_set_group(struct net_device *, int); -- cgit From 0969a204bfdaf7470d2666736b90a8595ae671e9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Jul 2018 17:47:01 +0300 Subject: gpiolib: Use GPIOD_OUT_{LOW,HIGH} macros in open drain ones There should not be anything more than stated by the name of newly introduced constants, i.e. GPIOD_OUT_LOW_OPEN_DRAIN == GPIOD_OUT_LOW + open drain and nothing more. Make it better to read and slightly more robust by using GPIOD_OUT_LOW and GPIOD_OUT_HIGH constants with open drain flag. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index e8aaf34dd65d..21ddbe440030 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -41,11 +41,8 @@ enum gpiod_flags { GPIOD_OUT_LOW = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT, GPIOD_OUT_HIGH = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL, - GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET | - GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_OPEN_DRAIN, - GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET | - GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL | - GPIOD_FLAGS_BIT_OPEN_DRAIN, + GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_OPEN_DRAIN, + GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_OPEN_DRAIN, }; #ifdef CONFIG_GPIOLIB -- cgit From 51c23b47e6b8590ea7a6a6776ffb21810ece73bf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 13 Jul 2018 14:54:45 +0200 Subject: netfilter: nf_osf: add nf_osf_find() This new function returns the OS genre as a string. Plan is to use to from the new nft_osf extension. Note that this doesn't yet support ttl options, but it could be easily extended to do so. Tested-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_osf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h index 0e114c492fb8..aee460fcbd31 100644 --- a/include/linux/netfilter/nf_osf.h +++ b/include/linux/netfilter/nf_osf.h @@ -1,3 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFOSF_H +#define _NFOSF_H + #include /* Initial window size option state machine: multiple of mss, mtu or @@ -31,3 +35,8 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, int hooknum, struct net_device *in, struct net_device *out, const struct nf_osf_info *info, struct net *net, const struct list_head *nf_osf_fingers); + +const char *nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers); + +#endif /* _NFOSF_H */ -- cgit From f609e7a0f3956e3b03af735b83600a4eef2a04ec Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 7 Jun 2018 15:55:25 -0400 Subject: media: sii9234: remove unused header The include/linux/platform_data/media/sii9234.h header file is unused, let's remove it. Signed-off-by: Corentin Labbe Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/sii9234.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/platform_data/media/sii9234.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/sii9234.h b/include/linux/platform_data/media/sii9234.h deleted file mode 100644 index 6a4a809fe9a3..000000000000 --- a/include/linux/platform_data/media/sii9234.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Driver header for SII9234 MHL converter chip. - * - * Copyright (c) 2011 Samsung Electronics, Co. Ltd - * Contact: Tomasz Stanislawski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef SII9234_H -#define SII9234_H - -/** - * @gpio_n_reset: GPIO driving nRESET pin - */ - -struct sii9234_platform_data { - int gpio_n_reset; -}; - -#endif /* SII9234_H */ -- cgit From 489cae632fc04927e8ef36ac8d8847193a41df3b Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 24 Jul 2018 10:33:19 -0600 Subject: include: Move ascii85 functions from i915 to linux/ascii85.h The i915 DRM driver very cleverly used ascii85 encoding for their GPU state file. Move the encode functions to a general header file to support other drivers that might be interested in the same functionality. v4: Make the return value const char * as suggested by Chris Wilson v3: Fix error_puts -> err_puts pointed out by the 01.org bot v2: Update API to be cleaner for the caller as suggested by Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- include/linux/ascii85.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/ascii85.h (limited to 'include/linux') diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h new file mode 100644 index 000000000000..4cc40201273e --- /dev/null +++ b/include/linux/ascii85.h @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2008 Intel Corporation + * Copyright (c) 2018 The Linux Foundation. All rights reserved. + */ + +#ifndef _ASCII85_H_ +#define _ASCII85_H_ + +#include + +#define ASCII85_BUFSZ 6 + +static inline long +ascii85_encode_len(long len) +{ + return DIV_ROUND_UP(len, 4); +} + +static inline const char * +ascii85_encode(u32 in, char *out) +{ + int i; + + if (in == 0) + return "z"; + + out[5] = '\0'; + for (i = 5; i--; ) { + out[i] = '!' + in % 85; + in /= 85; + } + + return out; +} + +#endif -- cgit From c454edc21b12dd7d416de6c81555e87aaec9685c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 30 Jul 2018 10:10:01 -0400 Subject: block: don't account for split bio's size in cgroup stats We need to check in blkcg_bio_issue_check if the bio is flagged as QUEUE_ENTERED, because if it is then we've already accounted for the size of the IO in the cgroup stats. We can still however account for the extra IO since it'll be another request. Reported-by: Tejun Heo Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3bed5e02a873..f7b910768306 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -769,8 +769,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, - bio->bi_iter.bi_size); + /* + * If the bio is flagged with BIO_QUEUE_ENTERED it means this + * is a split bio and we would have already accounted for the + * size of the bio. + */ + if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, + bio->bi_iter.bi_size); blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } -- cgit From ddd0bc756983dc4d19000a4fe021b4c7f9d59aab Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 30 Jul 2018 00:15:31 +0300 Subject: block: move ref_tag calculation func to the block layer Currently this function is implemented in the scsi layer, but it's actual place should be the block layer since T10-PI is a general data integrity feature that is used in the nvme protocol as well. Suggested-by: Christoph Hellwig Cc: Martin K. Petersen Signed-off-by: Max Gurtovoy Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index c6aa8a3c42ed..c40511f4e63d 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -37,6 +37,16 @@ struct t10_pi_tuple { #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff) #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff) +static inline u32 t10_pi_ref_tag(struct request *rq) +{ +#ifdef CONFIG_BLK_DEV_INTEGRITY + return blk_rq_pos(rq) >> + (rq->q->integrity.interval_exp - 9) & 0xffffffff; +#else + return -1U; +#endif +} + extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; extern const struct blk_integrity_profile t10_pi_type3_crc; -- cgit From 10c41ddd61323b27b447bc8e18296ac6c06107ad Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 30 Jul 2018 00:15:32 +0300 Subject: block: move dif_prepare/dif_complete functions to block layer Currently these functions are implemented in the scsi layer, but their actual place should be the block layer since T10-PI is a general data integrity feature that is used in the nvme protocol as well. Also, use the tuple size from the integrity profile since it may vary between integrity types. Suggested-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Max Gurtovoy Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index c40511f4e63d..5a427c289f58 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -51,5 +51,8 @@ extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; extern const struct blk_integrity_profile t10_pi_type3_crc; extern const struct blk_integrity_profile t10_pi_type3_ip; +extern void t10_pi_prepare(struct request *rq, u8 protection_type); +extern void t10_pi_complete(struct request *rq, u8 protection_type, + unsigned int intervals); #endif -- cgit From 760c435e0f85ed19e48a90d746ce1de2cd02def7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 19 Jul 2018 00:09:12 +0200 Subject: mtd: rawnand: make subop helpers return unsigned values A report from Colin Ian King pointed a CoverityScan issue where error values on these helpers where not checked in the drivers. These helpers can error out only in case of a software bug in driver code, not because of a runtime/hardware error. Hence, let's WARN_ON() in this case and return 0 which is harmless anyway. Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation") Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index f60fad29eae6..598d356de83f 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1007,14 +1007,14 @@ struct nand_subop { unsigned int last_instr_end_off; }; -int nand_subop_get_addr_start_off(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_data_start_off(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_data_len(const struct nand_subop *subop, - unsigned int op_id); +unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_data_len(const struct nand_subop *subop, + unsigned int op_id); /** * struct nand_op_parser_addr_constraints - Constraints for address instructions -- cgit From 7da45139d264f3b7ead04e00ebb29b189cf9826e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 17 Jul 2018 09:08:02 +0200 Subject: mtd: rawnand: better name for the controller structure In the raw NAND core, a NAND chip is described by a nand_chip structure, while a NAND controller is described with a nand_hw_control structure which is not very meaningful. Rename this structure nand_controller. As the structure gets renamed, it is logical to also rename the core function initializing it from nand_hw_control_init() to nand_controller_init(). Lastly, the 'hwcontrol' entry of the nand_chip structure is not meaningful neither while it has the role of fallback when no controller structure is provided by the driver (the controller driver is dumb and can only control a single chip). Thus, it is renamed dummy_controller. Signed-off-by: Miquel Raynal Acked-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 598d356de83f..93a2678e0f0d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -510,20 +510,21 @@ struct nand_id { }; /** - * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices + * struct nand_controller - Structure used to describe a NAND controller + * * @lock: protection lock * @active: the mtd device which holds the controller currently * @wq: wait queue to sleep on if a NAND operation is in * progress used instead of the per chip wait queue * when a hw controller is available. */ -struct nand_hw_control { +struct nand_controller { spinlock_t lock; struct nand_chip *active; wait_queue_head_t wq; }; -static inline void nand_hw_control_init(struct nand_hw_control *nfc) +static inline void nand_controller_init(struct nand_controller *nfc) { nfc->active = NULL; spin_lock_init(&nfc->lock); @@ -1197,7 +1198,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buf_align: minimum buffer alignment required by a platform - * @hwcontrol: platform-specific hardware control structure + * @dummy_controller: dummy controller implementation for drivers that can + * only control a single chip * @erase: [REPLACEABLE] erase function * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring * data from array to read regs (tR). @@ -1333,11 +1335,11 @@ struct nand_chip { flstate_t state; uint8_t *oob_poi; - struct nand_hw_control *controller; + struct nand_controller *controller; struct nand_ecc_ctrl ecc; unsigned long buf_align; - struct nand_hw_control hwcontrol; + struct nand_controller dummy_controller; uint8_t *bbt; struct nand_bbt_descr *bbt_td; -- cgit From 05b54c7bac906c443fd0b23ab8954e0560b33e5c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 19 Jul 2018 01:05:46 +0200 Subject: mtd: rawnand: add hooks that may be called during nand_scan() In order to remove the limitation that forbids dynamic allocation in nand_scan_ident(), we must create a path that will be the same for all controller drivers. The idea is to use nand_scan() instead of the widely used nand_scan_ident()/nand_scan_tail() couple. In order to achieve this, controller drivers will need to adjust some parameters between these two functions depending on the NAND chip wired on them. This takes the form of two new hooks (->{attach,detach}_chip()) that are placed in a new nand_controller_ops structure, which is then attached to the nand_controller object at driver initialization time. ->attach_chip() is called between nand_scan_ident() and nand_scan_tail(), and ->detach_chip() is called in the error path of nand_scan() and in nand_cleanup(). Note that some NAND controller drivers don't have a dedicated nand_controller object and instead rely on the default/dummy one embedded in nand_chip. If you're in this case and still want to initialize the controller ops, you'll have to manipulate chip->dummy_controller directly. Last but not least, it's worth mentioning that we plan to move some of the controller related hooks placed in nand_chip into nand_controller_ops to make the separation between NAND chip and NAND controller methods clearer. Signed-off-by: Miquel Raynal Acked-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 93a2678e0f0d..fbd6b29cf22c 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -509,6 +509,25 @@ struct nand_id { int len; }; +/** + * struct nand_controller_ops - Controller operations + * + * @attach_chip: this method is called after the NAND detection phase after + * flash ID and MTD fields such as erase size, page size and OOB + * size have been set up. ECC requirements are available if + * provided by the NAND chip or device tree. Typically used to + * choose the appropriate ECC configuration and allocate + * associated resources. + * This hook is optional. + * @detach_chip: free all resources allocated/claimed in + * nand_controller_ops->attach_chip(). + * This hook is optional. + */ +struct nand_controller_ops { + int (*attach_chip)(struct nand_chip *chip); + void (*detach_chip)(struct nand_chip *chip); +}; + /** * struct nand_controller - Structure used to describe a NAND controller * @@ -517,11 +536,13 @@ struct nand_id { * @wq: wait queue to sleep on if a NAND operation is in * progress used instead of the per chip wait queue * when a hw controller is available. + * @ops: NAND controller operations. */ struct nand_controller { spinlock_t lock; struct nand_chip *active; wait_queue_head_t wq; + const struct nand_controller_ops *ops; }; static inline void nand_controller_init(struct nand_controller *nfc) -- cgit From 98732da1a08ebb666983d469981a8b994e77d556 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:50 +0200 Subject: mtd: rawnand: do not export nand_scan_[ident|tail]() anymore Both nand_scan_ident() and nand_scan_tail() helpers used to be called directly from controller drivers that needed to tweak some ECC-related parameters before nand_scan_tail(). This separation prevented dynamic allocations during the phase of NAND identification, which was inconvenient. All controller drivers have been moved to use nand_scan(), in conjunction with the chip->ecc.[attach|detach]_chip() hooks that actually do the required tweaking sequence between both ident/tail calls, allowing programmers to use dynamic allocation as they need all across the scanning sequence. Declare nand_scan_[ident|tail]() statically now. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fbd6b29cf22c..71571ed23a20 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -35,17 +35,6 @@ static inline int nand_scan(struct mtd_info *mtd, int max_chips) return nand_scan_with_ids(mtd, max_chips, NULL); } -/* - * Separate phases of nand_scan(), allowing board driver to intervene - * and override command or ECC setup according to flash type. - */ -int nand_scan_ident(struct mtd_info *mtd, int max_chips, - struct nand_flash_dev *table); -int nand_scan_tail(struct mtd_info *mtd); - -/* Unregister the MTD device and free resources held by the NAND device */ -void nand_release(struct mtd_info *mtd); - /* Internal helper for board drivers which need to override command function */ void nand_wait_ready(struct mtd_info *mtd); @@ -1745,8 +1734,13 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, int nand_write_data_op(struct nand_chip *chip, const void *buf, unsigned int len, bool force_8bit); -/* Free resources held by the NAND device */ +/* + * Free resources held by the NAND device, must be called on error after a + * sucessful nand_scan(). + */ void nand_cleanup(struct nand_chip *chip); +/* Unregister the MTD device and calls nand_cleanup() */ +void nand_release(struct mtd_info *mtd); /* Default extended ID decoding function */ void nand_decode_ext_id(struct nand_chip *chip); -- cgit From 2023f1fa216f30b1877d65be2057fbaf0bbd49b3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:51 +0200 Subject: mtd: rawnand: allocate model parameter dynamically Thanks to the migration of all drivers to use nand_scan() and the related nand_controller_ops, we can now allocate data during the detection phase. Let's do it first for the NAND model parameter which is allocated in nand_detect(). Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 71571ed23a20..099fa166569a 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -476,7 +476,7 @@ struct onfi_params { */ struct nand_parameters { /* Generic parameters */ - char model[100]; + const char *model; bool supports_set_get_features; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); -- cgit From 11785ef53228d23ec386f5fe4a34601536f0c891 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Date: Tue, 31 Jul 2018 13:28:42 +0100 Subject: ALSA: usb-audio: Initial Power Domain support Thee USB Audio Class 3 (UAC3) introduces Power Domains as a new feature to let a host turn individual parts of an audio function to different power states via USB requests. This lets the device get to know a bit amore about what the host is up to in order to optimize power consumption efficiently. The Power Domains are optional for UAC3 configuration but all UAC3 devices shall include at least one BADD configuration where the support for Power Domains is compulsory. This patch adds a set of features/helpers to parse these power domains and change their status. Signed-off-by: Jorge Sanjuan Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v3.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h index 334bfa6dfb47..6b708434b7f9 100644 --- a/include/linux/usb/audio-v3.h +++ b/include/linux/usb/audio-v3.h @@ -447,4 +447,8 @@ struct uac3_interrupt_data_msg { /* BADD sample rate is always fixed to 48kHz */ #define UAC3_BADD_SAMPLING_RATE 48000 +/* BADD power domains recovery times in 50us increments */ +#define UAC3_BADD_PD_RECOVER_D1D0 0x0258 /* 30ms */ +#define UAC3_BADD_PD_RECOVER_D2D0 0x1770 /* 300ms */ + #endif /* __LINUX_USB_AUDIO_V3_H */ -- cgit From 08fcf813281ebcf72c69487c1501ad91b7121cdb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 31 Jul 2018 09:10:26 -0600 Subject: t10-pi: provide empty t10_pi_complete() for !CONFIG_BLK_DEV_INTEGRITY Fixes a link failure whtn BLK_DEV_INTEGRITY isn't defined. Fixes: 10c41ddd6132 ("block: move dif_prepare/dif_complete functions to block layer") Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 5a427c289f58..b9626aa7e90c 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -51,8 +51,19 @@ extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; extern const struct blk_integrity_profile t10_pi_type3_crc; extern const struct blk_integrity_profile t10_pi_type3_ip; + +#ifdef CONFIG_BLK_DEV_INTEGRITY extern void t10_pi_prepare(struct request *rq, u8 protection_type); extern void t10_pi_complete(struct request *rq, u8 protection_type, unsigned int intervals); +#else +static inline void t10_pi_complete(struct request *rq, u8 protection_type, + unsigned int intervals) +{ +} +static inline void t10_pi_prepare(struct request *rq, u8 protection_type) +{ +} +#endif #endif -- cgit From c29c2ebd2ae0066c026045a21aa33ccbfcd8bb3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 30 Jul 2018 20:43:51 -0700 Subject: net: update real_num_rx_queues even when !CONFIG_SYSFS We used to depend on real_num_rx_queues as a upper bound for sanity checks. For AF_XDP socket validation it's useful if the check behaves the same regardless of CONFIG_SYSFS setting. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c917467a2c7..3bf7e93c9e96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3431,8 +3431,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); #else static inline int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxq) + unsigned int rxqs) { + dev->real_num_rx_queues = rxqs; return 0; } #endif -- cgit From 84c6b86875e01a08a0daa6fdd4a01b36bf0bf0b2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 30 Jul 2018 20:43:53 -0700 Subject: xsk: don't allow umem replace at stack level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently drivers have to check if they already have a umem installed for a given queue and return an error if so. Make better use of XDP_QUERY_XSK_UMEM and move this functionality to the core. We need to keep rtnl across the calls now. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Björn Töpel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3bf7e93c9e96..282e2e95ad5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -872,10 +872,10 @@ struct netdev_bpf { struct { struct bpf_offloaded_map *offmap; }; - /* XDP_SETUP_XSK_UMEM */ + /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */ struct { - struct xdp_umem *umem; - u16 queue_id; + struct xdp_umem *umem; /* out for query*/ + u16 queue_id; /* in for query */ } xsk; }; }; @@ -3568,6 +3568,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, enum bpf_netdev_command cmd); +int xdp_umem_query(struct net_device *dev, u16 queue_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit From e6476c21447c4b17c47e476aade6facf050f31e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jul 2018 09:45:07 +0200 Subject: net: remove bogus RCU annotations on socket.wq We never use RCU protection for it, just a lot of cargo-cult rcu_deference_protects calls. Note that we do keep the kfree_rcu call for it, as the references through struct sock are RCU protected and thus might require a grace period before freeing. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Dumazet Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 6554d3ba4396..e0930678c8bf 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -114,7 +114,7 @@ struct socket { unsigned long flags; - struct socket_wq __rcu *wq; + struct socket_wq *wq; struct file *file; struct sock *sk; -- cgit From 546c596cf5491fda1516536e049c6a36836eb884 Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Wed, 18 Jul 2018 09:40:35 +0800 Subject: PCI: Unify PCI and normal DMA direction definitions Current DMA direction definitions in pci-dma-compat.h and dma-direction.h are mirrored in value. Unify them to enhance readability and avoid possible inconsistency. Signed-off-by: Shunyong Yang Signed-off-by: Bjorn Helgaas Cc: Joey Zheng --- include/linux/dma-direction.h | 6 ++---- include/linux/pci-dma-compat.h | 8 ++++---- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h index 3649a031893a..9c96e30e6a0b 100644 --- a/include/linux/dma-direction.h +++ b/include/linux/dma-direction.h @@ -1,14 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DMA_DIRECTION_H #define _LINUX_DMA_DIRECTION_H -/* - * These definitions mirror those in pci.h, so they can be used - * interchangeably with their PCI_ counterparts. - */ + enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2, DMA_NONE = 3, }; + #endif diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h index 0dd1a3f7b309..c3f1b44ade29 100644 --- a/include/linux/pci-dma-compat.h +++ b/include/linux/pci-dma-compat.h @@ -8,10 +8,10 @@ #include /* This defines the direction arg to the DMA mapping routines. */ -#define PCI_DMA_BIDIRECTIONAL 0 -#define PCI_DMA_TODEVICE 1 -#define PCI_DMA_FROMDEVICE 2 -#define PCI_DMA_NONE 3 +#define PCI_DMA_BIDIRECTIONAL DMA_BIDIRECTIONAL +#define PCI_DMA_TODEVICE DMA_TO_DEVICE +#define PCI_DMA_FROMDEVICE DMA_FROM_DEVICE +#define PCI_DMA_NONE DMA_NONE static inline void * pci_alloc_consistent(struct pci_dev *hwdev, size_t size, -- cgit From bb276262e88dae52cc717bb636b7468f66bb234e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 27 Jul 2018 11:33:13 -0700 Subject: mtd: spi-nor: only apply reset hacks to broken hardware Commit 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when exiting") is the latest from a long history of attempts to add reboot handling to handle stateful addressing modes on SPI flash. Some prior mostly-related discussions: http://lists.infradead.org/pipermail/linux-mtd/2013-March/046343.html [PATCH 1/3] mtd: m25p80: utilize dedicated 4-byte addressing commands http://lists.infradead.org/pipermail/barebox/2014-September/020682.html [RFC] MTD m25p80 3-byte addressing and boot problem http://lists.infradead.org/pipermail/linux-mtd/2015-February/057683.html [PATCH 2/2] m25p80: if supported put chip to deep power down if not used Previously, attempts to add reboot-time software reset handling were rejected, but the latest attempt was not. Quick summary of the problem: Some systems (e.g., boot ROM or bootloader) assume that they can read initial boot code from their SPI flash using 3-byte addressing. If the flash is left in 4-byte mode after reset, these systems won't boot. The above patch provided a shutdown/remove hook to attempt to reset the addressing mode before we reboot. Notably, this patch misses out on huge classes of unexpected reboots (e.g., crashes, watchdog resets). Unfortunately, it is essentially impossible to solve this problem 100%: if your system doesn't know how to reset the SPI flash to power-on defaults at initialization time, no amount of software can really rescue you -- there will always be a chance of some unexpected reset that leaves your flash in an addressing mode that your boot sequence didn't expect. While it is not directly harmful to perform hacks like the aforementioned commit on all 4-byte addressing flash, a properly-designed system should not need the hack -- and in fact, providing this hack may mask the fact that a given system is indeed broken. So this patch attempts to apply this unsound hack more narrowly, providing a strong suggestion to developers and system designers that this is truly a hack. With luck, system designers can catch their errors early on in their development cycle, rather than applying this hack long term. But apparently enough systems are out in the wild that we still have to provide this hack. Document a new device tree property to denote systems that do not have a proper hardware (or software) reset mechanism, and apply the hack (with a loud warning) only in this case. Signed-off-by: Brian Norris Reviewed-by: Guenter Roeck Signed-off-by: Boris Brezillon --- include/linux/mtd/spi-nor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e60da0d34cc1..c922e97f205a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -235,6 +235,7 @@ enum spi_nor_option_flags { SNOR_F_S3AN_ADDR_DEFAULT = BIT(3), SNOR_F_READY_XSR_RDY = BIT(4), SNOR_F_USE_CLSR = BIT(5), + SNOR_F_BROKEN_RESET = BIT(6), }; /** -- cgit From 3d3fe3c05d5a17ebdf55b936c51017c127c0ed44 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:52 +0200 Subject: mtd: rawnand: allocate dynamically ONFI parameters during detection Now that it is possible to do dynamic allocations during the identification phase, convert the onfi_params structure (which is only needed with ONFI compliant chips) into a pointer that will be allocated only if needed. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 099fa166569a..efb2345359bb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -482,7 +482,7 @@ struct nand_parameters { DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); /* ONFI parameters */ - struct onfi_params onfi; + struct onfi_params *onfi; }; /* The maximum expected count of bytes in the NAND ID sequence */ @@ -1618,10 +1618,10 @@ struct platform_nand_data { /* return the supported asynchronous timing mode. */ static inline int onfi_get_async_timing_mode(struct nand_chip *chip) { - if (!chip->parameters.onfi.version) + if (!chip->parameters.onfi) return ONFI_TIMING_MODE_UNKNOWN; - return chip->parameters.onfi.async_timing_mode; + return chip->parameters.onfi->async_timing_mode; } int onfi_fill_data_interface(struct nand_chip *chip, -- cgit From 304d34360b099020a12af2abb7e1ac506f4ba16d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 28 Jul 2018 10:19:13 +0200 Subject: spi: add flags parameter to txrx_word function pointers Add the capability to specify the flag parameter used in bitbang_txrx_be_cpha{0,1} through the txrx_word function pointers of spi_bitbang data structure. That feature will be used to add spi-3wire support to the spi-gpio controller Signed-off-by: Lorenzo Bianconi Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index c1e61641a7f1..5847f00ef9cf 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -30,7 +30,7 @@ struct spi_bitbang { /* txrx_word[SPI_MODE_*]() just looks like a shift register */ u32 (*txrx_word[4])(struct spi_device *spi, unsigned nsecs, - u32 word, u8 bits); + u32 word, u8 bits, unsigned flags); }; /* you can call these default bitbang->master methods from your custom -- cgit From 4b859db2c60692560afbfef1b030d0ddef57b7ee Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 28 Jul 2018 10:19:14 +0200 Subject: spi: spi-gpio: add SPI_3WIRE support Add SPI_3WIRE support to spi-gpio controller introducing set_line_direction function pointer in spi_bitbang data structure. Spi-gpio controller has been tested using hts221 temp/rh iio sensor running in 3wire mode and lsm6dsm running in 4wire mode Signed-off-by: Lorenzo Bianconi Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 5847f00ef9cf..b7e021b274dc 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -31,6 +31,7 @@ struct spi_bitbang { u32 (*txrx_word[4])(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags); + int (*set_line_direction)(struct spi_device *spi, bool output); }; /* you can call these default bitbang->master methods from your custom -- cgit From ba113c3aa79a7f941ac162d05a3620bdc985c58d Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:21 -0700 Subject: tcp: add data bytes sent stats Introduce a new TCP stat to record the number of bytes sent (RFC4898 tcpEStatsPerfHCDataOctetsOut) and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 58a8d7d71354..d0798dcd2cab 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -181,6 +181,9 @@ struct tcp_sock { u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut * total number of data segments sent. */ + u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut + * total number of data bytes sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. -- cgit From fb31c9b9f6c85b1bad569ecedbde78d9e37cd87b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:22 -0700 Subject: tcp: add data bytes retransmitted stats Introduce a new TCP stat to record the number of bytes retransmitted (RFC4898 tcpEStatsPerfOctetsRetrans) and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d0798dcd2cab..fb67f9a51b95 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -333,6 +333,9 @@ struct tcp_sock { * the first SYN. */ u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ + u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans + * Total data bytes retransmitted + */ u32 total_retrans; /* Total retransmits for entire connection */ u32 urg_seq; /* Seq of received urgent pointer */ -- cgit From 7e10b6554ff2ce7f86d5d3eec3af5db8db482caa Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:23 -0700 Subject: tcp: add dsack blocks received stats Introduce a new TCP stat to record the number of DSACK blocks received (RFC4989 tcpEStatsStackDSACKDups) and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fb67f9a51b95..da6281c549a5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -188,6 +188,9 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ -- cgit From 7ec65372ca534217b53fd208500cf7aac223a383 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:24 -0700 Subject: tcp: add stat of data packet reordering events Introduce a new TCP stats to record the number of reordering events seen and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Application can use this stats to track the frequency of the reordering events in addition to the existing reordering stats which tracks the magnitude of the latest reordering event. Note: this new stats tracks reordering events triggered by ACKs, which could often be fewer than the actual number of packets being delivered out-of-order. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index da6281c549a5..263e37271afd 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,8 +220,7 @@ struct tcp_sock { #define TCP_RACK_RECOVERY_THRESH 16 u8 reo_wnd_persist:5, /* No. of recovery since last adj */ dsack_seen:1, /* Whether DSACK seen after last adj */ - advanced:1, /* mstamp advanced since last lost marking */ - reord:1; /* reordering detected */ + advanced:1; /* mstamp advanced since last lost marking */ } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; @@ -267,6 +266,7 @@ struct tcp_sock { u8 ecn_flags; /* ECN status bits. */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reordering; /* Packet reordering metric. */ + u32 reord_seen; /* number of data packet reordering events */ u32 snd_up; /* Urgent pointer */ /* -- cgit From c971e6a006175bd0f195c6346c4e8bc4089bec00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 May 2018 18:27:19 -0400 Subject: kill d_instantiate_no_diralias() The only user is fuse_create_new_entry(), and there it's used to mitigate the same mkdir/open-by-handle race as in nfs_mkdir(). The same solution applies - unhash the mkdir argument, then call d_splice_alias() and if that returns a reference to preexisting alias, dput() and report success. ->mkdir() argument left unhashed negative with the preexisting alias moved in the right place is just fine from the ->mkdir() callers point of view. Cc: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/dcache.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 66c6e17e61e5..0b83629a3d8f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -227,7 +227,6 @@ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); -extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); -- cgit From 06bcb5168d7d49aa3ed449ff13a6d13c30afc3f0 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 2 Aug 2018 14:53:52 +0200 Subject: spi: spi-mem: Fix a typo in the documentation of struct spi_mem Fix a typo in the @drvpriv description. Signed-off-by: Frieder Schrempf Acked-by: Boris Brezillon Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index bb4bd15ae1f6..951a2e949d5f 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -122,7 +122,7 @@ struct spi_mem_op { /** * struct spi_mem - describes a SPI memory device * @spi: the underlying SPI device - * @drvpriv: spi_mem_drviver private data + * @drvpriv: spi_mem_driver private data * * Extra information that describe the SPI memory device and may be needed by * the controller to properly handle this device should be placed here. -- cgit From 5d27a9c8ea9e967d00b92a76d4bb242bf7692f2b Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 2 Aug 2018 14:53:53 +0200 Subject: spi: spi-mem: Extend the SPI mem interface to set a custom memory name When porting (Q)SPI controller drivers from the MTD layer to the SPI layer, the naming scheme for the memory devices changes. To be able to keep compatibility with the old drivers naming scheme, a name field is added to struct spi_mem and a hook is added to let controller drivers set a custom name for the memory device. Example for the FSL QSPI driver: Name with the old driver: 21e0000.qspi, or with multiple devices: 21e0000.qspi-0, 21e0000.qspi-1, ... Name with the new driver without spi_mem_get_name: spi4.0 Suggested-by: Boris Brezillon Signed-off-by: Frieder Schrempf Reviewed-by: Boris Brezillon Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 951a2e949d5f..0d64ccc4e584 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -123,6 +123,7 @@ struct spi_mem_op { * struct spi_mem - describes a SPI memory device * @spi: the underlying SPI device * @drvpriv: spi_mem_driver private data + * @name: name of the SPI memory device * * Extra information that describe the SPI memory device and may be needed by * the controller to properly handle this device should be placed here. @@ -133,6 +134,7 @@ struct spi_mem_op { struct spi_mem { struct spi_device *spi; void *drvpriv; + char *name; }; /** @@ -165,6 +167,13 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) * limitations) * @supports_op: check if an operation is supported by the controller * @exec_op: execute a SPI memory operation + * @get_name: get a custom name for the SPI mem device from the controller. + * This might be needed if the controller driver has been ported + * to use the SPI mem layer and a custom name is used to keep + * mtdparts compatible. + * Note that if the implementation of this function allocates memory + * dynamically, then it should do so with devm_xxx(), as we don't + * have a ->free_name() function. * * This interface should be implemented by SPI controllers providing an * high-level interface to execute SPI memory operation, which is usually the @@ -176,6 +185,7 @@ struct spi_controller_mem_ops { const struct spi_mem_op *op); int (*exec_op)(struct spi_mem *mem, const struct spi_mem_op *op); + const char *(*get_name)(struct spi_mem *mem); }; /** @@ -234,6 +244,8 @@ bool spi_mem_supports_op(struct spi_mem *mem, int spi_mem_exec_op(struct spi_mem *mem, const struct spi_mem_op *op); +const char *spi_mem_get_name(struct spi_mem *mem); + int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv, struct module *owner); -- cgit From a83c0ea79d8b5705d09a39c73224332f9170a903 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 30 Jun 2018 17:54:59 +0300 Subject: docs/mm: bootmem: add kernel-doc description of 'struct bootmem_data' Signed-off-by: Mike Rapoport Signed-off-by: Jonathan Corbet --- include/linux/bootmem.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7942a96b1a9d..42515195d7d8 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -27,9 +27,20 @@ extern unsigned long max_pfn; extern unsigned long long max_possible_pfn; #ifndef CONFIG_NO_BOOTMEM -/* - * node_bootmem_map is a map pointer - the bits represent all physical - * memory pages (including holes) on the node. +/** + * struct bootmem_data - per-node information used by the bootmem allocator + * @node_min_pfn: the starting physical address of the node's memory + * @node_low_pfn: the end physical address of the directly addressable memory + * @node_bootmem_map: is a bitmap pointer - the bits represent all physical + * memory pages (including holes) on the node. + * @last_end_off: the offset within the page of the end of the last allocation; + * if 0, the page used is full + * @hint_idx: the PFN of the page used with the last allocation; + * together with using this with the @last_end_offset field, + * a test can be made to see if allocations can be merged + * with the page used for the last allocation rather than + * using up a full new page. + * @list: list entry in the linked list ordered by the memory addresses */ typedef struct bootmem_data { unsigned long node_min_pfn; -- cgit From e1720fee27246dfb84f7c433e35367170a2d4436 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 30 Jun 2018 17:55:01 +0300 Subject: mm/memblock: add a name for memblock flags enumeration Since kernel-doc does not like anonymous enums the name is required for adding documentation. While on it, I've also updated all the function declarations to use 'enum memblock_flags' instead of unsigned long. Signed-off-by: Mike Rapoport Signed-off-by: Jonathan Corbet --- include/linux/memblock.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ca59883c8364..8b8fbceffd4d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,7 +21,7 @@ #define INIT_PHYSMEM_REGIONS 4 /* Definition of memblock flags. */ -enum { +enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ MEMBLOCK_MIRROR = 0x2, /* mirrored region */ @@ -31,7 +31,7 @@ enum { struct memblock_region { phys_addr_t base; phys_addr_t size; - unsigned long flags; + enum memblock_flags flags; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int nid; #endif @@ -72,7 +72,7 @@ void memblock_discard(void); phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, - int nid, ulong flags); + int nid, enum memblock_flags flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -89,19 +89,19 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); -ulong choose_memblock_flags(void); +enum memblock_flags choose_memblock_flags(void); /* Low level functions */ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, - int nid, unsigned long flags); + int nid, enum memblock_flags flags); -void __next_mem_range(u64 *idx, int nid, ulong flags, +void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __next_mem_range_rev(u64 *idx, int nid, ulong flags, +void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); @@ -253,13 +253,13 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL) static inline void memblock_set_region_flags(struct memblock_region *r, - unsigned long flags) + enum memblock_flags flags) { r->flags |= flags; } static inline void memblock_clear_region_flags(struct memblock_region *r, - unsigned long flags) + enum memblock_flags flags) { r->flags &= ~flags; } @@ -317,10 +317,10 @@ static inline bool memblock_bottom_up(void) phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, - ulong flags); + enum memblock_flags flags); phys_addr_t memblock_alloc_base_nid(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr, - int nid, ulong flags); + int nid, enum memblock_flags flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, -- cgit From 47cec4432ab06c4ba90c241b142a016f878ac6da Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 30 Jun 2018 17:55:02 +0300 Subject: docs/mm: memblock: update kernel-doc comments * make memblock_discard description kernel-doc compatible * add brief description for memblock_setclr_flag and describe its parameters * fixup return value descriptions Signed-off-by: Mike Rapoport Signed-off-by: Jonathan Corbet --- include/linux/memblock.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8b8fbceffd4d..63704c64583a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -239,7 +239,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, /** * for_each_resv_unavail_range - iterate through reserved and unavailable memory * @i: u64 used as loop variable - * @flags: pick from blocks based on memory attributes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @@ -367,8 +366,10 @@ phys_addr_t memblock_get_current_limit(void); */ /** - * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region + * memblock_region_memory_base_pfn - get the lowest pfn of the memory region * @reg: memblock_region structure + * + * Return: the lowest pfn intersecting with the memory region */ static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg) { @@ -376,8 +377,10 @@ static inline unsigned long memblock_region_memory_base_pfn(const struct membloc } /** - * memblock_region_memory_end_pfn - Return the end_pfn this region + * memblock_region_memory_end_pfn - get the end pfn of the memory region * @reg: memblock_region structure + * + * Return: the end_pfn of the reserved region */ static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg) { @@ -385,8 +388,10 @@ static inline unsigned long memblock_region_memory_end_pfn(const struct memblock } /** - * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region + * memblock_region_reserved_base_pfn - get the lowest pfn of the reserved region * @reg: memblock_region structure + * + * Return: the lowest pfn intersecting with the reserved region */ static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg) { @@ -394,8 +399,10 @@ static inline unsigned long memblock_region_reserved_base_pfn(const struct membl } /** - * memblock_region_reserved_end_pfn - Return the end_pfn this region + * memblock_region_reserved_end_pfn - get the end pfn of the reserved region * @reg: memblock_region structure + * + * Return: the end_pfn of the reserved region */ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg) { -- cgit From 9a0de1bfe191220cb0437865261ab2f95cbb13ef Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 30 Jun 2018 17:55:04 +0300 Subject: docs/mm: memblock: add kernel-doc description for memblock types Signed-off-by: Mike Rapoport Signed-off-by: Jonathan Corbet --- include/linux/memblock.h | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 63704c64583a..516920549378 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -20,7 +20,13 @@ #define INIT_MEMBLOCK_REGIONS 128 #define INIT_PHYSMEM_REGIONS 4 -/* Definition of memblock flags. */ +/** + * enum memblock_flags - definition of memory region attributes + * @MEMBLOCK_NONE: no special request + * @MEMBLOCK_HOTPLUG: hotpluggable region + * @MEMBLOCK_MIRROR: mirrored region + * @MEMBLOCK_NOMAP: don't add to kernel direct mapping + */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ @@ -28,6 +34,13 @@ enum memblock_flags { MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ }; +/** + * struct memblock_region - represents a memory region + * @base: physical address of the region + * @size: size of the region + * @flags: memory region attributes + * @nid: NUMA node id + */ struct memblock_region { phys_addr_t base; phys_addr_t size; @@ -37,14 +50,30 @@ struct memblock_region { #endif }; +/** + * struct memblock_type - collection of memory regions of certain type + * @cnt: number of regions + * @max: size of the allocated array + * @total_size: size of all regions + * @regions: array of regions + * @name: the memory type symbolic name + */ struct memblock_type { - unsigned long cnt; /* number of regions */ - unsigned long max; /* size of the allocated array */ - phys_addr_t total_size; /* size of all regions */ + unsigned long cnt; + unsigned long max; + phys_addr_t total_size; struct memblock_region *regions; char *name; }; +/** + * struct memblock - memblock allocator metadata + * @bottom_up: is bottom up direction? + * @current_limit: physical address of the current allocation limit + * @memory: usabe memory regions + * @reserved: reserved memory regions + * @physmem: all physical memory + */ struct memblock { bool bottom_up; /* is bottom up direction? */ phys_addr_t current_limit; -- cgit From 2afc9166f79b8f6da5f347f48515215ceee4ae37 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:51:40 -0700 Subject: scsi: sysfs: Introduce sysfs_{un,}break_active_protection() Introduce these two functions and export them such that the next patch can add calls to these functions from the SCSI core. Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Signed-off-by: Martin K. Petersen --- include/linux/sysfs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index b8bfdc173ec0..3c12198c0103 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -237,6 +237,9 @@ int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr); +void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); @@ -350,6 +353,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj, return 0; } +static inline struct kernfs_node * +sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + return NULL; +} + +static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ +} + static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) -- cgit From e7d0748dd71695b94f3a35c8bdc05226a7f3d919 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Aug 2018 15:22:13 -0600 Subject: block: Switch struct packet_command to use struct scsi_sense_hdr There is a lot of needless struct request_sense usage in the CDROM code. These can all be struct scsi_sense_hdr instead, to avoid any confusion over their respective structure sizes. This patch is a lot of noise changing "sense" to "sshdr", but the final code is more readable to distinguish between "sense" meaning "struct request_sense" and "sshdr" meaning "struct scsi_sense_hdr". Reviewed-by: Christoph Hellwig Signed-off-by: Kees Cook Signed-off-by: Jens Axboe --- include/linux/cdrom.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index e75dfd1f1dec..528271c60018 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -13,6 +13,7 @@ #include /* not really needed, later.. */ #include +#include #include struct packet_command @@ -21,7 +22,7 @@ struct packet_command unsigned char *buffer; unsigned int buflen; int stat; - struct request_sense *sense; + struct scsi_sense_hdr *sshdr; unsigned char data_direction; int quiet; int timeout; -- cgit From 9a47249d444d344051c7c0e909fad0e88515a5c2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 31 Jul 2018 21:11:00 +0200 Subject: random: Make crng state queryable It is very useful to be able to know whether or not get_random_bytes_wait / wait_for_random_bytes is going to block or not, or whether plain get_random_bytes is going to return good randomness or bad randomness. The particular use case is for mitigating certain attacks in WireGuard. A handshake packet arrives and is queued up. Elsewhere a worker thread takes items from the queue and processes them. In replying to these items, it needs to use some random data, and it has to be good random data. If we simply block until we can have good randomness, then it's possible for an attacker to fill the queue up with packets waiting to be processed. Upon realizing the queue is full, WireGuard will detect that it's under a denial of service attack, and behave accordingly. A better approach is just to drop incoming handshake packets if the crng is not yet initialized. This patch, therefore, makes that information directly accessible. Signed-off-by: Jason A. Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index f1c9bc5cd231..445a0ea4ff49 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -36,6 +36,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); +extern bool rng_is_initialized(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern int __must_check get_random_bytes_arch(void *buf, int nbytes); -- cgit From 0a4c58f5702858822621fa1177c7d3475f181ccb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:17 -0700 Subject: bpf: add ability to charge bpf maps memory dynamically This commits extends existing bpf maps memory charging API to support dynamic charging/uncharging. This is required to account memory used by maps, if all entries are created dynamically after the map initialization. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b5ad95cf339..5a4a256473c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -435,6 +435,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); +int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); +void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); -- cgit From de9cbbaadba5adf88a19e46df61f7054000838f6 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:18 -0700 Subject: bpf: introduce cgroup storage maps This commit introduces BPF_MAP_TYPE_CGROUP_STORAGE maps: a special type of maps which are implementing the cgroup storage. >From the userspace point of view it's almost a generic hash map with the (cgroup inode id, attachment type) pair used as a key. The only difference is that some operations are restricted: 1) a user can't create new entries, 2) a user can't remove existing entries. The lookup from userspace is o(log(n)). Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/bpf.h | 1 + include/linux/bpf_types.h | 3 +++ 3 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d50c2f0a655a..7d00d58869ed 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -4,19 +4,39 @@ #include #include +#include #include struct sock; struct sockaddr; struct cgroup; struct sk_buff; +struct bpf_map; +struct bpf_prog; struct bpf_sock_ops_kern; +struct bpf_cgroup_storage; #ifdef CONFIG_CGROUP_BPF extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_cgroup_storage_map; + +struct bpf_storage_buffer { + struct rcu_head rcu; + char data[0]; +}; + +struct bpf_cgroup_storage { + struct bpf_storage_buffer *buf; + struct bpf_cgroup_storage_map *map; + struct bpf_cgroup_storage_key key; + struct list_head list; + struct rb_node node; + struct rcu_head rcu; +}; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; @@ -77,6 +97,15 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type); +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -221,6 +250,15 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } +static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, + struct bpf_map *map) { return 0; } +static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, + struct bpf_map *map) {} +static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( + struct bpf_prog *prog) { return 0; } +static inline void bpf_cgroup_storage_free( + struct bpf_cgroup_storage *storage) {} + #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5a4a256473c3..9d1e4727495e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -282,6 +282,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + struct bpf_map *cgroup_storage; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY void *security; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c5700c2d5549..add08be53b6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) -- cgit From aa0ad5b0391e268bdecf6cda31268388844f8afd Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:19 -0700 Subject: bpf: pass a pointer to a cgroup storage using pcpu variable This commit introduces the bpf_cgroup_storage_set() helper, which will be used to pass a pointer to a cgroup storage to the bpf helper. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7d00d58869ed..9a144ddbbc8f 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -21,6 +22,8 @@ struct bpf_cgroup_storage; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +DECLARE_PER_CPU(void*, bpf_cgroup_storage); + struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -97,6 +100,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +{ + struct bpf_storage_buffer *buf; + + if (!storage) + return; + + buf = READ_ONCE(storage->buf); + this_cpu_write(bpf_cgroup_storage, &buf->data[0]); +} + struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, @@ -250,6 +264,7 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map) { return 0; } static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, -- cgit From d7bf2c10af053191e931b58704cd862fccb7f9de Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:20 -0700 Subject: bpf: allocate cgroup storage entries on attaching bpf programs If a bpf program is using cgroup local storage, allocate a bpf_cgroup_storage structure automatically on attaching the program to a cgroup and save the pointer into the corresponding bpf_prog_list entry. Analogically, release the cgroup local storage on detaching of the bpf program. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9a144ddbbc8f..f91b0f8ff3a9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -43,6 +43,7 @@ struct bpf_cgroup_storage { struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_storage *storage; }; struct bpf_prog_array; -- cgit From 394e40a29788820c9c0526b1c3497c9e0ec2a126 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:21 -0700 Subject: bpf: extend bpf_prog_array to store pointers to the cgroup storage This patch converts bpf_prog_array from an array of prog pointers to the array of struct bpf_prog_array_item elements. This allows to save a cgroup storage pointer for each bpf program efficiently attached to a cgroup. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9d1e4727495e..16be67888c30 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -349,9 +349,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, * The 'struct bpf_prog_array *' should only be replaced with xchg() * since other cpus are walking the array of pointers in parallel. */ +struct bpf_prog_array_item { + struct bpf_prog *prog; + struct bpf_cgroup_storage *cgroup_storage; +}; + struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog *progs[0]; + struct bpf_prog_array_item items[0]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); @@ -372,7 +377,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array_item *_item; \ + struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ preempt_disable(); \ @@ -380,10 +386,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ goto _out; \ - _prog = _array->progs; \ - while ((__prog = READ_ONCE(*_prog))) { \ - _ret &= func(__prog, ctx); \ - _prog++; \ + _item = &_array->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ + _ret &= func(_prog, ctx); \ + _item++; \ } \ _out: \ rcu_read_unlock(); \ -- cgit From 3e6a4b3e0289dc9540a2c1d8a20657f4707fbabb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:22 -0700 Subject: bpf/verifier: introduce BPF_PTR_TO_MAP_VALUE BPF_MAP_TYPE_CGROUP_STORAGE maps are special in a way that the access from the bpf program side is lookup-free. That means the result is guaranteed to be a valid pointer to the cgroup storage; no NULL-check is required. This patch introduces BPF_PTR_TO_MAP_VALUE return type, which is required to cause the verifier accept programs, which are not checking the map value pointer for being NULL. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 16be67888c30..ca4ac2a39def 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_arg_type { enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ + RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ }; -- cgit From cd3394317653837e2eb5c5d0904a8996102af9fc Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 14:27:24 -0700 Subject: bpf: introduce the bpf_get_local_storage() helper function The bpf_get_local_storage() helper function is used to get a pointer to the bpf local storage from a bpf program. It takes a pointer to a storage map and flags as arguments. Right now it accepts only cgroup storage maps, and flags argument has to be 0. Further it can be extended to support other types of local storage: e.g. thread local storage etc. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca4ac2a39def..cd8790d2c6ed 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -788,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_get_local_storage_proto; + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -- cgit From 401c0d7712eb3189023efc9c0708a2ac984ed62e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 3 Aug 2018 11:50:39 +0200 Subject: spi: spi-mem: Constify spi_mem->name There is no reason to make spi_mem->name modifiable. Moreover, spi_mem_ops->get_name() returns a const char *, which generates a gcc warning when assigning the value returned by spi_mem_ops->get_name() to spi_mem->name. Fixes: 5d27a9c8ea9e ("spi: spi-mem: Extend the SPI mem interface to set a custom memory name") Reported-by: Stephen Rothwell Signed-off-by: Boris Brezillon Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 0d64ccc4e584..62722fb7472d 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -134,7 +134,7 @@ struct spi_mem_op { struct spi_mem { struct spi_device *spi; void *drvpriv; - char *name; + const char *name; }; /** -- cgit From 2ad5157650b446f14a719280ba3670303bc4f439 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 11 Jul 2018 18:42:11 -0500 Subject: mailbox/omap: switch to SPDX license identifier Use the appropriate SPDX license identifier in the OMAP Mailbox driver source files and drop the previous boilerplate license text. Signed-off-by: Suman Anna Signed-off-by: Jassi Brar --- include/linux/omap-mailbox.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h index c726bd833761..6dbcd2da0332 100644 --- a/include/linux/omap-mailbox.h +++ b/include/linux/omap-mailbox.h @@ -1,9 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * omap-mailbox: interprocessor communication module for OMAP - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef OMAP_MAILBOX_H -- cgit From 623a6143a845bd485b00ba684f0ccef11835edab Mon Sep 17 00:00:00 2001 From: Houlong Wei Date: Wed, 25 Jul 2018 09:26:40 +0800 Subject: mailbox: mediatek: Add Mediatek CMDQ driver This patch is first version of Mediatek Command Queue(CMDQ) driver. The CMDQ is used to help write registers with critical time limitation, such as updating display configuration during the vblank. It controls Global Command Engine (GCE) hardware to achieve this requirement. Currently, CMDQ only supports display related hardwares, but we expect it can be extended to other hardwares for future requirements. Signed-off-by: Houlong Wei Signed-off-by: HS Liao Signed-off-by: CK Hu Signed-off-by: Jassi Brar --- include/linux/mailbox/mtk-cmdq-mailbox.h | 77 ++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/linux/mailbox/mtk-cmdq-mailbox.h (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h new file mode 100644 index 000000000000..ccb73422c2fa --- /dev/null +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 MediaTek Inc. + * + */ + +#ifndef __MTK_CMDQ_MAILBOX_H__ +#define __MTK_CMDQ_MAILBOX_H__ + +#include +#include +#include + +#define CMDQ_INST_SIZE 8 /* instruction is 64-bit */ +#define CMDQ_SUBSYS_SHIFT 16 +#define CMDQ_OP_CODE_SHIFT 24 +#define CMDQ_JUMP_PASS CMDQ_INST_SIZE + +#define CMDQ_WFE_UPDATE BIT(31) +#define CMDQ_WFE_WAIT BIT(15) +#define CMDQ_WFE_WAIT_VALUE 0x1 + +/* + * CMDQ_CODE_MASK: + * set write mask + * format: op mask + * CMDQ_CODE_WRITE: + * write value into target register + * format: op subsys address value + * CMDQ_CODE_JUMP: + * jump by offset + * format: op offset + * CMDQ_CODE_WFE: + * wait for event and clear + * it is just clear if no wait + * format: [wait] op event update:1 to_wait:1 wait:1 + * [clear] op event update:1 to_wait:0 wait:0 + * CMDQ_CODE_EOC: + * end of command + * format: op irq_flag + */ +enum cmdq_code { + CMDQ_CODE_MASK = 0x02, + CMDQ_CODE_WRITE = 0x04, + CMDQ_CODE_JUMP = 0x10, + CMDQ_CODE_WFE = 0x20, + CMDQ_CODE_EOC = 0x40, +}; + +enum cmdq_cb_status { + CMDQ_CB_NORMAL = 0, + CMDQ_CB_ERROR +}; + +struct cmdq_cb_data { + enum cmdq_cb_status sta; + void *data; +}; + +typedef void (*cmdq_async_flush_cb)(struct cmdq_cb_data data); + +struct cmdq_task_cb { + cmdq_async_flush_cb cb; + void *data; +}; + +struct cmdq_pkt { + void *va_base; + dma_addr_t pa_base; + size_t cmd_buf_size; /* command occupied size */ + size_t buf_size; /* real buffer size */ + struct cmdq_task_cb cb; + struct cmdq_task_cb async_cb; + void *cl; +}; + +#endif /* __MTK_CMDQ_MAILBOX_H__ */ -- cgit From 7cca1ed0bb248b8d5768d17f5afe297a832d66c0 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 31 Jul 2018 20:25:00 +0200 Subject: netfilter: nf_osf: move nf_osf_fingers to non-uapi header file All warnings (new ones prefixed by >>): >> ./usr/include/linux/netfilter/nf_osf.h:73: userspace cannot reference function or variable defined in the kernel Fixes: f9324952088f ("netfilter: nfnetlink_osf: extract nfnetlink_subsystem code from xt_osf.c") Reported-by: kbuild test robot Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_osf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h index aee460fcbd31..3e455d6f94d5 100644 --- a/include/linux/netfilter/nf_osf.h +++ b/include/linux/netfilter/nf_osf.h @@ -25,6 +25,8 @@ enum osf_fmatch_states { FMATCH_OPT_WRONG, }; +extern struct list_head nf_osf_fingers[2]; + struct nf_osf_finger { struct rcu_head rcu_head; struct list_head finger_entry; -- cgit From ddba40be59c9be4059288464f8e6f38fbba27495 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 31 Jul 2018 20:25:01 +0200 Subject: netfilter: nfnetlink_osf: rename nf_osf header file to nfnetlink_osf The first client of the nf_osf.h userspace header is nft_osf, coming in this batch, rename it to nfnetlink_osf.h as there are no userspace clients for this yet, hence this looks consistent with other nfnetlink subsystem. Suggested-by: Jan Engelhardt Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_osf.h | 44 --------------------------------- include/linux/netfilter/nfnetlink_osf.h | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 44 deletions(-) delete mode 100644 include/linux/netfilter/nf_osf.h create mode 100644 include/linux/netfilter/nfnetlink_osf.h (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h deleted file mode 100644 index 3e455d6f94d5..000000000000 --- a/include/linux/netfilter/nf_osf.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFOSF_H -#define _NFOSF_H - -#include - -/* Initial window size option state machine: multiple of mss, mtu or - * plain numeric value. Can also be made as plain numeric value which - * is not a multiple of specified value. - */ -enum nf_osf_window_size_options { - OSF_WSS_PLAIN = 0, - OSF_WSS_MSS, - OSF_WSS_MTU, - OSF_WSS_MODULO, - OSF_WSS_MAX, -}; - -enum osf_fmatch_states { - /* Packet does not match the fingerprint */ - FMATCH_WRONG = 0, - /* Packet matches the fingerprint */ - FMATCH_OK, - /* Options do not match the fingerprint, but header does */ - FMATCH_OPT_WRONG, -}; - -extern struct list_head nf_osf_fingers[2]; - -struct nf_osf_finger { - struct rcu_head rcu_head; - struct list_head finger_entry; - struct nf_osf_user_finger finger; -}; - -bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, - int hooknum, struct net_device *in, struct net_device *out, - const struct nf_osf_info *info, struct net *net, - const struct list_head *nf_osf_fingers); - -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers); - -#endif /* _NFOSF_H */ diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h new file mode 100644 index 000000000000..a7311bc03d3a --- /dev/null +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFOSF_H +#define _NFOSF_H + +#include + +/* Initial window size option state machine: multiple of mss, mtu or + * plain numeric value. Can also be made as plain numeric value which + * is not a multiple of specified value. + */ +enum nf_osf_window_size_options { + OSF_WSS_PLAIN = 0, + OSF_WSS_MSS, + OSF_WSS_MTU, + OSF_WSS_MODULO, + OSF_WSS_MAX, +}; + +enum osf_fmatch_states { + /* Packet does not match the fingerprint */ + FMATCH_WRONG = 0, + /* Packet matches the fingerprint */ + FMATCH_OK, + /* Options do not match the fingerprint, but header does */ + FMATCH_OPT_WRONG, +}; + +extern struct list_head nf_osf_fingers[2]; + +struct nf_osf_finger { + struct rcu_head rcu_head; + struct list_head finger_entry; + struct nf_osf_user_finger finger; +}; + +bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, + int hooknum, struct net_device *in, struct net_device *out, + const struct nf_osf_info *info, struct net *net, + const struct list_head *nf_osf_fingers); + +const char *nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers); + +#endif /* _NFOSF_H */ -- cgit From 94276fa8a2a4c08ccb2e9d55e88b95dc972ccea3 Mon Sep 17 00:00:00 2001 From: Máté Eckl Date: Fri, 3 Aug 2018 13:36:13 +0200 Subject: netfilter: bridge: Expose nf_tables bridge hook priorities through uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Netfilter exposes standard hook priorities in case of ipv4, ipv6 and arp but not in case of bridge. This patch exposes the hook priority values of the bridge family (which are different from the formerly mentioned) via uapi so that they can be used by user-space applications just like the others. Signed-off-by: Máté Eckl Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index b671fdfd212b..fa0686500970 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -5,17 +5,6 @@ #include #include -enum nf_br_hook_priorities { - NF_BR_PRI_FIRST = INT_MIN, - NF_BR_PRI_NAT_DST_BRIDGED = -300, - NF_BR_PRI_FILTER_BRIDGED = -200, - NF_BR_PRI_BRNF = 0, - NF_BR_PRI_NAT_DST_OTHER = 100, - NF_BR_PRI_FILTER_OTHER = 200, - NF_BR_PRI_NAT_SRC = 300, - NF_BR_PRI_LAST = INT_MAX, -}; - #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); -- cgit From c2b6d621c4ffe9936adf7a55c8b1c769672c306f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Jun 2018 15:53:17 -0400 Subject: new primitive: discard_new_inode() We don't want open-by-handle picking half-set-up in-core struct inode from e.g. mkdir() having failed halfway through. In other words, we don't want such inodes returned by iget_locked() on their way to extinction. However, we can't just have them unhashed - otherwise open-by-handle immediately *after* that would've ended up creating a new in-core inode over the on-disk one that is in process of being freed right under us. Solution: new flag (I_CREATING) set by insert_inode_locked() and removed by unlock_new_inode() and a new primitive (discard_new_inode()) to be used by such halfway-through-setup failure exits instead of unlock_new_inode() / iput() combinations. That primitive unlocks new inode, but leaves I_CREATING in place. iget_locked() treats finding an I_CREATING inode as failure (-ESTALE, once we sort out the error propagation). insert_inode_locked() treats the same as instant -EBUSY. ilookup() treats those as icache miss. [Fix by Dan Carpenter folded in] Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..a42600565925 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * and work dirs among overlayfs mounts. * + * I_CREATING New object's inode in the middle of setting up. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) -#define I_OVL_INUSE (1 << 14) +#define I_OVL_INUSE (1 << 14) +#define I_CREATING (1 << 15) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode); static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); +extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); -- cgit From 5bef915104f32c9d0bb5df6e86a98e31cb524e9a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Jun 2018 19:36:57 -0400 Subject: new helper: inode_fake_hash() open-coded in a quite a few places... Signed-off-by: Al Viro --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a42600565925..43941e230e2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -684,6 +684,17 @@ static inline int inode_unhashed(struct inode *inode) return hlist_unhashed(&inode->i_hash); } +/* + * __mark_inode_dirty expects inodes to be hashed. Since we don't + * want special inodes in the fileset inode space, we make them + * appear hashed, but do not put on any lists. hlist_del() + * will work fine and require no locking. + */ +static inline void inode_fake_hash(struct inode *inode) +{ + hlist_add_fake(&inode->i_hash); +} + /* * inode->i_mutex nesting subclasses for the lock validator: * -- cgit From 91305f2812624c0cf7ccbb44133b66d3b24676e4 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 1 Aug 2018 18:05:54 +0800 Subject: ptp_qoriq: support automatic configuration for ptp timer This patch is to support automatic configuration for ptp timer. If required ptp dts properties are not provided, driver could try to calculate a set of default configurations to initialize the ptp timer. This makes the driver work for many boards which don't have the required ptp dts properties in current kernel. Also the users could set dts properties by themselves according to their requirement. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/linux/fsl/ptp_qoriq.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index dc3dac40f069..c1f003aadcce 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -127,9 +127,13 @@ struct qoriq_ptp_registers { #define DRIVER "ptp_qoriq" -#define DEFAULT_CKSEL 1 #define N_EXT_TS 2 +#define DEFAULT_CKSEL 1 +#define DEFAULT_TMR_PRSC 2 +#define DEFAULT_FIPER1_PERIOD 1000000000 +#define DEFAULT_FIPER2_PERIOD 100000 + struct qoriq_ptp { void __iomem *base; struct qoriq_ptp_registers regs; -- cgit From 385114dec8a49b5e5945e77ba7de6356106713f4 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 2 Aug 2018 23:34:38 +0000 Subject: net: modify skb_rbtree_purge to return the truesize of all purged skbs. Tested: see the next patch is the series. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fd3cb1b247df..47848367c816 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2585,7 +2585,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -void skb_rbtree_purge(struct rb_root *root); +unsigned int skb_rbtree_purge(struct rb_root *root); void *netdev_alloc_frag(unsigned int fragsz); -- cgit From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 2 Aug 2018 23:34:39 +0000 Subject: ip: use rb trees for IP frag queue. Similar to TCP OOO RX queue, it makes sense to use rb trees to store IP fragments, so that OOO fragments are inserted faster. Tested: - a follow-up patch contains a rather comprehensive ip defrag self-test (functional) - ran neper `udp_stream -c -H -F 100 -l 300 -T 20`: netstat --statistics Ip: 282078937 total packets received 0 forwarded 0 incoming packets discarded 946760 incoming packets delivered 18743456 requests sent out 101 fragments dropped after timeout 282077129 reassemblies required 944952 packets reassembled ok 262734239 packet reassembles failed (The numbers/stats above are somewhat better re: reassemblies vs a kernel without this patchset. More comprehensive performance testing TBD). Reported-by: Jann Horn Reported-by: Juha-Matti Tilli Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 47848367c816..7ebdf158a795 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -676,13 +676,16 @@ struct sk_buff { * UDP receive path is one user. */ unsigned long dev_scratch; - int ip_defrag_offset; }; }; - struct rb_node rbnode; /* used in netem & tcp stack */ + struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ struct list_head list; }; - struct sock *sk; + + union { + struct sock *sk; + int ip_defrag_offset; + }; union { ktime_t tstamp; -- cgit From 55f2503c3b69328735e88031ff8d6ba291bd952b Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Tue, 31 Jul 2018 16:51:32 +0800 Subject: PM / reboot: Eliminate race between reboot and suspend At present, "systemctl suspend" and "shutdown" can run in parrallel. A system can suspend after devices_shutdown(), and resume. Then the shutdown task goes on to power off. This causes many devices are not really shut off. Hence replacing reboot_mutex with system_transition_mutex (renamed from pm_mutex) to achieve the exclusion. The renaming of pm_mutex as system_transition_mutex can be better to reflect the purpose of the mutex. Signed-off-by: Pingfan Liu Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 440b62f7502e..5a28ac9284f0 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -414,7 +414,7 @@ static inline bool hibernation_available(void) { return false; } #define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ #define PM_POST_RESTORE 0x0006 /* Restore failed */ -extern struct mutex pm_mutex; +extern struct mutex system_transition_mutex; #ifdef CONFIG_PM_SLEEP void save_processor_state(void); -- cgit From bc2d8d262cba5736332cbc866acb11b1c5748aa9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Aug 2018 08:19:57 +0200 Subject: cpu/hotplug: Fix SMT supported evaluation Josh reported that the late SMT evaluation in cpu_smt_state_init() sets cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied on the kernel command line as it cannot differentiate between SMT disabled by BIOS and SMT soft disable via 'nosmt'. That wreckages the state and makes the sysfs interface unusable. Rework this so that during bringup of the non boot CPUs the availability of SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a 'primary' thread then set the local cpu_smt_available marker and evaluate this explicitely right after the initial SMP bringup has finished. SMT evaulation on x86 is a trainwreck as the firmware has all the information _before_ booting the kernel, but there is no interface to query it. Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ccdf3a67ce56..b216bd5bfd20 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -178,10 +178,12 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif -- cgit From 6fdecfe32f903d9f5f35ee4464fd32ede470dcad Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 7 Aug 2018 10:02:44 -0700 Subject: net: phy: Add support for Broadcom Omega internal Combo GPHY Add support for the Broadcom Omega SoC internal Combo Ethernet GPHY to the bcm7xxx phy driver. Signed-off-by: Arun Parameswaran Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index daa9234a9baf..949e9af8d9d6 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -45,6 +45,7 @@ #define PHY_ID_BCM7445 0x600d8510 #define PHY_ID_BCM_CYGNUS 0xae025200 +#define PHY_ID_BCM_OMEGA 0xae025100 #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 -- cgit From 8b92d0e3d400390660a26ef7f475524700fb86cf Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 8 Aug 2018 08:35:29 +0200 Subject: nvme.h: fixup ANA group descriptor format ANA Phase 3 draft had the 'reserved' field in the group descriptor format set to '23:17' (so that the first namespace identifier started at byte 24), but that got move with the approved TP to '31:17' (so that the first namespace identifier started at byte 32). Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 64c9175723de..a661861e9d56 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -446,7 +446,7 @@ struct nvme_ana_group_desc { __le32 nnsids; __le64 chgcnt; __u8 state; - __u8 rsvd17[7]; + __u8 rsvd17[15]; __le32 nsids[]; }; -- cgit From 93045d5942da60801e71764597d448cf37a798c1 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 7 Aug 2018 23:01:05 -0700 Subject: nvme.h: add support for ns write protect definitions Add various definitions from NVMe 1.3 TP 4005. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a661861e9d56..68e91ef5494c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -259,7 +259,7 @@ struct nvme_id_ctrl { __le16 awun; __le16 awupf; __u8 nvscc; - __u8 rsvd531; + __u8 nwpc; __le16 acwu; __u8 rsvd534[2]; __le32 sgls; @@ -320,7 +320,9 @@ struct nvme_id_ns { __u8 nvmcap[16]; __u8 rsvd64[28]; __le32 anagrpid; - __u8 rsvd96[8]; + __u8 rsvd96[3]; + __u8 nsattr; + __u8 rsvd100[4]; __u8 nguid[16]; __u8 eui64[8]; struct nvme_lbaf lbaf[16]; @@ -794,6 +796,7 @@ enum { NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, NVME_FEAT_RESV_PERSIST = 0x83, + NVME_FEAT_WRITE_PROTECT = 0x84, NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, @@ -807,6 +810,14 @@ enum { NVME_FWACT_ACTV = (2 << 3), }; +/* NVMe Namespace Write Protect State */ +enum { + NVME_NS_NO_WRITE_PROTECT = 0, + NVME_NS_WRITE_PROTECT, + NVME_NS_WRITE_PROTECT_POWER_CYCLE, + NVME_NS_WRITE_PROTECT_PERMANENT, +}; + #define NVME_MAX_CHANGED_NAMESPACES 1024 struct nvme_identify { @@ -1153,6 +1164,8 @@ enum { NVME_SC_SGL_INVALID_OFFSET = 0x16, NVME_SC_SGL_INVALID_SUBTYPE = 0x17, + NVME_SC_NS_WRITE_PROTECTED = 0x20, + NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, NVME_SC_NS_NOT_READY = 0x82, -- cgit From 212dfd909ea8b630e5d6fa4d25aeec9c4b4b14a5 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 7 Aug 2018 22:06:52 +0200 Subject: netfilter: nfnetlink_osf: add missing enum in nfnetlink_osf uapi header xt_osf_window_size_options was originally part of include/uapi/linux/netfilter/xt_osf.h, restore it. Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_osf.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index a7311bc03d3a..ecf7dab81e9e 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -4,18 +4,6 @@ #include -/* Initial window size option state machine: multiple of mss, mtu or - * plain numeric value. Can also be made as plain numeric value which - * is not a multiple of specified value. - */ -enum nf_osf_window_size_options { - OSF_WSS_PLAIN = 0, - OSF_WSS_MSS, - OSF_WSS_MTU, - OSF_WSS_MODULO, - OSF_WSS_MAX, -}; - enum osf_fmatch_states { /* Packet does not match the fingerprint */ FMATCH_WRONG = 0, -- cgit From 342ac8448f1fb213908656ae5581d0f37a5954e8 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Wed, 8 Aug 2018 16:23:48 -0700 Subject: net/mlx5: Use max_num_eqs for calculation of required MSIX vectors New firmware has defined new HCA capability field called "max_num_eqs", that is the number of available EQs after subtracting reserved FW EQs. Before this capability the FW reported the EQ number in "log_max_eqs", the reported value also contained FW reserved EQs, but the driver might be failing to load on 320 cpus systems due to the fact that FW reserved EQs were not available to the driver. Now the driver has to obtain max_num_eqs value from new FW to get real number of EQs available. Signed-off-by: Denis Drozdov Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 60c2308fe062..63e1ce4c4826 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1133,7 +1133,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x40]; + u8 reserved_at_440[0x20]; + + u8 reserved_at_460[0x10]; + u8 max_num_eqs[0x10]; u8 reserved_at_480[0x3]; u8 log_max_l2_table[0x5]; -- cgit From cc9c82a8668cf98748bfbaa83c5c092d5115c25b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 8 Aug 2018 16:23:49 -0700 Subject: net/mlx5: Rename modify/query_vport state related enums Modify and query vport state commands share the same admin_state and op_mod values, rename the enums to fit them both. In addition, remove the esw prefix from the admin state enum as this also applied for vnic. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 6 +++--- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d489494b0a84..11fa4e66afc5 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -946,9 +946,9 @@ enum { }; enum { - MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, - MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, - MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, + MLX5_VPORT_ADMIN_STATE_DOWN = 0x0, + MLX5_VPORT_ADMIN_STATE_UP = 0x1, + MLX5_VPORT_ADMIN_STATE_AUTO = 0x2, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 63e1ce4c4826..6ead9c1a5396 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3764,8 +3764,8 @@ struct mlx5_ifc_query_vport_state_out_bits { }; enum { - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, + MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, }; struct mlx5_ifc_query_vport_state_in_bits { -- cgit From 29d8ebd44de8999e292dbb4de76744d4a41039ec Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 8 Aug 2018 16:23:51 -0700 Subject: net/mlx5: Remove unused mlx5_query_vport_admin_state mlx5_query_vport_admin_state() is not used anywhere. Remove it. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 9208cb8809ac..7e7c6dfcfb09 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -43,8 +43,6 @@ enum { }; u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); -u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, -- cgit From 74fe7b551f3385fa585d92616c85b3a575b2b2cb Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Tue, 7 Aug 2018 17:52:17 +0300 Subject: regmap: Add regmap_noinc_read API The regmap API usually assumes that bulk read operations will read a range of registers but some I2C/SPI devices have certain registers for which a such a read operation will return data from an internal FIFO instead. Add an explicit API to support bulk read without range semantics. Some linux drivers use regmap_bulk_read or regmap_raw_read for such registers, for example mpu6050 or bmi150 from IIO. This only happens to work because when caching is disabled a single regmap read op will map to a single bus read op (as desired). This breaks if caching is enabled and reg+1 happens to be a cacheable register. Without regmap support refactoring a driver to enable regmap caching requires separate I2C and SPI paths. This is exactly what regmap is supposed to help avoid. Suggested-by: Jonathan Cameron Signed-off-by: Crestez Dan Leonard Signed-off-by: Stefan Popa Signed-off-by: Mark Brown --- include/linux/regmap.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4f38068ffb71..19df946499d2 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -268,6 +268,13 @@ typedef void (*regmap_unlock)(void *); * field is NULL but precious_table (see below) is not, the * check is performed on such table (a register is precious if * it belongs to one of the ranges specified by precious_table). + * @readable_noinc_reg: Optional callback returning true if the register + * supports multiple read operations without incrementing + * the register number. If this field is NULL but + * rd_noinc_table (see below) is not, the check is + * performed on such table (a register is no increment + * readable if it belongs to one of the ranges specified + * by rd_noinc_table). * @disable_locking: This regmap is either protected by external means or * is guaranteed not be be accessed from multiple threads. * Don't use any locking mechanisms. @@ -295,6 +302,7 @@ typedef void (*regmap_unlock)(void *); * @rd_table: As above, for read access. * @volatile_table: As above, for volatile registers. * @precious_table: As above, for precious registers. + * @rd_noinc_table: As above, for no increment readable registers. * @reg_defaults: Power on reset values for registers (for use with * register cache support). * @num_reg_defaults: Number of elements in reg_defaults. @@ -344,6 +352,7 @@ struct regmap_config { bool (*readable_reg)(struct device *dev, unsigned int reg); bool (*volatile_reg)(struct device *dev, unsigned int reg); bool (*precious_reg)(struct device *dev, unsigned int reg); + bool (*readable_noinc_reg)(struct device *dev, unsigned int reg); bool disable_locking; regmap_lock lock; @@ -360,6 +369,7 @@ struct regmap_config { const struct regmap_access_table *rd_table; const struct regmap_access_table *volatile_table; const struct regmap_access_table *precious_table; + const struct regmap_access_table *rd_noinc_table; const struct reg_default *reg_defaults; unsigned int num_reg_defaults; enum regcache_type cache_type; @@ -946,6 +956,8 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg, int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); +int regmap_noinc_read(struct regmap *map, unsigned int reg, + void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); int regmap_update_bits_base(struct regmap *map, unsigned int reg, @@ -1196,6 +1208,13 @@ static inline int regmap_raw_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_noinc_read(struct regmap *map, unsigned int reg, + void *val, size_t val_len) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count) { -- cgit From b1f4267cc5448d20ae0c515a74141e74365e78a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Aug 2018 07:47:28 -0700 Subject: block: Remove two superfluous #include directives Commit 12f5b9314545 ("blk-mq: Remove generation seqeunce") removed the only seqcount_t and u64_stats_sync instances from but did not remove the corresponding #include directives. Since these include directives are no longer needed, remove them. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Keith Busch Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke , Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 050d599f5ea9..d6869e0e2b64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,8 +27,6 @@ #include #include #include -#include -#include struct module; struct scsi_ioctl_command; -- cgit From 6bad9b210a228d2fe0e0efe26d9b115348529cee Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Aug 2018 07:53:36 -0700 Subject: blkcg: Introduce blkg_root_lookup() This new function will be used in a later patch to verify whether a queue has been dissociated from the cgroup controller before being released. Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: Christoph Hellwig Cc: Ming Lei Cc: Omar Sandoval Cc: Johannes Thumshirn Cc: Alexandru Moise <00moses.alexander00@gmail.com> Cc: Joseph Qi Cc: Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f7b910768306..1361cfc9b878 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -341,6 +341,23 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, return __blkg_lookup(blkcg, q, false); } +/** + * blkg_lookup - look up blkg for the specified request queue + * @q: request_queue of interest + * + * Lookup blkg for @q at the root level. See also blkg_lookup(). + */ +static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) +{ + struct blkcg_gq *blkg; + + rcu_read_lock(); + blkg = blkg_lookup(&blkcg_root, q); + rcu_read_unlock(); + + return blkg; +} + /** * blkg_to_pdata - get policy private data * @blkg: blkg of interest @@ -864,6 +881,7 @@ static inline bool blk_cgroup_congested(void) { return false; } static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } -- cgit From 209b43759d65b2cc99ce7757249aacc82b03c4e2 Mon Sep 17 00:00:00 2001 From: Michael Büsch Date: Tue, 31 Jul 2018 22:15:09 +0200 Subject: ssb: Remove SSB_WARN_ON, SSB_BUG_ON and SSB_DEBUG Use the standard WARN_ON instead. If a small kernel is desired, WARN_ON can be disabled globally. Also remove SSB_DEBUG. Besides WARN_ON it only adds a tiny debug check. Include this check unconditionally. Signed-off-by: Michael Buesch Signed-off-by: Kalle Valo --- include/linux/ssb/ssb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 3b43655cabe6..0d5a2691e7e9 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -499,11 +499,9 @@ struct ssb_bus { /* Internal-only stuff follows. Do not touch. */ struct list_head list; -#ifdef CONFIG_SSB_DEBUG /* Is the bus already powered up? */ bool powered_up; int power_warn_count; -#endif /* DEBUG */ }; enum ssb_quirks { -- cgit From 624c0f0239f04cce78c86afa95eb2841c84fbab1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Aug 2018 15:38:38 +0200 Subject: phylink: add helper for configuring 2500BaseX modes Add a helper for MAC drivers to use in their validate callback to deal with 2500BaseX vs 1000BaseX modes, where the hardware supports both but it is not possible to automatically select between them. This helper defaults to 1000BaseX, as that is the 802.3 standard, and will allow users to select 2500BaseX either by forcing the speed if AN is disabled, or by changing the advertising mask if AN is enabled. Disabling AN is not recommended as it is only the speed that we're interested in controlling, not the duplex or pause mode parameters. Signed-off-by: Russell King Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phylink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 50eeae025f1e..021fc6595856 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -234,5 +234,6 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); +void phylink_helper_basex_speed(struct phylink_link_state *state); #endif -- cgit From 48ae5554a076c1bca31448d60263e4038def9f6f Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 8 Aug 2018 09:04:29 +0100 Subject: net: stmmac: Add XGMAC 2.10 HWIF entry Add a new entry to HWIF table for XGMAC 2.10. For now we fill it with empty callbacks which will be added in posterior patches. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 32feac5bbd75..c43e9a01b892 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -190,5 +190,6 @@ struct plat_stmmacenet_data { bool tso_en; int mac_port_sel_speed; bool en_tx_lpi_clockgating; + int has_xgmac; }; #endif -- cgit From 2d2917f7747805a1f4188672f308d82a8ba01700 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 9 Aug 2018 14:04:14 -0600 Subject: PCI: Export pcie_has_flr() pcie_flr() suggests pcie_has_flr() to ensure that PCIe FLR support is present prior to calling. pcie_flr() is exported while pcie_has_flr() is not. Resolve this. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 307b336496f6..bace761deff2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1090,6 +1090,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); void pcie_print_link_status(struct pci_dev *dev); +bool pcie_has_flr(struct pci_dev *dev); int pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); -- cgit From 5e7baf0fcb2a3aef7329f3c7543d4695a46bd321 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 9 Aug 2018 11:13:49 -0700 Subject: qed/qede: Multi CoS support. This patch adds support for tc mqprio offload, using this different traffic classes on the adapter can be utilized based on configured priority to tc map. For example - tc qdisc add dev eth0 root mqprio num_tc 4 map 0 1 2 3 This will cause SKBs with priority 0,1,2,3 to transmit over tc 0,1,2,3 hardware queues respectively. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 2978fa4add42..a1310482c4ed 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -39,6 +39,10 @@ #include #include +/* 64 max queues * (1 rx + 4 tx-cos + 1 xdp) */ +#define QED_MIN_L2_CONS (2 + NUM_PHYS_TCS_4PORT_K2) +#define QED_MAX_L2_CONS (64 * (QED_MIN_L2_CONS)) + struct qed_queue_start_common_params { /* Should always be relative to entity sending this. */ u8 vport_id; @@ -49,6 +53,8 @@ struct qed_queue_start_common_params { struct qed_sb_info *p_sb; u8 sb_idx; + + u8 tc; }; struct qed_rxq_start_ret_params { -- cgit From 15693fd37fc3a49da356164ed15b571c175efc34 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 8 Aug 2018 19:40:31 +0800 Subject: net: skbuff.h: fix using plain integer as NULL warning Fixes the following sparse warning: ./include/linux/skbuff.h:2365:58: warning: Using plain integer as NULL pointer Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ebdf158a795..7e237a63a70c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2362,7 +2362,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); else skb_set_transport_header(skb, offset_hint); -- cgit From bd2e9567db72e37f7f4b90faa5133bc7365b5f65 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 9 Aug 2018 16:19:52 -0500 Subject: PCI: Hide ACS quirk declarations inside PCI core Move declarations for these functions: pci_dev_specific_acs_enabled() pci_dev_specific_enable_acs() from include/linux/pci.h to drivers/pci/pci.h because nothing outside the PCI core needs to use them. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..1e1ed049dd1c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1876,20 +1876,9 @@ enum pci_fixup_pass { #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); -int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); -int pci_dev_specific_enable_acs(struct pci_dev *dev); #else static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } -static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, - u16 acs_flags) -{ - return -ENOTTY; -} -static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) -{ - return -ENOTTY; -} #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); -- cgit From b0768a86585d4d951a30ff565f19598dbbd67897 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 3 Aug 2018 16:58:09 +0900 Subject: net: Export skb_headers_offset_update This is needed for veth XDP which does skb_copy_expand()-like operation. v2: - Drop skb_copy_header part because it has already been exported now. Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ebdf158a795..e93b157f526c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, } struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +void skb_headers_offset_update(struct sk_buff *skb, int off); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); -- cgit From 0b19cc0a8694d4295383f88bc3441765875a57bc Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 3 Aug 2018 16:58:15 +0900 Subject: bpf: Make redirect_info accessible from modules We are going to add kern_flags field in redirect_info for kernel internal use. In order to avoid function call to access the flags, make redirect_info accessible from modules. Also as it is now non-static, add prefix bpf_ to redirect_info. v6: - Fix sparse warning around EXPORT_SYMBOL. Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c73dd7396886..4717af8b95e6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -537,6 +537,16 @@ struct sk_msg_buff { struct list_head list; }; +struct bpf_redirect_info { + u32 ifindex; + u32 flags; + struct bpf_map *map; + struct bpf_map *map_to_flush; + unsigned long map_owner; +}; + +DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) -- cgit From 2539650fadbf63a431e76535a9de7bff6ea5e409 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 3 Aug 2018 16:58:16 +0900 Subject: xdp: Helpers for disabling napi_direct of xdp_return_frame We need some mechanism to disable napi_direct on calling xdp_return_frame_rx_napi() from some context. When veth gets support of XDP_REDIRECT, it will redirects packets which are redirected from other devices. On redirection veth will reuse xdp_mem_info of the redirection source device to make return_frame work. But in this case .ndo_xdp_xmit() called from veth redirection uses xdp_mem_info which is not guarded by NAPI, because the .ndo_xdp_xmit() is not called directly from the rxq which owns the xdp_mem_info. This approach introduces a flag in bpf_redirect_info to indicate that napi_direct should be disabled even when _rx_napi variant is used as well as helper functions to use it. A NAPI handler who wants to use this flag needs to call xdp_set_return_frame_no_direct() before processing packets, and call xdp_clear_return_frame_no_direct() after xdp_do_flush_map() before exiting NAPI. v4: - Use bpf_redirect_info for storing the flag instead of xdp_mem_info to avoid per-frame copy cost. Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4717af8b95e6..2b072dab32c0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,10 +543,14 @@ struct bpf_redirect_info { struct bpf_map *map; struct bpf_map *map_to_flush; unsigned long map_owner; + u32 kern_flags; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +/* flags for bpf_redirect_info kern_flags */ +#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) @@ -775,6 +779,27 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static inline bool xdp_return_frame_no_direct(void) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; +} + +static inline void xdp_set_return_frame_no_direct(void) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; +} + +static inline void xdp_clear_return_frame_no_direct(void) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; +} + static inline int xdp_ok_fwd_dev(const struct net_device *fwd, unsigned int pktlen) { -- cgit From c9fbb2d25295a566b97d62e6904741e8e1702d83 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Aug 2018 10:47:43 +0200 Subject: net: Provide stub for __netif_set_xps_queue if there is no CONFIG_XPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building virtio_net driver without CONFIG_XPS fails with: drivers/net/virtio_net.c: In function ‘virtnet_set_affinity’: drivers/net/virtio_net.c:1910:3: error: implicit declaration of function ‘__netif_set_xps_queue’ [-Werror=implicit-function-declaration] __netif_set_xps_queue(vi->dev, mask, i, false); ^ Fixes: 4d99f6602cb5 ("net: allow to call netif_reset_xps_queues() under cpus_read_lock") Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 282e2e95ad5b..ca5ab98053c8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3412,6 +3412,13 @@ static inline int netif_set_xps_queue(struct net_device *dev, { return 0; } + +static inline int __netif_set_xps_queue(struct net_device *dev, + const unsigned long *mask, + u16 index, bool is_rxqs_map) +{ + return 0; +} #endif /** -- cgit From d799a4de0a250f1bdd99765bb8e55a5e2f469a1f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 3 Aug 2018 00:52:18 +0200 Subject: gpio: mmio: Fix up inverted direction registers The bgpio_init() takes one of two arguments to specify a register to set the direction of the GPIO line: either dirout that indicates that a 1 in the bit in that register sets the corresponding line to output, or dirin which indicates that a 1 in the bit in that register sets the corresponding line to input. Conversely setting the bit to 0 on these will turn the line into input and output respectively. One of these can be defined but not both. This means that a platform that sets a bit to 1 for output only defines dirout and a platform that sets a bit to 0 for output only defines dirin. In short this defines the polarity of the direction register. Both can also be left as NULL meaning the GPIO chip is either input only or output only. Tomer Maimon discovered that for get/set chips (those where the get and set registers are defined but no separate clear register, and specifying BGPIOF_READ_OUTPUT_REG_SET so that we say we want to read the output value from the SET register) we are unconditionally reading the value from the SET register when the direction bit is 1 and from the DAT register when the direction bit is 0, not taking the direction bit polarity into account. It would be expected that when the direction bit is inverted (dirin is defined but not dirout) we read the current value from the DAT register when the bit is 1 and from the SET register when the bit is 0. Currently only some versions of ATH79, brcmstb, some versions of CLP711x, GE, IOP and Loongson use the dirin mode (a 1 in the register means input). They are unaffected because BGPIOF_READ_OUTPUT_REG_SET is not set on any of them. (They do not read back the SET register to figure out the output value.) So this is no regression with current drivers. However the behaviour is wrong and does not work with Tomer's new driver where he needs to use the BGIOF_READ_OUTPUT_REG_SET. This fixes the above issue by: - Instead of defining separate functions for the inverted case, set up a flag in the gpio_chip that indicates that the direction is inverted. - Remove the special inverted functions for setting input/output and getting the direction, rely on the flag instead. - Respect this flag in bgpio_get_set() and bgpio_get_set_multiple() Reported-by: Tomer Maimon Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 5382b5183b7e..0ea328e71ec9 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -201,6 +201,8 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * @reg_set: output set register (out=high) for generic GPIO * @reg_clr: output clear register (out=low) for generic GPIO * @reg_dir: direction setting register for generic GPIO + * @bgpio_dir_inverted: indicates that the direction register is inverted + * (gpiolib private state variable) * @bgpio_bits: number of register bits used for a generic GPIO i.e. * * 8 * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep @@ -267,6 +269,7 @@ struct gpio_chip { void __iomem *reg_set; void __iomem *reg_clr; void __iomem *reg_dir; + bool bgpio_dir_inverted; int bgpio_bits; spinlock_t bgpio_lock; unsigned long bgpio_data; -- cgit From 5dc4c4b7d4e8115e7cde96a030f98cb3ab2e458c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Aug 2018 01:01:24 -0700 Subject: bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY This patch introduces a new map type BPF_MAP_TYPE_REUSEPORT_SOCKARRAY. To unleash the full potential of a bpf prog, it is essential for the userspace to be capable of directly setting up a bpf map which can then be consumed by the bpf prog to make decision. In this case, decide which SO_REUSEPORT sk to serve the incoming request. By adding BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, the userspace has total control and visibility on where a SO_REUSEPORT sk should be located in a bpf map. The later patch will introduce BPF_PROG_TYPE_SK_REUSEPORT such that the bpf prog can directly select a sk from the bpf map. That will raise the programmability of the bpf prog attached to a reuseport group (a group of sk serving the same IP:PORT). For example, in UDP, the bpf prog can peek into the payload (e.g. through the "data" pointer introduced in the later patch) to learn the application level's connection information and then decide which sk to pick from a bpf map. The userspace can tightly couple the sk's location in a bpf map with the application logic in generating the UDP payload's connection information. This connection info contact/API stays within the userspace. Also, when used with map-in-map, the userspace can switch the old-server-process's inner map to a new-server-process's inner map in one call "bpf_map_update_elem(outer_map, &index, &new_reuseport_array)". The bpf prog will then direct incoming requests to the new process instead of the old process. The old process can finish draining the pending requests (e.g. by "accept()") before closing the old-fds. [Note that deleting a fd from a bpf map does not necessary mean the fd is closed] During map_update_elem(), Only SO_REUSEPORT sk (i.e. which has already been added to a reuse->socks[]) can be used. That means a SO_REUSEPORT sk that is "bind()" for UDP or "bind()+listen()" for TCP. These conditions are ensured in "reuseport_array_update_check()". A SO_REUSEPORT sk can only be added once to a map (i.e. the same sk cannot be added twice even to the same map). SO_REUSEPORT already allows another sk to be created for the same IP:PORT. There is no need to re-create a similar usage in the BPF side. When a SO_REUSEPORT is deleted from the "reuse->socks[]" (e.g. "close()"), it will notify the bpf map to remove it from the map also. It is done through "bpf_sk_reuseport_detach()" and it will only be called if >=1 of the "reuse->sock[]" has ever been added to a bpf map. The map_update()/map_delete() has to be in-sync with the "reuse->socks[]". Hence, the same "reuseport_lock" used by "reuse->socks[]" has to be used here also. Care has been taken to ensure the lock is only acquired when the adding sk passes some strict tests. and freeing the map does not require the reuseport_lock. The reuseport_array will also support lookup from the syscall side. It will return a sock_gen_cookie(). The sock_gen_cookie() is on-demand (i.e. a sk's cookie is not generated until the very first map_lookup_elem()). The lookup cookie is 64bits but it goes against the logical userspace expectation on 32bits sizeof(fd) (and as other fd based bpf maps do also). It may catch user in surprise if we enforce value_size=8 while userspace still pass a 32bits fd during update. Supporting different value_size between lookup and update seems unintuitive also. We also need to consider what if other existing fd based maps want to return 64bits value from syscall's lookup in the future. Hence, reuseport_array supports both value_size 4 and 8, and assuming user will usually use value_size=4. The syscall's lookup will return ENOSPC on value_size=4. It will will only return 64bits value from sock_gen_cookie() when user consciously choose value_size=8 (as a signal that lookup is desired) which then requires a 64bits value in both lookup and update. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 28 ++++++++++++++++++++++++++++ include/linux/bpf_types.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cd8790d2c6ed..db11662faea6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -524,6 +524,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) } struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); +int array_map_alloc_check(union bpf_attr *attr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) @@ -769,6 +770,33 @@ static inline void __xsk_map_flush(struct bpf_map *map) } #endif +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); +#else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} + +#ifdef CONFIG_BPF_SYSCALL +static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, + void *key, void *value) +{ + return -EOPNOTSUPP; +} + +static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, + void *key, void *value, + u64 map_flags) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_BPF_SYSCALL */ +#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index add08be53b6f..14fd6c02d258 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -60,4 +60,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) #endif +#ifdef CONFIG_INET +BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) +#endif #endif -- cgit From 2dbb9b9e6df67d444fbe425c7f6014858d337adf Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Aug 2018 01:01:25 -0700 Subject: bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY. Like other non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN. BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern" to store the bpf context instead of using the skb->cb[48]. At the SO_REUSEPORT sk lookup time, it is in the middle of transiting from a lower layer (ipv4/ipv6) to a upper layer (udp/tcp). At this point, it is not always clear where the bpf context can be appended in the skb->cb[48] to avoid saving-and-restoring cb[]. Even putting aside the difference between ipv4-vs-ipv6 and udp-vs-tcp. It is not clear if the lower layer is only ipv4 and ipv6 in the future and will it not touch the cb[] again before transiting to the upper layer. For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB instead of IP[6]CB and it may still modify the cb[] after calling the udp[46]_lib_lookup_skb(). Because of the above reason, if sk->cb is used for the bpf ctx, saving-and-restoring is needed and likely the whole 48 bytes cb[] has to be saved and restored. Instead of saving, setting and restoring the cb[], this patch opts to create a new "struct sk_reuseport_kern" and setting the needed values in there. The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)" will serve all ipv4/ipv6 + udp/tcp combinations. There is no protocol specific usage at this point and it is also inline with the current sock_reuseport.c implementation (i.e. no protocol specific requirement). In "struct sk_reuseport_md", this patch exposes data/data_end/len with semantic similar to other existing usages. Together with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()", the bpf prog can peek anywhere in the skb. The "bind_inany" tells the bpf prog that the reuseport group is bind-ed to a local INANY address which cannot be learned from skb. The new "bind_inany" is added to "struct sock_reuseport" which will be used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order to avoid repeating the "bind INANY" test on "sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run. It can only be properly initialized when a "sk->sk_reuseport" enabled sk is adding to a hashtable (i.e. during "reuseport_alloc()" and "reuseport_add_sock()"). The new "sk_select_reuseport()" is the main helper that the bpf prog will use to select a SO_REUSEPORT sk. It is the only function that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY. As mentioned in the earlier patch, the validity of a selected sk is checked in run time in "sk_select_reuseport()". Doing the check in verification time is difficult and inflexible (consider the map-in-map use case). The runtime check is to compare the selected sk's reuseport_id with the reuseport_id that we want. This helper will return -EXXX if the selected sk cannot serve the incoming request (e.g. reuseport_id not match). The bpf prog can decide if it wants to do SK_DROP as its discretion. When the bpf prog returns SK_PASS, the kernel will check if a valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL"). If it does , it will use the selected sk. If not, the kernel will select one from "reuse->socks[]" (as before this patch). The SK_DROP and SK_PASS handling logic will be in the next patch. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 3 +++ include/linux/filter.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 14fd6c02d258..cd26c090e7c0 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #endif +#ifdef CONFIG_INET +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/filter.h b/include/linux/filter.h index 2b072dab32c0..70e9d57677fe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -32,6 +32,7 @@ struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; +struct sock_reuseport; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -833,6 +834,20 @@ void bpf_warn_invalid_xdp_action(u32 act); struct sock *do_sk_redirect_map(struct sk_buff *skb); struct sock *do_msg_redirect_map(struct sk_msg_buff *md); +#ifdef CONFIG_INET +struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, + struct bpf_prog *prog, struct sk_buff *skb, + u32 hash); +#else +static inline struct sock * +bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, + struct bpf_prog *prog, struct sk_buff *skb, + u32 hash) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; -- cgit From 8217ca653ec601246832d562207bc24bdf652d2f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Aug 2018 01:01:26 -0700 Subject: bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport selection This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in the earlier patch. "bpf_run_sk_reuseport()" will return -ECONNREFUSED when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP. The callers, in inet[6]_hashtable.c and ipv[46]/udp.c, are modified to handle this case and return NULL immediately instead of continuing the sk search from its hashtable. It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach BPF_PROG_TYPE_SK_REUSEPORT. The "sk_reuseport_attach_bpf()" will check if the attaching bpf prog is in the new SK_REUSEPORT or the existing SOCKET_FILTER type and then check different things accordingly. One level of "__reuseport_attach_prog()" call is removed. The "sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed back to "reuseport_attach_prog()" in sock_reuseport.c. sock_reuseport.c seems to have more knowledge on those test requirements than filter.c. In "reuseport_attach_prog()", after new_prog is attached to reuse->prog, the old_prog (if any) is also directly freed instead of returning the old_prog to the caller and asking the caller to free. The sysctl_optmem_max check is moved back to the "sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()". As of other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only bounded by the usual "bpf_prog_charge_memlock()" during load time instead of bounded by both bpf_prog_charge_memlock and sysctl_optmem_max. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 70e9d57677fe..5d565c50bcb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -753,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); +void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit From 19e226e8cc5da02f17ed119f9137036c0f0f5d80 Mon Sep 17 00:00:00 2001 From: Caleb Raitto Date: Thu, 9 Aug 2018 18:18:28 -0700 Subject: virtio: Make vp_set_vq_affinity() take a mask. Make vp_set_vq_affinity() take a cpumask instead of taking a single CPU. If there are fewer queues than cores, queue affinity should be able to map to multiple cores. Link: https://patchwork.ozlabs.org/patch/948149/ Suggested-by: Willem de Bruijn Signed-off-by: Caleb Raitto Acked-by: Gonglei Signed-off-by: David S. Miller --- include/linux/virtio_config.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 5559a2d31c46..32baf8e26735 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -79,7 +79,8 @@ struct virtio_config_ops { u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); - int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + int (*set_vq_affinity)(struct virtqueue *vq, + const struct cpumask *cpu_mask); const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, int index); }; @@ -236,11 +237,11 @@ const char *virtio_bus_name(struct virtio_device *vdev) * */ static inline -int virtqueue_set_affinity(struct virtqueue *vq, int cpu) +int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) { struct virtio_device *vdev = vq->vdev; if (vdev->config->set_vq_affinity) - return vdev->config->set_vq_affinity(vq, cpu); + return vdev->config->set_vq_affinity(vq, cpu_mask); return 0; } -- cgit From b86d865cb1cae1e61527ea0b8977078bbf694328 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 10 Aug 2018 13:28:07 -0700 Subject: blkcg: Make blkg_root_lookup() work for queues in bypass mode For legacy queues the only call of blkg_root_lookup() happens after bypass mode has been enabled. Since blkg_lookup() returns NULL for queues in bypass mode, modify the blkg_root_lookup() such that it no longer depends on bypass mode. Rename the function into blk_queue_root_blkg() as suggested by Tejun. Suggested-by: Tejun Heo Fixes: 6bad9b210a22 ("blkcg: Introduce blkg_root_lookup()") Signed-off-by: Bart Van Assche Cc: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1361cfc9b878..34aec30e06c7 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -342,20 +342,14 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, } /** - * blkg_lookup - look up blkg for the specified request queue + * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair * @q: request_queue of interest * * Lookup blkg for @q at the root level. See also blkg_lookup(). */ -static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) { - struct blkcg_gq *blkg; - - rcu_read_lock(); - blkg = blkg_lookup(&blkcg_root, q); - rcu_read_unlock(); - - return blkg; + return q->root_blkg; } /** @@ -881,7 +875,8 @@ static inline bool blk_cgroup_congested(void) { return false; } static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; } +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) +{ return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } -- cgit From e8d2bec0457962e8f348a9a3627b398f7fe5c5fc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 12 Aug 2018 01:59:17 +0200 Subject: bpf: decouple btf from seq bpf fs dump and enable more maps Commit a26ca7c982cb ("bpf: btf: Add pretty print support to the basic arraymap") and 699c86d6ec21 ("bpf: btf: add pretty print for hash/lru_hash maps") enabled support for BTF and dumping via BPF fs for array and hash/lru map. However, both can be decoupled from each other such that regular BPF maps can be supported for attaching BTF key/value information, while not all maps necessarily need to dump via map_seq_show_elem() callback. The basic sanity check which is a prerequisite for all maps is that key/value size has to match in any case, and some maps can have extra checks via map_check_btf() callback, e.g. probing certain types or indicating no support in general. With that we can also enable retrieving BTF info for per-cpu map types and lpm. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Yonghong Song --- include/linux/bpf.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index db11662faea6..523481a3471b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,7 +23,7 @@ struct bpf_prog; struct bpf_map; struct sock; struct seq_file; -struct btf; +struct btf_type; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { @@ -48,8 +48,9 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, - u32 key_type_id, u32 value_type_id); + int (*map_check_btf)(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type); }; struct bpf_map { @@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map) static inline bool bpf_map_support_seq_show(const struct bpf_map *map) { - return map->ops->map_seq_show_elem && map->ops->map_check_btf; + return map->btf && map->ops->map_seq_show_elem; } +int map_check_no_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type); + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ -- cgit From 7723628101aaeb1d723786747529b4ea65c5b5c5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sun, 12 Aug 2018 10:49:27 -0700 Subject: bpf: Introduce bpf_skb_ancestor_cgroup_id helper == Problem description == It's useful to be able to identify cgroup associated with skb in TC so that a policy can be applied to this skb, and existing bpf_skb_cgroup_id helper can help with this. Though in real life cgroup hierarchy and hierarchy to apply a policy to don't map 1:1. It's often the case that there is a container and corresponding cgroup, but there are many more sub-cgroups inside container, e.g. because it's delegated to containerized application to control resources for its subsystems, or to separate application inside container from infra that belongs to containerization system (e.g. sshd). At the same time it may be useful to apply a policy to container as a whole. If multiple containers like this are run on a host (what is often the case) and many of them have sub-cgroups, it may not be possible to apply per-container policy in TC with existing helpers such as bpf_skb_under_cgroup or bpf_skb_cgroup_id: * bpf_skb_cgroup_id will return id of immediate cgroup associated with skb, i.e. if it's a sub-cgroup inside container, it can't be used to identify container's cgroup; * bpf_skb_under_cgroup can work only with one cgroup and doesn't scale, i.e. if there are N containers on a host and a policy has to be applied to M of them (0 <= M <= N), it'd require M calls to bpf_skb_under_cgroup, and, if M changes, it'd require to rebuild & load new BPF program. == Solution == The patch introduces new helper bpf_skb_ancestor_cgroup_id that can be used to get id of cgroup v2 that is an ancestor of cgroup associated with skb at specified level of cgroup hierarchy. That way admin can place all containers on one level of cgroup hierarchy (what is a good practice in general and already used in many configurations) and identify specific cgroup on this level no matter what sub-cgroup skb is associated with. E.g. if there is a cgroup hierarchy: root/ root/container1/ root/container1/app11/ root/container1/app11/sub-app-a/ root/container1/app12/ root/container2/ root/container2/app21/ root/container2/app22/ root/container2/app22/sub-app-b/ , then having skb associated with root/container1/app11/sub-app-a/ it's possible to get ancestor at level 1, what is container1 and apply policy for this container, or apply another policy if it's container2. Policies can be kept e.g. in a hash map where key is a container cgroup id and value is an action. Levels where container cgroups are created are usually known in advance whether cgroup hierarchy inside container may be hard to predict especially in case when its creation is delegated to containerized application. == Implementation details == The helper gets ancestor by walking parents up to specified level. Another option would be to get different kind of "id" from cgroup->ancestor_ids[level] and use it with idr_find() to get struct cgroup for ancestor. But that would require radix lookup what doesn't seem to be better (at least it's not obviously better). Format of return value of the new helper is same as that of bpf_skb_cgroup_id. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- include/linux/cgroup.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c9fdf6f57913..32c553556bbd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, return cgrp->ancestor_ids[ancestor->level] == ancestor->id; } +/** + * cgroup_ancestor - find ancestor of cgroup + * @cgrp: cgroup to find ancestor of + * @ancestor_level: level of ancestor to find starting from root + * + * Find ancestor of cgroup at specified level starting from root if it exists + * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at + * @ancestor_level. + * + * This function is safe to call as long as @cgrp is accessible. + */ +static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, + int ancestor_level) +{ + struct cgroup *ptr; + + if (cgrp->level < ancestor_level) + return NULL; + + for (ptr = cgrp; + ptr && ptr->level > ancestor_level; + ptr = cgroup_parent(ptr)) + ; + + if (ptr && ptr->level == ancestor_level) + return ptr; + + return NULL; +} + /** * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry * @task: the task to be tested -- cgit From 9af18e56d43ca4864ce65c3542c513827c2697de Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 12 Aug 2018 09:14:03 -0400 Subject: cpumask: make cpumask_next_wrap available without smp The kbuild robot shows build failure on machines without CONFIG_SMP: drivers/net/virtio_net.c:1916:10: error: implicit declaration of function 'cpumask_next_wrap' cpumask_next_wrap is exported from lib/cpumask.o, which has lib-$(CONFIG_SMP) += cpumask.o same as other functions, also define it as static inline in the NR_CPUS==1 branch in include/linux/cpumask.h. If wrap is true and next == start, return nr_cpumask_bits, or 1. Else wrap across the range of valid cpus, here [0]. Fixes: 2ca653d607ce ("virtio_net: Stripe queue affinities across cores.") Signed-off-by: Willem de Bruijn Tested-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/linux/cpumask.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 57f20a0a7794..147bdec42215 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -159,6 +159,13 @@ static inline unsigned int cpumask_next_and(int n, return n+1; } +static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap) +{ + /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */ + return (wrap && n == 0); +} + /* cpu must be a valid cpu, ie 0, so there's no other choice. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) -- cgit From cf916ffbe0c628bb072ea829f300cff1874162ce Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 29 Apr 2018 09:17:34 -0500 Subject: net/mlx5: Improve argument name for add flow API The last argument to mlx5_add_flow_rules passes the number of destinations in the struct pointed to by the dest arg. Change the name to better reflect this fact. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c40f2fc68655..71fb503b2b52 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -177,7 +177,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num); + int num_dest); void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, -- cgit From b72ae8cac0caff86fe414fceb940655c2d1371c9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 29 Jul 2018 16:16:56 +0300 Subject: PCI: Add PCI_DEVICE_DATA() macro to fully describe device ID entry There are a lot of examples in the kernel where PCI_VDEVICE() is used and still looks not so convenient due to additional driver_data field attached. Introduce PCI_DEVICE_DATA() macro to fully describe device ID entry in shortest possible form. For example, before: { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), (kernel_ulong_t) &dwc3_pci_mrfld_properties, }, after: { PCI_DEVICE_DATA(INTEL, MRFLD, &dwc3_pci_mrfld_properties) }, Drivers can be converted later on in independent way. While here, remove the unused macro with the same name from Ralink wireless driver. Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Acked-by: Kalle Valo # for rt2x00 --- include/linux/pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 340029b2fb38..bf3665c534c2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -819,6 +819,21 @@ struct pci_driver { .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 +/** + * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form + * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) + * @dev: the device name (without PCI_DEVICE_ID__ prefix) + * @data: the driver data to be filled + * + * This macro is used to create a struct pci_device_id that matches a + * specific PCI device. The subvendor, and subdevice fields will be set + * to PCI_ANY_ID. + */ +#define PCI_DEVICE_DATA(vend, dev, data) \ + .vendor = PCI_VENDOR_ID_##vend, .device = PCI_DEVICE_ID_##vend##_##dev, \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0, \ + .driver_data = (kernel_ulong_t)(data) + enum { PCI_REASSIGN_ALL_RSRC = 0x00000001, /* Ignore firmware setup */ PCI_REASSIGN_ALL_BUS = 0x00000002, /* Reassign all bus numbers */ -- cgit From 2538fb89b8f4ee9e1c1759cfb3c7989d9ef1f6e7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Aug 2018 16:48:50 -0700 Subject: PCI: Limit config space size for Netronome NFP5000 Like the NFP4000 and NFP6000, the NFP5000 as an erratum where reading/ writing to PCI config space addresses above 0x600 can cause the NFP to generate PCIe completion timeouts. Limit the NFP5000's PF's config space size to 0x600 bytes as is already done for the NFP4000 and NFP6000. The NFP5000's VF is 0x6003 (PCI_DEVICE_ID_NETRONOME_NFP6000_VF), the same device ID as the NFP6000's VF. Thus, its config space is already limited by the existing use of quirk_nfp6000(). Signed-off-by: Jakub Kicinski Signed-off-by: Bjorn Helgaas Reviewed-by: Tony Egan --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 29502238e510..381fc009551d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,6 +2541,7 @@ #define PCI_DEVICE_ID_NETRONOME_NFP3200 0x3200 #define PCI_DEVICE_ID_NETRONOME_NFP3240 0x3240 #define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000 +#define PCI_DEVICE_ID_NETRONOME_NFP5000 0x5000 #define PCI_DEVICE_ID_NETRONOME_NFP6000 0x6000 #define PCI_DEVICE_ID_NETRONOME_NFP6000_VF 0x6003 -- cgit From 817aef260037f33ee0f44c17fe341323d3aebd6d Mon Sep 17 00:00:00 2001 From: Yannik Sembritzki Date: Thu, 16 Aug 2018 14:05:10 +0100 Subject: Replace magic for trusting the secondary keyring with #define Replace the use of a magic number that indicates that verify_*_signature() should use the secondary keyring with a symbol. Signed-off-by: Yannik Sembritzki Signed-off-by: David Howells Cc: keyrings@vger.kernel.org Cc: linux-security-module@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/verification.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h index a10549a6c7cd..cfa4730d607a 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -12,6 +12,12 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +/* + * Indicate that both builtin trusted keys and secondary trusted keys + * should be used. + */ +#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) + /* * The use to which an asymmetric key is being put. */ -- cgit