From 475c6bde7228e2d624153626941f290a314e4672 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:54:17 +0100 Subject: iwlwifi: mvm: fix TX crypto on 22560+ devices In the old days, we could transmit with HW crypto with an arbitrary key by filling it into TX_CMD. This was broken first with the advent of CCMP/GCMP-256 keys which don't fit there. This was broken *again* with the newer TX_CMD format on 22560+, where we simply cannot pass key material anymore. However, we forgot to update all the cases when we get a key from mac80211 and don't program it into the hardware but still return 0 for HW crypto on TX. In AP mode with WEP, we tried to fix this by programming the keys separately for each station later, but this ultimately turns out to be buggy, for example now it leaks memory when we have more than one WEP key. Fix this by simply using only SW crypto for WEP in newer devices by returning -EOPNOTSUPP instead of trying to program WEP keys later. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 64 ++++++----------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 41 +-------------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 7 +-- 4 files changed, 19 insertions(+), 94 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3a92c09d4692..eeaeb8475666 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2714,9 +2714,6 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_mac_ctxt_remove(mvm, vif); - kfree(mvmvif->ap_wep_key); - mvmvif->ap_wep_key = NULL; - mutex_unlock(&mvm->mutex); } @@ -3183,24 +3180,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, ret = iwl_mvm_update_sta(mvm, vif, sta); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { - /* if wep is used, need to set the key for the station now */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - mvm_sta->wep_key = - kmemdup(mvmvif->ap_wep_key, - sizeof(*mvmvif->ap_wep_key) + - mvmvif->ap_wep_key->keylen, - GFP_KERNEL); - if (!mvm_sta->wep_key) { - ret = -ENOMEM; - goto out_unlock; - } - - ret = iwl_mvm_set_sta_key(mvm, vif, sta, - mvm_sta->wep_key, - STA_KEY_IDX_INVALID); - } else { - ret = 0; - } + ret = 0; /* we don't support TDLS during DCM */ if (iwl_mvm_phy_ctx_count(mvm) > 1) @@ -3242,17 +3222,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, NL80211_TDLS_DISABLE_LINK); } - /* Remove STA key if this is an AP using WEP */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta, - mvm_sta->wep_key); - - if (!ret) - ret = rm_ret; - kfree(mvm_sta->wep_key); - mvm_sta->wep_key = NULL; - } - if (unlikely(ret && test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status))) @@ -3439,20 +3408,12 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, break; case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - if (vif->type == NL80211_IFTYPE_AP) { - struct iwl_mvm_vif *mvmvif = - iwl_mvm_vif_from_mac80211(vif); - - mvmvif->ap_wep_key = kmemdup(key, - sizeof(*key) + key->keylen, - GFP_KERNEL); - if (!mvmvif->ap_wep_key) - return -ENOMEM; - } - - if (vif->type != NL80211_IFTYPE_STATION) - return 0; - break; + if (vif->type == NL80211_IFTYPE_STATION) + break; + if (iwl_mvm_has_new_tx_api(mvm)) + return -EOPNOTSUPP; + /* support HW crypto on TX */ + return 0; default: /* currently FW supports only one optional cipher scheme */ if (hw->n_cipher_schemes && @@ -3540,12 +3501,17 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset); if (ret) { IWL_WARN(mvm, "set key failed\n"); + key->hw_key_idx = STA_KEY_IDX_INVALID; /* * can't add key for RX, but we don't need it - * in the device for TX so still return 0 + * in the device for TX so still return 0, + * unless we have new TX API where we cannot + * put key material into the TX_CMD */ - key->hw_key_idx = STA_KEY_IDX_INVALID; - ret = 0; + if (iwl_mvm_has_new_tx_api(mvm)) + ret = -EOPNOTSUPP; + else + ret = 0; } break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bca6f6b536d9..a50dc53df086 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -498,7 +498,6 @@ struct iwl_mvm_vif { netdev_features_t features; struct iwl_probe_resp_data __rcu *probe_resp_data; - struct ieee80211_key_conf *ap_wep_key; }; static inline struct iwl_mvm_vif * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 498c315291cf..db26f0041a81 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2333,21 +2333,6 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg, timeout); - if (mvmvif->ap_wep_key) { - u8 key_offset = iwl_mvm_set_fw_key_idx(mvm); - - __set_bit(key_offset, mvm->fw_key_table); - - if (key_offset == STA_KEY_IDX_INVALID) - return -ENOSPC; - - ret = iwl_mvm_send_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true, 0, NULL, 0, - key_offset, 0); - if (ret) - return ret; - } - return 0; } @@ -2419,28 +2404,6 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0); - if (mvmvif->ap_wep_key) { - int i; - - if (!__test_and_clear_bit(mvmvif->ap_wep_key->hw_key_idx, - mvm->fw_key_table)) { - IWL_ERR(mvm, "offset %d not used in fw key table.\n", - mvmvif->ap_wep_key->hw_key_idx); - return -ENOENT; - } - - /* track which key was deleted last */ - for (i = 0; i < STA_KEY_MAX_NUM; i++) { - if (mvm->fw_key_deleted[i] < U8_MAX) - mvm->fw_key_deleted[i]++; - } - mvm->fw_key_deleted[mvmvif->ap_wep_key->hw_key_idx] = 0; - ret = __iwl_mvm_remove_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true); - if (ret) - return ret; - } - ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 79700c7310a1..b4d4071b865d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -394,7 +394,6 @@ struct iwl_mvm_rxq_dup_data { * the BA window. To be used for UAPSD only. * @ptk_pn: per-queue PTK PN data structures * @dup_data: per queue duplicate packet detection data - * @wep_key: used in AP mode. Is a duplicate of the WEP key. * @deferred_traffic_tid_map: indication bitmap of deferred traffic per-TID * @tx_ant: the index of the antenna to use for data tx to this station. Only * used during connection establishment (e.g. for the 4 way handshake @@ -426,8 +425,6 @@ struct iwl_mvm_sta { struct iwl_mvm_key_pn __rcu *ptk_pn[4]; struct iwl_mvm_rxq_dup_data *dup_data; - struct ieee80211_key_conf *wep_key; - u8 reserved_queue; /* Temporary, until the new TLC will control the Tx protection */ -- cgit From d1967ce641772dd5e27b2a7a97fd625700cc589c Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:39:59 +0200 Subject: iwlwifi: add sync_nmi to trans ops Allow modules from outside pcie to call sync_nmi. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 7 +++++++ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index bbebbf3efd57..fa750711d889 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -618,6 +618,7 @@ struct iwl_trans_ops { struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans, u32 dump_mask); void (*debugfs_cleanup)(struct iwl_trans *trans); + void (*sync_nmi)(struct iwl_trans *trans); }; /** @@ -1245,6 +1246,12 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) } +static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) +{ + if (trans->ops->sync_nmi) + trans->ops->sync_nmi(trans); +} + /***************************************************** * transport helper functions *****************************************************/ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index bf8b61a476c5..59213164f35e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1043,7 +1043,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans) void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state); void iwl_trans_pcie_dump_regs(struct iwl_trans *trans); -void iwl_trans_sync_nmi(struct iwl_trans *trans); +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans); #ifdef CONFIG_IWLWIFI_DEBUGFS int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index fe8269d023de..28d6ae8c9336 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3318,7 +3318,8 @@ static void iwl_trans_pcie_resume(struct iwl_trans *trans) .unref = iwl_trans_pcie_unref, \ .dump_data = iwl_trans_pcie_dump_data, \ .d3_suspend = iwl_trans_pcie_d3_suspend, \ - .d3_resume = iwl_trans_pcie_d3_resume + .d3_resume = iwl_trans_pcie_d3_resume, \ + .sync_nmi = iwl_trans_pcie_sync_nmi #ifdef CONFIG_PM_SLEEP #define IWL_TRANS_PM_OPS \ @@ -3637,7 +3638,7 @@ out_no_pci: return ERR_PTR(ret); } -void iwl_trans_sync_nmi(struct iwl_trans *trans) +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 88530d9f4a54..38d110338987 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -965,7 +965,7 @@ static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans, cmd_str); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 9fbd37d23e85..7be73e2c4681 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1960,7 +1960,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id)); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } -- cgit From 8625794e363946d153c5bc57ed30ab7616a9995a Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 09:37:22 +0200 Subject: iwlwifi: dbg_ini: in case of region dump failure set memory to 0 In case the driver fails to dump a memory region, and this is the last region, then partial region would be extracted. Solve this by setting the data to zero in case of failure. This will cause dump to be a list of consecutive successful memory regions and trailing zeros with no partial memories extracted. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index f119c49cd39c..da5d9d79c372 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1614,6 +1614,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (!range) { IWL_ERR(fwrt, "Failed to fill region header: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } @@ -1623,6 +1624,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (range_size < 0) { IWL_ERR(fwrt, "Failed to dump region: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } range = range + range_size; -- cgit From b05d57c9b647b77edf9ac4550c493cf77c3923c7 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Tue, 12 Feb 2019 09:56:49 +0200 Subject: iwlwifi: dbg_ini: fix bad dump size calculation The driver initiates the size value with the size of the struct and then adds the size of the data and checks if the size is zero so size can not be equal to zero. Solve this by getting the data size, check that it is not equal to zero and only then add the struct size. Signed-off-by: Shahar S Matityahu Fixes: 7a14c23dcdee ("iwlwifi: dbg: dump data according to the new ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index da5d9d79c372..b99d38b37014 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1809,12 +1809,12 @@ _iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt, trigger = fwrt->dump.active_trigs[id].trig; - size = sizeof(*dump_file); - size += iwl_fw_ini_get_trigger_len(fwrt, trigger); - + size = iwl_fw_ini_get_trigger_len(fwrt, trigger); if (!size) return NULL; + size += sizeof(*dump_file); + dump_file = vzalloc(size); if (!dump_file) return NULL; -- cgit From 07d35b4270efd08e27cdad9a8d5fa31158cfc4e8 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:42:16 +0200 Subject: iwlwifi: use sync nmi in case of init flow failure In case of alive interrupt timeout or any failure in the init flow the driver generates FW nmi. The driver assumes that the nmi will generate SW interrupt. This assumption does not hold and leads to faulty behavior in the recovery flow. Solve this by using sync nmi, this way, even if the driver does not receive SW interrupt, it still starts the recovery flow. Also remove the wait queue from iwl_fwrt_stop_device since the driver is handling the SW interrupt synchronously. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 26 +++----------------------- drivers/net/wireless/intel/iwlwifi/fw/init.c | 1 - drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 7 ------- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index b99d38b37014..d7380016f1c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1944,14 +1944,10 @@ int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt, iwl_dump_error_desc->len = 0; ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0); - if (ret) { + if (ret) kfree(iwl_dump_error_desc); - } else { - set_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - - /* trigger nmi to halt the fw */ - iwl_force_nmi(fwrt->trans); - } + else + iwl_trans_sync_nmi(fwrt->trans); return ret; } @@ -2491,22 +2487,6 @@ IWL_EXPORT_SYMBOL(iwl_fw_dbg_apply_point); void iwl_fwrt_stop_device(struct iwl_fw_runtime *fwrt) { - /* if the wait event timeout elapses instead of wake up then - * the driver did not receive NMI interrupt and can not assume the FW - * is halted - */ - int ret = wait_event_timeout(fwrt->trans->fw_halt_waitq, - !test_bit(STATUS_FW_WAIT_DUMP, - &fwrt->trans->status), - msecs_to_jiffies(2000)); - if (!ret) { - /* failed to receive NMI interrupt, assuming the FW is stuck */ - set_bit(STATUS_FW_ERROR, &fwrt->trans->status); - - clear_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - } - - /* Assuming the op mode mutex is held at this point */ iwl_fw_dbg_collect_sync(fwrt); iwl_trans_stop_device(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index 7adf4e4e841a..12310e3d2fc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,7 +76,6 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, fwrt->ops_ctx = ops_ctx; INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk); iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); - init_waitqueue_head(&fwrt->trans->fw_halt_waitq); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index fa750711d889..d8690acee40c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -338,7 +338,6 @@ enum iwl_d3_status { * are sent * @STATUS_TRANS_IDLE: the trans is idle - general commands are not to be sent * @STATUS_TRANS_DEAD: trans is dead - avoid any read/write operation - * @STATUS_FW_WAIT_DUMP: if set, wait until cleared before collecting dump */ enum iwl_trans_status { STATUS_SYNC_HCMD_ACTIVE, @@ -351,7 +350,6 @@ enum iwl_trans_status { STATUS_TRANS_GOING_IDLE, STATUS_TRANS_IDLE, STATUS_TRANS_DEAD, - STATUS_FW_WAIT_DUMP, }; static inline int @@ -832,7 +830,6 @@ struct iwl_trans { u32 lmac_error_event_table[2]; u32 umac_error_event_table; unsigned int error_event_table_tlv_status; - wait_queue_head_t fw_halt_waitq; /* pointer to trans specific struct */ /*Ensure that this pointer will always be aligned to sizeof pointer */ @@ -1240,10 +1237,6 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) /* prevent double restarts due to the same erroneous FW */ if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) iwl_op_mode_nic_error(trans->op_mode); - - if (test_and_clear_bit(STATUS_FW_WAIT_DUMP, &trans->status)) - wake_up(&trans->fw_halt_waitq); - } static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) -- cgit From 0d5bad14226af0712c6eed06059a596fc89a4605 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 07:35:42 +0200 Subject: iwlwifi: rename structs to fit the new names rename few structs to fit the new marketing names Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index fdc56f821b5a..d5c29298ae3e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -235,8 +235,8 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; -const struct iwl_cfg iwl22260_2ax_cfg = { - .name = "Intel(R) Wireless-AX 22260", +const struct iwl_cfg iwl_ax200_cfg_cc = { + .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -249,7 +249,7 @@ const struct iwl_cfg iwl22260_2ax_cfg = { }; const struct iwl_cfg killer1650x_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (200NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -262,7 +262,7 @@ const struct iwl_cfg killer1650x_2ax_cfg = { }; const struct iwl_cfg killer1650w_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (200D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -328,7 +328,7 @@ const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0 = { }; const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* @@ -340,7 +340,7 @@ const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { }; const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index f5f87773667b..c91b537fa7ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -550,7 +550,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; -extern const struct iwl_cfg iwl22260_2ax_cfg; +extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650x_2ax_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 2b94e4cef56c..c35e50359d2c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -953,14 +953,14 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, {IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl_ax101_cfg_qu_hr)}, - {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x0080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0084, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0088, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)}, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 28d6ae8c9336..1d6f3053f233 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3561,7 +3561,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - (trans->cfg != &iwl22260_2ax_cfg || + (trans->cfg != &iwl_ax200_cfg_cc || trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { u32 hw_status; -- cgit From 972d8e1377795556024e948357e82532890f2f7d Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 08:07:03 +0200 Subject: iwlwifi: add new 0x2723/0x2080 card for 22000 add new PCI ID 0x2723/0x2080 for 22000 series Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c35e50359d2c..9f1af8da9dc1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -959,6 +959,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x2080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, -- cgit From b49c15e1211cc962cb73bbaaa5175ae068144893 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 19 Mar 2019 12:00:13 +0100 Subject: mac80211: un-schedule TXQs on powersave start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a station enters powersave, its queues should not be returned by ieee80211_next_txq() anymore. They will be re-scheduled again after the station has woken up again Fixes: 1866760096bf4 ("mac80211: Add TXQ scheduling API") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7f8d93401ce0..bf0b187f994e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1568,7 +1568,15 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { - if (txq_has_queue(sta->sta.txq[tid])) + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi = to_txq_info(txq); + + spin_lock(&local->active_txq_lock[txq->ac]); + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + spin_unlock(&local->active_txq_lock[txq->ac]); + + if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); -- cgit From 40586e3fc400c00c11151804dcdc93f8c831c808 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Mar 2019 18:54:27 +0100 Subject: mac80211: fix unaligned access in mesh table hash function The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 95eb5064fa91..b76a2aefa9ec 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { -- cgit From 78be2d21cc1cd3069c6138dcfecec62583130171 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 15 Mar 2019 17:38:57 +0200 Subject: mac80211: Increase MAX_MSG_LEN Looks that 100 chars isn't enough for messages, as we keep getting warnings popping from different places due to message shortening. Instead of trying to shorten the prints, just increase the buffer size. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), -- cgit From 08a75a887ee46828b54600f4bb7068d872a5edd5 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 15 Mar 2019 17:39:00 +0200 Subject: cfg80211: Handle WMM rules in regulatory domain intersection The support added for regulatory WMM rules did not handle the case of regulatory domain intersections. Fix it. Signed-off-by: Ilan Peer Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f1bf91eb226..0ba778f371cb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1309,6 +1309,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; } +static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1323,6 +1333,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2; freq_range1 = &rule1->freq_range; @@ -1333,6 +1345,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule; + wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1376,6 +1392,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms); + if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = *wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; -- cgit From eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2019 18:06:31 +0100 Subject: mac80211: fix memory accounting with A-MSDU aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8a49a74c0a37..5f546de10d96 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3221,6 +3221,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3261,6 +3262,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head || skb_is_gso(head)) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3318,6 +3320,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; -- cgit From 344c9719c508bb3ef4e9c134066c83ff00ab6206 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 16:57:35 -0700 Subject: cfg80211: Change an 'else if' into an 'else' in cfg80211_calculate_bitrate_he When building with -Wsometimes-uninitialized, Clang warns: net/wireless/util.c:1223:11: warning: variable 'result' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] Clang can't evaluate at this point that WARN(1, ...) always returns true because __ret_warn_on is defined as !!(condition), which isn't immediately evaluated as 1. Change this branch to else so that it's clear to Clang that we intend to bail out here. Link: https://github.com/ClangBuiltLinux/linux/issues/382 Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/wireless/util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index e4b8db5e81ec..75899b62bdc9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1220,9 +1220,11 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) else if (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26) result = rates_26[rate->he_gi]; - else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", - rate->bw, rate->he_ru_alloc)) + else { + WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", + rate->bw, rate->he_ru_alloc); return 0; + } /* now scale to the appropriate MCS */ tmp = result; -- cgit From 4856bfd230985e43e84c26473c91028ff0a533bd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 14:48:37 +0100 Subject: mac80211: do not call driver wake_tx_queue op during reconfig There are several scenarios in which mac80211 can call drv_wake_tx_queue after ieee80211_restart_hw has been called and has not yet completed. Driver private structs are considered uninitialized until mac80211 has uploaded the vifs, stations and keys again, so using private tx queue data during that time is not safe. The driver can also not rely on drv_reconfig_complete to figure out when it is safe to accept drv_wake_tx_queue calls again, because it is only called after all tx queues are woken again. To fix this, bail out early in drv_wake_tx_queue if local->in_reconfig is set. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 28d022a3eee3..ae4f0be3b393 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1195,6 +1195,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; -- cgit From 90abf96abd9bb00f36c8d3640255e6bfa73f7495 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 25 Feb 2019 12:38:49 +0000 Subject: cfg80211: Use kmemdup in cfg80211_gen_new_ie() Use kmemdup rather than duplicating its implementation Signed-off-by: YueHaibing Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 287518c6caa4..04d888628f29 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -190,10 +190,9 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, /* copy subelement as we need to change its content to * mark an ie after it is processed. */ - sub_copy = kmalloc(subie_len, gfp); + sub_copy = kmemdup(subelement, subie_len, gfp); if (!sub_copy) return 0; - memcpy(sub_copy, subelement, subie_len); pos = &new_ie[0]; -- cgit From d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Mon, 25 Feb 2019 15:37:20 +0530 Subject: nl80211: Add NL80211_FLAG_CLEAR_SKB flag for other NL commands This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed. Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys. Signed-off-by: Sunil Dutt Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c154..47e30a58566c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13650,7 +13650,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13701,7 +13702,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13709,7 +13711,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13738,7 +13741,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -14090,7 +14094,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -14145,7 +14150,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, -- cgit From 5b989c18dab2e82bac8a5564a174794bf84b20e6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Mar 2019 11:03:35 +0100 Subject: mac80211: rework locking for txq scheduling / airtime fairness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holding the lock around the entire duration of tx scheduling can create some nasty lock contention, especially when processing airtime information from the tx status or the rx path. Improve locking by only holding the active_txq_lock for lookups / scheduling list modifications. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 49 +++++++++++++++++++------------------------------ net/mac80211/tx.c | 44 ++++++++++++++++---------------------------- 2 files changed, 35 insertions(+), 58 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac2ed8ec662b..616998252dc7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6231,8 +6231,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to return packets from. * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). * Returns the next txq if successful, %NULL if no queue is eligible. If a txq * is returned, it should be returned with ieee80211_return_txq() after the * driver has finished scheduling it. @@ -6240,51 +6238,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac); /** - * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() - * - * @hw: pointer as obtained from ieee80211_alloc_hw() - * @txq: pointer obtained from station or virtual interface - * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). - */ -void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); - -/** - * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC + * ieee80211_txq_schedule_start - start new scheduling round for TXQs * * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to acquire locks for * - * Acquire locks needed to schedule TXQs from the given AC. Should be called - * before ieee80211_next_txq() or ieee80211_return_txq(). + * Should be called before ieee80211_next_txq() or ieee80211_return_txq(). + * The driver must not call multiple TXQ scheduling rounds concurrently. */ -void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock); +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac); + +/* (deprecated) */ +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) +{ +} /** - * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC + * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @ac: AC number to acquire locks for + * @txq: pointer obtained from station or virtual interface * - * Release locks previously acquired by ieee80211_txq_schedule_end(). + * Schedules a TXQ for transmission if it is not already scheduled. */ -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock); +void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); /** - * ieee80211_schedule_txq - schedule a TXQ for transmission + * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface - * - * Schedules a TXQ for transmission if it is not already scheduled. Takes a - * lock, which means it must *not* be called between - * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end() */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock); +static inline void +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + ieee80211_schedule_txq(hw, txq); +} /** * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5f546de10d96..134a3da147c6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3649,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue); struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_txq *ret = NULL; struct txq_info *txqi = NULL; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); begin: txqi = list_first_entry_or_null(&local->active_txqs[ac], struct txq_info, schedule_order); if (!txqi) - return NULL; + goto out; if (txqi->txq.sta) { struct sta_info *sta = container_of(txqi->txq.sta, @@ -3675,21 +3676,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) if (txqi->schedule_round == local->schedule_round[ac]) - return NULL; + goto out; list_del_init(&txqi->schedule_order); txqi->schedule_round = local->schedule_round[ac]; - return &txqi->txq; + ret = &txqi->txq; + +out: + spin_unlock_bh(&local->active_txq_lock[ac]); + return ret; } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_return_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); - lockdep_assert_held(&local->active_txq_lock[txq->ac]); + spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { @@ -3709,17 +3714,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw, list_add_tail(&txqi->schedule_order, &local->active_txqs[txq->ac]); } -} -EXPORT_SYMBOL(ieee80211_return_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - - spin_lock_bh(&local->active_txq_lock[txq->ac]); - ieee80211_return_txq(hw, txq); spin_unlock_bh(&local->active_txq_lock[txq->ac]); } EXPORT_SYMBOL(ieee80211_schedule_txq); @@ -3732,7 +3727,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct sta_info *sta; u8 ac = txq->ac; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); if (!txqi->txq.sta) goto out; @@ -3762,34 +3757,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, sta->airtime[ac].deficit += sta->airtime_weight; list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + spin_unlock_bh(&local->active_txq_lock[ac]); return false; out: if (!list_empty(&txqi->schedule_order)) list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[ac]); return true; } EXPORT_SYMBOL(ieee80211_txq_may_transmit); void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock) { struct ieee80211_local *local = hw_to_local(hw); spin_lock_bh(&local->active_txq_lock[ac]); local->schedule_round[ac]++; -} -EXPORT_SYMBOL(ieee80211_txq_schedule_start); - -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - spin_unlock_bh(&local->active_txq_lock[ac]); } -EXPORT_SYMBOL(ieee80211_txq_schedule_end); +EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, -- cgit From f5ae2f932e2f8f4f79796f44832ae8fca26f188a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Mar 2019 13:32:30 +0100 Subject: iwlwifi: mvm: avoid possible deadlock in TX path iwl_mvm_tx_mpdu() may run from iwl_mvm_add_new_dqa_stream_wk(), where soft-IRQs aren't disabled. In this case, it may hold the station lock and be interrupted by a soft-IRQ that also wants to acquire said lock, leading to a deadlock. Fix it by disabling soft-IRQs in iwl_mvm_add_new_dqa_stream_wk(). Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index db26f0041a81..98d123dd7177 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1399,7 +1399,9 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid); list_del_init(&mvmtxq->list); + local_bh_disable(); iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + local_bh_enable(); } mutex_unlock(&mvm->mutex); -- cgit From dcfe3b103dd1348706bbdcfb9921c65452a6144e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Mar 2019 13:22:43 +0100 Subject: iwlwifi: mvm: update offloaded rate control on changes With offloaded rate control, if the station parameters (rates, NSS, bandwidth) change (sta_rc_update method), call iwl_mvm_rs_rate_init() to propagate those change to the firmware. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index eeaeb8475666..6a3b11dd2edf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3258,6 +3258,13 @@ static void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u32 changed) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (changed & (IEEE80211_RC_BW_CHANGED | + IEEE80211_RC_SUPP_RATES_CHANGED | + IEEE80211_RC_NSS_CHANGED)) + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, + true); if (vif->type == NL80211_IFTYPE_STATION && changed & IEEE80211_RC_NSS_CHANGED) -- cgit From debec2f23910cb17f2c0f6d5e30a8da00bb5f515 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 14 Mar 2019 14:57:08 +0200 Subject: iwlwifi: add support for quz firmwares Add a new configuration with a new firmware name for quz devices. And, since these devices have the same PCI device and subsystem IDs, we need to add some code to switch from a normal qu firmware to the quz firmware. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 18 ++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index d5c29298ae3e..eb6defb6d0cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -82,6 +82,7 @@ #define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-" #define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-" #define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-" +#define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-" #define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-" #define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-" #define IWL_22000_SO_A_JF_B_FW_PRE "iwlwifi-so-a0-jf-b0-" @@ -105,8 +106,8 @@ IWL_22000_HR_A0_FW_PRE __stringify(api) ".ucode" #define IWL_22000_SU_Z0_MODULE_FIRMWARE(api) \ IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode" -#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ - IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" +#define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \ + IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" #define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \ @@ -235,6 +236,18 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg iwl_ax101_cfg_quz_hr = { + .name = "Intel(R) Wi-Fi 6 AX101", + .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl_ax200_cfg_cc = { .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, @@ -444,6 +457,7 @@ MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c91b537fa7ff..93070848280a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -549,6 +549,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax101_cfg_quz_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index aea6d03e545a..e539bc94eff7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -327,6 +327,7 @@ enum { #define CSR_HW_REV_TYPE_NONE (0x00001F0) #define CSR_HW_REV_TYPE_QNJ (0x0000360) #define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364) +#define CSR_HW_REV_TYPE_QUZ (0x0000354) #define CSR_HW_REV_TYPE_HR_CDB (0x0000340) #define CSR_HW_REV_TYPE_SO (0x0000370) #define CSR_HW_REV_TYPE_TY (0x0000420) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 1d6f3053f233..79c1dc05f948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3543,6 +3543,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (cfg == &iwl_ax101_cfg_qu_hr) { if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == + CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && + trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) { + trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0; + } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) { trans->cfg = &iwl_ax101_cfg_qu_hr; } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == -- cgit From a58d7525b8014115d57fd30186a84f6d30783f2c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 12 Mar 2019 10:51:40 +0100 Subject: cfg80211: add ratelimited variants of err and warn wiphy_{err,warn}_ratelimited will be used by rt2x00 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bb307a11ee63..13bfeb712d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7183,6 +7183,11 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, #define wiphy_info(wiphy, format, args...) \ dev_info(&(wiphy)->dev, format, ##args) +#define wiphy_err_ratelimited(wiphy, format, args...) \ + dev_err_ratelimited(&(wiphy)->dev, format, ##args) +#define wiphy_warn_ratelimited(wiphy, format, args...) \ + dev_warn_ratelimited(&(wiphy)->dev, format, ##args) + #define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -- cgit From 45fcef8b727b6f171bc5443e8153181a367d7a15 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Mar 2019 08:56:22 +0100 Subject: mac80211_hwsim: calculate if_combination.max_interfaces If we just set this to 2048, and have multiple limits you can select from, the total number might run over and cause a warning in cfg80211. This doesn't make sense, so we just calculate the total max_interfaces now. Reported-by: syzbot+8f91bd563bbff230d0ee@syzkaller.appspotmail.com Fixes: 99e3a44bac37 ("mac80211_hwsim: allow setting iftype support") Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681..524eb5805995 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2644,7 +2644,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, enum nl80211_band band; const struct ieee80211_ops *ops = &mac80211_hwsim_ops; struct net *net; - int idx; + int idx, i; int n_limits = 0; if (WARN_ON(param->channels > 1 && !param->use_chanctx)) @@ -2768,12 +2768,23 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, goto failed_hw; } + data->if_combination.max_interfaces = 0; + for (i = 0; i < n_limits; i++) + data->if_combination.max_interfaces += + data->if_limits[i].max; + data->if_combination.n_limits = n_limits; - data->if_combination.max_interfaces = 2048; data->if_combination.limits = data->if_limits; - hw->wiphy->iface_combinations = &data->if_combination; - hw->wiphy->n_iface_combinations = 1; + /* + * If we actually were asked to support combinations, + * advertise them - if there's only a single thing like + * only IBSS then don't advertise it as combinations. + */ + if (data->if_combination.max_interfaces > 1) { + hw->wiphy->iface_combinations = &data->if_combination; + hw->wiphy->n_iface_combinations = 1; + } if (param->ciphers) { memcpy(data->ciphers, param->ciphers, -- cgit From 2b4a66980217332d91ab1785e1750857d6d52bc8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 18 Mar 2019 12:00:58 +0100 Subject: mac80211: make ieee80211_schedule_txq schedule empty TXQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there is no way for the driver to signal to mac80211 that it should schedule a TXQ even if there are no packets on the mac80211 part of that queue. This is problematic if the driver has an internal retry queue to deal with software A-MPDU retry. This patch changes the behavior of ieee80211_schedule_txq to always schedule the queue, as its only user (ath9k) seems to expect such behavior already: it calls this function on tx status and on powersave wakeup whenever its internal retry queue is not empty. Also add an extra argument to ieee80211_return_txq to get the same behavior. This fixes an issue on ath9k where tx queues with packets to retry (and no new packets in mac80211) would not get serviced. Fixes: 89cea7493a346 ("ath9k: Switch to mac80211 TXQ scheduling and airtime APIs") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- drivers/net/wireless/ath/ath9k/xmit.c | 5 ++++- include/net/mac80211.h | 24 ++++++++++++++++++++---- net/mac80211/tx.c | 10 ++++++---- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a20ea270d519..1acc622d2183 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2728,7 +2728,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus++; num_bytes += ret; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ieee80211_txq_schedule_end(hw, txq->ac); record->num_msdus = cpu_to_le16(num_msdus); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b73c23d4ce86..41e89db244d2 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4089,7 +4089,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac) if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); if (ret == -EBUSY) break; @@ -4374,7 +4374,7 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); out: ieee80211_txq_schedule_end(hw, ac); diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 773d428ff1b0..b17e1ca40995 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1938,12 +1938,15 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq) goto out; while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) { + bool force; + tid = (struct ath_atx_tid *)queue->drv_priv; ret = ath_tx_sched_aggr(sc, txq, tid); ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret); - ieee80211_return_txq(hw, queue); + force = !skb_queue_empty(&tid->retry_q); + ieee80211_return_txq(hw, queue, force); } out: diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 616998252dc7..112dc18c658f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6253,26 +6253,42 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) { } +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force); + /** * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface * - * Schedules a TXQ for transmission if it is not already scheduled. + * Schedules a TXQ for transmission if it is not already scheduled, + * even if mac80211 does not have any packets buffered. + * + * The driver may call this function if it has buffered packets for + * this TXQ internally. */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); +static inline void +ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + __ieee80211_schedule_txq(hw, txq, true); +} /** * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface + * @force: schedule txq even if mac80211 does not have any buffered packets. + * + * The driver may set force=true if it has buffered packets for this TXQ + * internally. */ static inline void -ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, + bool force) { - ieee80211_schedule_txq(hw, txq); + __ieee80211_schedule_txq(hw, txq, force); } /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 134a3da147c6..2e816dd67be7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3688,8 +3688,9 @@ out: } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool force) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); @@ -3697,7 +3698,8 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && - (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + (force || !skb_queue_empty(&txqi->frags) || + txqi->tin.backlog_packets)) { /* If airtime accounting is active, always enqueue STAs at the * head of the list to ensure that they only get moved to the * back by the airtime DRR scheduler once they have a negative @@ -3717,7 +3719,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_unlock_bh(&local->active_txq_lock[txq->ac]); } -EXPORT_SYMBOL(ieee80211_schedule_txq); +EXPORT_SYMBOL(__ieee80211_schedule_txq); bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) -- cgit From 78ad2341521d5ea96cb936244ed4c4c4ef9ec13b Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Sat, 9 Feb 2019 15:01:38 +0100 Subject: mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode Restore SW_CRYPTO_CONTROL operation on AP_VLAN interfaces for unicast keys, the original override was intended to be done for group keys as those are treated specially by mac80211 and would always have been rejected. Now the situation is that AP_VLAN support must be enabled by the driver if it can support it (meaning it can support software crypto GTK TX). Thus, also simplify the code - if we get here with AP_VLAN and non- pairwise key, software crypto must be used (driver doesn't know about the interface) and can be used (driver must've advertised AP_VLAN if it also uses SW_CRYPTO_CONTROL). Fixes: db3bdcb9c3ff ("mac80211: allow AP_VLAN operation on crypto controlled devices") Signed-off-by: Alexander Wetzel [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4700718e010f..37e372896230 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } } ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL; -- cgit From 31634bf5dcc418b5b2cacd954394c0c4620db6a2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 22:09:05 -0700 Subject: net/mlx5: FPGA, tls, hold rcu read lock a bit longer To avoid use-after-free, hold the rcu read lock until we are done copying flow data into the command buffer. Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Reported-by: Eric Dumazet Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 8de64e88c670..08aa7266c8c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); - - if (!flow) { - WARN_ONCE(1, "Received NULL pointer for handle\n"); - return -EINVAL; - } - buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); -- cgit From df3a8344d404a810b4aadbf19b08c8232fbaa715 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 01:05:41 -0700 Subject: net/mlx5: FPGA, tls, idr remove on flow delete Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data structure, this is risky and can cause use-after-free, since the idr_remove is delayed until tls_send_teardown_cmd completion. Instead of delaying idr_remove, in this patch we do it on mlx5_fpga_tls_del_flow, before actually kfree(flow). Added synchronize_rcu before kfree(flow) Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 08aa7266c8c0..22a2ef111514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", @@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } -- cgit From 192fba79822d9612af5ccd3f8aa05c922640ee13 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 10:00:35 +0200 Subject: net/mlx5e: Skip un-needed tx recover if interface state is down Skip recover operation if interface is in down state as TX objects are not open. This fixes a bug were the recover flow re-opened TX objects which were not opened before, leading to a possible memory leak at driver unload. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9d38e62cdf24..a85843e4e925 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -186,12 +186,18 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) { - int err; + int err = 0; rtnl_lock(); mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + mlx5e_close_locked(priv->netdev); err = mlx5e_open_locked(priv->netdev); + +out: mutex_unlock(&priv->state_lock); rtnl_unlock(); -- cgit From 484c1ada0bd2bdcb76f849ae77983e24320a2d1d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 14:26:47 +0200 Subject: net/mlx5e: Use fail-safe channels reopen in tx reporter recover When requested to recover from error, the tx reporter might open new channels and close the existing ones. Use safe channels switch flow in order to guarantee opened channels at the end of the recover flow. For this purpose, define mlx5e_safe_reopen_channels function and use it within those flows. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 71c65cc17904..d3eaf2ceaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -858,6 +858,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_hw_modify hw_modify); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index a85843e4e925..476dd97f7f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -194,8 +194,7 @@ static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto out; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5fdbd3190d9..002e2adb3722 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2937,6 +2937,14 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, return 0; } +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -4161,11 +4169,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) if (!report_failed) goto unlock; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); if (err) netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); unlock: -- cgit From 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 21 Mar 2019 19:07:20 -0700 Subject: net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded XDP programs might change packets data contents which will make the reported skb checksum (checksum complete) invalid. When XDP programs are loaded/unloaded set/clear rx RQs MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5efce4a3ff79..76a3d01a489e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1768,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 002e2adb3722..4187c43b20ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -951,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3dde5c7e0739..91af48344743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -752,7 +752,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet -- cgit From 0aa1d18615c163f92935b806dcaff9157645233a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 12 Mar 2019 00:24:52 -0700 Subject: net/mlx5e: Rx, Fixup skb checksum for packets with tail padding When an ethernet frame with ip payload is padded, the padding octets are not covered by the hardware checksum. Prior to the cited commit, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, the kernel will try to trim the padding bytes and subtract their checksum from skb->csum. In this patch we fixup skb->csum for any ip packet with tail padding of any size, if any padding found. FCS case is just one special case of this general purpose patch, hence, it is removed. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 79 ++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 91af48344743..0e254de23f3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -712,17 +712,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) -{ - const void *fcs_bytes; - u32 _fcs_bytes; - - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); - - return __get_unaligned_cpu32(fcs_bytes); -} - static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { void *ip_p = skb->data + network_depth; @@ -733,6 +722,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -781,10 +832,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1a78e05cbba8..b75aa8b8bf04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -59,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -151,6 +153,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -1190,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 4640d4f986f8..16c3b785f282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; -- cgit From 0318a7b7fcad9765931146efa7ca3a034194737c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 25 Mar 2019 22:10:59 -0700 Subject: net/mlx5e: Rx, Check ip headers sanity In the two places is_last_ethertype_ip is being called, the caller will be looking inside the ip header, to be safe, add ip{4,6} header sanity check. And return true only on valid ip headers, i.e: the whole header is contained in the linear part of the skb. Note: Such situation is very rare and hard to reproduce, since mlx5e allocates a large enough headroom to contain the largest header one can imagine. Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Reported-by: Cong Wang Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e254de23f3d..36fd628188bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -692,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) -- cgit From 5e0060b1491b299b1706414e61ede0b02265680e Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Fri, 29 Mar 2019 12:50:37 +0000 Subject: net/mlx5e: Protect against non-uplink representor for encap TC encap offload is supported only for the physical uplink representor. Fail for non uplink representor. Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices") Signed-off-by: Dmytro Linkin Reviewed-by: Eli Britstein Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index fa2a3c444cdc..eec07b34b4ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + return 0; } -- cgit From 8c8811d46d00d119ffbe039a6e52a0b504df1c2c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 31 Mar 2019 12:53:03 +0000 Subject: Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets" This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9. Prior the commit we are reverting, checksum unnecessary was only set when both the L3 OK and L4 OK bits are set on the CQE. This caused packets of IP protocols such as SCTP which are not dealt by the current HW L4 parser (hence the L4 OK bit is not set, but the L4 header type none bit is set) to go through the checksum none code, where currently we wrongly report checksum unnecessary for them, a regression. Fix this by a revert. Note that on our usual track we report checksum complete, so the revert isn't expected to have any notable performance impact. Also, when we are not on the checksum complete track, the L4 protocols for which we report checksum none are not high performance ones, we will still report checksum unnecessary for UDP/TCP. Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets") Signed-off-by: Or Gerlitz Reported-by: Avi Urman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 36fd628188bc..c3b3002ff62f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -847,8 +847,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; -- cgit From 7ee2ace9c544a0886e02b54b625e521df8692d20 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Aug 2018 14:29:16 +0300 Subject: net/mlx5e: Switch to Toeplitz RSS hash by default Although XOR hash function can perform very well on some special use cases, to align with all drivers, mlx5 driver should use Toeplitz hash by default. Toeplitz is more stable for the general use case and it is more standard and reliable. On top of that, since XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a repeated 8 bits pattern. When used for udp tunneling RSS source port manipulation it results in fixed source port, which will cause bad RSS spread. Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Tariq Toukan Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4187c43b20ad..f7eb521db580 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4564,7 +4564,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, { enum mlx5e_traffic_types tt; - rss_params->hfunc = ETH_RSS_HASH_XOR; + rss_params->hfunc = ETH_RSS_HASH_TOP; netdev_rss_key_fill(rss_params->toeplitz_hash_key, sizeof(rss_params->toeplitz_hash_key)); mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, -- cgit From d4d0e40977ac450f32f2db5e4d8e23c9d2578899 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:12 +0000 Subject: mlxsw: spectrum_switchdev: Add MDB entries in prepare phase The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core. Fix this by doing the work in the prepare phase instead. [1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40 Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Reported-by: Alex Kushnarov Tested-by: Alex Kushnarov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f6ce386c3036..50111f228d77 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1630,7 +1630,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); -- cgit From a8c133b06183c529c51cd0d54eb57d6b7078370c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:13 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware. Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag. Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d23d53c0e284..91cd6fa42e9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; -- cgit From 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:14 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue. [1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... [97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40 Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel Reported-by: Semion Lisyansky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 91cd6fa42e9a..2c0e3281bd7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1961,7 +1961,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM; -- cgit From b442fed1b724af0de087912a5718ddde1b87acbb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 2c0e3281bd7a..f26a4ca29363 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1958,7 +1958,7 @@ static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, -- cgit From 972fae683cbad5cf348268e76abc6d55cfb3ba87 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: spectrum_router: Do not check VRF MAC address Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") enabled the driver to veto router interface (RIF) MAC addresses that it cannot support. This check should only be performed for interfaces for which the driver actually configures a RIF. A VRF upper is not one of them, so ignore it. Without this patch it is not possible to set an IP address on the VRF device and use it as a loopback. Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 52fed8c7bf1e..902e766a8ed3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6781,7 +6781,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, /* A RIF is not created for macvlan netdevs. Their MAC is used to * populate the FDB */ - if (netif_is_macvlan(dev)) + if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) return 0; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { -- cgit From 7052e2436373cc2c46981e165d1cbc5023f20dd7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:16 +0000 Subject: selftests: mlxsw: Test VRF MAC vetoing Test that it is possible to set an IP address on a VRF and that it is not vetoed. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/rtnetlink.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index c4cf6e6d800e..a6c196c8534c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -11,6 +11,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" rif_set_addr_test + rif_vrf_set_addr_test rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test @@ -98,6 +99,25 @@ rif_set_addr_test() ip link set dev $swp1 addr $swp1_mac } +rif_vrf_set_addr_test() +{ + # Test that it is possible to set an IP address on a VRF upper despite + # its random MAC address. + RET=0 + + ip link add name vrf-test type vrf table 10 + ip link set dev $swp1 master vrf-test + + ip -4 address add 192.0.2.1/24 dev vrf-test + check_err $? "failed to set IPv4 address on VRF" + ip -6 address add 2001:db8:1::1/64 dev vrf-test + check_err $? "failed to set IPv6 address on VRF" + + log_test "RIF - setting IP address on VRF" + + ip link del dev vrf-test +} + rif_inherit_bridge_addr_test() { RET=0 -- cgit From d5949d92c29ce147a9cb9e21fcf8ad7c1ff327b1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:17 +0000 Subject: mlxsw: spectrum_buffers: Add a multicast pool for Spectrum-2 In Spectrum-1, when a multicast packet is admitted to the shared buffer it increases the quotas of all the ports and {port, TC} to which it is forwarded to. The above means that multicast packets are accounted multiple times in the shared buffer and can therefore cause the associated shared buffer pool to fill up very quickly. To work around this issue, commit e83c045e53d7 ("mlxsw: spectrum_buffers: Configure MC pool") added a dedicated multicast pool in which multicast packets are accounted. The issue is not present in Spectrum-2, but in order to be backward compatible with Spectrum-1, its default behavior is to allow a multicast packet to increase multiple egress quotas instead of one. Until the new (non-backward compatible) mode is supported, configure a dedicated multicast pool as in Spectrum-1. Fixes: fe099bf682ab ("mlxsw: spectrum_buffers: Add Spectrum-2 shared buffer configuration") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 9a79b5e11597..d633bef5f105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -70,6 +70,7 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 1}, {MLXSW_REG_SBXX_DIR_EGRESS, 2}, {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; #define MLXSW_SP_SB_ING_TC_COUNT 8 @@ -428,6 +429,7 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, @@ -517,14 +519,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), MLXSW_SP_SB_CM(1, 0xff, 4), }; @@ -671,6 +673,7 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(10000, 90000), }; static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) -- cgit From b66b7bd2bdc1a74c46a0a470f9ac19629320d212 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:06:59 -0500 Subject: ibmvnic: Enable GRO Enable Generic Receive Offload in the ibmvnic driver. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 51cfe95f3e24..cc22c5351513 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3837,7 +3837,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO; + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) adapter->netdev->features |= NETIF_F_IP_CSUM; -- cgit From dde746a35f8b7da4b9515dd3dc4708a9926fbd65 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:07:00 -0500 Subject: ibmvnic: Fix netdev feature clobbering during a reset While determining offload capabilities of backing hardware during a device reset, the driver is clobbering current feature settings. Update hw_features on reset instead of features unless a feature is enabled that is no longer supported on the current backing device. Also enable features that were not supported prior to the reset but were previously enabled or requested by the user. This can occur if the reset is the result of a carrier change, such as a device failover or partition migration. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cc22c5351513..3dfb2d131eb7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3762,6 +3762,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + netdev_features_t old_hw_features = 0; union ibmvnic_crq crq; int i; @@ -3837,24 +3838,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->features |= NETIF_F_IP_CSUM; + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->features |= NETIF_F_IPV6_CSUM; + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; if ((adapter->netdev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->features |= NETIF_F_RXCSUM; + adapter->netdev->hw_features |= NETIF_F_RXCSUM; if (buf->large_tx_ipv4) - adapter->netdev->features |= NETIF_F_TSO; + adapter->netdev->hw_features |= NETIF_F_TSO; if (buf->large_tx_ipv6) - adapter->netdev->features |= NETIF_F_TSO6; + adapter->netdev->hw_features |= NETIF_F_TSO6; - adapter->netdev->hw_features |= adapter->netdev->features; + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; -- cgit From 5a03bc73abed6ae196c15e9950afde19d48be12c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:30 -0700 Subject: net/tls: fix the IV leaks Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made freeing of IV and record sequence number conditional to SW path only, but commit e8f69799810c ("net/tls: Add generic NIC offload infrastructure") also allocates that state for the device offload configuration. Remember to free it. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 135a7ee9db03..38b3b2a9835a 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); -- cgit From 35b71a34ada62c9573847a324bf06a133fe11b11 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:31 -0700 Subject: net/tls: don't leak partially sent record in device mode David reports that tls triggers warnings related to sk->sk_forward_alloc not being zero at destruction time: WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170 When sender fills up the write buffer and dies from SIGPIPE. This is due to the device implementation not cleaning up the partially_sent_record. This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") moved the partial record cleanup to the SW-only path. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: David Beckett Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 2 ++ net/tls/tls_device.c | 7 +++++++ net/tls/tls_main.c | 22 ++++++++++++++++++++++ net/tls/tls_sw.c | 15 +-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a5a938583295..c7f7dc344e73 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -307,6 +307,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_device_sk_destruct(struct sock *sk); +void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); @@ -330,6 +331,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 38b3b2a9835a..9f3bdbc1e593 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk) } EXPORT_SYMBOL(tls_device_sk_destruct); +void tls_device_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_free_partial_record(sk, tls_ctx); +} + static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index df921a2904b9..a3cca1ef0098 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +{ + struct scatterlist *sg; + + sg = ctx->partially_sent_record; + if (!sg) + return false; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + ctx->partially_sent_record = NULL; + return true; +} + static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); @@ -267,6 +287,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); } if (ctx->rx_conf == TLS_SW) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 20b191227969..b50ced862f6f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2052,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ - if (tls_ctx->partially_sent_record) { - struct scatterlist *sg = tls_ctx->partially_sent_record; - - while (1) { - put_page(sg_page(sg)); - sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; - } - - tls_ctx->partially_sent_record = NULL; - + if (tls_free_partial_record(sk, tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); -- cgit From 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:32 -0700 Subject: net: strparser: partially revert "strparser: Call skb_unclone conditionally" This reverts the first part of commit 4e485d06bb8c ("strparser: Call skb_unclone conditionally"). To build a message with multiple fragments we need our own root of frag_list. We can't simply use the frag_list of orig_skb, because it will lead to linking all orig_skbs together creating very long frag chains, and causing stack overflow on kfree_skb() (which is called recursively on the frag_lists). BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP RIP: 0010:free_one_page+0x2b/0x490 Call Trace: __free_pages_ok+0x143/0x2c0 skb_release_data+0x8e/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 [...] skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 __kfree_skb+0xe/0x20 tcp_disconnect+0xd6/0x4d0 tcp_close+0xf4/0x430 ? tcp_check_oom+0xf0/0xf0 tls_sk_proto_close+0xe4/0x1e0 [tls] inet_release+0x36/0x60 __sock_release+0x37/0xa0 sock_close+0x11/0x20 __fput+0xa2/0x1d0 task_work_run+0x89/0xb0 exit_to_usermode_loop+0x9a/0xa0 do_syscall_64+0xc0/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Let's leave the second unclone conditional, as I'm not entirely sure what is its purpose :) Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/strparser/strparser.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 860dcfb95ee4..fa6c977b4c41 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; } if (unlikely(skb_shinfo(head)->frag_list)) { -- cgit From 903f1a187776bb8d79b13618ec05b25f86318885 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 16:23:39 -0700 Subject: net/tls: fix build without CONFIG_TLS_DEVICE buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n. Wrap the cleanup branch into an ifdef, tls_device_free_resources_tx() wouldn't be compiled either in this case. Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a3cca1ef0098..9547cea0ce3b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -287,8 +287,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); +#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); +#endif } if (ctx->rx_conf == TLS_SW) { -- cgit From 43c2adb9df7ddd6560fd3546d925b42cef92daa0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 8 Apr 2019 16:45:17 +0800 Subject: team: set slave to promisc if team is already in promisc mode After adding a team interface to bridge, the team interface will enter promisc mode. Then if we add a new slave to team0, the slave will keep promisc off. Fix it by setting slave to promisc on if team master is already in promisc mode, also do the same for allmulti. v2: add promisc and allmulti checking when delete ports Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/team/team.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6ed96fdfd96d..9ce61b019aad 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1246,6 +1246,23 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_option_port_add; } + /* set promiscuity level to new slave */ + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(port_dev, 1); + if (err) + goto err_set_slave_promisc; + } + + /* set allmulti level to new slave */ + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(port_dev, 1); + if (err) { + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + goto err_set_slave_promisc; + } + } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); @@ -1262,6 +1279,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return 0; +err_set_slave_promisc: + __team_option_inst_del_port(team, port); + err_option_port_add: team_upper_dev_unlink(team, port); @@ -1307,6 +1327,12 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); + + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(port_dev, -1); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); -- cgit From 8065a779f17e94536a1c4dcee4f9d88011672f97 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 8 Apr 2019 19:45:27 -0400 Subject: failover: allow name change on IFF_UP slave interfaces When a netdev appears through hot plug then gets enslaved by a failover master that is already up and running, the slave will be opened right away after getting enslaved. Today there's a race that userspace (udev) may fail to rename the slave if the kernel (net_failover) opens the slave earlier than when the userspace rename happens. Unlike bond or team, the primary slave of failover can't be renamed by userspace ahead of time, since the kernel initiated auto-enslavement is unable to, or rather, is never meant to be synchronized with the rename request from userspace. As the failover slave interfaces are not designed to be operated directly by userspace apps: IP configuration, filter rules with regard to network traffic passing and etc., should all be done on master interface. In general, userspace apps only care about the name of master interface, while slave names are less important as long as admin users can see reliable names that may carry other information describing the netdev. For e.g., they can infer that "ens3nsby" is a standby slave of "ens3", while for a name like "eth0" they can't tell which master it belongs to. Historically the name of IFF_UP interface can't be changed because there might be admin script or management software that is already relying on such behavior and assumes that the slave name can't be changed once UP. But failover is special: with the in-kernel auto-enslavement mechanism, the userspace expectation for device enumeration and bring-up order is already broken. Previously initramfs and various userspace config tools were modified to bypass failover slaves because of auto-enslavement and duplicate MAC address. Similarly, in case that users care about seeing reliable slave name, the new type of failover slaves needs to be taken care of specifically in userspace anyway. It's less risky to lift up the rename restriction on failover slave which is already UP. Although it's possible this change may potentially break userspace component (most likely configuration scripts or management software) that assumes slave name can't be changed while UP, it's relatively a limited and controllable set among all userspace components, which can be fixed specifically to listen for the rename events on failover slaves. Userspace component interacting with slaves is expected to be changed to operate on failover master interface instead, as the failover slave is dynamic in nature which may come and go at any point. The goal is to make the role of failover slaves less relevant, and userspace components should only deal with failover master in the long run. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Si-Wei Liu Reviewed-by: Liran Alon Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 16 +++++++++++++++- net/core/failover.c | 6 +++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f4..324e872c91d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1500,6 +1500,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1532,6 +1533,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1563,6 +1565,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER +#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK /** * struct net_device - The DEVICE structure. diff --git a/net/core/dev.c b/net/core/dev.c index fdcff29df915..f409406254dd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname) BUG_ON(!dev_net(dev)); net = dev_net(dev); - if (dev->flags & IFF_UP) + + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. + */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; write_seqcount_begin(&devnet_rename_seq); diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9..b5cd3c727285 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev) goto err_upper_link; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: @@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) -- cgit From b4f47f3848eb70986f75d06112af7b48b7f5f462 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 8 Apr 2019 17:59:50 -0700 Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() Unlike '&&' operator, the '&' does not have short-circuit evaluation semantics. IOW both sides of the operator always get evaluated. Fix the wrong operator in tls_is_sk_tx_device_offloaded(), which would lead to out-of-bounds access for for non-full sockets. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index c7f7dc344e73..5934246b2c6f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -381,7 +381,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { #ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) & + return sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else -- cgit From 813dbeb656d6c90266f251d8bd2b02d445afa63f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 9 Apr 2019 12:10:25 +0800 Subject: vhost: reject zero size iova range We used to accept zero size iova range which will lead a infinite loop in translate_desc(). Fixing this by failing the request in this case. Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com Fixes: 6b1e6cc7 ("vhost: new device IOTLB API") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5ace833de746..351af88231ad 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) { - struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + struct vhost_umem_node *tmp, *node; + if (!size) + return -EFAULT; + + node = kmalloc(sizeof(*node), GFP_ATOMIC); if (!node) return -ENOMEM; -- cgit From d1841533e54876f152a30ac398a34f47ad6590b1 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 9 Apr 2019 14:59:24 +0700 Subject: tipc: missing entries in name table of publications When binding multiple services with specific type 1Ki, 2Ki.., this leads to some entries in the name table of publications missing when listed out via 'tipc name show'. The problem is at identify zero last_type conditional provided via netlink. The first is initial 'type' when starting name table dummping. The second is continuously with zero type (node state service type). Then, lookup function failure to finding node state service type in next iteration. To solve this, adding more conditional to marked as dirty type and lookup correct service type for the next iteration instead of select the first service as initial 'type' zero. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/name_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..89993afe0fbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; -- cgit From 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 9 Apr 2019 11:47:20 +0200 Subject: net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv gue tunnels run iptunnel_pull_offloads on received skbs. This can determine a possible use-after-free accessing guehdr pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device) Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/fou.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..12ce6c526d72 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); -- cgit From fd57770dd198f5b2ddd5b9e6bf282cf98d63adb9 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:30 +0200 Subject: net/smc: wait for pending work before clcsock release_sock When the clcsock is already released using sock_release() and a pending smc_listen_work accesses the clcsock than that will fail. Solve this by canceling and waiting for the work to complete first. Because the work holds the sock_lock it must make sure that the lock is not hold before the new helper smc_clcsock_release() is invoked. And before the smc_listen_work starts working check if the parent listen socket is still valid, otherwise stop the work early. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 ++++++++------ net/smc/smc_close.c | 25 +++++++++++++++++++++---- net/smc/smc_close.h | 1 + 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 77ef53596d18..9bdaed2f2e35 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -167,10 +167,9 @@ static int smc_release(struct socket *sock) if (sk->sk_state == SMC_CLOSED) { if (smc->clcsock) { - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); } if (!smc->use_fallback) smc_conn_free(&smc->conn); @@ -1037,13 +1036,13 @@ static void smc_listen_out(struct smc_sock *new_smc) struct smc_sock *lsmc = new_smc->listen_smc; struct sock *newsmcsk = &new_smc->sk; - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); + release_sock(&lsmc->sk); } else { /* no longer listening */ smc_close_non_accepted(newsmcsk); } - release_sock(&lsmc->sk); /* Wake up accept */ lsmc->sk.sk_data_ready(&lsmc->sk); @@ -1237,6 +1236,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) + return smc_listen_out_err(new_smc); + if (new_smc->use_fallback) { smc_listen_out_connected(new_smc); return; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 2ad37e998509..fc06720b53c1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -21,6 +21,22 @@ #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) +/* release the clcsock that is assigned to the smc_sock */ +void smc_clcsock_release(struct smc_sock *smc) +{ + struct socket *tcp; + + if (smc->listen_smc && current_work() != &smc->smc_listen_work) + cancel_work_sync(&smc->smc_listen_work); + mutex_lock(&smc->clcsock_release_lock); + if (smc->clcsock) { + tcp = smc->clcsock; + smc->clcsock = NULL; + sock_release(tcp); + } + mutex_unlock(&smc->clcsock_release_lock); +} + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -321,6 +337,7 @@ static void smc_close_passive_work(struct work_struct *work) close_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); struct smc_cdc_conn_state_flags *rxflags; + bool release_clcsock = false; struct sock *sk = &smc->sk; int old_state; @@ -400,13 +417,13 @@ wakeup: if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); - if (smc->clcsock) { - sock_release(smc->clcsock); - smc->clcsock = NULL; - } + if (smc->clcsock) + release_clcsock = true; } } release_sock(sk); + if (release_clcsock) + smc_clcsock_release(smc); sock_put(sk); /* sock_hold done by schedulers of close_work */ } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 19eb6a211c23..e0e3b5df25d2 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); +void smc_clcsock_release(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ -- cgit From e183d4e414b64711baf7a04e214b61969ca08dfa Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 11 Apr 2019 11:17:31 +0200 Subject: net/smc: fix a NULL pointer dereference In case alloc_ordered_workqueue fails, the fix returns NULL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ism.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 2fff79db1a59..e89e918b88e0 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -289,6 +289,11 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, INIT_LIST_HEAD(&smcd->vlan); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); + if (!smcd->event_wq) { + kfree(smcd->conn); + kfree(smcd); + return NULL; + } return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); -- cgit From 07603b230895a74ebb1e2a1231ac45c29c2a8cd3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:32 +0200 Subject: net/smc: propagate file from SMC to TCP socket fcntl(fd, F_SETOWN, getpid()) selects the recipient of SIGURG signals that are delivered when out-of-band data arrives on socket fd. If an SMC socket program makes use of such an fcntl() call, it fails in case of fallback to TCP-mode. In case of fallback the traffic is processed with the internal TCP socket. Propagating field "file" from the SMC socket to the internal TCP socket fixes the issue. Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/sock.h | 6 ------ net/smc/af_smc.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8de5ee258b93..341f8bafa0cf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2084,12 +2084,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) * @p: poll_table * * See the comments in the wq_has_sleeper function. - * - * Do not derive sock from filp->private_data here. An SMC socket establishes - * an internal TCP socket that is used in the fallback case. All socket - * operations on the SMC socket are then forwarded to the TCP socket. In case of - * poll, the filp->private_data pointer references the SMC socket because the - * TCP socket has no file assigned. */ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9bdaed2f2e35..d2a0d15f809c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -445,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_switch_to_fallback(struct smc_sock *smc) +{ + smc->use_fallback = true; + if (smc->sk.sk_socket && smc->sk.sk_socket->file) { + smc->clcsock->file = smc->sk.sk_socket->file; + smc->clcsock->file->private_data = smc->clcsock; + } +} + /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = reason_code; smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -774,10 +783,14 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - if (smc->sk.sk_err) - smc->sk.sk_state_change(&smc->sk); - else - smc->sk.sk_write_space(&smc->sk); + if (!sock_flag(&smc->sk, SOCK_DEAD)) { + if (smc->sk.sk_err) { + smc->sk.sk_state_change(&smc->sk); + } else { /* allow polling before and after fallback decision */ + smc->clcsock->sk->sk_write_space(smc->clcsock->sk); + smc->sk.sk_write_space(&smc->sk); + } + } kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -934,8 +947,13 @@ struct sock *smc_accept_dequeue(struct sock *parent, sock_put(new_sk); /* final */ continue; } - if (new_sock) + if (new_sock) { sock_graft(new_sk, new_sock); + if (isk->use_fallback) { + smc_sk(new_sk)->clcsock->file = new_sock->file; + isk->clcsock->file->private_data = isk->clcsock; + } + } return new_sk; } return NULL; @@ -1086,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, return; } smc_conn_free(&new_smc->conn); - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code) < 0) { @@ -1246,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; smc_listen_out_connected(new_smc); return; @@ -1503,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { rc = -EINVAL; @@ -1705,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { if (!smc->use_fallback) -- cgit From 8ef659f1a840c953a59442ff1400ec73baf3b601 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:33 +0200 Subject: net/smc: fix return code from FLUSH command The FLUSH command is used to empty the pnet table. No return code is expected from the command. Commit a9d8b0b1e3d6 added namespace support for the pnet table and changed the FLUSH command processing to call smc_pnet_remove_by_pnetid() to remove the pnet entries. This function returns -ENOENT when no entry was deleted, which is now the return code of the FLUSH command. As a result the FLUSH command will return an error when the pnet table is already empty. Restore the expected behavior and let FLUSH always return 0. Fixes: a9d8b0b1e3d6 ("net/smc: add pnet table namespace support") Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c..0285c7f9e79b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -603,7 +603,8 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - return smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_remove_by_pnetid(net, NULL); + return 0; } /* SMC_PNETID generic netlink operation definition */ -- cgit From f61bca58f6c36e666c2b807697f25e5e98708162 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:34 +0200 Subject: net/smc: move unhash before release of clcsock Commit <26d92e951fe0> ("net/smc: move unhash as early as possible in smc_release()") fixes one occurrence in the smc code, but the same pattern exists in other places. This patch covers the remaining occurrences and makes sure, the unhash operation is done before the smc->clcsock is released. This avoids a potential use-after-free in smc_diag_dump(). Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d2a0d15f809c..6f869ef49b32 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -884,11 +884,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (rc < 0) lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; @@ -939,11 +939,11 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (isk->clcsock) { sock_release(isk->clcsock); isk->clcsock = NULL; } - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ continue; } @@ -973,6 +973,7 @@ void smc_close_non_accepted(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + sk->sk_prot->unhash(sk); if (smc->clcsock) { struct socket *tcp; @@ -988,7 +989,6 @@ void smc_close_non_accepted(struct sock *sk) smc_conn_free(&smc->conn); } release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ } -- cgit From 5ee15c101f29e0093ffb5448773ccbc786eb313b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:32 +0200 Subject: net: thunderx: raise XDP MTU to 1508 The thunderx driver splits frames bigger than 1530 bytes to multiple pages, making impossible to run an eBPF program on it. This leads to a maximum MTU of 1508 if QinQ is in use. The thunderx driver forbids to load an eBPF program if the MTU is higher than 1500 bytes. Raise the limit to 1508 so it is possible to use L2 protocols which need some more headroom. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 28eac9056211..debc8c861c6b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -32,6 +32,13 @@ #define DRV_NAME "nicvf" #define DRV_VERSION "1.0" +/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs + * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed + * this value, keeping headroom for the 14 byte Ethernet header and two + * VLAN tags (for QinQ) + */ +#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) + /* Supported devices */ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, @@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool bpf_attached = false; int ret = 0; - /* For now just support only the usual MTU sized frames */ - if (prog && (dev->mtu > 1500)) { + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (prog && dev->mtu > MAX_XDP_MTU) { netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", dev->mtu); return -EOPNOTSUPP; -- cgit From 1f227d16083b2e280b7dde4ca78883d75593f2fd Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:33 +0200 Subject: net: thunderx: don't allow jumbo frames with XDP The thunderx driver forbids to load an eBPF program if the MTU is too high, but this can be circumvented by loading the eBPF, then raising the MTU. Fix this by limiting the MTU if an eBPF program is already loaded. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index debc8c861c6b..c032bef1b776 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) struct nicvf *nic = netdev_priv(netdev); int orig_mtu = netdev->mtu; + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { + netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + netdev->mtu); + return -EINVAL; + } + netdev->mtu = new_mtu; if (!netif_running(netdev)) -- cgit From c5b493ce192bd7a4e7bd073b5685aad121eeef82 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 15:08:25 +0300 Subject: net: bridge: multicast: use rcu to access port list from br_multicast_start_querier br_multicast_start_querier() walks over the port list but it can be called from a timer with only multicast_lock held which doesn't protect the port list, so use RCU to walk over it. Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 02da21d771c9..45e7f4173bba 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2031,7 +2031,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2043,6 +2044,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) -- cgit From d7c3a206e6338e4ccdf030719dec028e26a521d5 Mon Sep 17 00:00:00 2001 From: Andy Duan Date: Tue, 9 Apr 2019 03:40:56 +0000 Subject: net: fec: manage ahb clock in runtime pm Some SOC like i.MX6SX clock have some limits: - ahb clock should be disabled before ipg. - ahb and ipg clocks are required for MAC MII bus. So, move the ahb clock to runtime management together with ipg clock. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 697c2427f2b7..a96ad20ee484 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret; if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret; if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) phy_reset_after_clk_enable(ndev->phydev); } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1885,8 +1880,6 @@ failed_clk_ref: failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb); return ret; } @@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb; fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3563,6 +3559,9 @@ failed_reset: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); return 0; @@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret; - return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; } static const struct dev_pm_ops fec_pm_ops = { -- cgit From d3706566ae3d92677b932dd156157fd6c72534b1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 9 Apr 2019 19:53:55 +0800 Subject: net: netrom: Fix error cleanup path of nr_proto_init Syzkaller report this: BUG: unable to handle kernel paging request at fffffbfff830524b PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0 Oops: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23 Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2 RSP: 0018:ffff8881ea2278d0 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: ffffffffc1829250 RCX: 1ffff1103d444ef4 RDX: 1ffffffff830524b RSI: ffffffff85659300 RDI: ffffffffc1829258 RBP: ffffffffc1879250 R08: fffffbfff0acb269 R09: fffffbfff0acb269 R10: ffff8881ea2278f0 R11: fffffbfff0acb268 R12: ffffffffc1829250 R13: dffffc0000000000 R14: 0000000000000008 R15: ffffffffc187c830 FS: 00007fe0361df700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff830524b CR3: 00000001eb39a001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] proto_register+0x444/0x8f0 net/core/sock.c:3375 nr_proto_init+0x73/0x4b3 [netrom] ? 0xffffffffc1628000 ? 0xffffffffc1628000 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe0361dec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00007fe0361dec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0361df6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: rxrpc] Dumping ftrace buffer: (ftrace buffer empty) CR2: fffffbfff830524b ---[ end trace 039ab24b305c4b19 ]--- If nr_proto_init failed, it may forget to call proto_unregister, tiggering this issue.This patch rearrange code of nr_proto_init to avoid such issues. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/net/netrom.h | 2 +- net/netrom/af_netrom.c | 76 ++++++++++++++++++++++++++++++------------ net/netrom/nr_loopback.c | 2 +- net/netrom/nr_route.c | 2 +- net/netrom/sysctl_net_netrom.c | 5 ++- 5 files changed, 61 insertions(+), 26 deletions(-) diff --git a/include/net/netrom.h b/include/net/netrom.h index 5a0714ff500f..80f15b1c1a48 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *); int nr_t1timer_running(struct sock *); /* sysctl_net_netrom.c */ -void nr_register_sysctl(void); +int nr_register_sysctl(void); void nr_unregister_sysctl(void); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d19903..71ffd1a6dc7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a9647..93d13f019981 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f0..b76aa668a94b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f18..771011b84270 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) -- cgit From a5f622984a623df9a84cf43f6b098d8dd76fbe05 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Apr 2019 14:23:10 -0700 Subject: selftests: fib_tests: Fix 'Command line is not complete' errors A couple of tests are verifying a route has been removed. The helper expects the prefix as the first part of the expected output. When checking that a route has been deleted the prefix is empty leading to an invalid ip command: $ ip ro ls match Command line is not complete. Try option "help" Fix by moving the comparison of expected output and output to a new function that is used by both check_route and check_route6. Use the new helper for the 2 checks on route removal. Also, remove the reset of 'set -x' in route_setup which overrides the user managed setting. Fixes: d69faad76584c ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 94 ++++++++++++++------------------ 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 1080ff55a788..0d2a5f4f1e63 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -605,6 +605,39 @@ run_cmd() return $rc } +check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -652,31 +685,7 @@ check_route6() pfx=$1 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } route_cleanup() @@ -725,7 +734,7 @@ route_setup() ip -netns ns2 addr add 172.16.103.2/24 dev veth4 ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 - set +ex + set +e } # assumption is that basic add of a single path route works @@ -960,7 +969,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1091,38 +1101,13 @@ check_route() local pfx local expected="$1" local out - local rc=0 set -- $expected pfx=$1 [ "${pfx}" = "unreachable" ] && pfx=$2 out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } # assumption is that basic add of a single path route works @@ -1387,7 +1372,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" -- cgit From e3058450965972e67cc0e5492c08c4cdadafc134 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Apr 2019 05:55:23 -0700 Subject: dctcp: more accurate tracking of packets delivery After commit e21db6f69a95 ("tcp: track total bytes delivered with ECN CE marks") core TCP stack does a very good job tracking ECN signals. The "sender's best estimate of CE information" Yuchung mentioned in his patch is indeed the best we can do. DCTCP can use tp->delivered_ce and tp->delivered to not duplicate the logic, and use the existing best estimate. This solves some problems, since current DCTCP logic does not deal with losses and/or GRO or ack aggregation very well. This also removes a dubious use of inet_csk(sk)->icsk_ack.rcv_mss (this should have been tp->mss_cache), and a 64 bit divide. Finally, we can see that the DCTCP logic, calling dctcp_update_alpha() for every ACK could be done differently, calling it only once per RTT. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Florian Westphal Cc: Daniel Borkmann Cc: Lawrence Brakmo Cc: Abdul Kabbani Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 359da68d7c06..477cb4aa456c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; -- cgit From dd3ac9a684358b8c1d5c432ca8322aaf5e4f28ee Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:51:52 +0900 Subject: net/rds: Check address length before reading address family syzbot is reporting uninitialized value at rds_connect() [1] and rds_bind() [2]. This is because syzbot is passing ulen == 0 whereas these functions expect that it is safe to access sockaddr->family field in order to determine minimal address length for validation. [1] https://syzkaller.appspot.com/bug?id=f4e61c010416c1e6f0fa3ffe247561b60a50ad71 [2] https://syzkaller.appspot.com/bug?id=a4bf9e41b7e055c3823fdcd83e8c58ca7270e38f Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/af_rds.c | 3 +++ net/rds/bind.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index d6cc97fbbbb0..2b969f99ef13 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c848..0f4398e7f2a7 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; -- cgit From 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:52:36 +0900 Subject: mISDN: Check address length before reading address family KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 4ab8b1b6608f..a14e35d40538 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -710,10 +710,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0; - if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL; - if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL; lock_sock(sk); -- cgit From 175f7c1f01d30b2088491bee4636fbf846fb76ce Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:10 +0900 Subject: sctp: Check address length before reading address family KMSAN will complain if valid address length passed to connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9874e60c9b0d..4583fa914e62 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* Validate addr_len before calling common connect/connectx routine. */ - af = sctp_get_af_specific(addr->sa_family); + af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : + sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { err = -EINVAL; } else { -- cgit From d852be84770c0611f8b76bd7046c6a814c5b9f11 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:38 +0900 Subject: net: netlink: Check address length before reading groups field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_nl) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f28e937320a3..216ab915dd54 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; - unsigned long groups = nladdr->nl_groups; + unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) @@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_family != AF_NETLINK) return -EINVAL; + groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { -- cgit From a9107a14a9b9112775459ad291fc5de0f2513ce0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:05 +0900 Subject: rxrpc: Check address length before reading srx_service field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_rxrpc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 96f2952bbdfd..c54dce3ca0dd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - u16 service_id = srx->srx_service; + u16 service_id; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) ret = rxrpc_validate_address(rx, srx, len); if (ret < 0) goto error; + service_id = srx->srx_service; lock_sock(&rx->sk); -- cgit From bd7d46ddca06f1fadd68ceb99bc6e6f808ab50f2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:33 +0900 Subject: Bluetooth: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_sco) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/bluetooth/sco.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a580999ca57..d892b7c3cc42 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, struct sock *sk = sock->sk; int err = 0; - BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr_len < sizeof(struct sockaddr_sco) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { -- cgit From c68e747d0a98f44a4e49071940a692fa83630e47 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:14 +0900 Subject: llc: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_llc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b99e73a7e7e0..2017b7d780f5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct llc_sap *sap; int rc = -EINVAL; - dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); - lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { -- cgit From ba024f2574a19557f92116ec6be129b26ae66e97 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:47 +0900 Subject: bpf: Check address length before reading address family KMSAN will complain if valid address length passed to bpf_bind() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Andrey Ignatov Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index fc92ebc4e200..27e61ffd9039 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4383,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, * Only binding to IP is supported. */ err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; -- cgit From bddc028a4f2ac8cf4d0cd1c696b5f95d8305a553 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:56:39 +0900 Subject: udpv6: Check address length before reading address family KMSAN will complain if valid address length passed to udpv6_pre_connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. (This patch is bogus if it is guaranteed that udpv6_pre_connect() is always called after checking "struct sockaddr"->sa_family. In that case, we want a comment why we don't need to check valid address length here.) Signed-off-by: Tetsuo Handa Acked-by: Song Liu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b444483cdb2b..622eeaf5732b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk) static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. -- cgit From 2170e2157d7c5398f84477935553d63a93a1f6b8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:19 +0100 Subject: mt76: mt7603: add missing initialization for dev->ps_lock Fixes lockdep complaint and a potential race condition Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c index d54dda67d036..3af45949e868 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c @@ -510,6 +510,8 @@ int mt7603_register_device(struct mt7603_dev *dev) bus_ops->rmw = mt7603_rmw; dev->mt76.bus = bus_ops; + spin_lock_init(&dev->ps_lock); + INIT_DELAYED_WORK(&dev->mac_work, mt7603_mac_work); tasklet_init(&dev->pre_tbtt_tasklet, mt7603_pre_tbtt_tasklet, (unsigned long)dev); -- cgit From aa3cb24be18b9b537750c354c5cff96c3d17ae44 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:20 +0100 Subject: mt76: mt7603: fix sequence number assignment If the MT_TXD3_SN_VALID flag is not set in the tx descriptor, the hardware assigns the sequence number. However, the rest of the code assumes that the sequence number specified in the 802.11 header gets transmitted. This was causing issues with the aggregation setup, which worked for the initial one (where the sequence numbers were still close), but not for further teardown/re-establishing of sessions. Additionally, the overwrite of the TID sequence number in WTBL2 was resetting the hardware assigned sequence numbers, causing them to drift further apart. Fix this by using the software assigned sequence numbers Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 53 ++++++---------------- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 6 +-- drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 5e31d7da96fc..5abc02b57818 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -343,7 +343,7 @@ void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid) MT_BA_CONTROL_1_RESET)); } -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size) { u32 addr = mt7603_wtbl2_addr(wcid); @@ -358,43 +358,6 @@ void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, mt76_clear(dev, addr + (15 * 4), tid_mask); return; } - mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000); - - mt7603_mac_stop(dev); - switch (tid) { - case 0: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID0_SN, ssn); - break; - case 1: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID1_SN, ssn); - break; - case 2: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID2_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID2_SN_HI, - ssn >> 8); - break; - case 3: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID3_SN, ssn); - break; - case 4: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID4_SN, ssn); - break; - case 5: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID5_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID5_SN_HI, - ssn >> 4); - break; - case 6: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID6_SN, ssn); - break; - case 7: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID7_SN, ssn); - break; - } - mt7603_wtbl_update(dev, wcid, MT_WTBL_UPDATE_WTBL2); - mt7603_mac_start(dev); for (i = 7; i > 0; i--) { if (ba_size >= MT_AGG_SIZE_LIMIT(i)) @@ -827,6 +790,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_tx_rate *rate = &info->control.rates[0]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; struct ieee80211_vif *vif = info->control.vif; struct mt7603_vif *mvif; int wlan_idx; @@ -834,6 +798,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, int tx_count = 8; u8 frame_type, frame_subtype; u16 fc = le16_to_cpu(hdr->frame_control); + u16 seqno = 0; u8 vif_idx = 0; u32 val; u8 bw; @@ -919,7 +884,17 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, tx_count = 0x1f; val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count) | - FIELD_PREP(MT_TXD3_SEQ, le16_to_cpu(hdr->seq_ctrl)); + MT_TXD3_SN_VALID; + + if (ieee80211_is_data_qos(hdr->frame_control)) + seqno = le16_to_cpu(hdr->seq_ctrl); + else if (ieee80211_is_back_req(hdr->frame_control)) + seqno = le16_to_cpu(bar->start_seq_num); + else + val &= ~MT_TXD3_SN_VALID; + + val |= FIELD_PREP(MT_TXD3_SEQ, seqno >> 4); + txwi[3] = cpu_to_le32(val); if (key) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index cc0fe0933b2d..31a7ca691195 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -584,13 +584,13 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, case IEEE80211_AMPDU_TX_OPERATIONAL: mtxq->aggr = true; mtxq->send_bar = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, ba_size); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, ba_size); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: mtxq->aggr = false; ieee80211_send_bar(vif, sta->addr, tid, mtxq->agg_ssn); - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = *ssn << 4; @@ -598,7 +598,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h index 79f332429432..6049f3b7c8fe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h @@ -200,7 +200,7 @@ void mt7603_beacon_set_timer(struct mt7603_dev *dev, int idx, int intval); int mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb); void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data); void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid); -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size); void mt7603_pse_client_reset(struct mt7603_dev *dev); -- cgit From 9dc27bcbe78c5d3926f48b1105840f349c827766 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:21 +0100 Subject: mt76: mt7603: send BAR after powersave wakeup Now that the sequence number allocation is fixed, we can finally send a BAR at powersave wakeup time to refresh the receiver side reorder window Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 31a7ca691195..a3c4ef198bfe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -372,7 +372,7 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv; struct sk_buff_head list; - mt76_stop_tx_queues(&dev->mt76, sta, false); + mt76_stop_tx_queues(&dev->mt76, sta, true); mt7603_wtbl_set_ps(dev, msta, ps); if (ps) return; -- cgit From 746ba11f170603bf1eaade817553a6c2e9135bbe Mon Sep 17 00:00:00 2001 From: Vijayakumar Durai Date: Wed, 27 Mar 2019 11:03:17 +0100 Subject: rt2x00: do not increment sequence number while re-transmitting Currently rt2x00 devices retransmit the management frames with incremented sequence number if hardware is assigning the sequence. This is HW bug fixed already for non-QOS data frames, but it should be fixed for management frames except beacon. Without fix retransmitted frames have wrong SN: AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1648, FN=0, Flags=........C Frame is not being retransmitted 1648 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1649, FN=0, Flags=....R...C Frame is being retransmitted 1649 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1650, FN=0, Flags=....R...C Frame is being retransmitted 1650 1 With the fix SN stays correctly the same: 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=........C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C Cc: stable@vger.kernel.org Signed-off-by: Vijayakumar Durai [sgruszka: simplify code, change comments and changelog] Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 - drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 10 ---------- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 15 +++++++++------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 4b1744e9fb78..50b92ca92bd7 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -673,7 +673,6 @@ enum rt2x00_state_flags { CONFIG_CHANNEL_HT40, CONFIG_POWERSAVING, CONFIG_HT_DISABLED, - CONFIG_QOS_DISABLED, CONFIG_MONITORING, /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 2825560e2424..e8462f25d252 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -642,18 +642,8 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00dev->intf_associated--; rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); - - clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); } - /* - * Check for access point which do not support 802.11e . We have to - * generate data frames sequence number in S/W for such AP, because - * of H/W bug. - */ - if (changes & BSS_CHANGED_QOS && !bss_conf->qos) - set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); - /* * When the erp information has changed, we should perform * additional configuration steps. For all other changes we are done. diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index 92ddc19e7bf7..4834b4eb0206 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) { /* * rt2800 has a H/W (or F/W) bug, device incorrectly increase - * seqno on retransmited data (non-QOS) frames. To workaround - * the problem let's generate seqno in software if QOS is - * disabled. + * seqno on retransmitted data (non-QOS) and management frames. + * To workaround the problem let's generate seqno in software. + * Except for beacons which are transmitted periodically by H/W + * hence hardware has to assign seqno for them. */ - if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags)) - __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); - else + if (ieee80211_is_beacon(hdr->frame_control)) { + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); /* H/W will generate sequence number */ return; + } + + __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); } /* -- cgit From bafdf85dfa59374f927ff597bc8c259193afda30 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 5 Apr 2019 13:42:56 +0200 Subject: mt76x02: avoid status_list.lock and sta->rate_ctrl_lock dependency Move ieee80211_tx_status_ext() outside of status_list lock section in order to avoid locking dependency and possible deadlock reposed by LOCKDEP in below warning. Also do mt76_tx_status_lock() just before it's needed. [ 440.224832] WARNING: possible circular locking dependency detected [ 440.224833] 5.1.0-rc2+ #22 Not tainted [ 440.224834] ------------------------------------------------------ [ 440.224835] kworker/u16:28/2362 is trying to acquire lock: [ 440.224836] 0000000089b8cacf (&(&q->lock)->rlock#2){+.-.}, at: mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.224842] but task is already holding lock: [ 440.224842] 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.224863] which lock already depends on the new lock. [ 440.224863] the existing dependency chain (in reverse order) is: [ 440.224864] -> #3 (&(&sta->lock)->rlock){+.-.}: [ 440.224869] _raw_spin_lock_bh+0x34/0x40 [ 440.224880] ieee80211_start_tx_ba_session+0xe4/0x3d0 [mac80211] [ 440.224894] minstrel_ht_get_rate+0x45c/0x510 [mac80211] [ 440.224906] rate_control_get_rate+0xc1/0x140 [mac80211] [ 440.224918] ieee80211_tx_h_rate_ctrl+0x195/0x3c0 [mac80211] [ 440.224930] ieee80211_xmit_fast+0x26d/0xa50 [mac80211] [ 440.224942] __ieee80211_subif_start_xmit+0xfc/0x310 [mac80211] [ 440.224954] ieee80211_subif_start_xmit+0x38/0x390 [mac80211] [ 440.224956] dev_hard_start_xmit+0xb8/0x300 [ 440.224957] __dev_queue_xmit+0x7d4/0xbb0 [ 440.224968] ip6_finish_output2+0x246/0x860 [ipv6] [ 440.224978] mld_sendpack+0x1bd/0x360 [ipv6] [ 440.224987] mld_ifc_timer_expire+0x1a4/0x2f0 [ipv6] [ 440.224989] call_timer_fn+0x89/0x2a0 [ 440.224990] run_timer_softirq+0x1bd/0x4d0 [ 440.224992] __do_softirq+0xdb/0x47c [ 440.224994] irq_exit+0xfa/0x100 [ 440.224996] smp_apic_timer_interrupt+0x9a/0x220 [ 440.224997] apic_timer_interrupt+0xf/0x20 [ 440.224999] cpuidle_enter_state+0xc1/0x470 [ 440.225000] do_idle+0x21a/0x260 [ 440.225001] cpu_startup_entry+0x19/0x20 [ 440.225004] start_secondary+0x135/0x170 [ 440.225006] secondary_startup_64+0xa4/0xb0 [ 440.225007] -> #2 (&(&sta->rate_ctrl_lock)->rlock){+.-.}: [ 440.225009] _raw_spin_lock_bh+0x34/0x40 [ 440.225022] rate_control_tx_status+0x4f/0xb0 [mac80211] [ 440.225031] ieee80211_tx_status_ext+0x142/0x1a0 [mac80211] [ 440.225035] mt76x02_send_tx_status+0x2e4/0x340 [mt76x02_lib] [ 440.225037] mt76x02_tx_status_data+0x31/0x40 [mt76x02_lib] [ 440.225040] mt76u_tx_status_data+0x51/0xa0 [mt76_usb] [ 440.225042] process_one_work+0x237/0x5d0 [ 440.225043] worker_thread+0x3c/0x390 [ 440.225045] kthread+0x11d/0x140 [ 440.225046] ret_from_fork+0x3a/0x50 [ 440.225047] -> #1 (&(&list->lock)->rlock#8){+.-.}: [ 440.225049] _raw_spin_lock_bh+0x34/0x40 [ 440.225052] mt76_tx_status_skb_add+0x51/0x100 [mt76] [ 440.225054] mt76x02u_tx_prepare_skb+0xbd/0x116 [mt76x02_usb] [ 440.225056] mt76u_tx_queue_skb+0x5f/0x180 [mt76_usb] [ 440.225058] mt76_tx+0x93/0x190 [mt76] [ 440.225070] ieee80211_tx_frags+0x148/0x210 [mac80211] [ 440.225081] __ieee80211_tx+0x75/0x1b0 [mac80211] [ 440.225092] ieee80211_tx+0xde/0x110 [mac80211] [ 440.225105] __ieee80211_tx_skb_tid_band+0x72/0x90 [mac80211] [ 440.225122] ieee80211_send_auth+0x1f3/0x360 [mac80211] [ 440.225141] ieee80211_auth.cold.40+0x6c/0x100 [mac80211] [ 440.225156] ieee80211_mgd_auth.cold.50+0x132/0x15f [mac80211] [ 440.225171] cfg80211_mlme_auth+0x149/0x360 [cfg80211] [ 440.225181] nl80211_authenticate+0x273/0x2e0 [cfg80211] [ 440.225183] genl_family_rcv_msg+0x196/0x3a0 [ 440.225184] genl_rcv_msg+0x47/0x8e [ 440.225185] netlink_rcv_skb+0x3a/0xf0 [ 440.225187] genl_rcv+0x24/0x40 [ 440.225188] netlink_unicast+0x16d/0x210 [ 440.225189] netlink_sendmsg+0x204/0x3b0 [ 440.225191] sock_sendmsg+0x36/0x40 [ 440.225193] ___sys_sendmsg+0x259/0x2b0 [ 440.225194] __sys_sendmsg+0x47/0x80 [ 440.225196] do_syscall_64+0x60/0x1f0 [ 440.225197] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 440.225198] -> #0 (&(&q->lock)->rlock#2){+.-.}: [ 440.225200] lock_acquire+0xb9/0x1a0 [ 440.225202] _raw_spin_lock_bh+0x34/0x40 [ 440.225204] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225215] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225225] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225235] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225236] process_one_work+0x237/0x5d0 [ 440.225237] worker_thread+0x3c/0x390 [ 440.225239] kthread+0x11d/0x140 [ 440.225240] ret_from_fork+0x3a/0x50 [ 440.225240] other info that might help us debug this: [ 440.225241] Chain exists of: &(&q->lock)->rlock#2 --> &(&sta->rate_ctrl_lock)->rlock --> &(&sta->lock)->rlock [ 440.225243] Possible unsafe locking scenario: [ 440.225244] CPU0 CPU1 [ 440.225244] ---- ---- [ 440.225245] lock(&(&sta->lock)->rlock); [ 440.225245] lock(&(&sta->rate_ctrl_lock)->rlock); [ 440.225246] lock(&(&sta->lock)->rlock); [ 440.225247] lock(&(&q->lock)->rlock#2); [ 440.225248] *** DEADLOCK *** [ 440.225249] 5 locks held by kworker/u16:28/2362: [ 440.225250] #0: 0000000048fcd291 ((wq_completion)phy0){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225252] #1: 00000000f1c6828f ((work_completion)(&sta->ampdu_mlme.work)){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225254] #2: 00000000433d2b2c (&sta->ampdu_mlme.mtx){+.+.}, at: ieee80211_ba_session_work+0x5c/0x2f0 [mac80211] [ 440.225265] #3: 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.225276] #4: 000000009d7b9a44 (rcu_read_lock){....}, at: ieee80211_agg_start_txq+0x33/0x2b0 [mac80211] [ 440.225286] stack backtrace: [ 440.225288] CPU: 2 PID: 2362 Comm: kworker/u16:28 Not tainted 5.1.0-rc2+ #22 [ 440.225289] Hardware name: LENOVO 20KGS23S0P/20KGS23S0P, BIOS N23ET55W (1.30 ) 08/31/2018 [ 440.225300] Workqueue: phy0 ieee80211_ba_session_work [mac80211] [ 440.225301] Call Trace: [ 440.225304] dump_stack+0x85/0xc0 [ 440.225306] print_circular_bug.isra.38.cold.58+0x15c/0x195 [ 440.225307] check_prev_add.constprop.48+0x5f0/0xc00 [ 440.225309] ? check_prev_add.constprop.48+0x39d/0xc00 [ 440.225311] ? __lock_acquire+0x41d/0x1100 [ 440.225312] __lock_acquire+0xd98/0x1100 [ 440.225313] ? __lock_acquire+0x41d/0x1100 [ 440.225315] lock_acquire+0xb9/0x1a0 [ 440.225317] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225319] _raw_spin_lock_bh+0x34/0x40 [ 440.225321] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225323] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225334] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225344] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225354] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225356] process_one_work+0x237/0x5d0 [ 440.225358] worker_thread+0x3c/0x390 [ 440.225359] ? wq_calc_node_cpumask+0x70/0x70 [ 440.225360] kthread+0x11d/0x140 [ 440.225362] ? kthread_create_on_node+0x40/0x40 [ 440.225363] ret_from_fork+0x3a/0x50 Cc: stable@vger.kernel.org Fixes: 88046b2c9f6d ("mt76: add support for reporting tx status with skb") Signed-off-by: Stanislaw Gruszka Acked-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 9ed231abe916..4fe5a83ca5a4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -466,7 +466,6 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, return; rcu_read_lock(); - mt76_tx_status_lock(mdev, &list); if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid)) wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); @@ -479,6 +478,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, drv_priv); } + mt76_tx_status_lock(mdev, &list); + if (wcid) { if (stat->pktid >= MT_PACKET_ID_FIRST) status.skb = mt76_tx_status_skb_get(mdev, wcid, @@ -498,7 +499,9 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (*update == 0 && stat_val == stat_cache && stat->wcid == msta->status.wcid && msta->n_frames < 32) { msta->n_frames++; - goto out; + mt76_tx_status_unlock(mdev, &list); + rcu_read_unlock(); + return; } mt76x02_mac_fill_tx_status(dev, status.info, &msta->status, @@ -514,11 +517,10 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (status.skb) mt76_tx_status_skb_done(mdev, status.skb, &list); - else - ieee80211_tx_status_ext(mt76_hw(dev), &status); - -out: mt76_tx_status_unlock(mdev, &list); + + if (!status.skb) + ieee80211_tx_status_ext(mt76_hw(dev), &status); rcu_read_unlock(); } -- cgit From 0a2c34f18c94b596562bf3d019fceab998b8b584 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 14:45:12 +0100 Subject: vxge: fix return of a free'd memblock on a failed dma mapping Currently if a pci dma mapping failure is detected a free'd memblock address is returned rather than a NULL (that indicates an error). Fix this by ensuring NULL is returned on this error case. Addresses-Coverity: ("Use after free") Fixes: 528f727279ae ("vxge: code cleanup and reorganization") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 7cde387e5ec6..51cd57ab3d95 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2366,6 +2366,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, dma_object->addr))) { vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); + memblock = NULL; goto exit; } -- cgit From 1dc2b3d65523780ed1972d446c76e62e13f3e8f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 15:13:27 +0100 Subject: qede: fix write to free'd pointer error and double free of ptp The err2 error return path calls qede_ptp_disable that cleans up on an error and frees ptp. After this, the free'd ptp is dereferenced when ptp->clock is set to NULL and the code falls-through to error path err1 that frees ptp again. Fix this by calling qede_ptp_disable and exiting via an error return path that does not set ptp->clock or kfree ptp. Addresses-Coverity: ("Write to pointer after free") Fixes: 035744975aec ("qede: Add support for PTP resource locking.") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 5f3f42a25361..bddb2b5982dc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { - rc = -EINVAL; DP_ERR(edev, "PTP clock registration failed\n"); + qede_ptp_disable(edev); + rc = -EINVAL; goto err2; } return 0; -err2: - qede_ptp_disable(edev); - ptp->clock = NULL; err1: kfree(ptp); +err2: edev->ptp = NULL; return rc; -- cgit From 56d282d9db19f85f759b7a81f0829b58c00571b0 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:40 +0100 Subject: rxrpc: Clear socket error When an ICMP or ICMPV6 error is received, the error will be attached to the socket (sk_err) and the report function will get called. Clear any pending error here by calling sock_error(). This would cause the following attempt to use the socket to fail with the error code stored by the ICMP error, resulting in unexpected errors with various side effects depending on the context. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings Signed-off-by: David S. Miller --- net/rxrpc/peer_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc05af89fc38..6e84d878053c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. + */ + sock_error(sk); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); -- cgit From 4611da30d679a4b0a2c2b5d4d7b3fbbafc922df7 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:47 +0100 Subject: rxrpc: Make rxrpc_kernel_check_life() indicate if call completed Make rxrpc_kernel_check_life() pass back the life counter through the argument list and return true if the call has not yet completed. Suggested-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 16 +++++++++------- fs/afs/rxrpc.c | 4 ++-- include/net/af_rxrpc.h | 4 +++- net/rxrpc/af_rxrpc.c | 14 +++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 2df5894353d6..cd7303d7fa25 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1009,16 +1009,18 @@ The kernel interface functions are as follows: (*) Check call still alive. - u32 rxrpc_kernel_check_life(struct socket *sock, - struct rxrpc_call *call); + bool rxrpc_kernel_check_life(struct socket *sock, + struct rxrpc_call *call, + u32 *_life); void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call); - The first function returns a number that is updated when ACKs are received - from the peer (notably including PING RESPONSE ACKs which we can elicit by - sending PING ACKs to see if the call still exists on the server). The - caller should compare the numbers of two calls to see if the call is still - alive after waiting for a suitable interval. + The first function passes back in *_life a number that is updated when + ACKs are received from the peer (notably including PING RESPONSE ACKs + which we can elicit by sending PING ACKs to see if the call still exists + on the server). The caller should compare the numbers of two calls to see + if the call is still alive after waiting for a suitable interval. It also + returns true as long as the call hasn't yet reached the completed state. This allows the caller to work out if the server is still contactable and if the call is still alive on the server while waiting for the server to diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 2c588f9bbbda..5cb11aff9298 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -621,7 +621,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, rtt2 = 2; timeout = rtt2; - last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); add_wait_queue(&call->waitq, &myself); for (;;) { @@ -639,7 +639,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 2bfb87eb98ce..78c856cba4f5 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -61,10 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *, + u32 *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); +bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c54dce3ca0dd..ae8c5d7f3bf1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -371,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check + * @_life: Where to store the life value * * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Returns a number representing the life state - * which can be compared to that returned by a previous call. + * getting ACKs from the server. Passes back in *_life a number representing + * the life state which can be compared to that returned by a previous call and + * return true if the call is still alive. * * If the life state stalls, rxrpc_kernel_probe_life() should be called and * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call) +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call, + u32 *_life) { - return call->acks_latest; + *_life = call->acks_latest; + return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); -- cgit From 8e8715aaa905f6593f610f950d513e81fab5006a Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:54 +0100 Subject: rxrpc: Allow errors to be returned from rxrpc_queue_packet() Change rxrpc_queue_packet()'s signature so that it can return any error code it may encounter when trying to send the packet. This allows the caller to eventually do something in case of error - though it should be noted that the packet has been queued and a resend is scheduled. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 46c9312085b1..bec64deb7b0a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, } /* - * Queue a DATA packet for transmission, set the resend timeout and send the - * packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. */ -static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; @@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(""); + _leave(" = %d", ret); + return ret; } /* @@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ skb = NULL; } } while (msg_data_left(msg) > 0); -- cgit From f7f1dd3162efc7ffdbcdb9da1ad1599f8ab51296 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:34:02 +0100 Subject: afs: Check for rxrpc call completion in wait loop Check the state of the rxrpc call backing an afs call in each iteration of the call wait loop in case the rxrpc call has already been terminated at the rxrpc layer. Interrupt the wait loop and mark the afs call as complete if the rxrpc layer call is complete. There were cases where rxrpc errors were not passed up to afs, which could result in this loop waiting forever for an afs call to transition to AFS_CALL_COMPLETE while the rx call was already complete. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- fs/afs/rxrpc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5cb11aff9298..c14001b42d20 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -610,6 +610,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, bool stalled = false; u64 rtt; u32 life, last_life; + bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -639,7 +640,12 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + /* rxrpc terminated the call. */ + rxrpc_complete = true; + break; + } + if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) @@ -663,12 +669,16 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* Kill off the call if it's still live. */ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { - _debug("call interrupted"); - if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) - afs_set_call_complete(call, -EINTR, 0); + if (rxrpc_complete) { + afs_set_call_complete(call, call->error, call->abort_code); + } else { + /* Kill off the call if it's still live. */ + _debug("call interrupted"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); + } } spin_lock_bh(&call->state_lock); -- cgit From 39ce67557568962fa9d1593741f76c4cc6762469 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Apr 2019 16:34:09 +0100 Subject: rxrpc: Trace received connection aborts Trace received calls that are aborted due to a connection abort, typically because of authentication failure. Without this, connection aborts don't show up in the trace log. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b6fca8ebb117..8d31fb4c51e1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl) + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) { struct rxrpc_call *call; int i; @@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call->call_id, 0, conn->abort_code, conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); if (rxrpc_set_call_completion(call, compl, conn->abort_code, conn->error)) @@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); - msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; @@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); @@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: -- cgit From 1a2391c30c0b9d041bc340f68df81d49c53546cc Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 12 Apr 2019 16:34:16 +0100 Subject: rxrpc: Fix detection of out of order acks The rxrpc packet serial number cannot be safely used to compute out of order ack packets for several reasons: 1. The allocation of serial numbers cannot be assumed to imply the order by which acks are populated and transmitted. In some rxrpc implementations, delayed acks and ping acks are transmitted asynchronously to the receipt of data packets and so may be transmitted out of order. As a result, they can race with idle acks. 2. Serial numbers are allocated by the rxrpc connection and not the call and as such may wrap independently if multiple channels are in use. In any case, what matters is whether the ack packet provides new information relating to the bounds of the window (the firstPacket and previousPacket in the ACK data). Fix this by discarding packets that appear to wind back the window bounds rather than on serial number procession. Fixes: 298bc15b2079 ("rxrpc: Only take the rwind and mtu values from latest ACK") Signed-off-by: Jeffrey Altman Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4b1a534d290a..062ca9dc29b8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -654,6 +654,7 @@ struct rxrpc_call { u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aa..4c6f9d0a00e7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u8 acks[RXRPC_MAXACKS]; } buf; rxrpc_serial_t acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, - first_soft_ack, ntohl(buf.ack.previousPacket), + first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) return; buf.info.rxMTU = 0; @@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, spin_lock(&call->input_lock); - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (inside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; + call->ackr_first_seq = first_soft_ack; + call->ackr_prev_seq = prev_pkt; + /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); -- cgit From ed0de45a1008991fdaa27a0152befcb74d126a8b Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Fri, 12 Apr 2019 16:19:27 -0400 Subject: ipv4: recompile ip options in ipv4_link_failure Recompile IP options since IPCB may not be valid anymore when ipv4_link_failure is called from arp_error_report. Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") and the commit before that (9ef6b42ad6fd) for a similar issue. Signed-off-by: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2..0206789bc2b7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; + struct ip_options opt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); rt = skb_rtable(skb); if (rt) -- cgit From c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 13 Apr 2019 17:32:21 -0700 Subject: ipv4: ensure rcu_read_lock() in ipv4_link_failure() fib_compute_spec_dst() needs to be called under rcu protection. syzbot reported : WARNING: suspicious RCU usage 5.1.0-rc4+ #165 Not tainted include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by swapper/0/0: #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline] #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162 __in_dev_get_rcu include/linux/inetdevice.h:220 [inline] fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294 spec_dst_fill net/ipv4/ip_options.c:245 [inline] __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343 ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195 dst_link_failure include/net/dst.h:427 [inline] arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297 neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995 neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 invoke_softirq kernel/softirq.c:374 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:414 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0206789bc2b7..88ce038dd495 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { - struct rtable *rt; struct ip_options opt; + struct rtable *rt; + int res; /* Recompile ip options since IPCB may not be valid anymore. */ memset(&opt, 0, sizeof(opt)); opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); -- cgit From 9ac6bb1414ac0d45fe9cefbd1f5b06f0e1a3c98a Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:05 +0300 Subject: qed: Delete redundant doorbell recovery types DB_REC_DRY_RUN (running doorbell recovery without sending doorbells) is never used. DB_REC_ONCE (send a single doorbell from the doorbell recovery) is not needed anymore because by running the periodic handler we make sure we check the overflow status later instead. This patch is needed because in the next patches, the only doorbell recovery type being used is DB_REC_REAL_DEAL, and the fixes are much cleaner without this enum. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 69 ++++++++++++------------------- drivers/net/ethernet/qlogic/qed/qed_int.c | 6 +-- drivers/net/ethernet/qlogic/qed/qed_int.h | 4 +- 4 files changed, 31 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 43a57ec296fd..1514ec93b989 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -920,8 +920,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec); +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); /* Other Linux specific common definitions */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9df8c4b3b54e..da9df81d651d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -300,26 +300,19 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) /* Ring the doorbell of a single doorbell recovery entry */ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, - struct qed_db_recovery_entry *db_entry, - enum qed_db_rec_exec db_exec) -{ - if (db_exec != DB_REC_ONCE) { - /* Print according to width */ - if (db_entry->db_width == DB_REC_WIDTH_32B) { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %x\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u32 *)db_entry->db_data); - } else { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %llx\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + struct qed_db_recovery_entry *db_entry) +{ + /* Print according to width */ + if (db_entry->db_width == DB_REC_WIDTH_32B) { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %x\n", + db_entry->db_addr, + *(u32 *)db_entry->db_data); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %llx\n", + db_entry->db_addr, + *(u64 *)(db_entry->db_data)); } /* Sanity */ @@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, wmb(); /* Ring the doorbell */ - if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) { - if (db_entry->db_width == DB_REC_WIDTH_32B) - DIRECT_REG_WR(db_entry->db_addr, - *(u32 *)(db_entry->db_data)); - else - DIRECT_REG_WR64(db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + if (db_entry->db_width == DB_REC_WIDTH_32B) + DIRECT_REG_WR(db_entry->db_addr, + *(u32 *)(db_entry->db_data)); + else + DIRECT_REG_WR64(db_entry->db_addr, + *(u64 *)(db_entry->db_data)); /* Flush the write combined buffer. Next doorbell may come from a * different entity to the same address... @@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, } /* Traverse the doorbell recovery entry list and ring all the doorbells */ -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec) +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) { struct qed_db_recovery_entry *db_entry = NULL; - if (db_exec != DB_REC_ONCE) { - DP_NOTICE(p_hwfn, - "Executing doorbell recovery. Counter was %d\n", - p_hwfn->db_recovery_info.db_recovery_counter); + DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); - /* Track amount of times recovery was executed */ - p_hwfn->db_recovery_info.db_recovery_counter++; - } + /* Track amount of times recovery was executed */ + p_hwfn->db_recovery_info.db_recovery_counter++; /* Protect the list */ spin_lock_bh(&p_hwfn->db_recovery_info.lock); list_for_each_entry(db_entry, - &p_hwfn->db_recovery_info.list, list_entry) { - qed_db_recovery_ring(p_hwfn, db_entry, db_exec); - if (db_exec == DB_REC_ONCE) - break; - } - + &p_hwfn->db_recovery_info.list, list_entry) + qed_db_recovery_ring(p_hwfn, db_entry); spin_unlock_bh(&p_hwfn->db_recovery_info.lock); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index e23980e301b6..3546e253c75f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -411,10 +411,8 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) { - qed_db_recovery_execute(p_hwfn, DB_REC_ONCE); + if (!overflow) return 0; - } if (qed_edpm_enabled(p_hwfn)) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); @@ -429,7 +427,7 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); /* Repeat all last doorbells (doorbell drop recovery) */ - qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL); + qed_db_recovery_execute(p_hwfn); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 1f356ed4f761..d473b522afc5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev); /** * @brief - Doorbell Recovery handler. - * Run DB_REAL_DEAL doorbell recovery in case of PF overflow - * (and flush DORQ if needed), otherwise run DB_REC_ONCE. + * Run doorbell recovery in case of PF overflow (and flush DORQ if + * needed). * * @param p_hwfn * @param p_ptt -- cgit From b61b04ad81d5f975349d66abbecabf96ba211140 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:06 +0300 Subject: qed: Fix the doorbell address sanity check Fix the condition which verifies that doorbell address is inside the doorbell bar by checking that the end of the address is within range as well. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index da9df81d651d..866cdc86a3f2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn, /* Doorbell address sanity (address within doorbell bar range) */ static bool qed_db_rec_sanity(struct qed_dev *cdev, - void __iomem *db_addr, void *db_data) + void __iomem *db_addr, + enum qed_db_rec_width db_width, + void *db_data) { + u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64; + /* Make sure doorbell address is within the doorbell bar */ if (db_addr < cdev->doorbells || - (u8 __iomem *)db_addr > + (u8 __iomem *)db_addr + width > (u8 __iomem *)cdev->doorbells + cdev->db_size) { WARN(true, "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n", @@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev, } /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data)) return -EINVAL; /* Obtain hwfn from doorbell address */ @@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev, return 0; } - /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) - return -EINVAL; - /* Obtain hwfn from doorbell address */ p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); @@ -317,7 +317,7 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, /* Sanity */ if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr, - db_entry->db_data)) + db_entry->db_width, db_entry->db_data)) return; /* Flush the write combined buffer. Since there are multiple doorbelling -- cgit From d4476b8a6151b2dd86c09b5acec64f66430db55d Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:07 +0300 Subject: qed: Fix missing DORQ attentions When the DORQ (doorbell block) is overflowed, all PFs get attentions at the same time. If one PF finished handling the attention before another PF even started, the second PF might miss the DORQ's attention bit and not handle the attention at all. If the DORQ attention is missed and the issue is not resolved, another attention will not be sent, therefore each attention is treated as a potential DORQ attention. As a result, the attention callback is called more frequently so the debug print was moved to reduce its quantity. The number of periodic doorbell recovery handler schedules was reduced because it was the previous way to mitigating the missed attention issue. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_int.c | 20 ++++++++++++++++++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1514ec93b989..fcc2d745c375 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -436,6 +436,7 @@ struct qed_db_recovery_info { /* Lock to protect the doorbell recovery mechanism list */ spinlock_t lock; + bool dorq_attn; u32 db_recovery_counter; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 3546e253c75f..fe3286fc956b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -438,17 +438,19 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; int rc; - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); - DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + p_hwfn->db_recovery_info.dorq_attn = true; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. */ + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; + DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + /* check if db_drop or overflow happened */ if (int_sts & (DORQ_REG_INT_STS_DB_DROP | DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) { @@ -505,6 +507,17 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) +{ + if (p_hwfn->db_recovery_info.dorq_attn) + goto out; + + /* Call DORQ callback if the attention was missed */ + qed_dorq_attn_cb(p_hwfn); +out: + p_hwfn->db_recovery_info.dorq_attn = false; +} + /* Instead of major changes to the data-structure, we have a some 'special' * identifiers for sources that changed meaning between adapters. */ @@ -1078,6 +1091,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, } } + /* Handle missed DORQ attention */ + qed_dorq_attn_handler(p_hwfn); + /* Clear IGU indication for the deasserted bits */ DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_IGU_CMD + diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f164d4acebcb..6de23b56b294 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -970,7 +970,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, } } -#define QED_PERIODIC_DB_REC_COUNT 100 +#define QED_PERIODIC_DB_REC_COUNT 10 #define QED_PERIODIC_DB_REC_INTERVAL_MS 100 #define QED_PERIODIC_DB_REC_INTERVAL \ msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS) -- cgit From 0d72c2ac89185f179da1e8a91c40c82f3fa38f0b Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:08 +0300 Subject: qed: Fix the DORQ's attentions handling Separate the overflow handling from the hardware interrupt status analysis. The interrupt status is a single register and is common for all PFs. The first PF reading the register is not necessarily the one who overflowed. All PFs must check their overflow status on every attention. In this change we clear the sticky indication in the attention handler to allow doorbells to be processed again as soon as possible, but running the doorbell recovery is scheduled for the periodic handler to reduce the time spent in the attention handler. Checking the need for DORQ flush was changed to "db_bar_no_edpm" because qed_edpm_enabled()'s result could change dynamically and might have prevented a needed flush. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_int.c | 61 +++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fcc2d745c375..127c89b22ef0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -431,6 +431,8 @@ struct qed_qm_info { u8 num_pf_rls; }; +#define QED_OVERFLOW_BIT 1 + struct qed_db_recovery_info { struct list_head list; @@ -438,6 +440,7 @@ struct qed_db_recovery_info { spinlock_t lock; bool dorq_attn; u32 db_recovery_counter; + unsigned long overflow; }; struct storm_stats { diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index fe3286fc956b..8848d5bed6e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -378,6 +378,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, u32 count = QED_DB_REC_COUNT; u32 usage = 1; + /* Flush any pending (e)dpms as they may never arrive */ + qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); + /* wait for usage to zero or count to run out. This is necessary since * EDPM doorbell transactions can take multiple 64b cycles, and as such * can "split" over the pci. Possibly, the doorbell drop can happen with @@ -406,23 +409,24 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 overflow; + u32 attn_ovfl, cur_ovfl; int rc; - overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); - DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) + attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT, + &p_hwfn->db_recovery_info.overflow); + cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!cur_ovfl && !attn_ovfl) return 0; - if (qed_edpm_enabled(p_hwfn)) { + DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n", + attn_ovfl, cur_ovfl); + + if (cur_ovfl && !p_hwfn->db_bar_no_edpm) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); if (rc) return rc; } - /* Flush any pending (e)dpm as they may never arrive */ - qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); - /* Release overflow sticky indication (stop silently dropping everything) */ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); @@ -432,13 +436,35 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } -static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn) { - u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + u32 overflow; int rc; - p_hwfn->db_recovery_info.dorq_attn = true; + overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!overflow) + goto out; + + /* Run PF doorbell recovery in next periodic handler */ + set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow); + + if (!p_hwfn->db_bar_no_edpm) { + rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); + if (rc) + goto out; + } + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); +out: + /* Schedule the handler even if overflow was not detected */ + qed_periodic_db_rec_start(p_hwfn); +} + +static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) +{ + u32 int_sts, first_drop_reason, details, address, all_drops_reason; + struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If @@ -477,11 +503,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4, first_drop_reason, all_drops_reason); - rc = qed_db_rec_handler(p_hwfn, p_ptt); - qed_periodic_db_rec_start(p_hwfn); - if (rc) - return rc; - /* Clear the doorbell drop details and prepare for next drop */ qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0); @@ -507,6 +528,14 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +{ + p_hwfn->db_recovery_info.dorq_attn = true; + qed_dorq_attn_overflow(p_hwfn); + + return qed_dorq_attn_int_sts(p_hwfn); +} + static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) { if (p_hwfn->db_recovery_info.dorq_attn) -- cgit From 69f23a09daf9790acb801aaef4bc7aea6f69eec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 14 Apr 2019 11:02:05 -0700 Subject: rtnetlink: fix rtnl_valid_stats_req() nlmsg_len check Jakub forgot to either use nlmsg_len() or nlmsg_msg_size(), allowing KMSAN to detect a possible uninit-value in rtnl_stats_get BUG: KMSAN: uninit-value in rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 CPU: 0 PID: 10428 Comm: syz-executor034 Not tainted 5.1.0-rc2+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x131/0x2a0 mm/kmsan/kmsan.c:619 __msan_warning+0x7a/0xf0 mm/kmsan/kmsan_instr.c:310 rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 rtnetlink_rcv_msg+0x115b/0x1550 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] ___sys_sendmsg+0xdb3/0x1220 net/socket.c:2137 __sys_sendmsg net/socket.c:2175 [inline] __do_sys_sendmsg net/socket.c:2184 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2182 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2182 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: 51bc860d4a99 ("rtnetlink: stats: validate attributes in get as well as dumps") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64..220c56e93659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } -- cgit From 8ed633b9baf9ec7d593ebb8e256312ff1c70ab37 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 12 Apr 2019 16:36:33 -0400 Subject: Revert "net-sysfs: Fix memory leak in netdev_register_kobject" This reverts commit 6b70fc94afd165342876e53fc4b2f7d085009945. The reverted bugfix will cause another issue. Reported by syzbot+6024817a931b2830bc93@syzkaller.appspotmail.com. See https://syzkaller.appspot.com/x/log.txt?x=1737671b200000 for details. Signed-off-by: Wang Hai Acked-by: Andy Shevchenko Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8f94303a1f5..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,20 +1747,16 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - goto error_put_device; + return error; error = register_queue_kobjects(ndev); - if (error) - goto error_device_del; + if (error) { + device_del(dev); + return error; + } pm_runtime_set_memalloc_noio(dev, true); - return 0; - -error_device_del: - device_del(dev); -error_put_device: - put_device(dev); return error; } -- cgit From 92480b3977fd3884649d404cbbaf839b70035699 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Apr 2019 15:04:10 +0200 Subject: bonding: fix event handling for stacked bonds When a bond is enslaved to another bond, bond_netdev_event() only handles the event as if the bond is a master, and skips treating the bond as a slave. This leads to a refcount leak on the slave, since we don't remove the adjacency to its master and the master holds a reference on the slave. Reproducer: ip link add bondL type bond ip link add bondU type bond ip link set bondL master bondU ip link del bondL No "Fixes:" tag, this code is older than git history. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b59708c35faf..ee610721098e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3213,8 +3213,12 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; if (event_dev->flags & IFF_MASTER) { + int ret; + netdev_dbg(event_dev, "IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + ret = bond_master_netdev_event(event, event_dev); + if (ret != NOTIFY_DONE) + return ret; } if (event_dev->flags & IFF_SLAVE) { -- cgit From 789445b960c16baf626c050f762126189b45b82d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 13 Apr 2019 09:52:15 +0200 Subject: MAINTAINERS: normalize Woojung Huh's email address MAINTAINERS contains a lower-case and upper-case variant of Woojung Huh' s email address. Only keep the lower-case variant in MAINTAINERS. Signed-off-by: Lukas Bulwahn Acked-by: Woojung Huh Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27b0de13506c..601679a213f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10145,7 +10145,7 @@ F: drivers/spi/spi-at91-usart.c F: Documentation/devicetree/bindings/mfd/atmel-usart.txt MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER -M: Woojung Huh +M: Woojung Huh M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained -- cgit From 9c69a13205151c0d801de9f9d83a818e6e8f60ec Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Sun, 14 Apr 2019 14:21:29 -0700 Subject: route: Avoid crash from dereferencing NULL rt->from When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is never checked for null - rt6_flush_exceptions() may have removed the entry. [ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170 [ 1914.209410] Call Trace: [ 1914.214798] [ 1914.219226] __ip6_rt_update_pmtu+0xb0/0x190 [ 1914.228649] ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel] [ 1914.239223] ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel] [ 1914.252489] ? __gre6_xmit+0x148/0x530 [ip6_gre] [ 1914.262678] ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre] [ 1914.273831] dev_hard_start_xmit+0x8d/0x1f0 [ 1914.283061] sch_direct_xmit+0xfa/0x230 [ 1914.291521] __qdisc_run+0x154/0x4b0 [ 1914.299407] net_tx_action+0x10e/0x1f0 [ 1914.307678] __do_softirq+0xca/0x297 [ 1914.315567] irq_exit+0x96/0xa0 [ 1914.322494] smp_apic_timer_interrupt+0x68/0x130 [ 1914.332683] apic_timer_interrupt+0xf/0x20 [ 1914.341721] Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Jonathan Lemon Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0302e0eb07af..7178e32eb15d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2330,6 +2330,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); -- cgit From 614c70f35cd77a9af8e2ca841dcdb121cec3068f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Apr 2019 16:47:03 +0100 Subject: bnx2x: fix spelling mistake "dicline" -> "decline" There is a spelling mistake in a BNX2X_ERR message, fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index a9bdc21873d3..10ff37d6dc78 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -957,7 +957,7 @@ int bnx2x_vfpf_update_vlan(struct bnx2x *bp, u16 vid, u8 vf_qid, bool add) bnx2x_sample_bulletin(bp); if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) { - BNX2X_ERR("Hypervisor will dicline the request, avoiding\n"); + BNX2X_ERR("Hypervisor will decline the request, avoiding\n"); rc = -EINVAL; goto out; } -- cgit From 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 13:56:39 +0300 Subject: net: bridge: fix per-port af_packet sockets When the commit below was introduced it changed two visible things: - the skb was no longer passed through the protocol handlers with the original device - the skb was passed up the stack with skb->dev = bridge The first change broke af_packet sockets on bridge ports. For example we use them for hostapd which listens for ETH_P_PAE packets on the ports. We discussed two possible fixes: - create a clone and pass it through NF_HOOK(), act on the original skb based on the result - somehow signal to the caller from the okfn() that it was called, meaning the skb is ok to be passed, which this patch is trying to implement via returning 1 from the bridge link-local okfn() Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and drop/error would return < 0 thus the okfn() is called only when the return was 1, so we signal to the caller that it was called by preserving the return value from nf_hook(). Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_input.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..ba303ee99b9b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: -- cgit From ad910c7c01269f229a97c335f2dc669fff750f65 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Apr 2019 19:14:45 +0200 Subject: net/core: work around section mismatch warning for ptp_classifier The routine ptp_classifier_init() uses an initializer for an automatic struct type variable which refers to an __initdata symbol. This is perfectly legal, but may trigger a section mismatch warning when running the compiler in -fpic mode, due to the fact that the initializer may be emitted into an anonymous .data section thats lack the __init annotation. So work around it by using assignments instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Gerald Schaefer Signed-off-by: David S. Miller --- net/core/ptp_classifier.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 703cf76aa7c2..7109c168b5e0 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -185,9 +185,10 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog_kern ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; + struct sock_fprog_kern ptp_prog; + + ptp_prog.len = ARRAY_SIZE(ptp_filter); + ptp_prog.filter = ptp_filter; BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } -- cgit From 899537b73557aafbdd11050b501cf54b4f5c45af Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 Apr 2019 15:57:23 -0500 Subject: net: atm: Fix potential Spectre v1 vulnerabilities arg is controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap) Fix this by sanitizing arg before using it to index dev_lec. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/atm/lec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594..ad4f829193f0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; -- cgit From d85e8be2a5a02869f815dd0ac2d743deb4cd7957 Mon Sep 17 00:00:00 2001 From: Yuya Kusakabe Date: Tue, 16 Apr 2019 10:22:28 +0900 Subject: net: Fix missing meta data in skb with vlan packet skb_reorder_vlan_header() should move XDP meta data with ethernet header if XDP meta data exists. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Yuya Kusakabe Signed-off-by: Takeru Hayasaka Co-developed-by: Takeru Hayasaka Reviewed-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef2cd5712098..40796b8bf820 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } -- cgit From f7a937801b9f8788519a23b12cb4d6c2c84d84be Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 16 Apr 2019 10:48:07 +0700 Subject: tipc: fix link established but not in session According to the link FSM, when a link endpoint got RESET_MSG (- a traditional one without the stopping bit) from its peer, it moves to PEER_RESET state and raises a LINK_DOWN event which then resets the link itself. Its state will become ESTABLISHING after the reset event and the link will be re-established soon after this endpoint starts to send ACTIVATE_MSG to the peer. There is no problem with this mechanism, however the link resetting has cleared the link 'in_session' flag (along with the other important link data such as: the link 'mtu') that was correctly set up at the 1st step (i.e. when this endpoint received the peer RESET_MSG). As a result, the link will become ESTABLISHED, but the 'in_session' flag is not set, and all STATE_MSG from its peer will be dropped at the link_validate_msg(). It means the link not synced and will sooner or later face a failure. Since the link reset action is obviously needed for a new link session (this is also true in the other situations), the problem here is that the link is re-established a bit too early when the link endpoints are not really in-sync yet. The commit forces a resync as already done in the previous commit 91986ee166cf ("tipc: fix link session and re-establish issues") by simply varying the link 'peer_session' value at the link_reset(). Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa4..131aa2f0fd27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; -- cgit From 4bcd4ec1017205644a2697bccbc3b5143f522f5f Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 16 Apr 2019 13:10:09 +0800 Subject: tipc: set sysctl_tipc_rmem and named_timeout right range We find that sysctl_tipc_rmem and named_timeout do not have the right minimum setting. sysctl_tipc_rmem should be larger than zero, like sysctl_tcp_rmem. And named_timeout as a timeout setting should be not less than zero. Fixes: cc79dd1ba9c10 ("tipc: change socket buffer overflow control to respect sk_rcvbuf") Fixes: a5325ae5b8bff ("tipc: add name distributor resiliency queue") Signed-off-by: Jie Liu Reported-by: Qiang Ning Reviewed-by: Zhiqiang Liu Reviewed-by: Miaohe Lin Signed-off-by: David S. Miller --- net/tipc/sysctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6..9df82a573aa7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,6 +38,8 @@ #include +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sk_filter", -- cgit From 3321b6c23fb330e690ed5cf1336c827e07843200 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 12:43:17 +0100 Subject: qed: fix spelling mistake "faspath" -> "fastpath" There is a spelling mistake in a DP_INFO message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 9faaa6df78ed..2f318aaf2b05 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1591,7 +1591,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN; } else { DP_INFO(p_hwfn, - "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", + "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n", vf->abs_vf_id, req->vfdev_info.eth_fp_hsi_major, req->vfdev_info.eth_fp_hsi_minor, -- cgit From 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Apr 2019 16:15:56 +0300 Subject: net: bridge: fix netlink export of vlan_stats_per_port option Since the introduction of the vlan_stats_per_port option the netlink export of it has been broken since I made a typo and used the ifla attribute instead of the bridge option to retrieve its state. Sysfs export is fine, only netlink export has been affected. Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232..7104cf13da84 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -- cgit From a8fd48b50deaa20808bbf0f6685f6f1acba6a64c Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:58 +0300 Subject: ocelot: Don't sleep in atomic context (irqs_disabled()) Preemption disabled at: [] dev_set_rx_mode+0x1c/0x38 Call trace: [] dump_backtrace+0x0/0x3d0 [] show_stack+0x14/0x20 [] dump_stack+0xac/0xe4 [] ___might_sleep+0x164/0x238 [] __might_sleep+0x50/0x88 [] kmem_cache_alloc+0x17c/0x1d0 [] ocelot_set_rx_mode+0x108/0x188 [mscc_ocelot_common] [] __dev_set_rx_mode+0x58/0xa0 [] dev_set_rx_mode+0x24/0x38 Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a1d0d6e42533..6cb2f03b67e6 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -613,7 +613,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port, struct netdev_hw_addr *hw_addr) { struct ocelot *ocelot = port->ocelot; - struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL); + struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC); if (!ha) return -ENOMEM; -- cgit From 1e1caa9735f90b5452fc48685c23552e56aa1920 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:59 +0300 Subject: ocelot: Clean up stats update deferred work This is preventive cleanup that may save troubles later. No need to cancel repeateadly queued work if code is properly refactored. Don't let the ethtool -s process interfere with the stat workqueue scheduling. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 6cb2f03b67e6..d715ef4fc92f 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -959,10 +959,8 @@ static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) ETH_GSTRING_LEN); } -static void ocelot_check_stats(struct work_struct *work) +static void ocelot_update_stats(struct ocelot *ocelot) { - struct delayed_work *del_work = to_delayed_work(work); - struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); int i, j; mutex_lock(&ocelot->stats_lock); @@ -986,11 +984,19 @@ static void ocelot_check_stats(struct work_struct *work) } } - cancel_delayed_work(&ocelot->stats_work); + mutex_unlock(&ocelot->stats_lock); +} + +static void ocelot_check_stats_work(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct ocelot *ocelot = container_of(del_work, struct ocelot, + stats_work); + + ocelot_update_stats(ocelot); + queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); - - mutex_unlock(&ocelot->stats_lock); } static void ocelot_get_ethtool_stats(struct net_device *dev, @@ -1001,7 +1007,7 @@ static void ocelot_get_ethtool_stats(struct net_device *dev, int i; /* check and update now */ - ocelot_check_stats(&ocelot->stats_work.work); + ocelot_update_stats(ocelot); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) @@ -1809,7 +1815,7 @@ int ocelot_init(struct ocelot *ocelot) ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), ANA_CPUQ_8021_CFG, i); - INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats); + INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); return 0; -- cgit From 50ce163a72d817a99e8974222dcf2886d5deb1ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2019 10:55:20 -0700 Subject: tcp: tcp_grow_window() needs to respect tcp_space() For some reason, tcp_grow_window() correctly tests if enough room is present before attempting to increase tp->rcv_ssthresh, but does not prevent it to grow past tcp_space() This is causing hard to debug issues, like failing the (__tcp_select_window(sk) >= tp->rcv_wnd) test in __tcp_ack_snd_check(), causing ACK delays and possibly slow flows. Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio, we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000" after about 60 round trips, when the active side no longer sends immediate acks. This bug predates git history. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e..731d3045b50a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } -- cgit From e6986423d28362aafe64d3757bbbc493f2687f8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Apr 2019 22:31:14 +0200 Subject: socket: fix compat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW It looks like the new socket options only work correctly for native execution, but in case of compat mode fall back to the old behavior as we ignore the 'old_timeval' flag. Rework so we treat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW the same way in compat and native 32-bit mode. Cc: Deepa Dinamani Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW") Signed-off-by: Arnd Bergmann Acked-by: Deepa Dinamani Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 782343bb925b..067878a1e4c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); @@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool { struct __kernel_sock_timeval tv; - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) -- cgit