From b03d519d3460f3aaf8b5afef582dd98466925352 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 16 Feb 2022 13:37:29 -0800 Subject: ice: store VF pointer instead of VF ID The VSI structure contains a vf_id field used to associate a VSI with a VF. This is used mainly for ICE_VSI_VF as well as partially for ICE_VSI_CTRL associated with the VFs. This API was designed with the idea that VFs are stored in a simple array that was expected to be static throughout most of the driver's life. We plan on refactoring VF storage in a few key ways: 1) converting from a simple static array to a hash table 2) using krefs to track VF references obtained from the hash table 3) use RCU to delay release of VF memory until after all references are dropped This is motivated by the goal to ensure that the lifetime of VF structures is accounted for, and prevent various use-after-free bugs. With the existing vsi->vf_id, the reference tracking for VFs would become somewhat convoluted, because each VSI maintains a vf_id field which will then require performing a look up. This means all these flows will require reference tracking and proper usage of rcu_read_lock, etc. We know that the VF VSI will always be backed by a valid VF structure, because the VSI is created during VF initialization and removed before the VF is destroyed. Rely on this and store a reference to the VF in the VSI structure instead of storing a VF ID. This will simplify the usage and avoid the need to perform lookups on the hash table in the future. For ICE_VSI_VF, it is expected that vsi->vf is always non-NULL after ice_vsi_alloc succeeds. Because of this, use WARN_ON when checking if a vsi->vf pointer is valid when dealing with VF VSIs. This will aid in debugging code which violates this assumption and avoid more disastrous panics. Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 95dfd04e8c3a..b80e9be3167c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2439,7 +2439,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + if (vsi->type == ICE_VSI_CTRL && vsi->vf) err = devm_request_irq(dev, irq_num, vsi->irq_handler, IRQF_SHARED, q_vector->name, q_vector); @@ -3386,14 +3386,14 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL); } static struct ice_vsi * ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_channel *ch) { - return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch); } /** @@ -3407,7 +3407,7 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL); } /** @@ -3421,7 +3421,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL); } /** -- cgit From c4c2c7db64e19f815956c750c461d67867f1cdaf Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 16 Feb 2022 13:37:35 -0800 Subject: ice: convert ice_for_each_vf to include VF entry iterator The ice_for_each_vf macro is intended to be used to loop over all VFs. The current implementation relies on an iterator that is the index into the VF array in the PF structure. This forces all users to perform a look up themselves. This abstraction forces a lot of duplicate work on callers and leaks the interface implementation to the caller. Replace this with an implementation that includes the VF pointer the primary iterator. This version simplifies callers which just want to iterate over every VF, as they no longer need to perform their own lookup. The "i" iterator value is replaced with a new unsigned int "bkt" parameter, as this will match the necessary interface for replacing the VF array with a hash table. For now, the bkt is the VF ID, but in the future it will simply be the hash bucket index. Document that it should not be treated as a VF ID. This change aims to simplify switching from the array to a hash table. I considered alternative implementations such as an xarray but decided that the hash table was the simplest and most suitable implementation. I also looked at methods to hide the bkt iterator entirely, but I couldn't come up with a feasible solution that worked for hash table iterators. Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b80e9be3167c..bc657916ba50 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -505,7 +505,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; struct ice_vsi *vsi; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); @@ -520,8 +521,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ - ice_for_each_vf(pf, i) - ice_set_vf_state_qs_dis(&pf->vf[i]); + ice_for_each_vf(pf, bkt, vf) + ice_set_vf_state_qs_dis(vf); if (ice_is_eswitch_mode_switchdev(pf)) { if (reset_type != ICE_RESET_PFR) @@ -1666,7 +1667,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; u32 reg; if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { @@ -1754,47 +1756,45 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* Check to see if one of the VFs caused an MDD event, and then * increment counters and set print pending */ - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - reg = rd32(hw, VP_MDET_TX_PQM(i)); + ice_for_each_vf(pf, bkt, vf) { + reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { - wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); + wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TCLAN(i)); + reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id)); if (reg & VP_MDET_TX_TCLAN_VALID_M) { - wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TDPU(i)); + reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id)); if (reg & VP_MDET_TX_TDPU_VALID_M) { - wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_RX(i)); + reg = rd32(hw, VP_MDET_RX(vf->vf_id)); if (reg & VP_MDET_RX_VALID_M) { - wr32(hw, VP_MDET_RX(i), 0xFFFF); + wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF); vf->mdd_rx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", - i); + vf->vf_id); /* Since the queue is disabled on VF Rx MDD events, the * PF can be configured to reset the VF through ethtool @@ -1805,9 +1805,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. */ ice_print_vf_rx_mdd_event(vf); - mutex_lock(&pf->vf[i].cfg_lock); - ice_reset_vf(&pf->vf[i], false); - mutex_unlock(&pf->vf[i].cfg_lock); + mutex_lock(&vf->cfg_lock); + ice_reset_vf(vf, false); + mutex_unlock(&vf->cfg_lock); } } } -- cgit From 000773c00f52f2a6084ec04c2efdc2a28ee29d9c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 16 Feb 2022 13:37:36 -0800 Subject: ice: factor VF variables to separate structure We maintain a number of values for VFs within the ice_pf structure. This includes the VF table, the number of allocated VFs, the maximum number of supported SR-IOV VFs, the number of queue pairs per VF, the number of MSI-X vectors per VF, and a bitmap of the VFs with detected MDD events. We're about to add a few more variables to this list. Clean this up first by extracting these members out into a new ice_vfs structure defined in ice_virtchnl_pf.h Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index bc657916ba50..9028a925743a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3712,7 +3712,7 @@ static void ice_set_pf_caps(struct ice_pf *pf) clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); - pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs, + pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs, ICE_MAX_VF_COUNT); } clear_bit(ICE_FLAG_RSS_ENA, pf->flags); -- cgit From 3d5985a185e6abfc0b38ed187819016a79eca864 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 16 Feb 2022 13:37:38 -0800 Subject: ice: convert VF storage to hash table with krefs and RCU The ice driver stores VF structures in a simple array which is allocated once at the time of VF creation. The VF structures are then accessed from the array by their VF ID. The ID must be between 0 and the number of allocated VFs. Multiple threads can access this table: * .ndo operations such as .ndo_get_vf_cfg or .ndo_set_vf_trust * interrupts, such as due to messages from the VF using the virtchnl communication * processing such as device reset * commands to add or remove VFs The current implementation does not keep track of when all threads are done operating on a VF and can potentially result in use-after-free issues caused by one thread accessing a VF structure after it has been released when removing VFs. Some of these are prevented with various state flags and checks. In addition, this structure is quite static and does not support a planned future where virtualization can be more dynamic. As we begin to look at supporting Scalable IOV with the ice driver (as opposed to just supporting Single Root IOV), this structure is not sufficient. In the future, VFs will be able to be added and removed individually and dynamically. To allow for this, and to better protect against a whole class of use-after-free bugs, replace the VF storage with a combination of a hash table and krefs to reference track all of the accesses to VFs through the hash table. A hash table still allows efficient look up of the VF given its ID, but also allows adding and removing VFs. It does not require contiguous VF IDs. The use of krefs allows the cleanup of the VF memory to be delayed until after all threads have released their reference (by calling ice_put_vf). To prevent corruption of the hash table, a combination of RCU and the mutex table_lock are used. Addition and removal from the hash table use the RCU-aware hash macros. This allows simple read-only look ups that iterate to locate a single VF can be fast using RCU. Accesses which modify the hash table, or which can't take RCU because they sleep, will hold the mutex lock. By using this design, we have a stronger guarantee that the VF structure can't be released until after all threads are finished operating on it. We also pave the way for the more dynamic Scalable IOV implementation in the future. Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9028a925743a..289e5c99e313 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -521,8 +521,10 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ + mutex_lock(&pf->vfs.table_lock); ice_for_each_vf(pf, bkt, vf) ice_set_vf_state_qs_dis(vf); + mutex_unlock(&pf->vfs.table_lock); if (ice_is_eswitch_mode_switchdev(pf)) { if (reset_type != ICE_RESET_PFR) @@ -1756,6 +1758,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* Check to see if one of the VFs caused an MDD event, and then * increment counters and set print pending */ + mutex_lock(&pf->vfs.table_lock); ice_for_each_vf(pf, bkt, vf) { reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { @@ -1811,6 +1814,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } } } + mutex_unlock(&pf->vfs.table_lock); ice_print_vfs_mdd_events(pf); } @@ -3680,6 +3684,7 @@ static void ice_deinit_pf(struct ice_pf *pf) mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); + mutex_destroy(&pf->vfs.table_lock); if (pf->avail_txqs) { bitmap_free(pf->avail_txqs); @@ -3779,6 +3784,9 @@ static int ice_init_pf(struct ice_pf *pf) return -ENOMEM; } + mutex_init(&pf->vfs.table_lock); + hash_init(pf->vfs.table); + return 0; } -- cgit