Diffstat (limited to 'drivers/net/ethernet')
35 files changed, 721 insertions, 670 deletions
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index e104fb02817d..aa0d2f3aaeaa 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth)
 		if (dma_mapping_error(greth->dev, dma_addr)) {
 			if (netif_msg_ifup(greth))
 				dev_err(greth->dev, "Could not create initial DMA mapping\n");
+			dev_kfree_skb(skb);
 			goto cleanup;
 		}
 		greth->rx_skbuff[i] = skb;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 55dfdb34e37b..f4ca0c6c0f51 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -71,13 +71,14 @@ config BCM63XX_ENET
 config BCMGENET
 	tristate "Broadcom GENET internal MAC support"
 	depends on HAS_IOMEM
+	depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835
 	select MII
 	select PHYLIB
 	select FIXED_PHY
 	select BCM7XXX_PHY
 	select MDIO_BCM_UNIMAC
 	select DIMLIB
-	select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL)
+	select BROADCOM_PHY if ARCH_BCM2835
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset.
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 98f3dc460ca7..f2f95493ec89 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(dev, "Failed to register netdevice\n");
-		goto err_unregister_interrupts;
+		goto err_destroy_workqueue;
 	}
 
 	nic->msg_enable = debug;
@@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	return 0;
 
+err_destroy_workqueue:
+	destroy_workqueue(nic->nicvf_rx_mode_wq);
 err_unregister_interrupts:
 	nicvf_unregister_interrupts(nic);
 err_free_netdev:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index cacd454ac696..c39b866e2582 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
 			       DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
 		dev_err(dev, "DMA mapping failed\n");
+		kfree(cmd_buff);
 		return -EFAULT;
 	}
 
@@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
 			 DMA_TO_DEVICE);
 	if (err) {
 		dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+		kfree(cmd_buff);
 		return err;
 	}
 
@@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
 			       DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
 		dev_err(dev, "DMA mapping failed\n");
+		kfree(cmd_buff);
 		return -EFAULT;
 	}
 
@@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
 			 DMA_TO_DEVICE);
 	if (err) {
 		dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+		kfree(cmd_buff);
 		return err;
 	}
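The greth and dpaa2-switch hunks above fix the same class of leak: a buffer allocated before dma_map_single() was not freed when dma_mapping_error() reported a failed mapping. A minimal sketch of the corrected shape, with hypothetical names (my_map_buffer) rather than code from either driver:

#include <linux/dma-mapping.h>
#include <linux/slab.h>

static int my_map_buffer(struct device *dev, size_t len, dma_addr_t *addr)
{
	void *buf = kmalloc(len, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	*addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *addr)) {
		kfree(buf);	/* without this, buf leaks on every failed map */
		return -EFAULT;
	}

	return 0;
}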
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 9471baa11d39..5528b0af82ae 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1216,7 +1216,8 @@ fec_restart(struct net_device *ndev)
 	writel(0, fep->hwp + FEC_IMASK);
 
 	/* Init the interrupt coalescing */
-	fec_enet_itr_coal_set(ndev);
+	if (fep->quirks & FEC_QUIRK_HAS_COALESCE)
+		fec_enet_itr_coal_set(ndev);
 }
 
 static int fec_enet_ipc_handle_init(struct fec_enet_private *fep)
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 93846bace028..ce2571c16e43 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit)
 		skb->protocol = eth_type_trans(skb, dev);
 		napi_gro_receive(&priv->napi, skb);
 		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += skb->len;
+		dev->stats.rx_bytes += len;
 next:
 		pos = (pos + 1) % rxq->num;
 		if (rx_pkts_num >= limit)
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index ffcf797dfa90..f867e9531117 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit)
 		skb->protocol = eth_type_trans(skb, dev);
 		napi_gro_receive(&priv->napi, skb);
 		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += skb->len;
+		dev->stats.rx_bytes += len;
next:
 		pos = dma_ring_incr(pos, RX_DESC_NUM);
 	}
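Both hisilicon hunks correct the same statistics bug: once the skb has been handed to napi_gro_receive() the driver no longer owns it, and even before that eth_type_trans() has pulled the Ethernet header, so skb->len no longer reflects the received frame. Capturing the length first avoids both problems; a sketch with hypothetical names:

static void my_rx_account(struct net_device *dev, struct napi_struct *napi,
			  struct sk_buff *skb)
{
	unsigned int len = skb->len;	/* capture before the skb is handed off */

	skb->protocol = eth_type_trans(skb, dev);	/* pulls the MAC header */
	napi_gro_receive(napi, skb);	/* skb may be freed from here on */

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;	/* not skb->len: that may touch a freed skb */
}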
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 36bc4fd91ef4..04acd1a992fa 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5931,9 +5931,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 		e1000_tx_queue(tx_ring, tx_flags, count);
 		/* Make sure there is space in the ring for the next send. */
 		e1000_maybe_stop_tx(tx_ring,
-				    (MAX_SKB_FRAGS *
+				    ((MAX_SKB_FRAGS + 1) *
 				     DIV_ROUND_UP(PAGE_SIZE,
-						  adapter->tx_fifo_limit) + 2));
+						  adapter->tx_fifo_limit) + 4));
 
 		if (!netdev_xmit_more() ||
 		    netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 616d27ec3226..887a735fe2a7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4466,11 +4466,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
 			return -EOPNOTSUPP;
 
 		/* First 4 bytes of L4 header */
-		if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
-			new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
-		else if (!usr_ip4_spec->l4_4_bytes)
-			new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
-		else
+		if (usr_ip4_spec->l4_4_bytes)
 			return -EOPNOTSUPP;
 
 		/* Filtering on Type of Service is not supported. */
@@ -4509,11 +4505,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
 		else
 			return -EOPNOTSUPP;
 
-		if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF))
-			new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
-		else if (!usr_ip6_spec->l4_4_bytes)
-			new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
-		else
+		if (usr_ip6_spec->l4_4_bytes)
 			return -EOPNOTSUPP;
 
 		/* Filtering on Traffic class is not supported. */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6861b3e2ced3..95485b56d6c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10656,6 +10656,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
 }
 
 /**
+ * i40e_clean_xps_state - clean xps state for every tx_ring
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_clean_xps_state(struct i40e_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings)
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->tx_rings[i])
+				clear_bit(__I40E_TX_XPS_INIT_DONE,
+					  vsi->tx_rings[i]->state);
+}
+
+/**
  * i40e_prep_for_reset - prep for the core to reset
  * @pf: board private structure
  *
@@ -10679,8 +10694,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
 	i40e_pf_quiesce_all_vsi(pf);
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
-		if (pf->vsi[v])
+		if (pf->vsi[v]) {
+			i40e_clean_xps_state(pf->vsi[v]);
 			pf->vsi[v]->seid = 0;
+		}
 	}
 
 	i40e_shutdown_adminq(&pf->hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 72ddcefc45b1..635f93d60318 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
 	i40e_cleanup_reset_vf(vf);
 
 	i40e_flush(hw);
+	usleep_range(20000, 40000);
 	clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states);
 
 	return true;
@@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
 	}
 
 	i40e_flush(hw);
+	usleep_range(20000, 40000);
 	clear_bit(__I40E_VF_DISABLE, pf->state);
 
 	return true;
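The new i40e_clean_xps_state() exists because the XPS (transmit packet steering) setup for each Tx ring is guarded by a once-only bit; clearing __I40E_TX_XPS_INIT_DONE during reset forces the queue mapping to be reprogrammed when the rings come back up. Roughly how such a gate is consumed on the configure side (a paraphrase from memory of the i40e sources; treat the details as assumptions):

static void my_config_xps_tx_ring(struct i40e_ring *ring)
{
	/* test_and_set_bit() makes the "only once until cleared" check
	 * atomic: the first caller after a reset programs XPS, later
	 * callers see the bit already set and return.
	 */
	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
		return;

	netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask,
			    ring->queue_index);
}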
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2b23b4714a26..a9a7f8b52140 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1111,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	if (link_up == old_link && link_speed == old_link_speed)
 		return 0;
 
-	if (!ice_is_e810(&pf->hw))
-		ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+	ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
 
 	if (ice_is_dcb_active(pf)) {
 		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@ -6340,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi)
 		ice_print_link_msg(vsi, true);
 		netif_tx_start_all_queues(vsi->netdev);
 		netif_carrier_on(vsi->netdev);
-		if (!ice_is_e810(&pf->hw))
-			ice_ptp_link_change(pf, pf->hw.pf_id, true);
+		ice_ptp_link_change(pf, pf->hw.pf_id, true);
 	}
 
 	/* Perform an initial read of the statistics registers now to
@@ -6773,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi)
 
 	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
 		vlan_err = ice_vsi_del_vlan_zero(vsi);
-		if (!ice_is_e810(&vsi->back->hw))
-			ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+		ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
 		netif_carrier_off(vsi->netdev);
 		netif_tx_disable(vsi->netdev);
 	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 13e75279e71c..d63161d73eb1 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -600,6 +600,23 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
 }
 
 /**
+ * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps
+ * @tx: the PTP Tx timestamp tracker to check
+ *
+ * Check that a given PTP Tx timestamp tracker is up, i.e. that it is ready
+ * to accept new timestamp requests.
+ *
+ * Assumes the tx->lock spinlock is already held.
+ */
+static bool
+ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx)
+{
+	lockdep_assert_held(&tx->lock);
+
+	return tx->init && !tx->calibrating;
+}
+
+/**
  * ice_ptp_tx_tstamp - Process Tx timestamps for a port
  * @tx: the PTP Tx timestamp tracker
  *
In + * this case, software will set the stale bit for any outstanding Tx + * timestamps when the clock is adjusted. Then this function will discard + * those captured timestamps instead of sending them to the stack. + * + * If a Tx packet has been waiting for more than 2 seconds, it is not possible + * to correctly extend the timestamp using the cached PHC time. It is + * extremely unlikely that a packet will ever take this long to timestamp. If + * we detect a Tx timestamp request that has waited for this long we assume + * the packet will never be sent by hardware and discard it without reading + * the timestamp register. */ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool ts_handled = true; + bool more_timestamps; struct ice_pf *pf; + struct ice_hw *hw; + u64 tstamp_ready; + int err; u8 idx; if (!tx->init) @@ -648,44 +688,86 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); + hw = &pf->hw; + + /* Read the Tx ready status first */ + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) + return false; for_each_set_bit(idx, tx->in_use, tx->len) { struct skb_shared_hwtstamps shhwtstamps = {}; - u8 phy_idx = idx + tx->quad_offset; - u64 raw_tstamp, tstamp; + u8 phy_idx = idx + tx->offset; + u64 raw_tstamp = 0, tstamp; + bool drop_ts = false; struct sk_buff *skb; - int err; + + /* Drop packets which have waited for more than 2 seconds */ + if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) { + drop_ts = true; + + /* Count the number of Tx timestamps that timed out */ + pf->ptp.tx_hwtstamp_timeouts++; + } + + /* Only read a timestamp from the PHY if its marked as ready + * by the tstamp_ready register. This avoids unnecessary + * reading of timestamps which are not yet valid. This is + * important as we must read all timestamps which are valid + * and only timestamps which are valid during each interrupt. + * If we do not, the hardware logic for generating a new + * interrupt can get stuck on some devices. + */ + if (!(tstamp_ready & BIT_ULL(phy_idx))) { + if (drop_ts) + goto skip_ts_read; + + continue; + } ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); - err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx, - &raw_tstamp); + err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp); if (err) continue; ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); - /* Check if the timestamp is invalid or stale */ - if (!(raw_tstamp & ICE_PTP_TS_VALID) || + /* For PHYs which don't implement a proper timestamp ready + * bitmap, verify that the timestamp value is different + * from the last cached timestamp. If it is not, skip this for + * now assuming it hasn't yet been captured by hardware. + */ + if (!drop_ts && tx->verify_cached && raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* The timestamp is valid, so we'll go ahead and clear this - * index and then send the timestamp up to the stack. 
@@ -713,26 +794,33 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
  * @tx: Tx tracking structure to initialize
  *
  * Assumes that the length has already been initialized. Do not call directly,
- * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
+ * use the ice_ptp_init_tx_* instead.
  */
 static int
 ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
 {
-	tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
-	if (!tx->tstamps)
-		return -ENOMEM;
+	unsigned long *in_use, *stale;
+	struct ice_tx_tstamp *tstamps;
+
+	tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL);
+	in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+	stale = bitmap_zalloc(tx->len, GFP_KERNEL);
+
+	if (!tstamps || !in_use || !stale) {
+		kfree(tstamps);
+		bitmap_free(in_use);
+		bitmap_free(stale);
 
-	tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
-	if (!tx->in_use) {
-		kfree(tx->tstamps);
-		tx->tstamps = NULL;
 		return -ENOMEM;
 	}
 
-	spin_lock_init(&tx->lock);
-
+	tx->tstamps = tstamps;
+	tx->in_use = in_use;
+	tx->stale = stale;
 	tx->init = 1;
 
+	spin_lock_init(&tx->lock);
+
 	return 0;
 }
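The rewritten allocator relies on kfree(NULL) and bitmap_free(NULL) being no-ops, so a single unwind path covers every partial-failure combination of the three allocations. The pattern in isolation, with hypothetical fields:

static int my_alloc_tracker(size_t len, int **slots, unsigned long **map)
{
	int *s = kcalloc(len, sizeof(*s), GFP_KERNEL);
	unsigned long *m = bitmap_zalloc(len, GFP_KERNEL);

	if (!s || !m) {
		kfree(s);	/* both free helpers accept NULL, */
		bitmap_free(m);	/* so no per-allocation error labels needed */
		return -ENOMEM;
	}

	*slots = s;
	*map = m;
	return 0;
}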
@@ -740,31 +828,71 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
  * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
  * @pf: Board private structure
  * @tx: the tracker to flush
+ *
+ * Called during teardown when a Tx tracker is being removed.
  */
 static void
 ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
 {
+	struct ice_hw *hw = &pf->hw;
+	u64 tstamp_ready;
+	int err;
 	u8 idx;
 
-	for (idx = 0; idx < tx->len; idx++) {
-		u8 phy_idx = idx + tx->quad_offset;
+	err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+	if (err) {
+		dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n",
+			tx->block, err);
+
+		/* If we fail to read the Tx timestamp ready bitmap just
+		 * skip clearing the PHY timestamps.
+		 */
+		tstamp_ready = 0;
+	}
+
+	for_each_set_bit(idx, tx->in_use, tx->len) {
+		u8 phy_idx = idx + tx->offset;
+		struct sk_buff *skb;
+
+		/* In case this timestamp is ready, we need to clear it. */
+		if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx)))
+			ice_clear_phy_tstamp(hw, tx->block, phy_idx);
 
 		spin_lock(&tx->lock);
-		if (tx->tstamps[idx].skb) {
-			dev_kfree_skb_any(tx->tstamps[idx].skb);
-			tx->tstamps[idx].skb = NULL;
-			pf->ptp.tx_hwtstamp_flushed++;
-		}
+		skb = tx->tstamps[idx].skb;
+		tx->tstamps[idx].skb = NULL;
 		clear_bit(idx, tx->in_use);
+		clear_bit(idx, tx->stale);
 		spin_unlock(&tx->lock);
 
-		/* Clear any potential residual timestamp in the PHY block */
-		if (!pf->hw.reset_ongoing)
-			ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
+		/* Count the number of Tx timestamps flushed */
+		pf->ptp.tx_hwtstamp_flushed++;
+
+		/* Free the SKB after we've cleared the bit */
+		dev_kfree_skb_any(skb);
 	}
 }
 
 /**
+ * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale
+ * @tx: the tracker to mark
+ *
+ * Mark currently outstanding Tx timestamps as stale. This prevents sending
+ * their timestamp value to the stack. This is required to prevent extending
+ * the 40bit hardware timestamp incorrectly.
+ *
+ * This should be called when the PTP clock is modified such as after a set
+ * time request.
+ */
+static void
+ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx)
+{
+	spin_lock(&tx->lock);
+	bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len);
+	spin_unlock(&tx->lock);
+}
+
+/**
  * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
  * @pf: Board private structure
  * @tx: Tx tracking structure to release
@@ -774,7 +902,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
 static void
 ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
 {
+	spin_lock(&tx->lock);
 	tx->init = 0;
+	spin_unlock(&tx->lock);
+
+	/* wait for potentially outstanding interrupt to complete */
+	synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
 
 	ice_ptp_flush_tx_tracker(pf, tx);
 
@@ -784,6 +917,9 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
 	bitmap_free(tx->in_use);
 	tx->in_use = NULL;
 
+	bitmap_free(tx->stale);
+	tx->stale = NULL;
+
 	tx->len = 0;
 }
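The teardown ordering in ice_ptp_release_tx_tracker() above is the interesting part: clear init under the same spinlock the interrupt path takes, wait out any handler already past that check with synchronize_irq(), and only then flush and free. A generic sketch of that sequence with a hypothetical tracker type:

struct my_tracker {
	spinlock_t lock;
	bool init;
};

static void my_tracker_release(struct my_tracker *t, unsigned int irq)
{
	/* 1) Stop new work: the IRQ path checks t->init under t->lock. */
	spin_lock(&t->lock);
	t->init = false;
	spin_unlock(&t->lock);

	/* 2) A handler that sampled init==true may still be running;
	 *    wait for it to finish before tearing anything down.
	 */
	synchronize_irq(irq);

	/* 3) Now nothing else can touch the slots: flush and free them. */
}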
@@ -801,9 +937,10 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
 static int
 ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
 {
-	tx->quad = port / ICE_PORTS_PER_QUAD;
-	tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
-	tx->len = INDEX_PER_PORT;
+	tx->block = port / ICE_PORTS_PER_QUAD;
+	tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822;
+	tx->len = INDEX_PER_PORT_E822;
+	tx->verify_cached = 0;
 
 	return ice_ptp_alloc_tx_tracker(tx);
 }
@@ -819,59 +956,19 @@ ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
 static int
 ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
 {
-	tx->quad = pf->hw.port_info->lport;
-	tx->quad_offset = 0;
-	tx->len = INDEX_PER_QUAD;
+	tx->block = pf->hw.port_info->lport;
+	tx->offset = 0;
+	tx->len = INDEX_PER_PORT_E810;
+
+	/* The E810 PHY does not provide a timestamp ready bitmap. Instead,
+	 * verify new timestamps against cached copy of the last read
+	 * timestamp.
+	 */
+	tx->verify_cached = 1;
 
 	return ice_ptp_alloc_tx_tracker(tx);
 }
 
 /**
- * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
- * @pf: pointer to the PF struct
- * @tx: PTP Tx tracker to clean up
- *
- * Loop through the Tx timestamp requests and see if any of them have been
- * waiting for a long time. Discard any SKBs that have been waiting for more
- * than 2 seconds. This is long enough to be reasonably sure that the
- * timestamp will never be captured. This might happen if the packet gets
- * discarded before it reaches the PHY timestamping block.
- */
-static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx)
-{
-	struct ice_hw *hw = &pf->hw;
-	u8 idx;
-
-	if (!tx->init)
-		return;
-
-	for_each_set_bit(idx, tx->in_use, tx->len) {
-		struct sk_buff *skb;
-		u64 raw_tstamp;
-
-		/* Check if this SKB has been waiting for too long */
-		if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
-			continue;
-
-		/* Read tstamp to be able to use this register again */
-		ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
-				    &raw_tstamp);
-
-		spin_lock(&tx->lock);
-		skb = tx->tstamps[idx].skb;
-		tx->tstamps[idx].skb = NULL;
-		clear_bit(idx, tx->in_use);
-		spin_unlock(&tx->lock);
-
-		/* Count the number of Tx timestamps which have timed out */
-		pf->ptp.tx_hwtstamp_timeouts++;
-
-		/* Free the SKB after we've cleared the bit */
-		dev_kfree_skb_any(skb);
-	}
-}
-
-/**
  * ice_ptp_update_cached_phctime - Update the cached PHC time values
  * @pf: Board specific private structure
  *
@@ -941,20 +1038,13 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
  * @pf: Board specific private structure
  *
  * This function must be called when the cached PHC time is no longer valid,
- * such as after a time adjustment. It discards any outstanding Tx timestamps,
- * and updates the cached PHC time for both the PF and Rx rings. If updating
- * the PHC time cannot be done immediately, a warning message is logged and
- * the work item is scheduled.
- *
- * These steps are required in order to ensure that we do not accidentally
- * report a timestamp extended by the wrong PHC cached copy. Note that we
- * do not directly update the cached timestamp here because it is possible
- * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we
- * would have to try again. During that time window, timestamps might be
- * requested and returned with an invalid extension. Thus, on failure to
- * immediately update the cached PHC time we would need to zero the value
- * anyways. For this reason, we just zero the value immediately and queue the
- * update work item.
+ * such as after a time adjustment. It marks any currently outstanding Tx
+ * timestamps as stale and updates the cached PHC time for both the PF and Rx
+ * rings.
+ *
+ * If updating the PHC time cannot be done immediately, a warning message is
+ * logged and the work item is scheduled immediately to minimize the window
+ * with a wrong cached timestamp.
  */
 static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
 {
@@ -978,8 +1068,12 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
 				   msecs_to_jiffies(10));
 	}
 
-	/* Flush any outstanding Tx timestamps */
-	ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx);
+	/* Mark any outstanding timestamps as stale, since they might have
+	 * been captured in hardware before the time update. This could lead
+	 * to us extending them with the wrong cached value resulting in
+	 * incorrect timestamp values.
+	 */
+	ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx);
 }
 
 /**
@@ -1060,19 +1154,6 @@ static u64 ice_base_incval(struct ice_pf *pf)
 }
 
 /**
- * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad
- * @pf: The PF private data structure
- * @quad: The quad (0-4)
- */
-static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad)
-{
-	struct ice_hw *hw = &pf->hw;
-
-	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
-	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
-}
-
-/**
  * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state
  * @port: PTP port for which Tx FIFO is checked
  */
@@ -1124,7 +1205,7 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
 		dev_dbg(ice_pf_to_dev(pf),
 			"Port %d Tx FIFO still not empty; resetting quad %d\n",
 			port->port_num, quad);
-		ice_ptp_reset_ts_memory_quad(pf, quad);
+		ice_ptp_reset_ts_memory_quad_e822(hw, quad);
 		port->tx_fifo_busy_cnt = FIFO_OK;
 		return 0;
 	}
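The reason ice_ptp_reset_cached_phctime() now marks outstanding timestamps stale is the extension math itself: hardware captures only the low bits of the clock, and software supplies the upper bits from a cached full-width PHC read. If the clock jumps between capture and extension, the cached value reconstructs the wrong epoch. A generic illustration of this kind of extension (not the driver's exact formula):

#include <stdint.h>

/* Extend an N-bit hardware timestamp using a cached 64-bit clock read.
 * Correct only while the cached read is within half the N-bit period of
 * the capture - which is exactly why a clock jump invalidates it.
 */
static uint64_t extend_nbit(uint64_t cached, uint64_t ts, unsigned int bits)
{
	uint64_t mask = (1ULL << bits) - 1;
	uint64_t delta = (ts - cached) & mask;

	if (delta > mask / 2)	/* capture happened before the cached read */
		return cached - ((cached - ts) & mask);
	return cached + delta;	/* capture happened after the cached read */
}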
@@ -1133,130 +1214,49 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
 }
 
 /**
- * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid
- * @port: the PTP port to check
- *
- * Checks whether the Tx offset for the PHY associated with this port is
- * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
- * not.
- */
-static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port)
-{
-	struct ice_pf *pf = ptp_port_to_pf(port);
-	struct device *dev = ice_pf_to_dev(pf);
-	struct ice_hw *hw = &pf->hw;
-	u32 val;
-	int err;
-
-	err = ice_ptp_check_tx_fifo(port);
-	if (err)
-		return err;
-
-	err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS,
-				    &val);
-	if (err) {
-		dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n",
-			port->port_num, err);
-		return -EAGAIN;
-	}
-
-	if (!(val & P_REG_TX_OV_STATUS_OV_M))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid
- * @port: the PTP port to check
- *
- * Checks whether the Rx offset for the PHY associated with this port is
- * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
- * not.
- */
-static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port)
-{
-	struct ice_pf *pf = ptp_port_to_pf(port);
-	struct device *dev = ice_pf_to_dev(pf);
-	struct ice_hw *hw = &pf->hw;
-	int err;
-	u32 val;
-
-	err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS,
-				    &val);
-	if (err) {
-		dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n",
-			port->port_num, err);
-		return err;
-	}
-
-	if (!(val & P_REG_RX_OV_STATUS_OV_M))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * ice_ptp_check_offset_valid - Check port offset valid bit
- * @port: Port for which offset valid bit is checked
- *
- * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the
- * offset is not ready.
- */
-static int ice_ptp_check_offset_valid(struct ice_ptp_port *port)
-{
-	int tx_err, rx_err;
-
-	/* always check both Tx and Rx offset validity */
-	tx_err = ice_ptp_check_tx_offset_valid(port);
-	rx_err = ice_ptp_check_rx_offset_valid(port);
-
-	if (tx_err || rx_err)
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets
+ * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets
  * @work: Pointer to the kthread_work structure for this task
  *
- * Check whether both the Tx and Rx offsets are valid for enabling the vernier
- * calibration.
+ * Check whether hardware has completed measuring the Tx and Rx offset values
+ * used to configure and enable vernier timestamp calibration.
+ *
+ * Once the offset in either direction is measured, configure the associated
+ * registers with the calibrated offset values and enable timestamping. The Tx
+ * and Rx directions are configured independently as soon as their associated
+ * offsets are known.
  *
- * Once we have valid offsets from hardware, update the total Tx and Rx
- * offsets, and exit bypass mode. This enables more precise timestamps using
- * the extra data measured during the vernier calibration process.
+ * This function reschedules itself until both Tx and Rx calibration have
+ * completed.
  */
-static void ice_ptp_wait_for_offset_valid(struct kthread_work *work)
+static void ice_ptp_wait_for_offsets(struct kthread_work *work)
 {
 	struct ice_ptp_port *port;
-	int err;
-	struct device *dev;
 	struct ice_pf *pf;
 	struct ice_hw *hw;
+	int tx_err;
+	int rx_err;
 
 	port = container_of(work, struct ice_ptp_port, ov_work.work);
 	pf = ptp_port_to_pf(port);
 	hw = &pf->hw;
-	dev = ice_pf_to_dev(pf);
-
-	if (ice_is_reset_in_progress(pf->state))
-		return;
 
-	if (ice_ptp_check_offset_valid(port)) {
-		/* Offsets not ready yet, try again later */
+	if (ice_is_reset_in_progress(pf->state)) {
+		/* wait for device driver to complete reset */
 		kthread_queue_delayed_work(pf->ptp.kworker,
 					   &port->ov_work,
 					   msecs_to_jiffies(100));
 		return;
 	}
 
-	/* Offsets are valid, so it is safe to exit bypass mode */
-	err = ice_phy_exit_bypass_e822(hw, port->port_num);
-	if (err) {
-		dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n",
-			 port->port_num, err);
+	tx_err = ice_ptp_check_tx_fifo(port);
+	if (!tx_err)
+		tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num);
+	rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num);
+	if (tx_err || rx_err) {
+		/* Tx and/or Rx offset not yet configured, try again later */
+		kthread_queue_delayed_work(pf->ptp.kworker,
					   &port->ov_work,
					   msecs_to_jiffies(100));
 		return;
 	}
 }
@@ -1317,16 +1317,20 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
 	kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
 
 	/* temporarily disable Tx timestamps while calibrating PHY offset */
+	spin_lock(&ptp_port->tx.lock);
 	ptp_port->tx.calibrating = true;
+	spin_unlock(&ptp_port->tx.lock);
 	ptp_port->tx_fifo_busy_cnt = 0;
 
-	/* Start the PHY timer in bypass mode */
-	err = ice_start_phy_timer_e822(hw, port, true);
+	/* Start the PHY timer in Vernier mode */
+	err = ice_start_phy_timer_e822(hw, port);
 	if (err)
 		goto out_unlock;
 
 	/* Enable Tx timestamps right away */
+	spin_lock(&ptp_port->tx.lock);
 	ptp_port->tx.calibrating = false;
+	spin_unlock(&ptp_port->tx.lock);
 
 	kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0);
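ice_ptp_wait_for_offsets() is now a self-rescheduling poll: the ice_phy_cfg_*_offset_e822() helpers (see the ice_ptp_hw.c hunks further down) return -EBUSY until hardware has measured an offset, and program it exactly once, so the work item can simply retry until both directions succeed. The skeleton of such a kthread poll loop, with hypothetical my_ names:

static void my_wait_for_offsets(struct kthread_work *work)
{
	struct my_port *port = container_of(work, struct my_port,
					    ov_work.work);

	/* Each cfg helper is idempotent: -EBUSY until hardware is ready,
	 * then it programs the offset once and returns 0 ever after.
	 */
	if (my_cfg_tx_offset(port) || my_cfg_rx_offset(port)) {
		kthread_queue_delayed_work(port->kworker, &port->ov_work,
					   msecs_to_jiffies(100));
		return;
	}
	/* both directions calibrated: nothing left to do */
}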
@@ -1341,45 +1345,33 @@ out_unlock:
 }
 
 /**
- * ice_ptp_link_change - Set or clear port registers for timestamping
+ * ice_ptp_link_change - Reconfigure PTP after link status change
  * @pf: Board private structure
  * @port: Port for which the PHY start is set
  * @linkup: Link is up or down
  */
-int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
 {
 	struct ice_ptp_port *ptp_port;
 
-	if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
-		return 0;
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return;
 
-	if (port >= ICE_NUM_EXTERNAL_PORTS)
-		return -EINVAL;
+	if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS))
+		return;
 
 	ptp_port = &pf->ptp.port;
-	if (ptp_port->port_num != port)
-		return -EINVAL;
+	if (WARN_ON_ONCE(ptp_port->port_num != port))
+		return;
 
-	/* Update cached link err for this port immediately */
+	/* Update cached link status for this port immediately */
 	ptp_port->link_up = linkup;
 
-	if (!test_bit(ICE_FLAG_PTP, pf->flags))
-		/* PTP is not setup */
-		return -EAGAIN;
-
-	return ice_ptp_port_phy_restart(ptp_port);
-}
-
-/**
- * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads
- * @pf: The PF private data structure
- */
-static void ice_ptp_reset_ts_memory(struct ice_pf *pf)
-{
-	int quad;
+	/* E810 devices do not need to reconfigure the PHY */
+	if (ice_is_e810(&pf->hw))
+		return;
 
-	quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD;
-	ice_ptp_reset_ts_memory_quad(pf, quad);
+	ice_ptp_port_phy_restart(ptp_port);
 }
 
 /**
@@ -1397,7 +1389,7 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold)
 	int quad;
 	u32 val;
 
-	ice_ptp_reset_ts_memory(pf);
+	ice_ptp_reset_ts_memory(hw);
 
 	for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
 		err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
@@ -2332,11 +2324,14 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
 {
 	u8 idx;
 
-	/* Check if this tracker is initialized */
-	if (!tx->init || tx->calibrating)
+	spin_lock(&tx->lock);
+
+	/* Check that this tracker is accepting new timestamp requests */
+	if (!ice_ptp_is_tx_tracker_up(tx)) {
+		spin_unlock(&tx->lock);
 		return -1;
+	}
 
-	spin_lock(&tx->lock);
 	/* Find and set the first available index */
 	idx = find_first_zero_bit(tx->in_use, tx->len);
 	if (idx < tx->len) {
@@ -2345,6 +2340,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
 		 * requests.
 		 */
 		set_bit(idx, tx->in_use);
+		clear_bit(idx, tx->stale);
 		tx->tstamps[idx].start = jiffies;
 		tx->tstamps[idx].skb = skb_get(skb);
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -2359,7 +2355,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
 	if (idx >= tx->len)
 		return -1;
 	else
-		return idx + tx->quad_offset;
+		return idx + tx->offset;
 }
 
 /**
@@ -2384,8 +2380,6 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
 
 	err = ice_ptp_update_cached_phctime(pf);
 
-	ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx);
-
 	/* Run twice a second or reschedule if phc update failed */
 	kthread_queue_delayed_work(ptp->kworker, &ptp->work,
 				   msecs_to_jiffies(err ? 10 : 500));
@@ -2462,7 +2456,7 @@ pfr:
 		err = ice_ptp_init_tx_e810(pf, &ptp->port.tx);
 	} else {
 		kthread_init_delayed_work(&ptp->port.ov_work,
-					  ice_ptp_wait_for_offset_valid);
+					  ice_ptp_wait_for_offsets);
 		err = ice_ptp_init_tx_e822(pf, &ptp->port.tx,
 					   ptp->port.port_num);
 	}
@@ -2625,7 +2619,7 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
 		return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
 
 	kthread_init_delayed_work(&ptp_port->ov_work,
-				  ice_ptp_wait_for_offset_valid);
+				  ice_ptp_wait_for_offsets);
 	return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 028349295b71..9cda2f43e0e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -93,9 +93,14 @@ struct ice_perout_channel {
  * we discard old requests that were not fulfilled within a 2 second time
  * window.
  * Timestamp values in the PHY are read only and do not get cleared except at
- * hardware reset or when a new timestamp value is captured. The cached_tstamp
- * field is used to detect the case where a new timestamp has not yet been
- * captured, ensuring that we avoid sending stale timestamp data to the stack.
+ * hardware reset or when a new timestamp value is captured.
+ *
+ * Some PHY types do not provide a "ready" bitmap indicating which timestamp
+ * indexes are valid. In these cases, we use a cached_tstamp to keep track of
+ * the last timestamp we read for a given index. If the current timestamp
+ * value is the same as the cached value, we assume a new timestamp hasn't
+ * been captured. This avoids reporting stale timestamps to the stack. This is
+ * only done if the verify_cached flag is set in ice_ptp_tx structure.
  */
 struct ice_tx_tstamp {
 	struct sk_buff *skb;
@@ -105,30 +110,35 @@ struct ice_tx_tstamp {
 
 /**
  * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port
- * @lock: lock to prevent concurrent write to in_use bitmap
+ * @lock: lock to prevent concurrent access to fields of this struct
  * @tstamps: array of len to store outstanding requests
  * @in_use: bitmap of len to indicate which slots are in use
- * @quad: which quad the timestamps are captured in
- * @quad_offset: offset into timestamp block of the quad to get the real index
+ * @stale: bitmap of len to indicate slots which have stale timestamps
+ * @block: which memory block (quad or port) the timestamps are captured in
+ * @offset: offset into timestamp block to get the real index
  * @len: length of the tstamps and in_use fields.
  * @init: if true, the tracker is initialized;
  * @calibrating: if true, the PHY is calibrating the Tx offset. During this
  *               window, timestamps are temporarily disabled.
+ * @verify_cached: if true, verify new timestamp differs from last read value
  */
 struct ice_ptp_tx {
 	spinlock_t lock; /* lock protecting in_use bitmap */
 	struct ice_tx_tstamp *tstamps;
 	unsigned long *in_use;
-	u8 quad;
-	u8 quad_offset;
+	unsigned long *stale;
+	u8 block;
+	u8 offset;
 	u8 len;
-	u8 init;
-	u8 calibrating;
+	u8 init : 1;
+	u8 calibrating : 1;
+	u8 verify_cached : 1;
 };
 
 /* Quad and port information for initializing timestamp blocks */
 #define INDEX_PER_QUAD			64
-#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD)
+#define INDEX_PER_PORT_E822		16
+#define INDEX_PER_PORT_E810		64
 
 /**
  * struct ice_ptp_port - data used to initialize an external port for PTP
@@ -256,7 +266,7 @@ void ice_ptp_reset(struct ice_pf *pf);
 void ice_ptp_prepare_for_reset(struct ice_pf *pf);
 void ice_ptp_init(struct ice_pf *pf);
 void ice_ptp_release(struct ice_pf *pf);
-int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
 #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
 static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
 {
@@ -291,7 +301,8 @@ static inline void ice_ptp_reset(struct ice_pf *pf) { }
 static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
 static inline void ice_ptp_init(struct ice_pf *pf) { }
 static inline void ice_ptp_release(struct ice_pf *pf) { }
-static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
-{ return 0; }
+static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{
+}
 #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
 #endif /* _ICE_PTP_H_ */
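The tail of the ice_ptp.h hunk shows the standard pattern for CONFIG-gated kernel APIs, here applied while switching ice_ptp_link_change() from int to void in both arms: real prototypes when the feature is built in, empty static inline stubs otherwise, so callers never need an #ifdef. Reduced to its essentials:

#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
{
}
#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */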
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 1f8dd50db524..a38614d21ea8 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -656,6 +656,32 @@ ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
 }
 
 /**
+ * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ *
+ * Clear all timestamps from the PHY quad block that is shared between the
+ * internal PHYs on the E822 devices.
+ */
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad)
+{
+	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
+	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
+}
+
+/**
+ * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks
+ * @hw: pointer to the HW struct
+ */
+static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw)
+{
+	unsigned int quad;
+
+	for (quad = 0; quad < ICE_MAX_QUAD; quad++)
+		ice_ptp_reset_ts_memory_quad_e822(hw, quad);
+}
+
+/**
  * ice_read_cgu_reg_e822 - Read a CGU register
  * @hw: pointer to the HW struct
  * @addr: Register address to read
@@ -1715,21 +1741,48 @@ ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
  * adjust Tx timestamps by. This is calculated by combining some known static
  * latency along with the Vernier offset computations done by hardware.
  *
- * This function must be called only after the offset registers are valid,
- * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
- * has measured the offset.
+ * This function will not return successfully until the Tx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been transmitted by the device. It is safe to call this function
+ * periodically until calibration succeeds, as it will only program the offset
+ * once.
  *
  * To avoid overflow, when calculating the offset based on the known static
  * latency values, we use measurements in 1/100th of a nanosecond, and divide
  * the TUs per second up front. This avoids overflow while allowing
  * calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
  */
-static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
 {
 	enum ice_ptp_link_spd link_spd;
 	enum ice_ptp_fec_mode fec_mode;
 	u64 total_offset, val;
 	int err;
+	u32 reg;
+
+	/* Nothing to do if we've already programmed the offset */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (reg)
+		return 0;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (!(reg & P_REG_TX_OV_STATUS_OV_M))
+		return -EBUSY;
 
 	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
 	if (err)
@@ -1783,46 +1836,8 @@ int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		return err;
 
-	return 0;
-}
-
-/**
- * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode
- * @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * Calculate and program the fixed Tx offset, and indicate that the offset is
- * ready. This can be used when operating in bypass mode.
- */
-static int
-ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port)
-{
-	enum ice_ptp_link_spd link_spd;
-	enum ice_ptp_fec_mode fec_mode;
-	u64 total_offset;
-	int err;
-
-	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
-	if (err)
-		return err;
-
-	total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd);
-
-	/* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L
-	 * register, then indicate that the Tx offset is ready. After this,
-	 * timestamps will be enabled.
-	 *
-	 * Note that this skips including the more precise offsets generated
-	 * by the Vernier calibration.
-	 */
-	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L,
-					 total_offset);
-	if (err)
-		return err;
-
-	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1);
-	if (err)
-		return err;
+	dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n",
+		 port);
 
 	return 0;
 }
@@ -2026,6 +2041,11 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
  * measurements taken in hardware with some data about known fixed delay as
  * well as adjusting for multi-lane alignment delay.
  *
+ * This function will not return successfully until the Rx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been received by the device. It is safe to call this function periodically
+ * until calibration succeeds, as it will only program the offset once.
+ *
  * This function must be called only after the offset registers are valid,
  * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
  * has measured the offset.
@@ -2034,13 +2054,38 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
  * latency values, we use measurements in 1/100th of a nanosecond, and divide
  * the TUs per second up front. This avoids overflow while allowing
  * calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
  */
-static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
 {
 	enum ice_ptp_link_spd link_spd;
 	enum ice_ptp_fec_mode fec_mode;
 	u64 total_offset, pmd, val;
 	int err;
+	u32 reg;
+
+	/* Nothing to do if we've already programmed the offset */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (reg)
+		return 0;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (!(reg & P_REG_RX_OV_STATUS_OV_M))
+		return -EBUSY;
 
 	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
 	if (err)
@@ -2101,46 +2146,8 @@ int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		return err;
 
-	return 0;
-}
-
-/**
- * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode
- * @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * Calculate and program the fixed Rx offset, and indicate that the offset is
- * ready. This can be used when operating in bypass mode.
- */
-static int
-ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port)
-{
-	enum ice_ptp_link_spd link_spd;
-	enum ice_ptp_fec_mode fec_mode;
-	u64 total_offset;
-	int err;
-
-	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
-	if (err)
-		return err;
-
-	total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd);
-
-	/* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L
-	 * register, then indicate that the Rx offset is ready. After this,
-	 * timestamps will be enabled.
-	 *
-	 * Note that this skips including the more precise offsets generated
-	 * by Vernier calibration.
-	 */
-	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L,
-					 total_offset);
-	if (err)
-		return err;
-
-	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1);
-	if (err)
-		return err;
+	dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n",
+		 port);
 
 	return 0;
 }
@@ -2323,20 +2330,14 @@ ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset)
  * ice_start_phy_timer_e822 - Start the PHY clock timer
  * @hw: pointer to the HW struct
  * @port: the PHY port to start
- * @bypass: if true, start the PHY in bypass mode
  *
  * Start the clock of a PHY port. This must be done as part of the flow to
  * re-calibrate Tx and Rx timestamping offsets whenever the clock time is
  * initialized or when link speed changes.
  *
- * Bypass mode enables timestamps immediately without waiting for Vernier
- * calibration to complete. Hardware will still continue taking Vernier
- * measurements on Tx or Rx of packets, but they will not be applied to
- * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware
- * has completed offset calculation.
+ * Hardware will take Vernier measurements on Tx or Rx of packets.
  */
-int
-ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass)
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
 {
 	u32 lo, hi, val;
 	u64 incval;
@@ -2414,110 +2415,42 @@ ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass)
 	if (err)
 		return err;
 
-	if (bypass) {
-		val |= P_REG_PS_BYPASS_MODE_M;
-		/* Enter BYPASS mode, enabling timestamps immediately. */
-		err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
-		if (err)
-			return err;
-
-		/* Program the fixed Tx offset */
-		err = ice_phy_cfg_fixed_tx_offset_e822(hw, port);
-		if (err)
-			return err;
-
-		/* Program the fixed Rx offset */
-		err = ice_phy_cfg_fixed_rx_offset_e822(hw, port);
-		if (err)
-			return err;
-	}
-
 	ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port);
 
 	return 0;
 }
 
 /**
- * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations
+ * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register
  * @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * After hardware finishes vernier calculations for the Tx and Rx offset, this
- * function can be used to exit bypass mode by updating the total Tx and Rx
- * offsets, and then disabling bypass. This will enable hardware to include
- * the more precise offset calibrations, increasing precision of the generated
- * timestamps.
+ * @quad: the timestamp quad to read from
+ * @tstamp_ready: contents of the Tx memory status register
  *
- * This cannot be done until hardware has measured the offsets, which requires
- * waiting until at least one packet has been sent and received by the device.
+ * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in
+ * the PHY are ready. A set bit means the corresponding timestamp is valid and
A set bit means the corresponding timestamp is valid and + * ready to be captured from the PHY timestamp block. */ -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port) +static int +ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready) { + u32 hi, lo; int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_phy_cfg_tx_offset_e822(hw, port); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n", - port, err); - return err; - } - - err = ice_phy_cfg_rx_offset_e822(hw, port); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n", - port, err); - return err; - } - /* Exit bypass mode now that the offset has been updated */ - err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n", + quad, err); return err; } - if (!(val & P_REG_PS_BYPASS_MODE_M)) - ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n", - port); - - val &= ~P_REG_PS_BYPASS_MODE_M; - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n", + quad, err); return err; } - dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n", - port); + *tstamp_ready = (u64)hi << 32 | (u64)lo; return 0; } @@ -3196,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) return ice_clear_phy_tstamp_e822(hw, block, idx); } +/** + * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + * + * E810 devices do not use a Tx memory status register. Instead simply + * indicate that all timestamps are currently ready. 
@@ -3196,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
 	return ice_clear_phy_tstamp_e822(hw, block, idx);
 }
 
+/**
+ * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @tstamp_ready: contents of the Tx memory status register
+ *
+ * E810 devices do not use a Tx memory status register. Instead simply
+ * indicate that all timestamps are currently ready.
+ */
+static int
+ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready)
+{
+	*tstamp_ready = 0xFFFFFFFFFFFFFFFF;
+	return 0;
+}
+
 /* E810T SMA functions
  *
  * The following functions operate specifically on E810T hardware and are used
@@ -3379,6 +3328,18 @@ bool ice_is_pca9575_present(struct ice_hw *hw)
 }
 
 /**
+ * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks
+ * @hw: pointer to the HW struct
+ */
+void ice_ptp_reset_ts_memory(struct ice_hw *hw)
+{
+	if (ice_is_e810(hw))
+		return;
+
+	ice_ptp_reset_ts_memory_e822(hw);
+}
+
+/**
  * ice_ptp_init_phc - Initialize PTP hardware clock
  * @hw: pointer to the HW struct
  *
@@ -3399,3 +3360,24 @@ int ice_ptp_init_phc(struct ice_hw *hw)
 	else
 		return ice_ptp_init_phc_e822(hw);
 }
+
+/**
+ * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication
+ * @hw: pointer to the HW struct
+ * @block: the timestamp block to check
+ * @tstamp_ready: storage for the PHY Tx memory status information
+ *
+ * Check the PHY for Tx timestamp memory status. This reports a 64 bit value
+ * which indicates which timestamps in the block may be captured. A set bit
+ * means the timestamp can be read. An unset bit means the timestamp is not
+ * ready and software should avoid reading the register.
+ */
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
+{
+	if (ice_is_e810(hw))
+		return ice_get_phy_tx_tstamp_ready_e810(hw, block,
+							tstamp_ready);
+	else
+		return ice_get_phy_tx_tstamp_ready_e822(hw, block,
+							tstamp_ready);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 2bda64c76abc..3b68cb91bd81 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -133,7 +133,9 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
 int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj);
 int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp);
 int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx);
+void ice_ptp_reset_ts_memory(struct ice_hw *hw);
 int ice_ptp_init_phc(struct ice_hw *hw);
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready);
 
 /* E822 family functions */
 int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val);
@@ -141,6 +143,7 @@ int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val);
 int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val);
 int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val);
 int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time);
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad);
 
 /**
  * ice_e822_time_ref - Get the current TIME_REF from capabilities
@@ -184,8 +187,9 @@ static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref)
 
 /* E822 Vernier calibration functions */
 int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset);
-int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass);
-int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port);
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port);
 
 /* E810 family functions */
 int ice_ptp_init_phy_e810(struct ice_hw *hw);
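ice_get_phy_tx_tstamp_ready() is the exported, family-agnostic entry point: it dispatches on ice_is_e810() like the other wrappers in this file, and the E810 stub's all-ones answer means "every slot ready", which pushes stale-detection onto the verify_cached comparison seen earlier in ice_ptp_tx_tstamp(). The dispatch shape, reduced to a sketch with hypothetical my_ names:

/* One exported entry point, one implementation per hardware family. */
int my_get_tstamp_ready(struct ice_hw *hw, u8 block, u64 *ready)
{
	if (ice_is_e810(hw))
		return my_get_tstamp_ready_e810(hw, block, ready); /* ~0ULL */
	return my_get_tstamp_ready_e822(hw, block, ready); /* real bitmap */
}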
a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) *data = 1; return -1; } + wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); + wr32(E1000_EIMS, BIT(0)); } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c2cb98d24f5c..f8925cac61e4 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4270,7 +4270,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) /* Use the cpu associated to the rxq when it is online, in all * the other cases, use the cpu 0 which can't be offline. */ - if (cpu_online(pp->rxq_def)) + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) elected_cpu = pp->rxq_def; max_cpu = num_present_cpus(); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index e421714524c2..044cc211424e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -1159,7 +1159,12 @@ int otx2_init_tc(struct otx2_nic *nic) return err; tc->flow_ht_params = tc_flow_ht_params; - return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + if (err) { + kfree(tc->tc_entries_bitmap); + tc->tc_entries_bitmap = NULL; + } + return err; } EXPORT_SYMBOL(otx2_init_tc); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 06b6cc53fa02..a6271449617f 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -174,9 +174,10 @@ mtk_wed_wo_reset(struct mtk_wed_device *dev) mtk_wdma_tx_reset(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); - mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, - MTK_WED_WO_CMD_CHANGE_STATE, &state, - sizeof(state), false); + if (mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, + MTK_WED_WO_CMD_CHANGE_STATE, &state, + sizeof(state), false)) + return; if (readx_poll_timeout(mtk_wed_wo_read_status, dev, val, val == MTK_WED_WOIF_DISABLE_DONE, @@ -576,12 +577,10 @@ mtk_wed_deinit(struct mtk_wed_device *dev) } static void -mtk_wed_detach(struct mtk_wed_device *dev) +__mtk_wed_detach(struct mtk_wed_device *dev) { struct mtk_wed_hw *hw = dev->hw; - mutex_lock(&hw_lock); - mtk_wed_deinit(dev); mtk_wdma_rx_reset(dev); @@ -590,9 +589,11 @@ mtk_wed_detach(struct mtk_wed_device *dev) mtk_wed_free_tx_rings(dev); if (mtk_wed_get_rx_capa(dev)) { - mtk_wed_wo_reset(dev); + if (hw->wed_wo) + mtk_wed_wo_reset(dev); mtk_wed_free_rx_rings(dev); - mtk_wed_wo_deinit(hw); + if (hw->wed_wo) + mtk_wed_wo_deinit(hw); } if (dev->wlan.bus_type == MTK_WED_BUS_PCIE) { @@ -612,6 +613,13 @@ mtk_wed_detach(struct mtk_wed_device *dev) module_put(THIS_MODULE); hw->wed_dev = NULL; +} + +static void +mtk_wed_detach(struct mtk_wed_device *dev) +{ + mutex_lock(&hw_lock); + __mtk_wed_detach(dev); mutex_unlock(&hw_lock); } @@ -1494,8 +1502,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) ret = mtk_wed_wo_init(hw); } out: - if (ret) - mtk_wed_detach(dev); + if (ret) { + dev_err(dev->hw->dev, "failed to attach wed device\n"); + __mtk_wed_detach(dev); + } unlock: mutex_unlock(&hw_lock); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index f9539e6233c9..6bad0d262f28 100644 --- 
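[Editor's note: the mtk_wed rework above is the classic locked-wrapper/unlocked-helper split: mtk_wed_attach() already holds hw_lock on its error path, so calling mtk_wed_detach(), which takes the lock, would self-deadlock on a non-recursive mutex; __mtk_wed_detach() performs the teardown with the lock assumed held. A minimal pthread sketch of the shape:

#include <pthread.h>

static pthread_mutex_t hw_lock = PTHREAD_MUTEX_INITIALIZER;

/* Teardown proper; caller must hold hw_lock (the kernel's
 * double-underscore convention for "lock already held"). */
static void __detach(void)
{
        /* free rings, drop references, clear device pointers, ... */
}

/* Public entry point: takes the lock itself. */
static void detach(void)
{
        pthread_mutex_lock(&hw_lock);
        __detach();
        pthread_mutex_unlock(&hw_lock);
}

static int attach(void)
{
        int ret = 0;

        pthread_mutex_lock(&hw_lock);
        /* ... device setup; a failure would set ret nonzero ... */
        if (ret)
                __detach();     /* detach() here would deadlock on hw_lock */
        pthread_mutex_unlock(&hw_lock);
        return ret;
}
]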
a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -207,6 +207,9 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data, if (dev->hw->version == 1) return 0; + if (WARN_ON(!wo)) + return -ENODEV; + return mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, id, data, len, true); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index a2ee695a3f17..3340b4a694c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -363,93 +363,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { }; static struct mlxsw_sp_ipip_parms -mlxsw_sp1_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) -{ - struct mlxsw_sp_ipip_parms parms = {0}; - - WARN_ON_ONCE(1); - return parms; -} - -static int -mlxsw_sp1_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry, - bool force, char *ratr_pl) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static int -mlxsw_sp1_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - u32 tunnel_index) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static bool mlxsw_sp1_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) -{ - return false; -} - -static struct mlxsw_sp_rif_ipip_lb_config -mlxsw_sp1_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) -{ - struct mlxsw_sp_rif_ipip_lb_config config = {0}; - - WARN_ON_ONCE(1); - return config; -} - -static int -mlxsw_sp1_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - struct netlink_ext_ack *extack) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static int -mlxsw_sp1_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static void -mlxsw_sp1_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_ipip_entry *ipip_entry) -{ - WARN_ON_ONCE(1); -} - -static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = { - .dev_type = ARPHRD_IP6GRE, - .ul_proto = MLXSW_SP_L3_PROTO_IPV6, - .inc_parsing_depth = true, - .parms_init = mlxsw_sp1_ipip_netdev_parms_init_gre6, - .nexthop_update = mlxsw_sp1_ipip_nexthop_update_gre6, - .decap_config = mlxsw_sp1_ipip_decap_config_gre6, - .can_offload = mlxsw_sp1_ipip_can_offload_gre6, - .ol_loopback_config = mlxsw_sp1_ipip_ol_loopback_config_gre6, - .ol_netdev_change = mlxsw_sp1_ipip_ol_netdev_change_gre6, - .rem_ip_addr_set = mlxsw_sp1_ipip_rem_addr_set_gre6, - .rem_ip_addr_unset = mlxsw_sp1_ipip_rem_addr_unset_gre6, -}; - -const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = { - [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, - [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops, -}; - -static struct mlxsw_sp_ipip_parms -mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) +mlxsw_sp_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) { struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); @@ -464,9 +378,9 @@ mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) } static int -mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry, - bool force, char *ratr_pl) +mlxsw_sp_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + 
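[Editor's note: the mtk_wed_mcu hunk guards against a missing WO context before dereferencing it, trading a crash for a loud warning plus -ENODEV. A small userspace model of that fail-loud guard, with WARN_ON() approximated by fprintf():

#include <errno.h>
#include <stdio.h>

struct wo_ctx { int id; };

/* Userspace stand-in for the kernel's WARN_ON(): report and carry on. */
#define WARN_ON(cond) \
        ((cond) ? (fprintf(stderr, "warning: %s\n", #cond), 1) : 0)

static int msg_update(struct wo_ctx *wo)
{
        if (WARN_ON(!wo))
                return -ENODEV; /* fail the call instead of crashing */

        return wo->id;          /* safe to dereference past the guard */
}
]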
struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); enum mlxsw_reg_ratr_op op; @@ -482,9 +396,9 @@ mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, } static int -mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - u32 tunnel_index) +mlxsw_sp_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); @@ -519,8 +433,8 @@ mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); } -static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) +static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) { struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev); bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; @@ -534,8 +448,8 @@ static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_rif_ipip_lb_config -mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) +mlxsw_sp_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) { struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; @@ -553,20 +467,20 @@ mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp2_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - struct netlink_ext_ack *extack) +mlxsw_sp_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) { struct mlxsw_sp_ipip_parms new_parms; - new_parms = mlxsw_sp2_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); + new_parms = mlxsw_sp_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, &new_parms, extack); } static int -mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &ipip_entry->parms.daddr.addr6, @@ -574,24 +488,44 @@ mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, } static void -mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) { mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); } +static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = { + .dev_type = ARPHRD_IP6GRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV6, + .inc_parsing_depth = true, + .double_rif_entry = true, + .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp_ipip_decap_config_gre6, + .can_offload = mlxsw_sp_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = 
mlxsw_sp_ipip_rem_addr_unset_gre6, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, + [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops, +}; + static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { .dev_type = ARPHRD_IP6GRE, .ul_proto = MLXSW_SP_L3_PROTO_IPV6, .inc_parsing_depth = true, - .parms_init = mlxsw_sp2_ipip_netdev_parms_init_gre6, - .nexthop_update = mlxsw_sp2_ipip_nexthop_update_gre6, - .decap_config = mlxsw_sp2_ipip_decap_config_gre6, - .can_offload = mlxsw_sp2_ipip_can_offload_gre6, - .ol_loopback_config = mlxsw_sp2_ipip_ol_loopback_config_gre6, - .ol_netdev_change = mlxsw_sp2_ipip_ol_netdev_change_gre6, - .rem_ip_addr_set = mlxsw_sp2_ipip_rem_addr_set_gre6, - .rem_ip_addr_unset = mlxsw_sp2_ipip_rem_addr_unset_gre6, + .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp_ipip_decap_config_gre6, + .can_offload = mlxsw_sp_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre6, }; const struct mlxsw_sp_ipip_ops *mlxsw_sp2_ipip_ops_arr[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 8cc259dcc8d0..a35f009da561 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -49,6 +49,7 @@ struct mlxsw_sp_ipip_ops { int dev_type; enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ bool inc_parsing_depth; + bool double_rif_entry; struct mlxsw_sp_ipip_parms (*parms_init)(const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 48f1fa62a4fd..c22c3ac4e2a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -18,6 +18,7 @@ #include <linux/jhash.h> #include <linux/net_namespace.h> #include <linux/mutex.h> +#include <linux/genalloc.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -59,6 +60,7 @@ struct mlxsw_sp_rif { int mtu; u16 rif_index; u8 mac_profile_id; + u8 rif_entries; u16 vr_id; const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp *mlxsw_sp; @@ -77,6 +79,7 @@ struct mlxsw_sp_rif_params { }; u16 vid; bool lag; + bool double_entry; }; struct mlxsw_sp_rif_subport { @@ -1068,6 +1071,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { .common.dev = ol_dev, .common.lag = false, + .common.double_entry = ipip_ops->double_rif_entry, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; @@ -7826,18 +7830,26 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type); } -static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index) +static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index, + u8 rif_entries) { - int i; + *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table, + rif_entries); + if (*p_rif_index == 0) + return -ENOBUFS; + *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { - if (!mlxsw_sp->router->rifs[i]) { - *p_rif_index = i; - return 0; 
- } - } + /* RIF indexes must be aligned to the allocation size. */ + WARN_ON_ONCE(*p_rif_index % rif_entries); - return -ENOBUFS; + return 0; +} + +static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + u8 rif_entries) +{ + gen_pool_free(mlxsw_sp->router->rifs_table, + MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries); } static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, @@ -8081,6 +8093,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { + u8 rif_entries = params->double_entry ? 2 : 1; u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; @@ -8098,7 +8111,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, return ERR_CAST(vr); vr->rif_count++; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); goto err_rif_index_alloc; @@ -8113,6 +8126,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; + rif->rif_entries = rif_entries; if (ops->fid_get) { fid = ops->fid_get(rif, extack); @@ -8146,7 +8160,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_counters_alloc(rif); } - atomic_inc(&mlxsw_sp->router->rifs_count); + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return rif; err_stats_enable: @@ -8162,6 +8176,7 @@ err_fid_get: dev_put(rif->dev); kfree(rif); err_rif_alloc: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); @@ -8173,10 +8188,12 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; + u8 rif_entries = rif->rif_entries; + u16 rif_index = rif->rif_index; struct mlxsw_sp_vr *vr; int i; - atomic_dec(&mlxsw_sp->router->rifs_count); + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; @@ -8198,6 +8215,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp->router->rifs[rif->rif_index] = NULL; dev_put(rif->dev); kfree(rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); } @@ -9771,42 +9789,51 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; + u8 rif_entries = 1; u16 rif_index; int err; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); return ERR_PTR(err); } ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); - if (!ul_rif) - return ERR_PTR(-ENOMEM); + if (!ul_rif) { + err = -ENOMEM; + goto err_rif_alloc; + } mlxsw_sp->router->rifs[rif_index] = ul_rif; ul_rif->mlxsw_sp = mlxsw_sp; + ul_rif->rif_entries = rif_entries; err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); if (err) goto ul_rif_op_err; - atomic_inc(&mlxsw_sp->router->rifs_count); + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return ul_rif; ul_rif_op_err: mlxsw_sp->router->rifs[rif_index] = NULL; kfree(ul_rif); 
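[Editor's note: the new mlxsw allocator hands out rif_entries consecutive indexes with the base aligned to the run size, so a double-entry RIF always starts on an even index (hence the WARN_ON_ONCE alignment check above). A bitmap sketch of that policy, standing in for gen_pool with the first_fit_order_align algorithm; MAX_RIFS is illustrative:

#include <string.h>

#define MAX_RIFS 64

static unsigned char used[MAX_RIFS];

/* Allocate n contiguous indexes with the base aligned to n.
 * Returns the base index, or -1 when the table is exhausted. */
static int rif_index_alloc(int n)
{
        for (int base = 0; base + n <= MAX_RIFS; base += n) {
                int free = 1;

                for (int i = 0; i < n; i++)
                        free &= !used[base + i];
                if (free) {
                        memset(&used[base], 1, n);
                        return base;
                }
        }
        return -1;
}

static void rif_index_free(int base, int n)
{
        memset(&used[base], 0, n);
}

Stepping base by n yields the required alignment for the n = 1 and n = 2 cases this driver uses; the real gen_pool algorithm generalizes to any power-of-two order.]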
+err_rif_alloc: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); return ERR_PTR(err); } static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) { struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + u8 rif_entries = ul_rif->rif_entries; + u16 rif_index = ul_rif->rif_index; - atomic_dec(&mlxsw_sp->router->rifs_count); + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; kfree(ul_rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); } static struct mlxsw_sp_rif * @@ -9940,11 +9967,43 @@ static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; +static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct gen_pool *rifs_table; + int err; + + rifs_table = gen_pool_create(0, -1); + if (!rifs_table) + return -ENOMEM; + + gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align, + NULL); + + err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET, + MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1); + if (err) + goto err_gen_pool_add; + + mlxsw_sp->router->rifs_table = rifs_table; + + return 0; + +err_gen_pool_add: + gen_pool_destroy(rifs_table); + return err; +} + +static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + gen_pool_destroy(mlxsw_sp->router->rifs_table); +} + static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) { u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_core *core = mlxsw_sp->core; + int err; if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES)) return -EIO; @@ -9957,6 +10016,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; + err = mlxsw_sp_rifs_table_init(mlxsw_sp); + if (err) + goto err_rifs_table_init; + idr_init(&mlxsw_sp->router->rif_mac_profiles_idr); atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0); atomic_set(&mlxsw_sp->router->rifs_count, 0); @@ -9970,6 +10033,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) mlxsw_sp); return 0; + +err_rifs_table_init: + kfree(mlxsw_sp->router->rifs); + return err; } static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) @@ -9986,6 +10053,7 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_RESOURCE_RIF_MAC_PROFILES); WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr)); idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr); + mlxsw_sp_rifs_table_fini(mlxsw_sp); kfree(mlxsw_sp->router->rifs); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index c5dfb972b433..37d6e4c80e6a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -15,8 +15,12 @@ struct mlxsw_sp_router_nve_decap { u8 valid:1; }; +/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */ +#define MLXSW_SP_ROUTER_GENALLOC_OFFSET 0x100 + struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; + struct gen_pool *rifs_table; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; atomic_t rif_mac_profiles_count; diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 81a8ccca7e5e..5693784eec5b 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c 
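[Editor's note: MLXSW_SP_ROUTER_GENALLOC_OFFSET exists because gen_pool_alloc() signals failure by returning 0, so address 0 must never be a valid pool result; the pool is seeded at a nonzero base and the offset is subtracted again after a successful allocation. A toy model with a bump allocator in place of the real pool:

#include <stdint.h>

#define GENALLOC_OFFSET 0x100   /* keeps every valid result nonzero */

/* Toy stand-in for gen_pool_alloc(): bump allocator, never fails here. */
static unsigned long pool_alloc(unsigned long size)
{
        static unsigned long next = GENALLOC_OFFSET;
        unsigned long addr = next;

        next += size;
        return addr;
}

static int index_alloc(uint16_t *p_index, unsigned long size)
{
        unsigned long addr = pool_alloc(size);

        if (addr == 0)
                return -1;      /* 0 is unambiguously "allocation failed" */

        *p_index = addr - GENALLOC_OFFSET;      /* back to a 0-based index */
        return 0;
}
]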
@@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 66360c8c5a38..141897dfe388 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry); db_hw = &next_dcb_hw->db[0]; if (!(db_hw->status & FDMA_DCB_STATUS_DONE)) - tx->dropped++; + return -EINVAL; db = list_first_entry(&tx->db_list, struct sparx5_db, list); list_move_tail(&db->list, &tx->db_list); next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index f8382d3124e0..d25f4f09faa0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -897,6 +897,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_ports: sparx5_cleanup_ports(sparx5); + if (sparx5->mact_queue) + destroy_workqueue(sparx5->mact_queue); cleanup_config: kfree(configs); cleanup_pnode: @@ -923,6 +925,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) sparx5_vcap_destroy(sparx5); /* Unregister netdevs */ sparx5_unregister_notifier_blocks(sparx5); + destroy_workqueue(sparx5->mact_queue); return 0; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 83c16ca5b30f..6db6ac6a3bbc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) sparx5_set_port_ifh(ifh, port->portno); if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { - ret = sparx5_ptp_txtstamp_request(port, skb); - if (ret) - return ret; + if (sparx5_ptp_txtstamp_request(port, skb) < 0) + return NETDEV_TX_BUSY; sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); @@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) else ret = sparx5_inject(sparx5, ifh, skb, dev); - if (ret == NETDEV_TX_OK) { - stats->tx_bytes += skb->len; - stats->tx_packets++; + if (ret == -EBUSY) + goto busy; + if (ret < 0) + goto drop; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - return ret; + stats->tx_bytes += skb->len; + stats->tx_packets++; + sparx5->tx.packets++; - dev_kfree_skb_any(skb); - } else { - stats->tx_dropped++; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - 
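[Editor's note: the encx24j600 hunks above fix a precedence bug. Because != binds tighter than =, the old `ret = regmap_read(...) != 0` stored the boolean comparison result in ret, and on a successful read the BUSY poll loop terminated before ever polling. A standalone demo of both forms:

#include <stdio.h>

static int fake_read(int *val)
{
        *val = 42;
        return 0;       /* success */
}

int main(void)
{
        int ret, v;

        /* Buggy: ret receives (fake_read(&v) != 0), i.e. 0 or 1,
         * and the real return code is lost. */
        ret = fake_read(&v) != 0;
        printf("buggy ret = %d\n", ret);        /* 0 on success */

        /* Fixed: parenthesize the assignment, then compare. */
        if ((ret = fake_read(&v)) == 0)
                printf("fixed: ret = %d, v = %d\n", ret, v);
        return 0;
}
]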
SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - sparx5_ptp_txtstamp_release(port, skb); - } - return ret; + dev_consume_skb_any(skb); + return NETDEV_TX_OK; +drop: + stats->tx_dropped++; + sparx5->tx.dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +busy: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); + return NETDEV_TX_BUSY; } static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ad1277ac7f0d..2f6a048dee90 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1363,10 +1363,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) xdp_do_flush(); } -static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; u8 arm_bit; + int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1375,26 +1376,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) else mana_poll_tx_cq(cq); - if (cq->work_done < cq->budget && - napi_complete_done(&cq->napi, cq->work_done)) { + w = cq->work_done; + + if (w < cq->budget && + napi_complete_done(&cq->napi, w)) { arm_bit = SET_ARM_BIT; } else { arm_bit = 0; } mana_gd_ring_cq(gdma_queue, arm_bit); + + return w; } static int mana_poll(struct napi_struct *napi, int budget) { struct mana_cq *cq = container_of(napi, struct mana_cq, napi); + int w; cq->work_done = 0; cq->budget = budget; - mana_cq_handler(cq, cq->gdma_cq); + w = mana_cq_handler(cq, cq->gdma_cq); - return min(cq->work_done, budget); + return min(w, budget); } static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c index 4247bca09807..aa8aba4ff7aa 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -503,7 +503,7 @@ nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, max_len = max(max_reply_size, round_up(skb->len, 4)); if (max_len > mbox_max) { nn_dp_warn(&nn->dp, - "message too big for tha mailbox: %u/%u vs %u\n", + "message too big for the mailbox: %u/%u vs %u\n", skb->len, max_reply_size, mbox_max); return -EMSGSIZE; } diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index 2b427d8ccb2f..ccacb6ab6c39 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dma_len = skb_headlen(skb); if (skb_is_gso(skb)) type = NFDK_DESC_TX_TYPE_TSO; - else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len = pkt_len; dma_addr = rxbuf->dma_addr + dma_off; - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf = &tx_ring->ktxbufs[wr_idx]; 
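[Editor's note: in the mana hunks, mana_cq_handler() now returns the amount of work done so that mana_poll() reports a value snapshotted before the queue is re-armed, instead of re-reading cq->work_done after it may have changed. A compact model of that NAPI-style budget contract:

struct cq { int work_done; int budget; };

/* Handler snapshots work_done into a local before (re)arming the
 * queue, and returns that snapshot to the caller. */
static int cq_handler(struct cq *cq)
{
        int w;

        cq->work_done = 5;      /* pretend 5 completions were processed */
        w = cq->work_done;
        /* ... queue re-armed here; work_done may change afterwards ... */
        return w;
}

static int poll(struct cq *cq, int budget)
{
        int w;

        cq->work_done = 0;
        cq->budget = budget;
        w = cq_handler(cq);

        return w < budget ? w : budget; /* never report more than budget */
}
]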
dma_len = skb_headlen(skb); - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6bc923326268..33f723a9f471 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); stats->rx_packets++; - stats->rx_bytes += priv->rx_1st_skb->len; + stats->rx_bytes += pkt_len; break; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 50f6b4a14be4..eb6d9cd8e93f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); - axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); - axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); - axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); + axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); + axi->axi_fb = of_property_read_bool(np, "snps,fb"); + axi->axi_mb = of_property_read_bool(np, "snps,mb"); + axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index a8a6f0b48906..de112ab3195c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1462,7 +1462,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy if (speed == SPEED_1000) mac_control |= CPSW_SL_CTL_GIG; - if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII) + if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface)) /* Can be used with in band mode only */ mac_control |= CPSW_SL_CTL_EXT_EN; if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII) |
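[Editor's note: the am65-cpsw fix replaces a comparison against the single PHY_INTERFACE_MODE_RGMII value with phy_interface_mode_is_rgmii(), which covers the whole RGMII family (plain, ID, RXID, TXID). A sketch of the same family-predicate idea; the enum values here are illustrative, not the kernel's:

#include <stdbool.h>

enum phy_mode {
        MODE_RMII,
        MODE_RGMII,     /* the RGMII variants are kept contiguous */
        MODE_RGMII_ID,
        MODE_RGMII_RXID,
        MODE_RGMII_TXID,
};

/* Mirrors the shape of phy_interface_mode_is_rgmii(): one predicate
 * for the whole family instead of matching a single variant. */
static bool mode_is_rgmii(enum phy_mode m)
{
        return m >= MODE_RGMII && m <= MODE_RGMII_TXID;
}

static unsigned int mac_control_for(int speed, enum phy_mode m)
{
        unsigned int ctl = 0;

        if (speed == 10 && mode_is_rgmii(m))
                ctl |= 0x1;     /* stand-in for CPSW_SL_CTL_EXT_EN */
        return ctl;
}
]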