From 1eecc7ab82c42133b748e1895275942a054a7f67 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: net: lan78xx: fix runtime PM count underflow on link stop Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). So far, it was working only because this driver do not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ba6c8ac2a736..d2aa2c5b1989 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3135,7 +3135,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } -- cgit From eb2c11b27c58a62b5027b77f702c15cd0ca38f7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2024 17:06:56 +0100 Subject: net: bql: fix building with BQL disabled It is now possible to disable BQL, but that causes the cpsw driver to break: drivers/net/ethernet/ti/am65-cpsw-nuss.c:297:28: error: no member named 'dql' in 'struct netdev_queue' 297 | dql_avail(&netif_txq->dql), There is already a helper function in net/sch_generic.h that could be used to help here. Move its implementation into the common linux/netdevice.h along with the other bql interfaces and change both users over to the new interface. Fixes: ea7f3cfaa588 ("net: bql: allow the config to be disabled") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9d2f4ac783e4..2939a21ca74f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -294,7 +294,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, txqueue, netif_tx_queue_stopped(netif_txq), jiffies_to_msecs(jiffies - trans_start), - dql_avail(&netif_txq->dql), + netdev_queue_dql_avail(netif_txq), k3_cppi_desc_pool_avail(tx_chn->desc_pool)); if (netif_tx_queue_stopped(netif_txq)) { -- cgit From cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 +++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bd541527c8c7..99876b765b08 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2939,8 +2939,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10524,6 +10524,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10540,6 +10578,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10548,9 +10591,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10578,9 +10618,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10589,6 +10626,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** -- cgit From d562b11c1eac7d73f4c778b4cbe5468f86b1f20d Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: i40e: disable NAPI right after disabling irqs when handling xsk_pool Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 54eb55464e31..89a3401d20ab 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13560,9 +13560,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); -- cgit From 99099c6bc75a30b76bb5d6774a0509ab6f06af05 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:53 +0100 Subject: ice: reorder disabling IRQ and NAPI in ice_qp_dis ice_qp_dis() currently does things in very mixed way. Tx is stopped before disabling IRQ on related queue vector, then it takes care of disabling Rx and finally NAPI is disabled. Let us start with disabling IRQs in the first place followed by turning off NAPI. Then it is safe to handle queues. One subtle change on top of that is that even though ice_qp_ena() looks more sane, clear ICE_CFG_BUSY as the last thing there. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8b81a1677045..2eecd0f39aa6 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -179,6 +179,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -195,13 +199,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -259,11 +260,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) return err; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); return 0; } -- cgit From 4035c72dc1ba81a96f94de84dfd5409056c1d9c9 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 23 Feb 2024 07:40:24 +0100 Subject: ice: reconfig host after changing MSI-X on VF During VSI reconfiguration filters and VSI config which is set in ice_vf_init_host_cfg() are lost. Recall the host configuration function to restore them. Without this config VF on which MSI-X amount was changed might had a connection problems. Fixes: 4d38cb44bd32 ("ice: manage VFs MSI-X using resource tracking") Reviewed-by: Jacob Keller Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a94a1c48c3de..b0f78c2f2790 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1068,6 +1068,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) struct ice_pf *pf = pci_get_drvdata(pdev); u16 prev_msix, prev_queues, queues; bool needs_rebuild = false; + struct ice_vsi *vsi; struct ice_vf *vf; int id; @@ -1102,6 +1103,10 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (!vf) return -ENOENT; + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -ENOENT; + prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; @@ -1122,7 +1127,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (vf->first_vector_idx < 0) goto unroll; - if (ice_vf_reconfig_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf) || ice_vf_init_host_cfg(vf, vsi)) { /* Try to rebuild with previous values */ needs_rebuild = true; goto unroll; @@ -1148,8 +1153,10 @@ unroll: if (vf->first_vector_idx < 0) return -EINVAL; - if (needs_rebuild) + if (needs_rebuild) { ice_vf_reconfig_vsi(vf); + ice_vf_init_host_cfg(vf, vsi); + } ice_ena_vf_mappings(vf); ice_put_vf(vf); -- cgit From 8deeefb24786ea7950b37bde4516b286c877db00 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 19 Oct 2023 04:49:54 +0300 Subject: Revert "net/mlx5: Block entering switchdev mode with ns inconsistency" This reverts commit 662404b24a4c4d839839ed25e3097571f5938b9b. The revert is required due to the suspicion it is not good for anything and cause crash. Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 23 ---------------------- 1 file changed, 23 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0455134c98e..14b3bd3c5e2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3658,22 +3658,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - struct net *devl_net, *netdev_net; - bool ret = false; - - mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); - if (dev->mlx5e_res.uplink_netdev) { - netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); - ret = net_eq(devl_net, netdev_net); - } - mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); - return ret; -} - int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -3718,13 +3702,6 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && - !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { - NL_SET_ERR_MSG_MOD(extack, - "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); - return -EPERM; - } - mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { -- cgit From b7bbd698c90591546d22093181e266785f08c18b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 13 Dec 2023 17:07:08 -0800 Subject: Revert "net/mlx5e: Check the number of elements before walk TC rhashtable" This reverts commit 4e25b661f484df54b6751b65f9ea2434a3b67539. This Commit was mistakenly applied by pulling the wrong tag, remove it. Fixes: 4e25b661f484 ("net/mlx5e: Check the number of elements before walk TC rhashtable") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 190f10aba170..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + if (!rpriv || !rpriv->netdev) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); -- cgit From 85ea2c5c5ef5f24fe6e6e7028ddd90be1cb5d27e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 11 Jan 2024 01:27:47 +0000 Subject: net/mlx5: E-switch, Change flow rule destination checking The checking in the cited commit is not accurate. In the common case, VF destination is internal, and uplink destination is external. However, uplink destination with packet reformat is considered as internal because firmware uses LB+hairpin to support it. Update the checking so header rewrite rules with both internal and external destinations are not allowed. Fixes: e0e22d59b47a ("net/mlx5: E-switch, Add checking for flow rule destinations") Signed-off-by: Jianbo Liu Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 23 +++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 14b3bd3c5e2f..baaae628b0a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,21 +535,26 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) } static bool -esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest) { - bool vf_dest = false, pf_dest = false; + bool internal_dest = false, external_dest = false; int i; for (i = 0; i < max_dest; i++) { - if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT && + dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK) continue; - if (dests[i].vport.num == MLX5_VPORT_UPLINK) - pf_dest = true; + /* Uplink dest is external, but considered as internal + * if there is reformat because firmware uses LB+hairpin to support it. + */ + if (dests[i].vport.num == MLX5_VPORT_UPLINK && + !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) + external_dest = true; else - vf_dest = true; + internal_dest = true; - if (vf_dest && pf_dest) + if (internal_dest && external_dest) return true; } @@ -695,9 +700,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, /* Header rewrite with combined wire+loopback in FDB is not allowed */ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && - esw_dests_to_vf_pf_vports(dest, i)) { + esw_dests_to_int_external(dest, i)) { esw_warn(esw->dev, - "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n"); rule = ERR_PTR(-EINVAL); goto err_esw_get; } -- cgit From ac8082a3c7a158640a2c493ec437dd9da881a6a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 16 Jan 2024 20:13:34 +0200 Subject: net/mlx5: Fix fw reporter diagnose output Restore fw reporter diagnose to print the syndrome even if it is zero. Following the cited commit, in this case (syndrome == 0) command returns no output at all. This fix restores command output in case syndrome is cleared: $ devlink health diagnose pci/0000:82:00.0 reporter fw Syndrome: 0 Fixes: d17f98bf7cc9 ("net/mlx5: devlink health: use retained error fmsg API") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8ff6dc9bc803..b5c709bba155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -452,10 +452,10 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct health_buffer __iomem *h = health->health; u8 synd = ioread8(&h->synd); + devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); if (!synd) return 0; - devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); return 0; -- cgit From 5e6107b499f3fc4748109e1d87fd9603b34f1e0d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 28 Jan 2024 20:43:58 +0200 Subject: net/mlx5: Check capability for fw_reset Functions which can't access MFRL (Management Firmware Reset Level) register, have no use of fw_reset structures or events. Remove fw_reset structures allocation and registration for fw reset events notifications for these functions. Having the devlink param enable_remote_dev_reset on functions that don't have this capability is misleading as these functions are not allowed to influence the reset flow. Hence, this patch removes this parameter for such functions. In addition, return not supported on devlink reload action fw_activate for these functions. Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Moshe Shemesh Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3e064234f6fe..98d4306929f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -157,6 +157,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (action == DEVLINK_RELOAD_ACTION_FW_ACTIVATE && + !dev->priv.fw_reset) { + NL_SET_ERR_MSG_MOD(extack, "FW activate is unsupported for this function"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index f27eab6e4929..2911aa34a5be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -703,19 +703,30 @@ void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); mlx5_eq_notifier_register(dev, &fw_reset->nb); } void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) { - mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return; + + mlx5_eq_notifier_unregister(dev, &fw_reset->nb); } void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); @@ -733,9 +744,13 @@ static const struct devlink_param mlx5_fw_reset_devlink_params[] = { int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { - struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + struct mlx5_fw_reset *fw_reset; int err; + if (!MLX5_CAP_MCAM_REG(dev, mfrl)) + return 0; + + fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); if (!fw_reset) return -ENOMEM; fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); @@ -771,6 +786,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + devl_params_unregister(priv_to_devlink(dev), mlx5_fw_reset_devlink_params, ARRAY_SIZE(mlx5_fw_reset_devlink_params)); -- cgit From dd238b702064b21d25b4fc39a19699319746d655 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 25 Dec 2023 01:47:05 +0000 Subject: net/mlx5e: Change the warning when ignore_flow_level is not supported Downgrade the print from mlx5_core_warn() to mlx5_core_dbg(), as it is just a statement of fact that firmware doesn't support ignore flow level. And change the wording to "firmware flow level support is missing", to make it more accurate. Fixes: ae2ee3be99a8 ("net/mlx5: CT: Remove warning of ignore_flow_level support for VFs") Signed-off-by: Jianbo Liu Suggested-by: Elliott, Robert (Servers) Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 86bf007fd05b..b500cc2c9689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -37,7 +37,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { if (priv->mdev->coredev_type == MLX5_COREDEV_PF) - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + mlx5_core_dbg(priv->mdev, "firmware flow level support is missing\n"); err = -EOPNOTSUPP; goto err_check; } -- cgit From a71f2147b64941efee156bfda54fd6461d0f95df Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 13 Mar 2023 17:03:03 +0200 Subject: net/mlx5e: Fix MACsec state loss upon state update in offload path The packet number attribute of the SA is incremented by the device rather than the software stack when enabling hardware offload. Because the packet number attribute is managed by the hardware, the software has no insight into the value of the packet number attribute actually written by the device. Previously when MACsec offload was enabled, the hardware object for handling the offload was destroyed when the SA was disabled. Re-enabling the SA would lead to a new hardware object being instantiated. This new hardware object would not have any recollection of the correct packet number for the SA. Instead, destroy the flow steering rule when deactivating the SA and recreate it upon reactivation, preserving the original hardware object. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Emeel Hakim Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/macsec.c | 82 ++++++++++++++-------- 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index d4ebd8743114..b2cabd6ab86c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -310,9 +310,9 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } -static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *sa, - bool is_tx, struct net_device *netdev, u32 fs_id) +static void mlx5e_macsec_cleanup_sa_fs(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) { int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; @@ -322,20 +322,49 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, fs_id); - mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); sa->macsec_rule = NULL; } +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) +{ + mlx5e_macsec_cleanup_sa_fs(macsec, sa, is_tx, netdev, fs_id); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); +} + +static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, bool encrypt, + bool is_tx, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + struct mlx5_macsec_rule_attrs rule_attrs; + union mlx5_macsec_rule *macsec_rule; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5_macsec_fs_add_rule(macsec_fs, ctx, &rule_attrs, fs_id); + if (!macsec_rule) + return -ENOMEM; + + sa->macsec_rule = macsec_rule; + + return 0; +} + static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, bool is_tx, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; - struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; - union mlx5_macsec_rule *macsec_rule; int err; obj_attrs.next_pn = sa->next_pn; @@ -357,20 +386,12 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, if (err) return err; - rule_attrs.macsec_obj_id = sa->macsec_obj_id; - rule_attrs.sci = sa->sci; - rule_attrs.assoc_num = sa->assoc_num; - rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : - MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - - macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); - if (!macsec_rule) { - err = -ENOMEM; - goto destroy_macsec_object; + if (sa->active) { + err = mlx5e_macsec_init_sa_fs(ctx, sa, encrypt, is_tx, fs_id); + if (err) + goto destroy_macsec_object; } - sa->macsec_rule = macsec_rule; - return 0; destroy_macsec_object: @@ -526,9 +547,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) goto destroy_sa; macsec_device->tx_sa[assoc_num] = tx_sa; - if (!secy->operational || - assoc_num != tx_sc->encoding_sa || - !tx_sa->active) + if (!secy->operational) goto out; err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); @@ -595,7 +614,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; if (ctx_tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } else { @@ -604,7 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } out: mutex_unlock(&macsec->lock); @@ -1030,8 +1049,9 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + if (rx_sa->active) + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); rx_sc->rx_sa[assoc_num] = NULL; @@ -1112,8 +1132,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, if (!rx_sa || !rx_sa->macsec_rule) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + mlx5e_macsec_cleanup_sa_fs(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); } } @@ -1124,8 +1144,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, - &rx_sc->sc_xarray_element->fs_id); + err = mlx5e_macsec_init_sa_fs(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -1178,7 +1198,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } for (i = 0; i < MACSEC_NUM_AN; ++i) { @@ -1187,7 +1207,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) continue; if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } -- cgit From b7cf07586c40f926063d4d09f7de28ff82f62b2a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Feb 2024 13:12:28 -0800 Subject: net/mlx5e: Use a memory barrier to enforce PTP WQ xmit submission tracking occurs after populating the metadata_map Just simply reordering the functions mlx5e_ptp_metadata_map_put and mlx5e_ptpsq_track_metadata in the mlx5e_txwqe_complete context is not good enough since both the compiler and CPU are free to reorder these two functions. If reordering does occur, the issue that was supposedly fixed by 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") will be seen. This will lead to NULL pointer dereferences in mlx5e_ptpsq_mark_ts_cqes_undelivered in the NAPI polling context due to the tracking list being populated before the metadata map. Fixes: 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5c166d9d2dca..2fa076b23fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -401,6 +401,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + /* ensure skb is put on metadata_map before tracking the index */ + wmb(); mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { -- cgit From 90502d433c0e7e5483745a574cb719dd5d05b10c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Thu, 8 Feb 2024 15:09:34 -0800 Subject: net/mlx5e: Switch to using _bh variant of of spinlock API in port timestamping NAPI poll context The NAPI poll context is a softirq context. Do not use normal spinlock API in this context to prevent concurrency issues. Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 078f56a3cbb2..ca05b3252a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -42,9 +42,9 @@ mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metad WARN_ON_ONCE(tracker->inuse); tracker->inuse = true; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_add_tail(&tracker->entry, &list->tracker_list_head); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } static void @@ -54,9 +54,9 @@ mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 me WARN_ON_ONCE(!tracker->inuse); tracker->inuse = false; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_del(&tracker->entry); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) @@ -155,7 +155,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; - spin_lock(&cqe_list->tracker_list_lock); + spin_lock_bh(&cqe_list->tracker_list_lock); list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { struct sk_buff *skb = mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id); @@ -170,7 +170,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, pos->inuse = false; list_del(&pos->entry); } - spin_unlock(&cqe_list->tracker_list_lock); + spin_unlock_bh(&cqe_list->tracker_list_lock); } #define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) -- cgit From 1ca1ba465e55b9460e4e75dec9fff31e708fec74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: geneve: make sure to pull inner header in geneve_rx() syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. [1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 32c51c244153..c4ed36c71897 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) -- cgit From 89d72d4125e94aa3c2140fedd97ce07ba9e37674 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Mar 2024 09:06:08 +0100 Subject: net: sparx5: Fix use after free inside sparx5_del_mact_entry Based on the static analyzis of the code it looks like when an entry from the MAC table was removed, the entry was still used after being freed. More precise the vid of the mac_entry was used after calling devm_kfree on the mac_entry. The fix consists in first using the vid of the mac_entry to delete the entry from the HW and after that to free it. Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240301080608.3053468-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2..75868b3f548e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); -- cgit From 330068589389ccae3452db15ecacc3e147ac9c1c Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 7 Feb 2024 16:42:43 -0800 Subject: idpf: disable local BH when scheduling napi for marker packets Fix softirq's not being handled during napi_schedule() call when receiving marker packets for queue disable by disabling local bottom half. The issue can be seen on ifdown: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!! Using ftrace to catch the failing scenario: ifconfig [003] d.... 22739.830624: softirq_raise: vec=3 [action=NET_RX] -0 [003] ..s.. 22739.831357: softirq_entry: vec=3 [action=NET_RX] No interrupt and CPU is idle. After the patch when disabling local BH before calling napi_schedule: ifconfig [003] d.... 22993.928336: softirq_raise: vec=3 [action=NET_RX] ifconfig [003] ..s1. 22993.928337: softirq_entry: vec=3 [action=NET_RX] Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reviewed-by: Jesse Brandeburg Reviewed-by: Przemek Kitszel Signed-off-by: Emil Tantilov Signed-off-by: Alan Brady Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d0cdd63b3d5b..390977a76de2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -2087,8 +2087,10 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); /* schedule the napi to receive all the marker packets */ + local_bh_disable(); for (i = 0; i < vport->num_q_vectors; i++) napi_schedule(&vport->q_vectors[i].napi); + local_bh_enable(); return idpf_wait_for_marker_event(vport); } -- cgit From 2652b99e43403dc464f3648483ffb38e48872fe4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jan 2024 13:51:58 -0800 Subject: ice: virtchnl: stop pretending to support RSS over AQ or registers The E800 series hardware uses the same iAVF driver as older devices, including the virtchnl negotiation scheme. This negotiation scheme includes a mechanism to determine what type of RSS should be supported, including RSS over PF virtchnl messages, RSS over firmware AdminQ messages, and RSS via direct register access. The PF driver will always prefer VIRTCHNL_VF_OFFLOAD_RSS_PF if its supported by the VF driver. However, if an older VF driver is loaded, it may request only VIRTCHNL_VF_OFFLOAD_RSS_REG or VIRTCHNL_VF_OFFLOAD_RSS_AQ. The ice driver happily agrees to support these methods. Unfortunately, the underlying hardware does not support these mechanisms. The E800 series VFs don't have the appropriate registers for RSS_REG. The mailbox queue used by VFs for VF to PF communication blocks messages which do not have the VF-to-PF opcode. Stop lying to the VF that it could support RSS over AdminQ or registers, as these interfaces do not work when the hardware is operating on an E800 series device. In practice this is unlikely to be hit by any normal user. The iAVF driver has supported RSS over PF virtchnl commands since 2016, and always defaults to using RSS_PF if possible. In principle, nothing actually stops the existing VF from attempting to access the registers or send an AQ command. However a properly coded VF will check the capability flags and will report a more useful error if it detects a case where the driver does not support the RSS offloads that it does. Fixes: 1071a8358a28 ("ice: Implement virtchnl commands for AVF support") Signed-off-by: Jacob Keller Reviewed-by: Alan Brady Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 +-------- drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index c925813ec9ca..6f2328a049bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5e19d48a05b4..d796dbd2a440 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING -- cgit From 06e456a05d669ca30b224b8ed962421770c1496c Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 59c7e37f175f..df6a68ab747e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8013,6 +8013,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; -- cgit From 9224fc86f1776193650a33a275cac628952f80a9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 1 Mar 2024 14:37:08 +0100 Subject: ice: fix uninitialized dplls mutex usage The pf->dplls.lock mutex is initialized too late, after its first use. Move it to the top of ice_dpll_init. Note that the "err_exit" error path destroys the mutex. And the mutex is the last thing destroyed in ice_dpll_deinit. This fixes the following warning with CONFIG_DEBUG_MUTEXES: ice 0000:10:00.0: The DDP package was successfully loaded: ICE OS Default Package version 1.3.36.0 ice 0000:10:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link) ice 0000:10:00.0: PTP init successful ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 410 at kernel/locking/mutex.c:587 __mutex_lock+0x773/0xd40 Modules linked in: crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic ice(+) nvme nvme_c> CPU: 0 PID: 410 Comm: kworker/0:4 Not tainted 6.8.0-rc5+ #3 Hardware name: HPE ProLiant DL110 Gen10 Plus/ProLiant DL110 Gen10 Plus, BIOS U56 10/19/2023 Workqueue: events work_for_cpu_fn RIP: 0010:__mutex_lock+0x773/0xd40 Code: c0 0f 84 1d f9 ff ff 44 8b 35 0d 9c 69 01 45 85 f6 0f 85 0d f9 ff ff 48 c7 c6 12 a2 a9 85 48 c7 c7 12 f1 a> RSP: 0018:ff7eb1a3417a7ae0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 RDX: 0000000000000002 RSI: ffffffff85ac2bff RDI: 00000000ffffffff RBP: ff7eb1a3417a7b80 R08: 0000000000000000 R09: 00000000ffffbfff R10: ff7eb1a3417a7978 R11: ff32b80f7fd2e568 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff32b7f02c50e0d8 FS: 0000000000000000(0000) GS:ff32b80efe800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b5852cc000 CR3: 000000003c43a004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x84/0x170 ? __mutex_lock+0x773/0xd40 ? report_bug+0x1c7/0x1d0 ? prb_read_valid+0x1b/0x30 ? handle_bug+0x42/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? __mutex_lock+0x773/0xd40 ? rcu_is_watching+0x11/0x50 ? __kmalloc_node_track_caller+0x346/0x490 ? ice_dpll_lock_status_get+0x28/0x50 [ice] ? __pfx_ice_dpll_lock_status_get+0x10/0x10 [ice] ? ice_dpll_lock_status_get+0x28/0x50 [ice] ice_dpll_lock_status_get+0x28/0x50 [ice] dpll_device_get_one+0x14f/0x2e0 dpll_device_event_send+0x7d/0x150 dpll_device_register+0x124/0x180 ice_dpll_init_dpll+0x7b/0xd0 [ice] ice_dpll_init+0x224/0xa40 [ice] ? _dev_info+0x70/0x90 ice_load+0x468/0x690 [ice] ice_probe+0x75b/0xa10 [ice] ? _raw_spin_unlock_irqrestore+0x4f/0x80 ? process_one_work+0x1a3/0x500 local_pci_probe+0x47/0xa0 work_for_cpu_fn+0x17/0x30 process_one_work+0x20d/0x500 worker_thread+0x1df/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0x103/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 irq event stamp: 125197 hardirqs last enabled at (125197): [] finish_task_switch.isra.0+0x12d/0x3d0 hardirqs last disabled at (125196): [] __schedule+0xea4/0x19f0 softirqs last enabled at (105334): [] napi_get_frags_check+0x1a/0x60 softirqs last disabled at (105332): [] napi_get_frags_check+0x1a/0x60 ---[ end trace 0000000000000000 ]--- Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Signed-off-by: Michal Schmidt Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..fda9140c0da4 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -2120,6 +2120,7 @@ void ice_dpll_init(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; int err = 0; + mutex_init(&d->lock); err = ice_dpll_init_info(pf, cgu); if (err) goto err_exit; @@ -2132,7 +2133,6 @@ void ice_dpll_init(struct ice_pf *pf) err = ice_dpll_init_pins(pf, cgu); if (err) goto deinit_pps; - mutex_init(&d->lock); if (cgu) { err = ice_dpll_init_worker(pf); if (err) -- cgit From 6c5b6ca7642f2992502a22dbd8b80927de174b67 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 4 Mar 2024 16:37:07 -0800 Subject: ice: fix typo in assignment Fix an obviously incorrect assignment, created with a typo or cut-n-paste error. Fixes: 5995ef88e3a8 ("ice: realloc VSI stats arrays") Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 097bf8fd6bf0..fc23dbe302b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3192,7 +3192,7 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) } } - tx_ring_stats = vsi_stat->rx_ring_stats; + tx_ring_stats = vsi_stat->tx_ring_stats; vsi_stat->tx_ring_stats = krealloc_array(vsi_stat->tx_ring_stats, req_txq, sizeof(*vsi_stat->tx_ring_stats), -- cgit From 36c824ca3e4fa8d1224c2dcdeaca39d2ca86a42f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 28 Feb 2024 18:26:03 +0100 Subject: i40e: Fix firmware version comparison function Helper i40e_is_fw_ver_eq() compares incorrectly given firmware version as it returns true when the major version of running firmware is greater than the given major version that is wrong and results in failure during getting of DCB configuration where this helper is used. Fix the check and return true only if the running FW version is exactly equals to the given version. Reproducer: 1. Load i40e driver 2. Check dmesg output [root@host ~]# modprobe i40e [root@host ~]# dmesg | grep 'i40e.*DCB' [ 74.750642] i40e 0000:02:00.0: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.759770] i40e 0000:02:00.0: DCB init failed -5, disabled [ 74.966550] i40e 0000:02:00.1: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.975683] i40e 0000:02:00.1: DCB init failed -5, disabled Fixes: cf488e13221f ("i40e: Add other helpers to check version of running firmware and AQ API") Signed-off-by: Ivan Vecera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index af4269330581..ce1f11b8ad65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -567,8 +567,7 @@ static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) **/ static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) { - return (hw->aq.fw_maj_ver > maj || - (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); + return (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min); } #endif /* _I40E_PROTOTYPE_H_ */ -- cgit From ef27f655b438bed4c83680e4f01e1cde2739854b Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Mon, 19 Feb 2024 10:08:43 +0100 Subject: igc: avoid returning frame twice in XDP_REDIRECT When a frame can not be transmitted in XDP_REDIRECT (e.g. due to a full queue), it is necessary to free it by calling xdp_return_frame_rx_napi. However, this is the responsibility of the caller of the ndo_xdp_xmit (see for example bq_xmit_all in kernel/bpf/devmap.c) and thus calling it inside igc_xdp_xmit (which is the ndo_xdp_xmit of the igc driver) as well will lead to memory corruption. In fact, bq_xmit_all expects that it can return all frames after the last successfully transmitted one. Therefore, break for the first not transmitted frame, but do not call xdp_return_frame_rx_napi in igc_xdp_xmit. This is equally implemented in other Intel drivers such as the igb. There are two alternatives to this that were rejected: 1. Return num_frames as all the frames would have been transmitted and release them inside igc_xdp_xmit. While it might work technically, it is not what the return value is meant to represent (i.e. the number of SUCCESSFULLY transmitted packets). 2. Rework kernel/bpf/devmap.c and all drivers to support non-consecutively dropped packets. Besides being complex, it likely has a negative performance impact without a significant gain since it is anyway unlikely that the next frame can be transmitted if the previous one was dropped. The memory corruption can be reproduced with the following script which leads to a kernel panic after a few seconds. It basically generates more traffic than a i225 NIC can transmit and pushes it via XDP_REDIRECT from a virtual interface to the physical interface where frames get dropped. #!/bin/bash INTERFACE=enp4s0 INTERFACE_IDX=`cat /sys/class/net/$INTERFACE/ifindex` sudo ip link add dev veth1 type veth peer name veth2 sudo ip link set up $INTERFACE sudo ip link set up veth1 sudo ip link set up veth2 cat << EOF > redirect.bpf.c SEC("prog") int redirect(struct xdp_md *ctx) { return bpf_redirect($INTERFACE_IDX, 0); } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c redirect.bpf.c -o redirect.bpf.o sudo ip link set veth2 xdp obj redirect.bpf.o cat << EOF > pass.bpf.c SEC("prog") int pass(struct xdp_md *ctx) { return XDP_PASS; } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c pass.bpf.c -o pass.bpf.o sudo ip link set $INTERFACE xdp obj pass.bpf.o cat << EOF > trafgen.cfg { /* Ethernet Header */ 0xe8, 0x6a, 0x64, 0x41, 0xbf, 0x46, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 1, 1, # IP Src - adapt as needed 10, 0, 1, 2, # IP Dest - adapt as needed const16(6666), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } EOF sudo trafgen -i trafgen.cfg -b3000MB -o veth1 --cpp Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Signed-off-by: Florian Kauer Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba8d3fe186ae..81c21a893ede 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6487,7 +6487,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6503,16 +6503,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6520,7 +6519,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, -- cgit From ba54b1a276a6b69d80649942fe5334d19851443e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 18 Feb 2024 09:42:21 +0200 Subject: intel: legacy: Partial revert of field get conversion Refactoring of the field get conversion introduced a regression in the legacy Wake On Lan from a magic packet with i219 devices. Rx address copied not correctly from MAC to PHY with FIELD_GET macro. Fixes: b9a452545075 ("intel: legacy: field get conversion") Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a2788fd5f8bb..19e450a5bd31 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2559,7 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - FIELD_GET(E1000_RAH_AV, mac_reg)); + (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); -- cgit From f267f262815033452195f46c43b572159262f533 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 5 Mar 2024 10:08:28 +0100 Subject: xdp, bonding: Fix feature flags when there are no slave devs anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") changed the driver from reporting everything as supported before a device was bonded into having the driver report that no XDP feature is supported until a real device is bonded as it seems to be more truthful given eventually real underlying devices decide what XDP features are supported. The change however did not take into account when all slave devices get removed from the bond device. In this case after 9b0ed890ac2a, the driver keeps reporting a feature mask of 0x77, that is, NETDEV_XDP_ACT_MASK & ~NETDEV_XDP_ACT_XSK_ZEROCOPY whereas it should have reported a feature mask of 0. Fix it by resetting XDP feature flags in the same way as if no XDP program is attached to the bond device. This was uncovered by the XDP bond selftest which let BPF CI fail. After adjusting the starting masks on the latter to 0 instead of NETDEV_XDP_ACT_MASK the test passes again together with this fix. Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") Signed-off-by: Daniel Borkmann Cc: Magnus Karlsson Cc: Prashant Batra Cc: Toke Høiland-Jørgensen Cc: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Message-ID: <20240305090829.17131-1-daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a11748b8d69b..cd0683bcca03 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev) ASSERT_RTNL(); - if (!bond_xdp_check(bond)) { + if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) { xdp_clear_features_flag(bond_dev); return; } -- cgit From 289e922582af5b4721ba02e86bde4d9ba918158a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 17:35:32 -0800 Subject: dpll: move all dpll<>netdev helpers to dpll code Older versions of GCC really want to know the full definition of the type involved in rcu_assign_pointer(). struct dpll_pin is defined in a local header, net/core can't reach it. Move all the netdev <> dpll code into dpll, where the type is known. Otherwise we'd need multiple function calls to jump between the compilation units. This is the same problem the commit under fixes was trying to address, but with rcu_assign_pointer() not rcu_dereference(). Some of the exports are not needed, networking core can't be a module, we only need exports for the helpers used by drivers. Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/35a869c8-52e8-177-1d4d-e57578b99b6@linux-m68k.org/ Fixes: 640f41ed33b5 ("dpll: fix build failure due to rcu_dereference_check() on unknown type") Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240305013532.694866-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 25 +++++++++++++---- drivers/dpll/dpll_netlink.c | 38 ++++++++++++++++---------- drivers/net/ethernet/intel/ice/ice_dpll.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/dpll.c | 4 +-- 4 files changed, 48 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 241db366b2c7..7f686d179fc9 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,11 +42,6 @@ struct dpll_pin_registration { void *priv; }; -struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->dpll_pin); -} - struct dpll_device *dpll_device_get_by_id(int id) { if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) @@ -513,6 +508,26 @@ err_pin_prop: return ERR_PTR(ret); } +static void dpll_netdev_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + rtnl_lock(); + rcu_assign_pointer(dev->dpll_pin, dpll_pin); + rtnl_unlock(); +} + +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + dpll_netdev_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(dpll_netdev_pin_set); + +void dpll_netdev_pin_clear(struct net_device *dev) +{ + dpll_netdev_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(dpll_netdev_pin_clear); + /** * dpll_pin_get - find existing or create new dpll pin * @clock_id: clock_id of creator diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 4ca9ad16cd95..b57355e0c214 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include "dpll_core.h" #include "dpll_netlink.h" @@ -47,18 +48,6 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) return 0; } -/** - * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin - * @pin: pin pointer - * - * Return: byte size of pin handle attribute for given pin. - */ -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) -{ - return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ -} -EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); - /** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle @@ -68,7 +57,7 @@ EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -76,7 +65,28 @@ int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } -EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); + +static struct dpll_pin *dpll_netdev_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + +/** + * dpll_netdev_pin_handle_size - get size of pin handle attribute of a netdev + * @dev: netdev from which to get the pin + * + * Return: byte size of pin handle attribute, or 0 if @dev has no pin. + */ +size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return dpll_netdev_pin(dev) ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} + +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev) +{ + return dpll_msg_add_pin_handle(msg, dpll_netdev_pin(dev)); +} static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..c59e972dbaae 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1597,7 +1597,7 @@ static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf) } if (WARN_ON_ONCE(!vsi || !vsi->netdev)) return; - netdev_dpll_pin_clear(vsi->netdev); + dpll_netdev_pin_clear(vsi->netdev); dpll_pin_put(rclk->pin); } @@ -1641,7 +1641,7 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin, } if (WARN_ON((!vsi || !vsi->netdev))) return -EINVAL; - netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin); + dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index 928bf24d4b12..d74a5aaf4268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -261,7 +261,7 @@ static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, { if (mdpll->tracking_netdev) return; - netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + dpll_netdev_pin_set(netdev, mdpll->dpll_pin); mdpll->tracking_netdev = netdev; } @@ -269,7 +269,7 @@ static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) { if (!mdpll->tracking_netdev) return; - netdev_dpll_pin_clear(mdpll->tracking_netdev); + dpll_netdev_pin_clear(mdpll->tracking_netdev); mdpll->tracking_netdev = NULL; } -- cgit From b7fb7729c94fb2d23c79ff44f7a2da089c92d81c Mon Sep 17 00:00:00 2001 From: "Tobias Jakobi (Compleo)" Date: Mon, 4 Mar 2024 16:41:35 +0100 Subject: net: dsa: microchip: fix register write order in ksz8_ind_write8() This bug was noticed while re-implementing parts of the kernel driver in userspace using spidev. The goal was to enable some of the errata workarounds that Microchip describes in their errata sheet [1]. Both the errata sheet and the regular datasheet of e.g. the KSZ8795 imply that you need to do this for indirect register accesses: - write a 16-bit value to a control register pair (this value consists of the indirect register table, and the offset inside the table) - either read or write an 8-bit value from the data storage register (indicated by REG_IND_BYTE in the kernel) The current implementation has the order swapped. It can be proven, by reading back some indirect register with known content (the EEE register modified in ksz8_handle_global_errata() is one of these), that this implementation does not work. Private discussion with Oleksij Rempel of Pengutronix has revealed that the workaround was apparantly never tested on actual hardware. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/Errata/KSZ87xx-Errata-DS80000687C.pdf Signed-off-by: Tobias Jakobi (Compleo) Reviewed-by: Oleksij Rempel Fixes: 7b6e6235b664 ("net: dsa: microchip: ksz8795: handle eee specif erratum") Link: https://lore.kernel.org/r/20240304154135.161332-1-tobias.jakobi.compleo@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8795.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 61b71bcfe396..c3da97abce20 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); -- cgit From ba18deddd6d502da71fd6b6143c53042271b82bd Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Wed, 6 Mar 2024 18:57:14 +0800 Subject: net: pds_core: Fix possible double free in error handling path When auxiliary_device_add() returns error and then calls auxiliary_device_uninit(), Callback function pdsc_auxbus_dev_release calls kfree(padev) to free memory. We shouldn't call kfree(padev) again in the error handling path. Fix this by cleaning up the redundant kfree() and putting the error handling back to where the errors happened. Fixes: 4569cce43bc6 ("pds_core: add auxiliary_bus devices") Signed-off-by: Yongzhi Liu Reviewed-by: Wojciech Drewek Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240306105714.20597-1-hyperlyzcs@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/pds_core/auxbus.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 11c23a7f3172..fd1a5149c003 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -160,23 +160,19 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, if (err < 0) { dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out; + kfree(padev); + return ERR_PTR(err); } err = auxiliary_device_add(aux_dev); if (err) { dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out_uninit; + auxiliary_device_uninit(aux_dev); + return ERR_PTR(err); } return padev; - -err_out_uninit: - auxiliary_device_uninit(aux_dev); -err_out: - kfree(padev); - return ERR_PTR(err); } int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) -- cgit