Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_main.c')
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 9443
1 file changed, 6956 insertions(+), 2487 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2db93d3f6d23..d8192aa23254 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,38 +1,23 @@ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ - -#include <linux/etherdevice.h> -#include <linux/of_net.h> -#include <linux/pci.h> -#include <linux/bpf.h> +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2013 - 2021 Intel Corporation. */ + +#include <generated/utsrelease.h> +#include <linux/crash_dump.h> +#include <linux/net/intel/libie/pctype.h> +#include <linux/if_bridge.h> +#include <linux/if_macvlan.h> +#include <linux/module.h> +#include <net/pkt_cls.h> +#include <net/xdp_sock_drv.h> /* Local includes */ #include "i40e.h" +#include "i40e_devids.h" #include "i40e_diag.h" -#include <net/udp_tunnel.h> +#include "i40e_lan_hmc.h" +#include "i40e_virtchnl_pf.h" +#include "i40e_xsk.h" + /* All i40e tracepoints are defined by the include below, which * must be included exactly once across the whole kernel with * CREATE_TRACE_POINTS defined @@ -44,31 +29,31 @@ const char i40e_driver_name[] = "i40e"; static const char i40e_driver_string[] = "Intel(R) Ethernet Connection XL710 Network Driver"; -#define DRV_KERN "-k" - -#define DRV_VERSION_MAJOR 2 -#define DRV_VERSION_MINOR 1 -#define DRV_VERSION_BUILD 14 -#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ - __stringify(DRV_VERSION_MINOR) "." 
\ - __stringify(DRV_VERSION_BUILD) DRV_KERN -const char i40e_driver_version_str[] = DRV_VERSION; -static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; +static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation."; /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static void i40e_prep_for_reset(struct i40e_pf *pf); +static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, + bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); +static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf); +static bool i40e_check_recovery_mode(struct i40e_pf *pf); +static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type); +static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf); /* i40e_pci_tbl - PCI Device ID Table * @@ -85,16 +70,23 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_BC), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0}, /* required last entry */ @@ -107,28 +99,63 @@ static int debug = -1; module_param(debug, uint, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)"); -MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); +MODULE_IMPORT_NS("LIBIE"); +MODULE_IMPORT_NS("LIBIE_ADMINQ"); +MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; +static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + struct 
net_device *netdev, int delta) +{ + struct netdev_hw_addr_list *ha_list; + struct netdev_hw_addr *ha; + + if (!f || !netdev) + return; + + if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr)) + ha_list = &netdev->uc; + else + ha_list = &netdev->mc; + + netdev_hw_addr_list_for_each(ha, ha_list) { + if (ether_addr_equal(ha->addr, f->macaddr)) { + ha->refcount += delta; + if (ha->refcount <= 0) + ha->refcount = 1; + break; + } + } +} + /** - * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code + * i40e_hw_to_dev - get device pointer from the hardware structure + * @hw: pointer to the device HW structure + **/ +struct device *i40e_hw_to_dev(struct i40e_hw *hw) +{ + struct i40e_pf *pf = i40e_hw_to_pf(hw); + + return &pf->pdev->dev; +} + +/** + * i40e_allocate_dma_mem - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested * @alignment: what to align the allocation to **/ -int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, - u64 size, u32 alignment) +int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem, + u64 size, u32 alignment) { - struct i40e_pf *pf = (struct i40e_pf *)hw->back; + struct i40e_pf *pf = i40e_hw_to_pf(hw); mem->size = ALIGN(size, alignment); - mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size, - &mem->pa, GFP_KERNEL); + mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, + GFP_KERNEL); if (!mem->va) return -ENOMEM; @@ -136,13 +163,13 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, } /** - * i40e_free_dma_mem_d - OS specific memory free for shared code + * i40e_free_dma_mem - OS specific memory free for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) +int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem) { - struct i40e_pf *pf = (struct i40e_pf *)hw->back; + struct i40e_pf *pf = i40e_hw_to_pf(hw); dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa); mem->va = NULL; @@ -153,13 +180,13 @@ int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) } /** - * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code + * i40e_allocate_virt_mem - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, - u32 size) +int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem, + u32 size) { mem->size = size; mem->va = kzalloc(size, GFP_KERNEL); @@ -171,11 +198,11 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, } /** - * i40e_free_virt_mem_d - OS specific memory free for shared code + * i40e_free_virt_mem - OS specific memory free for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) +int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem) { /* it's ok to kfree a NULL pointer */ kfree(mem->va); @@ -193,10 +220,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error - * - * The search_hint trick and lack of advanced fit-finding only work - * because 
we're highly likely to have all the same size lump requests. - * Linear search time and any fragmentation should be minimal. **/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) @@ -206,13 +229,26 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, - "param err: pile=%p needed=%d id=0x%04x\n", - pile, needed, id); + "param err: pile=%s needed=%d id=0x%04x\n", + pile ? "<valid>" : "<null>", needed, id); return -EINVAL; } - /* start the linear search with an imperfect hint */ - i = pile->search_hint; + /* Allocate last queue in the pile for FDIR VSI queue + * so it doesn't fragment the qp_pile + */ + if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { + if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { + dev_err(&pf->pdev->dev, + "Cannot allocate queue %d for I40E_VSI_FDIR\n", + pile->num_entries - 1); + return -ENOMEM; + } + pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; + return pile->num_entries - 1; + } + + i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { @@ -231,7 +267,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; - pile->search_hint = i + j; break; } @@ -254,7 +289,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; - int i; + u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; @@ -266,24 +301,23 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) count++; } - if (count && index < pile->search_hint) - pile->search_hint = index; return count; } /** * i40e_find_vsi_from_id - searches for the vsi with the given id - * @pf - the pf structure to search for the vsi - * @id - id of the vsi it is searching for + * @pf: the pf structure to search for the vsi + * @id: id of the vsi it is searching for **/ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) { + struct i40e_vsi *vsi; int i; - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && (pf->vsi[i]->id == id)) - return pf->vsi[i]; + i40e_pf_for_each_vsi(pf, i, vsi) + if (vsi->id == id) + return vsi; return NULL; } @@ -296,56 +330,39 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) **/ void i40e_service_event_schedule(struct i40e_pf *pf) { - if (!test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + if ((!test_bit(__I40E_DOWN, pf->state) && + !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) || + test_bit(__I40E_RECOVERY_MODE, pf->state)) queue_work(i40e_wq, &pf->service_task); } /** * i40e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: queue number timing out * * If any port has noticed a Tx timeout, it is likely that the whole * device is munged, not just the one netdev port, so go for the full * reset. 
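The allocator that replaces the search-hint scheme is simple enough to restate in isolation: each entry of a fixed table carries a VALID bit plus an owner id, and an allocation is the first run of `needed` free slots. A minimal stand-alone sketch of that search (kernel-style C with illustrative names, not the driver's exact code):

#define VALID_BIT 0x8000

static int lump_alloc(u16 *list, u16 num_entries, u16 needed, u16 id)
{
	u16 i = 0, j;

	while (i < num_entries) {
		if (list[i] & VALID_BIT) {	/* slot taken, keep scanning */
			i++;
			continue;
		}
		/* probe whether a run of 'needed' free slots starts here */
		for (j = 0; j < needed && i + j < num_entries; j++)
			if (list[i + j] & VALID_BIT)
				break;
		if (j == needed) {		/* claim the run for this owner */
			for (j = 0; j < needed; j++)
				list[i + j] = id | VALID_BIT;
			return i;		/* base index of the lump */
		}
		i += j + 1;			/* jump past the conflicting slot */
	}
	return -ENOMEM;
}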
**/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; - unsigned int i, hung_queue = 0; + unsigned int i; u32 head, val; pf->tx_timeout_count++; - /* find the stopped queue the same way the stack does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - unsigned long trans_start; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - (trans_start + netdev->watchdog_timeo))) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) { - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - } else { - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (hung_queue == - vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* with txqueue index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (txqueue == + vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; } } } @@ -356,10 +373,14 @@ static void i40e_tx_timeout(struct net_device *netdev) (pf->tx_timeout_last_recovery + netdev->watchdog_timeo))) return; /* don't do any new action before the next timeout */ + /* don't kick off another recovery if one is already pending */ + if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state)) + return; + if (tx_ring) { head = i40e_get_head(tx_ring); /* Read interrupt register */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) val = rd32(&pf->hw, I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + tx_ring->vsi->base_vector - 1)); @@ -367,14 +388,14 @@ static void i40e_tx_timeout(struct net_device *netdev) val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", - vsi->seid, hung_queue, tx_ring->next_to_clean, + vsi->seid, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail), val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: @@ -387,7 +408,9 @@ static void i40e_tx_timeout(struct net_device *netdev) set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); break; default: - netdev_err(netdev, "tx_timeout recovery unsuccessful\n"); + netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n"); + set_bit(__I40E_DOWN_REQUESTED, pf->state); + set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state); break; } @@ -419,10 +442,10 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; @@ -431,6 +454,7 @@ static void 
i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, /** * i40e_get_netdev_stats_struct - Get statistics for netdev interface * @netdev: network interface device structure + * @stats: data structure to store statistics * * Returns the address of the device statistics structure. * The statistics are actually updated from the service task. @@ -439,9 +463,9 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_ring *tx_ring, *rx_ring; struct i40e_vsi *vsi = np->vsi; struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi); + struct i40e_ring *ring; int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) @@ -455,24 +479,30 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, u64 bytes, packets; unsigned int start; - tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); - if (!tx_ring) + ring = READ_ONCE(vsi->tx_rings[i]); + if (!ring) continue; - i40e_get_netdev_stats_struct_tx(tx_ring, stats); + i40e_get_netdev_stats_struct_tx(ring, stats); - rx_ring = &tx_ring[1]; + if (i40e_enabled_xdp_vsi(vsi)) { + ring = READ_ONCE(vsi->xdp_rings[i]); + if (!ring) + continue; + i40e_get_netdev_stats_struct_tx(ring, stats); + } + ring = READ_ONCE(vsi->rx_rings[i]); + if (!ring) + continue; do { - start = u64_stats_fetch_begin_irq(&rx_ring->syncp); - packets = rx_ring->stats.packets; - bytes = rx_ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); + start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; - if (i40e_enabled_xdp_vsi(vsi)) - i40e_get_netdev_stats_struct_tx(&rx_ring[1], stats); } rcu_read_unlock(); @@ -482,6 +512,7 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, stats->tx_dropped = vsi_stats->tx_dropped; stats->rx_errors = vsi_stats->rx_errors; stats->rx_dropped = vsi_stats->rx_dropped; + stats->rx_missed_errors = vsi_stats->rx_missed_errors; stats->rx_crc_errors = vsi_stats->rx_crc_errors; stats->rx_length_errors = vsi_stats->rx_length_errors; } @@ -524,25 +555,65 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi) **/ void i40e_pf_reset_stats(struct i40e_pf *pf) { + struct i40e_veb *veb; int i; memset(&pf->stats, 0, sizeof(pf->stats)); memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets)); pf->stat_offsets_loaded = false; - for (i = 0; i < I40E_MAX_VEB; i++) { - if (pf->veb[i]) { - memset(&pf->veb[i]->stats, 0, - sizeof(pf->veb[i]->stats)); - memset(&pf->veb[i]->stats_offsets, 0, - sizeof(pf->veb[i]->stats_offsets)); - pf->veb[i]->stat_offsets_loaded = false; - } + i40e_pf_for_each_veb(pf, i, veb) { + memset(&veb->stats, 0, sizeof(veb->stats)); + memset(&veb->stats_offsets, 0, sizeof(veb->stats_offsets)); + memset(&veb->tc_stats, 0, sizeof(veb->tc_stats)); + memset(&veb->tc_stats_offsets, 0, sizeof(veb->tc_stats_offsets)); + veb->stat_offsets_loaded = false; } pf->hw_csum_rx_error = 0; } /** + * i40e_compute_pci_to_hw_id - compute index form PCI function. + * @vsi: ptr to the VSI to read from. + * @hw: ptr to the hardware info. + **/ +static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw) +{ + int pf_count = i40e_get_pf_count(hw); + + if (vsi->type == I40E_VSI_SRIOV) + return (hw->port * BIT(7)) / pf_count + vsi->vf_id; + + return hw->port + BIT(7); +} + +/** + * i40e_stat_update64 - read and update a 64 bit stat from the chip. 
+ * @hw: ptr to the hardware info. + * @hireg: the high 32 bit reg to read. + * @loreg: the low 32 bit reg to read. + * @offset_loaded: has the initial offset been loaded yet. + * @offset: ptr to current offset value. + * @stat: ptr to the stat. + * + * Since the device stats are not reset at PFReset, they will not + * be zeroed when the driver starts. We'll save the first values read + * and use them as offsets to be subtracted from the raw values in order + * to report stats that count from zero. + **/ +static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, + bool offset_loaded, u64 *offset, u64 *stat) +{ + u64 new_data; + + new_data = rd64(hw, loreg); + + if (!offset_loaded || new_data < *offset) + *offset = new_data; + *stat = new_data - *offset; +} + +/** * i40e_stat_update48 - read and update a 48 bit stat from the chip * @hw: ptr to the hardware info * @hireg: the high 32 bit reg to read @@ -600,6 +671,44 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg, } /** + * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat + * @hw: ptr to the hardware info + * @reg: the hw reg to read and clear + * @stat: ptr to the stat + **/ +static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) +{ + u32 new_data = rd32(hw, reg); + + wr32(hw, reg, 1); /* must write a nonzero value to clear register */ + *stat += new_data; +} + +/** + * i40e_stats_update_rx_discards - update rx_discards. + * @vsi: ptr to the VSI to be updated. + * @hw: ptr to the hardware info. + * @stat_idx: VSI's stat_counter_idx. + * @offset_loaded: ptr to the VSI's stat_offsets_loaded. + * @stat_offset: ptr to stat_offset to store first read of specific register. + * @stat: ptr to VSI's stat to be updated. + **/ +static void +i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw, + int stat_idx, bool offset_loaded, + struct i40e_eth_stats *stat_offset, + struct i40e_eth_stats *stat) +{ + i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded, + &stat_offset->rx_discards, &stat->rx_discards); + i40e_stat_update64(hw, + I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)), + I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)), + offset_loaded, &stat_offset->rx_discards_other, + &stat->rx_discards_other); +} + +/** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. 
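The pattern that kernel-doc describes — baseline on first read, subtract thereafter — is worth seeing on its own. A stand-alone illustration of the arithmetic with the register access omitted; re-arming the baseline when the raw value drops mirrors i40e_stat_update64() above, and the driver's 48-bit variants additionally mask the result to 48 bits because those counters wrap sooner:

static void stat_update(u64 raw, bool offset_loaded, u64 *offset, u64 *stat)
{
	/* first read after load, or counter reset: raw becomes the baseline */
	if (!offset_loaded || raw < *offset)
		*offset = raw;
	/* report a count that starts from zero at driver load */
	*stat = raw - *offset;
}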
* @vsi: the VSI to be updated **/ @@ -618,15 +727,9 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); - i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), - vsi->stat_offsets_loaded, - &oes->rx_discards, &es->rx_discards); i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); - i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), - vsi->stat_offsets_loaded, - &oes->tx_errors, &es->tx_errors); i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), @@ -661,6 +764,10 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); + + i40e_stats_update_rx_discards(vsi, hw, stat_idx, + vsi->stat_offsets_loaded, oes, es); + vsi->stat_offsets_loaded = true; } @@ -668,7 +775,7 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) * i40e_update_veb_stats - Update Switch component statistics * @veb: the VEB being updated **/ -static void i40e_update_veb_stats(struct i40e_veb *veb) +void i40e_update_veb_stats(struct i40e_veb *veb) { struct i40e_pf *pf = veb->pf; struct i40e_hw *hw = &pf->hw; @@ -755,18 +862,19 @@ static void i40e_update_veb_stats(struct i40e_veb *veb) **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { + u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - u32 tx_restart, tx_busy; + u64 tx_restart, tx_busy; struct i40e_ring *p; - u32 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; u64 tx_force_wb; + u64 tx_stopped; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; @@ -786,44 +894,83 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; + tx_stopped = 0; rx_page = 0; rx_buf = 0; + rx_reuse = 0; + rx_alloc = 0; + rx_waive = 0; + rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ - p = ACCESS_ONCE(vsi->tx_rings[q]); + p = READ_ONCE(vsi->tx_rings[q]); + if (!p) + continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; + tx_stopped += p->tx_stats.tx_stopped; + + /* locate Rx ring */ + p = READ_ONCE(vsi->rx_rings[q]); + if (!p) + continue; - /* Rx queue is part of the same block as Tx queue */ - p = &p[1]; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + rx_reuse += p->rx_stats.page_reuse_count; + rx_alloc += p->rx_stats.page_alloc_count; + rx_waive += p->rx_stats.page_waive_count; + rx_busy += p->rx_stats.page_busy_count; + + if 
(i40e_enabled_xdp_vsi(vsi)) { + /* locate XDP ring */ + p = READ_ONCE(vsi->xdp_rings[q]); + if (!p) + continue; + + do { + start = u64_stats_fetch_begin(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; + } while (u64_stats_fetch_retry(&p->syncp, start)); + tx_b += bytes; + tx_p += packets; + tx_restart += p->tx_stats.restart_queue; + tx_busy += p->tx_stats.tx_busy; + tx_linearize += p->tx_stats.tx_linearize; + tx_force_wb += p->tx_stats.tx_force_wb; + } } rcu_read_unlock(); vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; + vsi->tx_stopped = tx_stopped; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; + vsi->rx_page_reuse = rx_reuse; + vsi->rx_page_alloc = rx_alloc; + vsi->rx_page_waive = rx_waive; + vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; @@ -836,13 +983,15 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) ns->tx_errors = es->tx_errors; ons->multicast = oes->rx_multicast; ns->multicast = es->rx_multicast; - ons->rx_dropped = oes->rx_discards; - ns->rx_dropped = es->rx_discards; + ons->rx_dropped = oes->rx_discards_other; + ns->rx_dropped = es->rx_discards_other; + ons->rx_missed_errors = oes->rx_discards; + ns->rx_missed_errors = es->rx_discards; ons->tx_dropped = oes->tx_discards; ns->tx_dropped = es->tx_discards; /* pull in a couple PF stats if this is the main vsi */ - if (vsi == pf->vsi[pf->lan_vsi]) { + if (vsi->type == I40E_VSI_MAIN) { ns->rx_crc_errors = pf->stats.crc_errors; ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes; ns->rx_length_errors = pf->stats.rx_length_errors; @@ -1040,26 +1189,21 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) &osd->rx_jabber, &nsd->rx_jabber); /* FDIR stats */ - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_match, &nsd->fd_atr_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_sb_match, &nsd->fd_sb_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)), + &nsd->fd_sb_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_tunnel_match); val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = - (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >> - I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT; + FIELD_GET(I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK, val); nsd->rx_lpi_status = - (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >> - I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT; + FIELD_GET(I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK, val); i40e_stat_update32(hw, I40E_PRTPM_TLPIC, pf->stat_offsets_loaded, &osd->tx_lpi_count, &nsd->tx_lpi_count); @@ -1067,14 +1211,14 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) pf->stat_offsets_loaded, &osd->rx_lpi_count, &nsd->rx_lpi_count); - if (pf->flags & I40E_FLAG_FD_SB_ENABLED && - !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && + !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) nsd->fd_sb_status = true; else nsd->fd_sb_status = false; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED && - !(pf->flags & 
I40E_FLAG_FD_ATR_AUTO_DISABLED)) + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && + !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) nsd->fd_atr_status = true; else nsd->fd_atr_status = false; @@ -1092,13 +1236,54 @@ void i40e_update_stats(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - if (vsi == pf->vsi[pf->lan_vsi]) + if (vsi->type == I40E_VSI_MAIN) i40e_update_pf_stats(pf); i40e_update_vsi_stats(vsi); } /** + * i40e_count_all_filters - counts VSI MAC filters + * @vsi: the VSI to be searched + * + * Return: count of MAC filters in any state. + */ +int i40e_count_all_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt, cnt = 0; + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) + cnt++; + + return cnt; +} + +/** + * i40e_count_active_filters - counts VSI MAC filters + * @vsi: the VSI to be searched + * + * Return: count of active MAC filters. + */ +int i40e_count_active_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt; + int cnt = 0; + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } + + return cnt; +} + +/** * i40e_find_filter - Search VSI filter list for specific mac/vlan filter * @vsi: the VSI to be searched * @macaddr: the MAC address @@ -1277,6 +1462,8 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, new->f = add_head; new->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new->hlist, tmp_add_list); @@ -1294,6 +1481,116 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, } /** + * i40e_get_vf_new_vlan - Get new vlan id on a vf + * @vsi: the vsi to configure + * @new_mac: new mac filter to be added + * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Get new VLAN id based on current VLAN filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * Returns the value of the new vlan filter or + * the old value if no new filter is needed. + */ +static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, + struct i40e_new_mac_filter *new_mac, + struct i40e_mac_filter *f, + int vlan_filters, + bool trusted) +{ + s16 pvid = le16_to_cpu(vsi->info.pvid); + struct i40e_pf *pf = vsi->back; + bool is_any; + + if (new_mac) + f = new_mac->f; + + if (pvid && f->vlan != pvid) + return pvid; + + is_any = (trusted || + !test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, pf->flags)); + + if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || + (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || + (is_any && !vlan_filters && f->vlan == 0)) { + if (is_any) + return I40E_VLAN_ANY; + else + return 0; + } + + return f->vlan; +} + +/** + * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary + * @vsi: the vsi to configure + * @tmp_add_list: list of filters ready to be added + * @tmp_del_list: list of filters ready to be deleted + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Correct VF VLAN filters based on current VLAN filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. 
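Condensing the VLAN selection just defined into a stand-alone helper makes its three cases easier to see. Names here are illustrative: is_any stands for "trusted VF or VLAN pruning disabled", and I40E_VLAN_ANY is the driver's -1 wildcard:

#define VLAN_ANY (-1)	/* stands in for I40E_VLAN_ANY */

static s16 pick_vlan(s16 cur, s16 pvid, int vlan_filters, bool is_any)
{
	if (pvid && cur != pvid)
		return pvid;	/* a port VLAN overrides everything */

	if ((vlan_filters && cur == VLAN_ANY) ||	/* real VLANs exist */
	    (!is_any && !vlan_filters && cur == VLAN_ANY) ||
	    (is_any && !vlan_filters && cur == 0))
		return is_any ? VLAN_ANY : 0;	/* wildcard if allowed, else untagged */

	return cur;	/* keep the existing VLAN */
}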
+ * + * This function is only expected to be called from within + * i40e_sync_vsi_filters. + * + * NOTE: This function expects to be called while under the + * mac_filter_hash_lock + */ +static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, + struct hlist_head *tmp_add_list, + struct hlist_head *tmp_del_list, + int vlan_filters, + bool trusted) +{ + struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new_mac; + struct hlist_node *h; + int bkt, new_vlan; + + hlist_for_each_entry(new_mac, tmp_add_list, hlist) { + new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL, + vlan_filters, trusted); + } + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters, + trusted); + if (new_vlan != f->vlan) { + add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); + if (!add_head) + return -ENOMEM; + /* Create a temporary i40e_new_mac_filter */ + new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC); + if (!new_mac) + return -ENOMEM; + new_mac->f = add_head; + new_mac->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new_mac->hlist, tmp_add_list); + + /* Put the original filter into the delete list */ + f->state = I40E_FILTER_REMOVE; + hash_del(&f->hlist); + hlist_add_head(&f->hlist, tmp_del_list); + } + } + + vsi->has_vlan_filter = !!vlan_filters; + return 0; +} + +/** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI * @macaddr: the MAC address @@ -1360,21 +1657,14 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; - /* If we're in overflow promisc mode, set the state directly - * to failed, so we don't bother to try sending the filter - * to the hardware. - */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state)) - f->state = I40E_FILTER_FAILED; - else - f->state = I40E_FILTER_NEW; + f->state = I40E_FILTER_NEW; INIT_HLIST_NODE(&f->hlist); key = i40e_addr_to_hkey(macaddr); hash_add(vsi->mac_filter_hash, &f->hlist, key); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; + set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } /* If we're asked to add a filter that has been marked for removal, it @@ -1396,9 +1686,8 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, * @vsi: VSI to remove from * @f: the filter to remove from the list * - * This function should be called instead of i40e_del_filter only if you know - * the exact filter you will remove already, such as via i40e_find_filter or - * i40e_find_mac. + * This function requires you've found * the exact filter you will remove + * already, such as via i40e_find_filter or i40e_find_mac. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. @@ -1424,30 +1713,7 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) } vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; -} - -/** - * i40e_del_filter - Remove a MAC/VLAN filter from the VSI - * @vsi: the VSI to be searched - * @macaddr: the MAC address - * @vlan: the VLAN - * - * NOTE: This function is expected to be called with mac_filter_hash_lock - * being held. - * ANOTHER NOTE: This function MUST be called from within the context of - * the "safe" variants of any list iterators, e.g. 
list_for_each_entry_safe() - * instead of list_for_each_entry(). - **/ -void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) -{ - struct i40e_mac_filter *f; - - if (!vsi || !macaddr) - return; - - f = i40e_find_filter(vsi, macaddr, vlan); - __i40e_del_filter(vsi, f); + set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } /** @@ -1469,6 +1735,7 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, struct hlist_node *h; int bkt; + lockdep_assert_held(&vsi->mac_filter_hash_lock); if (vsi->info.pvid) return i40e_add_filter(vsi, macaddr, le16_to_cpu(vsi->info.pvid)); @@ -1504,8 +1771,7 @@ int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr) bool found = false; int bkt; - WARN(!spin_is_locked(&vsi->mac_filter_hash_lock), - "Missing mac_filter_hash_lock\n"); + lockdep_assert_held(&vsi->mac_filter_hash_lock); hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (ether_addr_equal(macaddr, f->macaddr)) { __i40e_del_filter(vsi, f); @@ -1537,14 +1803,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) { - netdev_info(netdev, "already using mac address %pM\n", - addr->sa_data); - return 0; - } - - if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) + if (test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; if (ether_addr_equal(hw->mac.addr, addr->sa_data)) @@ -1553,27 +1813,199 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); + /* Copy the address first, so that we avoid a possible race with + * .set_rx_mode(). 
+ * - Remove old address from MAC filter + * - Copy new address + * - Add new address to MAC filter + */ spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); - i40e_add_mac_filter(vsi, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); + i40e_add_mac_filter(vsi, netdev->dev_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, addr->sa_data); + if (vsi->type == I40E_VSI_MAIN) { - i40e_status ret; + int ret; - ret = i40e_aq_mac_address_write(&vsi->back->hw, - I40E_AQC_WRITE_TYPE_LAA_WOL, + ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL, addr->sa_data, NULL); if (ret) - netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); + netdev_info(netdev, "Ignoring error from firmware on LAA update, status %pe, AQ ret %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); } /* schedule our worker thread which will take care of * applying the new filter changes */ - i40e_service_event_schedule(vsi->back); + i40e_service_event_schedule(pf); + return 0; +} + +/** + * i40e_config_rss_aq - Prepare for RSS using AQ commands + * @vsi: vsi structure + * @seed: RSS hash seed + * @lut: pointer to lookup table of lut_size + * @lut_size: size of the lookup table + **/ +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + struct i40e_aqc_get_set_rss_key_data *seed_dw = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS key, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); + return ret; + } + } + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN; + + ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); + return ret; + } + } + return ret; +} + +/** + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used + * @vsi: VSI structure + **/ +static int i40e_vsi_config_rss(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; + int ret; + + if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) + return 0; + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use the user configured hash keys and lookup table if there is one, + * otherwise use default + */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + return ret; +} + +/** + * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured, + * @ctxt: VSI context structure + * @enabled_tc: number of traffic classes to enable + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. 
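One detail of the queue map built below: hardware encodes each TC's queue count as a power-of-2 exponent, so the requested count is rounded up. The rounding uses the kernel's ilog2()/is_power_of_2() helpers from linux/log2.h, extracted here for clarity:

/* find the next higher power-of-2 exponent for a queue count */
static int qcount_to_pow(int num_qps)
{
	int pow = ilog2(num_qps);	/* floor(log2(num_qps)) */

	if (!is_power_of_2(num_qps))
		pow++;			/* round up, e.g. 5 queues -> 2^3 */
	return pow;
}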
+ **/ +static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi, + struct i40e_vsi_context *ctxt, + u8 enabled_tc) +{ + u16 qcount = 0, max_qcount, qmap, sections = 0; + int i, override_q, pow, num_qps, ret; + u8 netdev_tc = 0, offset = 0; + + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc; + vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; + num_qps = vsi->mqprio_qopt.qopt.count[0]; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(num_qps); + if (!is_power_of_2(num_qps)) + pow++; + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue offset/count for all TCs for given VSI */ + max_qcount = vsi->mqprio_qopt.qopt.count[0]; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* See if the given TC is enabled for the given VSI */ + if (vsi->tc_config.enabled_tc & BIT(i)) { + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount = vsi->mqprio_qopt.qopt.count[i]; + if (qcount > max_qcount) + max_qcount = qcount; + vsi->tc_config.tc_info[i].qoffset = offset; + vsi->tc_config.tc_info[i].qcount = qcount; + vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++; + } else { + /* TC is not enabled so set the offset to + * default queue and allocate one queue + * for the given TC. + */ + vsi->tc_config.tc_info[i].qoffset = 0; + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_queue_pairs = offset + qcount; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with max queue count */ + vsi->rss_size = max_qcount; + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues (%u)\n", + max_qcount); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured rss with num_queues (%u)\n", max_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + override_q = vsi->mqprio_qopt.qopt.count[0]; + if (override_q && override_q < vsi->num_queue_pairs) { + vsi->cnt_q_avail = vsi->num_queue_pairs - override_q; + vsi->next_base_queue = override_q; + } return 0; } @@ -1592,21 +2024,48 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, bool is_add) { struct i40e_pf *pf = vsi->back; + u16 num_tc_qps = 0; u16 sections = 0; u8 netdev_tc = 0; - u16 numtc = 0; + u16 numtc = 1; u16 qcount; u8 offset; u16 qmap; int i; - u16 num_tc_qps = 0; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + /* zero out queue mapping, it will get updated on the end of the function */ + memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); - if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (vsi->type == I40E_VSI_MAIN) { + /* This code helps add more queue to the VSI if we have + * more cores than RSS can support, the higher cores will + * be served by ATR or other filters. 
Furthermore, the + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) + vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; + } + + /* Number of queues per enabled TC */ + if (vsi->type == I40E_VSI_MAIN || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) + num_tc_qps = vsi->num_queue_pairs; + else + num_tc_qps = vsi->alloc_queue_pairs; + + if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Find numtc from enabled TC bitmap */ - for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) /* TC is enabled */ numtc++; } @@ -1614,18 +2073,17 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n"); numtc = 1; } - } else { - /* At least TC0 is enabled in case of non-DCB case */ - numtc = 1; + num_tc_qps = num_tc_qps / numtc; + num_tc_qps = min_t(int, num_tc_qps, + i40e_pf_get_max_q_per_tc(pf)); } vsi->tc_config.numtc = numtc; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; - /* Number of queues per enabled TC */ - qcount = vsi->alloc_queue_pairs; - num_tc_qps = qcount / numtc; - num_tc_qps = min_t(int, num_tc_qps, i40e_pf_get_max_q_per_tc(pf)); + /* Do not allow use more TC queue pairs than MSI-X vectors exist */ + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) + num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix); /* Setup queue offset/count for all TCs for given VSI */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1636,9 +2094,16 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, switch (vsi->type) { case I40E_VSI_MAIN: - qcount = min_t(int, pf->alloc_rss_size, - num_tc_qps); - break; + if ((!test_bit(I40E_FLAG_FD_SB_ENA, + pf->flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, + pf->flags)) || + vsi->tc_config.enabled_tc != 1) { + qcount = min_t(int, pf->alloc_rss_size, + num_tc_qps); + break; + } + fallthrough; case I40E_VSI_FDIR: case I40E_VSI_SRIOV: case I40E_VSI_VMDQ2: @@ -1677,15 +2142,11 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - - /* Set actual Tx/Rx queue pairs */ - vsi->num_queue_pairs = offset; - if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) { - if (vsi->req_queue_pairs > 0) - vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) - vsi->num_queue_pairs = pf->num_lan_msix; - } + /* Do not change previously set num_queue_pairs for PFs and VFs*/ + if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || + (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) + vsi->num_queue_pairs = offset; /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { @@ -1739,6 +2200,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. 
Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + i40e_del_mac_filter(vsi, addr); return 0; @@ -1763,13 +2232,8 @@ static void i40e_set_rx_mode(struct net_device *netdev) /* check for other flag changes */ if (vsi->current_netdev_flags != vsi->netdev->flags) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; + set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } - - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); } /** @@ -1812,12 +2276,13 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, hlist_for_each_entry_safe(new, h, from, hlist) { /* We can simply free the wrapper structure */ hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } } /** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@ -1840,7 +2305,7 @@ struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) * from firmware * @count: Number of filters added * @add_list: return data from fw - * @head: pointer to first filter in current batch + * @add_head: pointer to first filter in current batch * * MAC filter entries from list were slated to be added to device. Returns * number of successful filters. Note that 0 does NOT mean success! @@ -1894,19 +2359,18 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; - i40e_status aq_ret; - int aq_err; + enum libie_aq_err aq_status; + int aq_ret; - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); - aq_err = hw->aq.asq_last_status; + aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, + &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + if (aq_ret && !(aq_status == LIBIE_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, - "ignoring delete macvlan error on %s, err %s, aq_err %s\n", - vsi_name, i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); + "ignoring delete macvlan error on %s, err %pe, aq_err %s\n", + vsi_name, ERR_PTR(aq_ret), libie_aq_str(aq_status)); } } @@ -1917,38 +2381,48 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add - * @promisc_change: set to true on exit if promiscuous mode was forced on * * Send a request to firmware via AdminQ to add a chunk of filters. Will set - * promisc_changed to true if the firmware has run out of space for more - * filters. + * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of + * space for more filters. 
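The overflow behavior that kernel-doc promises shows up in the function body that follows: each batched add is checked against the number of entries the firmware actually accepted, and a shortfall on the main VSI forces overflow promiscuous rather than silently dropping stations. In outline (condensed from the code below, message text illustrative):

fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add && vsi->type == I40E_VSI_MAIN) {
	set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
	dev_warn(&vsi->back->pdev->dev,
		 "filter programming failed, promiscuous mode forced on\n");
}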
*/ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, - int num_add, bool *promisc_changed) + int num_add) { struct i40e_hw *hw = &vsi->back->hw; - int aq_err, fcnt; + enum libie_aq_err aq_status; + int fcnt; - i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); - aq_err = hw->aq.asq_last_status; + i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { - *promisc_changed = true; - set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - dev_warn(&vsi->back->pdev->dev, - "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), - vsi_name); + if (vsi->type == I40E_VSI_MAIN) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, promiscuous mode forced on\n", + libie_aq_str(aq_status), vsi_name); + } else if (vsi->type == I40E_VSI_SRIOV || + vsi->type == I40E_VSI_VMDQ1 || + vsi->type == I40E_VSI_VMDQ2) { + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", + libie_aq_str(aq_status), vsi_name, vsi_name); + } else { + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", + libie_aq_str(aq_status), vsi_name, vsi->type); + } } } /** * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags * @vsi: pointer to the VSI + * @vsi_name: the VSI name * @f: filter data * * This function sets or clears the promiscuous broadcast flags for VLAN @@ -1957,13 +2431,14 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, * * Returns status indicating success or failure; **/ -static i40e_status +static int i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_mac_filter *f) { - bool enable = f->state == I40E_FILTER_NEW; + bool enable = f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC; struct i40e_hw *hw = &vsi->back->hw; - i40e_status aq_ret; + int aq_ret; if (f->vlan == I40E_VLAN_ANY) { aq_ret = i40e_aq_set_vsi_broadcast(hw, @@ -1978,11 +2453,79 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) + if (aq_ret) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, - "Error %s setting broadcast promiscuous mode on %s\n", - i40e_aq_str(hw, hw->aq.asq_last_status), - vsi_name); + "Error %s, forcing overflow promiscuous on %s\n", + libie_aq_str(hw->aq.asq_last_status), vsi_name); + } + + return aq_ret; +} + +/** + * i40e_set_promiscuous - set promiscuous mode + * @pf: board private structure + * @promisc: promisc on or off + * + * There are different ways of setting promiscuous mode on a PF depending on + * what state/environment we're in. This identifies and sets it appropriately. + * Returns 0 on success. + **/ +static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) +{ + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + struct i40e_hw *hw = &pf->hw; + int aq_ret; + + if (vsi->type == I40E_VSI_MAIN && + i40e_pf_get_main_veb(pf) && + !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { + /* set defport ON for Main VSI instead of true promisc + * this way we will get all unicast/multicast and VLAN + * promisc behavior but will not get VF or VMDq traffic + * replicated on the Main VSI. 
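The comment above reduces i40e_set_promiscuous() to a two-way decision; a condensed sketch, with placeholder helpers standing in for the i40e_aq_* AdminQ calls used in the real function:

static int set_promisc(bool main_vsi_behind_veb, bool mfp, bool promisc)
{
	if (main_vsi_behind_veb && !mfp)
		/* defport: promisc-like RX without VF/VMDq replication */
		return promisc ? set_default_vsi() : clear_default_vsi();

	/* otherwise program true unicast + multicast promiscuous */
	return set_uc_mc_promisc(promisc);
}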
+ */ + if (promisc) + aq_ret = i40e_aq_set_default_vsi(hw, + vsi->seid, + NULL); + else + aq_ret = i40e_aq_clear_default_vsi(hw, + vsi->seid, + NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "Set default VSI failed, err %pe, aq_err %s\n", + ERR_PTR(aq_ret), + libie_aq_str(hw->aq.asq_last_status)); + } + } else { + aq_ret = i40e_aq_set_vsi_unicast_promiscuous( + hw, + vsi->seid, + promisc, NULL, + true); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "set unicast promisc failed, err %pe, aq_err %s\n", + ERR_PTR(aq_ret), + libie_aq_str(hw->aq.asq_last_status)); + } + aq_ret = i40e_aq_set_vsi_multicast_promiscuous( + hw, + vsi->seid, + promisc, NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "set multicast promisc failed, err %pe, aq_err %s\n", + ERR_PTR(aq_ret), + libie_aq_str(hw->aq.asq_last_status)); + } + } + + if (!aq_ret) + pf->cur_promisc = promisc; return aq_ret; } @@ -2001,17 +2544,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; + bool old_overflow, new_overflow; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; - bool promisc_changed = false; char vsi_name[16] = "PF"; int filter_list_len = 0; - i40e_status aq_ret = 0; u32 changed_flags = 0; struct hlist_node *h; struct i40e_pf *pf; int num_add = 0; int num_del = 0; + int aq_ret = 0; int retval = 0; u16 cmd_flags; int list_size; @@ -2025,6 +2568,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) usleep_range(1000, 2000); pf = vsi->back; + old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + if (vsi->netdev) { changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; vsi->current_netdev_flags = vsi->netdev->flags; @@ -2064,6 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* Add it to the hash list */ hlist_add_head(&new->hlist, &tmp_add_list); + f->state = I40E_FILTER_NEW_SYNC; } /* Count the number of active (current and new) VLAN @@ -2074,10 +2620,18 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) vlan_filters++; } - retval = i40e_correct_mac_vlan_filters(vsi, - &tmp_add_list, - &tmp_del_list, - vlan_filters); + if (vsi->type != I40E_VSI_SRIOV) + retval = i40e_correct_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters); + else if (pf->vf) + retval = i40e_correct_vf_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters, pf->vf[vsi->vf_id].trusted); + + hlist_for_each_entry(new, &tmp_add_list, hlist) + netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); + if (retval) goto err_no_memory_locked; @@ -2157,12 +2711,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)) { - new->state = I40E_FILTER_FAILED; - continue; - } - /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ @@ -2198,15 +2746,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { i40e_aqc_add_filters(vsi, vsi_name, add_list, - add_head, num_add, - &promisc_changed); + add_head, num_add); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, - num_add, &promisc_changed); + num_add); } /* Now move all of the filters from the temp add list back to * the VSI's list. 
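Stepping back, i40e_sync_vsi_filters() follows a copy-out/commit-back shape: filters are detached onto temporary lists under mac_filter_hash_lock, the slow AdminQ programming runs with the lock dropped, and results are merged back under the lock — which is why the tmp_add_list wrapper structures exist at all. Schematically:

spin_lock_bh(&vsi->mac_filter_hash_lock);
/* move REMOVE'd filters and wrappers for NEW ones onto tmp lists */
spin_unlock_bh(&vsi->mac_filter_hash_lock);

/* batched AdminQ add/del calls — no spinlock held across firmware I/O */

spin_lock_bh(&vsi->mac_filter_hash_lock);
/* merge results: promote NEW -> ACTIVE or mark FAILED, free wrappers */
spin_unlock_bh(&vsi->mac_filter_hash_lock);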
@@ -2214,9 +2761,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) spin_lock_bh(&vsi->mac_filter_hash_lock); hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ - if (new->f->state == I40E_FILTER_NEW) + if (new->f->state == I40E_FILTER_NEW || + new->f->state == I40E_FILTER_NEW_SYNC) new->f->state = new->state; hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -2235,33 +2784,34 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } spin_unlock_bh(&vsi->mac_filter_hash_lock); - /* If promiscuous mode has changed, we need to calculate a new - * threshold for when we are safe to exit - */ - if (promisc_changed) - vsi->promisc_threshold = (vsi->active_filters * 3) / 4; - /* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && - !promisc_changed && !failed_filters && - (vsi->active_filters < vsi->promisc_threshold)) { + if (old_overflow && !failed_filters && + vsi->active_filters < vsi->promisc_threshold) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - promisc_changed = true; vsi->promisc_threshold = 0; } /* if the VF is not trusted do not do promisc */ - if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { + if (vsi->type == I40E_VSI_SRIOV && pf->vf && + !pf->vf[vsi->vf_id].trusted) { clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } + new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + + /* If we are entering overflow promiscuous, we need to calculate a new + * threshold for when we are safe to exit + */ + if (!old_overflow && new_overflow) + vsi->promisc_threshold = (vsi->active_filters * 3) / 4; + /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; @@ -2275,94 +2825,31 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) retval = i40e_aq_rc_to_posix(aq_ret, hw->aq.asq_last_status); dev_info(&pf->pdev->dev, - "set multi promisc failed on %s, err %s aq_err %s\n", + "set multi promisc failed on %s, err %pe aq_err %s\n", vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); + ERR_PTR(aq_ret), + libie_aq_str(hw->aq.asq_last_status)); + } else { + dev_info(&pf->pdev->dev, "%s allmulti mode.\n", + cur_multipromisc ? "entering" : "leaving"); } } - if ((changed_flags & IFF_PROMISC) || promisc_changed) { + if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || - test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)); - if ((vsi->type == I40E_VSI_MAIN) && - (pf->lan_veb != I40E_NO_VEB) && - !(pf->flags & I40E_FLAG_MFP_ENABLED)) { - /* set defport ON for Main VSI instead of true promisc - * this way we will get all unicast/multicast and VLAN - * promisc behavior but will not get VF or VMDq traffic - * replicated on the Main VSI. 
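/* Hysteresis recap for the hunk above (restating the new logic):
 * entering overflow promiscuous latches a threshold at 3/4 of the
 * filters active at that moment,
 *
 *	if (!old_overflow && new_overflow)
 *		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
 *
 * and the mode is left again only once no filters have failed and the
 * active count has dropped back below that threshold.
 */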
- */ - if (pf->cur_promisc != cur_promisc) { - pf->cur_promisc = cur_promisc; - if (cur_promisc) - aq_ret = - i40e_aq_set_default_vsi(hw, - vsi->seid, - NULL); - else - aq_ret = - i40e_aq_clear_default_vsi(hw, - vsi->seid, - NULL); - if (aq_ret) { - retval = i40e_aq_rc_to_posix(aq_ret, - hw->aq.asq_last_status); - dev_info(&pf->pdev->dev, - "Set default VSI failed on %s, err %s, aq_err %s\n", - vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, - hw->aq.asq_last_status)); - } - } - } else { - aq_ret = i40e_aq_set_vsi_unicast_promiscuous( - hw, - vsi->seid, - cur_promisc, NULL, - true); - if (aq_ret) { - retval = - i40e_aq_rc_to_posix(aq_ret, - hw->aq.asq_last_status); - dev_info(&pf->pdev->dev, - "set unicast promisc failed on %s, err %s, aq_err %s\n", - vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, - hw->aq.asq_last_status)); - } - aq_ret = i40e_aq_set_vsi_multicast_promiscuous( - hw, - vsi->seid, - cur_promisc, NULL); - if (aq_ret) { - retval = - i40e_aq_rc_to_posix(aq_ret, - hw->aq.asq_last_status); - dev_info(&pf->pdev->dev, - "set multicast promisc failed on %s, err %s, aq_err %s\n", - vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, - hw->aq.asq_last_status)); - } - } - aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, - vsi->seid, - cur_promisc, NULL); + new_overflow); + aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, - pf->hw.aq.asq_last_status); + hw->aq.asq_last_status); dev_info(&pf->pdev->dev, - "set brdcast promisc failed, err %s, aq_err %s\n", - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, - hw->aq.asq_last_status)); + "Setting promiscuous %s failed on %s, err %pe aq_err %s\n", + cur_promisc ? "on" : "off", + vsi_name, + ERR_PTR(aq_ret), + libie_aq_str(hw->aq.asq_last_status)); } } out: @@ -2392,20 +2879,27 @@ err_no_memory_locked: **/ static void i40e_sync_filters_subtask(struct i40e_pf *pf) { + struct i40e_vsi *vsi; int v; - if (!pf || !(pf->flags & I40E_FLAG_FILTER_SYNC)) + if (!pf) + return; + if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state)) return; - pf->flags &= ~I40E_FLAG_FILTER_SYNC; + if (test_bit(__I40E_VF_DISABLE, pf->state)) { + set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); + return; + } - for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { - int ret = i40e_sync_vsi_filters(pf->vsi[v]); + i40e_pf_for_each_vsi(pf, v, vsi) { + if ((vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) && + !test_bit(__I40E_VSI_RELEASING, vsi->state)) { + int ret = i40e_sync_vsi_filters(vsi); if (ret) { /* come back and try again later */ - pf->flags |= I40E_FLAG_FILTER_SYNC; + set_bit(__I40E_MACVLAN_SYNC_PENDING, + pf->state); break; } } @@ -2413,15 +2907,35 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) } /** - * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * i40e_calculate_vsi_rx_buf_len - Calculates buffer length + * + * @vsi: VSI to calculate rx_buf_len from + */ +static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) +{ + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) + return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048); + + return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; +} + +/** + * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI * @vsi: the vsi + * @xdp_prog: XDP program **/ -static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi) +static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi, + struct bpf_prog *xdp_prog) { - if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) - return I40E_RXBUFFER_2048; + u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); + u16 chain_len; + + if (xdp_prog && !xdp_prog->aux->xdp_has_frags) + chain_len = 1; else - return I40E_RXBUFFER_3072; + chain_len = I40E_MAX_CHAINED_RX_BUFFERS; + + return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER); } /** @@ -2436,53 +2950,37 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + int frame_size; - if (i40e_enabled_xdp_vsi(vsi)) { - int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - - if (frame_size > i40e_max_xdp_frame_size(vsi)) - return -EINVAL; + frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); + if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) { + netdev_err(netdev, "Error changing mtu to %d, Max is %d\n", + new_mtu, frame_size - I40E_PACKET_HDR_PAD); + return -EINVAL; } - netdev_info(netdev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); - pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED | - I40E_FLAG_CLIENT_L2_CHANGE); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); return 0; } /** - * i40e_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - **/ -int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_pf *pf = np->vsi->back; - - switch (cmd) { - case SIOCGHWTSTAMP: - return i40e_ptp_get_ts_config(pf, ifr); - case SIOCSHWTSTAMP: - return i40e_ptp_set_ts_config(pf, ifr); - default: - return -EOPNOTSUPP; - } -} - -/** * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI * @vsi: the vsi being adjusted **/ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi) { struct i40e_vsi_context ctxt; - i40e_status ret; + int ret; + + /* Don't modify stripping options if a port VLAN is active */ + if (vsi->info.pvid) + return; if ((vsi->info.valid_sections & cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && @@ -2498,10 +2996,9 @@ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi) ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, - "update vlan stripping failed, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + "update vlan stripping failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(vsi->back->hw.aq.asq_last_status)); } } @@ -2512,7 +3009,11 @@ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi) void i40e_vlan_stripping_disable(struct i40e_vsi *vsi) { struct i40e_vsi_context ctxt; - i40e_status ret; + int ret; + + /* Don't modify stripping options if a port VLAN is active */ + if (vsi->info.pvid) + return; if ((vsi->info.valid_sections & 
cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && @@ -2529,30 +3030,13 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi) ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, - "update vlan stripping failed, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + "update vlan stripping failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(vsi->back->hw.aq.asq_last_status)); } } /** - * i40e_vlan_rx_register - Setup or shutdown vlan offload - * @netdev: network interface to be adjusted - * @features: netdev features to test if VLAN offload is enabled or not - **/ -static void i40e_vlan_rx_register(struct net_device *netdev, u32 features) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - - if (features & NETIF_F_HW_VLAN_CTAG_RX) - i40e_vlan_stripping_enable(vsi); - else - i40e_vlan_stripping_disable(vsi); -} - -/** * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address * @vsi: the vsi being configured * @vid: vlan id to be added (0 = untagged only , -1 = any) @@ -2572,8 +3056,21 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { - if (f->state == I40E_FILTER_REMOVE) + /* If we're asked to add a filter that has been marked for + * removal, it is safe to simply restore it to active state. + * __i40e_del_filter will have simply deleted any filters which + * were previously marked NEW or FAILED, so if it is currently + * marked REMOVE it must have previously been ACTIVE. Since we + * haven't yet run the sync filters task, just restore this + * filter to the ACTIVE state so that the sync task leaves it + * in place. + */ + if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) { + f->state = I40E_FILTER_ACTIVE; + continue; + } else if (f->state == I40E_FILTER_REMOVE) { continue; + } add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, @@ -2595,9 +3092,20 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid) { int err; - if (!vid || vsi->info.pvid) + if (vsi->info.pvid) return -EINVAL; + /* The network stack will attempt to add VID=0, with the intention to + * receive priority tagged packets with a VLAN of 0. Our HW receives + * these packets by default when configured to receive untagged + * packets, so we don't need to add a filter for this case. + * Additionally, HW interprets adding a VID=0 filter as meaning to + * receive *only* tagged traffic and stops receiving untagged traffic. + * Thus, we do not want to actually add a filter for VID=0 + */ + if (!vid) + return 0; + /* Locked once because all functions invoked below iterates list*/ spin_lock_bh(&vsi->mac_filter_hash_lock); err = i40e_add_vlan_all_mac(vsi, vid); @@ -2660,6 +3168,7 @@ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid) /** * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload * @netdev: network interface to be adjusted + * @proto: unused protocol value * @vid: vlan id to be added * * net_device_ops implementation for adding vlan ids @@ -2674,15 +3183,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev, if (vid >= VLAN_N_VID) return -EINVAL; - /* If the network stack called us with vid = 0 then - * it is asking to receive priority tagged packets with - * vlan id 0. 
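/* Net effect of the VID 0 special case above: the stack's automatic
 * request to receive priority-tagged (VID 0) frames now succeeds as a
 * no-op, because programming a real VID 0 filter would switch the
 * hardware to tagged-only reception and stop untagged traffic.
 */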
Our HW receives them by default when configured - * to receive untagged packets so there is no need to add an - * extra filter for vlan 0 tagged packets. - */ - if (vid) - ret = i40e_vsi_add_vlan(vsi, vid); - + ret = i40e_vsi_add_vlan(vsi, vid); if (!ret) set_bit(vid, vsi->active_vlans); @@ -2690,8 +3191,26 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev, } /** + * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path + * @netdev: network interface to be adjusted + * @proto: unused protocol value + * @vid: vlan id to be added + **/ +static void i40e_vlan_rx_add_vid_up(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + + if (vid >= VLAN_N_VID) + return; + set_bit(vid, vsi->active_vlans); +} + +/** * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload * @netdev: network interface to be adjusted + * @proto: unused protocol value * @vid: vlan id to be removed * * net_device_ops implementation for removing vlan ids @@ -2714,44 +3233,6 @@ static int i40e_vlan_rx_kill_vid(struct net_device *netdev, } /** - * i40e_macaddr_init - explicitly write the mac address filters - * - * @vsi: pointer to the vsi - * @macaddr: the MAC address - * - * This is needed when the macaddr has been obtained by other - * means than the default, e.g., from Open Firmware or IDPROM. - * Returns 0 on success, negative on failure - **/ -static int i40e_macaddr_init(struct i40e_vsi *vsi, u8 *macaddr) -{ - int ret; - struct i40e_aqc_add_macvlan_element_data element; - - ret = i40e_aq_mac_address_write(&vsi->back->hw, - I40E_AQC_WRITE_TYPE_LAA_WOL, - macaddr, NULL); - if (ret) { - dev_info(&vsi->back->pdev->dev, - "Addr change for VSI failed: %d\n", ret); - return -EADDRNOTAVAIL; - } - - memset(&element, 0, sizeof(element)); - ether_addr_copy(element.mac_addr, macaddr); - element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH); - ret = i40e_aq_add_macvlan(&vsi->back->hw, vsi->seid, &element, 1, NULL); - if (ret) { - dev_info(&vsi->back->pdev->dev, - "add filter failed err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); - } - return ret; -} - -/** * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up * @vsi: the vsi being brought back up **/ @@ -2762,11 +3243,14 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi) if (!vsi->netdev) return; - i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features); + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + i40e_vlan_stripping_enable(vsi); + else + i40e_vlan_stripping_disable(vsi); for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) - i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), - vid); + i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q), + vid); } /** @@ -2777,7 +3261,7 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi) int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) { struct i40e_vsi_context ctxt; - i40e_status ret; + int ret; vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.pvid = cpu_to_le16(vid); @@ -2790,10 +3274,9 @@ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, - "add pvid failed, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + "add pvid failed, err %pe aq_err %s\n", + 
ERR_PTR(ret), + libie_aq_str(vsi->back->hw.aq.asq_last_status)); return -ENOENT; } @@ -2808,9 +3291,9 @@ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) **/ void i40e_vsi_remove_pvid(struct i40e_vsi *vsi) { - i40e_vlan_stripping_disable(vsi); - vsi->info.pvid = 0; + + i40e_vlan_stripping_disable(vsi); } /** @@ -2908,29 +3391,38 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) **/ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { - struct i40e_vsi *vsi = ring->vsi; - cpumask_var_t mask; + int cpu; - if (!ring->q_vector || !ring->netdev) + if (!ring->q_vector || !ring->netdev || ring->ch) return; - /* Single TC mode enable XPS */ - if (vsi->tc_config.numtc <= 1) { - if (!test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) - netif_set_xps_queue(ring->netdev, - &ring->q_vector->affinity_mask, - ring->queue_index); - } else if (alloc_cpumask_var(&mask, GFP_KERNEL)) { - /* Disable XPS to allow selection based on TC */ - bitmap_zero(cpumask_bits(mask), nr_cpumask_bits); - netif_set_xps_queue(ring->netdev, mask, ring->queue_index); - free_cpumask_var(mask); - } + /* We only initialize XPS once, so as not to overwrite user settings */ + if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) + return; - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), + ring->queue_index); +} + +/** + * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled + * @ring: The Tx or Rx ring + * + * Returns the AF_XDP buffer pool or NULL. + **/ +static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(ring->vsi->netdev, qid); } /** @@ -2945,12 +3437,15 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_txq tx_ctx; - i40e_status err = 0; u32 qtx_ctl = 0; + int err = 0; + + if (ring_is_xdp(ring)) + ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ - if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { - ring->atr_sample_rate = vsi->back->atr_sample_rate; + if (test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) { + ring->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE; ring->atr_count = 0; } else { ring->atr_sample_rate = 0; @@ -2965,9 +3460,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) tx_ctx.new_context = 1; tx_ctx.base = (ring->dma / 128); tx_ctx.qlen = ring->count; - tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED)); - tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP); + if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) + tx_ctx.fd_ena = 1; + if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags)) + tx_ctx.timesync_ena = 1; /* FDIR VSI tx ring can still use RS bit and writebacks */ if (vsi->type != I40E_VSI_FDIR) tx_ctx.head_wb_ena = 1; @@ -2984,7 +3481,14 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) * initialization. This has to be done regardless of * DCB as by default everything is mapped to TC0. 
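/* Queue-id mapping in i40e_xsk_pool() above: XDP Tx rings occupy a
 * second bank after the regular queue pairs, so a ring at hardware
 * index alloc_queue_pairs + n is folded back to AF_XDP queue id n
 * before the zero-copy bitmap and xsk_get_pool_from_qid() are
 * consulted.
 */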
*/ - tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + + if (ring->ch) + tx_ctx.rdylist = + le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]); + + else + tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + tx_ctx.rdylist_act = 0; /* clear the context in the HMC */ @@ -3006,16 +3510,25 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) } /* Now associate this queue with this PCI function */ - if (vsi->type == I40E_VSI_VMDQ2) { - qtx_ctl = I40E_QTX_CTL_VM_QUEUE; - qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & - I40E_QTX_CTL_VFVM_INDX_MASK; + if (ring->ch) { + if (ring->ch->type == I40E_VSI_VMDQ2) + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + else + return -EINVAL; + + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, + ring->ch->vsi_number); } else { - qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + if (vsi->type == I40E_VSI_VMDQ2) { + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, + vsi->id); + } else { + qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + } } - qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & - I40E_QTX_CTL_PF_INDX_MASK); + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_PF_INDX_MASK, hw->pf_id); wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl); i40e_flush(hw); @@ -3026,6 +3539,17 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) } /** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + +/** * i40e_configure_rx_ring - Configure a receive ring context * @ring: The Rx ring to configure * @@ -3038,23 +3562,67 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; - i40e_status err = 0; + int err = 0; + bool ok; - ring->state = 0; + bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); ring->rx_buf_len = vsi->rx_buf_len; + /* XDP RX-queue info only needed for RX rings exposed to XDP */ + if (ring->vsi->type != I40E_VSI_MAIN) + goto skip; + + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } + + ring->xsk_pool = i40e_xsk_pool(ring); + if (ring->xsk_pool) { + xdp_rxq_info_unreg(&ring->xdp_rxq); + ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + if (err) + return err; + dev_info(&vsi->back->pdev->dev, + "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", + ring->queue_index); + + } else { + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; + } + +skip: + xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); + rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); rx_ctx.base = (ring->dma / 128); rx_ctx.qlen = ring->count; - /* use 32 byte descriptors */ - rx_ctx.dsize = 1; + /* use 16 byte descriptors */ + rx_ctx.dsize = 0; /* descriptor type is always zero * 
rx_ctx.dtype = 0; @@ -3065,7 +3633,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (hw->revision_id == 0) rx_ctx.lrxqthresh = 0; else - rx_ctx.lrxqthresh = 2; + rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; /* this controls whether VLAN is stripped from inner headers */ @@ -3092,16 +3660,38 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } /* configure Rx buffer alignment */ - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) { + if (I40E_2K_TOO_SMALL_WITH_PADDING) { + dev_info(&vsi->back->pdev->dev, + "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); + return -EOPNOTSUPP; + } clear_ring_build_skb_enabled(ring); - else + } else { set_ring_build_skb_enabled(ring); + } + + ring->rx_offset = i40e_rx_offset(ring); /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); - i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + if (ring->xsk_pool) { + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); + ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)); + } else { + ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + } + if (!ok) { + /* Log this in case the user has forgotten to give the kernel + * any buffers, even later in the application. + */ + dev_info(&vsi->back->pdev->dev, + "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n", + ring->xsk_pool ? "AF_XDP ZC enabled " : "", + ring->queue_index, pf_q); + } return 0; } @@ -3120,7 +3710,7 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) err = i40e_configure_tx_ring(vsi->tx_rings[i]); - if (!i40e_enabled_xdp_vsi(vsi)) + if (err || !i40e_enabled_xdp_vsi(vsi)) return err; for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) @@ -3140,20 +3730,16 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) int err = 0; u16 i; - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { - vsi->max_frame = I40E_MAX_RXBUFFER; - vsi->rx_buf_len = I40E_RXBUFFER_2048; + vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); + vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); + #if (PAGE_SIZE < 8192) - } else if (!I40E_2K_TOO_SMALL_WITH_PADDING && - (vsi->netdev->mtu <= ETH_DATA_LEN)) { - vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; + if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING && + vsi->netdev->mtu <= ETH_DATA_LEN) { vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; -#endif - } else { - vsi->max_frame = I40E_MAX_RXBUFFER; - vsi->rx_buf_len = (PAGE_SIZE < 8192) ? 
I40E_RXBUFFER_3072 : - I40E_RXBUFFER_2048; + vsi->max_frame = vsi->rx_buf_len; } +#endif /* set up individual rings */ for (i = 0; i < vsi->num_queue_pairs && !err; i++) @@ -3172,7 +3758,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) u16 qoffset, qcount; int i, n; - if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (!test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ for (i = 0; i < vsi->num_queue_pairs; i++) { rx_ring = vsi->rx_rings[i]; @@ -3180,6 +3766,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) rx_ring->dcb_tc = 0; tx_ring->dcb_tc = 0; } + return; } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { @@ -3203,19 +3790,26 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) **/ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi) { - struct i40e_pf *pf = vsi->back; - int err; - if (vsi->netdev) i40e_set_rx_mode(vsi->netdev); +} - if (!!(pf->flags & I40E_FLAG_PF_MAC)) { - err = i40e_macaddr_init(vsi, pf->hw.mac.addr); - if (err) { - dev_warn(&pf->pdev->dev, - "could not set up macaddr; err %d\n", err); - } - } +/** + * i40e_reset_fdir_filter_cnt - Reset flow director filter counters + * @pf: Pointer to the targeted PF + * + * Set all flow director counters to 0. + */ +static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf) +{ + pf->fd_tcp4_filter_cnt = 0; + pf->fd_udp4_filter_cnt = 0; + pf->fd_sctp4_filter_cnt = 0; + pf->fd_ip4_filter_cnt = 0; + pf->fd_tcp6_filter_cnt = 0; + pf->fd_udp6_filter_cnt = 0; + pf->fd_sctp6_filter_cnt = 0; + pf->fd_ip6_filter_cnt = 0; } /** @@ -3231,14 +3825,11 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; struct hlist_node *node; - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* Reset FDir counters as we're replaying all existing filters */ - pf->fd_tcp4_filter_cnt = 0; - pf->fd_udp4_filter_cnt = 0; - pf->fd_sctp4_filter_cnt = 0; - pf->fd_ip4_filter_cnt = 0; + i40e_reset_fdir_filter_cnt(pf); hlist_for_each_entry_safe(filter, node, &pf->fdir_filter_list, fdir_node) { @@ -3286,19 +3877,30 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = + ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.target_itr >> 1); + q_vector->rx.current_itr = q_vector->rx.target_itr; + + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = + ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.itr); + q_vector->tx.target_itr >> 1); + q_vector->tx.current_itr = q_vector->tx.target_itr; + + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. 
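/* About the >> 1 when writing target_itr above: the driver tracks ITR
 * values in microseconds, while the PFINT_ITRN interval field counts in
 * 2-usec units, so the programmed value is the interval halved.
 */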
+ */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); - /* Linked list for the queuepairs assigned to this vector */ + /* begin of linked list for RX queue assigned to this vector */ wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp); for (q = 0; q < q_vector->num_ringpairs; q++) { u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp; @@ -3314,6 +3916,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) wr32(hw, I40E_QINT_RQCTL(qp), val); if (has_xdp) { + /* TX queue with next queue set to TX */ val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | @@ -3323,7 +3926,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) wr32(hw, I40E_QINT_TQCTL(nextqp), val); } - + /* TX queue with next RX or end of linked list */ val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | @@ -3346,7 +3949,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) /** * i40e_enable_misc_int_causes - enable the non-queue interrupts - * @hw: ptr to the hardware info + * @pf: pointer to private device data structure **/ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) { @@ -3366,10 +3969,10 @@ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK; - if (pf->flags & I40E_FLAG_IWARP_ENABLED) + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, val); @@ -3392,44 +3995,36 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) struct i40e_q_vector *q_vector = vsi->q_vectors[0]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - u32 val; /* set the ITR configuration */ - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1); + q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); - /* Associate the queue pair to the vector and enable the queue int */ - val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | - (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | - (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)| - (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); - - wr32(hw, I40E_QINT_RQCTL(0), val); + /* Associate the queue pair to the vector and enable the queue + * interrupt RX queue in linked list with next queue set to TX + */ + wr32(hw, I40E_QINT_RQCTL(0), I40E_QINT_RQCTL_VAL(nextqp, 0, TX)); if 
(i40e_enabled_xdp_vsi(vsi)) { - val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | - (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT)| - (I40E_QUEUE_TYPE_TX - << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); - - wr32(hw, I40E_QINT_TQCTL(nextqp), val); + /* TX queue in linked list with next queue set to TX */ + wr32(hw, I40E_QINT_TQCTL(nextqp), + I40E_QINT_TQCTL_VAL(nextqp, 0, TX)); } - val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | - (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | - (I40E_QUEUE_END_OF_LIST << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); - - wr32(hw, I40E_QINT_TQCTL(0), val); + /* last TX queue so the next RX queue doesn't matter */ + wr32(hw, I40E_QINT_TQCTL(0), + I40E_QINT_TQCTL_VAL(I40E_QUEUE_END_OF_LIST, 0, RX)); i40e_flush(hw); } @@ -3449,15 +4044,14 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure - * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | + I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); @@ -3495,7 +4089,7 @@ static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify, struct i40e_q_vector *q_vector = container_of(notify, struct i40e_q_vector, affinity_notify); - q_vector->affinity_mask = *mask; + cpumask_copy(&q_vector->affinity_mask, mask); } /** @@ -3524,6 +4118,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int tx_int_idx = 0; int vector, err; int irq_num; + int cpu; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; @@ -3556,11 +4151,18 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) } /* register for affinity change notifications */ + q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* assign the mask for this irq */ - irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + /* Spread affinity hints out across online CPUs. + * + * get_cpu_mask returns a static constant mask with + * a permanent lifetime so it's ok to pass to + * irq_update_affinity_hint without making a copy. 
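/* The two calls above are the whole spreading scheme: pick the v_idx'th
 * online CPU (round-robin; -1 meaning no NUMA-node preference) and hand
 * its permanent cpumask to the IRQ core as a hint:
 *
 *	cpu = cpumask_local_spread(q_vector->v_idx, -1);
 *	irq_update_affinity_hint(irq_num, get_cpu_mask(cpu));
 */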
+ */ + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_update_affinity_hint(irq_num, get_cpu_mask(cpu)); } vsi->irqs_ready = true; @@ -3571,8 +4173,8 @@ free_queue_irqs: vector--; irq_num = pf->msix_entries[base + vector].vector; irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); - free_irq(irq_num, &vsi->q_vectors[vector]); + irq_update_affinity_hint(irq_num, NULL); + free_irq(irq_num, vsi->q_vectors[vector]); } return err; } @@ -3606,7 +4208,7 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) } /* disable each interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = vsi->base_vector; i < (vsi->num_q_vectors + vsi->base_vector); i++) wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0); @@ -3632,11 +4234,11 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); } i40e_flush(&pf->hw); @@ -3644,14 +4246,19 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) } /** - * i40e_stop_misc_vector - Stop the vector that handles non-queue events + * i40e_free_misc_vector - Free the vector that handles non-queue events * @pf: board private structure **/ -static void i40e_stop_misc_vector(struct i40e_pf *pf) +static void i40e_free_misc_vector(struct i40e_pf *pf) { /* Disable ICR 0 */ wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); + + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { + free_irq(pf->msix_entries[0].vector, pf); + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); + } } /** @@ -3683,7 +4290,7 @@ static irqreturn_t i40e_intr(int irq, void *data) (icr0 & I40E_PFINT_ICR0_SWINT_MASK)) pf->sw_int_count++; - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n"); @@ -3692,7 +4299,7 @@ static irqreturn_t i40e_intr(int irq, void *data) /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */ if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_q_vector *q_vector = vsi->q_vectors[0]; /* We do not have a way to disarm Queue causes while leaving @@ -3717,8 +4324,16 @@ static irqreturn_t i40e_intr(int irq, void *data) } if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { - ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; - set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + /* disable any further VFLR event notifications */ + if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) { + u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA); + + reg &= ~I40E_PFINT_ICR0_VFLR_MASK; + wr32(hw, I40E_PFINT_ICR0_ENA, reg); + } else { + ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } if (icr0 & I40E_PFINT_ICR0_GRST_MASK) { @@ -3726,8 +4341,7 @@ static irqreturn_t i40e_intr(int irq, void *data) set_bit(__I40E_RESET_INTR_RECEIVED, pf->state); ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; val = rd32(hw, I40E_GLGEN_RSTAT); - val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) - >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; + val = FIELD_GET(I40E_GLGEN_RSTAT_RESET_TYPE_MASK, val); if (val == I40E_RESET_CORER) { 
pf->corer_count++; } else if (val == I40E_RESET_GLOBR) { @@ -3749,10 +4363,13 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) { u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0); - if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) { - icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; + if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK) + schedule_work(&pf->ptp_extts0_work); + + if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) i40e_ptp_tx_hwtstamp(pf); - } + + icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; } /* If a critical error is pending we have no choice but to reset the @@ -3777,9 +4394,10 @@ static irqreturn_t i40e_intr(int irq, void *data) enable_intr: /* re-enable interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); - if (!test_bit(__I40E_DOWN, pf->state)) { + if (!test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_RECOVERY_MODE, pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf, false); + i40e_irq_dynamic_enable_icr0(pf); } return ret; @@ -3811,7 +4429,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz & @@ -3864,7 +4482,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) i += tx_ring->count; tx_ring->next_to_clean = i; - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx); return budget > 0; @@ -3952,6 +4570,7 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start); q_vector->num_ringpairs = num_ringpairs; + q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1; q_vector->rx.count = 0; q_vector->tx.count = 0; @@ -3976,9 +4595,9 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) err = i40e_vsi_request_irq_msix(vsi, basename); - else if (pf->flags & I40E_FLAG_MSI_ENABLED) + else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) err = request_irq(pf->pdev->irq, i40e_intr, 0, pf->int_name, pf); else @@ -4010,7 +4629,7 @@ static void i40e_netpoll(struct net_device *netdev) if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_msix_clean_rings(0, vsi->q_vectors[i]); } else { @@ -4102,8 +4721,8 @@ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable) * @is_xdp: true if the queue is used for XDP * @enable: start or stop the queue **/ -static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, - bool is_xdp, bool enable) +int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, + bool is_xdp, bool enable) { int ret; @@ -4122,11 +4741,10 @@ static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; @@ -4135,7 
+4753,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, - false /*is xdp*/, enable); + false /*is xdp*/, true); if (ret) break; @@ -4144,11 +4762,10 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, - true /*is xdp*/, enable); + true /*is xdp*/, true); if (ret) break; } - return ret; } @@ -4187,9 +4804,9 @@ static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable) * @pf_q: the PF queue to configure * @enable: start or stop the queue * - * This function enables or disables a single queue. Note that any delay - * required after the operation is expected to be handled by the caller of - * this function. + * This function enables or disables a single queue. Note that + * any delay required after the operation is expected to be + * handled by the caller of this function. **/ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable) { @@ -4219,35 +4836,49 @@ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_rx - Start or stop a VSI's rings - * @vsi: the VSI being configured + * i40e_control_wait_rx_q + * @pf: the PF structure + * @pf_q: queue being configured * @enable: start or stop the rings + * + * This function enables or disables a single queue along with waiting + * for the change to finish. The caller of this function should handle + * the delays needed in the case of disabling queues. **/ -static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) +int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) +{ + int ret = 0; + + i40e_control_rx_q(pf, pf_q, enable); + + /* wait for the change to finish */ + ret = i40e_pf_rxq_wait(pf, pf_q, enable); + if (ret) + return ret; + + return ret; +} + +/** + * i40e_vsi_enable_rx - Start a VSI's rings + * @vsi: the VSI being configured + **/ +static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - i40e_control_rx_q(pf, pf_q, enable); - - /* wait for the change to finish */ - ret = i40e_pf_rxq_wait(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + "VSI seid %d Rx ring %d enable timeout\n", + vsi->seid, pf_q); break; } } - /* Due to HW errata, on Rx disable only, the register can indicate done - * before it really is. 
Needs 50ms to be sure - */ - if (!enable) - mdelay(50); - return ret; } @@ -4260,29 +4891,43 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) int ret = 0; /* do rx first for enable and last for disable */ - ret = i40e_vsi_control_rx(vsi, true); + ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; - ret = i40e_vsi_control_tx(vsi, true); + ret = i40e_vsi_enable_tx(vsi); return ret; } +#define I40E_DISABLE_TX_GAP_MSEC 50 + /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { + struct i40e_pf *pf = vsi->back; + u32 pf_q, tx_q_end, rx_q_end; + /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - /* do rx first for enable and last for disable - * Ignore return value, we need to shutdown whatever we can - */ - i40e_vsi_control_tx(vsi, false); - i40e_vsi_control_rx(vsi, false); + tx_q_end = vsi->base_queue + + vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); + for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, pf_q, false); + + rx_q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < rx_q_end; pf_q++) + i40e_control_rx_q(pf, pf_q, false); + + msleep(I40E_DISABLE_TX_GAP_MSEC); + for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) + wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); + + i40e_vsi_wait_queues_disabled(vsi); } /** @@ -4320,7 +4965,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) u32 val, qp; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { if (!vsi->q_vectors) return; @@ -4342,9 +4987,8 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) /* clear the affinity notifier in the IRQ descriptor */ irq_set_affinity_notifier(irq_num, NULL); - /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(irq_num, NULL); - synchronize_irq(irq_num); + /* remove our suggested affinity mask for this IRQ */ + irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, vsi->q_vectors[i]); /* Tear down the interrupt queue link list @@ -4355,8 +4999,8 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) * next_q field of the registers. 
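/* Shutdown ordering in i40e_vsi_stop_rings() above: first ask the
 * scheduler to stop feeding every Tx queue (including the XDP bank),
 * then disable all Rx queues, sleep I40E_DISABLE_TX_GAP_MSEC (50 ms) so
 * in-flight work can drain, clear the QTX_ENA bits, and finally wait
 * for the hardware to report all queues disabled.
 */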
*/ val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1)); - qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) - >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; + qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, + val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val); @@ -4378,8 +5022,8 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) val = rd32(hw, I40E_QINT_TQCTL(qp)); - next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK) - >> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT; + next = FIELD_GET(I40E_QINT_TQCTL_NEXTQ_INDX_MASK, + val); val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK | I40E_QINT_TQCTL_MSIX0_INDX_MASK | @@ -4397,8 +5041,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) free_irq(pf->pdev->irq, pf); val = rd32(hw, I40E_PFINT_LNKLST0); - qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) - >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; + qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLST0, val); @@ -4483,16 +5126,17 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi) static void i40e_reset_interrupt_capability(struct i40e_pf *pf) { /* If we're in Legacy mode, the interrupt was cleaned in vsi_close */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { pci_disable_msix(pf->pdev); kfree(pf->msix_entries); pf->msix_entries = NULL; kfree(pf->irq_pile); pf->irq_pile = NULL; - } else if (pf->flags & I40E_FLAG_MSI_ENABLED) { + } else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { pci_disable_msi(pf->pdev); } - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); } /** @@ -4504,21 +5148,20 @@ static void i40e_reset_interrupt_capability(struct i40e_pf *pf) **/ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { + struct i40e_vsi *vsi; int i; - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i]) - i40e_vsi_free_q_vectors(pf->vsi[i]); + + i40e_pf_for_each_vsi(pf, i, vsi) + i40e_vsi_free_q_vectors(vsi); + i40e_reset_interrupt_capability(pf); } @@ -4573,9 +5216,9 @@ static void i40e_vsi_close(struct i40e_vsi *vsi) i40e_vsi_free_tx_resources(vsi); i40e_vsi_free_rx_resources(vsi); vsi->current_netdev_flags = 0; - pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) - pf->flags |= I40E_FLAG_CLIENT_RESET; + set_bit(__I40E_CLIENT_RESET, pf->state); } /** @@ -4615,12 +5258,11 @@ static void i40e_unquiesce_vsi(struct i40e_vsi *vsi) **/ static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf) { + struct i40e_vsi *vsi; int v; - for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) - i40e_quiesce_vsi(pf->vsi[v]); - } + i40e_pf_for_each_vsi(pf, v, vsi) + i40e_quiesce_vsi(vsi); } /** @@ -4629,12 +5271,11 @@ static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf) **/ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) { + struct i40e_vsi *vsi; int v; - for (v = 0; v < pf->num_alloc_vsi; v++) { - if 
(pf->vsi[v]) - i40e_unquiesce_vsi(pf->vsi[v]); - } + i40e_pf_for_each_vsi(pf, v, vsi) + i40e_unquiesce_vsi(vsi); } /** @@ -4695,14 +5336,13 @@ wait_rx: **/ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { + struct i40e_vsi *vsi; int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { - if (pf->vsi[v]) { - ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); - if (ret) - break; - } + i40e_pf_for_each_vsi(pf, v, vsi) { + ret = i40e_vsi_wait_queues_disabled(vsi); + if (ret) + break; } return ret; @@ -4711,104 +5351,6 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) #endif /** - * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue - * @q_idx: TX queue number - * @vsi: Pointer to VSI struct - * - * This function checks specified queue for given VSI. Detects hung condition. - * We proactively detect hung TX queues by checking if interrupts are disabled - * but there are pending descriptors. If it appears hung, attempt to recover - * by triggering a SW interrupt. - **/ -static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) -{ - struct i40e_ring *tx_ring = NULL; - struct i40e_pf *pf; - u32 val, tx_pending; - int i; - - pf = vsi->back; - - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (q_idx == vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } - } - } - - if (!tx_ring) - return; - - /* Read interrupt register */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) - val = rd32(&pf->hw, - I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + - tx_ring->vsi->base_vector - 1)); - else - val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - - tx_pending = i40e_get_tx_pending(tx_ring); - - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. - */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); -} - -/** - * i40e_detect_recover_hung - Function to detect and recover hung_queues - * @pf: pointer to PF struct - * - * LAN VSI has netdev and netdev has TX queues. This function is to check - * each of those TX queues if they are hung, trigger recovery by issuing - * SW interrupt. 
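/* The repeated "for each allocated VSI" loops above collapse into the
 * i40e_pf_for_each_vsi() iterator. Its real definition lives in i40e.h
 * and is not shown in this diff; one plausible shape, for illustration
 * only:
 *
 *	#define i40e_pf_for_each_vsi(pf, i, vsi)			\
 *		for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)		\
 *			if (((vsi) = (pf)->vsi[(i)]) != NULL)
 *
 * which skips NULL slots while still letting callers break out early.
 */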
- **/ -static void i40e_detect_recover_hung(struct i40e_pf *pf) -{ - struct net_device *netdev; - struct i40e_vsi *vsi; - int i; - - /* Only for LAN VSI */ - vsi = pf->vsi[pf->lan_vsi]; - - if (!vsi) - return; - - /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */ - if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) - return; - - /* Make sure type is MAIN VSI */ - if (vsi->type != I40E_VSI_MAIN) - return; - - netdev = vsi->netdev; - if (!netdev) - return; - - /* Bail out if netif_carrier is not OK */ - if (!netif_carrier_ok(netdev)) - return; - - /* Go thru' TX queues for netdev */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - - q = netdev_get_tx_queue(netdev, i); - if (q) - i40e_detect_recover_hung_queue(i, vsi); - } -} - -/** * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP * @pf: pointer to PF * @@ -4899,6 +5441,24 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) } /** + * i40e_mqprio_get_enabled_tc - Get enabled traffic classes + * @pf: PF being queried + * + * Query the current MQPRIO configuration and return the number of + * traffic classes enabled. + **/ +static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + u8 num_tc = vsi->mqprio_qopt.qopt.num_tc; + u8 enabled_tc = 1, i; + + for (i = 1; i < num_tc; i++) + enabled_tc |= BIT(i); + return enabled_tc; +} + +/** * i40e_pf_get_num_tc - Get enabled traffic classes for PF * @pf: PF being queried * @@ -4906,18 +5466,22 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) **/ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) { - struct i40e_hw *hw = &pf->hw; u8 i, enabled_tc = 1; u8 num_tc = 0; - struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; - /* If DCB is not enabled then always in single TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (i40e_is_tc_mqprio_enabled(pf)) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + + return vsi->mqprio_qopt.qopt.num_tc; + } + + /* If neither MQPRIO nor DCB is enabled, then always use single TC */ + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return 1; /* SFP mode will be enabled for all TCs on port */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - return i40e_dcb_get_num_tc(dcbcfg); + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) + return i40e_dcb_get_num_tc(&pf->hw.local_dcbx_config); /* MFP mode return count of enabled TCs for this PF */ if (pf->hw.func_caps.iscsi) @@ -4933,19 +5497,24 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) } /** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. 
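/* Worked example for i40e_mqprio_get_enabled_tc() above: num_tc = 3
 * starts from enabled_tc = 1 (TC0 is always present) and ORs in BIT(1)
 * and BIT(2), returning 0x7; num_tc = 1 skips the loop and returns 0x1.
 */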
**/ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { - /* If DCB is not enabled for this PF then just return default TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (i40e_is_tc_mqprio_enabled(pf)) + return i40e_mqprio_get_enabled_tc(pf); + + /* If neither MQPRIO nor DCB is enabled for this PF then just return + * default TC + */ + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config); /* MFP enabled and iSCSI PF type */ @@ -4967,17 +5536,17 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - i40e_status ret; u32 tc_bw_max; + int ret; int i; /* Get the VSI level BW configuration */ ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); if (ret) { dev_info(&pf->pdev->dev, - "couldn't get PF vsi bw config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "couldn't get PF vsi bw config, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); return -EINVAL; } @@ -4986,9 +5555,9 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) NULL); if (ret) { dev_info(&pf->pdev->dev, - "couldn't get PF vsi ets bw config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "couldn't get PF vsi ets bw config, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); return -EINVAL; } @@ -5019,7 +5588,7 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC * @vsi: the VSI being configured * @enabled_tc: TC bitmap - * @bw_credits: BW shared credits per TC + * @bw_share: BW shared credits per TC * * Returns 0 on success, negative value on failure **/ @@ -5027,19 +5596,31 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; - i40e_status ret; + struct i40e_pf *pf = vsi->back; + int ret; int i; + /* There is no need to reset BW when mqprio mode is on. 
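+	 * (Per-channel rates are torn down separately, via the
+	 * i40e_set_bw_limit(vsi, ch->seid, 0) calls in
+	 * i40e_remove_queue_channels().)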
*/ + if (i40e_is_tc_mqprio_enabled(pf)) + return 0; + if (!vsi->mqprio_qopt.qopt.hw && !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); + if (ret) + dev_info(&pf->pdev->dev, + "Failed to reset tx rate for vsi->seid %u\n", + vsi->seid); + return ret; + } + memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; - ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data, - NULL); + ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "AQ command Config VSI BW allocation per TC failed = %d\n", - vsi->back->hw.aq.asq_last_status); + pf->hw.aq.asq_last_status); return -EINVAL; } @@ -5092,6 +5673,9 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) vsi->tc_config.tc_info[i].qoffset); } + if (i40e_is_tc_mqprio_enabled(pf)) + return; + /* Assign UP2TC map for the VSI */ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { /* Get the actual TC# for the UP */ @@ -5122,6 +5706,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, } /** + * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI + * @vsi: the VSI being reconfigured + * @vsi_offset: offset from main VF VSI + */ +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) +{ + struct i40e_vsi_context ctxt = {}; + struct i40e_pf *pf; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return -EINVAL; + pf = vsi->back; + hw = &pf->hw; + + ctxt.seid = vsi->seid; + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + ctxt.flags = I40E_AQ_VSI_TYPE_VF; + ctxt.info = vsi->info; + + i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, + false); + if (vsi->reconfig_rss) { + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, "Update vsi config failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + return ret; +} + +/** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured * @enabled_tc: TC bitmap @@ -5137,12 +5773,15 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) { u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; int ret = 0; int i; /* Check if enabled_tc is same as existing or new TCs */ - if (vsi->tc_config.enabled_tc == enabled_tc) + if (vsi->tc_config.enabled_tc == enabled_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) return ret; /* Enable ETS TCs with equal BW Share for now across all VSIs */ @@ -5153,10 +5792,40 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) { - dev_info(&vsi->back->pdev->dev, + struct i40e_aqc_query_vsi_bw_config_resp 
bw_config = {0}; + + dev_info(&pf->pdev->dev, "Failed configuring TC map %d for VSI %d\n", enabled_tc, vsi->seid); - goto out; + ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, + &bw_config, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed querying vsi bw info, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); + goto out; + } + if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) { + u8 valid_tc = bw_config.tc_valid_bits & enabled_tc; + + if (!valid_tc) + valid_tc = bw_config.tc_valid_bits; + /* Always enable TC0, no matter what */ + valid_tc |= 1; + dev_info(&pf->pdev->dev, + "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n", + enabled_tc, bw_config.tc_valid_bits, valid_tc); + enabled_tc = valid_tc; + } + + ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); + if (ret) { + dev_err(&pf->pdev->dev, + "Unable to configure TC map %d for VSI %d\n", + enabled_tc, vsi->seid); + goto out; + } } /* Update Queue Pairs Mapping for currently enabled UPs */ @@ -5165,22 +5834,43 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; - i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + if (i40e_is_tc_mqprio_enabled(pf)) { + ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); + if (ret) + goto out; + } else { + i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + /* On destroying the qdisc, reset vsi->rss_size, as number of enabled + * queues changed. + */ + if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) { + vsi->rss_size = min_t(int, vsi->back->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } - /* Update the VSI after updating the VSI queue-mapping information */ - ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + /* Update the VSI after updating the VSI queue-mapping + * information + */ + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, - "Update vsi tc config failed, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + dev_info(&pf->pdev->dev, + "Update vsi tc config failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); goto out; } /* update the local VSI info with updated queue map */ @@ -5190,11 +5880,10 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) /* Update current VSI BW information */ ret = i40e_vsi_get_bw_info(vsi); if (ret) { - dev_info(&vsi->back->pdev->dev, - "Failed updating vsi bw info, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + dev_info(&pf->pdev->dev, + "Failed updating vsi bw info, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); goto out; } @@ -5205,6 +5894,833 @@ out: } /** + * i40e_vsi_reconfig_tc - Reconfigure VSI Tx Scheduler for stored TC map + * @vsi: VSI to be reconfigured + * + * This reconfigures a particular VSI for TCs that are mapped to the + * TC bitmap 
stored previously for the VSI. + * + * Context: It is expected that the VSI queues have been quisced before + * calling this function. + * + * Return: 0 on success, negative value on failure + **/ +static int i40e_vsi_reconfig_tc(struct i40e_vsi *vsi) +{ + u8 enabled_tc; + + enabled_tc = vsi->tc_config.enabled_tc; + vsi->tc_config.enabled_tc = 0; + + return i40e_vsi_config_tc(vsi, enabled_tc); +} + +/** + * i40e_get_link_speed - Returns link speed for the interface + * @vsi: VSI to be configured + * + **/ +static int i40e_get_link_speed(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + switch (pf->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + return 40000; + case I40E_LINK_SPEED_25GB: + return 25000; + case I40E_LINK_SPEED_20GB: + return 20000; + case I40E_LINK_SPEED_10GB: + return 10000; + case I40E_LINK_SPEED_1GB: + return 1000; + default: + return -EINVAL; + } +} + +/** + * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits + * @vsi: Pointer to vsi structure + * @max_tx_rate: max TX rate in bytes to be converted into Mbits + * + * Helper function to convert units before send to set BW limit + **/ +static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate) +{ + if (max_tx_rate < I40E_BW_MBPS_DIVISOR) { + dev_warn(&vsi->back->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); + max_tx_rate = I40E_BW_CREDIT_DIVISOR; + } else { + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + } + + return max_tx_rate; +} + +/** + * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @seid: seid of the channel/VSI + * @max_tx_rate: max TX rate to be configured as BW limit + * + * Helper function to set BW limit for a given VSI + **/ +int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) +{ + struct i40e_pf *pf = vsi->back; + u64 credits = 0; + int speed = 0; + int ret = 0; + + speed = i40e_get_link_speed(vsi); + if (max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VSI seid %d.", + max_tx_rate, seid); + return -EINVAL; + } + if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) { + dev_warn(&pf->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); + max_tx_rate = I40E_BW_CREDIT_DIVISOR; + } + + /* Tx rate credits are in values of 50Mbps, 0 is disabled */ + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits, + I40E_MAX_BW_INACTIVE_ACCUM, NULL); + if (ret) + dev_err(&pf->pdev->dev, + "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %pe aq_err %s\n", + max_tx_rate, seid, ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); + return ret; +} + +/** + * i40e_remove_queue_channels - Remove queue channels for the TCs + * @vsi: VSI to be configured + * + * Remove queue channels for the TCs + **/ +static void i40e_remove_queue_channels(struct i40e_vsi *vsi) +{ + struct i40e_cloud_filter *cfilter; + enum libie_aq_err last_aq_status; + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; + int ret, i; + + /* Reset rss size that was stored when reconfiguring rss for + * channel VSIs with non-power-of-2 queue count. 
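+	 * (i40e_vsi_reconfig_rss() records it, alongside orig_rss_size,
+	 * so that later queue-count validation can tell the narrowed RSS
+	 * spread from the default one; clearing it re-arms those checks.)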
+ */ + vsi->current_rss_size = 0; + + /* perform cleanup for channels if they exist */ + if (list_empty(&vsi->ch_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct i40e_vsi *p_vsi; + + list_del(&ch->list); + p_vsi = ch->parent_vsi; + if (!p_vsi || !ch->initialized) { + kfree(ch); + continue; + } + /* Reset queue contexts */ + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } + + /* Reset BW configured for this VSI via mqprio */ + ret = i40e_set_bw_limit(vsi, ch->seid, 0); + if (ret) + dev_info(&vsi->back->pdev->dev, + "Failed to reset tx rate for ch->seid %u\n", + ch->seid); + + /* delete cloud filters associated with this channel */ + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + if (cfilter->seid != ch->seid) + continue; + + hash_del(&cfilter->cloud_node); + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, + cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, + false); + last_aq_status = pf->hw.aq.asq_last_status; + if (ret) + dev_info(&pf->pdev->dev, + "Failed to delete cloud filter, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(last_aq_status)); + kfree(cfilter); + } + + /* delete VSI from FW */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, p_vsi->seid); + kfree(ch); + } + INIT_LIST_HEAD(&vsi->ch_list); +} + +/** + * i40e_get_max_queues_for_channel + * @vsi: ptr to VSI to which channels are associated with + * + * Helper function which returns max value among the queue counts set on the + * channels/TCs created. + **/ +static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int max = 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + continue; + if (ch->num_queue_pairs > max) + max = ch->num_queue_pairs; + } + + return max; +} + +/** + * i40e_validate_num_queues - validate num_queues w.r.t channel + * @pf: ptr to PF device + * @num_queues: number of queues + * @vsi: the parent VSI + * @reconfig_rss: indicates should the RSS be reconfigured or not + * + * This function validates number of queues in the context of new channel + * which is being established and determines if RSS should be reconfigured + * or not for parent VSI. + **/ +static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues, + struct i40e_vsi *vsi, bool *reconfig_rss) +{ + int max_ch_queues; + + if (!reconfig_rss) + return -EINVAL; + + *reconfig_rss = false; + if (vsi->current_rss_size) { + if (num_queues > vsi->current_rss_size) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) > vsi's current_size(%d)\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } else if ((num_queues < vsi->current_rss_size) && + (!is_power_of_2(num_queues))) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } + } + + if (!is_power_of_2(num_queues)) { + /* Find the max num_queues configured for channel if channel + * exist. + * if channel exist, then enforce 'num_queues' to be more than + * max ever queues configured for channel. 
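+	 * For example, with existing channels of 4 and 8 queues, a
+	 * non-power-of-2 request of 6 queues is rejected (6 < 8), while
+	 * a request of 10 queues passes and sets *reconfig_rss, assuming
+	 * the current_rss_size checks above are also satisfied.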
+ */ + max_ch_queues = i40e_get_max_queues_for_channel(vsi); + if (num_queues < max_ch_queues) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < max queues configured for channel(%d)\n", + num_queues, max_ch_queues); + return -EINVAL; + } + *reconfig_rss = true; + } + + return 0; +} + +/** + * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size + * @vsi: the VSI being setup + * @rss_size: size of RSS, accordingly LUT gets reprogrammed + * + * This function reconfigures RSS by reprogramming LUTs using 'rss_size' + **/ +static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + struct i40e_hw *hw = &pf->hw; + int local_rss_size; + u8 *lut; + int ret; + + if (!vsi->rss_size) + return -EINVAL; + + if (rss_size > vsi->rss_size) + return -EINVAL; + + local_rss_size = min_t(int, vsi->rss_size, rss_size); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Ignoring user configured lut if there is one */ + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(hw->aq.asq_last_status)); + kfree(lut); + return ret; + } + kfree(lut); + + /* Do the update w.r.t. storing rss_size */ + if (!vsi->orig_rss_size) + vsi->orig_rss_size = vsi->rss_size; + vsi->current_rss_size = local_rss_size; + + return ret; +} + +/** + * i40e_channel_setup_queue_map - Setup a channel queue map + * @pf: ptr to PF device + * @ctxt: VSI context structure + * @ch: ptr to channel structure + * + * Setup queue map for a specific channel + **/ +static void i40e_channel_setup_queue_map(struct i40e_pf *pf, + struct i40e_vsi_context *ctxt, + struct i40e_channel *ch) +{ + u16 qcount, qmap, sections = 0; + u8 offset = 0; + int pow; + + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + + qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix); + ch->num_queue_pairs = qcount; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(qcount); + if (!is_power_of_2(qcount)) + pow++; + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + + ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */ + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); +} + +/** + * i40e_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @uplink_seid: underlying HW switching element (VEB) ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + **/ +static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, + struct i40e_channel *ch) +{ + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u8 enabled_tc = 0x1; /* TC0 enabled */ + int ret; + + if (ch->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "add new vsi failed, ch->type %d\n", ch->type); + return -EINVAL; + } + + memset(&ctxt, 0, 
sizeof(ctxt)); + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + if (ch->type == I40E_VSI_VMDQ2) + ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; + + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + } + + /* Set queue map for a given VSI context */ + i40e_channel_setup_queue_map(pf, &ctxt, ch); + + /* Now time to create VSI */ + ret = i40e_aq_add_vsi(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "add new vsi failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); + return -ENOENT; + } + + /* Success, update channel, set enabled_tc only if the channel + * is not a macvlan + */ + ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; + ch->seid = ctxt.seid; + ch->vsi_number = ctxt.vsi_number; + ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx); + + /* copy just the sections touched not the entire info + * since not all sections are valid as returned by + * update vsi params + */ + ch->info.mapping_flags = ctxt.info.mapping_flags; + memcpy(&ch->info.queue_mapping, + &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping)); + memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping, + sizeof(ctxt.info.tc_mapping)); + + return 0; +} + +static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch, + u8 *bw_share) +{ + struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + int ret; + int i; + + memset(&bw_data, 0, sizeof(bw_data)); + bw_data.tc_valid_bits = ch->enabled_tc; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + bw_data.tc_bw_credits[i] = bw_share[i]; + + ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid, + &bw_data, NULL); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n", + vsi->back->hw.aq.asq_last_status, ch->seid); + return -EINVAL; + } + + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + ch->info.qs_handle[i] = bw_data.qs_handles[i]; + + return 0; +} + +/** + * i40e_channel_config_tx_ring - config TX ring associated with new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure TX rings associated with channel (VSI) since queues are being + * from parent VSI. 
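+ *
+ * (The channel owns no rings of its own: it is handed the contiguous
+ * slice [base_queue, base_queue + num_queue_pairs) of the parent's
+ * rings, and each ring's ->ch pointer is repointed accordingly.)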
+ **/ +static int i40e_channel_config_tx_ring(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; + int ret; + int i; + + /* Enable ETS TCs with equal BW Share for now across all VSIs */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (ch->enabled_tc & BIT(i)) + bw_share[i] = 1; + } + + /* configure BW for new VSI */ + ret = i40e_channel_config_bw(vsi, ch, bw_share); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed configuring TC map %d for channel (seid %u)\n", + ch->enabled_tc, ch->seid); + return ret; + } + + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr of main VSI, for re-setup TX queue + * context + */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + + return 0; +} + +/** + * i40e_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @uplink_seid: underlying HW switching element (VEB) ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures TX rings accordingly + **/ +static inline int i40e_setup_hw_channel(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch, + u16 uplink_seid, u8 type) +{ + int ret; + + ch->initialized = false; + ch->base_queue = vsi->next_base_queue; + ch->type = type; + + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(pf, uplink_seid, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to add_channel using uplink_seid %u\n", + uplink_seid); + return ret; + } + + /* Mark the successful creation of channel */ + ch->initialized = true; + + /* Reconfigure TX queues using QTX_CTL register */ + ret = i40e_channel_config_tx_ring(pf, vsi, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to configure TX rings for channel %u\n", + ch->seid); + return ret; + } + + /* update 'next_base_queue' */ + vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs; + dev_dbg(&pf->pdev->dev, + "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n", + ch->seid, ch->vsi_number, ch->stat_counter_idx, + ch->num_queue_pairs, + vsi->next_base_queue); + return ret; +} + +/** + * i40e_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @vsi: pointer to the VSI to set up the channel within + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element (uplink_seid) + **/ +static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + struct i40e_vsi *main_vsi; + u8 vsi_type; + u16 seid; + int ret; + + if (vsi->type == I40E_VSI_MAIN) { + vsi_type = I40E_VSI_VMDQ2; + } else { + dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n", + vsi->type); + return false; + } + + /* underlying switching element */ + main_vsi = i40e_pf_get_main_vsi(pf); + seid = main_vsi->uplink_seid; + + /* create channel (VSI), configure TX rings */ + ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); + if (ret) { + dev_err(&pf->pdev->dev, "failed to setup hw_channel\n"); + return false; + } + + return ch->initialized ? 
true : false; +} + +/** + * i40e_validate_and_set_switch_mode - sets up switch mode correctly + * @vsi: ptr to VSI which has PF backing + * + * Sets up switch mode correctly if it needs to be changed and perform + * what are allowed modes. + **/ +static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) +{ + u8 mode; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret; + + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities); + if (ret) + return -EINVAL; + + if (hw->dev_caps.switch_mode) { + /* if switch mode is set, support mode2 (non-tunneled for + * cloud filter) for now + */ + u32 switch_mode = hw->dev_caps.switch_mode & + I40E_SWITCH_MODE_MASK; + if (switch_mode >= I40E_CLOUD_FILTER_MODE1) { + if (switch_mode == I40E_CLOUD_FILTER_MODE2) + return 0; + dev_err(&pf->pdev->dev, + "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n", + hw->dev_caps.switch_mode); + return -EINVAL; + } + } + + /* Set Bit 7 to be valid */ + mode = I40E_AQ_SET_SWITCH_BIT7_VALID; + + /* Set L4type for TCP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; + + /* Set cloud filter mode */ + mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; + + /* Prep mode field for set_switch_config */ + ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags, + pf->last_sw_conf_valid_flags, + mode, NULL); + if (ret && hw->aq.asq_last_status != LIBIE_AQ_RC_ESRCH) + dev_err(&pf->pdev->dev, + "couldn't set switch config bits, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); + + return ret; +} + +/** + * i40e_create_queue_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + **/ +int i40e_create_queue_channel(struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + struct i40e_pf *pf = vsi->back; + bool reconfig_rss; + int err; + + if (!ch) + return -EINVAL; + + if (!ch->num_queue_pairs) { + dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* validate user requested num_queues for channel */ + err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi, + &reconfig_rss); + if (err) { + dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* By default we are in VEPA mode, if this is the first VF/VMDq + * VSI to be added switch to VEB mode. 
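+	 * (VEB mode enables local switching between VSIs on the same
+	 * uplink; the mode change only takes effect through the PF reset
+	 * issued below.)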
+ */ + + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); + + if (vsi->type == I40E_VSI_MAIN) { + if (i40e_is_tc_mqprio_enabled(pf)) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + } + /* now onwards for main VSI, number of queues will be value + * of TC0's queue count + */ + } + + /* By this time, vsi->cnt_q_avail shall be set to non-zero and + * it should be more than num_queues + */ + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) { + dev_dbg(&pf->pdev->dev, + "Error: cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_queue_pairs); + return -EINVAL; + } + + /* reconfig_rss only if vsi type is MAIN_VSI */ + if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) { + err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs); + if (err) { + dev_info(&pf->pdev->dev, + "Error: unable to reconfig rss for num_queues (%u)\n", + ch->num_queue_pairs); + return -EINVAL; + } + } + + if (!i40e_setup_channel(pf, vsi, ch)) { + dev_info(&pf->pdev->dev, "Failed to setup channel\n"); + return -EINVAL; + } + + dev_info(&pf->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + ch->seid, ch->num_queue_pairs); + + /* configure VSI for BW limit */ + if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + + if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) + return -EINVAL; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&pf->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + credits, + ch->seid); + } + + /* in case of VF, this will be main SRIOV VSI */ + ch->parent_vsi = vsi; + + /* and update main_vsi's count for queue_available to use */ + vsi->cnt_q_avail -= ch->num_queue_pairs; + + return 0; +} + +/** + * i40e_configure_queue_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + **/ +static int i40e_configure_queue_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch; + u64 max_rate = 0; + int ret = 0, i; + + /* Create app vsi with the TCs. 
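+	 * One channel VSI is created per enabled TC above TC0, each with
+	 * its own queue count, queue offset and optional max_tx_rate.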
Main VSI with TC0 is already set up */ + vsi->tc_seid_map[0] = vsi->seid; + for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (vsi->tc_config.enabled_tc & BIT(i)) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = + vsi->tc_config.tc_info[i].qcount; + ch->base_queue = + vsi->tc_config.tc_info[i].qoffset; + + /* Bandwidth limit through tc interface is in bytes/s, + * change to Mbit/s + */ + max_rate = vsi->mqprio_qopt.max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + ch->max_tx_rate = max_rate; + + list_add_tail(&ch->list, &vsi->ch_list); + + ret = i40e_create_queue_channel(vsi, ch); + if (ret) { + dev_err(&vsi->back->pdev->dev, + "Failed creating queue channel with TC%d: queues %d\n", + i, ch->num_queue_pairs); + goto err_free; + } + vsi->tc_seid_map[i] = ch->seid; + } + } + + /* reset to reconfigure TX queue contexts */ + i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true); + return ret; + +err_free: + i40e_remove_queue_channels(vsi); + return ret; +} + +/** * i40e_veb_config_tc - Configure TCs for given VEB * @veb: given VEB * @enabled_tc: TC bitmap @@ -5235,9 +6751,8 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc) &bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, - "VEB bw config failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "VEB bw config failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); goto out; } @@ -5245,9 +6760,8 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc) ret = i40e_veb_get_bw_info(veb); if (ret) { dev_info(&pf->pdev->dev, - "Failed getting veb bw config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "Failed getting veb bw config, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); } out: @@ -5265,48 +6779,48 @@ out: **/ static void i40e_dcb_reconfigure(struct i40e_pf *pf) { + struct i40e_vsi *vsi; + struct i40e_veb *veb; u8 tc_map = 0; int ret; - u8 v; + int v; /* Enable the TCs available on PF to all VEBs */ tc_map = i40e_pf_get_tc_map(pf); - for (v = 0; v < I40E_MAX_VEB; v++) { - if (!pf->veb[v]) - continue; - ret = i40e_veb_config_tc(pf->veb[v], tc_map); + if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS) + return; + + i40e_pf_for_each_veb(pf, v, veb) { + ret = i40e_veb_config_tc(veb, tc_map); if (ret) { dev_info(&pf->pdev->dev, "Failed configuring TC for VEB seid=%d\n", - pf->veb[v]->seid); + veb->seid); /* Will try to configure as many components */ } } /* Update each VSI */ - for (v = 0; v < pf->num_alloc_vsi; v++) { - if (!pf->vsi[v]) - continue; - + i40e_pf_for_each_vsi(pf, v, vsi) { /* - Enable all TCs for the LAN VSI * - For all others keep them at TC0 for now */ - if (v == pf->lan_vsi) + if (vsi->type == I40E_VSI_MAIN) tc_map = i40e_pf_get_tc_map(pf); else tc_map = I40E_DEFAULT_TRAFFIC_CLASS; - ret = i40e_vsi_config_tc(pf->vsi[v], tc_map); + ret = i40e_vsi_config_tc(vsi, tc_map); if (ret) { dev_info(&pf->pdev->dev, "Failed configuring TC for VSI seid=%d\n", - pf->vsi[v]->seid); + vsi->seid); /* Will try to configure as many components */ } else { /* Re-configure VSI vectors based on updated TC map */ - i40e_vsi_map_rings_to_vectors(pf->vsi[v]); - if (pf->vsi[v]->netdev) - i40e_dcbnl_set_all(pf->vsi[v]); + i40e_vsi_map_rings_to_vectors(vsi); + if (vsi->netdev) + i40e_dcbnl_set_all(vsi); } } } @@ -5326,9 +6840,33 @@ static int 
i40e_resume_port_tx(struct i40e_pf *pf) ret = i40e_aq_resume_port_tx(hw, NULL); if (ret) { dev_info(&pf->pdev->dev, - "Resume Port Tx failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "Resume Port Tx failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); + /* Schedule PF reset to recover */ + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); + i40e_service_event_schedule(pf); + } + + return ret; +} + +/** + * i40e_suspend_port_tx - Suspend port Tx + * @pf: PF struct + * + * Suspend a port's Tx and issue a PF reset in case of failure. + **/ +static int i40e_suspend_port_tx(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + int ret; + + ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Suspend Port Tx failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); @@ -5338,6 +6876,289 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) } /** + * i40e_hw_set_dcb_config - Program new DCBX settings into HW + * @pf: PF being configured + * @new_cfg: New DCBX configuration + * + * Program DCB settings into HW and reconfigure VEB/VSIs on + * given PF. Uses "Set LLDP MIB" AQC to program the hardware. + **/ +static int i40e_hw_set_dcb_config(struct i40e_pf *pf, + struct i40e_dcbx_config *new_cfg) +{ + struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config; + int ret; + + /* Check if need reconfiguration */ + if (!memcmp(&new_cfg, &old_cfg, sizeof(new_cfg))) { + dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n"); + return 0; + } + + /* Config change disable all VSIs */ + i40e_pf_quiesce_all_vsi(pf); + + /* Copy the new config to the current config */ + *old_cfg = *new_cfg; + old_cfg->etsrec = old_cfg->etscfg; + ret = i40e_set_dcb_config(&pf->hw); + if (ret) { + dev_info(&pf->pdev->dev, + "Set DCB Config failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); + goto out; + } + + /* Changes in configuration update VEB/VSI */ + i40e_dcb_reconfigure(pf); +out: + /* In case of reset do not try to resume anything */ + if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Re-start the VSIs if disabled */ + ret = i40e_resume_port_tx(pf); + /* In case of error no point in resuming VSIs */ + if (ret) + goto err; + i40e_pf_unquiesce_all_vsi(pf); + } +err: + return ret; +} + +/** + * i40e_hw_dcb_config - Program new DCBX settings into HW + * @pf: PF being configured + * @new_cfg: New DCBX configuration + * + * Program DCB settings into HW and reconfigure VEB/VSIs on + * given PF + **/ +int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) +{ + struct i40e_aqc_configure_switching_comp_ets_data ets_data; + u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0}; + u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS]; + struct i40e_dcbx_config *old_cfg; + u8 mode[I40E_MAX_TRAFFIC_CLASS]; + struct i40e_rx_pb_config pb_cfg; + struct i40e_hw *hw = &pf->hw; + u8 num_ports = hw->num_ports; + bool need_reconfig; + int ret = -EINVAL; + u8 lltc_map = 0; + u8 tc_map = 0; + u8 new_numtc; + u8 i; + + dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n"); + /* Un-pack information to Program ETS HW via shared API + * numtc, tcmap + * LLTC map + * ETS/NON-ETS arbiter mode + * max exponent (credit refills) + * Total number of ports + * PFC priority bit-map + * Priority Table + * BW % 
per TC + * Arbiter mode between UPs sharing same TC + * TSA table (ETS or non-ETS) + * EEE enabled or not + * MFS TC table + */ + + new_numtc = i40e_dcb_get_num_tc(new_cfg); + + memset(&ets_data, 0, sizeof(ets_data)); + for (i = 0; i < new_numtc; i++) { + tc_map |= BIT(i); + switch (new_cfg->etscfg.tsatable[i]) { + case I40E_IEEE_TSA_ETS: + prio_type[i] = I40E_DCB_PRIO_TYPE_ETS; + ets_data.tc_bw_share_credits[i] = + new_cfg->etscfg.tcbwtable[i]; + break; + case I40E_IEEE_TSA_STRICT: + prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT; + lltc_map |= BIT(i); + ets_data.tc_bw_share_credits[i] = + I40E_DCB_STRICT_PRIO_CREDITS; + break; + default: + /* Invalid TSA type */ + need_reconfig = false; + goto out; + } + } + + old_cfg = &hw->local_dcbx_config; + /* Check if need reconfiguration */ + need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg); + + /* If needed, enable/disable frame tagging, disable all VSIs + * and suspend port tx + */ + if (need_reconfig) { + /* Enable DCB tagging only when more than one TC */ + if (new_numtc > 1) + set_bit(I40E_FLAG_DCB_ENA, pf->flags); + else + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + + set_bit(__I40E_PORT_SUSPENDED, pf->state); + /* Reconfiguration needed quiesce all VSIs */ + i40e_pf_quiesce_all_vsi(pf); + ret = i40e_suspend_port_tx(pf); + if (ret) + goto err; + } + + /* Configure Port ETS Tx Scheduler */ + ets_data.tc_valid_bits = tc_map; + ets_data.tc_strict_priority_flags = lltc_map; + ret = i40e_aq_config_switch_comp_ets + (hw, pf->mac_seid, &ets_data, + i40e_aqc_opc_modify_switching_comp_ets, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Modify Port ETS failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); + goto out; + } + + /* Configure Rx ETS HW */ + memset(&mode, I40E_DCB_ARB_MODE_ROUND_ROBIN, sizeof(mode)); + i40e_dcb_hw_set_num_tc(hw, new_numtc); + i40e_dcb_hw_rx_fifo_config(hw, I40E_DCB_ARB_MODE_ROUND_ROBIN, + I40E_DCB_ARB_MODE_STRICT_PRIORITY, + I40E_DCB_DEFAULT_MAX_EXPONENT, + lltc_map); + i40e_dcb_hw_rx_cmd_monitor_config(hw, new_numtc, num_ports); + i40e_dcb_hw_rx_ets_bw_config(hw, new_cfg->etscfg.tcbwtable, mode, + prio_type); + i40e_dcb_hw_pfc_config(hw, new_cfg->pfc.pfcenable, + new_cfg->etscfg.prioritytable); + i40e_dcb_hw_rx_up2tc_config(hw, new_cfg->etscfg.prioritytable); + + /* Configure Rx Packet Buffers in HW */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + + mfs_tc[i] = main_vsi->netdev->mtu; + mfs_tc[i] += I40E_PACKET_HDR_PAD; + } + + i40e_dcb_hw_calculate_pool_sizes(hw, num_ports, + false, new_cfg->pfc.pfcenable, + mfs_tc, &pb_cfg); + i40e_dcb_hw_rx_pb_config(hw, &pf->pb_cfg, &pb_cfg); + + /* Update the local Rx Packet buffer config */ + pf->pb_cfg = pb_cfg; + + /* Inform the FW about changes to DCB configuration */ + ret = i40e_aq_dcb_updated(&pf->hw, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "DCB Updated failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); + goto out; + } + + /* Update the port DCBx configuration */ + *old_cfg = *new_cfg; + + /* Changes in configuration update VEB/VSI */ + i40e_dcb_reconfigure(pf); +out: + /* Re-start the VSIs if disabled */ + if (need_reconfig) { + ret = i40e_resume_port_tx(pf); + + clear_bit(__I40E_PORT_SUSPENDED, pf->state); + /* In case of error no point in resuming VSIs */ + if (ret) + goto err; + + /* Wait for the PF's queues to be disabled */ + ret = i40e_pf_wait_queues_disabled(pf); + if (ret) { + /* Schedule PF reset to recover */ + 
set_bit(__I40E_PF_RESET_REQUESTED, pf->state); + i40e_service_event_schedule(pf); + goto err; + } else { + i40e_pf_unquiesce_all_vsi(pf); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); + } + /* registers are set, lets apply */ + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) + ret = i40e_hw_set_dcb_config(pf, new_cfg); + } + +err: + return ret; +} + +/** + * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW + * @pf: PF being queried + * + * Set default DCB configuration in case DCB is to be done in SW. + **/ +int i40e_dcb_sw_default_config(struct i40e_pf *pf) +{ + struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config; + struct i40e_aqc_configure_switching_comp_ets_data ets_data; + struct i40e_hw *hw = &pf->hw; + int err; + + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) { + /* Update the local cached instance with TC0 ETS */ + memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config)); + pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; + pf->tmp_cfg.etscfg.maxtcs = 0; + pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; + pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; + pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING; + pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; + /* FW needs one App to configure HW */ + pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS; + pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE; + pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO; + pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE; + + return i40e_hw_set_dcb_config(pf, &pf->tmp_cfg); + } + + memset(&ets_data, 0, sizeof(ets_data)); + ets_data.tc_valid_bits = I40E_DEFAULT_TRAFFIC_CLASS; /* TC0 only */ + ets_data.tc_strict_priority_flags = 0; /* ETS */ + ets_data.tc_bw_share_credits[0] = I40E_IEEE_DEFAULT_ETS_TCBW; /* 100% to TC0 */ + + /* Enable ETS on the Physical port */ + err = i40e_aq_config_switch_comp_ets + (hw, pf->mac_seid, &ets_data, + i40e_aqc_opc_enable_switching_comp_ets, NULL); + if (err) { + dev_info(&pf->pdev->dev, + "Enable Port ETS failed, err %pe aq_err %s\n", + ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); + err = -ENOENT; + goto out; + } + + /* Update the local cached instance with TC0 ETS */ + dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; + dcb_cfg->etscfg.cbs = 0; + dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS; + dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; + +out: + return err; +} + +/** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * @@ -5347,14 +7168,32 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) static int i40e_init_pf_dcb(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - int err = 0; + int err; - /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */ - if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT) + /* Do not enable DCB for SW1 and SW2 images even if the FW is capable + * Also do not enable DCBx if FW LLDP agent is disabled + */ + if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) { + dev_info(&pf->pdev->dev, "DCB is not supported.\n"); + err = -EOPNOTSUPP; goto out; - - /* Get the initial DCB configuration */ - err = i40e_init_dcb(hw); + } + if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) { + dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n"); + err = i40e_dcb_sw_default_config(pf); + if (err) { + dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n"); + goto out; + } + dev_info(&pf->pdev->dev, "SW DCB 
initialization succeeded.\n"); + pf->dcbx_cap = DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + /* at init capable but disabled */ + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + goto out; + } + err = i40e_init_dcb(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) || @@ -5366,43 +7205,70 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE; - pf->flags |= I40E_FLAG_DCB_CAPABLE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Enable DCB tagging only when more than one TC * or explicitly disable if only one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n"); } + } else if (pf->hw.aq.asq_last_status == LIBIE_AQ_RC_EPERM) { + dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n"); + set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } else { dev_info(&pf->pdev->dev, - "Query for DCB configuration failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, err), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "Query for DCB configuration failed, err %pe aq_err %s\n", + ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); } out: return err; } #endif /* CONFIG_I40E_DCB */ -#define SPEED_SIZE 14 -#define FC_SIZE 8 + +static void i40e_print_link_message_eee(struct i40e_vsi *vsi, + const char *speed, const char *fc) +{ + struct ethtool_keee kedata; + + memzero_explicit(&kedata, sizeof(kedata)); + if (vsi->netdev->ethtool_ops->get_eee) + vsi->netdev->ethtool_ops->get_eee(vsi->netdev, &kedata); + + if (!linkmode_empty(kedata.supported)) + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Flow Control: %s, EEE: %s\n", + speed, fc, + kedata.eee_enabled ? 
"Enabled" : "Disabled"); + else + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", + speed, fc); +} + /** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message + * @isup: true of link is up, false otherwise */ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { enum i40e_aq_link_speed new_speed; + struct i40e_pf *pf = vsi->back; char *speed = "Unknown"; char *fc = "Unknown"; char *fec = ""; + char *req_fec = ""; char *an = ""; - new_speed = vsi->back->hw.phy.link_info.link_speed; + if (isup) + new_speed = pf->hw.phy.link_info.link_speed; + else + new_speed = I40E_LINK_SPEED_UNKNOWN; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -5416,13 +7282,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) /* Warn user if link speed on NPAR enabled partition is not at * least 10GB */ - if (vsi->back->hw.func_caps.npar_enable && - (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || - vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) + if (pf->hw.func_caps.npar_enable && + (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || + pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n"); - switch (vsi->back->hw.phy.link_info.link_speed) { + switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: speed = "40 G"; break; @@ -5435,6 +7301,12 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) case I40E_LINK_SPEED_10GB: speed = "10 G"; break; + case I40E_LINK_SPEED_5GB: + speed = "5 G"; + break; + case I40E_LINK_SPEED_2_5GB: + speed = "2.5 G"; + break; case I40E_LINK_SPEED_1GB: speed = "1000 M"; break; @@ -5445,7 +7317,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - switch (vsi->back->hw.fc.current_mode) { + switch (pf->hw.fc.current_mode) { case I40E_FC_FULL: fc = "RX/TX"; break; @@ -5460,23 +7332,58 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { - fec = ", FEC: None"; - an = ", Autoneg: False"; + if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + req_fec = "None"; + fec = "None"; + an = "False"; - if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) - an = ", Autoneg: True"; + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = "True"; - if (vsi->back->hw.phy.link_info.fec_info & + if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) - fec = ", FEC: CL74 FC-FEC/BASE-R"; - else if (vsi->back->hw.phy.link_info.fec_info & + fec = "CL74 FC-FEC/BASE-R"; + else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) - fec = ", FEC: CL108 RS-FEC"; + fec = "CL108 RS-FEC"; + + /* 'CL108 RS-FEC' should be displayed when RS is requested, or + * both RS and FC are requested + */ + if (vsi->back->hw.phy.link_info.req_fec_info & + (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) { + if (vsi->back->hw.phy.link_info.req_fec_info & + I40E_AQ_REQUEST_FEC_RS) + req_fec = "CL108 RS-FEC"; + else + req_fec = "CL74 FC-FEC/BASE-R"; + } + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, req_fec, fec, an, fc); + } else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) { + req_fec = "None"; + fec = "None"; + an = "False"; + + if (pf->hw.phy.link_info.an_info & 
I40E_AQ_AN_COMPLETED) + an = "True"; + + if (pf->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = "CL74 FC-FEC/BASE-R"; + + if (pf->hw.phy.link_info.req_fec_info & + I40E_AQ_REQUEST_FEC_KR) + req_fec = "CL74 FC-FEC/BASE-R"; + + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, req_fec, fec, an, fc); + } else { + i40e_print_link_message_eee(vsi, speed, fc); } - netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s, Flow Control: %s\n", - speed, fec, an, fc); } /** @@ -5488,7 +7395,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_vsi_configure_msix(vsi); else i40e_configure_msi_and_legacy(vsi); @@ -5507,15 +7414,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi) i40e_print_link_message(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - } else if (vsi->netdev) { - i40e_print_link_message(vsi, false); - /* need to check for qualified module here*/ - if ((pf->hw.phy.link_info.link_info & - I40E_AQ_MEDIA_AVAILABLE) && - (!(pf->hw.phy.link_info.an_info & - I40E_AQ_QUALIFIED_MODULE))) - netdev_err(vsi->netdev, - "the driver failed to link because an unqualified module was detected."); } /* replay FDIR SB filters */ @@ -5529,7 +7427,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) /* On the next run of the service_task, notify any clients of the new * opened netdev */ - pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); i40e_service_event_schedule(pf); return 0; @@ -5546,7 +7444,6 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - WARN_ON(in_interrupt()); while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) usleep_range(1000, 2000); i40e_down(vsi); @@ -5556,6 +7453,107 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi) } /** + * i40e_force_link_state - Force the link status + * @pf: board private structure + * @is_up: whether the link state should be forced up or down + **/ +static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) +{ + struct i40e_aq_get_phy_abilities_resp abilities; + struct i40e_aq_set_phy_config config = {0}; + bool non_zero_phy_type = is_up; + struct i40e_hw *hw = &pf->hw; + u64 mask; + u8 speed; + int err; + + /* Card might've been put in an unstable state by other drivers + * and applications, which causes incorrect speed values being + * set on startup. In order to clear speed registers, we call + * get_phy_capabilities twice, once to get initial state of + * available speeds, and once to get current PHY config. 
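+	 * (The first call, with the report_init argument set, should
+	 * return the NVM default abilities; the second, the live
+	 * configuration.)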
+ */ + err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, + NULL); + if (err) { + dev_err(&pf->pdev->dev, + "failed to get phy cap., ret = %pe last_status = %s\n", + ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status)); + return err; + } + speed = abilities.link_speed; + + /* Get the current phy config */ + err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (err) { + dev_err(&pf->pdev->dev, + "failed to get phy cap., ret = %pe last_status = %s\n", + ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status)); + return err; + } + + /* If link needs to go up, but was not forced to go down, + * and its speed values are OK, no need for a flap + * if non_zero_phy_type was set, still need to force up + */ + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) + non_zero_phy_type = true; + else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) + return 0; + + /* To force link we need to set bits for all supported PHY types, + * but there are now more than 32, so we need to split the bitmap + * across two fields. + */ + mask = I40E_PHY_TYPES_BITMASK; + config.phy_type = + non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; + config.phy_type_ext = + non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0; + /* Copy the old settings, except of phy_type */ + config.abilities = abilities.abilities; + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) { + if (is_up) + config.abilities |= I40E_AQ_PHY_ENABLE_LINK; + else + config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK); + } + if (abilities.link_speed != 0) + config.link_speed = abilities.link_speed; + else + config.link_speed = speed; + config.eee_capability = abilities.eee_capability; + config.eeer = abilities.eeer_val; + config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; + err = i40e_aq_set_phy_config(hw, &config, NULL); + + if (err) { + dev_err(&pf->pdev->dev, + "set phy config ret = %pe last_status = %s\n", + ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); + return err; + } + + /* Update the link info */ + err = i40e_update_link_info(hw); + if (err) { + /* Wait a little bit (on 40G cards it sometimes takes a really + * long time for link to come back from the atomic reset) + * and try once more + */ + msleep(1000); + i40e_update_link_info(hw); + } + + i40e_aq_set_link_restart_an(hw, is_up, NULL); + + return 0; +} + +/** * i40e_up - Bring the connection back up after being down * @vsi: the VSI being configured **/ @@ -5563,6 +7561,11 @@ int i40e_up(struct i40e_vsi *vsi) { int err; + if (vsi->type == I40E_VSI_MAIN && + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) + i40e_force_link_state(vsi->back, true); + err = i40e_vsi_configure(vsi); if (!err) err = i40e_up_complete(vsi); @@ -5587,85 +7590,1403 @@ void i40e_down(struct i40e_vsi *vsi) } i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); + if (vsi->type == I40E_VSI_MAIN && + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) + i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); - if (i40e_enabled_xdp_vsi(vsi)) + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit and + * ndo_xsk_wakeup calls are completed. 
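+			 * (Both run under rcu_read_lock(), so the
+			 * synchronize_rcu() below is sufficient to
+			 * flush any in-flight callers.)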
+ */ + synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[i]); + } i40e_clean_rx_ring(vsi->rx_rings[i]); } } /** + * i40e_validate_mqprio_qopt- validate queue mapping info + * @vsi: the VSI being configured + * @mqprio_qopt: queue parametrs + **/ +static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 sum_max_rate = 0; + u64 max_rate = 0; + int i; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS) + return -EINVAL; + for (i = 0; ; i++) { + if (!mqprio_qopt->qopt.count[i]) + return -EINVAL; + if (mqprio_qopt->min_rate[i]) { + dev_err(&vsi->back->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); + return -EINVAL; + } + max_rate = mqprio_qopt->max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + sum_max_rate += max_rate; + + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + } + if (vsi->num_queue_pairs < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + dev_err(&vsi->back->pdev->dev, + "Failed to create traffic channel, insufficient number of queues.\n"); + return -EINVAL; + } + if (sum_max_rate > i40e_get_link_speed(vsi)) { + dev_err(&vsi->back->pdev->dev, + "Invalid max tx rate specified\n"); + return -EINVAL; + } + return 0; +} + +/** + * i40e_vsi_set_default_tc_config - set default values for tc configuration + * @vsi: the VSI being configured + **/ +static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) +{ + u16 qcount; + int i; + + /* Only TC0 is enabled */ + vsi->tc_config.numtc = 1; + vsi->tc_config.enabled_tc = 1; + qcount = min_t(int, vsi->alloc_queue_pairs, + i40e_pf_get_max_q_per_tc(vsi->back)); + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* For the TC that is not enabled set the offset to default + * queue and allocate one queue for the given TC. + */ + vsi->tc_config.tc_info[i].qoffset = 0; + if (i == 0) + vsi->tc_config.tc_info[i].qcount = qcount; + else + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } +} + +/** + * i40e_del_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function deletes a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static int i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_remove_macvlan_element_data element; + int status; + + memset(&element, 0, sizeof(element)); + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; + status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_add_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function adds a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. 
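+ * (The entry is a perfect-match filter on the channel VSI's SEID, so
+ * frames addressed to the macvlan's MAC are steered to that VSI's
+ * queues.)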
+ **/ +static int i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_add_macvlan_element_data element; + u16 cmd_flags = 0; + int status; + + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.queue_number = 0; + element.match_method = I40E_AQC_MM_ERR_NO_RES; + cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + element.flags = cpu_to_le16(cmd_flags); + status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_reset_ch_rings - Reset the queue contexts in a channel + * @vsi: the VSI we want to access + * @ch: the channel we want to access + */ +static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) +{ + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + int i; + + for (i = 0; i < ch->num_queue_pairs; i++) { + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } +} + +/** + * i40e_free_macvlan_channels + * @vsi: the VSI we want to access + * + * This function frees the Qs of the channel VSI from + * the stack and also deletes the channel VSIs which + * serve as macvlans. + */ +static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + struct i40e_vsi *parent_vsi; + + if (i40e_is_channel_macvlan(ch)) { + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + + list_del(&ch->list); + parent_vsi = ch->parent_vsi; + if (!parent_vsi || !ch->initialized) { + kfree(ch); + continue; + } + + /* remove the VSI */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, parent_vsi->seid); + kfree(ch); + } + vsi->macvlan_cnt = 0; +} + +/** + * i40e_fwd_ring_up - bring the macvlan device up + * @vsi: the VSI we want to access + * @vdev: macvlan netdevice + * @fwd: the private fwd structure + */ +static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, + struct i40e_fwd_adapter *fwd) +{ + struct i40e_channel *ch = NULL, *ch_tmp, *iter; + int ret = 0, num_tc = 1, i, aq_err; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* Go through the list and find an available channel */ + list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(iter)) { + iter->fwd = fwd; + /* record configuration for macvlan interface in vdev */ + for (i = 0; i < num_tc; i++) + netdev_bind_sb_channel_queue(vsi->netdev, vdev, + i, + iter->num_queue_pairs, + iter->base_queue); + for (i = 0; i < iter->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = iter->base_queue + i; + + /* Get to TX ring ptr */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = iter; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = iter; + } + ch = iter; + break; + } + } + + if (!ch) + return -EINVAL; + + /* Guarantee all rings are updated before we update the + * MAC address filter. 
+	 */
+	wmb();
+
+	/* Add a mac filter */
+	ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
+	if (ret) {
+		/* if we cannot add the MAC rule then disable the offload */
+		macvlan_release_l2fw_offload(vdev);
+		for (i = 0; i < ch->num_queue_pairs; i++) {
+			struct i40e_ring *rx_ring;
+			u16 pf_q;
+
+			pf_q = ch->base_queue + i;
+			rx_ring = vsi->rx_rings[pf_q];
+			rx_ring->netdev = NULL;
+		}
+		dev_info(&pf->pdev->dev,
+			 "Error adding mac filter on macvlan err %pe, aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(aq_err));
+		netdev_err(vdev, "L2fwd offload disabled due to L2 filter error\n");
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_setup_macvlans - create the channels which will be macvlans
+ * @vsi: the VSI we want to access
+ * @macvlan_cnt: no. of macvlans to be setup
+ * @qcnt: no. of Qs per macvlan
+ * @vdev: macvlan netdevice
+ */
+static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+			       struct net_device *vdev)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	u16 sections, qmap, num_qps;
+	struct i40e_channel *ch;
+	int i, pow, ret = 0;
+	u8 offset = 0;
+
+	if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
+		return -EINVAL;
+
+	num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = fls(roundup_pow_of_two(num_qps) - 1);
+
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+	       (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup context bits for the main VSI */
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = vsi->back->hw.pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.info = vsi->info;
+	ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	ctxt.info.valid_sections |= cpu_to_le16(sections);
+
+	/* Reconfigure RSS for main VSI with new max queue count */
+	vsi->rss_size = max_t(u16, num_qps, qcnt);
+	ret = i40e_vsi_config_rss(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed to reconfig RSS for num_queues (%u)\n",
+			 vsi->rss_size);
+		return ret;
+	}
+	vsi->reconfig_rss = true;
+	dev_dbg(&vsi->back->pdev->dev,
+		"Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
+	vsi->next_base_queue = num_qps;
+	vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
+
+	/* Update the VSI after updating the VSI queue-mapping
+	 * information
+	 */
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Update vsi tc config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status));
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	/* Create channels for macvlans */
+	INIT_LIST_HEAD(&vsi->macvlan_list);
+	for (i = 0; i < macvlan_cnt; i++) {
+		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+		if (!ch) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+		INIT_LIST_HEAD(&ch->list);
+		ch->num_queue_pairs = qcnt;
+		if (!i40e_setup_channel(pf, vsi, ch)) {
+			ret = -EINVAL;
+			kfree(ch);
+			goto err_free;
+		}
+		ch->parent_vsi = vsi;
+		vsi->cnt_q_avail -= ch->num_queue_pairs;
+		vsi->macvlan_cnt++;
+		list_add_tail(&ch->list, &vsi->macvlan_list);
+	}
+
+	return ret;
+
+err_free:
+	dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
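A standalone sketch of the exponent computation i40e_setup_macvlans() uses above: the VSI queue map stores the queue count as a power-of-2 exponent, so the driver computes ceil(log2(num_qps)) via fls(roundup_pow_of_two(num_qps) - 1). The helpers below are userspace stand-ins for the kernel's fls() and roundup_pow_of_two().

#include <stdio.h>

static int fls32(unsigned int x)	/* 1-based index of highest set bit */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static unsigned int roundup_pow_of_two32(unsigned int x)
{
	return x <= 1 ? 1 : 1u << fls32(x - 1);
}

int main(void)
{
	for (unsigned int num_qps = 1; num_qps <= 9; num_qps++)
		printf("num_qps=%u -> pow=%d\n", num_qps,
		       fls32(roundup_pow_of_two32(num_qps) - 1));
	return 0;
}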
i40e_free_macvlan_channels(vsi); + + return ret; +} + +/** + * i40e_fwd_add - configure macvlans + * @netdev: net device to configure + * @vdev: macvlan netdevice + **/ +static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_fwd_adapter *fwd; + int avail_macvlan, ret; + + if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } + if (i40e_is_tc_mqprio_enabled(pf)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } + if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) { + netdev_info(netdev, "Not enough vectors available to support macvlans\n"); + return ERR_PTR(-EINVAL); + } + + /* The macvlan device has to be a single Q device so that the + * tc_to_txq field can be reused to pick the tx queue. + */ + if (netif_is_multiqueue(vdev)) + return ERR_PTR(-ERANGE); + + if (!vsi->macvlan_cnt) { + /* reserve bit 0 for the pf device */ + set_bit(0, vsi->fwd_bitmask); + + /* Try to reserve as many queues as possible for macvlans. First + * reserve 3/4th of max vectors, then half, then quarter and + * calculate Qs per macvlan as you go + */ + vectors = pf->num_lan_msix; + if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { + /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ + q_per_macvlan = 4; + macvlan_cnt = (vectors - 32) / 4; + } else if (vectors <= 64 && vectors > 32) { + /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 2; + macvlan_cnt = (vectors - 16) / 2; + } else if (vectors <= 32 && vectors > 16) { + /* allocate 1 Q per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 1; + macvlan_cnt = vectors - 16; + } else if (vectors <= 16 && vectors > 8) { + /* allocate 1 Q per macvlan and 8 Qs to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 8; + } else { + /* allocate 1 Q per macvlan and 1 Q to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 1; + } + + if (macvlan_cnt == 0) + return ERR_PTR(-EBUSY); + + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + + /* sets up the macvlans but does not "enable" them */ + ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, + vdev); + if (ret) + return ERR_PTR(ret); + + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); + } + avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, + vsi->macvlan_cnt); + if (avail_macvlan >= I40E_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* create the fwd struct */ + fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + set_bit(avail_macvlan, vsi->fwd_bitmask); + fwd->bit_no = avail_macvlan; + netdev_set_sb_channel(vdev, avail_macvlan); + fwd->netdev = vdev; + + if (!netif_running(netdev)) + return fwd; + + /* Set fwd ring up */ + ret = i40e_fwd_ring_up(vsi, vdev, fwd); + if (ret) { + /* unbind the queues and drop the subordinate channel config */ + netdev_unbind_sb_channel(netdev, vdev); + netdev_set_sb_channel(vdev, 0); + + kfree(fwd); + return ERR_PTR(-EINVAL); + } + + return fwd; +} + +/** + * i40e_del_all_macvlans - Delete all the mac filters on the channels + * @vsi: the VSI we want to access + */ +static void i40e_del_all_macvlans(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + if 
(list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, + ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + } + } +} + +/** + * i40e_fwd_del - delete macvlan interfaces + * @netdev: net device to configure + * @vdev: macvlan netdevice + */ +static void i40e_fwd_del(struct net_device *netdev, void *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_fwd_adapter *fwd = vdev; + struct i40e_channel *ch, *ch_tmp; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + /* Find the channel associated with the macvlan and del mac filter */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch) && + ether_addr_equal(i40e_channel_mac(ch), + fwd->netdev->dev_addr)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(netdev, fwd->netdev); + netdev_set_sb_channel(fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } else { + dev_info(&pf->pdev->dev, + "Error deleting mac filter on macvlan err %pe, aq_err %s\n", + ERR_PTR(ret), libie_aq_str(aq_err)); + } + break; + } + } +} + +/** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure - * @tc: number of traffic classes to enable + * @type_data: tc offload data **/ -static int i40e_setup_tc(struct net_device *netdev, u8 tc) +static int i40e_setup_tc(struct net_device *netdev, void *type_data) { + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u8 enabled_tc = 0; + u8 enabled_tc = 0, num_tc, hw; + bool need_reset = false; + int old_queue_pairs; int ret = -EINVAL; + u16 mode; int i; - /* Check if DCB enabled to continue */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { - netdev_info(netdev, "DCB is not enabled for adapter\n"); - goto exit; + old_queue_pairs = vsi->num_queue_pairs; + num_tc = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tc; } /* Check if MFP enabled */ - if (pf->flags & I40E_FLAG_MFP_ENABLED) { - netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); - goto exit; + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { + netdev_info(netdev, + "Configuring TC not supported in MFP mode\n"); + return ret; } + switch (mode) { + case TC_MQPRIO_MODE_DCB: + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); - /* Check whether tc count is within enabled limit */ - if (tc > i40e_pf_get_num_tc(pf)) { - netdev_info(netdev, "TC count greater than enabled on link for adapter\n"); - goto exit; + /* Check if DCB enabled to continue */ + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { + netdev_info(netdev, + "DCB is not enabled for adapter\n"); + return ret; + } + + /* Check whether tc count is within enabled limit */ + if 
(num_tc > i40e_pf_get_num_tc(pf)) {
+			netdev_info(netdev,
+				    "TC count greater than enabled on link for adapter\n");
+			return ret;
+		}
+		break;
+	case TC_MQPRIO_MODE_CHANNEL:
+		if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
+			netdev_info(netdev,
+				    "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+			return ret;
+		}
+		if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
+			return ret;
+		ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+		if (ret)
+			return ret;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+		       sizeof(*mqprio_qopt));
+		set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
+		clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+		break;
+	default:
+		return -EINVAL;
 	}
 
+config_tc:
 	/* Generate TC map for number of tc requested */
-	for (i = 0; i < tc; i++)
+	for (i = 0; i < num_tc; i++)
 		enabled_tc |= BIT(i);
 
 	/* Requesting same TC configuration as already enabled */
-	if (enabled_tc == vsi->tc_config.enabled_tc)
+	if (enabled_tc == vsi->tc_config.enabled_tc &&
+	    mode != TC_MQPRIO_MODE_CHANNEL)
 		return 0;
 
 	/* Quiesce VSI queues */
 	i40e_quiesce_vsi(vsi);
 
+	if (!hw && !i40e_is_tc_mqprio_enabled(pf))
+		i40e_remove_queue_channels(vsi);
+
 	/* Configure VSI for enabled TCs */
 	ret = i40e_vsi_config_tc(vsi, enabled_tc);
 	if (ret) {
 		netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
 			    vsi->seid);
+		need_reset = true;
+		goto exit;
+	} else if (enabled_tc &&
+		   (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
+		netdev_info(netdev,
+			    "Failed to create channel. Override queues (%u) not power of 2\n",
+			    vsi->tc_config.tc_info[0].qcount);
+		ret = -EINVAL;
+		need_reset = true;
 		goto exit;
 	}
 
+	dev_info(&vsi->back->pdev->dev,
+		 "Setup channel (id:%u) utilizing num_queues %d\n",
+		 vsi->seid, vsi->tc_config.tc_info[0].qcount);
+
+	if (i40e_is_tc_mqprio_enabled(pf)) {
+		if (vsi->mqprio_qopt.max_rate[0]) {
+			u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
+						  vsi->mqprio_qopt.max_rate[0]);
+
+			ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+			if (!ret) {
+				u64 credits = max_tx_rate;
+
+				do_div(credits, I40E_BW_CREDIT_DIVISOR);
+				dev_dbg(&vsi->back->pdev->dev,
+					"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+					max_tx_rate,
+					credits,
+					vsi->seid);
+			} else {
+				need_reset = true;
+				goto exit;
+			}
+		}
+		ret = i40e_configure_queue_channels(vsi);
+		if (ret) {
+			vsi->num_queue_pairs = old_queue_pairs;
+			netdev_info(netdev,
+				    "Failed configuring queue channels\n");
+			need_reset = true;
+			goto exit;
+		}
+	}
+
+exit:
+	/* Reset the configuration data to defaults, only TC0 is enabled */
+	if (need_reset) {
+		i40e_vsi_set_default_tc_config(vsi);
+		need_reset = false;
+	}
+
 	/* Unquiesce VSI */
 	i40e_unquiesce_vsi(vsi);
+	return ret;
+}
 
-exit:
+/**
+ * i40e_set_cld_element - sets cloud filter element data
+ * @filter: cloud filter rule
+ * @cld: ptr to cloud filter element data
+ *
+ * This is a helper function to copy data into the cloud filter element
+ **/
+static inline void
+i40e_set_cld_element(struct i40e_cloud_filter *filter,
+		     struct i40e_aqc_cloud_filters_element_data *cld)
+{
+	u32 ipa;
+	int i;
+
+	memset(cld, 0, sizeof(*cld));
+	ether_addr_copy(cld->outer_mac, filter->dst_mac);
+	ether_addr_copy(cld->inner_mac, filter->src_mac);
+
+	if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+		return;
+
+	if (filter->n_proto == ETH_P_IPV6) {
+#define IPV6_MAX_INDEX	(ARRAY_SIZE(filter->dst_ipv6) - 1)
+		for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) {
+			ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+
+			*(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa);
+		}
+	} else {
+		ipa = be32_to_cpu(filter->dst_ipv4);
+
+		memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
+	}
+
+	cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+
+	/* tenant_id is not supported by FW now, once the support is enabled
+	 * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
+	 */
+	if (filter->tenant_id)
+		return;
+}
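A standalone sketch of the rate-to-credits conversion in i40e_setup_tc() above: the scheduler programs Tx limits in 50 Mbps credits, so a max_tx_rate in Mbps is divided by I40E_BW_CREDIT_DIVISOR (50, going by the "count of 50Mbps" dev_dbg text). The rate value below is illustrative.

#include <stdio.h>

#define BW_CREDIT_DIVISOR 50ULL	/* stand-in for I40E_BW_CREDIT_DIVISOR */

int main(void)
{
	unsigned long long max_tx_rate = 975;	/* Mbps, example value */
	unsigned long long credits = max_tx_rate / BW_CREDIT_DIVISOR;

	printf("%llu Mbps -> %llu credits of 50 Mbps\n", max_tx_rate, credits);
	return 0;
}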
+
+/**
+ * i40e_add_del_cloud_filter - Add/del cloud filter
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec.
+ * Returns 0 if the filter was successfully added.
+ **/
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+			      struct i40e_cloud_filter *filter, bool add)
+{
+	struct i40e_aqc_cloud_filters_element_data cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+	static const u16 flag_table[128] = {
+		[I40E_CLOUD_FILTER_FLAGS_OMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_IIP] =
+			I40E_AQC_ADD_CLOUD_FILTER_IIP,
+	};
+
+	if (filter->flags >= ARRAY_SIZE(flag_table))
+		return -EIO;
+
+	memset(&cld_filter, 0, sizeof(cld_filter));
+
+	/* copy element needed to add cloud filter from filter */
+	i40e_set_cld_element(filter, &cld_filter);
+
+	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+
+	if (filter->n_proto == ETH_P_IPV6)
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+	else
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
+
+	if (add)
+		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	else
+		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	if (ret)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+			add ? "add" : "delete", filter->dst_port, ret,
+			pf->hw.aq.asq_last_status);
+	else
+		dev_info(&pf->pdev->dev,
+			 "%s cloud filter for VSI: %d\n",
+			 add ? "Added" : "Deleted", filter->seid);
+	return ret;
+}
+
+/**
+ * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec using big buffer.
+ * Returns 0 if the filter was successfully added.
+ **/
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+				      struct i40e_cloud_filter *filter,
+				      bool add)
+{
+	struct i40e_aqc_cloud_filters_element_bb cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+
+	/* Both (src/dst) valid mac_addr are not supported */
+	if ((is_valid_ether_addr(filter->dst_mac) &&
+	     is_valid_ether_addr(filter->src_mac)) ||
+	    (is_multicast_ether_addr(filter->dst_mac) &&
+	     is_multicast_ether_addr(filter->src_mac)))
+		return -EOPNOTSUPP;
+
+	/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
+	 * ports are not supported via big buffer now.
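A standalone sketch of the flag_table pattern used by i40e_add_del_cloud_filter() above: a sparse, designated-initializer array maps a filter-flags value straight to its admin-queue encoding, with a bounds check standing in for a switch statement. All constants below are made up.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum { FLAGS_OMAC = 1, FLAGS_IMAC = 2 };	/* hypothetical flag values */

static const unsigned short flag_table[4] = {
	[FLAGS_OMAC] = 0x0001,	/* hypothetical AQ encodings */
	[FLAGS_IMAC] = 0x0002,
};

static int lookup(unsigned int flags, unsigned short *out)
{
	if (flags >= ARRAY_SIZE(flag_table) || !flag_table[flags])
		return -1;	/* unknown or unsupported combination */
	*out = flag_table[flags];
	return 0;
}

int main(void)
{
	unsigned short enc;

	printf("%d\n", lookup(FLAGS_IMAC, &enc) ? -1 : enc);
	return 0;
}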
+ */ + if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) + return -EOPNOTSUPP; + + /* adding filter using src_port/src_ip is not supported at this stage */ + if (filter->src_port || + (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) || + !ipv6_addr_any(&filter->ip.v6.src_ip6)) + return -EOPNOTSUPP; + + memset(&cld_filter, 0, sizeof(cld_filter)); + + /* copy element needed to add cloud filter from filter */ + i40e_set_cld_element(filter, &cld_filter.element); + + if (is_valid_ether_addr(filter->dst_mac) || + is_valid_ether_addr(filter->src_mac) || + is_multicast_ether_addr(filter->dst_mac) || + is_multicast_ether_addr(filter->src_mac)) { + /* MAC + IP : unsupported mode */ + if (filter->dst_ipv4) + return -EOPNOTSUPP; + + /* since we validated that L4 port must be valid before + * we get here, start with respective "flags" value + * and update if vlan is present or not + */ + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT); + + if (filter->vlan_id) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); + } + + } else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) || + !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); + if (filter->n_proto == ETH_P_IPV6) + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6); + else + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4); + } else { + dev_err(&pf->pdev->dev, + "either mac or ip has to be valid for cloud filter\n"); + return -EINVAL; + } + + /* Now copy L4 port in Byte 6..7 in general fields */ + cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] = + be16_to_cpu(filter->dst_port); + + if (add) { + /* Validate current device switch mode, change if necessary */ + ret = i40e_validate_and_set_switch_mode(vsi); + if (ret) { + dev_err(&pf->pdev->dev, + "failed to set switch mode, ret %d\n", + ret); + return ret; + } + + ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } else { + ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } + + if (ret) + dev_dbg(&pf->pdev->dev, + "Failed to %s cloud filter(big buffer) err %d aq_err %d\n", + add ? "add" : "delete", ret, pf->hw.aq.asq_last_status); + else + dev_info(&pf->pdev->dev, + "%s cloud filter for VSI: %d, L4 port: %d\n", + add ? 
"add" : "delete", filter->seid, + ntohs(filter->dst_port)); return ret; } -static int __i40e_setup_tc(struct net_device *netdev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +/** + * i40e_parse_cls_flower - Parse tc flower filters provided by kernel + * @vsi: Pointer to VSI + * @f: Pointer to struct flow_cls_offload + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_parse_cls_flower(struct i40e_vsi *vsi, + struct flow_cls_offload *f, + struct i40e_cloud_filter *filter) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; + struct i40e_pf *pf = vsi->back; + u8 field_flags = 0; + + if (dissector->used_keys & + ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&pf->pdev->dev, "Unsupported key used: 0x%llx\n", + dissector->used_keys); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + + flow_rule_match_enc_keyid(rule, &match); + if (match.mask->keyid != 0) + field_flags |= I40E_CLOUD_FIELD_TEN_ID; + + filter->tenant_id = be32_to_cpu(match.key->keyid); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + filter->n_proto = n_proto_key & n_proto_mask; + filter->ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(match.mask->dst)) { + if (is_broadcast_ether_addr(match.mask->dst)) { + field_flags |= I40E_CLOUD_FIELD_OMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", + match.mask->dst); + return -EIO; + } + } + + if (!is_zero_ether_addr(match.mask->src)) { + if (is_broadcast_ether_addr(match.mask->src)) { + field_flags |= I40E_CLOUD_FIELD_IMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", + match.mask->src); + return -EIO; + } + } + ether_addr_copy(filter->dst_mac, match.key->dst); + ether_addr_copy(filter->src_mac, match.key->src); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40E_CLOUD_FIELD_IVLAN; + + } else { + dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", + match.mask->vlan_id); + return -EIO; + } + } + + filter->vlan_id = cpu_to_be16(match.key->vlan_id); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, 
&match); + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n", + &match.mask->dst); + return -EIO; + } + } + + if (match.mask->src) { + if (match.mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n", + &match.mask->src); + return -EIO; + } + } + + if (field_flags & I40E_CLOUD_FIELD_TEN_ID) { + dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); + return -EIO; + } + filter->dst_ipv4 = match.key->dst; + filter->src_ipv4 = match.key->src; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + + /* src and dest IPV6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1), which can be represented as ::1 + */ + if (ipv6_addr_loopback(&match.key->dst) || + ipv6_addr_loopback(&match.key->src)) { + dev_err(&pf->pdev->dev, + "Bad ipv6, addr is LOOPBACK\n"); + return -EIO; + } + if (!ipv6_addr_any(&match.mask->dst) || + !ipv6_addr_any(&match.mask->src)) + field_flags |= I40E_CLOUD_FIELD_IIP; + + memcpy(&filter->src_ipv6, &match.key->src.s6_addr32, + sizeof(filter->src_ipv6)); + memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32, + sizeof(filter->dst_ipv6)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + if (match.mask->src) { + if (match.mask->src == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", + be16_to_cpu(match.mask->src)); + return -EIO; + } + } + + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", + be16_to_cpu(match.mask->dst)); + return -EIO; + } + } + + filter->dst_port = match.key->dst; + filter->src_port = match.key->src; + + switch (filter->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + dev_err(&pf->pdev->dev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + } + filter->flags = field_flags; + return 0; +} + +/** + * i40e_handle_tclass: Forward to a traffic class on the device + * @vsi: Pointer to VSI + * @tc: traffic class index on the device + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, + struct i40e_cloud_filter *filter) +{ + struct i40e_channel *ch, *ch_tmp; + + /* direct to a traffic class on the same device */ + if (tc == 0) { + filter->seid = vsi->seid; + return 0; + } else if (vsi->tc_config.enabled_tc & BIT(tc)) { + if (!filter->dst_port) { + dev_err(&vsi->back->pdev->dev, + "Specify destination port to direct to traffic class that is not default\n"); + return -EINVAL; + } + if (list_empty(&vsi->ch_list)) + return -EINVAL; + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, + list) { + if (ch->seid == vsi->tc_seid_map[tc]) + filter->seid = ch->seid; + } + return 0; + } + dev_err(&vsi->back->pdev->dev, "TC is not enabled\n"); + return -EINVAL; +} + +/** + * i40e_configure_clsflower - Configure tc flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct flow_cls_offload + * + **/ +static int i40e_configure_clsflower(struct i40e_vsi *vsi, + struct flow_cls_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, 
cls_flower->classid);
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	if (tc < 0) {
+		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!tc) {
+		dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
+		return -EINVAL;
+	}
+
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		return -EBUSY;
+
+	if (pf->fdir_pf_active_filters ||
+	    (!hlist_empty(&pf->fdir_filter_list))) {
+		dev_err(&vsi->back->pdev->dev,
+			"Flow Director Sideband filters exist, turn ntuple off to configure cloud filters\n");
+		return -EINVAL;
+	}
+
+	if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+		clear_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags);
+		clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, vsi->back->flags);
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->cookie = cls_flower->cookie;
+
+	err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+	if (err < 0)
+		goto err;
+
+	err = i40e_handle_tclass(vsi, tc, filter);
+	if (err < 0)
+		goto err;
+
+	/* Add cloud filter */
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, true);
+
+	if (err) {
+		dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
+			err);
+		goto err;
+	}
+
+	/* add filter to the ordered list */
+	INIT_HLIST_NODE(&filter->cloud_node);
+
+	hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+
+	pf->num_cloud_filters++;
+
+	return err;
+err:
+	kfree(filter);
+	return err;
+}
+
+/**
+ * i40e_find_cloud_filter - Find the cloud filter in the list
+ * @vsi: Pointer to VSI
+ * @cookie: filter specific cookie
+ *
+ **/
+static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+							unsigned long *cookie)
 {
-	if (tc->type != TC_SETUP_MQPRIO)
+	struct i40e_cloud_filter *filter = NULL;
+	struct hlist_node *node2;
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &vsi->back->cloud_filter_list, cloud_node)
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	return NULL;
+}
+
+/**
+ * i40e_delete_clsflower - Remove tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct flow_cls_offload
+ *
+ **/
+static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+				 struct flow_cls_offload *cls_flower)
+{
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
+
+	if (!filter)
 		return -EINVAL;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	hash_del(&filter->cloud_node);
+
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, false);
 
-	return i40e_setup_tc(netdev, tc->mqprio->num_tc);
+	kfree(filter);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Failed to delete cloud filter, err %pe\n",
+			ERR_PTR(err));
+		return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+	}
+
+	pf->num_cloud_filters--;
+	if (!pf->num_cloud_filters)
+		if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) &&
+		    !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) {
+			set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+			clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags);
+			clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
+		}
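A standalone sketch of the cookie matching that i40e_find_cloud_filter() above relies on: filters are identified only by the opaque cookie the TC core supplied at FLOW_CLS_REPLACE time, so deletion is a pure cookie comparison. A plain singly linked list stands in for the kernel hlist.

#include <stdio.h>
#include <string.h>

struct filter {
	unsigned long cookie;
	struct filter *next;
};

static struct filter *find_by_cookie(struct filter *head, unsigned long cookie)
{
	for (struct filter *f = head; f; f = f->next)
		if (!memcmp(&cookie, &f->cookie, sizeof(f->cookie)))
			return f;
	return NULL;
}

int main(void)
{
	struct filter b = { .cookie = 0xbeef, .next = NULL };
	struct filter a = { .cookie = 0xcafe, .next = &b };

	printf("%s\n", find_by_cookie(&a, 0xbeef) ? "found" : "missing");
	return 0;
}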
return 0; +} + +/** + * i40e_setup_tc_cls_flower - flower classifier offloads + * @np: net device to configure + * @cls_flower: offload data + **/ +static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, + struct flow_cls_offload *cls_flower) +{ + struct i40e_vsi *vsi = np->vsi; + + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return i40e_configure_clsflower(vsi, cls_flower); + case FLOW_CLS_DESTROY: + return i40e_delete_clsflower(vsi, cls_flower); + case FLOW_CLS_STATS: + return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } +} + +static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct i40e_netdev_priv *np = cb_priv; + + if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return i40e_setup_tc_cls_flower(np, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(i40e_block_cb_list); + +static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return i40e_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &i40e_block_cb_list, + i40e_setup_tc_block_cb, + np, np, true); + default: + return -EOPNOTSUPP; + } } /** @@ -5694,6 +9015,9 @@ int i40e_open(struct net_device *netdev) netif_carrier_off(netdev); + if (i40e_force_link_state(pf, true)) + return -EAGAIN; + err = i40e_vsi_open(vsi); if (err) return err; @@ -5705,13 +9029,33 @@ int i40e_open(struct net_device *netdev) TCP_FLAG_FIN | TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); - udp_tunnel_get_rx_info(netdev); return 0; } /** + * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues + * @vsi: vsi structure + * + * This updates netdev's number of tx/rx queues + * + * Returns status of setting tx/rx queues + **/ +static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi) +{ + int ret; + + ret = netif_set_real_num_rx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (ret) + return ret; + + return netif_set_real_num_tx_queues(vsi->netdev, + vsi->num_queue_pairs); +} + +/** * i40e_vsi_open - * @vsi: the VSI to open * @@ -5747,13 +9091,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_setup_rx; /* Notify the stack of the actual queue counts. 
*/ - err = netif_set_real_num_tx_queues(vsi->netdev, - vsi->num_queue_pairs); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(vsi->netdev, - vsi->num_queue_pairs); + err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues; @@ -5762,6 +9100,8 @@ int i40e_vsi_open(struct i40e_vsi *vsi) dev_driver_string(&pf->pdev->dev), dev_name(&pf->pdev->dev)); err = i40e_vsi_request_irq(vsi, int_name); + if (err) + goto err_setup_rx; } else { err = -EINVAL; @@ -5782,8 +9122,8 @@ err_setup_rx: i40e_vsi_free_rx_resources(vsi); err_setup_tx: i40e_vsi_free_tx_resources(vsi); - if (vsi == pf->vsi[pf->lan_vsi]) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + if (vsi->type == I40E_VSI_MAIN) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return err; } @@ -5820,32 +9160,81 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf) INIT_LIST_HEAD(&pf->l4_flex_pit_list); pf->fdir_pf_active_filters = 0; - pf->fd_tcp4_filter_cnt = 0; - pf->fd_udp4_filter_cnt = 0; - pf->fd_sctp4_filter_cnt = 0; - pf->fd_ip4_filter_cnt = 0; + i40e_reset_fdir_filter_cnt(pf); /* Reprogram the default input set for TCP/IPv4 */ - i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for TCP/IPv6 */ + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for UDP/IPv4 */ - i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP, + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_UDP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for UDP/IPv6 */ + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_UDP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for SCTP/IPv4 */ - i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP, + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_SCTP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for SCTP/IPv6 */ + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_SCTP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for Other/IPv4 */ - i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER, + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_OTHER, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK); + + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_FRAG_IPV4, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK); + + /* Reprogram the default input set for Other/IPv6 */ + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_OTHER, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK); + + i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_FRAG_IPV6, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); } /** + * i40e_cloud_filter_exit - Cleans up the cloud filters + * @pf: Pointer to PF + * + * This function destroys the hlist where all the cloud filters + * were saved. 
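A standalone sketch of the safe-teardown pattern the function below uses: because every node is freed during the walk, the iterator must cache the successor before freeing (hlist_for_each_entry_safe in the kernel). A plain singly linked list keeps the sketch self-contained.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	struct node *next;
};

static void free_all(struct node **head)
{
	struct node *n = *head;

	while (n) {
		struct node *next = n->next;	/* grab successor first */

		free(n);
		n = next;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->id = i;
		n->next = head;
		head = n;
	}
	free_all(&head);
	printf("list emptied: %s\n", head ? "no" : "yes");
	return 0;
}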
+ **/ +static void i40e_cloud_filter_exit(struct i40e_pf *pf) +{ + struct i40e_cloud_filter *cfilter; + struct hlist_node *node; + + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + } + pf->num_cloud_filters = 0; + + if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); + } +} + +/** * i40e_close - Disables a network interface * @netdev: network interface device structure * @@ -5878,10 +9267,9 @@ int i40e_close(struct net_device *netdev) **/ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) { + struct i40e_vsi *vsi; u32 val; - - WARN_ON(in_interrupt()); - + int i; /* do the biggest reset indicated */ if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) { @@ -5911,7 +9299,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) wr32(&pf->hw, I40E_GLGEN_RTRIG, val); i40e_flush(&pf->hw); - } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) { + } else if (reset_flags & I40E_PF_RESET_FLAG) { /* Request a PF Reset * @@ -5924,30 +9312,33 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) dev_dbg(&pf->pdev->dev, "PFR requested\n"); i40e_handle_reset_warning(pf, lock_acquired); - } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { - int v; + } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { + /* Request a PF Reset + * + * Resets PF and reinitializes PFs VSI. + */ + i40e_prep_for_reset(pf); + i40e_reset_and_rebuild(pf, true, lock_acquired); + dev_info(&pf->pdev->dev, + test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? 
+ "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); + } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { /* Find the VSI(s) that requested a re-init */ - dev_info(&pf->pdev->dev, - "VSI reinit requested\n"); - for (v = 0; v < pf->num_alloc_vsi; v++) { - struct i40e_vsi *vsi = pf->vsi[v]; + dev_info(&pf->pdev->dev, "VSI reinit requested\n"); - if (vsi != NULL && - test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED, + i40e_pf_for_each_vsi(pf, i, vsi) { + if (test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED, vsi->state)) - i40e_vsi_reinit_locked(pf->vsi[v]); + i40e_vsi_reinit_locked(vsi); } } else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) { - int v; - /* Find the VSI(s) that needs to be brought down */ dev_info(&pf->pdev->dev, "VSI down requested\n"); - for (v = 0; v < pf->num_alloc_vsi; v++) { - struct i40e_vsi *vsi = pf->vsi[v]; - if (vsi != NULL && - test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED, + i40e_pf_for_each_vsi(pf, i, vsi) { + if (test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state)) { set_bit(__I40E_VSI_DOWN, vsi->state); i40e_down(vsi); @@ -6023,16 +9414,23 @@ bool i40e_dcb_need_reconfig(struct i40e_pf *pf, static int i40e_handle_lldp_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_aqc_lldp_get_mib *mib = - (struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw; + struct i40e_aqc_lldp_get_mib *mib = libie_aq_raw(&e->desc); struct i40e_hw *hw = &pf->hw; struct i40e_dcbx_config tmp_dcbx_cfg; bool need_reconfig = false; int ret = 0; u8 type; + /* X710-T*L 2.5G and 5G speeds don't support DCB */ + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) && + !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) + /* let firmware decide if the DCB should be disabled */ + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return ret; /* Ignore if event is not for Nearest Bridge */ @@ -6062,10 +9460,19 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Get updated DCBX data from firmware */ ret = i40e_get_dcb_config(&pf->hw); if (ret) { - dev_info(&pf->pdev->dev, - "Failed querying DCB configuration data from firmware, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* X710-T*L 2.5G and 5G speeds don't support DCB */ + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { + dev_warn(&pf->pdev->dev, + "DCB is not supported for X710-T*L 2.5/5G speeds\n"); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + } else { + dev_info(&pf->pdev->dev, + "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); + } goto exit; } @@ -6086,9 +9493,9 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Enable DCB tagging only when more than one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ @@ -6112,8 +9519,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, i40e_service_event_schedule(pf); } else { i40e_pf_unquiesce_all_vsi(pf); - pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED 
| - I40E_FLAG_CLIENT_L2_CHANGE); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } exit: @@ -6145,8 +9552,7 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags) static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_aqc_lan_overflow *data = - (struct i40e_aqc_lan_overflow *)&e->desc.params.raw; + struct i40e_aqc_lan_overflow *data = libie_aq_raw(&e->desc); u32 queue = le32_to_cpu(data->prtdcb_rupto); u32 qtx_ctl = le32_to_cpu(data->otx_ctl); struct i40e_hw *hw = &pf->hw; @@ -6156,31 +9562,18 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n", queue, qtx_ctl); - /* Queue belongs to VF, find the VF and issue VF reset */ - if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK) - >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) { - vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK) - >> I40E_QTX_CTL_VFVM_INDX_SHIFT); - vf_id -= hw->func_caps.vf_base_id; - vf = &pf->vf[vf_id]; - i40e_vc_notify_vf_reset(vf); - /* Allow VF to process pending reset notification */ - msleep(20); - i40e_reset_vf(vf, false); - } -} - -/** - * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters - * @pf: board private structure - **/ -u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf) -{ - u32 val, fcnt_prog; + if (FIELD_GET(I40E_QTX_CTL_PFVF_Q_MASK, qtx_ctl) != + I40E_QTX_CTL_VF_QUEUE) + return; - val = rd32(&pf->hw, I40E_PFQF_FDSTAT); - fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK); - return fcnt_prog; + /* Queue belongs to VF, find the VF and issue VF reset */ + vf_id = FIELD_GET(I40E_QTX_CTL_VFVM_INDX_MASK, qtx_ctl); + vf_id -= hw->func_caps.vf_base_id; + vf = &pf->vf[vf_id]; + i40e_vc_notify_vf_reset(vf); + /* Allow VF to process pending reset notification */ + msleep(20); + i40e_reset_vf(vf, false); } /** @@ -6193,8 +9586,7 @@ u32 i40e_get_current_fd_count(struct i40e_pf *pf) val = rd32(&pf->hw, I40E_PFQF_FDSTAT); fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) + - ((val & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >> - I40E_PFQF_FDSTAT_BEST_CNT_SHIFT); + FIELD_GET(I40E_PFQF_FDSTAT_BEST_CNT_MASK, val); return fcnt_prog; } @@ -6208,12 +9600,115 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf) val = rd32(&pf->hw, I40E_GLQF_FDCNT_0); fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) + - ((val & I40E_GLQF_FDCNT_0_BESTCNT_MASK) >> - I40E_GLQF_FDCNT_0_BESTCNT_SHIFT); + FIELD_GET(I40E_GLQF_FDCNT_0_BESTCNT_MASK, val); return fcnt_prog; } /** + * i40e_reenable_fdir_sb - Restore FDir SB capability + * @pf: board private structure + **/ +static void i40e_reenable_fdir_sb(struct i40e_pf *pf) +{ + if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) + dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); +} + +/** + * i40e_reenable_fdir_atr - Restore FDir ATR capability + * @pf: board private structure + **/ +static void i40e_reenable_fdir_atr(struct i40e_pf *pf) +{ + if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) { + /* ATR uses the same filtering logic as SB rules. It only + * functions properly if the input set mask is at the default + * settings. It is safe to restore the default input set + * because there are no active TCPv4 filter rules. 
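A standalone sketch of the FIELD_GET() conversions in this hunk: extracting a register field by its mask is equivalent to the old open-coded (val & MASK) >> SHIFT, with the shift derived from the mask's lowest set bit. The mask and register values below are illustrative, not real i40e registers.

#include <stdint.h>
#include <stdio.h>

#define BEST_CNT_MASK 0x1fff0000u	/* hypothetical 13-bit field at bit 16 */

static uint32_t field_get(uint32_t mask, uint32_t val)
{
	return (val & mask) >> __builtin_ctz(mask);	/* what FIELD_GET() does */
}

int main(void)
{
	uint32_t reg = 0x00420007;

	printf("best_cnt=%u\n", (unsigned int)field_get(BEST_CNT_MASK, reg));
	return 0;
}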
+		 */
+		i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP,
+					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+		if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+	}
+}
+
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+				       struct i40e_fdir_filter *filter)
+{
+	/* Update counters */
+	pf->fdir_pf_active_filters--;
+	pf->fd_inv = 0;
+
+	switch (filter->flow_type) {
+	case TCP_V4_FLOW:
+		pf->fd_tcp4_filter_cnt--;
+		break;
+	case UDP_V4_FLOW:
+		pf->fd_udp4_filter_cnt--;
+		break;
+	case SCTP_V4_FLOW:
+		pf->fd_sctp4_filter_cnt--;
+		break;
+	case TCP_V6_FLOW:
+		pf->fd_tcp6_filter_cnt--;
+		break;
+	case UDP_V6_FLOW:
+		pf->fd_udp6_filter_cnt--;
+		break;
+	case SCTP_V6_FLOW:
+		pf->fd_sctp6_filter_cnt--;
+		break;
+	case IP_USER_FLOW:
+		switch (filter->ipl4_proto) {
+		case IPPROTO_TCP:
+			pf->fd_tcp4_filter_cnt--;
+			break;
+		case IPPROTO_UDP:
+			pf->fd_udp4_filter_cnt--;
+			break;
+		case IPPROTO_SCTP:
+			pf->fd_sctp4_filter_cnt--;
+			break;
+		case IPPROTO_IP:
+			pf->fd_ip4_filter_cnt--;
+			break;
+		}
+		break;
+	case IPV6_USER_FLOW:
+		switch (filter->ipl4_proto) {
+		case IPPROTO_TCP:
+			pf->fd_tcp6_filter_cnt--;
+			break;
+		case IPPROTO_UDP:
+			pf->fd_udp6_filter_cnt--;
+			break;
+		case IPPROTO_SCTP:
+			pf->fd_sctp6_filter_cnt--;
+			break;
+		case IPPROTO_IP:
+			pf->fd_ip6_filter_cnt--;
+			break;
+		}
+		break;
+	}
+
+	/* Remove the filter from the list and free memory */
+	hlist_del(&filter->fdir_node);
+	kfree(filter);
+}
+
+/**
 * i40e_fdir_check_and_reenable - Function to re-enable FD ATR or SB if disabled
 * @pf: board private structure
 **/
@@ -6231,39 +9726,23 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 	fcnt_avail = pf->fdir_pf_filter_count;
 	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
 	    (pf->fd_add_err == 0) ||
-	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
-		if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
-			pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
-			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-			    (I40E_DEBUG_FD & pf->hw.debug_mask))
-				dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
-		}
-	}
+	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+		i40e_reenable_fdir_sb(pf);
 
 	/* We should wait for even more space before re-enabling ATR.
 	 * Additionally, we cannot enable ATR as long as we still have TCP SB
 	 * rules active.
*/ if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) && - (pf->fd_tcp4_filter_cnt == 0)) { - if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { - pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (I40E_DEBUG_FD & pf->hw.debug_mask)) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); - } - } + pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0) + i40e_reenable_fdir_atr(pf); /* if hw had a problem adding a filter, delete it */ if (pf->fd_inv > 0) { hlist_for_each_entry_safe(filter, node, - &pf->fdir_filter_list, fdir_node) { - if (filter->fd_id == pf->fd_inv) { - hlist_del(&filter->fdir_node); - kfree(filter); - pf->fdir_pf_active_filters--; - } - } + &pf->fdir_filter_list, fdir_node) + if (filter->fd_id == pf->fd_inv) + i40e_delete_invalid_filter(pf, filter); } } @@ -6300,7 +9779,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED; + set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6318,9 +9797,9 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n"); } else { /* replay sideband filters */ - i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); + i40e_fdir_filter_restore(i40e_pf_get_main_vsi(pf)); if (!disable_atr && !pf->fd_tcp4_filter_cnt) - pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; + clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); @@ -6328,7 +9807,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } /** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) @@ -6336,13 +9815,6 @@ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters; } -/* We can see up to 256 filter programming desc in transit if the filters are - * being applied really fast; before we see the first - * filter miss error on Rx queue 0. Accumulating enough error messages before - * reacting will make sure we don't cause flush too often. - */ -#define I40E_MAX_FD_PROGRAM_ERROR 256 - /** * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table * @pf: board private structure @@ -6403,6 +9875,7 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up) **/ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) { + struct i40e_vsi *vsi; struct i40e_pf *pf; int i; @@ -6410,15 +9883,10 @@ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) return; pf = veb->pf; - /* depth first... */ - for (i = 0; i < I40E_MAX_VEB; i++) - if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid)) - i40e_veb_link_event(pf->veb[i], link_up); - - /* ... 
now the local VSIs */ - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid)) - i40e_vsi_link_event(pf->vsi[i], link_up); + /* Send link event to contained VSIs */ + i40e_pf_for_each_vsi(pf, i, vsi) + if (vsi->uplink_seid == veb->seid) + i40e_vsi_link_event(vsi, link_up); } /** @@ -6427,30 +9895,28 @@ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) **/ static void i40e_link_event(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + struct i40e_veb *veb = i40e_pf_get_main_veb(pf); u8 new_link_speed, old_link_speed; - i40e_status status; bool new_link, old_link; - - /* save off old link status information */ - pf->hw.phy.link_info_old = pf->hw.phy.link_info; + int status; +#ifdef CONFIG_I40E_DCB + int err; +#endif /* CONFIG_I40E_DCB */ /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; - old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP); - status = i40e_get_link_status(&pf->hw, &new_link); /* On success, disable temp link polling */ - if (status == I40E_SUCCESS) { - if (pf->flags & I40E_FLAG_TEMP_LINK_POLLING) - pf->flags &= ~I40E_FLAG_TEMP_LINK_POLLING; + if (status == 0) { + clear_bit(__I40E_TEMP_LINK_POLLING, pf->state); } else { /* Enable link polling temporarily until i40e_get_link_status - * returns I40E_SUCCESS + * returns 0 */ - pf->flags |= I40E_FLAG_TEMP_LINK_POLLING; + set_bit(__I40E_TEMP_LINK_POLLING, pf->state); dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n", status); return; @@ -6465,22 +9931,49 @@ static void i40e_link_event(struct i40e_pf *pf) new_link == netif_carrier_ok(vsi->netdev))) return; - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - i40e_print_link_message(vsi, new_link); + if (!new_link && old_link) + pf->link_down_events++; + + i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. 
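A standalone sketch of the iterator-macro pattern adopted above (i40e_pf_for_each_vsi): walk a fixed-size pointer array and skip empty slots, so callers stop open-coding the bounds and NULL checks. Types, names, and the macro shape are simplified stand-ins for the driver's real helper.

#include <stddef.h>
#include <stdio.h>

#define MAX_VSI 4

struct vsi { int seid; };

/* simplified: the kernel macro also guards against dangling-else */
#define pf_for_each_vsi(arr, i, v)				\
	for ((i) = 0; (i) < MAX_VSI; (i)++)			\
		if (((v) = (arr)[(i)]) != NULL)

int main(void)
{
	struct vsi a = { .seid = 1 }, b = { .seid = 7 };
	struct vsi *vsis[MAX_VSI] = { &a, NULL, &b, NULL };
	struct vsi *v;
	int i;

	pf_for_each_vsi(vsis, i, v)
		printf("vsi seid=%d\n", v->seid);
	return 0;
}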
*/ - if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb]) - i40e_veb_link_event(pf->veb[pf->lan_veb], new_link); + if (veb) + i40e_veb_link_event(veb, new_link); else i40e_vsi_link_event(vsi, new_link); if (pf->vf) i40e_vc_notify_link_state(pf); - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i40e_ptp_set_increment(pf); +#ifdef CONFIG_I40E_DCB + if (new_link == old_link) + return; + /* Not SW DCB so firmware will take care of default settings */ + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + return; + + /* We cover here only link down, as after link up in case of SW DCB + * SW LLDP agent will take care of setting it up + */ + if (!new_link) { + dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n"); + memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg)); + err = i40e_dcb_sw_default_config(pf); + if (err) { + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + } else { + pf->dcbx_cap = DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + } + } +#endif /* CONFIG_I40E_DCB */ } /** @@ -6489,6 +9982,8 @@ static void i40e_link_event(struct i40e_pf *pf) **/ static void i40e_watchdog_subtask(struct i40e_pf *pf) { + struct i40e_vsi *vsi; + struct i40e_veb *veb; int i; /* if interface is down do nothing */ @@ -6502,22 +9997,21 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) return; pf->service_timer_previous = jiffies; - if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) || - (pf->flags & I40E_FLAG_TEMP_LINK_POLLING)) + if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) || + test_bit(__I40E_TEMP_LINK_POLLING, pf->state)) i40e_link_event(pf); /* Update the stats for active netdevs so the network stack * can look at updated numbers whenever it cares to */ - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && pf->vsi[i]->netdev) - i40e_update_stats(pf->vsi[i]); + i40e_pf_for_each_vsi(pf, i, vsi) + if (vsi->netdev) + i40e_update_stats(vsi); - if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) { + if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) { /* Update the stats for the active switching components */ - for (i = 0; i < I40E_MAX_VEB; i++) - if (pf->veb[i]) - i40e_update_veb_stats(pf->veb[i]); + i40e_pf_for_each_veb(pf, i, veb) + i40e_update_veb_stats(veb); } i40e_ptp_rx_hang(pf); @@ -6557,7 +10051,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf) * precedence before starting a new reset sequence. 
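A standalone sketch of the flag-handling conversion visible throughout this hunk: a plain read-modify-write on a shared bitmask (flags |= BIT(...)) is not atomic, so transient conditions move to set_bit()/clear_bit()/test_bit() on a state word. C11 atomics stand in for the kernel bitops.

#include <stdatomic.h>
#include <stdio.h>

#define TEMP_LINK_POLLING 3	/* hypothetical bit number */

static atomic_ulong state;

static void set_bit_(int nr, atomic_ulong *w)   { atomic_fetch_or(w, 1UL << nr); }
static void clear_bit_(int nr, atomic_ulong *w) { atomic_fetch_and(w, ~(1UL << nr)); }
static int test_bit_(int nr, atomic_ulong *w)   { return (atomic_load(w) >> nr) & 1; }

int main(void)
{
	set_bit_(TEMP_LINK_POLLING, &state);
	printf("polling=%d\n", test_bit_(TEMP_LINK_POLLING, &state));
	clear_bit_(TEMP_LINK_POLLING, &state);
	printf("polling=%d\n", test_bit_(TEMP_LINK_POLLING, &state));
	return 0;
}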
*/ if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); i40e_reset(pf); i40e_rebuild(pf, false, false); } @@ -6578,8 +10072,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf) static void i40e_handle_link_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_aqc_get_link_status *status = - (struct i40e_aqc_get_link_status *)&e->desc.params.raw; + struct i40e_aqc_get_link_status *status = libie_aq_raw(&e->desc); /* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct @@ -6589,12 +10082,26 @@ static void i40e_handle_link_event(struct i40e_pf *pf, */ i40e_link_event(pf); - /* check for unqualified module, if link is down */ - if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && - (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && - (!(status->link_info & I40E_AQ_LINK_UP))) + /* Check if module meets thermal requirements */ + if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) { + dev_err(&pf->pdev->dev, + "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n"); dev_err(&pf->pdev->dev, - "The driver failed to link because an unqualified module was detected.\n"); + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } else { + /* check for unqualified module, if link is down, suppress + * the message if link was forced to be down. + */ + if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && + (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && + (!(status->link_info & I40E_AQ_LINK_UP)) && + (!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) { + dev_err(&pf->pdev->dev, + "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); + dev_err(&pf->pdev->dev, + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } + } } /** @@ -6606,9 +10113,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; u16 pending, i = 0; - i40e_status ret; u16 opcode; u32 oldval; + int ret; u32 val; /* Do not run clean AQ when PF reset fails */ @@ -6616,7 +10123,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) return; /* check for error indications */ - val = rd32(&pf->hw, pf->hw.aq.arq.len); + val = rd32(&pf->hw, I40E_PF_ARQLEN); oldval = val; if (val & I40E_PF_ARQLEN_ARQVFE_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) @@ -6635,9 +10142,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK; } if (oldval != val) - wr32(&pf->hw, pf->hw.aq.arq.len, val); + wr32(&pf->hw, I40E_PF_ARQLEN, val); - val = rd32(&pf->hw, pf->hw.aq.asq.len); + val = rd32(&pf->hw, I40E_PF_ATQLEN); oldval = val; if (val & I40E_PF_ATQLEN_ATQVFE_MASK) { if (pf->hw.debug_mask & I40E_DEBUG_AQ) @@ -6655,7 +10162,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK; } if (oldval != val) - wr32(&pf->hw, pf->hw.aq.asq.len, val); + wr32(&pf->hw, I40E_PF_ATQLEN, val); event.buf_len = I40E_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); @@ -6664,7 +10171,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) do { ret = i40e_clean_arq_element(hw, &event, &pending); - if (ret == I40E_ERR_ADMIN_QUEUE_NO_WORK) + if (ret == -EALREADY) break; else if (ret) { dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret); @@ -6675,7 +10182,9 @@ static void 
i40e_clean_adminq_subtask(struct i40e_pf *pf) switch (opcode) { case i40e_aqc_opc_get_link_status: + rtnl_lock(); i40e_handle_link_event(pf, &event); + rtnl_unlock(); break; case i40e_aqc_opc_send_msg_to_pf: ret = i40e_vc_process_vf_msg(pf, @@ -6689,7 +10198,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n"); #ifdef CONFIG_I40E_DCB rtnl_lock(); - ret = i40e_handle_lldp_event(pf, &event); + i40e_handle_lldp_event(pf, &event); rtnl_unlock(); #endif /* CONFIG_I40E_DCB */ break; @@ -6713,9 +10222,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) opcode); break; } - } while (i++ < pf->adminq_work_limit); + } while (i++ < I40E_AQ_WORK_LIMIT); - if (i < pf->adminq_work_limit) + if (i < I40E_AQ_WORK_LIMIT) clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state); /* re-enable Admin queue interrupt cause */ @@ -6760,7 +10269,7 @@ static void i40e_verify_eeprom(struct i40e_pf *pf) **/ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; @@ -6770,9 +10279,8 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, - "couldn't get PF vsi config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "couldn't get PF vsi config, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return; } ctxt.flags = I40E_AQ_VSI_TYPE_PF; @@ -6782,9 +10290,8 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, - "update vsi switch failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "update vsi switch failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); } } @@ -6796,7 +10303,7 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) **/ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; @@ -6806,9 +10313,8 @@ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, - "couldn't get PF vsi config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "couldn't get PF vsi config, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return; } ctxt.flags = I40E_AQ_VSI_TYPE_PF; @@ -6818,9 +10324,8 @@ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, - "update vsi switch failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "update vsi switch failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); } } @@ -6846,104 +10351,101 @@ static void i40e_config_bridge_mode(struct i40e_veb *veb) } /** - * i40e_reconstitute_veb - rebuild the VEB and anything connected to it + * i40e_reconstitute_veb - rebuild the VEB and VSIs connected to it * @veb: pointer to the VEB instance * - * This is a recursive function that first builds the attached VSIs 
then - * recurses in to build the next layer of VEB. We track the connections - * through our own index numbers because the seid's from the HW could - * change across the reset. + * This is a function that builds the attached VSIs. We track the connections + * through our own index numbers because the seid's from the HW could change + * across the reset. **/ static int i40e_reconstitute_veb(struct i40e_veb *veb) { struct i40e_vsi *ctl_vsi = NULL; struct i40e_pf *pf = veb->pf; - int v, veb_idx; - int ret; + struct i40e_vsi *vsi; + int v, ret; - /* build VSI that owns this VEB, temporarily attached to base VEB */ - for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) { - if (pf->vsi[v] && - pf->vsi[v]->veb_idx == veb->idx && - pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) { - ctl_vsi = pf->vsi[v]; - break; - } - } - if (!ctl_vsi) { - dev_info(&pf->pdev->dev, - "missing owner VSI for veb_idx %d\n", veb->idx); - ret = -ENOENT; - goto end_reconstitute; + /* As we do not maintain PV (port virtualizer) switch element then + * there can be only one non-floating VEB that have uplink to MAC SEID + * and its control VSI is the main one. + */ + if (WARN_ON(veb->uplink_seid && veb->uplink_seid != pf->mac_seid)) { + dev_err(&pf->pdev->dev, + "Invalid uplink SEID for VEB %d\n", veb->idx); + return -ENOENT; } - if (ctl_vsi != pf->vsi[pf->lan_vsi]) - ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid; - ret = i40e_add_vsi(ctl_vsi); - if (ret) { - dev_info(&pf->pdev->dev, - "rebuild of veb_idx %d owner VSI failed: %d\n", - veb->idx, ret); - goto end_reconstitute; + + if (veb->uplink_seid == pf->mac_seid) { + /* Check that the LAN VSI has VEB owning flag set */ + ctl_vsi = i40e_pf_get_main_vsi(pf); + + if (WARN_ON(ctl_vsi->veb_idx != veb->idx || + !(ctl_vsi->flags & I40E_VSI_FLAG_VEB_OWNER))) { + dev_err(&pf->pdev->dev, + "Invalid control VSI for VEB %d\n", veb->idx); + return -ENOENT; + } + + /* Add the control VSI to switch */ + ret = i40e_add_vsi(ctl_vsi); + if (ret) { + dev_err(&pf->pdev->dev, + "Rebuild of owner VSI for VEB %d failed: %d\n", + veb->idx, ret); + return ret; + } + + i40e_vsi_reset_stats(ctl_vsi); } - i40e_vsi_reset_stats(ctl_vsi); /* create the VEB in the switch and move the VSI onto the VEB */ ret = i40e_add_veb(veb, ctl_vsi); if (ret) - goto end_reconstitute; + return ret; - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - veb->bridge_mode = BRIDGE_MODE_VEB; - else - veb->bridge_mode = BRIDGE_MODE_VEPA; - i40e_config_bridge_mode(veb); + if (veb->uplink_seid) { + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) + veb->bridge_mode = BRIDGE_MODE_VEB; + else + veb->bridge_mode = BRIDGE_MODE_VEPA; + i40e_config_bridge_mode(veb); + } /* create the remaining VSIs attached to this VEB */ - for (v = 0; v < pf->num_alloc_vsi; v++) { - if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi) + i40e_pf_for_each_vsi(pf, v, vsi) { + if (vsi == ctl_vsi) continue; - if (pf->vsi[v]->veb_idx == veb->idx) { - struct i40e_vsi *vsi = pf->vsi[v]; - + if (vsi->veb_idx == veb->idx) { vsi->uplink_seid = veb->seid; ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of vsi_idx %d failed: %d\n", v, ret); - goto end_reconstitute; + return ret; } i40e_vsi_reset_stats(vsi); } } - /* create any VEBs attached to this VEB - RECURSION */ - for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) { - if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) { - pf->veb[veb_idx]->uplink_seid = veb->seid; - ret = i40e_reconstitute_veb(pf->veb[veb_idx]); - if (ret) - break; - } - } - -end_reconstitute: return ret; } 
/** * i40e_get_capabilities - get info about the HW * @pf: the PF struct + * @list_type: AQ capability to be queried **/ -static int i40e_get_capabilities(struct i40e_pf *pf) +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type) { - struct i40e_aqc_list_capabilities_element_resp *cap_buf; + struct libie_aqc_list_caps_elem *cap_buf; u16 data_size; int buf_len; int err; - buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); + buf_len = 40 * sizeof(struct libie_aqc_list_caps_elem); do { cap_buf = kzalloc(buf_len, GFP_KERNEL); if (!cap_buf) @@ -6951,45 +10453,61 @@ static int i40e_get_capabilities(struct i40e_pf *pf) /* this loads the data into the hw struct for us */ err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len, - &data_size, - i40e_aqc_opc_list_func_capabilities, - NULL); + &data_size, list_type, + NULL); /* data loaded, buffer no longer needed */ kfree(cap_buf); - if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) { + if (pf->hw.aq.asq_last_status == LIBIE_AQ_RC_ENOMEM) { /* retry with a larger buffer */ buf_len = data_size; - } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { + } else if (pf->hw.aq.asq_last_status != LIBIE_AQ_RC_OK || err) { dev_info(&pf->pdev->dev, - "capability discovery failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, err), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "capability discovery failed, err %pe aq_err %s\n", + ERR_PTR(err), + libie_aq_str(pf->hw.aq.asq_last_status)); return -ENODEV; } } while (err); - if (pf->hw.debug_mask & I40E_DEBUG_USER) - dev_info(&pf->pdev->dev, - "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", - pf->hw.pf_id, pf->hw.func_caps.num_vfs, - pf->hw.func_caps.num_msix_vectors, - pf->hw.func_caps.num_msix_vectors_vf, - pf->hw.func_caps.fd_filters_guaranteed, - pf->hw.func_caps.fd_filters_best_effort, - pf->hw.func_caps.num_tx_qp, - pf->hw.func_caps.num_vsis); - + if (pf->hw.debug_mask & I40E_DEBUG_USER) { + if (list_type == i40e_aqc_opc_list_func_capabilities) { + dev_info(&pf->pdev->dev, + "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", + pf->hw.pf_id, pf->hw.func_caps.num_vfs, + pf->hw.func_caps.num_msix_vectors, + pf->hw.func_caps.num_msix_vectors_vf, + pf->hw.func_caps.fd_filters_guaranteed, + pf->hw.func_caps.fd_filters_best_effort, + pf->hw.func_caps.num_tx_qp, + pf->hw.func_caps.num_vsis); + } else if (list_type == i40e_aqc_opc_list_dev_capabilities) { + dev_info(&pf->pdev->dev, + "switch_mode=0x%04x, function_valid=0x%08x\n", + pf->hw.dev_caps.switch_mode, + pf->hw.dev_caps.valid_functions); + dev_info(&pf->pdev->dev, + "SR-IOV=%d, num_vfs for all function=%u\n", + pf->hw.dev_caps.sr_iov_1_1, + pf->hw.dev_caps.num_vfs); + dev_info(&pf->pdev->dev, + "num_vsis=%u, num_rx:%u, num_tx=%u\n", + pf->hw.dev_caps.num_vsis, + pf->hw.dev_caps.num_rx_qp, + pf->hw.dev_caps.num_tx_qp); + } + } + if (list_type == i40e_aqc_opc_list_func_capabilities) { #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 
1 : 0) \ + pf->hw.func_caps.num_vfs) - if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) { - dev_info(&pf->pdev->dev, - "got num_vsis %d, setting num_vsis to %d\n", - pf->hw.func_caps.num_vsis, DEF_NUM_VSI); - pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + if (pf->hw.revision_id == 0 && + pf->hw.func_caps.num_vsis < DEF_NUM_VSI) { + dev_info(&pf->pdev->dev, + "got num_vsis %d, setting num_vsis to %d\n", + pf->hw.func_caps.num_vsis, DEF_NUM_VSI); + pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + } } - return 0; } @@ -7001,7 +10519,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi); **/ static void i40e_fdir_sb_setup(struct i40e_pf *pf) { - struct i40e_vsi *vsi; + struct i40e_vsi *main_vsi, *vsi; /* quick workaround for an NVM issue that leaves a critical register * uninitialized @@ -7018,7 +10536,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]); } - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* find existing VSI and see if it needs configuring */ @@ -7026,11 +10544,12 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) /* create a new VSI if none exists */ if (!vsi) { - vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, - pf->vsi[pf->lan_vsi]->seid, 0); + main_vsi = i40e_pf_get_main_vsi(pf); + vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, main_vsi->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); return; } } @@ -7053,17 +10572,130 @@ static void i40e_fdir_teardown(struct i40e_pf *pf) } /** + * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs + * @vsi: PF main vsi + * @seid: seid of main or channel VSIs + * + * Rebuilds cloud filters associated with main VSI and channel VSIs if they + * existed before reset + **/ +static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid) +{ + struct i40e_cloud_filter *cfilter; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; + int ret; + + /* Add cloud filters back if they exist */ + hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, + cloud_node) { + if (cfilter->seid != seid) + continue; + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + + if (ret) { + dev_dbg(&pf->pdev->dev, + "Failed to rebuild cloud filter, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); + return ret; + } + } + return 0; +} + +/** + * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset + * @vsi: PF main vsi + * + * Rebuilds channel VSIs if they existed before reset + **/ +static int i40e_rebuild_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->ch_list)) + return 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + break; + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "failed to rebuild channels using uplink_seid %u\n", + vsi->uplink_seid); + return ret; + } + /* Reconfigure TX queues using QTX_CTL register */ + ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "failed to configure TX rings for channel %u\n", + ch->seid); + return ret; + } + /* update 
'next_base_queue' */ + vsi->next_base_queue = vsi->next_base_queue + + ch->num_queue_pairs; + if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + + if (i40e_set_bw_limit(vsi, ch->seid, + ch->max_tx_rate)) + return -EINVAL; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + credits, + ch->seid); + } + ret = i40e_rebuild_cloud_filters(vsi, ch->seid); + if (ret) { + dev_dbg(&vsi->back->pdev->dev, + "Failed to rebuild cloud filters for channel VSI %u\n", + ch->seid); + return ret; + } + } + return 0; +} + +/** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + +/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure - * @lock_acquired: indicates whether or not the lock has been acquired - * before this function was called. * * Close up the VFs and other things in prep for PF Reset. **/ -static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) +static void i40e_prep_for_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - i40e_status ret = 0; + struct i40e_vsi *vsi; + int ret = 0; u32 v; clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state); @@ -7075,16 +10707,11 @@ static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); /* quiesce the VSIs and their queues that are not already DOWN */ - /* pf_quiesce_all_vsi modifies netdev structures -rtnl_lock needed */ - if (!lock_acquired) - rtnl_lock(); i40e_pf_quiesce_all_vsi(pf); - if (!lock_acquired) - rtnl_unlock(); - for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) - pf->vsi[v]->seid = 0; + i40e_pf_for_each_vsi(pf, v, vsi) { + i40e_clean_xps_state(vsi); + vsi->seid = 0; } i40e_shutdown_adminq(&pf->hw); @@ -7096,6 +10723,11 @@ static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) dev_warn(&pf->pdev->dev, "shutdown_lan_hmc failed: %d\n", ret); } + + /* Save the current PTP time so that we can restore the time after the + * reset completes. 
+ */ + i40e_ptp_save_hw_time(pf); } /** @@ -7106,11 +10738,11 @@ static void i40e_send_version(struct i40e_pf *pf) { struct i40e_driver_version dv; - dv.major_version = DRV_VERSION_MAJOR; - dv.minor_version = DRV_VERSION_MINOR; - dv.build_version = DRV_VERSION_BUILD; + dv.major_version = 0xff; + dv.minor_version = 0xff; + dv.build_version = 0xff; dv.subbuild_version = 0; - strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string)); + strscpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string)); i40e_aq_send_driver_version(&pf->hw, &dv, NULL); } @@ -7155,7 +10787,9 @@ static void i40e_get_oem_version(struct i40e_hw *hw) &gen_snap); i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET, &release); - hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release; + hw->nvm.oem_ver = + FIELD_PREP(I40E_OEM_GEN_MASK | I40E_OEM_SNAP_MASK, gen_snap) | + FIELD_PREP(I40E_OEM_RELEASE_MASK, release); hw->nvm.eetrack = I40E_OEM_EETRACK_ID; } @@ -7166,7 +10800,7 @@ static void i40e_get_oem_version(struct i40e_hw *hw) static int i40e_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - i40e_status ret; + int ret; ret = i40e_pf_reset(hw); if (ret) { @@ -7188,32 +10822,80 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; - u8 set_fc_aq_fail = 0; - i40e_status ret; + struct i40e_veb *veb; + int ret; u32 val; int v; - if (test_bit(__I40E_DOWN, pf->state)) + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + is_recovery_mode_reported) + i40e_set_ethtool_ops(vsi->netdev); + + if (test_bit(__I40E_DOWN, pf->state) && + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); /* rebuild the basics for the AdminQ, HMC, and initial HW switch */ ret = i40e_init_adminq(&pf->hw); if (ret) { - dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); goto clear_recovery; } i40e_get_oem_version(&pf->hw); + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { + /* The following delay is necessary for firmware update. 
*/ + mdelay(1000); + } + /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); + /* if we are going out of or into recovery mode we have to act + * accordingly with regard to resources initialization + * and deinitialization + */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (i40e_get_capabilities(pf, + i40e_aqc_opc_list_func_capabilities)) + goto end_unlock; + + if (is_recovery_mode_reported) { + /* we're staying in recovery mode so we'll reinitialize + * misc vector here + */ + if (i40e_setup_misc_vector_for_recovery_mode(pf)) + goto end_unlock; + } else { + if (!lock_acquired) + rtnl_lock(); + /* we're going out of recovery mode so we'll free + * the IRQ allocated specifically for recovery mode + * and restore the interrupt scheme + */ + free_irq(pf->pdev->irq, pf); + i40e_clear_interrupt_scheme(pf); + if (i40e_restore_interrupt_scheme(pf)) + goto end_unlock; + } + + /* tell the firmware that we're starting */ + i40e_send_version(pf); + + /* bail out in case recovery mode was detected, as there is + * no need for further configuration. + */ + goto end_unlock; + } + i40e_clear_pxe_mode(hw); - ret = i40e_get_capabilities(pf); + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (ret) goto end_core_reset; @@ -7230,17 +10912,36 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } #ifdef CONFIG_I40E_DCB - ret = i40e_init_pf_dcb(pf); - if (ret) { - dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; - /* Continue without DCB enabled */ + /* Enable FW to write a default DCB config on link-up + * unless I40E_FLAG_TC_MQPRIO was enabled or DCB + * is not supported with new link speed + */ + if (i40e_is_tc_mqprio_enabled(pf)) { + i40e_aq_set_dcb_parameters(hw, false, NULL); + } else { + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { + i40e_aq_set_dcb_parameters(hw, false, NULL); + dev_warn(&pf->pdev->dev, + "DCB is not supported for X710-T*L 2.5/5G speeds\n"); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + } else { + i40e_aq_set_dcb_parameters(hw, true, NULL); + ret = i40e_init_pf_dcb(pf); + if (ret) { + dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", + ret); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + /* Continue without DCB enabled */ + } + } } + #endif /* CONFIG_I40E_DCB */ - /* do basic switch setup */ if (!lock_acquired) rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); + ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock; @@ -7252,64 +10953,49 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) I40E_AQ_EVENT_MEDIA_NA | I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) - dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - - /* make sure our flow control settings are restored */ - ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true); - if (ret) - dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); /* Rebuild the VSIs and VEBs that existed before reset. 
* They are still in our local switch element arrays, so only * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try - * try to recover minimal use by getting the basic PF VSI working. + * to recover minimal use by getting the basic PF VSI working. */ - if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) { + if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); - /* find the one VEB connected to the MAC, and find orphans */ - for (v = 0; v < I40E_MAX_VEB; v++) { - if (!pf->veb[v]) - continue; - - if (pf->veb[v]->uplink_seid == pf->mac_seid || - pf->veb[v]->uplink_seid == 0) { - ret = i40e_reconstitute_veb(pf->veb[v]); - if (!ret) - continue; + /* Rebuild VEBs */ + i40e_pf_for_each_veb(pf, v, veb) { + ret = i40e_reconstitute_veb(veb); + if (!ret) + continue; - /* If Main VEB failed, we're in deep doodoo, - * so give up rebuilding the switch and set up - * for minimal rebuild of PF VSI. - * If orphan failed, we'll report the error - * but try to keep going. - */ - if (pf->veb[v]->uplink_seid == pf->mac_seid) { - dev_info(&pf->pdev->dev, - "rebuild of switch failed: %d, will try to set up simple PF connection\n", - ret); - pf->vsi[pf->lan_vsi]->uplink_seid - = pf->mac_seid; - break; - } else if (pf->veb[v]->uplink_seid == 0) { - dev_info(&pf->pdev->dev, - "rebuild of orphan VEB failed: %d\n", - ret); - } + /* If Main VEB failed, we're in deep doodoo, + * so give up rebuilding the switch and set up + * for minimal rebuild of PF VSI. + * If orphan failed, we'll report the error + * but try to keep going. + */ + if (veb->uplink_seid == pf->mac_seid) { + dev_info(&pf->pdev->dev, + "rebuild of switch failed: %d, will try to set up simple PF connection\n", + ret); + vsi->uplink_seid = pf->mac_seid; + break; + } else if (veb->uplink_seid == 0) { + dev_info(&pf->pdev->dev, + "rebuild of orphan VEB failed: %d\n", + ret); } } } - if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) { + if (vsi->uplink_seid == pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n"); /* no VEB, so rebuild only the Main VSI */ - ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]); + ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); @@ -7317,6 +11003,35 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, + vsi->mqprio_qopt.max_rate[0]); + u64 credits = 0; + + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (ret) + goto end_unlock; + + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + credits, + vsi->seid); + } + + ret = i40e_rebuild_cloud_filters(vsi, vsi->seid); + if (ret) + goto end_unlock; + + /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs + * for this main VSI if they exist + */ + ret = i40e_rebuild_channels(vsi); + if (ret) + goto end_unlock; + /* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO. 
@@ -7331,18 +11046,20 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 		wr32(hw, I40E_REG_MSS, val);
 	}
 
-	if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
+	if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
 		msleep(75);
 		ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
 		if (ret)
-			dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 libie_aq_str(pf->hw.aq.asq_last_status));
 	}
 	/* reinit the misc interrupt */
-	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
 		ret = i40e_setup_misc_vector(pf);
+		if (ret)
+			goto end_unlock;
+	}
 
 	/* Add a filter to drop all Flow control frames from any VSI from being
 	 * transmitted. By doing so we stop a malicious VF from sending out
@@ -7360,6 +11077,14 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	if (!lock_acquired)
 		rtnl_unlock();
 
+	/* Restore promiscuous settings */
+	ret = i40e_set_promiscuous(pf, pf->cur_promisc);
+	if (ret)
+		dev_warn(&pf->pdev->dev,
+			 "Failed to restore promiscuous setting: %s, err %pe aq_err %s\n",
+			 pf->cur_promisc ? "on" : "off",
+			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
+
 	i40e_reset_all_vfs(pf, true);
 
 	/* tell the firmware that we're starting */
@@ -7375,6 +11100,7 @@ end_core_reset:
 	clear_bit(__I40E_RESET_FAILED, pf->state);
 clear_recovery:
 	clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
+	clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state);
 }
 
 /**
@@ -7388,6 +11114,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
 				   bool lock_acquired)
 {
 	int ret;
+
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return;
 	/* Now we wait for GRST to settle out.
 	 * We don't have to delete the VEBs or VSIs from the hw switch
 	 * because the reset will make them disappear.
@@ -7395,6 +11124,8 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
 	ret = i40e_reset(pf);
 	if (!ret)
 		i40e_rebuild(pf, reinit, lock_acquired);
+	else
+		dev_err(&pf->pdev->dev, "%s: i40e_reset() FAILED", __func__);
 }
 
 /**
@@ -7408,11 +11139,72 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
 **/
 static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
 {
-	i40e_prep_for_reset(pf, lock_acquired);
+	i40e_prep_for_reset(pf);
 	i40e_reset_and_rebuild(pf, false, lock_acquired);
 }
 
 /**
+ * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
+ * @pf: board private structure
+ * @vf: pointer to the VF structure
+ * @is_tx: true - for Tx event, false - for Rx
+ */
+static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
+				    bool is_tx)
+{
+	dev_err(&pf->pdev->dev, is_tx ?
+		"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n" :
+		"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
+		is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
+		pf->hw.pf_id,
+		vf->vf_id,
+		vf->default_lan_addr.addr,
+		str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
+}
+
+/**
+ * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
+ */ +static void i40e_print_vfs_mdd_events(struct i40e_pf *pf) +{ + unsigned int i; + + /* check that there are pending MDD events to print */ + if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state)) + return; + + if (!__ratelimit(&pf->mdd_message_rate_limit)) + return; + + for (i = 0; i < pf->num_alloc_vfs; i++) { + struct i40e_vf *vf = &pf->vf[i]; + bool is_printed = false; + + /* only print Rx MDD event message if there are new events */ + if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { + vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count; + i40e_print_vf_mdd_event(pf, vf, false); + is_printed = true; + } + + /* only print Tx MDD event message if there are new events */ + if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) { + vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count; + i40e_print_vf_mdd_event(pf, vf, true); + is_printed = true; + } + + if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) + dev_info(&pf->pdev->dev, + "Use PF Control I/F to re-enable the VF #%d\n", + i); + } +} + +/** * i40e_handle_mdd_event * @pf: pointer to the PF structure * @@ -7422,25 +11214,25 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; bool mdd_detected = false; - bool pf_mdd_detected = false; struct i40e_vf *vf; u32 reg; int i; - if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state)) + if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) { + /* Since the VF MDD event logging is rate limited, check if + * there are pending MDD events. + */ + i40e_print_vfs_mdd_events(pf); return; + } /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { - u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> - I40E_GL_MDET_TX_PF_NUM_SHIFT; - u16 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> - I40E_GL_MDET_TX_VF_NUM_SHIFT; - u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> - I40E_GL_MDET_TX_EVENT_SHIFT; - u16 queue = ((reg & I40E_GL_MDET_TX_QUEUE_MASK) >> - I40E_GL_MDET_TX_QUEUE_SHIFT) - + u8 pf_num = FIELD_GET(I40E_GL_MDET_TX_PF_NUM_MASK, reg); + u16 vf_num = FIELD_GET(I40E_GL_MDET_TX_VF_NUM_MASK, reg); + u8 event = FIELD_GET(I40E_GL_MDET_TX_EVENT_MASK, reg); + u16 queue = FIELD_GET(I40E_GL_MDET_TX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_tx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n", @@ -7450,12 +11242,9 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { - u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> - I40E_GL_MDET_RX_FUNCTION_SHIFT; - u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> - I40E_GL_MDET_RX_EVENT_SHIFT; - u16 queue = ((reg & I40E_GL_MDET_RX_QUEUE_MASK) >> - I40E_GL_MDET_RX_QUEUE_SHIFT) - + u8 func = FIELD_GET(I40E_GL_MDET_RX_FUNCTION_MASK, reg); + u8 event = FIELD_GET(I40E_GL_MDET_RX_EVENT_MASK, reg); + u16 queue = FIELD_GET(I40E_GL_MDET_RX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_rx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n", @@ -7468,115 +11257,59 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); - pf_mdd_detected = true; + dev_dbg(&pf->pdev->dev, "TX driver issue 
detected on PF\n"); } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); - dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n"); - pf_mdd_detected = true; - } - /* Queue belongs to the PF, initiate a reset */ - if (pf_mdd_detected) { - set_bit(__I40E_PF_RESET_REQUESTED, pf->state); - i40e_service_event_schedule(pf); + dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n"); } } /* see if one of the VFs needs its hand slapped */ for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { + bool is_mdd_on_tx = false; + bool is_mdd_on_rx = false; + vf = &(pf->vf[i]); reg = rd32(hw, I40E_VP_MDET_TX(i)); if (reg & I40E_VP_MDET_TX_VALID_MASK) { + set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state); wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF); - vf->num_mdd_events++; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", - i); + vf->mdd_tx_events.count++; + set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); + is_mdd_on_tx = true; } reg = rd32(hw, I40E_VP_MDET_RX(i)); if (reg & I40E_VP_MDET_RX_VALID_MASK) { + set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state); wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF); - vf->num_mdd_events++; - dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", - i); + vf->mdd_rx_events.count++; + set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); + is_mdd_on_rx = true; } - if (vf->num_mdd_events > I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED) { - dev_info(&pf->pdev->dev, - "Too many MDD events on VF %d, disabled\n", i); - dev_info(&pf->pdev->dev, - "Use PF Control I/F to re-enable the VF\n"); - set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); + if ((is_mdd_on_tx || is_mdd_on_rx) && + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) { + /* VF MDD event counters will be cleared by + * reset, so print the event prior to reset. + */ + if (is_mdd_on_rx) + i40e_print_vf_mdd_event(pf, vf, false); + if (is_mdd_on_tx) + i40e_print_vf_mdd_event(pf, vf, true); + + i40e_vc_reset_vf(vf, true); } } - /* re-enable mdd interrupt cause */ - clear_bit(__I40E_MDD_EVENT_PENDING, pf->state); reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); i40e_flush(hw); -} - -/** - * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters - * @pf: board private structure - **/ -static void i40e_sync_udp_filters(struct i40e_pf *pf) -{ - int i; - - /* loop through and set pending bit for all active UDP filters */ - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].port) - pf->pending_udp_bitmap |= BIT_ULL(i); - } - pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; -} - -/** - * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW - * @pf: board private structure - **/ -static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - i40e_status ret; - u16 port; - int i; - - if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC)) - return; - - pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC; - - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_udp_bitmap & BIT_ULL(i)) { - pf->pending_udp_bitmap &= ~BIT_ULL(i); - port = pf->udp_ports[i].port; - if (port) - ret = i40e_aq_add_udp_tunnel(hw, port, - pf->udp_ports[i].type, - NULL, NULL); - else - ret = i40e_aq_del_udp_tunnel(hw, i, NULL); - - if (ret) { - dev_dbg(&pf->pdev->dev, - "%s %s port %d, index %d failed, err %s aq_err %s\n", - pf->udp_ports[i].type ? "vxlan" : "geneve", - port ? 
"add" : "delete", - port, i, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); - pf->udp_ports[i].port = 0; - } - } - } + i40e_print_vfs_mdd_events(pf); } /** @@ -7591,33 +11324,35 @@ static void i40e_service_task(struct work_struct *work) unsigned long start_time = jiffies; /* don't bother with service tasks if a reset is in progress */ - if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_SUSPENDED, pf->state)) return; if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) return; - i40e_detect_recover_hung(pf); - i40e_sync_filters_subtask(pf); - i40e_reset_subtask(pf); - i40e_handle_mdd_event(pf); - i40e_vc_process_vflr_event(pf); - i40e_watchdog_subtask(pf); - i40e_fdir_reinit_subtask(pf); - if (pf->flags & I40E_FLAG_CLIENT_RESET) { - /* Client subtask will reopen next time through. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true); - pf->flags &= ~I40E_FLAG_CLIENT_RESET; - } else { - i40e_client_subtask(pf); - if (pf->flags & I40E_FLAG_CLIENT_L2_CHANGE) { - i40e_notify_client_of_l2_param_changes( - pf->vsi[pf->lan_vsi]); - pf->flags &= ~I40E_FLAG_CLIENT_L2_CHANGE; + if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) { + i40e_detect_recover_hung(pf); + i40e_sync_filters_subtask(pf); + i40e_reset_subtask(pf); + i40e_handle_mdd_event(pf); + i40e_vc_process_vflr_event(pf); + i40e_watchdog_subtask(pf); + i40e_fdir_reinit_subtask(pf); + if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) { + /* Client subtask will reopen next time through. */ + i40e_notify_client_of_netdev_close(pf, true); + } else { + i40e_client_subtask(pf); + if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE, + pf->state)) + i40e_notify_client_of_l2_param_changes(pf); } + i40e_sync_filters_subtask(pf); + } else { + i40e_reset_subtask(pf); } - i40e_sync_filters_subtask(pf); - i40e_sync_udp_filters_subtask(pf); + i40e_clean_adminq_subtask(pf); /* flush memory to make sure state is correct before next watchdog */ @@ -7637,11 +11372,11 @@ static void i40e_service_task(struct work_struct *work) /** * i40e_service_timer - timer callback - * @data: pointer to PF struct + * @t: timer list pointer **/ -static void i40e_service_timer(unsigned long data) +static void i40e_service_timer(struct timer_list *t) { - struct i40e_pf *pf = (struct i40e_pf *)data; + struct i40e_pf *pf = timer_container_of(pf, t, service_timer); mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); @@ -7659,9 +11394,13 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) switch (vsi->type) { case I40E_VSI_MAIN: vsi->alloc_queue_pairs = pf->num_lan_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_q_vectors = pf->num_lan_msix; else vsi->num_q_vectors = 1; @@ -7670,22 +11409,32 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) case I40E_VSI_FDIR: vsi->alloc_queue_pairs = 1; - vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT, - I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); 
vsi->num_q_vectors = pf->num_fdsb_msix; break; case I40E_VSI_VMDQ2: vsi->alloc_queue_pairs = pf->num_vmdq_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_vmdq_msix; break; case I40E_VSI_SRIOV: vsi->alloc_queue_pairs = pf->num_vf_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); break; default: @@ -7693,12 +11442,17 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) return -ENODATA; } + if (is_kdump_kernel()) { + vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS; + vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS; + } + return 0; } /** * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi - * @type: VSI pointer + * @vsi: VSI pointer * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. * * On error: returns error code (negative) @@ -7798,6 +11552,12 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; + if (type == I40E_VSI_MAIN) { + vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); + if (!vsi->af_xdp_zc_qps) + goto err_rings; + } + ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; @@ -7816,6 +11576,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) goto unlock_pf; err_rings: + bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -7825,7 +11586,7 @@ unlock_pf: /** * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI - * @type: VSI pointer + * @vsi: VSI pointer * @free_qvectors: a bool to specify if q_vectors need to be freed. 
* * On error: returns error code (negative) @@ -7878,18 +11639,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) { - dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n", - vsi->idx, vsi->idx, vsi, vsi->type); + dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n", + vsi->idx, vsi->idx, vsi->type); goto unlock_vsi; } if (pf->vsi[vsi->idx] != vsi) { dev_err(&pf->pdev->dev, - "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n", + "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n", pf->vsi[vsi->idx]->idx, - pf->vsi[vsi->idx], pf->vsi[vsi->idx]->type, - vsi->idx, vsi, vsi->type); + vsi->idx, vsi->type); goto unlock_vsi; } @@ -7897,6 +11657,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); @@ -7923,10 +11684,10 @@ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) if (vsi->tx_rings && vsi->tx_rings[0]) { for (i = 0; i < vsi->alloc_queue_pairs; i++) { kfree_rcu(vsi->tx_rings[i], rcu); - vsi->tx_rings[i] = NULL; - vsi->rx_rings[i] = NULL; + WRITE_ONCE(vsi->tx_rings[i], NULL); + WRITE_ONCE(vsi->rx_rings[i], NULL); if (vsi->xdp_rings) - vsi->xdp_rings[i] = NULL; + WRITE_ONCE(vsi->xdp_rings[i], NULL); } } } @@ -7954,13 +11715,13 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) + if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - ring->tx_itr_setting = pf->tx_itr_default; - vsi->tx_rings[i] = ring++; + ring->itr_setting = pf->tx_itr_default; + WRITE_ONCE(vsi->tx_rings[i], ring++); if (!i40e_enabled_xdp_vsi(vsi)) goto setup_rx; @@ -7971,14 +11732,14 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = NULL; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) + if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); - ring->tx_itr_setting = pf->tx_itr_default; - vsi->xdp_rings[i] = ring++; + ring->itr_setting = pf->tx_itr_default; + WRITE_ONCE(vsi->xdp_rings[i], ring++); setup_rx: ring->queue_index = i; @@ -7987,11 +11748,11 @@ setup_rx: ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_rx_desc; ring->size = 0; ring->dcb_tc = 0; - ring->rx_itr_setting = pf->rx_itr_default; - vsi->rx_rings[i] = ring; + ring->itr_setting = pf->rx_itr_default; + WRITE_ONCE(vsi->rx_rings[i], ring); } return 0; @@ -8038,7 +11799,7 @@ static int i40e_init_msix(struct i40e_pf *pf) int v_actual; int iwarp_requested = 0; - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return -ENODEV; /* The number of vectors we'll request will be comprised of: @@ -8077,7 +11838,7 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left -= pf->num_lan_msix; /* reserve one vector for sideband flow director */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, 
pf->flags)) { if (vectors_left) { pf->num_fdsb_msix = 1; v_budget++; @@ -8088,7 +11849,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* can we reserve enough for iWARP? */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { iwarp_requested = pf->num_iwarp_msix; if (!vectors_left) @@ -8100,22 +11861,29 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* any vectors left over go for VMDq support */ - if (pf->flags & I40E_FLAG_VMDQ_ENABLED) { - int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; - int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); - + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) { if (!vectors_left) { pf->num_vmdq_msix = 0; pf->num_vmdq_qps = 0; } else { + int vmdq_vecs_wanted = + pf->num_vmdq_vsis * pf->num_vmdq_qps; + int vmdq_vecs = + min_t(int, vectors_left, vmdq_vecs_wanted); + /* if we're short on vectors for what's desired, we limit * the queues per vmdq. If this is still more than are * available, the user will need to change the number of * queues/vectors used by the PF later with the ethtool * channels command */ - if (vmdq_vecs < vmdq_vecs_wanted) + if (vectors_left < vmdq_vecs_wanted) { pf->num_vmdq_qps = 1; + vmdq_vecs_wanted = pf->num_vmdq_vsis; + vmdq_vecs = min_t(int, + vectors_left, + vmdq_vecs_wanted); + } pf->num_vmdq_msix = pf->num_vmdq_qps; v_budget += vmdq_vecs; @@ -8150,7 +11918,7 @@ static int i40e_init_msix(struct i40e_pf *pf) v_actual = i40e_reserve_msix_vectors(pf, v_budget); if (v_actual < I40E_MIN_MSIX) { - pf->flags &= ~I40E_FLAG_MSIX_ENABLED; + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); kfree(pf->msix_entries); pf->msix_entries = NULL; pci_disable_msix(pf->pdev); @@ -8163,7 +11931,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (!vectors_left) { + } else if (v_actual != v_budget) { /* If we have limited resources, we will start with no vectors * for the special features and then allocate vectors to some * of these features based on the policy and at the end disable @@ -8172,7 +11940,8 @@ static int i40e_init_msix(struct i40e_pf *pf) int vec; dev_info(&pf->pdev->dev, - "MSI-X vector limit reached, attempting to redistribute vectors\n"); + "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n", + v_actual, v_budget); /* reserve the misc vector */ vec = v_actual - 1; @@ -8187,7 +11956,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = 1; break; case 3: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_lan_msix = 1; pf->num_iwarp_msix = 1; } else { @@ -8195,7 +11964,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } break; default: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_iwarp_msix = min_t(int, (vec / 3), iwarp_requested); pf->num_vmdq_vsis = min_t(int, (vec / 3), @@ -8204,7 +11973,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { pf->num_fdsb_msix = 1; vec--; } @@ -8216,21 +11985,20 @@ static int i40e_init_msix(struct i40e_pf *pf) } } - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - (pf->num_fdsb_msix == 0)) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && pf->num_fdsb_msix == 0) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); - pf->flags &= 
~I40E_FLAG_FD_SB_ENABLED; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } - if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && - (pf->num_vmdq_msix == 0)) { + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_msix == 0) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_VMDQ_ENABLED; + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); } - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && - (pf->num_iwarp_msix == 0)) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && + pf->num_iwarp_msix == 0) { dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } i40e_debug(&pf->hw, I40E_DEBUG_INIT, "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", @@ -8246,11 +12014,10 @@ static int i40e_init_msix(struct i40e_pf *pf) * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct - * @cpu: cpu to be used on affinity_mask * * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) +static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector; @@ -8261,14 +12028,10 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) q_vector->vsi = vsi; q_vector->v_idx = v_idx; - cpumask_set_cpu(cpu, &q_vector->affinity_mask); + cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask); if (vsi->netdev) - netif_napi_add(vsi->netdev, &q_vector->napi, - i40e_napi_poll, NAPI_POLL_WEIGHT); - - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.latency_range = I40E_LOW_LATENCY; + netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll); /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; @@ -8286,25 +12049,20 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int err, v_idx, num_q_vectors, current_cpu; + int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_q_vectors = vsi->num_q_vectors; - else if (vsi == pf->vsi[pf->lan_vsi]) + else if (vsi->type == I40E_VSI_MAIN) num_q_vectors = 1; else return -EINVAL; - current_cpu = cpumask_first(cpu_online_mask); - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu); + err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; - current_cpu = cpumask_next(current_cpu, cpu_online_mask); - if (unlikely(current_cpu >= nr_cpu_ids)) - current_cpu = cpumask_first(cpu_online_mask); } return 0; @@ -8325,48 +12083,48 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) int vectors = 0; ssize_t size; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { vectors = i40e_init_msix(pf); if (vectors < 0) { - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_RSS_ENABLED | - I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_SRIOV_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_VMDQ_ENABLED); + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + 
clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); } } - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) && - (pf->flags & I40E_FLAG_MSI_ENABLED)) { + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && + test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n"); vectors = pci_enable_msi(pf->pdev); if (vectors < 0) { dev_info(&pf->pdev->dev, "MSI init failed - %d\n", vectors); - pf->flags &= ~I40E_FLAG_MSI_ENABLED; + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); } vectors = 1; /* one MSI or Legacy vector */ } - if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED))) + if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) && + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n"); /* set up vector assignment tracking */ size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors); pf->irq_pile = kzalloc(size, GFP_KERNEL); - if (!pf->irq_pile) { - dev_err(&pf->pdev->dev, "error allocating irq_pile memory\n"); + if (!pf->irq_pile) return -ENOMEM; - } + pf->irq_pile->num_entries = vectors; - pf->irq_pile->search_hint = 0; /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); @@ -8375,6 +12133,101 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) } /** + * i40e_restore_interrupt_scheme - Restore the interrupt scheme + * @pf: private board data structure + * + * Restore the interrupt scheme that was cleared when we suspended the + * device. This should be called during resume to re-allocate the q_vectors + * and reacquire IRQs. + */ +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi; + int err, i; + + /* We cleared the MSI and MSI-X flags when disabling the old interrupt + * scheme. We need to re-enabled them here in order to attempt to + * re-acquire the MSI or MSI-X vectors + */ + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); + + err = i40e_init_interrupt_scheme(pf); + if (err) + return err; + + /* Now that we've re-acquired IRQs, we need to remap the vectors and + * rings together again. + */ + i40e_pf_for_each_vsi(pf, i, vsi) { + err = i40e_vsi_alloc_q_vectors(vsi); + if (err) + goto err_unwind; + + i40e_vsi_map_rings_to_vectors(vsi); + } + + err = i40e_setup_misc_vector(pf); + if (err) + goto err_unwind; + + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) + i40e_client_update_msix_info(pf); + + return 0; + +err_unwind: + while (i--) { + if (pf->vsi[i]) + i40e_vsi_free_q_vectors(pf->vsi[i]); + } + + return err; +} + +/** + * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle + * non queue events in recovery mode + * @pf: board private structure + * + * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage + * the non-queue interrupts, e.g. AdminQ and errors in recovery mode. + * This is handled differently than in recovery mode since no Tx/Rx resources + * are being allocated. 
+ **/ +static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) +{ + int err; + + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { + err = i40e_setup_misc_vector(pf); + + if (err) { + dev_info(&pf->pdev->dev, + "MSI-X misc vector request failed, error %d\n", + err); + return err; + } + } else { + u32 flags = test_bit(I40E_FLAG_MSI_ENA, pf->flags) ? 0 : IRQF_SHARED; + + err = request_irq(pf->pdev->irq, i40e_intr, flags, + pf->int_name, pf); + + if (err) { + dev_info(&pf->pdev->dev, + "MSI/legacy misc vector request failed, error %d\n", + err); + return err; + } + i40e_enable_misc_int_causes(pf); + i40e_irq_dynamic_enable_icr0(pf); + } + + return 0; +} + +/** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure * @@ -8387,13 +12240,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err = 0; - /* Only request the irq if this is the first time through, and - * not when we're rebuilding after a Reset - */ - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Only request the IRQ once, the first time through. */ + if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); dev_info(&pf->pdev->dev, "request_irq for %s failed: %d\n", pf->int_name, err); @@ -8405,55 +12257,16 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) /* associate no queues to the misc vector */ wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST); - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1); i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); return err; } /** - * i40e_config_rss_aq - Prepare for RSS using AQ commands - * @vsi: vsi structure - * @seed: RSS hash seed - **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - int ret = 0; - - if (seed) { - struct i40e_aqc_get_set_rss_key_data *seed_dw = - (struct i40e_aqc_get_set_rss_key_data *)seed; - ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; - - ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - return ret; -} - -/** * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands * @vsi: Pointer to vsi structure * @seed: Buffter to store the hash keys @@ -8474,24 +12287,22 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, (struct i40e_aqc_get_set_rss_key_data *)seed); if (ret) { dev_info(&pf->pdev->dev, - "Cannot get RSS key, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "Cannot get RSS key, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); return ret; } } if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? 
true : false; + bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { dev_info(&pf->pdev->dev, - "Cannot get RSS lut, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "Cannot get RSS lut, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); return ret; } } @@ -8500,46 +12311,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, } /** - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used - * @vsi: VSI structure - **/ -static int i40e_vsi_config_rss(struct i40e_vsi *vsi) -{ - u8 seed[I40E_HKEY_ARRAY_SIZE]; - struct i40e_pf *pf = vsi->back; - u8 *lut; - int ret; - - if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) - return 0; - - if (!vsi->rss_size) - vsi->rss_size = min_t(int, pf->alloc_rss_size, - vsi->num_queue_pairs); - if (!vsi->rss_size) - return -EINVAL; - - lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - /* Use the user configured hash keys and lookup table if there is one, - * otherwise use default - */ - if (vsi->rss_lut_user) - memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); - else - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - if (vsi->rss_hkey_user) - memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); - else - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); - kfree(lut); - - return ret; -} - -/** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure * @seed: RSS hash seed @@ -8640,7 +12411,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_config_rss_aq(vsi, seed, lut, lut_size); else return i40e_config_rss_reg(vsi, seed, lut, lut_size); @@ -8651,7 +12422,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @vsi: Pointer to VSI structure * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries - * lut_size: Size of buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ @@ -8659,7 +12430,7 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_get_rss_aq(vsi, seed, lut, lut_size); else return i40e_get_rss_reg(vsi, seed, lut, lut_size); @@ -8687,7 +12458,7 @@ void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, **/ static int i40e_pf_config_rss(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 seed[I40E_HKEY_ARRAY_SIZE]; u8 *lut; struct i40e_hw *hw = &pf->hw; @@ -8698,7 +12469,7 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); - hena |= i40e_pf_get_default_rss_hena(pf); + hena |= i40e_pf_get_default_rss_hashcfg(pf); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); @@ -8713,8 +12484,13 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) /* Determine the RSS 
size of the VSI */ if (!vsi->rss_size) { u16 qcount; - - qcount = vsi->num_queue_pairs / vsi->tc_config.numtc; + /* If the firmware does something weird during VSI init, we + * could end up with zero TCs. Check for that to avoid + * divide-by-zero. It probably won't pass traffic, but it also + * won't panic. + */ + qcount = vsi->num_queue_pairs / + (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1); vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount); } if (!vsi->rss_size) @@ -8754,19 +12530,22 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) **/ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); int new_rss_size; - if (!(pf->flags & I40E_FLAG_RSS_ENABLED)) + if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) return 0; + queue_count = min_t(int, queue_count, num_online_cpus()); new_rss_size = min_t(int, queue_count, pf->rss_size_max); if (queue_count != vsi->num_queue_pairs) { u16 qcount; vsi->req_queue_pairs = queue_count; - i40e_prep_for_reset(pf, true); + i40e_prep_for_reset(pf); + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return pf->alloc_rss_size; pf->alloc_rss_size = new_rss_size; @@ -8796,11 +12575,11 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition * @pf: board private structure **/ -i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf) +int i40e_get_partition_bw_setting(struct i40e_pf *pf) { - i40e_status status; bool min_valid, max_valid; u32 max_bw, min_bw; + int status; status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw, &min_valid, &max_valid); @@ -8819,10 +12598,12 @@ i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf) * i40e_set_partition_bw_setting - Set BW settings for this PF partition * @pf: board private structure **/ -i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf) +int i40e_set_partition_bw_setting(struct i40e_pf *pf) { struct i40e_aqc_configure_partition_bw_data bw_data; - i40e_status status; + int status; + + memset(&bw_data, 0, sizeof(bw_data)); /* Set the valid bit for this PF */ bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id)); @@ -8836,85 +12617,54 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf) } /** - * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition + * i40e_is_total_port_shutdown_enabled - read NVM and return value + * if total port shutdown feature is enabled for this PF * @pf: board private structure **/ -i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf) -{ - /* Commit temporary BW setting to permanent NVM image */ - enum i40e_admin_queue_err last_aq_status; - i40e_status ret; - u16 nvm_word; - - if (pf->hw.partition_id != 1) { - dev_info(&pf->pdev->dev, - "Commit BW only works on partition 1! 
This is partition %d", - pf->hw.partition_id); - ret = I40E_NOT_SUPPORTED; - goto bw_commit_out; +static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf) +{ +#define I40E_TOTAL_PORT_SHUTDOWN_ENABLED BIT(4) +#define I40E_FEATURES_ENABLE_PTR 0x2A +#define I40E_CURRENT_SETTING_PTR 0x2B +#define I40E_LINK_BEHAVIOR_WORD_OFFSET 0x2D +#define I40E_LINK_BEHAVIOR_WORD_LENGTH 0x1 +#define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED BIT(0) +#define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH 4 + u16 sr_emp_sr_settings_ptr = 0; + u16 features_enable = 0; + u16 link_behavior = 0; + int read_status = 0; + bool ret = false; + + read_status = i40e_read_nvm_word(&pf->hw, + I40E_SR_EMP_SR_SETTINGS_PTR, + &sr_emp_sr_settings_ptr); + if (read_status) + goto err_nvm; + read_status = i40e_read_nvm_word(&pf->hw, + sr_emp_sr_settings_ptr + + I40E_FEATURES_ENABLE_PTR, + &features_enable); + if (read_status) + goto err_nvm; + if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) { + read_status = i40e_read_nvm_module_data(&pf->hw, + I40E_SR_EMP_SR_SETTINGS_PTR, + I40E_CURRENT_SETTING_PTR, + I40E_LINK_BEHAVIOR_WORD_OFFSET, + I40E_LINK_BEHAVIOR_WORD_LENGTH, + &link_behavior); + if (read_status) + goto err_nvm; + link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH); + ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior; } + return ret; - /* Acquire NVM for read access */ - ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ); - last_aq_status = pf->hw.aq.asq_last_status; - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot acquire NVM for read access, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - - /* Read word 0x10 of NVM - SW compatibility word 1 */ - ret = i40e_aq_read_nvm(&pf->hw, - I40E_SR_NVM_CONTROL_WORD, - 0x10, sizeof(nvm_word), &nvm_word, - false, NULL); - /* Save off last admin queue command status before releasing - * the NVM - */ - last_aq_status = pf->hw.aq.asq_last_status; - i40e_release_nvm(&pf->hw); - if (ret) { - dev_info(&pf->pdev->dev, "NVM read error, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - - /* Wait a bit for NVM release to complete */ - msleep(50); - - /* Acquire NVM for write access */ - ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE); - last_aq_status = pf->hw.aq.asq_last_status; - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot acquire NVM for write access, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - /* Write it back out unchanged to initiate update NVM, - * which will force a write of the shadow (alt) RAM to - * the NVM - thus storing the bandwidth values permanently. 
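i40e_is_total_port_shutdown_enabled() above is a pointer chase through NVM Shadow RAM: one word locates the EMP settings section, a features word inside that section gates the feature, and a link-behavior word carries a 4-bit field per port. An illustrative sketch of the chase; read_word() stands in for i40e_read_nvm_word(), and the section-pointer offset is a placeholder, not a value to rely on:

    #include <linux/bits.h>
    #include <linux/types.h>

    #define EX_SR_EMP_SETTINGS_PTR  0x48    /* placeholder pointer word */
    #define EX_FEATURES_ENABLE_PTR  0x2A    /* offsets from the hunk above */
    #define EX_LINK_BEHAVIOR_OFF    0x2D
    #define EX_TOTAL_PORT_SHUTDOWN  BIT(4)
    #define EX_OS_FORCED            BIT(0)

    static bool ex_port_shutdown_enabled(u16 (*read_word)(u16 off), u8 port)
    {
            u16 section = read_word(EX_SR_EMP_SETTINGS_PTR);
            u16 features = read_word(section + EX_FEATURES_ENABLE_PTR);

            if (!(features & EX_TOTAL_PORT_SHUTDOWN))
                    return false;

            /* each port owns a 4-bit behavior field within the word */
            return (read_word(section + EX_LINK_BEHAVIOR_OFF) >>
                    (port * 4)) & EX_OS_FORCED;
    }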
- */ - ret = i40e_aq_update_nvm(&pf->hw, - I40E_SR_NVM_CONTROL_WORD, - 0x10, sizeof(nvm_word), - &nvm_word, true, NULL); - /* Save off last admin queue command status before releasing - * the NVM - */ - last_aq_status = pf->hw.aq.asq_last_status; - i40e_release_nvm(&pf->hw); - if (ret) - dev_info(&pf->pdev->dev, - "BW settings NOT SAVED, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, last_aq_status)); -bw_commit_out: - +err_nvm: + dev_warn(&pf->pdev->dev, + "total-port-shutdown feature is off due to read nvm error: %pe\n", + ERR_PTR(read_status)); return ret; } @@ -8930,15 +12680,16 @@ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; + u16 pow; /* Set default capability flags */ - pf->flags = I40E_FLAG_RX_CSUM_ENABLED | - I40E_FLAG_MSI_ENABLED | - I40E_FLAG_MSIX_ENABLED; + bitmap_zero(pf->flags, I40E_PF_FLAGS_NBITS); + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); /* Set default ITR */ - pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF; - pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF; + pf->rx_itr_default = I40E_ITR_RX_DEF; + pf->tx_itr_default = I40E_ITR_TX_DEF; /* Depending on PF configurations, it is possible that the RSS * maximum might end up larger than the available queues @@ -8948,15 +12699,20 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); + + /* find the next higher power-of-2 of num cpus */ + pow = roundup_pow_of_two(num_online_cpus()); + pf->rss_size_max = min_t(int, pf->rss_size_max, pow); + if (pf->hw.func_caps.rss) { - pf->flags |= I40E_FLAG_RSS_ENABLED; + set_bit(I40E_FLAG_RSS_ENA, pf->flags); pf->alloc_rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); } /* MFP mode enabled */ if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) { - pf->flags |= I40E_FLAG_MFP_ENABLED; + set_bit(I40E_FLAG_MFP_ENA, pf->flags); dev_info(&pf->pdev->dev, "MFP mode Enabled\n"); if (i40e_get_partition_bw_setting(pf)) { dev_warn(&pf->pdev->dev, @@ -8973,90 +12729,57 @@ static int i40e_sw_init(struct i40e_pf *pf) if ((pf->hw.func_caps.fd_filters_guaranteed > 0) || (pf->hw.func_caps.fd_filters_best_effort > 0)) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE; - if (pf->flags & I40E_FLAG_MFP_ENABLED && + set_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && pf->hw.num_partitions > 1) dev_info(&pf->pdev->dev, "Flow Director Sideband mode Disabled in MFP mode\n"); else - pf->flags |= I40E_FLAG_FD_SB_ENABLED; + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); pf->fdir_pf_filter_count = pf->hw.func_caps.fd_filters_guaranteed; pf->hw.fdir_shared_filter_count = pf->hw.func_caps.fd_filters_best_effort; } - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4))) { - pf->flags |= I40E_FLAG_RESTART_AUTONEG; - /* No DCB support for FW < v4.33 */ - pf->flags |= I40E_FLAG_NO_DCB_SUPPORT; - } - - /* Disable FW LLDP if FW < v4.3 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4))) - pf->flags |= I40E_FLAG_STOP_FW_LLDP; - - /* Use the FW Set LLDP MIB API if FW > v4.40 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || - (pf->hw.aq.fw_maj_ver >= 5))) - 
pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB; + /* Enable HW ATR eviction if possible */ + if (test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) + set_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags); - if (pf->hw.func_caps.vmdq) { + if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; - pf->flags |= I40E_FLAG_VMDQ_ENABLED; + set_bit(I40E_FLAG_VMDQ_ENA, pf->flags); pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } - if (pf->hw.func_caps.iwarp) { - pf->flags |= I40E_FLAG_IWARP_ENABLED; + if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) { + set_bit(I40E_FLAG_IWARP_ENA, pf->flags); /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } + /* Stopping FW LLDP engine is supported on XL710 and X722 + * starting from FW versions determined in i40e_init_adminq. + * Stopping the FW LLDP engine is not supported on XL710 + * if NPAR is functioning so unset this hw flag in this case. + */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + pf->hw.func_caps.npar_enable) + clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps); #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; - pf->flags |= I40E_FLAG_SRIOV_ENABLED; + set_bit(I40E_FLAG_SRIOV_ENA, pf->flags); pf->num_req_vfs = min_t(int, pf->hw.func_caps.num_vfs, I40E_MAX_VF_COUNT); } #endif /* CONFIG_PCI_IOV */ - if (pf->hw.mac.type == I40E_MAC_X722) { - pf->flags |= I40E_FLAG_RSS_AQ_CAPABLE - | I40E_FLAG_128_QP_RSS_CAPABLE - | I40E_FLAG_HW_ATR_EVICT_CAPABLE - | I40E_FLAG_OUTER_UDP_CSUM_CAPABLE - | I40E_FLAG_WB_ON_ITR_CAPABLE - | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE - | I40E_FLAG_NO_PCI_LINK_CHECK - | I40E_FLAG_USE_SET_LLDP_MIB - | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE - | I40E_FLAG_PTP_L4_CAPABLE - | I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE; - } else if ((pf->hw.aq.api_maj_ver > 1) || - ((pf->hw.aq.api_maj_ver == 1) && - (pf->hw.aq.api_min_ver > 4))) { - /* Supported in FW API version higher than 1.4 */ - pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; - } - - /* Enable HW ATR eviction if possible */ - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) - pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED; - - pf->eeprom_version = 0xDEAD; pf->lan_veb = I40E_NO_VEB; pf->lan_vsi = I40E_NO_VSI; /* By default FW has this off for performance reasons */ - pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + clear_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); /* set up queue assignment tracking */ size = sizeof(struct i40e_lump_tracking) @@ -9067,10 +12790,19 @@ static int i40e_sw_init(struct i40e_pf *pf) goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; - pf->qp_pile->search_hint = 0; pf->tx_timeout_recovery_level = 1; + if (pf->hw.mac.type != I40E_MAC_X722 && + i40e_is_total_port_shutdown_enabled(pf)) { + /* Link down on close must be on when total port shutdown + * is enabled for a given port + */ + set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags); + set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags); + dev_info(&pf->pdev->dev, + "total-port-shutdown was enabled, link-down-on-close is forced on\n"); + } mutex_init(&pf->switch_mutex); sw_init_done: @@ -9093,29 +12825,33 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) */ if (features & NETIF_F_NTUPLE) { /* Enable filters and mark for reset */ - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) need_reset = true; - /* enable FD_SB only if there is MSI-X vector */ - if 
(pf->num_fdsb_msix > 0) - pf->flags |= I40E_FLAG_FD_SB_ENABLED; + /* enable FD_SB only if there is MSI-X vector and no cloud + * filters exist + */ + if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); + } } else { /* turn off filters, mark for reset and clear SW filter list */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { need_reset = true; i40e_fdir_filter_exit(pf); } - pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_SB_AUTO_DISABLED); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); + /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; /* if ATR was auto disabled it can be re-enabled. */ - if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { - pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); - } } return need_reset; } @@ -9143,6 +12879,29 @@ static void i40e_clear_rss_lut(struct i40e_vsi *vsi) } /** + * i40e_set_loopback - turn on/off loopback mode on underlying PF + * @vsi: ptr to VSI + * @ena: flag to indicate the on/off setting + */ +static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena) +{ + bool if_running = netif_running(vsi->netdev) && + !test_and_set_bit(__I40E_VSI_DOWN, vsi->state); + int ret; + + if (if_running) + i40e_down(vsi); + + ret = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL); + if (ret) + netdev_err(vsi->netdev, "Failed to toggle loopback state\n"); + if (if_running) + i40e_up(vsi); + + return ret; +} + +/** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting @@ -9167,128 +12926,67 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); - need_reset = i40e_set_ntuple(pf, features); + if (!(features & NETIF_F_HW_TC) && + (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + dev_err(&pf->pdev->dev, + "Offloaded tc filters active, can't turn hw_tc_offload off"); + return -EINVAL; + } - if (need_reset) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) + i40e_del_all_macvlans(vsi); - return 0; -} + need_reset = i40e_set_ntuple(pf, features); -/** - * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port - * @pf: board private structure - * @port: The UDP port to look up - * - * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found - **/ -static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) -{ - u8 i; + if (need_reset) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].port == port) - return i; - } + if ((features ^ netdev->features) & NETIF_F_LOOPBACK) + return i40e_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); - return i; + return 0; } -/** - * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_add(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int 
i40e_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 next_idx; - u8 idx; - - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "port %d already offloaded\n", port); - return; - } - - /* Now check if there is space to add the new port */ - next_idx = i40e_get_udp_port_idx(pf, 0); + struct i40e_hw *hw = &np->vsi->back->hw; + u8 type, filter_index; + int ret; - if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { - netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n", - port); - return; - } + type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN : + I40E_AQC_TUNNEL_TYPE_NGE; - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) - return; - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; - break; - default: - return; + ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, + NULL); + if (ret) { + netdev_info(netdev, "add UDP port failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); + return -EIO; } - /* New port: add it and mark its index in the bitmap */ - pf->udp_ports[next_idx].port = port; - pf->pending_udp_bitmap |= BIT_ULL(next_idx); - pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; + udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); + return 0; } -/** - * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away - * @netdev: This physical port's netdev - * @ti: Tunnel endpoint information - **/ -static void i40e_udp_tunnel_del(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int i40e_udp_tunnel_unset_port(struct net_device *netdev, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - u16 port = ntohs(ti->port); - u8 idx; - - idx = i40e_get_udp_port_idx(pf, port); - - /* Check if port already exists */ - if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS) - goto not_found; + struct i40e_hw *hw = &np->vsi->back->hw; + int ret; - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN) - goto not_found; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE) - goto not_found; - break; - default: - goto not_found; + ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); + if (ret) { + netdev_info(netdev, "delete UDP port failed, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); + return -EIO; } - /* if port exists, set it to 0 (mark for deletion) - * and make it pending - */ - pf->udp_ports[idx].port = 0; - pf->pending_udp_bitmap |= BIT_ULL(idx); - pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; - - return; -not_found: - netdev_warn(netdev, "UDP port %d was not found, not deleting\n", - port); + return 0; } static int i40e_get_phys_port_id(struct net_device *netdev, @@ -9298,7 +12996,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; - if (!(pf->flags & 
I40E_FLAG_PORT_ID_VALID)) + if (!test_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps)) return -EOPNOTSUPP; ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id)); @@ -9313,18 +13011,22 @@ static int i40e_get_phys_port_id(struct net_device *netdev, * @tb: pointer to array of nladdr (unused) * @dev: the net device pointer * @addr: the MAC address entry being added + * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @notified: whether notification was emitted + * @extack: netlink extended ack, unused currently */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, bool *notified, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; int err = 0; - if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED)) + if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) return -EOPNOTSUPP; if (vid) { @@ -9358,6 +13060,8 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * i40e_ndo_bridge_setlink - Set the hardware bridge mode * @dev: the netdev being configured * @nlh: RTNL message + * @flags: bridge flags + * @extack: netlink extended ack * * Inserts a new hardware bridge if not already created and * enables the bridging mode requested (VEB or VEPA). If the @@ -9370,41 +13074,37 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], **/ static int i40e_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct i40e_veb *veb = NULL; struct nlattr *attr, *br_spec; - int i, rem; + struct i40e_veb *veb; + int rem; /* Only for PF VSI for now */ - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for PF VSI */ - for (i = 0; i < I40E_MAX_VEB && !veb; i++) { - if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid) - veb = pf->veb[i]; - } + veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; - - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - mode = nla_get_u16(attr); if ((mode != BRIDGE_MODE_VEPA) && (mode != BRIDGE_MODE_VEB)) return -EINVAL; /* Insert a new HW bridge */ if (!veb) { - veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid, + veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { veb->bridge_mode = mode; @@ -9419,11 +13119,10 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, veb->bridge_mode = mode; /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */ if (mode == BRIDGE_MODE_VEB) - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), - true); + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } } @@ -9451,19 +13150,14 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct 
i40e_veb *veb = NULL; - int i; + struct i40e_veb *veb; /* Only for PF VSI for now */ - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for the PF VSI */ - for (i = 0; i < I40E_MAX_VEB && !veb; i++) { - if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid) - veb = pf->veb[i]; - } - + veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); if (!veb) return 0; @@ -9497,12 +13191,12 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; @@ -9520,7 +13214,7 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, } /* No need to validate L4LEN as TCP is the only protocol with a - * a flexible value and we support all possible values supported + * flexible value and we support all possible values supported * by TCP, which is at most 15 dwords */ @@ -9533,33 +13227,50 @@ out_err: * i40e_xdp_setup - add/remove an XDP program * @vsi: VSI to changed * @prog: XDP program + * @extack: netlink extended ack **/ -static int i40e_xdp_setup(struct i40e_vsi *vsi, - struct bpf_prog *prog) +static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { - int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int frame_size = i40e_max_vsi_frame_size(vsi, prog); struct i40e_pf *pf = vsi->back; struct bpf_prog *old_prog; bool need_reset; int i; - /* Don't allow frames that span over multiple buffers */ - if (frame_size > vsi->rx_buf_len) + /* VSI shall be deleted in a moment, block loading new programs */ + if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) return -EINVAL; - if (!i40e_enabled_xdp_vsi(vsi) && !prog) - return 0; + /* Don't allow frames that span over multiple buffers */ + if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); + return -EINVAL; + } /* When turning XDP on->off/off->on we reset and rebuild the rings. */ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); - if (need_reset) - i40e_prep_for_reset(pf, true); + i40e_prep_for_reset(pf); old_prog = xchg(&vsi->xdp_prog, prog); - if (need_reset) + if (need_reset) { + if (!prog) { + xdp_features_clear_redirect_target(vsi->netdev); + /* Wait until ndo_xsk_wakeup completes. */ + synchronize_rcu(); + } i40e_reset_and_rebuild(pf, true, true); + } + + if (!i40e_enabled_xdp_vsi(vsi) && prog) { + if (i40e_realloc_rx_bi_zc(vsi, true)) + return -ENOMEM; + } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { + if (i40e_realloc_rx_bi_zc(vsi, false)) + return -ENOMEM; + } for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); @@ -9567,16 +13278,283 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, if (old_prog) bpf_prog_put(old_prog); + /* Kick start the NAPI context if there is an AF_XDP socket open + * on that queue id. This so that receiving will start. 
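The XDP attach path in i40e_xdp_setup() publishes the new program with xchg(), so the data path only ever observes the old or the new pointer, and the old program's reference is dropped after the swap. The publish-then-release step in isolation:

    #include <linux/atomic.h>
    #include <linux/bpf.h>

    static void ex_swap_xdp_prog(struct bpf_prog **slot, struct bpf_prog *prog)
    {
            /* readers see old or new, never a torn pointer */
            struct bpf_prog *old = xchg(slot, prog);

            if (old)
                    bpf_prog_put(old);      /* release our reference */
    }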
+ */ + if (need_reset && prog) { + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->xdp_rings[i]->xsk_pool) + (void)i40e_xsk_wakeup(vsi->netdev, i, + XDP_WAKEUP_RX); + xdp_features_set_redirect_target(vsi->netdev, true); + } + return 0; } /** - * i40e_xdp - implements ndo_xdp for i40e + * i40e_enter_busy_conf - Enters busy config state + * @vsi: vsi + * + * Returns 0 on success, <0 for failure. + **/ +static int i40e_enter_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + int timeout = 50; + + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + return 0; +} + +/** + * i40e_exit_busy_conf - Exits busy config state + * @vsi: vsi + **/ +static void i40e_exit_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + clear_bit(__I40E_CONFIG_BUSY, pf->state); +} + +/** + * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair) +{ + memset(&vsi->rx_rings[queue_pair]->rx_stats, 0, + sizeof(vsi->rx_rings[queue_pair]->rx_stats)); + memset(&vsi->tx_rings[queue_pair]->stats, 0, + sizeof(vsi->tx_rings[queue_pair]->stats)); + if (i40e_enabled_xdp_vsi(vsi)) { + memset(&vsi->xdp_rings[queue_pair]->stats, 0, + sizeof(vsi->xdp_rings[queue_pair]->stats)); + } +} + +/** + * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair) +{ + i40e_clean_tx_ring(vsi->tx_rings[queue_pair]); + if (i40e_enabled_xdp_vsi(vsi)) { + /* Make sure that in-progress ndo_xdp_xmit calls are + * completed. + */ + synchronize_rcu(); + i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]); + } + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); +} + +/** + * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + **/ +static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_q_vector *q_vector = rxr->q_vector; + + if (!vsi->netdev) + return; + + /* All rings in a qp belong to the same qvector. */ + if (q_vector->rx.ring || q_vector->tx.ring) { + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); + } +} + +/** + * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + * + * Returns 0 on success, <0 on failure. + **/ +static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_pf *pf = vsi->back; + int pf_q, ret = 0; + + pf_q = vsi->base_queue + queue_pair; + ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, + false /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + i40e_control_rx_q(pf, pf_q, enable); + ret = i40e_pf_rxq_wait(pf, pf_q, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + /* Due to HW errata, on Rx disable only, the register can + * indicate done before it really is. 
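i40e_enter_busy_conf() above is a sleeping try-lock: test_and_set_bit() claims the __I40E_CONFIG_BUSY bit, and the caller polls for roughly 50-100 ms before giving up with -EBUSY. The same latch in isolation:

    #include <linux/bitops.h>
    #include <linux/delay.h>
    #include <linux/errno.h>

    static int ex_enter_busy(unsigned long *state, int bit)
    {
            int timeout = 50;

            while (test_and_set_bit(bit, state)) {
                    if (!--timeout)
                            return -EBUSY;  /* holder took too long */
                    usleep_range(1000, 2000);
            }
            return 0;       /* owned; release with clear_bit() */
    }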
Needs 50ms to be sure + */ + if (!enable) + mdelay(50); + + if (!i40e_enabled_xdp_vsi(vsi)) + return ret; + + ret = i40e_control_wait_tx_q(vsi->seid, pf, + pf_q + vsi->alloc_queue_pairs, + true /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d XDP Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + } + + return ret; +} + +/** + * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* All rings in a qp belong to the same qvector. */ + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) + i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); + else + i40e_irq_dynamic_enable_icr0(pf); + + i40e_flush(hw); +} + +/** + * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* For simplicity, instead of removing the qp interrupt causes + * from the interrupt linked list, we simply disable the interrupt, and + * leave the list intact. + * + * All rings in a qp belong to the same qvector. + */ + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { + u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; + + wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); + i40e_flush(hw); + synchronize_irq(pf->msix_entries[intpf].vector); + } else { + /* Legacy and MSI mode - this stops all interrupt handling */ + wr32(hw, I40E_PFINT_ICR0_ENA, 0); + wr32(hw, I40E_PFINT_DYN_CTL0, 0); + i40e_flush(hw); + synchronize_irq(pf->pdev->irq); + } +} + +/** + * i40e_queue_pair_disable - Disables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. + **/ +int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_enter_busy_conf(vsi); + if (err) + return err; + + i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); + i40e_queue_pair_clean_rings(vsi, queue_pair); + i40e_queue_pair_reset_stats(vsi, queue_pair); + + return err; +} + +/** + * i40e_queue_pair_enable - Enables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. 
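i40e_queue_pair_disable_irq() above uses the standard quiesce ordering: mask the cause register, flush the posted write, then synchronize_irq() so no handler is still running when the rings are drained. The skeleton of that ordering:

    #include <linux/interrupt.h>
    #include <linux/io.h>

    static void ex_quiesce_vector(void __iomem *mask_reg, unsigned int irq)
    {
            writel(0, mask_reg);    /* 1. mask the interrupt cause */
            readl(mask_reg);        /* 2. flush the posted write */
            synchronize_irq(irq);   /* 3. wait out in-flight handlers */
    }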
+ **/ +int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]); + if (err) + return err; + + if (i40e_enabled_xdp_vsi(vsi)) { + err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]); + if (err) + return err; + } + + err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]); + if (err) + return err; + + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */); + i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */); + i40e_queue_pair_enable_irq(vsi, queue_pair); + + i40e_exit_busy_conf(vsi); + + return err; +} + +/** + * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command **/ static int i40e_xdp(struct net_device *dev, - struct netdev_xdp *xdp) + struct netdev_bpf *xdp) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -9586,10 +13564,10 @@ static int i40e_xdp(struct net_device *dev, switch (xdp->command) { case XDP_SETUP_PROG: - return i40e_xdp_setup(vsi, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_attached = i40e_enabled_xdp_vsi(vsi); - return 0; + return i40e_xdp_setup(vsi, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + return i40e_xsk_pool_setup(vsi, xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -9604,7 +13582,6 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = i40e_set_mac, .ndo_change_mtu = i40e_change_mtu, - .ndo_do_ioctl = i40e_ioctl, .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, @@ -9612,22 +13589,28 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, + .ndo_get_vf_stats = i40e_get_vf_stats, .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, .ndo_set_vf_trust = i40e_ndo_set_vf_trust, - .ndo_udp_tunnel_add = i40e_udp_tunnel_add, - .ndo_udp_tunnel_del = i40e_udp_tunnel_del, .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, - .ndo_xdp = i40e_xdp, + .ndo_bpf = i40e_xdp, + .ndo_xdp_xmit = i40e_xdp_xmit, + .ndo_xsk_wakeup = i40e_xsk_wakeup, + .ndo_dfwd_add_station = i40e_fwd_add, + .ndo_dfwd_del_station = i40e_fwd_del, + .ndo_hwtstamp_get = i40e_ptp_hwtstamp_get, + .ndo_hwtstamp_set = i40e_ptp_hwtstamp_set, }; /** @@ -9658,8 +13641,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np->vsi = vsi; hw_enc_features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | + NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_SOFT_FEATURES | NETIF_F_TSO | @@ -9668,16 +13650,21 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_UDP_L4 | NETIF_F_SCTP_CRC | NETIF_F_RXHASH | NETIF_F_RXCSUM | 0; - if (!(pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE)) + if (!test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps)) netdev->gso_partial_features |= 
NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= hw_enc_features; @@ -9685,17 +13672,40 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE; +#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) + + netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES; + netdev->features |= NETIF_F_GSO_PARTIAL | + I40E_GSO_PARTIAL_FEATURES; + + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES; + + /* enable macvlan offloads */ + netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; + hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - netdev->hw_features |= hw_features; + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) + hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + + netdev->hw_features |= hw_features | NETIF_F_LOOPBACK; netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + netdev->features &= ~NETIF_F_HW_TC; + if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); @@ -9713,11 +13723,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_add_mac_filter(vsi, mac_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); + + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; + netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD; } else { - /* relate the VSI_VMDQ name to the VSI_MAIN name */ - snprintf(netdev->name, IFNAMSIZ, "%sv%%d", - pf->vsi[pf->lan_vsi]->netdev->name); - random_ether_addr(mac_addr); + /* Relate the VSI_VMDQ name to the VSI_MAIN name. 
Note that we + * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to + * the end, which is 4 bytes long, so force truncation of the + * original name by IFNAMSIZ - 4 + */ + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + + snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d", IFNAMSIZ - 4, + main_vsi->netdev->name); + eth_random_addr(mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_add_mac_filter(vsi, mac_addr); @@ -9742,9 +13764,12 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) i40e_add_mac_filter(vsi, broadcast); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, mac_addr); + eth_hw_addr_set(netdev, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); + /* i40iw_net_event() reads 16 bytes from neigh->primary_key */ + netdev->neigh_priv_len = sizeof(u32) * 4; + netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; /* Setup netdev TC information */ @@ -9756,8 +13781,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* MTU range: 68 - 9706 */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = I40E_MAX_RXBUFFER - - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD; return 0; } @@ -9789,7 +13813,7 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; /* Uplink is not a bridge so default to VEB */ - if (vsi->veb_idx == I40E_NO_VEB) + if (vsi->veb_idx >= I40E_MAX_VEB) return 1; veb = pf->veb[vsi->veb_idx]; @@ -9846,10 +13870,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.flags = I40E_AQ_VSI_TYPE_PF; if (ret) { dev_info(&pf->pdev->dev, - "couldn't get PF vsi config, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "couldn't get PF vsi config, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); return -ENOENT; } vsi->info = ctxt.info; @@ -9860,8 +13883,32 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) enabled_tc = i40e_pf_get_tc_map(pf); + /* Source pruning is enabled by default, so the flag is + * negative logic - if it's set, we need to fiddle with + * the VSI to disable source pruning. 
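The VMDq naming fix earlier in this hunk reserves room for the "v%d" suffix inside IFNAMSIZ by truncating the parent name with a "%.*s" precision; the doubled "%%d" survives as a literal "%d" for the core to expand at registration time. In isolation:

    #include <linux/netdevice.h>

    static void ex_child_name_template(char *buf, const char *parent)
    {
            /* keep 4 bytes free for "v%d" plus the terminating NUL */
            snprintf(buf, IFNAMSIZ, "%.*sv%%d", IFNAMSIZ - 4, parent);
    }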
+ */ + if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) { + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + ctxt.vf_num = 0; + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "update vsi failed, err %d aq_err %s\n", + ret, + libie_aq_str(pf->hw.aq.asq_last_status)); + ret = -ENOENT; + goto err; + } + } + /* MFP mode setup queue map and update VSI */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; @@ -9871,10 +13918,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, - "update vsi failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "update vsi failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); ret = -ENOENT; goto err; } @@ -9890,13 +13936,14 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) */ ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { + /* Single TC condition is not fatal, + * message and continue + */ dev_info(&pf->pdev->dev, - "failed to configure TCs for main VSI tc_map 0x%08x, err %s aq_err %s\n", + "failed to configure TCs for main VSI tc_map 0x%08x, err %pe aq_err %s\n", enabled_tc, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); - ret = -ENOENT; + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); } } break; @@ -9907,7 +13954,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_PF; - if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) && + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) && (i40e_is_vsi_uplink_mode_veb(vsi))) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); @@ -9955,7 +14002,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= @@ -9988,10 +14035,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ret = i40e_aq_add_vsi(hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, - "add vsi failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + "add vsi failed, err %pe aq_err %s\n", + ERR_PTR(ret), + libie_aq_str(pf->hw.aq.asq_last_status)); ret = -ENOENT; goto err; } @@ -10001,28 +14047,27 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) vsi->id = ctxt.vsi_number; } - vsi->active_filters = 0; - clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); spin_lock_bh(&vsi->mac_filter_hash_lock); + vsi->active_filters = 0; /* If macvlan filters already exist, force them to get loaded */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { f->state = I40E_FILTER_NEW; f_count++; } spin_unlock_bh(&vsi->mac_filter_hash_lock); + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); if (f_count) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - pf->flags |= I40E_FLAG_FILTER_SYNC; + 
set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); } /* Update VSI BW information */ ret = i40e_vsi_get_bw_info(vsi); if (ret) { dev_info(&pf->pdev->dev, - "couldn't get vsi bw info, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + "couldn't get vsi bw info, err %pe aq_err %s\n", + ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); /* VSI is already added so not tearing that up */ ret = 0; } @@ -10041,7 +14086,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; struct hlist_node *h; - struct i40e_veb *veb = NULL; + struct i40e_veb *veb; struct i40e_pf *pf; u16 uplink_seid; int i, n, bkt; @@ -10054,13 +14099,13 @@ int i40e_vsi_release(struct i40e_vsi *vsi) vsi->seid, vsi->uplink_seid); return -ENODEV; } - if (vsi == pf->vsi[pf->lan_vsi] && - !test_bit(__I40E_DOWN, pf->state)) { + if (vsi->type == I40E_VSI_MAIN && !test_bit(__I40E_DOWN, pf->state)) { dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } - + set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; + if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { vsi->netdev_registered = false; @@ -10074,6 +14119,9 @@ int i40e_vsi_release(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* clear the sync flag on all filters */ @@ -10101,29 +14149,28 @@ int i40e_vsi_release(struct i40e_vsi *vsi) /* If this was the last thing on the VEB, except for the * controlling VSI, remove the VEB, which puts the controlling - * VSI onto the next level down in the switch. + * VSI onto the uplink port. * * Well, okay, there's one more exception here: don't remove - * the orphan VEBs yet. We'll wait for an explicit remove request + * the floating VEBs yet. We'll wait for an explicit remove request * from up the network stack. */ - for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) { - if (pf->vsi[i] && - pf->vsi[i]->uplink_seid == uplink_seid && - (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) { - n++; /* count the VSIs */ - } - } - for (i = 0; i < I40E_MAX_VEB; i++) { - if (!pf->veb[i]) - continue; - if (pf->veb[i]->uplink_seid == uplink_seid) - n++; /* count the VEBs */ - if (pf->veb[i]->seid == uplink_seid) - veb = pf->veb[i]; + veb = i40e_pf_get_veb_by_seid(pf, uplink_seid); + if (veb && veb->uplink_seid) { + n = 0; + + /* Count non-controlling VSIs present on the VEB */ + i40e_pf_for_each_vsi(pf, i, vsi) + if (vsi->uplink_seid == uplink_seid && + (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) + n++; + + /* If there is no VSI except the control one then release + * the VEB and put the control VSI onto VEB uplink. + */ + if (!n) + i40e_veb_release(veb); } - if (n == 0 && veb && veb->uplink_seid != 0) - i40e_veb_release(veb); return 0; } @@ -10167,7 +14214,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) /* In Legacy mode, we do not have to get any other vector since we * piggyback on the misc/ICR0 for queue interrupts. 
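Setting __I40E_MACVLAN_SYNC_PENDING above follows the driver's deferred-work idiom: hot paths only mark a state bit, and the service task later claims the work so the slow filter sync runs outside any hot lock. Schematically (assuming the service side claims with test_and_clear_bit(), as elsewhere in this file):

    #include <linux/bitops.h>

    /* fast path: note that a sync is owed */
    static void ex_mark_pending(unsigned long *state, int bit)
    {
            set_bit(bit, state);
    }

    /* service task: claim and perform the work at most once */
    static void ex_service(unsigned long *state, int bit, void (*sync)(void))
    {
            if (test_and_clear_bit(bit, state))
                    sync();
    }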
*/ - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; if (vsi->num_q_vectors) vsi->base_vector = i40e_get_lump(pf, pf->irq_pile, @@ -10196,9 +14243,9 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { + struct i40e_vsi *main_vsi; u16 alloc_queue_pairs; struct i40e_pf *pf; - u8 enabled_tc; int ret; if (!vsi) @@ -10230,10 +14277,10 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) /* Update the FW view of the VSI. Force a reset of TC and queue * layout configurations. */ - enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc; - pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0; - pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid; - i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc); + main_vsi = i40e_pf_get_main_vsi(pf); + main_vsi->seid = pf->main_vsi_seid; + i40e_vsi_reconfig_tc(main_vsi); + if (vsi->type == I40E_VSI_MAIN) i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr); @@ -10254,6 +14301,8 @@ err_rings: free_netdev(vsi->netdev); vsi->netdev = NULL; } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: i40e_vsi_clear(vsi); @@ -10279,8 +14328,8 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, struct i40e_vsi *vsi = NULL; struct i40e_veb *veb = NULL; u16 alloc_queue_pairs; - int ret, i; int v_idx; + int ret; /* The requested uplink_seid must be either * - the PF's port seid @@ -10295,21 +14344,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, * * Find which uplink_seid we were given and create a new VEB if needed */ - for (i = 0; i < I40E_MAX_VEB; i++) { - if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) { - veb = pf->veb[i]; - break; - } - } - + veb = i40e_pf_get_veb_by_seid(pf, uplink_seid); if (!veb && uplink_seid != pf->mac_seid) { - - for (i = 0; i < pf->num_alloc_vsi; i++) { - if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) { - vsi = pf->vsi[i]; - break; - } - } + vsi = i40e_pf_get_vsi_by_seid(pf, uplink_seid); if (!vsi) { dev_info(&pf->pdev->dev, "no such uplink_seid %d\n", uplink_seid); @@ -10317,13 +14354,13 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, } if (vsi->uplink_seid == pf->mac_seid) - veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid, + veb = i40e_veb_setup(pf, pf->mac_seid, vsi->seid, vsi->tc_config.enabled_tc); else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) - veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid, + veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) { + if (vsi->type != I40E_VSI_MAIN) { dev_info(&vsi->back->pdev->dev, "New VSI creation error, uplink seid of LAN VSI expected.\n"); return NULL; @@ -10332,16 +14369,13 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, * already enabled, in which case we can't force VEPA * mode. 
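A recurring cleanup in this diff replaces open-coded "walk the table for a matching seid" loops with helpers such as i40e_pf_get_veb_by_seid(). An illustrative reimplementation of the shape of that helper, with abbreviated types:

    #include <linux/types.h>

    struct ex_veb { u16 seid; };

    struct ex_pf {
            struct ex_veb *veb[16];         /* I40E_MAX_VEB stand-in */
    };

    static struct ex_veb *ex_get_veb_by_seid(struct ex_pf *pf, u16 seid)
    {
            int i;

            for (i = 0; i < 16; i++)
                    if (pf->veb[i] && pf->veb[i]->seid == seid)
                            return pf->veb[i];

            return NULL;                    /* no bridge with that seid */
    }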
		 */
-		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+		if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
			veb->bridge_mode = BRIDGE_MODE_VEPA;
-			pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
		}
		i40e_config_bridge_mode(veb);
	}
-	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
-		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
-			veb = pf->veb[i];
-	}
+	veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid);
	if (!veb) {
		dev_info(&pf->pdev->dev, "couldn't add VEB\n");
		return NULL;
	}
@@ -10387,32 +14421,29 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
	switch (vsi->type) {
	/* setup the netdev if needed */
	case I40E_VSI_MAIN:
-		/* Apply relevant filters if a platform-specific mac
-		 * address was selected.
-		 */
-		if (!!(pf->flags & I40E_FLAG_PF_MAC)) {
-			ret = i40e_macaddr_init(vsi, pf->hw.mac.addr);
-			if (ret) {
-				dev_warn(&pf->pdev->dev,
-					 "could not set up macaddr; err %d\n",
-					 ret);
-			}
-		}
	case I40E_VSI_VMDQ2:
		ret = i40e_config_netdev(vsi);
		if (ret)
			goto err_netdev;
-		ret = register_netdev(vsi->netdev);
+		ret = i40e_netif_set_realnum_tx_rx_queues(vsi);
		if (ret)
			goto err_netdev;
+		if (vsi->type == I40E_VSI_MAIN) {
+			ret = i40e_devlink_create_port(pf);
+			if (ret)
+				goto err_netdev;
+			SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
+		}
+		ret = register_netdev(vsi->netdev);
+		if (ret)
+			goto err_dl_port;
		vsi->netdev_registered = true;
		netif_carrier_off(vsi->netdev);
#ifdef CONFIG_I40E_DCB
		/* Setup DCB netlink interface */
		i40e_dcbnl_setup(vsi);
#endif /* CONFIG_I40E_DCB */
-		/* fall through */
-
+		fallthrough;
	case I40E_VSI_FDIR:
		/* set up vectors and rings if needed */
		ret = i40e_vsi_setup_vectors(vsi);
@@ -10428,18 +14459,21 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
		i40e_vsi_reset_stats(vsi);
		break;
-
	default:
		/* no netdev or rings for the other VSI types */
		break;
	}

-	if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
-	    (vsi->type == I40E_VSI_VMDQ2)) {
+	if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) &&
+	    vsi->type == I40E_VSI_VMDQ2) {
		ret = i40e_vsi_config_rss(vsi);
+		if (ret)
+			goto err_config;
	}
	return vsi;

+err_config:
+	i40e_vsi_clear_rings(vsi);
err_rings:
	i40e_vsi_free_q_vectors(vsi);
err_msix:
@@ -10449,6 +14483,9 @@ err_msix:
		free_netdev(vsi->netdev);
		vsi->netdev = NULL;
	}
+err_dl_port:
+	if (vsi->type == I40E_VSI_MAIN)
+		i40e_devlink_destroy_port(pf);
err_netdev:
	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
err_vsi:
@@ -10477,9 +14514,8 @@ static int i40e_veb_get_bw_info(struct i40e_veb *veb)
				       &bw_data, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "query veb bw config failed, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+			 "query veb bw config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status));
		goto out;
	}
@@ -10487,9 +14523,8 @@ static int i40e_veb_get_bw_info(struct i40e_veb *veb)
					   &ets_data, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "query veb bw ets config failed, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+			 "query veb bw ets config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status));
		goto out;
	}
@@ -10568,29 +14603,24 @@ static void i40e_switch_branch_release(struct i40e_veb *branch)
	struct i40e_pf *pf = branch->pf;
	u16 branch_seid = branch->seid;
	u16 veb_idx = branch->idx;
+	struct i40e_vsi *vsi;
+	struct i40e_veb *veb;
	int i;

	/* release any VEBs on this VEB - RECURSION */
-	for (i = 0; i < I40E_MAX_VEB; i++) {
-		if (!pf->veb[i])
-			continue;
-		if (pf->veb[i]->uplink_seid == branch->seid)
-			i40e_switch_branch_release(pf->veb[i]);
-	}
+	i40e_pf_for_each_veb(pf, i, veb)
+		if (veb->uplink_seid == branch->seid)
+			i40e_switch_branch_release(veb);

	/* Release the VSIs on this VEB, but not the owner VSI.
	 *
	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
	 *       the VEB itself, so don't use (*branch) after this loop.
	 */
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (!pf->vsi[i])
-			continue;
-		if (pf->vsi[i]->uplink_seid == branch_seid &&
-		    (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
-			i40e_vsi_release(pf->vsi[i]);
-		}
-	}
+	i40e_pf_for_each_vsi(pf, i, vsi)
+		if (vsi->uplink_seid == branch_seid &&
+		    (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
+			i40e_vsi_release(vsi);

	/* There's one corner case where the VEB might not have been
	 * removed, so double check it here and remove it if needed.
@@ -10628,38 +14658,35 @@ static void i40e_veb_clear(struct i40e_veb *veb)
 **/
void i40e_veb_release(struct i40e_veb *veb)
{
-	struct i40e_vsi *vsi = NULL;
+	struct i40e_vsi *vsi, *vsi_it;
	struct i40e_pf *pf;
	int i, n = 0;

	pf = veb->pf;

	/* find the remaining VSI and check for extras */
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
+	i40e_pf_for_each_vsi(pf, i, vsi_it)
+		if (vsi_it->uplink_seid == veb->seid) {
+			if (vsi_it->flags & I40E_VSI_FLAG_VEB_OWNER)
+				vsi = vsi_it;
			n++;
-			vsi = pf->vsi[i];
		}
-	}
-	if (n != 1) {
+
+	/* Floating VEB has to be empty and regular one must have
+	 * single owner VSI.
	 */
+	if ((veb->uplink_seid && n != 1) || (!veb->uplink_seid && n != 0)) {
		dev_info(&pf->pdev->dev,
			 "can't remove VEB %d with %d VSIs left\n",
			 veb->seid, n);
		return;
	}

-	/* move the remaining VSI to uplink veb */
-	vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
+	/* For regular VEB move the owner VSI to uplink port */
	if (veb->uplink_seid) {
+		vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
		vsi->uplink_seid = veb->uplink_seid;
-		if (veb->uplink_seid == pf->mac_seid)
-			vsi->veb_idx = I40E_NO_VEB;
-		else
-			vsi->veb_idx = veb->veb_idx;
-	} else {
-		/* floating VEB */
-		vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
-		vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
+		vsi->veb_idx = I40E_NO_VEB;
	}

	i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
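The new release check packs two rules into one condition. Stated positively as a hypothetical helper (illustration only, not part of the patch):

static bool i40e_veb_is_releasable(const struct i40e_veb *veb, int n)
{
	/* a regular VEB must hold exactly its one owner VSI;
	 * a floating VEB (uplink_seid == 0) must hold none
	 */
	return veb->uplink_seid ? n == 1 : n == 0;
}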
@@ -10674,19 +14701,18 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = veb->pf;
-	bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
+	bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags);
	int ret;

-	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
-			      veb->enabled_tc, false,
+	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi ? vsi->seid : 0,
+			      veb->enabled_tc, vsi ? false : true,
			      &veb->seid, enable_stats, NULL);

	/* get a VEB from the hardware */
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "couldn't add VEB, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+			 "couldn't add VEB, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
		return -EPERM;
	}
@@ -10695,24 +14721,24 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
				   &veb->stats_idx, NULL, NULL, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "couldn't get VEB statistics idx, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+			 "couldn't get VEB statistics idx, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
		return -EPERM;
	}
	ret = i40e_veb_get_bw_info(veb);
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "couldn't get VEB bw info, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+			 "couldn't get VEB bw info, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
		i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
		return -ENOENT;
	}

-	vsi->uplink_seid = veb->seid;
-	vsi->veb_idx = veb->idx;
-	vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+	if (vsi) {
+		vsi->uplink_seid = veb->seid;
+		vsi->veb_idx = veb->idx;
+		vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+	}

	return 0;
}
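With i40e_add_veb() now tolerating vsi == NULL, a floating relay can be requested by passing both seids as zero, per the "floating relay" rule quoted in i40e_veb_setup() below. Hypothetical usage sketch, derived from those hunks rather than shown in the patch:

/* floating VEB: no uplink, no owner VSI, enabled_tc 0 defaults to TC0 */
veb = i40e_veb_setup(pf, 0, 0, 0);
if (!veb)
	dev_info(&pf->pdev->dev, "floating VEB setup failed\n");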
@@ -10720,7 +14746,6 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
/**
 * i40e_veb_setup - Set up a VEB
 * @pf: board private structure
- * @flags: VEB setup flags
 * @uplink_seid: the switch element to link to
 * @vsi_seid: the initial VSI seid
 * @enabled_tc: Enabled TC bit-map
@@ -10733,12 +14758,12 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
 * Returns pointer to the successfully allocated VEB sw struct on
 * success, otherwise returns NULL on failure.
 **/
-struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
-				u16 uplink_seid, u16 vsi_seid,
-				u8 enabled_tc)
+struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 uplink_seid,
+				u16 vsi_seid, u8 enabled_tc)
{
-	struct i40e_veb *veb, *uplink_veb = NULL;
-	int vsi_idx, veb_idx;
+	struct i40e_vsi *vsi = NULL;
+	struct i40e_veb *veb;
+	int veb_idx;
	int ret;

	/* if one seid is 0, the other must be 0 to create a floating relay */
@@ -10751,26 +14776,11 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
	}

	/* make sure there is such a vsi and uplink */
-	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
-		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
-			break;
-	if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) {
-		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
-			 vsi_seid);
-		return NULL;
-	}
-
-	if (uplink_seid && uplink_seid != pf->mac_seid) {
-		for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
-			if (pf->veb[veb_idx] &&
-			    pf->veb[veb_idx]->seid == uplink_seid) {
-				uplink_veb = pf->veb[veb_idx];
-				break;
-			}
-		}
-		if (!uplink_veb) {
-			dev_info(&pf->pdev->dev,
-				 "uplink seid %d not found\n", uplink_seid);
+	if (vsi_seid) {
+		vsi = i40e_pf_get_vsi_by_seid(pf, vsi_seid);
+		if (!vsi) {
+			dev_err(&pf->pdev->dev, "vsi seid %d not found\n",
+				vsi_seid);
			return NULL;
		}
	}
@@ -10780,16 +14790,15 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
	if (veb_idx < 0)
		goto err_alloc;
	veb = pf->veb[veb_idx];
-	veb->flags = flags;
	veb->uplink_seid = uplink_seid;
-	veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
	veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);

	/* create the VEB in the switch */
-	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
+	ret = i40e_add_veb(veb, vsi);
	if (ret)
		goto err_veb;
-	if (vsi_idx == pf->lan_vsi)
+
+	if (vsi && vsi->idx == pf->lan_vsi)
		pf->lan_veb = veb->idx;

	return veb;
@@ -10817,6 +14826,7 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
	u8 element_type = ele->element_type;
	u16 seid = le16_to_cpu(ele->seid);
+	struct i40e_veb *veb;

	if (printconfig)
		dev_info(&pf->pdev->dev,
@@ -10831,17 +14841,15 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
		/* Main VEB? */
		if (uplink_seid != pf->mac_seid)
			break;
-		if (pf->lan_veb == I40E_NO_VEB) {
+		veb = i40e_pf_get_main_veb(pf);
+		if (!veb) {
			int v;

			/* find existing or else empty VEB */
-			for (v = 0; v < I40E_MAX_VEB; v++) {
-				if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
-					pf->lan_veb = v;
-					break;
-				}
-			}
-			if (pf->lan_veb == I40E_NO_VEB) {
+			veb = i40e_pf_get_veb_by_seid(pf, seid);
+			if (veb) {
+				pf->lan_veb = veb->idx;
+			} else {
				v = i40e_veb_mem_alloc(pf);
				if (v < 0)
					break;
@@ -10849,10 +14857,14 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
			}
		}

-		pf->veb[pf->lan_veb]->seid = seid;
-		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
-		pf->veb[pf->lan_veb]->pf = pf;
-		pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
+		/* Try to get again main VEB as pf->lan_veb may have changed */
+		veb = i40e_pf_get_main_veb(pf);
+		if (!veb)
+			break;
+
+		veb->seid = seid;
+		veb->uplink_seid = pf->mac_seid;
+		veb->pf = pf;
		break;
	case I40E_SWITCH_ELEMENT_TYPE_VSI:
		if (num_reported != 1)
@@ -10861,12 +14873,11 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
		 * the PF's VSI
		 */
		pf->mac_seid = uplink_seid;
-		pf->pf_seid = downlink_seid;
		pf->main_vsi_seid = seid;
		if (printconfig)
			dev_info(&pf->pdev->dev,
				 "pf_seid=%d main_vsi_seid=%d\n",
-				 pf->pf_seid, pf->main_vsi_seid);
+				 downlink_seid, pf->main_vsi_seid);
		break;
	case I40E_SWITCH_ELEMENT_TYPE_PF:
	case I40E_SWITCH_ELEMENT_TYPE_VF:
@@ -10912,10 +14923,8 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
						&next_seid, NULL);
		if (ret) {
			dev_info(&pf->pdev->dev,
-				 "get switch config failed err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+				 "get switch config failed err %d aq_err %s\n",
+				 ret, libie_aq_str(pf->hw.aq.asq_last_status));
			kfree(aq_buf);
			return -ENOENT;
		}
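Most error paths in this patch move from i40e_stat_str()/i40e_aq_str() to the standard %pe printk extension plus libie_aq_str(). %pe applied to an ERR_PTR()-wrapped errno prints the symbolic name (e.g. -ENOENT) when CONFIG_SYMBOLIC_ERRNAME is enabled, and the raw value otherwise. Minimal illustration, not taken from the patch:

int ret = -ENOENT;

dev_info(&pf->pdev->dev, "query failed, err %pe aq_err %s\n",
	 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));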
@@ -10945,11 +14954,13 @@
 * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
 * @pf: board private structure
 * @reinit: if the Main VSI needs to be re-initialized.
+ * @lock_acquired: indicates whether or not the lock has been acquired
 *
 * Returns 0 on success, negative value on failure
 **/
-static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
+static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
+	struct i40e_vsi *main_vsi;
	u16 flags = 0;
	int ret;
@@ -10957,9 +14968,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
	ret = i40e_fetch_switch_configuration(pf, false);
	if (ret) {
		dev_info(&pf->pdev->dev,
-			 "couldn't fetch switch config, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+			 "couldn't fetch switch config, err %pe aq_err %s\n",
+			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
		return ret;
	}
	i40e_pf_reset_stats(pf);
@@ -10971,55 +14981,58 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
	 */
	if ((pf->hw.pf_id == 0) &&
-	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+	    !test_bit(I40E_FLAG_TRUE_PROMISC_ENA, pf->flags)) {
		flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		pf->last_sw_conf_flags = flags;
+	}

	if (pf->hw.pf_id == 0) {
		u16 valid_flags;

		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
-		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags,
+		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
						NULL);
-		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
+		if (ret && pf->hw.aq.asq_last_status != LIBIE_AQ_RC_ESRCH) {
			dev_info(&pf->pdev->dev,
-				 "couldn't set switch config bits, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+				 "couldn't set switch config bits, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 libie_aq_str(pf->hw.aq.asq_last_status));
			/* not a fatal problem, just keep going */
		}
+		pf->last_sw_conf_valid_flags = valid_flags;
	}

	/* first time setup */
-	if (pf->lan_vsi == I40E_NO_VSI || reinit) {
-		struct i40e_vsi *vsi = NULL;
+	main_vsi = i40e_pf_get_main_vsi(pf);
+	if (!main_vsi || reinit) {
+		struct i40e_veb *veb;
		u16 uplink_seid;

		/* Set up the PF VSI associated with the PF's main VSI
		 * that is already in the HW switch
		 */
-		if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
-			uplink_seid = pf->veb[pf->lan_veb]->seid;
+		veb = i40e_pf_get_main_veb(pf);
+		if (veb)
+			uplink_seid = veb->seid;
		else
			uplink_seid = pf->mac_seid;
-		if (pf->lan_vsi == I40E_NO_VSI)
-			vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
+		if (!main_vsi)
+			main_vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN,
						  uplink_seid, 0);
		else if (reinit)
-			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
-		if (!vsi) {
+			main_vsi = i40e_vsi_reinit_setup(main_vsi);
+		if (!main_vsi) {
			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+			i40e_cloud_filter_exit(pf);
			i40e_fdir_teardown(pf);
			return -EAGAIN;
		}
	} else {
		/* force a reset of TC and queue layout configurations */
-		u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
-
-		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
-		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
-		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
+		main_vsi->seid = pf->main_vsi_seid;
+		i40e_vsi_reconfig_tc(main_vsi);
	}
-	i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]);
+	i40e_vlan_stripping_disable(main_vsi);

	i40e_fdir_sb_setup(pf);
@@ -11034,20 +15047,22 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
	/* enable RSS in the HW, even for only one queue, as the stack can use
	 * the hash
	 */
-	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
+	if (test_bit(I40E_FLAG_RSS_ENA, pf->flags))
		i40e_pf_config_rss(pf);

	/* fill in link information and enable LSE reporting */
	i40e_link_event(pf);

-	/* Initialize user-specific link properties */
-	pf->fc_autoneg_status = ((pf->hw.phy.link_info.an_info &
-				  I40E_AQ_AN_COMPLETED) ? true : false);
-
	i40e_ptp_init(pf);

+	if (!lock_acquired)
+		rtnl_lock();
+
	/* repopulate tunnel port filters */
-	i40e_sync_udp_filters(pf);
+	udp_tunnel_nic_reset_ntf(main_vsi->netdev);
+
+	if (!lock_acquired)
+		rtnl_unlock();

	return ret;
}
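i40e_setup_pf_switch() now takes lock_acquired because it is reached both from probe (RTNL not held) and from reset paths that already hold RTNL; taking the lock unconditionally would deadlock the latter. The pattern, reduced to a sketch with a hypothetical helper (not in the patch):

static void i40e_do_under_rtnl(struct i40e_pf *pf, bool lock_acquired)
{
	if (!lock_acquired)
		rtnl_lock();
	/* work that must run under RTNL, e.g. the
	 * udp_tunnel_nic_reset_ntf() call above
	 */
	if (!lock_acquired)
		rtnl_unlock();
}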
@@ -11059,6 +15074,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
static void i40e_determine_queue_usage(struct i40e_pf *pf)
{
	int queues_left;
+	int q_max;

	pf->num_lan_qps = 0;
@@ -11069,67 +15085,72 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
	queues_left = pf->hw.func_caps.num_tx_qp;

	if ((queues_left == 1) ||
-	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+	    !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
		/* one qp for PF, no queues for anything else */
		queues_left = 0;
		pf->alloc_rss_size = pf->num_lan_qps = 1;

		/* make sure all the fancies are disabled */
-		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
-			       I40E_FLAG_IWARP_ENABLED	|
-			       I40E_FLAG_FD_SB_ENABLED	|
-			       I40E_FLAG_FD_ATR_ENABLED	|
-			       I40E_FLAG_DCB_CAPABLE	|
-			       I40E_FLAG_DCB_ENABLED	|
-			       I40E_FLAG_SRIOV_ENABLED	|
-			       I40E_FLAG_VMDQ_ENABLED);
-	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
-				  I40E_FLAG_FD_SB_ENABLED |
-				  I40E_FLAG_FD_ATR_ENABLED |
-				  I40E_FLAG_DCB_CAPABLE))) {
+		clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
+		clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
+		clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+		clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
+		clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+		clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+		clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
+		clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
+		set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
+	} else if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) &&
+		   !test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
+		   !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
+		   !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) {
		/* one qp for PF */
		pf->alloc_rss_size = pf->num_lan_qps = 1;
		queues_left -= pf->num_lan_qps;

-		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
-			       I40E_FLAG_IWARP_ENABLED	|
-			       I40E_FLAG_FD_SB_ENABLED	|
-			       I40E_FLAG_FD_ATR_ENABLED	|
-			       I40E_FLAG_DCB_ENABLED	|
-			       I40E_FLAG_VMDQ_ENABLED);
+		clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
+		clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
+		clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+		clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
+		clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+		clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
+		set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
	} else {
		/* Not enough queues for all TCs */
-		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
-		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
-			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
-					I40E_FLAG_DCB_ENABLED);
+		if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) &&
+		    queues_left < I40E_MAX_TRAFFIC_CLASS) {
+			clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+			clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
		}
-		pf->num_lan_qps = max_t(int, pf->rss_size_max,
-					num_online_cpus());
-		pf->num_lan_qps = min_t(int, pf->num_lan_qps,
-					pf->hw.func_caps.num_tx_qp);
+
+		/* limit lan qps to the smaller of qps, cpus or msix */
+		q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+		pf->num_lan_qps = q_max;

		queues_left -= pf->num_lan_qps;
	}

-	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+	if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
		if (queues_left > 1) {
			queues_left -= 1; /* save 1 queue for FD */
		} else {
-			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+			set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
			dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
		}
	}

-	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
	    pf->num_vf_qps && pf->num_req_vfs && queues_left) {
		pf->num_req_vfs = min_t(int, pf->num_req_vfs,
					(queues_left / pf->num_vf_qps));
		queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
	}

-	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
+	if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) &&
	    pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
		pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
					  (queues_left / pf->num_vmdq_qps));
@@ -11140,7 +15161,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
	dev_dbg(&pf->pdev->dev,
		"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
		pf->hw.func_caps.num_tx_qp,
-		!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
+		!!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags),
		pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
		pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
		queues_left);
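Worked example of the new LAN queue clamp, with assumed capability numbers (illustrative only): rss_size_max = 64, 16 online CPUs, num_tx_qp = 1536, num_msix_vectors = 129:

q_max = max(64, 16);	/* -> 64, at least one qp per RSS bucket/CPU */
q_max = min(64, 1536);	/* -> 64, bounded by Tx queue pairs */
q_max = min(64, 129);	/* -> 64, new: also bounded by MSI-X vectors */

The added MSI-X bound is what keeps pf->num_lan_qps from exceeding the interrupt vectors actually available.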
@@ -11164,7 +15185,8 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
	settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;

	/* Flow Director is enabled */
-	if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))
+	if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ||
+	    test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
		settings->enable_fdir = true;

	/* Ethtype and MACVLAN filters enabled for PF */
@@ -11181,6 +15203,7 @@
#define REMAIN(__x) (INFO_STRING_LEN - (__x))
static void i40e_print_features(struct i40e_pf *pf)
{
+	struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
	struct i40e_hw *hw = &pf->hw;
	char *buf;
	int i;
@@ -11191,29 +15214,28 @@ static void i40e_print_features(struct i40e_pf *pf)
	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
#ifdef CONFIG_PCI_IOV
-	i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
+	i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
#endif
-	i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
-		      pf->hw.func_caps.num_vsis,
-		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
-	if (pf->flags & I40E_FLAG_RSS_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " RSS");
-	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " FD_ATR");
-	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
-		i += snprintf(&buf[i], REMAIN(i), " FD_SB");
-		i += snprintf(&buf[i], REMAIN(i), " NTUPLE");
-	}
-	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
-		i += snprintf(&buf[i], REMAIN(i), " DCB");
-	i += snprintf(&buf[i], REMAIN(i), " VxLAN");
-	i += snprintf(&buf[i], REMAIN(i), " Geneve");
-	if (pf->flags & I40E_FLAG_PTP)
-		i += snprintf(&buf[i], REMAIN(i), " PTP");
-	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " VEB");
+	i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
+		       pf->hw.func_caps.num_vsis, main_vsi->num_queue_pairs);
+	if (test_bit(I40E_FLAG_RSS_ENA, pf->flags))
+		i += scnprintf(&buf[i], REMAIN(i), " RSS");
+	if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
+		i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
+	if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
+		i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
+		i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
+	}
+	if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
+		i += scnprintf(&buf[i], REMAIN(i), " DCB");
+	i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
+	i += scnprintf(&buf[i], REMAIN(i), " Geneve");
+	if (test_bit(I40E_FLAG_PTP_ENA, pf->flags))
+		i += scnprintf(&buf[i], REMAIN(i), " PTP");
+	if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
+		i += scnprintf(&buf[i], REMAIN(i), " VEB");
	else
-		i += snprintf(&buf[i], REMAIN(i), " VEPA");
+		i += scnprintf(&buf[i], REMAIN(i), " VEPA");

	dev_info(&pf->pdev->dev, "%s\n", buf);
	kfree(buf);
@@ -11237,6 +15259,255 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
}

/**
+ * i40e_set_fec_in_flags - helper function for setting FEC options in flags
+ * @fec_cfg: FEC option to set in flags
+ * @flags: ptr to flags in which we set FEC option
+ **/
+void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags)
+{
+	if (fec_cfg & I40E_AQ_SET_FEC_AUTO) {
+		set_bit(I40E_FLAG_RS_FEC, flags);
+		set_bit(I40E_FLAG_BASE_R_FEC, flags);
+	}
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
+		set_bit(I40E_FLAG_RS_FEC, flags);
+		clear_bit(I40E_FLAG_BASE_R_FEC, flags);
+	}
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
+		set_bit(I40E_FLAG_BASE_R_FEC, flags);
+		clear_bit(I40E_FLAG_RS_FEC, flags);
+	}
+	if (fec_cfg == 0) {
+		clear_bit(I40E_FLAG_RS_FEC, flags);
+		clear_bit(I40E_FLAG_BASE_R_FEC, flags);
+	}
+}
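The FEC mapping above is easiest to read as a table; a worked illustration using the AQ bit names from the function (not part of the patch):

/*	fec_cfg					RS_FEC		BASE_R_FEC
 *	I40E_AQ_SET_FEC_AUTO			set		set
 *	REQUEST_RS or ABILITY_RS		set		cleared
 *	REQUEST_KR or ABILITY_KR		cleared		set
 *	0					cleared		cleared
 */
i40e_set_fec_in_flags(I40E_AQ_SET_FEC_REQUEST_RS, pf->flags);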
+
+/**
+ * i40e_check_recovery_mode - check if we are running transition firmware
+ * @pf: board private structure
+ *
+ * Check registers indicating the firmware runs in recovery mode. Sets the
+ * appropriate driver state.
+ *
+ * Returns true if the recovery mode was detected, false otherwise
+ **/
+static bool i40e_check_recovery_mode(struct i40e_pf *pf)
+{
+	u32 val = rd32(&pf->hw, I40E_GL_FWSTS);
+
+	if (val & I40E_GL_FWSTS_FWS1B_MASK) {
+		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
+		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		set_bit(__I40E_RECOVERY_MODE, pf->state);
+
+		return true;
+	}
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n");
+
+	return false;
+}
+
+/**
+ * i40e_pf_loop_reset - perform reset in a loop.
+ * @pf: board private structure
+ *
+ * This function is useful when a NIC is about to enter recovery mode.
+ * When a NIC's internal data structures are corrupted the NIC's
+ * firmware is going to enter recovery mode.
+ * Right after a POR it takes about 7 minutes for firmware to enter
+ * recovery mode. Until that time a NIC is in some kind of intermediate
+ * state. After that time period the NIC almost surely enters
+ * recovery mode. The only way for a driver to detect intermediate
+ * state is to issue a series of pf-resets and check a return value.
+ * If a PF reset returns success then the firmware could be in recovery
+ * mode so the caller of this code needs to check for recovery mode
+ * if this function returns success. There is a small chance that
+ * firmware will hang in intermediate state forever.
+ * Since waiting 7 minutes is quite a lot of time this function waits
+ * 10 seconds and then gives up by returning an error.
+ *
+ * Return 0 on success, negative on failure.
+ **/
+static int i40e_pf_loop_reset(struct i40e_pf *pf)
+{
+	/* wait max 10 seconds for PF reset to succeed */
+	const unsigned long time_end = jiffies + 10 * HZ;
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_pf_reset(hw);
+	while (ret != 0 && time_before(jiffies, time_end)) {
+		usleep_range(10000, 20000);
+		ret = i40e_pf_reset(hw);
+	}
+
+	if (ret == 0)
+		pf->pfr_count++;
+	else
+		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
+
+	return ret;
+}
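The jiffies-bounded retry in i40e_pf_loop_reset() is a generic idiom worth noting; generalized sketch with a hypothetical do_reset() callback:

const unsigned long time_end = jiffies + 10 * HZ;	/* 10 s budget */
int ret = do_reset();

while (ret != 0 && time_before(jiffies, time_end)) {
	usleep_range(10000, 20000);	/* back off 10-20 ms per attempt */
	ret = do_reset();
}

time_before() keeps the comparison safe across jiffies wraparound, which a plain "jiffies < time_end" would not.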
+
+/**
+ * i40e_check_fw_empr - check if FW issued unexpected EMP Reset
+ * @pf: board private structure
+ *
+ * Check FW registers to determine if FW issued unexpected EMP Reset.
+ * Every time when unexpected EMP Reset occurs the FW increments
+ * a counter of unexpected EMP Resets. When the counter reaches 10
+ * the FW should enter the Recovery mode
+ *
+ * Returns true if FW issued unexpected EMP Reset
+ **/
+static bool i40e_check_fw_empr(struct i40e_pf *pf)
+{
+	const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) &
+			   I40E_GL_FWSTS_FWS1B_MASK;
+	return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) &&
+	       (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10);
+}
+
+/**
+ * i40e_handle_resets - handle EMP resets and PF resets
+ * @pf: board private structure
+ *
+ * Handle both EMP resets and PF resets and conclude whether there are
+ * any issues regarding these resets. If there are any issues then
+ * generate log entry.
+ *
+ * Return 0 if NIC is healthy or negative value when there are issues
+ * with resets
+ **/
+static int i40e_handle_resets(struct i40e_pf *pf)
+{
+	const int pfr = i40e_pf_loop_reset(pf);
+	const bool is_empr = i40e_check_fw_empr(pf);
+
+	if (is_empr || pfr != 0)
+		dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n");
+
+	return is_empr ? -EIO : pfr;
+}
+
+/**
+ * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
+ * @pf: board private structure
+ * @hw: ptr to the hardware info
+ *
+ * This function does a minimal setup of all subsystems needed for running
+ * recovery mode.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+{
+	struct i40e_vsi *vsi;
+	int err;
+	int v_idx;
+
+	pci_set_drvdata(pf->pdev, pf);
+	pci_save_state(pf->pdev);
+
+	/* set up periodic task facility */
+	timer_setup(&pf->service_timer, i40e_service_timer, 0);
+	pf->service_timer_period = HZ;
+
+	INIT_WORK(&pf->service_task, i40e_service_task);
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* The number of VSIs reported by the FW is the minimum guaranteed
+	 * to us; HW supports far more and we share the remaining pool with
+	 * the other PFs. We allocate space for more than the guarantee with
+	 * the understanding that we might not get them all later.
+	 */
+	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
+		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
+	else
+		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
+
+	/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
+	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+			  GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_switch_setup;
+	}
+
+	/* We allocate one VSI which is needed as absolute minimum
+	 * in order to register the netdev
+	 */
+	v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
+	if (v_idx < 0) {
+		err = v_idx;
+		goto err_switch_setup;
+	}
+	pf->lan_vsi = v_idx;
+	vsi = pf->vsi[v_idx];
+	if (!vsi) {
+		err = -EFAULT;
+		goto err_switch_setup;
+	}
+	vsi->alloc_queue_pairs = 1;
+	err = i40e_config_netdev(vsi);
+	if (err)
+		goto err_switch_setup;
+	err = register_netdev(vsi->netdev);
+	if (err)
+		goto err_switch_setup;
+	vsi->netdev_registered = true;
+	i40e_dbg_pf_init(pf);
+
+	err = i40e_setup_misc_vector_for_recovery_mode(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* tell the firmware that we're starting */
+	i40e_send_version(pf);
+
+	/* since everything's happy, start the service_task timer */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	return 0;
+
+err_switch_setup:
+	i40e_reset_interrupt_capability(pf);
+	timer_shutdown_sync(&pf->service_timer);
+	i40e_shutdown_adminq(hw);
+	iounmap(hw->hw_addr);
+	pci_release_mem_regions(pf->pdev);
+	pci_disable_device(pf->pdev);
+	i40e_free_pf(pf);
+
+	return err;
+}
+
+/**
+ * i40e_set_subsystem_device_id - set subsystem device id
+ * @hw: pointer to the hardware info
+ *
+ * Set PCI subsystem device id either from a pci_dev structure or
+ * a specific FW register.
+ **/
+static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
+{
+	struct i40e_pf *pf = i40e_hw_to_pf(hw);
+
+	hw->subsystem_device_id = pf->pdev->subsystem_device ?
+		pf->pdev->subsystem_device :
+		(ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
+}
+
+/**
 * i40e_probe - Device initialization routine
 * @pdev: PCI device information struct
 * @ent: entry in i40e_pci_tbl
@@ -11250,15 +15521,20 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct i40e_aq_get_phy_abilities_resp abilities;
+#ifdef CONFIG_I40E_DCB
+	enum i40e_get_fw_lldp_status_resp lldp_status;
+#endif /* CONFIG_I40E_DCB */
+	struct i40e_vsi *vsi;
	struct i40e_pf *pf;
	struct i40e_hw *hw;
-	static u16 pfs_found;
	u16 wol_nvm_bits;
+	char nvm_ver[32];
	u16 link_status;
+#ifdef CONFIG_I40E_DCB
+	int status;
+#endif /* CONFIG_I40E_DCB */
	int err;
	u32 val;
-	u32 i;
-	u8 set_fc_aq_fail;

	err = pci_enable_device_mem(pdev);
	if (err)
@@ -11267,12 +15543,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	/* set up for high or low dma */
	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
-		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-		if (err) {
-			dev_err(&pdev->dev,
-				"DMA configuration failed: 0x%x\n", err);
-			goto err_dma;
-		}
+		dev_err(&pdev->dev,
+			"DMA configuration failed: 0x%x\n", err);
+		goto err_dma;
	}

	/* set up pci connections */
@@ -11283,7 +15556,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		goto err_pci_reg;
	}

-	pci_enable_pcie_error_reporting(pdev);
	pci_set_master(pdev);

	/* Now that we have a PCI connection, we need to do the
@@ -11291,7 +15563,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	 * the Admin Queue structures and then querying for the
	 * device's current profile information.
	 */
-	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+	pf = i40e_alloc_pf(&pdev->dev);
	if (!pf) {
		err = -ENOMEM;
		goto err_pf_alloc;
@@ -11301,11 +15573,20 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	set_bit(__I40E_DOWN, pf->state);

	hw = &pf->hw;
-	hw->back = pf;

	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
				I40E_MAX_CSR_SPACE);
-
+	/* We believe that the highest register to read is
+	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
+	 * is not less than that before mapping to prevent a
+	 * kernel panic.
+	 */
+	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
+		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
+			pf->ioremap_len);
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
	if (!hw->hw_addr) {
		err = -EIO;
@@ -11318,14 +15599,21 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	hw->device_id = pdev->device;
	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
-	hw->subsystem_device_id = pdev->subsystem_device;
+	i40e_set_subsystem_device_id(hw);
	hw->bus.device = PCI_SLOT(pdev->devfn);
	hw->bus.func = PCI_FUNC(pdev->devfn);
	hw->bus.bus_id = pdev->bus->number;
-	pf->instance = pfs_found;
+
+	/* Select something other than the 802.1ad ethertype for the
+	 * switch to use internally and drop on ingress.
+	 */
+	hw->switch_tag = 0xffff;
+	hw->first_tag = ETH_P_8021AD;
+	hw->second_tag = ETH_P_8021Q;

	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+	INIT_LIST_HEAD(&pf->ddp_old_prof);

	/* set up the locks for the AQ, do this only once in probe
	 * and destroy them only once in remove
@@ -11353,18 +15641,29 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	/* Reset here to make sure all is clean and to define PF 'n' */
	i40e_clear_hw(hw);
-	err = i40e_pf_reset(hw);
+
+	err = i40e_set_mac_type(hw);
	if (err) {
-		dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err);
+		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
+			 err);
		goto err_pf_reset;
	}
-	pf->pfr_count++;

-	hw->aq.num_arq_entries = I40E_AQ_LEN;
-	hw->aq.num_asq_entries = I40E_AQ_LEN;
+	err = i40e_handle_resets(pf);
+	if (err)
+		goto err_pf_reset;
+
+	i40e_check_recovery_mode(pf);
+
+	if (is_kdump_kernel()) {
+		hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN;
+		hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN;
+	} else {
+		hw->aq.num_arq_entries = I40E_AQ_LEN;
+		hw->aq.num_asq_entries = I40E_AQ_LEN;
+	}
	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
	hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
-	pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;

	snprintf(pf->int_name, sizeof(pf->int_name) - 1,
		 "%s-%s:misc",
@@ -11382,9 +15681,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)

	err = i40e_init_adminq(hw);
	if (err) {
-		if (err == I40E_ERR_FIRMWARE_API_VERSION)
+		if (err == -EIO)
			dev_info(&pdev->dev,
-				 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+				 "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
+				 hw->aq.api_maj_ver,
+				 hw->aq.api_min_ver,
+				 I40E_FW_API_VERSION_MAJOR,
+				 I40E_FW_MINOR_VERSION(hw));
		else
			dev_info(&pdev->dev,
				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");

		goto err_pf_reset;
	}
	i40e_get_oem_version(hw);
+	i40e_get_pba_string(hw);

-	/* provide nvm, fw, api versions */
-	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
+	/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
+	i40e_nvm_version_str(hw, nvm_ver, sizeof(nvm_ver));
+	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
-		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
-		 i40e_nvm_version_str(hw));
-
-	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
-	    hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
-		dev_info(&pdev->dev,
-			 "The driver for the device detected a newer version of the NVM image than expected.
 Please install the most recent version of the network driver.\n");
-	else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
-		 hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+		 hw->aq.api_maj_ver, hw->aq.api_min_ver, nvm_ver,
+		 hw->vendor_id, hw->device_id, hw->subsystem_vendor_id,
+		 hw->subsystem_device_id);
+
+	if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR,
+				  I40E_FW_MINOR_VERSION(hw) + 1))
+		dev_dbg(&pdev->dev,
+			"The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
+			hw->aq.api_maj_ver,
+			hw->aq.api_min_ver,
+			I40E_FW_API_VERSION_MAJOR,
+			I40E_FW_MINOR_VERSION(hw));
+	else if (i40e_is_aq_api_ver_lt(hw, 1, 4))
		dev_info(&pdev->dev,
-			 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+			 "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
+			 hw->aq.api_maj_ver,
+			 hw->aq.api_min_ver,
+			 I40E_FW_API_VERSION_MAJOR,
+			 I40E_FW_MINOR_VERSION(hw));

	i40e_verify_eeprom(pf);
@@ -11415,7 +15728,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");

	i40e_clear_pxe_mode(hw);
-	err = i40e_get_capabilities(pf);
+
+	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
	if (err)
		goto err_adminq_setup;
@@ -11425,6 +15739,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		goto err_sw_init;
	}

+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		return i40e_init_recovery_mode(pf, hw);
+
	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
				hw->func_caps.num_rx_qp, 0, 0);
	if (err) {
@@ -11443,9 +15760,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	 * Ignore error return codes because if it was already disabled via
	 * hardware settings this will fail
	 */
-	if (pf->flags & I40E_FLAG_STOP_FW_LLDP) {
+	if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) {
		dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
-		i40e_aq_stop_lldp(hw, true, NULL);
+		i40e_aq_stop_lldp(hw, true, false, NULL);
	}

	/* allow a platform config to override the HW addr */
@@ -11460,21 +15777,37 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
	i40e_get_port_mac_addr(hw, hw->mac.port_addr);
	if (is_valid_ether_addr(hw->mac.port_addr))
-		pf->flags |= I40E_FLAG_PORT_ID_VALID;
+		set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps);

+	i40e_ptp_alloc_pins(pf);
	pci_set_drvdata(pdev, pf);
	pci_save_state(pdev);
+
#ifdef CONFIG_I40E_DCB
+	status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status);
+	(!status &&
+	 lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ?
+		(clear_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) :
+		(set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags));
+	dev_info(&pdev->dev,
+		 test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ?
+ "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); + + /* Enable FW to write default DCB config on link-up */ + i40e_aq_set_dcb_parameters(hw, true, NULL); + err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ /* set up periodic task facility */ - setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf); + timer_setup(&pf->service_timer, i40e_service_timer, 0); pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); @@ -11494,6 +15827,21 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_switch_setup; + /* Reduce Tx and Rx pairs for kdump + * When MSI-X is enabled, it's not allowed to use more TC queue + * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus + * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1. + */ + if (is_kdump_kernel()) + pf->num_lan_msix = 1; + + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; + pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; + pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; + pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; + pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | + UDP_TUNNEL_TYPE_GENEVE; + /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with @@ -11503,6 +15851,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { + dev_warn(&pf->pdev->dev, + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; + } /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. 
	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
@@ -11514,44 +15868,26 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_PCI_IOV
	/* prep for VF support */
-	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
-	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
+	    test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
		if (pci_num_vf(pdev))
-			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
	}
#endif
-	err = i40e_setup_pf_switch(pf, false);
+	err = i40e_setup_pf_switch(pf, false, false);
	if (err) {
		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
		goto err_vsis;
	}

-	/* Make sure flow control is set according to current settings */
-	err = i40e_set_fc(hw, &set_fc_aq_fail, true);
-	if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET)
-		dev_dbg(&pf->pdev->dev,
-			"Set fc with err %s aq_err %s on get_phy_cap\n",
-			i40e_stat_str(hw, err),
-			i40e_aq_str(hw, hw->aq.asq_last_status));
-	if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET)
-		dev_dbg(&pf->pdev->dev,
-			"Set fc with err %s aq_err %s on set_phy_config\n",
-			i40e_stat_str(hw, err),
-			i40e_aq_str(hw, hw->aq.asq_last_status));
-	if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE)
-		dev_dbg(&pf->pdev->dev,
-			"Set fc with err %s aq_err %s on get_link_info\n",
-			i40e_stat_str(hw, err),
-			i40e_aq_str(hw, hw->aq.asq_last_status));
+	vsi = i40e_pf_get_main_vsi(pf);
+	INIT_LIST_HEAD(&vsi->ch_list);

	/* if FDIR VSI was set up, start it now */
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
-			i40e_vsi_open(pf->vsi[i]);
-			break;
-		}
-	}
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+	if (vsi)
+		i40e_vsi_open(vsi);

	/* The driver only wants link up/down and module qualification
	 * reports from firmware. Note the negative logic.
@@ -11561,9 +15897,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
				       I40E_AQ_EVENT_MEDIA_NA |
				       I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
	if (err)
-		dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, err),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
+			 ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
+
+	/* VF MDD event logs are rate limited to one second intervals */
+	ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);

	/* Reconfigure hardware for allowing smaller MSS in the case
	 * of TSO, so that we avoid the MDD being fired and causing
@@ -11576,14 +15914,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		wr32(hw, I40E_REG_MSS, val);
	}

-	if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
+	if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
		msleep(75);
		err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
		if (err)
-			dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, err),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
+				 ERR_PTR(err),
+				 libie_aq_str(pf->hw.aq.asq_last_status));
	}
	/* The main driver is (mostly) up and happy.
 We need to set this state
	 * before setting up the misc vector or we get a race and the vector
@@ -11596,19 +15933,21 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	 * the misc functionality and queue processing is combined in
	 * the same vector and that gets setup at open.
	 */
-	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
		err = i40e_setup_misc_vector(pf);
		if (err) {
			dev_info(&pdev->dev,
				 "setup of misc vector failed: %d\n", err);
+			i40e_cloud_filter_exit(pf);
+			i40e_fdir_teardown(pf);
			goto err_vsis;
		}
	}

#ifdef CONFIG_PCI_IOV
	/* prep for VF support */
-	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
-	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
+	    test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
		/* disable link interrupts for VFs */
		val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
@@ -11628,7 +15967,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	}
#endif /* CONFIG_PCI_IOV */

-	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+	if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
						      pf->num_iwarp_msix,
						      I40E_IWARP_IRQ_PILE_ID);
@@ -11636,7 +15975,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
			dev_info(&pdev->dev,
				 "failed to get tracking for %d vectors for IWARP err=%d\n",
				 pf->num_iwarp_msix, pf->iwarp_base_vector);
-			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+			clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
		}
	}
@@ -11650,7 +15989,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		  round_jiffies(jiffies + pf->service_timer_period));

	/* add this PF to client device list and launch a client service task */
-	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+	if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
		err = i40e_lan_add_device(pf);
		if (err)
			dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
@@ -11663,7 +16002,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	 * and will report PCI Gen 1 x 1 by default so don't bother
	 * checking them.
	 */
-	if (!(pf->flags & I40E_FLAG_NO_PCI_LINK_CHECK)) {
+	if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) {
		char speed[PCI_SPEED_SIZE] = "Unknown";
		char width[PCI_WIDTH_SIZE] = "Unknown";
@@ -11677,23 +16016,23 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		switch (hw->bus.speed) {
		case i40e_bus_speed_8000:
-			strncpy(speed, "8.0", PCI_SPEED_SIZE); break;
+			strscpy(speed, "8.0", PCI_SPEED_SIZE); break;
		case i40e_bus_speed_5000:
-			strncpy(speed, "5.0", PCI_SPEED_SIZE); break;
+			strscpy(speed, "5.0", PCI_SPEED_SIZE); break;
		case i40e_bus_speed_2500:
-			strncpy(speed, "2.5", PCI_SPEED_SIZE); break;
+			strscpy(speed, "2.5", PCI_SPEED_SIZE); break;
		default:
			break;
		}
		switch (hw->bus.width) {
		case i40e_bus_width_pcie_x8:
-			strncpy(width, "8", PCI_WIDTH_SIZE); break;
+			strscpy(width, "8", PCI_WIDTH_SIZE); break;
		case i40e_bus_width_pcie_x4:
-			strncpy(width, "4", PCI_WIDTH_SIZE); break;
+			strscpy(width, "4", PCI_WIDTH_SIZE); break;
		case i40e_bus_width_pcie_x2:
-			strncpy(width, "2", PCI_WIDTH_SIZE); break;
+			strscpy(width, "2", PCI_WIDTH_SIZE); break;
		case i40e_bus_width_pcie_x1:
-			strncpy(width, "1", PCI_WIDTH_SIZE); break;
+			strscpy(width, "1", PCI_WIDTH_SIZE); break;
		default:
			break;
		}
@@ -11711,17 +16050,30 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	/* get the requested speeds from the fw */
	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
	if (err)
-		dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %s last_status =  %s\n",
-			i40e_stat_str(&pf->hw, err),
-			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %pe last_status =  %s\n",
+			ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
	pf->hw.phy.link_info.requested_speeds = abilities.link_speed;

+	/* set the FEC config due to the board capabilities */
+	i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags);
+
	/* get the supported phy types from the fw */
	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
	if (err)
-		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %s last_status =  %s\n",
-			i40e_stat_str(&pf->hw, err),
-			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %pe last_status =  %s\n",
+			ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
+
+#define MAX_FRAME_SIZE_DEFAULT 0x2600
+
+	err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL);
+	if (err)
+		dev_warn(&pdev->dev, "set mac config ret =  %pe last_status =  %s\n",
+			 ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
+
+	/* Make sure the MFS is set to the expected value */
+	val = rd32(hw, I40E_PRTGL_SAH);
+	FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT);
+	wr32(hw, I40E_PRTGL_SAH, val);
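Worked value for the new MFS programming: MAX_FRAME_SIZE_DEFAULT is 0x2600, i.e. 9728 bytes, the usual jumbo-frame ceiling on this hardware. FIELD_MODIFY() rewrites only the MFS field of I40E_PRTGL_SAH and leaves the rest of the register intact; an equivalent open-coded form using linux/bitfield.h (illustration only):

val = rd32(hw, I40E_PRTGL_SAH);
val &= ~I40E_PRTGL_SAH_MFS_MASK;				/* clear old MFS */
val |= FIELD_PREP(I40E_PRTGL_SAH_MFS_MASK, MAX_FRAME_SIZE_DEFAULT);
wr32(hw, I40E_PRTGL_SAH, val);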
	/* Add a filter to drop all Flow control frames from any VSI from being
	 * transmitted. By doing so we stop a malicious VF from sending out
@@ -11733,13 +16085,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
					       pf->main_vsi_seid);

	if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
-		(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
-		pf->flags |= I40E_FLAG_PHY_CONTROLS_LEDS;
+	    (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
+		set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps);
	if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
-		pf->flags |= I40E_FLAG_HAVE_CRT_RETIMER;
+		set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps);
	/* print a string summarizing features */
	i40e_print_features(pf);

+	i40e_devlink_register(pf);
+
	return 0;

	/* Unwind what we've done if something failed in the setup */
@@ -11749,7 +16103,7 @@ err_vsis:
	kfree(pf->vsi);
err_switch_setup:
	i40e_reset_interrupt_capability(pf);
-	del_timer_sync(&pf->service_timer);
+	timer_shutdown_sync(&pf->service_timer);
err_mac_addr:
err_configure_lan_hmc:
	(void)i40e_shutdown_lan_hmc(hw);
@@ -11760,9 +16114,8 @@ err_adminq_setup:
err_pf_reset:
	iounmap(hw->hw_addr);
err_ioremap:
-	kfree(pf);
+	i40e_free_pf(pf);
err_pf_alloc:
-	pci_disable_pcie_error_reporting(pdev);
	pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
@@ -11783,9 +16136,13 @@ static void i40e_remove(struct pci_dev *pdev)
{
	struct i40e_pf *pf = pci_get_drvdata(pdev);
	struct i40e_hw *hw = &pf->hw;
-	i40e_status ret_code;
+	struct i40e_vsi *vsi;
+	struct i40e_veb *veb;
+	int ret_code;
	int i;

+	i40e_devlink_unregister(pf);
+
	i40e_dbg_pf_exit(pf);

	i40e_ptp_stop(pf);
@@ -11794,46 +16151,68 @@ static void i40e_remove(struct pci_dev *pdev)
	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);

+	/* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
+	 * flags, once they are set, i40e_rebuild should not be called as
+	 * i40e_prep_for_reset always returns early.
+	 */
+	while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+		usleep_range(1000, 2000);
+	set_bit(__I40E_IN_REMOVE, pf->state);
+
+	if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) {
+		set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+		i40e_free_vfs(pf);
+		clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
+	}
	/* no more scheduling of any task */
	set_bit(__I40E_SUSPENDED, pf->state);
	set_bit(__I40E_DOWN, pf->state);
-	if (pf->service_timer.data)
-		del_timer_sync(&pf->service_timer);
+	if (pf->service_timer.function)
+		timer_shutdown_sync(&pf->service_timer);
	if (pf->service_task.func)
		cancel_work_sync(&pf->service_task);

+	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		struct i40e_vsi *vsi = pf->vsi[0];
+
+		/* We know that we have allocated only one vsi for this PF,
+		 * it was just for registering netdevice, so the interface
+		 * could be visible in the 'ifconfig' output
+		 */
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+
+		goto unmap;
+	}
+
	/* Client close must be called explicitly here because the timer
	 * has been stopped.
	 */
-	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
-
-	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
-		i40e_free_vfs(pf);
-		pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
-	}
+	i40e_notify_client_of_netdev_close(pf, false);

	i40e_fdir_teardown(pf);

	/* If there is a switch structure or any orphans, remove them.
	 * This will leave only the PF's VSI remaining.
*/ - for (i = 0; i < I40E_MAX_VEB; i++) { - if (!pf->veb[i]) - continue; - - if (pf->veb[i]->uplink_seid == pf->mac_seid || - pf->veb[i]->uplink_seid == 0) - i40e_switch_branch_release(pf->veb[i]); - } + i40e_pf_for_each_veb(pf, i, veb) + if (veb->uplink_seid == pf->mac_seid || + veb->uplink_seid == 0) + i40e_switch_branch_release(veb); - /* Now we can shutdown the PF's VSI, just before we kill + /* Now we can shutdown the PF's VSIs, just before we kill * adminq and hmc. */ - if (pf->vsi[pf->lan_vsi]) - i40e_vsi_release(pf->vsi[pf->lan_vsi]); + i40e_pf_for_each_vsi(pf, i, vsi) { + i40e_vsi_close(vsi); + i40e_vsi_release(vsi); + pf->vsi[i] = NULL; + } + + i40e_cloud_filter_exit(pf); /* remove attached clients */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { ret_code = i40e_lan_del_device(pf); if (ret_code) dev_warn(&pdev->dev, "Failed to delete client device: %d\n", @@ -11849,6 +16228,12 @@ static void i40e_remove(struct pci_dev *pdev) ret_code); } +unmap: + /* Free MSI/legacy interrupt 0 when in recovery mode. */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state) && + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) + free_irq(pf->pdev->irq, pf); + /* shutdown the adminq */ i40e_shutdown_adminq(hw); @@ -11857,17 +16242,19 @@ static void i40e_remove(struct pci_dev *pdev) mutex_destroy(&hw->aq.asq_mutex); /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); - for (i = 0; i < pf->num_alloc_vsi; i++) { - if (pf->vsi[i]) { - i40e_vsi_clear_rings(pf->vsi[i]); - i40e_vsi_clear(pf->vsi[i]); - pf->vsi[i] = NULL; - } + i40e_pf_for_each_vsi(pf, i, vsi) { + if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) + i40e_vsi_clear_rings(vsi); + + i40e_vsi_clear(vsi); + pf->vsi[i] = NULL; } + rtnl_unlock(); - for (i = 0; i < I40E_MAX_VEB; i++) { - kfree(pf->veb[i]); + i40e_pf_for_each_veb(pf, i, veb) { + kfree(veb); pf->veb[i] = NULL; } @@ -11875,23 +16262,156 @@ static void i40e_remove(struct pci_dev *pdev) kfree(pf->vsi); iounmap(hw->hw_addr); - kfree(pf); + i40e_free_pf(pf); pci_release_mem_regions(pdev); - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } /** + * i40e_enable_mc_magic_wake - enable multicast magic packet wake up + * using the mac_address_write admin q function + * @pf: pointer to i40e_pf struct + **/ +static void i40e_enable_mc_magic_wake(struct i40e_pf *pf) +{ + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + struct i40e_hw *hw = &pf->hw; + u8 mac_addr[6]; + u16 flags = 0; + int ret; + + /* Get current MAC address in case it's an LAA */ + if (main_vsi && main_vsi->netdev) { + ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr); + } else { + dev_err(&pf->pdev->dev, + "Failed to retrieve MAC address; using default\n"); + ether_addr_copy(mac_addr, hw->mac.addr); + } + + /* The FW expects the mac address write cmd to first be called with + * one of these flags before calling it again with the multicast + * enable flags. 
+ */ + flags = I40E_AQC_WRITE_TYPE_LAA_WOL; + + if (hw->func_caps.flex10_enable && hw->partition_id != 1) + flags = I40E_AQC_WRITE_TYPE_LAA_ONLY; + + ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); + if (ret) { + dev_err(&pf->pdev->dev, + "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up"); + return; + } + + flags = I40E_AQC_MC_MAG_EN + | I40E_AQC_WOL_PRESERVE_ON_PFR + | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG; + ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); + if (ret) + dev_err(&pf->pdev->dev, + "Failed to enable Multicast Magic Packet wake up\n"); +} + +/** + * i40e_io_suspend - suspend all IO operations + * @pf: pointer to i40e_pf struct + * + **/ +static int i40e_io_suspend(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + + set_bit(__I40E_DOWN, pf->state); + + /* Ensure service task will not be running */ + timer_delete_sync(&pf->service_timer); + cancel_work_sync(&pf->service_task); + + /* Client close must be called explicitly here because the timer + * has been stopped. + */ + i40e_notify_client_of_netdev_close(pf, false); + + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) + i40e_enable_mc_magic_wake(pf); + + /* Since we're going to destroy queues during the + * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this + * whole section + */ + rtnl_lock(); + + i40e_prep_for_reset(pf); + + wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); + wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + + /* Clear the interrupt scheme and release our IRQs so that the system + * can safely hibernate even when there are a large number of CPUs. + * Otherwise hibernation might fail when mapping all the vectors back + * to CPU0. + */ + i40e_clear_interrupt_scheme(pf); + + rtnl_unlock(); + + return 0; +} + +/** + * i40e_io_resume - resume IO operations + * @pf: pointer to i40e_pf struct + * + **/ +static int i40e_io_resume(struct i40e_pf *pf) +{ + struct device *dev = &pf->pdev->dev; + int err; + + /* We need to hold the RTNL lock prior to restoring interrupt schemes, + * since we're going to be restoring queues + */ + rtnl_lock(); + + /* We cleared the interrupt scheme when we suspended, so we need to + * restore it now to resume device functionality. + */ + err = i40e_restore_interrupt_scheme(pf); + if (err) { + dev_err(dev, "Cannot restore interrupt scheme: %d\n", + err); + } + + clear_bit(__I40E_DOWN, pf->state); + i40e_reset_and_rebuild(pf, false, true); + + rtnl_unlock(); + + /* Clear suspended state last after everything is recovered */ + clear_bit(__I40E_SUSPENDED, pf->state); + + /* Restart the service task */ + mod_timer(&pf->service_timer, + round_jiffies(jiffies + pf->service_timer_period)); + + return 0; +} + +/** * i40e_pci_error_detected - warning that something funky happened in PCI land * @pdev: PCI device information struct + * @error: the type of PCI error * * Called to warn that something happened and the error handling steps * are in progress. Allows the driver to quiesce things, be ready for * remediation. 
**/ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct i40e_pf *pf = pci_get_drvdata(pdev); @@ -11905,7 +16425,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, pf->state)) - i40e_prep_for_reset(pf, false); + i40e_io_suspend(pf); /* Request a slot reset */ return PCI_ERS_RESULT_NEED_RESET; @@ -11924,18 +16444,17 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); pci_ers_result_t result; - int err; u32 reg; dev_dbg(&pdev->dev, "%s\n", __func__); - if (pci_enable_device_mem(pdev)) { + /* enable I/O and memory of the device */ + if (pci_enable_device(pdev)) { dev_info(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; } else { pci_set_master(pdev); pci_restore_state(pdev); - pci_save_state(pdev); pci_wake_from_d3(pdev, false); reg = rd32(&pf->hw, I40E_GLGEN_RTRIG); @@ -11945,80 +16464,53 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) result = PCI_ERS_RESULT_DISCONNECT; } - err = pci_cleanup_aer_uncorrect_error_status(pdev); - if (err) { - dev_info(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", - err); - /* non-fatal, continue */ - } - return result; } /** - * i40e_pci_error_resume - restart operations after PCI error recovery + * i40e_pci_error_reset_prepare - prepare device driver for pci reset * @pdev: PCI device information struct - * - * Called to allow the driver to bring things back up after PCI error - * and/or reset recovery has finished. - **/ -static void i40e_pci_error_resume(struct pci_dev *pdev) + */ +static void i40e_pci_error_reset_prepare(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); - dev_dbg(&pdev->dev, "%s\n", __func__); - if (test_bit(__I40E_SUSPENDED, pf->state)) - return; - - i40e_handle_reset_warning(pf, false); + i40e_prep_for_reset(pf); } /** - * i40e_enable_mc_magic_wake - enable multicast magic packet wake up - * using the mac_address_write admin q function - * @pf: pointer to i40e_pf struct - **/ -static void i40e_enable_mc_magic_wake(struct i40e_pf *pf) + * i40e_pci_error_reset_done - pci reset done, device driver reset can begin + * @pdev: PCI device information struct + */ +static void i40e_pci_error_reset_done(struct pci_dev *pdev) { - struct i40e_hw *hw = &pf->hw; - i40e_status ret; - u8 mac_addr[6]; - u16 flags = 0; + struct i40e_pf *pf = pci_get_drvdata(pdev); - /* Get current MAC address in case it's an LAA */ - if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) { - ether_addr_copy(mac_addr, - pf->vsi[pf->lan_vsi]->netdev->dev_addr); - } else { - dev_err(&pf->pdev->dev, - "Failed to retrieve MAC address; using default\n"); - ether_addr_copy(mac_addr, hw->mac.addr); - } + if (test_bit(__I40E_IN_REMOVE, pf->state)) + return; - /* The FW expects the mac address write cmd to first be called with - * one of these flags before calling it again with the multicast - * enable flags. 
- */ - flags = I40E_AQC_WRITE_TYPE_LAA_WOL; + i40e_reset_and_rebuild(pf, false, false); +#ifdef CONFIG_PCI_IOV + i40e_restore_all_vfs_msi_state(pdev); +#endif /* CONFIG_PCI_IOV */ +} - if (hw->func_caps.flex10_enable && hw->partition_id != 1) - flags = I40E_AQC_WRITE_TYPE_LAA_ONLY; +/** + * i40e_pci_error_resume - restart operations after PCI error recovery + * @pdev: PCI device information struct + * + * Called to allow the driver to bring things back up after PCI error + * and/or reset recovery has finished. + **/ +static void i40e_pci_error_resume(struct pci_dev *pdev) +{ + struct i40e_pf *pf = pci_get_drvdata(pdev); - ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); - if (ret) { - dev_err(&pf->pdev->dev, - "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up"); + dev_dbg(&pdev->dev, "%s\n", __func__); + if (test_bit(__I40E_SUSPENDED, pf->state)) return; - } - flags = I40E_AQC_MC_MAG_EN - | I40E_AQC_WOL_PRESERVE_ON_PFR - | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG; - ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); - if (ret) - dev_err(&pf->pdev->dev, - "Failed to enable Multicast Magic Packet wake up\n"); + i40e_io_resume(pf); } /** @@ -12032,33 +16524,40 @@ static void i40e_shutdown(struct pci_dev *pdev) set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); - rtnl_lock(); - i40e_prep_for_reset(pf, true); - rtnl_unlock(); - wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); - wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - - del_timer_sync(&pf->service_timer); + timer_delete_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); /* Client close must be called explicitly here because the timer * has been stopped. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); + i40e_notify_client_of_netdev_close(pf, false); - if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) i40e_enable_mc_magic_wake(pf); - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + /* Free MSI/legacy interrupt 0 when in recovery mode. 
 */ + if (test_bit(__I40E_RECOVERY_MODE, pf->state) && + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) + free_irq(pf->pdev->irq, pf); + + /* Since we're going to destroy queues during the + * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this + * whole section + */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); + rtnl_unlock(); if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); @@ -12066,91 +16565,50 @@ } } -#ifdef CONFIG_PM /** - * i40e_suspend - PCI callback for moving to D3 - * @pdev: PCI device information struct + * i40e_suspend - PM callback for moving to D3 + * @dev: generic device information structure **/ -static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) +static int i40e_suspend(struct device *dev) { - struct i40e_pf *pf = pci_get_drvdata(pdev); - struct i40e_hw *hw = &pf->hw; - int retval = 0; - - set_bit(__I40E_SUSPENDED, pf->state); - set_bit(__I40E_DOWN, pf->state); + struct i40e_pf *pf = dev_get_drvdata(dev); - if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) - i40e_enable_mc_magic_wake(pf); - - i40e_prep_for_reset(pf, false); - - wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); - wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - - i40e_stop_misc_vector(pf); - - retval = pci_save_state(pdev); - if (retval) - return retval; - - pci_wake_from_d3(pdev, pf->wol_en); - pci_set_power_state(pdev, PCI_D3hot); - - return retval; + /* If we're already suspended, then there is nothing to do */ + if (test_and_set_bit(__I40E_SUSPENDED, pf->state)) + return 0; + return i40e_io_suspend(pf); } /** - * i40e_resume - PCI callback for waking up from D3 - * @pdev: PCI device information struct + * i40e_resume - PM callback for waking up from D3 + * @dev: generic device information structure **/ -static int i40e_resume(struct pci_dev *pdev) +static int i40e_resume(struct device *dev) { - struct i40e_pf *pf = pci_get_drvdata(pdev); - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* pci_restore_state() clears dev->state_saved, so - * call pci_save_state() again to restore it.
- */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - /* no wakeup events while running */ - pci_wake_from_d3(pdev, false); - - /* handling the reset will rebuild the device state */ - if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { - clear_bit(__I40E_DOWN, pf->state); - i40e_reset_and_rebuild(pf, false, false); - } + struct i40e_pf *pf = dev_get_drvdata(dev); - return 0; + /* If we're not suspended, then there is nothing to do */ + if (!test_bit(__I40E_SUSPENDED, pf->state)) + return 0; + return i40e_io_resume(pf); } -#endif static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, + .reset_prepare = i40e_pci_error_reset_prepare, + .reset_done = i40e_pci_error_reset_done, .resume = i40e_pci_error_resume, }; +static DEFINE_SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); + static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, -#ifdef CONFIG_PM - .suspend = i40e_suspend, - .resume = i40e_resume, -#endif + .driver.pm = pm_sleep_ptr(&i40e_pm_ops), .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, @@ -12164,23 +16622,33 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { - pr_info("%s: %s - version %s\n", i40e_driver_name, - i40e_driver_string, i40e_driver_version_str); + int err; + + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); - /* we will see if single thread per module is enough for now, - * it can't be any worse than using the system workqueue which - * was already single threaded + /* There is no need to throttle the number of active tasks because + * each device limits its own task using a state bit for scheduling + * the service task, and the device tasks do not interfere with each + * other, so we don't set a max task limit. WQ_MEM_RECLAIM is deliberately + * not set: the service task flushes work on !WQ_MEM_RECLAIM workqueues, + * which would trigger a flush-dependency warning. */ - i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, - i40e_driver_name); + i40e_wq = alloc_workqueue("%s", WQ_PERCPU, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); @@ -12194,6 +16662,7 @@ static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); destroy_workqueue(i40e_wq); + ida_destroy(&i40e_client_ida); i40e_dbg_exit(); } module_exit(i40e_exit_module);
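
The ordering in the i40e_remove() hunk above is a small locking protocol: the remover busy-waits until it owns the __I40E_RESET_RECOVERY_PENDING bit, then raises __I40E_IN_REMOVE so any concurrent reset path gives up before rebuilding. Below is a minimal user-space sketch of that interlock, with C11 atomics standing in for the kernel's test_and_set_bit()/test_bit(); the bit values, helper names, and printouts are illustrative, not the driver's.

/* Sketch only: models i40e_remove() vs. i40e_prep_for_reset().
 * Compile with any C11 compiler on POSIX (usleep()).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

enum { RESET_RECOVERY_PENDING = 1u << 0, IN_REMOVE = 1u << 1 };

static atomic_uint state;	/* zero-initialized: no bits set */

/* Stand-in for test_and_set_bit(): true if the bit was already set */
static bool test_and_set(unsigned int bit)
{
	return atomic_fetch_or(&state, bit) & bit;
}

static void remove_path(void)
{
	/* Claim the reset machinery; mirrors the usleep_range() poll loop */
	while (test_and_set(RESET_RECOVERY_PENDING))
		usleep(1000);
	atomic_fetch_or(&state, IN_REMOVE);
	puts("remove: owns reset lock, rebuilds are fenced off");
}

static void prep_for_reset(void)
{
	/* Mirrors i40e_prep_for_reset() returning early once removal began */
	if (atomic_load(&state) & IN_REMOVE) {
		puts("reset: device is being removed, skipping rebuild");
		return;
	}
	puts("reset: proceeding with rebuild");
}

int main(void)
{
	prep_for_reset();	/* before removal: rebuild proceeds */
	remove_path();
	prep_for_reset();	/* after removal started: early return */
	return 0;
}

Run serially, this prints the rebuild path first and the fenced-off path second; the point is that once IN_REMOVE is observable, the reset path returns before touching anything the remove path is tearing down.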

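The PM callbacks at the end of the diff rely on the same state-bit discipline to stay idempotent: i40e_suspend() proceeds only if it is the first caller to set __I40E_SUSPENDED, and i40e_resume() does nothing unless that bit is set, with i40e_io_resume() clearing it only after the rebuild finishes so a late observer never sees a half-restored device. A sketch of that guard pattern under the same C11-atomics assumption; the callback names are made up for illustration.

/* Sketch only: the idempotent suspend/resume guard, with an atomic_bool
 * standing in for the __I40E_SUSPENDED state bit.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool suspended;	/* zero-initialized: not suspended */

static int suspend_cb(void)
{
	/* test_and_set_bit(): only the first caller does the teardown */
	if (atomic_exchange(&suspended, true))
		return 0;
	puts("suspend: quiescing IO, releasing IRQs");
	return 0;
}

static int resume_cb(void)
{
	/* Resume only undoes a suspend that actually happened */
	if (!atomic_load(&suspended))
		return 0;
	puts("resume: restoring interrupt scheme, rebuilding");
	/* Cleared last, as i40e_io_resume() does, so the device is fully
	 * rebuilt before the suspended bit is seen to drop.
	 */
	atomic_store(&suspended, false);
	return 0;
}

int main(void)
{
	suspend_cb();
	suspend_cb();	/* second call is a no-op */
	resume_cb();
	resume_cb();	/* second call is a no-op */
	return 0;
}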