diff options
Diffstat (limited to 'drivers/net/ethernet/intel/igb/igb_main.c')
| -rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 5917 |
1 files changed, 4221 insertions, 1696 deletions
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6a0c1b66ce54..dbea37269d2c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1,29 +1,5 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -38,45 +14,52 @@ #include <linux/slab.h> #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/net_tstamp.h> #include <linux/mii.h> #include <linux/ethtool.h> #include <linux/if.h> #include <linux/if_vlan.h> #include <linux/pci.h> -#include <linux/pci-aspm.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/sctp.h> #include <linux/if_ether.h> -#include <linux/aer.h> #include <linux/prefetch.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/pm_runtime.h> +#include <linux/etherdevice.h> #ifdef CONFIG_IGB_DCA #include <linux/dca.h> #endif #include <linux/i2c.h> #include "igb.h" -#define MAJ 5 -#define MIN 0 -#define BUILD 3 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ -__stringify(BUILD) "-k" +enum queue_mode { + QUEUE_MODE_STRICT_PRIORITY, + QUEUE_MODE_STREAM_RESERVATION, +}; + +enum tx_queue_prio { + TX_QUEUE_PRIO_HIGH, + TX_QUEUE_PRIO_LOW, +}; + char igb_driver_name[] = "igb"; -char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; static const char igb_copyright[] = - "Copyright (c) 2007-2013 Intel Corporation."; + "Copyright (c) 2007-2014 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, }; -static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { +static const struct pci_device_id igb_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, @@ -85,6 +68,8 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, @@ -116,34 +101,30 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { MODULE_DEVICE_TABLE(pci, igb_pci_tbl); -void igb_reset(struct igb_adapter *); static int igb_setup_all_tx_resources(struct igb_adapter *); static int igb_setup_all_rx_resources(struct igb_adapter *); static void igb_free_all_tx_resources(struct igb_adapter *); static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); -static int igb_probe(struct pci_dev *, const struct pci_device_id *); -static void igb_remove(struct pci_dev *pdev); +static void igb_init_queue_configuration(struct igb_adapter *adapter); static int igb_sw_init(struct igb_adapter *); -static int igb_open(struct net_device *); -static int igb_close(struct net_device *); +int igb_open(struct net_device *); +int igb_close(struct net_device *); static void igb_configure(struct igb_adapter *); static void igb_configure_tx(struct igb_adapter *); static void igb_configure_rx(struct igb_adapter *); static void igb_clean_all_tx_rings(struct igb_adapter *); static void igb_clean_all_rx_rings(struct igb_adapter *); -static void igb_clean_tx_ring(struct igb_ring *); -static void igb_clean_rx_ring(struct igb_ring *); static void igb_set_rx_mode(struct net_device *); -static void igb_update_phy_info(unsigned long); -static void igb_watchdog(unsigned long); +static void igb_update_phy_info(struct timer_list *); +static void igb_watchdog(struct timer_list *); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); +static void igb_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); -static void igb_set_uta(struct igb_adapter *adapter); +static void igb_set_uta(struct igb_adapter *adapter, bool set); static irqreturn_t igb_intr(int irq, void *); static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); @@ -153,53 +134,46 @@ static void igb_update_dca(struct igb_q_vector *); static void igb_setup_dca(struct igb_adapter *); #endif /* CONFIG_IGB_DCA */ static int igb_poll(struct napi_struct *, int); -static bool igb_clean_tx_irq(struct igb_q_vector *); -static bool igb_clean_rx_irq(struct igb_q_vector *, int); +static bool igb_clean_tx_irq(struct igb_q_vector *, int); +static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); -static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); +static void igb_vlan_mode(struct net_device *netdev, + netdev_features_t features); static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16); static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16); static void igb_restore_vlan(struct igb_adapter *); -static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); +static void igb_rar_set_index(struct igb_adapter *, u32); static void igb_ping_all_vfs(struct igb_adapter *); static void igb_msg_task(struct igb_adapter *); static void igb_vmm_control(struct igb_adapter *); static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); +static void igb_flush_mac_table(struct igb_adapter *); +static int igb_available_rars(struct igb_adapter *, u8); +static void igb_set_default_mac_filter(struct igb_adapter *); +static int igb_uc_sync(struct net_device *, const unsigned char *); +static int igb_uc_unsync(struct net_device *, const unsigned char *); static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos); -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); + int vf, u16 vlan, u8 qos, __be16 vlan_proto); +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); +static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, + bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); +static void igb_nfc_filter_exit(struct igb_adapter *adapter); +static void igb_nfc_filter_restore(struct igb_adapter *adapter); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); +static int igb_disable_sriov(struct pci_dev *dev, bool reinit); #endif -#ifdef CONFIG_PM -#ifdef CONFIG_PM_SLEEP -static int igb_suspend(struct device *); -#endif -static int igb_resume(struct device *); -#ifdef CONFIG_PM_RUNTIME -static int igb_runtime_suspend(struct device *dev); -static int igb_runtime_resume(struct device *dev); -static int igb_runtime_idle(struct device *dev); -#endif -static const struct dev_pm_ops igb_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) - SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, - igb_runtime_idle) -}; -#endif -static void igb_shutdown(struct pci_dev *); -static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); #ifdef CONFIG_IGB_DCA static int igb_notify_dca(struct notifier_block *, unsigned long, void *); static struct notifier_block dca_notifier = { @@ -208,15 +182,10 @@ static struct notifier_block dca_notifier = { .priority = 0 }; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER -/* for netdump / net console */ -static void igb_netpoll(struct net_device *); -#endif #ifdef CONFIG_PCI_IOV -static unsigned int max_vfs = 0; -module_param(max_vfs, uint, 0); -MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " - "per physical function"); +static unsigned int max_vfs; +module_param(max_vfs, uint, 0444); +MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); #endif /* CONFIG_PCI_IOV */ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, @@ -232,23 +201,8 @@ static const struct pci_error_handlers igb_err_handler = { static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); -static struct pci_driver igb_driver = { - .name = igb_driver_name, - .id_table = igb_pci_tbl, - .probe = igb_probe, - .remove = igb_remove, -#ifdef CONFIG_PM - .driver.pm = &igb_pm_ops, -#endif - .shutdown = igb_shutdown, - .sriov_configure = igb_pci_sriov_configure, - .err_handler = &igb_err_handler -}; - -MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) static int debug = -1; @@ -330,7 +284,7 @@ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) break; case E1000_TDBAL(0): for (n = 0; n < 4; n++) - regs[n] = rd32(E1000_RDBAL(n)); + regs[n] = rd32(E1000_TDBAL(n)); break; case E1000_TDBAH(0): for (n = 0; n < 4; n++) @@ -370,7 +324,7 @@ static void igb_dump(struct igb_adapter *adapter) struct igb_reg_info *reginfo; struct igb_ring *tx_ring; union e1000_adv_tx_desc *tx_desc; - struct my_u0 { u64 a; u64 b; } *u0; + struct my_u0 { __le64 a; __le64 b; } *u0; struct igb_ring *rx_ring; union e1000_adv_rx_desc *rx_desc; u32 staterr; @@ -382,10 +336,9 @@ static void igb_dump(struct igb_adapter *adapter) /* Print netdevice Info */ if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start " - "last_rx\n"); - pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, - netdev->state, netdev->trans_start, netdev->last_rx); + pr_info("Device Name state trans_start\n"); + pr_info("%-15s %016lX %016lX\n", netdev->name, + netdev->state, dev_trans_start(netdev)); } /* Print Registers */ @@ -436,9 +389,7 @@ static void igb_dump(struct igb_adapter *adapter) pr_info("------------------------------------\n"); pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " - "[bi->dma ] leng ntw timestamp " - "bi->skb\n"); + pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { const char *next_desc; @@ -456,9 +407,8 @@ static void igb_dump(struct igb_adapter *adapter) else next_desc = ""; - pr_info("T [0x%03X] %016llX %016llX %016llX" - " %04X %p %016llX %p%s\n", i, - le64_to_cpu(u0->a), + pr_info("T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", + i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), (u64)dma_unmap_addr(buffer_info, dma), dma_unmap_len(buffer_info, len), @@ -517,19 +467,22 @@ rx_ring_summary: pr_info("------------------------------------\n"); pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " - "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); - pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" - "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); + pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); + pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { const char *next_desc; - struct igb_rx_buffer *buffer_info; - buffer_info = &rx_ring->rx_buffer_info[i]; + dma_addr_t dma = (dma_addr_t)0; + struct igb_rx_buffer *buffer_info = NULL; rx_desc = IGB_RX_DESC(rx_ring, i); u0 = (struct my_u0 *)rx_desc; staterr = le32_to_cpu(rx_desc->wb.upper.status_error); + if (!rx_ring->xsk_pool) { + buffer_info = &rx_ring->rx_buffer_info[i]; + dma = buffer_info->dma; + } + if (i == rx_ring->next_to_use) next_desc = " NTU"; else if (i == rx_ring->next_to_clean) @@ -549,17 +502,17 @@ rx_ring_summary: "R ", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)buffer_info->dma, + (u64)dma, next_desc); if (netif_msg_pktdata(adapter) && - buffer_info->dma && buffer_info->page) { + buffer_info && dma && buffer_info->page) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, page_address(buffer_info->page) + buffer_info->page_offset, - IGB_RX_BUFSZ, true); + igb_rx_bufsz(rx_ring), true); } } } @@ -571,8 +524,7 @@ exit: /** * igb_get_i2c_data - Reads the I2C SDA data bit - * @hw: pointer to hardware structure - * @i2cctl: Current value of I2CCTL register + * @data: opaque pointer to adapter struct * * Returns the I2C data bit value **/ @@ -582,7 +534,7 @@ static int igb_get_i2c_data(void *data) struct e1000_hw *hw = &adapter->hw; s32 i2cctl = rd32(E1000_I2CPARAMS); - return ((i2cctl & E1000_I2C_DATA_IN) != 0); + return !!(i2cctl & E1000_I2C_DATA_IN); } /** @@ -598,16 +550,15 @@ static void igb_set_i2c_data(void *data, int state) struct e1000_hw *hw = &adapter->hw; s32 i2cctl = rd32(E1000_I2CPARAMS); - if (state) - i2cctl |= E1000_I2C_DATA_OUT; - else + if (state) { + i2cctl |= E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; + } else { + i2cctl &= ~E1000_I2C_DATA_OE_N; i2cctl &= ~E1000_I2C_DATA_OUT; + } - i2cctl &= ~E1000_I2C_DATA_OE_N; - i2cctl |= E1000_I2C_CLK_OE_N; wr32(E1000_I2CPARAMS, i2cctl); wrfl(); - } /** @@ -624,8 +575,7 @@ static void igb_set_i2c_clk(void *data, int state) s32 i2cctl = rd32(E1000_I2CPARAMS); if (state) { - i2cctl |= E1000_I2C_CLK_OUT; - i2cctl &= ~E1000_I2C_CLK_OE_N; + i2cctl |= E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N; } else { i2cctl &= ~E1000_I2C_CLK_OUT; i2cctl &= ~E1000_I2C_CLK_OE_N; @@ -646,7 +596,7 @@ static int igb_get_i2c_clk(void *data) struct e1000_hw *hw = &adapter->hw; s32 i2cctl = rd32(E1000_I2CPARAMS); - return ((i2cctl & E1000_I2C_CLK_IN) != 0); + return !!(i2cctl & E1000_I2C_CLK_IN); } static const struct i2c_algo_bit_data igb_i2c_algo = { @@ -670,6 +620,8 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) return adapter->netdev; } +static struct pci_driver igb_driver; + /** * igb_init_module - Driver Registration Routine * @@ -679,15 +631,18 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) static int __init igb_init_module(void) { int ret; - pr_info("%s - version %s\n", - igb_driver_string, igb_driver_version); + pr_info("%s\n", igb_driver_string); pr_info("%s\n", igb_copyright); #ifdef CONFIG_IGB_DCA dca_register_notify(&dca_notifier); #endif ret = pci_register_driver(&igb_driver); +#ifdef CONFIG_IGB_DCA + if (ret) + dca_unregister_notify(&dca_notifier); +#endif return ret; } @@ -734,6 +689,7 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) adapter->rx_ring[i]->reg_idx = rbase_offset + Q_IDX_82576(i); } + fallthrough; case e1000_82575: case e1000_82580: case e1000_i350: @@ -749,6 +705,29 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) } } +u32 igb_rd32(struct e1000_hw *hw, u32 reg) +{ + struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); + u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); + u32 value = 0; + + if (E1000_REMOVED(hw_addr)) + return ~value; + + value = readl(&hw_addr[reg]); + + /* reads should not return all F's */ + if (!(~value) && (!reg || !(~readl(hw_addr)))) { + struct net_device *netdev = igb->netdev; + hw->hw_addr = NULL; + netdev_err(netdev, "PCIe link lost\n"); + WARN(pci_device_is_present(igb->pdev), + "igb: Failed to read reg 0x%x!\n", reg); + } + + return value; +} + /** * igb_write_ivar - configure ivar for given MSI-X vector * @hw: pointer to the HW structure @@ -800,7 +779,7 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; if (tx_queue > IGB_N0_QUEUE) msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; - if (!adapter->msix_entries && msix_vector == 0) + if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0) msixbm |= E1000_EIMS_OTHER; array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); q_vector->eims_value = msixbm; @@ -819,7 +798,7 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) igb_write_ivar(hw, msix_vector, tx_queue & 0x7, ((tx_queue & 0x8) << 1) + 8); - q_vector->eims_value = 1 << msix_vector; + q_vector->eims_value = BIT(msix_vector); break; case e1000_82580: case e1000_i350: @@ -840,7 +819,7 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) igb_write_ivar(hw, msix_vector, tx_queue >> 1, ((tx_queue & 0x1) << 4) + 8); - q_vector->eims_value = 1 << msix_vector; + q_vector->eims_value = BIT(msix_vector); break; default: BUG(); @@ -902,7 +881,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) E1000_GPIE_NSICR); /* enable msix_other interrupt */ - adapter->eims_other = 1 << vector; + adapter->eims_other = BIT(vector); tmp = (vector++ | E1000_IVAR_VALID) << 8; wr32(E1000_IVAR_MISC, tmp); @@ -929,8 +908,8 @@ static void igb_configure_msix(struct igb_adapter *adapter) **/ static int igb_request_msix(struct igb_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; struct net_device *netdev = adapter->netdev; - struct e1000_hw *hw = &adapter->hw; int i, err = 0, vector = 0, free_vector = 0; err = request_irq(adapter->msix_entries[vector].vector, @@ -938,12 +917,18 @@ static int igb_request_msix(struct igb_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igb_q_vector *q_vector = adapter->q_vector[i]; vector++; - q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); + q_vector->itr_register = adapter->io_addr + E1000_EITR(vector); if (q_vector->rx.ring && q_vector->tx.ring) sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, @@ -962,6 +947,9 @@ static int igb_request_msix(struct igb_adapter *adapter) q_vector); if (err) goto err_free; + + netif_napi_set_irq(&q_vector->napi, + adapter->msix_entries[vector].vector); } igb_configure_msix(adapter); @@ -980,43 +968,65 @@ err_out: return err; } -static void igb_reset_interrupt_capability(struct igb_adapter *adapter) -{ - if (adapter->msix_entries) { - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IGB_FLAG_HAS_MSI) { - pci_disable_msi(adapter->pdev); - } -} - /** * igb_free_q_vector - Free memory allocated for specific interrupt vector * @adapter: board private structure to initialize * @v_idx: Index of vector to be freed * - * This function frees the memory allocated to the q_vector. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. + * This function frees the memory allocated to the q_vector. **/ static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) { struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + adapter->q_vector[v_idx] = NULL; + + /* igb_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/** + * igb_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igb_free_q_vector. + **/ +static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* Coming from igb_set_interrupt_capability, the vectors are not yet + * allocated. So, q_vector is NULL so we should stop here. + */ + if (!q_vector) + return; + if (q_vector->tx.ring) adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; if (q_vector->rx.ring) - adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; - adapter->q_vector[v_idx] = NULL; netif_napi_del(&q_vector->napi); - /* ixgbe_get_stats64() might access the rings on this vector, - * we must wait a grace period before freeing it. - */ - kfree_rcu(q_vector, rcu); +} + +static void igb_reset_interrupt_capability(struct igb_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->flags & IGB_FLAG_HAS_MSIX) + pci_disable_msix(adapter->pdev); + else if (adapter->flags & IGB_FLAG_HAS_MSI) + pci_disable_msi(adapter->pdev); + + while (v_idx--) + igb_reset_q_vector(adapter, v_idx); } /** @@ -1035,8 +1045,10 @@ static void igb_free_q_vectors(struct igb_adapter *adapter) adapter->num_rx_queues = 0; adapter->num_q_vectors = 0; - while (v_idx--) + while (v_idx--) { + igb_reset_q_vector(adapter, v_idx); igb_free_q_vector(adapter, v_idx); + } } /** @@ -1067,6 +1079,7 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) if (!msix) goto msi_only; + adapter->flags |= IGB_FLAG_HAS_MSIX; /* Number of supported queues. */ adapter->num_rx_queues = adapter->rss_queues; @@ -1087,25 +1100,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) /* add 1 vector for link status interrupts */ numvecs++; - adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), - GFP_KERNEL); - - if (!adapter->msix_entries) - goto msi_only; - for (i = 0; i < numvecs; i++) adapter->msix_entries[i].entry = i; - err = pci_enable_msix(adapter->pdev, - adapter->msix_entries, - numvecs); - if (err == 0) + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) return; igb_reset_interrupt_capability(adapter); /* If we can't do MSI-X, try MSI */ msi_only: + adapter->flags &= ~IGB_FLAG_HAS_MSIX; #ifdef CONFIG_PCI_IOV /* disable SR-IOV for non MSI-X configurations */ if (adapter->vf_data) { @@ -1114,6 +1123,8 @@ msi_only: pci_disable_sriov(adapter->pdev); msleep(500); + kfree(adapter->vf_mac_list); + adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); @@ -1158,24 +1169,36 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, { struct igb_q_vector *q_vector; struct igb_ring *ring; - int ring_count, size; + int ring_count; + size_t size; /* igb only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igb_q_vector) + - (sizeof(struct igb_ring) * ring_count); + size = kmalloc_size_roundup(struct_size(q_vector, ring, ring_count)); /* allocate q_vector and rings */ - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) { + q_vector = kzalloc(size, GFP_KERNEL); + } else if (size > ksize(q_vector)) { + struct igb_q_vector *new_q_vector; + + new_q_vector = kzalloc(size, GFP_KERNEL); + if (new_q_vector) + kfree_rcu(q_vector, rcu); + q_vector = new_q_vector; + } else { + memset(q_vector, 0, size); + } if (!q_vector) return -ENOMEM; /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, - igb_poll, 64); + netif_napi_add_config(adapter->netdev, &q_vector->napi, igb_poll, + v_idx); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; @@ -1185,13 +1208,13 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, q_vector->tx.work_limit = adapter->tx_work_limit; /* initialize ITR configuration */ - q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_register = adapter->io_addr + E1000_EITR(0); q_vector->itr_val = IGB_START_ITR; /* initialize pointer to rings */ ring = q_vector->ring; - /* intialize ITR */ + /* initialize ITR */ if (rxr_count) { /* rx or rx/tx vector */ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) @@ -1221,6 +1244,15 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + ring->cbs_enable = false; + ring->idleslope = 0; + ring->sendslope = 0; + ring->hicredit = 0; + ring->locredit = 0; + + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -1243,8 +1275,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, if (adapter->hw.mac.type >= e1000_82576) set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); - /* - * On i350, i354, i210, and i211, loopback VLAN packets + /* On i350, i354, i210, and i211, loopback VLAN packets * have the tag byte-swapped. */ if (adapter->hw.mac.type >= e1000_i350) @@ -1254,6 +1285,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; + u64_stats_init(&ring->rx_syncp); + /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; } @@ -1294,6 +1327,7 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) for (; v_idx < q_vectors; v_idx++) { int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, tqpv, txr_idx, rqpv, rxr_idx); @@ -1362,7 +1396,7 @@ static int igb_request_irq(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; int err = 0; - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { err = igb_request_msix(adapter); if (!err) goto request_done; @@ -1406,7 +1440,7 @@ request_done: static void igb_free_irq(struct igb_adapter *adapter) { - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { int vector = 0, i; free_irq(adapter->msix_entries[vector++].vector, adapter); @@ -1431,8 +1465,9 @@ static void igb_irq_disable(struct igb_adapter *adapter) * mapped into these registers and so clearing the bits can cause * issues on the VF drivers so we only need to clear what we set */ - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 regval = rd32(E1000_EIAM); + wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); wr32(E1000_EIMC, adapter->eims_enable_mask); regval = rd32(E1000_EIAC); @@ -1442,8 +1477,9 @@ static void igb_irq_disable(struct igb_adapter *adapter) wr32(E1000_IAM, 0); wr32(E1000_IMC, ~0); wrfl(); - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { int i; + for (i = 0; i < adapter->num_q_vectors; i++) synchronize_irq(adapter->msix_entries[i].vector); } else { @@ -1459,9 +1495,10 @@ static void igb_irq_enable(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; u32 regval = rd32(E1000_EIAC); + wr32(E1000_EIAC, regval | adapter->eims_enable_mask); regval = rd32(E1000_EIAM); wr32(E1000_EIAM, regval | adapter->eims_enable_mask); @@ -1482,22 +1519,22 @@ static void igb_irq_enable(struct igb_adapter *adapter) static void igb_update_mng_vlan(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; u16 vid = adapter->hw.mng_cookie.vlan_id; u16 old_vid = adapter->mng_vlan_id; if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { /* add VID to filter table */ - igb_vfta_set(hw, vid, true); + igb_vfta_set(hw, vid, pf_id, true, true); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; } - if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && - (vid != old_vid) && + if (old_vid != IGB_MNG_VLAN_NONE && vid != old_vid && !test_bit(old_vid, adapter->active_vlans)) { /* remove VID from filter table */ - igb_vfta_set(hw, old_vid, false); + igb_vfta_set(hw, vid, pf_id, false, true); } } @@ -1539,6 +1576,394 @@ static void igb_get_hw_control(struct igb_adapter *adapter) ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } +static void enable_fqtss(struct igb_adapter *adapter, bool enable) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + + WARN_ON(hw->mac.type != e1000_i210); + + if (enable) + adapter->flags |= IGB_FLAG_FQTSS; + else + adapter->flags &= ~IGB_FLAG_FQTSS; + + if (netif_running(netdev)) + schedule_work(&adapter->reset_task); +} + +static bool is_fqtss_enabled(struct igb_adapter *adapter) +{ + return (adapter->flags & IGB_FLAG_FQTSS) ? true : false; +} + +static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue, + enum tx_queue_prio prio) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 4); + + val = rd32(E1000_I210_TXDCTL(queue)); + + if (prio == TX_QUEUE_PRIO_HIGH) + val |= E1000_TXDCTL_PRIORITY; + else + val &= ~E1000_TXDCTL_PRIORITY; + + wr32(E1000_I210_TXDCTL(queue), val); +} + +static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + + val = rd32(E1000_I210_TQAVCC(queue)); + + if (mode == QUEUE_MODE_STREAM_RESERVATION) + val |= E1000_TQAVCC_QUEUEMODE; + else + val &= ~E1000_TQAVCC_QUEUEMODE; + + wr32(E1000_I210_TQAVCC(queue), val); +} + +static bool is_any_cbs_enabled(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + if (adapter->tx_ring[i]->cbs_enable) + return true; + } + + return false; +} + +static bool is_any_txtime_enabled(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + if (adapter->tx_ring[i]->launchtime_enable) + return true; + } + + return false; +} + +/** + * igb_config_tx_modes - Configure "Qav Tx mode" features on igb + * @adapter: pointer to adapter struct + * @queue: queue number + * + * Configure CBS and Launchtime for a given hardware queue. + * Parameters are retrieved from the correct Tx ring, so + * igb_save_cbs_params() and igb_save_txtime_params() should be used + * for setting those correctly prior to this function being called. + **/ +static void igb_config_tx_modes(struct igb_adapter *adapter, int queue) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + struct igb_ring *ring; + u32 tqavcc, tqavctrl; + u16 value; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + ring = adapter->tx_ring[queue]; + + /* If any of the Qav features is enabled, configure queues as SR and + * with HIGH PRIO. If none is, then configure them with LOW PRIO and + * as SP. + */ + if (ring->cbs_enable || ring->launchtime_enable) { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); + set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); + } else { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW); + set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY); + } + + /* If CBS is enabled, set DataTranARB and config its parameters. */ + if (ring->cbs_enable || queue == 0) { + /* i210 does not allow the queue 0 to be in the Strict + * Priority mode while the Qav mode is enabled, so, + * instead of disabling strict priority mode, we give + * queue 0 the maximum of credits possible. + * + * See section 8.12.19 of the i210 datasheet, "Note: + * Queue0 QueueMode must be set to 1b when + * TransmitMode is set to Qav." + */ + if (queue == 0 && !ring->cbs_enable) { + /* max "linkspeed" idleslope in kbps */ + ring->idleslope = 1000000; + ring->hicredit = ETH_FRAME_LEN; + } + + /* Always set data transfer arbitration to credit-based + * shaper algorithm on TQAVCTRL if CBS is enabled for any of + * the queues. + */ + tqavctrl = rd32(E1000_I210_TQAVCTRL); + tqavctrl |= E1000_TQAVCTRL_DATATRANARB; + wr32(E1000_I210_TQAVCTRL, tqavctrl); + + /* According to i210 datasheet section 7.2.7.7, we should set + * the 'idleSlope' field from TQAVCC register following the + * equation: + * + * For 100 Mbps link speed: + * + * value = BW * 0x7735 * 0.2 (E1) + * + * For 1000Mbps link speed: + * + * value = BW * 0x7735 * 2 (E2) + * + * E1 and E2 can be merged into one equation as shown below. + * Note that 'link-speed' is in Mbps. + * + * value = BW * 0x7735 * 2 * link-speed + * -------------- (E3) + * 1000 + * + * 'BW' is the percentage bandwidth out of full link speed + * which can be found with the following equation. Note that + * idleSlope here is the parameter from this function which + * is in kbps. + * + * BW = idleSlope + * ----------------- (E4) + * link-speed * 1000 + * + * That said, we can come up with a generic equation to + * calculate the value we should set it TQAVCC register by + * replacing 'BW' in E3 by E4. The resulting equation is: + * + * value = idleSlope * 0x7735 * 2 * link-speed + * ----------------- -------------- (E5) + * link-speed * 1000 1000 + * + * 'link-speed' is present in both sides of the fraction so + * it is canceled out. The final equation is the following: + * + * value = idleSlope * 61034 + * ----------------- (E6) + * 1000000 + * + * NOTE: For i210, given the above, we can see that idleslope + * is represented in 16.38431 kbps units by the value at + * the TQAVCC register (1Gbps / 61034), which reduces + * the granularity for idleslope increments. + * For instance, if you want to configure a 2576kbps + * idleslope, the value to be written on the register + * would have to be 157.23. If rounded down, you end + * up with less bandwidth available than originally + * required (~2572 kbps). If rounded up, you end up + * with a higher bandwidth (~2589 kbps). Below the + * approach we take is to always round up the + * calculated value, so the resulting bandwidth might + * be slightly higher for some configurations. + */ + value = DIV_ROUND_UP_ULL(ring->idleslope * 61034ULL, 1000000); + + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + tqavcc |= value; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + wr32(E1000_I210_TQAVHC(queue), + 0x80000000 + ring->hicredit * 0x7735); + } else { + + /* Set idleSlope to zero. */ + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + /* Set hiCredit to zero. */ + wr32(E1000_I210_TQAVHC(queue), 0); + + /* If CBS is not enabled for any queues anymore, then return to + * the default state of Data Transmission Arbitration on + * TQAVCTRL. + */ + if (!is_any_cbs_enabled(adapter)) { + tqavctrl = rd32(E1000_I210_TQAVCTRL); + tqavctrl &= ~E1000_TQAVCTRL_DATATRANARB; + wr32(E1000_I210_TQAVCTRL, tqavctrl); + } + } + + /* If LaunchTime is enabled, set DataTranTIM. */ + if (ring->launchtime_enable) { + /* Always set DataTranTIM on TQAVCTRL if LaunchTime is enabled + * for any of the SR queues, and configure fetchtime delta. + * XXX NOTE: + * - LaunchTime will be enabled for all SR queues. + * - A fixed offset can be added relative to the launch + * time of all packets if configured at reg LAUNCH_OS0. + * We are keeping it as 0 for now (default value). + */ + tqavctrl = rd32(E1000_I210_TQAVCTRL); + tqavctrl |= E1000_TQAVCTRL_DATATRANTIM | + E1000_TQAVCTRL_FETCHTIME_DELTA; + wr32(E1000_I210_TQAVCTRL, tqavctrl); + } else { + /* If Launchtime is not enabled for any SR queues anymore, + * then clear DataTranTIM on TQAVCTRL and clear fetchtime delta, + * effectively disabling Launchtime. + */ + if (!is_any_txtime_enabled(adapter)) { + tqavctrl = rd32(E1000_I210_TQAVCTRL); + tqavctrl &= ~E1000_TQAVCTRL_DATATRANTIM; + tqavctrl &= ~E1000_TQAVCTRL_FETCHTIME_DELTA; + wr32(E1000_I210_TQAVCTRL, tqavctrl); + } + } + + /* XXX: In i210 controller the sendSlope and loCredit parameters from + * CBS are not configurable by software so we don't do any 'controller + * configuration' in respect to these parameters. + */ + + netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n", + ring->cbs_enable ? "enabled" : "disabled", + ring->launchtime_enable ? "enabled" : "disabled", + queue, + ring->idleslope, ring->sendslope, + ring->hicredit, ring->locredit); +} + +static int igb_save_txtime_params(struct igb_adapter *adapter, int queue, + bool enable) +{ + struct igb_ring *ring; + + if (queue < 0 || queue > adapter->num_tx_queues) + return -EINVAL; + + ring = adapter->tx_ring[queue]; + ring->launchtime_enable = enable; + + return 0; +} + +static int igb_save_cbs_params(struct igb_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + struct igb_ring *ring; + + if (queue < 0 || queue > adapter->num_tx_queues) + return -EINVAL; + + ring = adapter->tx_ring[queue]; + + ring->cbs_enable = enable; + ring->idleslope = idleslope; + ring->sendslope = sendslope; + ring->hicredit = hicredit; + ring->locredit = locredit; + + return 0; +} + +/** + * igb_setup_tx_mode - Switch to/from Qav Tx mode when applicable + * @adapter: pointer to adapter struct + * + * Configure TQAVCTRL register switching the controller's Tx mode + * if FQTSS mode is enabled or disabled. Additionally, will issue + * a call to igb_config_tx_modes() per queue so any previously saved + * Tx parameters are applied. + **/ +static void igb_setup_tx_mode(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 val; + + /* Only i210 controller supports changing the transmission mode. */ + if (hw->mac.type != e1000_i210) + return; + + if (is_fqtss_enabled(adapter)) { + int i, max_queue; + + /* Configure TQAVCTRL register: set transmit mode to 'Qav', + * set data fetch arbitration to 'round robin', set SP_WAIT_SR + * so SP queues wait for SR ones. + */ + val = rd32(E1000_I210_TQAVCTRL); + val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_SP_WAIT_SR; + val &= ~E1000_TQAVCTRL_DATAFETCHARB; + wr32(E1000_I210_TQAVCTRL, val); + + /* Configure Tx and Rx packet buffers sizes as described in + * i210 datasheet section 7.2.7.7. + */ + val = rd32(E1000_TXPBS); + val &= ~I210_TXPBSIZE_MASK; + val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB | + I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB; + wr32(E1000_TXPBS, val); + + val = rd32(E1000_RXPBS); + val &= ~I210_RXPBSIZE_MASK; + val |= I210_RXPBSIZE_PB_30KB; + wr32(E1000_RXPBS, val); + + /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ + * register should not exceed the buffer size programmed in + * TXPBS. The smallest buffer size programmed in TXPBS is 4kB + * so according to the datasheet we should set MAX_TPKT_SIZE to + * 4kB / 64. + * + * However, when we do so, no frame from queue 2 and 3 are + * transmitted. It seems the MAX_TPKT_SIZE should not be great + * or _equal_ to the buffer size programmed in TXPBS. For this + * reason, we set MAX_ TPKT_SIZE to (4kB - 1) / 64. + */ + val = (4096 - 1) / 64; + wr32(E1000_I210_DTXMXPKTSZ, val); + + /* Since FQTSS mode is enabled, apply any CBS configuration + * previously set. If no previous CBS configuration has been + * done, then the initial configuration is applied, which means + * CBS is disabled. + */ + max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ? + adapter->num_tx_queues : I210_SR_QUEUES_NUM; + + for (i = 0; i < max_queue; i++) { + igb_config_tx_modes(adapter, i); + } + } else { + wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); + wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); + wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT); + + val = rd32(E1000_I210_TQAVCTRL); + /* According to Section 8.12.21, the other flags we've set when + * enabling FQTSS are not relevant when disabling FQTSS so we + * don't set they here. + */ + val &= ~E1000_TQAVCTRL_XMIT_MODE; + wr32(E1000_I210_TQAVCTRL, val); + } + + netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ? + "enabled" : "disabled"); +} + /** * igb_configure - configure the hardware for RX and TX * @adapter: private board structure @@ -1550,6 +1975,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_get_hw_control(adapter); igb_set_rx_mode(netdev); + igb_setup_tx_mode(adapter); igb_restore_vlan(adapter); @@ -1557,6 +1983,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_setup_mrqc(adapter); igb_setup_rctl(adapter); + igb_nfc_filter_restore(adapter); igb_configure_tx(adapter); igb_configure_rx(adapter); @@ -1568,7 +1995,11 @@ static void igb_configure(struct igb_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = adapter->rx_ring[i]; - igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); + if (ring->xsk_pool) + igb_alloc_rx_buffers_zc(ring, ring->xsk_pool, + igb_desc_unused(ring)); + else + igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); } } @@ -1584,6 +2015,8 @@ void igb_power_up_link(struct igb_adapter *adapter) igb_power_up_phy_copper(&adapter->hw); else igb_power_up_serdes_link_82575(&adapter->hw); + + igb_setup_link(&adapter->hw); } /** @@ -1599,12 +2032,97 @@ static void igb_power_down_link(struct igb_adapter *adapter) } /** + * igb_check_swap_media - Detect and switch function for Media Auto Sense + * @adapter: address of the board private structure + **/ +static void igb_check_swap_media(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext, connsw; + bool swap_now = false; + + ctrl_ext = rd32(E1000_CTRL_EXT); + connsw = rd32(E1000_CONNSW); + + /* need to live swap if current media is copper and we have fiber/serdes + * to go to. + */ + + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { + swap_now = true; + } else if ((hw->phy.media_type != e1000_media_type_copper) && + !(connsw & E1000_CONNSW_SERDESD)) { + /* copper signal takes time to appear */ + if (adapter->copper_tries < 4) { + adapter->copper_tries++; + connsw |= E1000_CONNSW_AUTOSENSE_CONF; + wr32(E1000_CONNSW, connsw); + return; + } else { + adapter->copper_tries = 0; + if ((connsw & E1000_CONNSW_PHYSD) && + (!(connsw & E1000_CONNSW_PHY_PDN))) { + swap_now = true; + connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; + wr32(E1000_CONNSW, connsw); + } + } + } + + if (!swap_now) + return; + + switch (hw->phy.media_type) { + case e1000_media_type_copper: + netdev_info(adapter->netdev, + "MAS: changing media to fiber/serdes\n"); + ctrl_ext |= + E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + adapter->copper_tries = 0; + break; + case e1000_media_type_internal_serdes: + case e1000_media_type_fiber: + netdev_info(adapter->netdev, + "MAS: changing media to copper\n"); + ctrl_ext &= + ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + break; + default: + /* shouldn't get here during regular operation */ + netdev_err(adapter->netdev, + "AMS: Invalid media type found, returning\n"); + break; + } + wr32(E1000_CTRL_EXT, ctrl_ext); +} + +void igb_set_queue_napi(struct igb_adapter *adapter, int vector, + struct napi_struct *napi) +{ + struct igb_q_vector *q_vector = adapter->q_vector[vector]; + + if (q_vector->rx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->rx.ring->queue_index, + NETDEV_QUEUE_TYPE_RX, napi); + + if (q_vector->tx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->tx.ring->queue_index, + NETDEV_QUEUE_TYPE_TX, napi); +} + +/** * igb_up - Open the interface and prepare it to handle traffic * @adapter: board private structure **/ int igb_up(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int i; /* hardware has been reset, we need to reload some things */ @@ -1612,21 +2130,26 @@ int igb_up(struct igb_adapter *adapter) clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } - if (adapter->msix_entries) + if (adapter->flags & IGB_FLAG_HAS_MSIX) igb_configure_msix(adapter); else igb_assign_vector(adapter->q_vector[0], 0); /* Clear any pending interrupts. */ + rd32(E1000_TSICR); rd32(E1000_ICR); igb_irq_enable(adapter); /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -1637,6 +2160,10 @@ int igb_up(struct igb_adapter *adapter) hw->mac.get_link_status = 1; schedule_work(&adapter->watchdog_task); + if ((adapter->flags & IGB_FLAG_EEE) && + (!hw->dev_spec._82575.eee_disable)) + adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; + return 0; } @@ -1657,6 +2184,9 @@ void igb_down(struct igb_adapter *adapter) wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); /* flush and sleep below */ + igb_nfc_filter_exit(adapter); + + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); /* disable transmits in the hardware */ @@ -1665,24 +2195,26 @@ void igb_down(struct igb_adapter *adapter) wr32(E1000_TCTL, tctl); /* flush both disables and wait for them to finish */ wrfl(); - msleep(10); + usleep_range(10000, 11000); igb_irq_disable(adapter); + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + for (i = 0; i < adapter->num_q_vectors; i++) { - napi_synchronize(&(adapter->q_vector[i]->napi)); - napi_disable(&(adapter->q_vector[i]->napi)); + if (adapter->q_vector[i]) { + napi_synchronize(&adapter->q_vector[i]->napi); + igb_set_queue_napi(adapter, i, NULL); + napi_disable(&adapter->q_vector[i]->napi); + } } - - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); - - netif_carrier_off(netdev); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); /* record the stats before reset*/ spin_lock(&adapter->stats64_lock); - igb_update_stats(adapter, &adapter->stats64); + igb_update_stats(adapter); spin_unlock(&adapter->stats64_lock); adapter->link_speed = 0; @@ -1690,6 +2222,10 @@ void igb_down(struct igb_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) igb_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + igb_clean_all_tx_rings(adapter); igb_clean_all_rx_rings(adapter); #ifdef CONFIG_IGB_DCA @@ -1701,21 +2237,63 @@ void igb_down(struct igb_adapter *adapter) void igb_reinit_locked(struct igb_adapter *adapter) { - WARN_ON(in_interrupt()); while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); igb_down(adapter); igb_up(adapter); clear_bit(__IGB_RESETTING, &adapter->state); } +/** igb_enable_mas - Media Autosense re-enable after swap + * + * @adapter: adapter struct + **/ +static void igb_enable_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 connsw = rd32(E1000_CONNSW); + + /* configure for SerDes media detect */ + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_SERDESD))) { + connsw |= E1000_CONNSW_ENRGSRC; + connsw |= E1000_CONNSW_AUTOSENSE_EN; + wr32(E1000_CONNSW, connsw); + wrfl(); + } +} + +#ifdef CONFIG_IGB_HWMON +/** + * igb_set_i2c_bb - Init I2C interface + * @hw: pointer to hardware structure + **/ +static void igb_set_i2c_bb(struct e1000_hw *hw) +{ + u32 ctrl_ext; + s32 i2cctl; + + ctrl_ext = rd32(E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_I2C_ENA; + wr32(E1000_CTRL_EXT, ctrl_ext); + wrfl(); + + i2cctl = rd32(E1000_I2CPARAMS); + i2cctl |= E1000_I2CBB_EN + | E1000_I2C_CLK_OE_N + | E1000_I2C_DATA_OE_N; + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); +} +#endif + void igb_reset(struct igb_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + u32 pba, hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. @@ -1739,9 +2317,10 @@ void igb_reset(struct igb_adapter *adapter) break; } - if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && - (mac->type < e1000_82576)) { - /* adjust PBA for jumbo frames */ + if (mac->type == e1000_82575) { + u32 min_rx_space, min_tx_space, needed_tx_space; + + /* write Rx PBA so that hardware can report correct Tx PBA */ wr32(E1000_PBA, pba); /* To maintain wire speed transmits, the Tx FIFO should be @@ -1751,31 +2330,26 @@ void igb_reset(struct igb_adapter *adapter) * one full receive packet and is similarly rounded up and * expressed in KB. */ - pba = rd32(E1000_PBA); - /* upper 16 bits has Tx packet buffer allocation size in KB */ - tx_space = pba >> 16; - /* lower 16 bits has Rx packet buffer allocation size in KB */ - pba &= 0xffff; - /* the Tx fifo also stores 16 bytes of information about the Tx - * but don't include ethernet FCS because hardware appends it + min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024); + + /* The Tx FIFO also stores 16 bytes of information about the Tx + * but don't include Ethernet FCS because hardware appends it. + * We only need to round down to the nearest 512 byte block + * count since the value we care about is 2 frames, not 1. */ - min_tx_space = (adapter->max_frame_size + - sizeof(union e1000_adv_tx_desc) - - ETH_FCS_LEN) * 2; - min_tx_space = ALIGN(min_tx_space, 1024); - min_tx_space >>= 10; - /* software strips receive CRC, so leave room for it */ - min_rx_space = adapter->max_frame_size; - min_rx_space = ALIGN(min_rx_space, 1024); - min_rx_space >>= 10; + min_tx_space = adapter->max_frame_size; + min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN; + min_tx_space = DIV_ROUND_UP(min_tx_space, 512); + + /* upper 16 bits has Tx packet buffer allocation size in KB */ + needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16); /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation + * allocation, take space away from current Rx allocation. */ - if (tx_space < min_tx_space && - ((min_tx_space - tx_space) < pba)) { - pba = pba - (min_tx_space - tx_space); + if (needed_tx_space < pba) { + pba -= needed_tx_space; /* if short on Rx space, Rx wins and must trump Tx * adjustment @@ -1783,18 +2357,20 @@ void igb_reset(struct igb_adapter *adapter) if (pba < min_rx_space) pba = min_rx_space; } + + /* adjust PBA for jumbo frames */ wr32(E1000_PBA, pba); } - /* flow control settings */ - /* The high water mark must be low enough to fit one full frame - * (or the size used for early receive) above it in the Rx FIFO. - * Set it to the lower of: - * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; @@ -1805,6 +2381,7 @@ void igb_reset(struct igb_adapter *adapter) /* disable receive for all VFs and wait one second */ if (adapter->vfs_allocated_count) { int i; + for (i = 0 ; i < adapter->vfs_allocated_count; i++) adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; @@ -1820,9 +2397,25 @@ void igb_reset(struct igb_adapter *adapter) hw->mac.ops.reset_hw(hw); wr32(E1000_WUC, 0); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + /* need to resetup here after media swap */ + adapter->ei.get_invariants(hw); + adapter->flags &= ~IGB_FLAG_MEDIA_RESET; + } + if ((mac->type == e1000_82575 || mac->type == e1000_i350) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + igb_enable_mas(adapter); + } if (hw->mac.ops.init_hw(hw)) dev_err(&pdev->dev, "Hardware Error\n"); + /* RAR registers were cleared during init_hw, clear mac table */ + igb_flush_mac_table(adapter); + __dev_uc_unsync(adapter->netdev, NULL); + + /* Recover default RAR entry */ + igb_set_default_mac_filter(adapter); + /* Flow control settings reset on hardware reset, so guarantee flow * control is off when forcing speed. */ @@ -1838,10 +2431,26 @@ void igb_reset(struct igb_adapter *adapter) * interface. */ if (adapter->ets) - mac->ops.init_thermal_sensor_thresh(hw); + igb_set_i2c_bb(hw); + mac->ops.init_thermal_sensor_thresh(hw); } } #endif + /* Re-establish EEE setting */ + if (hw->phy.media_type == e1000_media_type_copper) { + switch (mac->type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + igb_set_eee_i350(hw, true, true); + break; + case e1000_i354: + igb_set_eee_i354(hw, true, true); + break; + default: + break; + } + } if (!netif_running(adapter->netdev)) igb_power_down_link(adapter); @@ -1851,7 +2460,8 @@ void igb_reset(struct igb_adapter *adapter) wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); /* Re-enable PTP, where applicable. */ - igb_ptp_reset(adapter); + if (adapter->ptp_flags & IGB_PTP_ENABLED) + igb_ptp_reset(adapter); igb_get_phy_info(hw); } @@ -1879,9 +2489,24 @@ static int igb_set_features(struct net_device *netdev, if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); - if (!(changed & NETIF_F_RXALL)) + if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; + if (!(features & NETIF_F_NTUPLE)) { + struct hlist_node *node2; + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + hlist_for_each_entry_safe(rule, node2, + &adapter->nfc_filter_list, nfc_node) { + igb_erase_filter(adapter, rule); + hlist_del(&rule->nfc_node); + kfree(rule); + } + spin_unlock(&adapter->nfc_lock); + adapter->nfc_filter_count = 0; + } + netdev->features = features; if (netif_running(netdev)) @@ -1889,7 +2514,524 @@ static int igb_set_features(struct net_device *netdev, else igb_reset(adapter); + return 1; +} + +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags, bool *notified, + struct netlink_ext_ack *extack) +{ + /* guarantee we can provide a unique filter for the unicast address */ + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + struct igb_adapter *adapter = netdev_priv(dev); + int vfn = adapter->vfs_allocated_count; + + if (netdev_uc_count(dev) >= igb_available_rars(adapter, vfn)) + return -ENOMEM; + } + + return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags); +} + +#define IGB_MAX_MAC_HDR_LEN 127 +#define IGB_MAX_NETWORK_HDR_LEN 511 + +static netdev_features_t +igb_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + unsigned int network_hdr_len, mac_hdr_len; + + /* Make certain the headers can be described by a context descriptor */ + mac_hdr_len = skb_network_offset(skb); + if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_TSO | + NETIF_F_TSO6); + + network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); + if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | + NETIF_F_TSO | + NETIF_F_TSO6); + + /* We can only support IPV4 TSO in tunnels if we can mangle the + * inner IP ID field, so strip TSO if MANGLEID is not supported. + */ + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; + + return features; +} + +static void igb_offload_apply(struct igb_adapter *adapter, s32 queue) +{ + if (!is_fqtss_enabled(adapter)) { + enable_fqtss(adapter, true); + return; + } + + igb_config_tx_modes(adapter, queue); + + if (!is_any_cbs_enabled(adapter) && !is_any_txtime_enabled(adapter)) + enable_fqtss(adapter, false); +} + +static int igb_offload_cbs(struct igb_adapter *adapter, + struct tc_cbs_qopt_offload *qopt) +{ + struct e1000_hw *hw = &adapter->hw; + int err; + + /* CBS offloading is only supported by i210 controller. */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + /* CBS offloading is only supported by queue 0 and queue 1. */ + if (qopt->queue < 0 || qopt->queue > 1) + return -EINVAL; + + err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + if (err) + return err; + + igb_offload_apply(adapter, qopt->queue); + + return 0; +} + +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define VLAN_PRIO_FULL_MASK (0x07) + +static int igb_parse_cls_flower(struct igb_adapter *adapter, + struct flow_cls_offload *f, + int traffic_class, + struct igb_nfc_filter *input) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + struct netlink_ext_ack *extack = f->common.extack; + + if (dissector->used_keys & + ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + if (!is_zero_ether_addr(match.mask->dst)) { + if (!is_broadcast_ether_addr(match.mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, match.key->dst); + } + + if (!is_zero_ether_addr(match.mask->src)) { + if (!is_broadcast_ether_addr(match.mask->src)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, match.key->src); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + if (match.mask->n_proto) { + if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); + return -EINVAL; + } + + input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; + input->filter.etype = match.key->n_proto; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_priority) { + if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); + return -EINVAL; + } + + input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; + input->filter.vlan_tci = + (__force __be16)match.key->vlan_priority; + } + } + + input->action = traffic_class; + input->cookie = f->cookie; + + return 0; +} + +static int igb_configure_clsflower(struct igb_adapter *adapter, + struct flow_cls_offload *cls_flower) +{ + struct netlink_ext_ack *extack = cls_flower->common.extack; + struct igb_nfc_filter *filter, *f; + int err, tc; + + tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + if (tc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + err = igb_parse_cls_flower(adapter, cls_flower, tc, filter); + if (err < 0) + goto err_parse; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in ethtool"); + goto err_locked; + } + } + + hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in cls_flower"); + goto err_locked; + } + } + + err = igb_add_filter(adapter, filter); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter"); + goto err_locked; + } + + hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list); + + spin_unlock(&adapter->nfc_lock); + return 0; + +err_locked: + spin_unlock(&adapter->nfc_lock); + +err_parse: + kfree(filter); + + return err; +} + +static int igb_delete_clsflower(struct igb_adapter *adapter, + struct flow_cls_offload *cls_flower) +{ + struct igb_nfc_filter *filter; + int err; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node) + if (filter->cookie == cls_flower->cookie) + break; + + if (!filter) { + err = -ENOENT; + goto out; + } + + err = igb_erase_filter(adapter, filter); + if (err < 0) + goto out; + + hlist_del(&filter->nfc_node); + kfree(filter); + +out: + spin_unlock(&adapter->nfc_lock); + + return err; +} + +static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, + struct flow_cls_offload *cls_flower) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return igb_configure_clsflower(adapter, cls_flower); + case FLOW_CLS_DESTROY: + return igb_delete_clsflower(adapter, cls_flower); + case FLOW_CLS_STATS: + return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } +} + +static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct igb_adapter *adapter = cb_priv; + + if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return igb_setup_tc_cls_flower(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int igb_offload_txtime(struct igb_adapter *adapter, + struct tc_etf_qopt_offload *qopt) +{ + struct e1000_hw *hw = &adapter->hw; + int err; + + /* Launchtime offloading is only supported by i210 controller. */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + /* Launchtime offloading is only supported by queues 0 and 1. */ + if (qopt->queue < 0 || qopt->queue > 1) + return -EINVAL; + + err = igb_save_txtime_params(adapter, qopt->queue, qopt->enable); + if (err) + return err; + + igb_offload_apply(adapter, qopt->queue); + + return 0; +} + +static int igb_tc_query_caps(struct igb_adapter *adapter, + struct tc_query_caps_base *base) +{ + switch (base->type) { + case TC_SETUP_QDISC_TAPRIO: { + struct tc_taprio_caps *caps = base->caps; + + caps->broken_mqprio = true; + + return 0; + } + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(igb_block_cb_list); + +static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (type) { + case TC_QUERY_CAPS: + return igb_tc_query_caps(adapter, type_data); + case TC_SETUP_QDISC_CBS: + return igb_offload_cbs(adapter, type_data); + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &igb_block_cb_list, + igb_setup_tc_block_cb, + adapter, adapter, true); + + case TC_SETUP_QDISC_ETF: + return igb_offload_txtime(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) +{ + int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; + struct igb_adapter *adapter = netdev_priv(dev); + struct bpf_prog *prog = bpf->prog, *old_prog; + bool running = netif_running(dev); + bool need_reset; + + /* verify igb ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (frame_size > igb_rx_bufsz(ring)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "The RX buffer size is too small for the frame size"); + netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", + igb_rx_bufsz(ring), frame_size); + return -EINVAL; + } + } + + old_prog = xchg(&adapter->xdp_prog, prog); + need_reset = (!!prog != !!old_prog); + + /* device is up and bpf is added/removed, must setup the RX queues */ + if (need_reset && running) { + igb_close(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + (void)xchg(&adapter->rx_ring[i]->xdp_prog, + adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + /* bpf is just replaced, RXQ and MTU are already setup */ + if (!need_reset) { + return 0; + } else { + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + } + + if (running) + igb_open(dev); + + return 0; +} + +static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return igb_xdp_setup(dev, xdp); + case XDP_SETUP_XSK_POOL: + return igb_xsk_pool_setup(adapter, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + u32 ret; + + if (unlikely(!xdpf)) + return IGB_XDP_CONSUMED; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = igb_xdp_is_enabled(adapter) ? + igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return IGB_XDP_CONSUMED; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + __netif_tx_unlock(nq); + + return ret; +} + +static int igb_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct igb_adapter *adapter = netdev_priv(dev); + int cpu = smp_processor_id(); + struct igb_ring *tx_ring; + struct netdev_queue *nq; + int nxmit = 0; + int i; + + if (unlikely(test_bit(__IGB_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + tx_ring = igb_xdp_is_enabled(adapter) ? + igb_xdp_tx_queue_mapping(adapter) : NULL; + if (unlikely(!tx_ring)) + return -ENXIO; + + if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))) + return -ENXIO; + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); + if (err != IGB_XDP_TX) + break; + nxmit++; + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + igb_xdp_ring_update_tail(tx_ring); + + __netif_tx_unlock(nq); + + return nxmit; } static const struct net_device_ops igb_netdev_ops = { @@ -1900,21 +3042,27 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, .ndo_change_mtu = igb_change_mtu, - .ndo_do_ioctl = igb_ioctl, + .ndo_eth_ioctl = igb_ioctl, .ndo_tx_timeout = igb_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, .ndo_set_vf_mac = igb_ndo_set_vf_mac, .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_rate = igb_ndo_set_vf_bw, .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, + .ndo_set_vf_trust = igb_ndo_set_vf_trust, .ndo_get_vf_config = igb_ndo_get_vf_config, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = igb_netpoll, -#endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, + .ndo_fdb_add = igb_ndo_fdb_add, + .ndo_features_check = igb_features_check, + .ndo_setup_tc = igb_setup_tc, + .ndo_bpf = igb_xdp, + .ndo_xdp_xmit = igb_xdp_xmit, + .ndo_xsk_wakeup = igb_xsk_wakeup, + .ndo_hwtstamp_get = igb_ptp_hwtstamp_get, + .ndo_hwtstamp_set = igb_ptp_hwtstamp_set, }; /** @@ -1929,14 +3077,19 @@ void igb_set_fw_version(struct igb_adapter *adapter) igb_get_fw_version(hw, &fw); switch (hw->mac.type) { + case e1000_i210: case e1000_i211: - snprintf(adapter->fw_version, sizeof(adapter->fw_version), - "%2d.%2d-%d", - fw.invm_major, fw.invm_minor, fw.invm_img_type); - break; - + if (!(igb_get_flash_presence_i210(hw))) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, + fw.invm_img_type); + break; + } + fallthrough; default: - /* if option is rom valid, display its version too */ + /* if option rom is valid, display its version too */ if (fw.or_valid) { snprintf(adapter->fw_version, sizeof(adapter->fw_version), @@ -1944,15 +3097,71 @@ void igb_set_fw_version(struct igb_adapter *adapter) fw.eep_major, fw.eep_minor, fw.etrack_id, fw.or_major, fw.or_build, fw.or_patch); /* no option rom */ - } else { + } else if (fw.etrack_id != 0X0000) { snprintf(adapter->fw_version, sizeof(adapter->fw_version), "%d.%d, 0x%08x", fw.eep_major, fw.eep_minor, fw.etrack_id); + } else { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d.%d", + fw.eep_major, fw.eep_minor, fw.eep_build); } break; } - return; +} + +/** + * igb_init_mas - init Media Autosense feature if enabled in the NVM + * + * @adapter: adapter struct + **/ +static void igb_init_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 eeprom_data; + + hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data); + switch (hw->bus.func) { + case E1000_FUNC_0: + if (eeprom_data & IGB_MAS_ENABLE_0) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_1: + if (eeprom_data & IGB_MAS_ENABLE_1) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_2: + if (eeprom_data & IGB_MAS_ENABLE_2) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_3: + if (eeprom_data & IGB_MAS_ENABLE_3) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + default: + /* Shouldn't get here */ + netdev_err(adapter->netdev, + "MAS: Invalid port configuration, returning\n"); + break; + } } /** @@ -1961,14 +3170,14 @@ void igb_set_fw_version(struct igb_adapter *adapter) **/ static s32 igb_init_i2c(struct igb_adapter *adapter) { - s32 status = E1000_SUCCESS; + s32 status = 0; /* I2C interface supported on i350 devices */ if (adapter->hw.mac.type != e1000_i350) - return E1000_SUCCESS; + return 0; /* Initialize the i2c bus which is controlled by the registers. - * This bus will use the i2c_algo_bit structue that implements + * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. */ adapter->i2c_adap.owner = THIS_MODULE; @@ -1976,7 +3185,7 @@ static s32 igb_init_i2c(struct igb_adapter *adapter) adapter->i2c_algo.data = adapter; adapter->i2c_adap.algo_data = &adapter->i2c_algo; adapter->i2c_adap.dev.parent = &adapter->pdev->dev; - strlcpy(adapter->i2c_adap.name, "igb BB", + strscpy(adapter->i2c_adap.name, "igb BB", sizeof(adapter->i2c_adap.name)); status = i2c_bit_add_bus(&adapter->i2c_adap); return status; @@ -2002,15 +3211,14 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) s32 ret_val; static int global_quad_port_a; /* global quad port a indication */ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; - unsigned long mmio_start, mmio_len; - int err, pci_using_dac; u8 part_str[E1000_PBANUM_LENGTH]; + int err; /* Catch broken hardware that put the wrong VF device ID in * the PCIe SR-IOV capability. */ if (pdev->is_virtfn) { - WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", + WARN(1, KERN_ERR "%s (%x:%x) should not be a VF!\n", pci_name(pdev), pdev->vendor, pdev->device); return -EINVAL; } @@ -2019,33 +3227,17 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; - } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } - } + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); + err = pci_request_mem_regions(pdev, igb_driver_name); if (err) goto err_pci_reg; - pci_enable_pcie_error_reporting(pdev); - pci_set_master(pdev); pci_save_state(pdev); @@ -2065,22 +3257,21 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - err = -EIO; - hw->hw_addr = ioremap(mmio_start, mmio_len); - if (!hw->hw_addr) + adapter->io_addr = pci_iomap(pdev, 0, 0); + if (!adapter->io_addr) goto err_ioremap; + /* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */ + hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igb_netdev_ops; igb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); + strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); - netdev->mem_start = mmio_start; - netdev->mem_end = mmio_start + mmio_len; + netdev->mem_start = pci_resource_start(pdev, 0); + netdev->mem_end = pci_resource_end(pdev, 0); /* PCI config space info */ hw->vendor_id = pdev->vendor; @@ -2123,41 +3314,57 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * assignment. */ netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_CSUM; + + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4; + + if (hw->mac.type >= e1000_i350) + netdev->features |= NETIF_F_HW_TC; + +#define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) + + netdev->gso_partial_features = IGB_GSO_PARTIAL_FEATURES; + netdev->features |= NETIF_F_GSO_PARTIAL | IGB_GSO_PARTIAL_FEATURES; /* copy netdev features into list of user selectable features */ - netdev->hw_features |= netdev->features; - netdev->hw_features |= NETIF_F_RXALL; + netdev->hw_features |= netdev->features | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_RXALL; - /* set this bit last since it cannot be part of hw_features */ - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (hw->mac.type >= e1000_i350) + netdev->hw_features |= NETIF_F_NTUPLE; - netdev->vlan_features |= NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; + netdev->features |= NETIF_F_HIGHDMA; - netdev->priv_flags |= IFF_SUPP_NOFCS; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= netdev->vlan_features; - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } + /* set this bit last since it cannot be part of vlan_features */ + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; - if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= NETIF_F_SCTP_CSUM; - netdev->features |= NETIF_F_SCTP_CSUM; - } + netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; + + /* MTU range: 68 - 9216 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); @@ -2166,22 +3373,37 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ hw->mac.ops.reset_hw(hw); - /* make sure the NVM is good , i211 parts have special NVM that - * doesn't contain a checksum + /* make sure the NVM is good , i211/i210 parts can have special NVM + * that doesn't contain a checksum */ - if (hw->mac.type != e1000_i211) { + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (igb_get_flash_presence_i210(hw)) { + if (hw->nvm.ops.validate(hw) < 0) { + dev_err(&pdev->dev, + "The NVM Checksum Is Not Valid\n"); + err = -EIO; + goto err_eeprom; + } + } + break; + default: if (hw->nvm.ops.validate(hw) < 0) { dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } + break; } - /* copy the MAC address out of the NVM */ - if (hw->mac.ops.read_mac_addr(hw)) - dev_err(&pdev->dev, "NVM Read Error\n"); + if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { + /* copy the MAC address out of the NVM */ + if (hw->mac.ops.read_mac_addr(hw)) + dev_err(&pdev->dev, "NVM Read Error\n"); + } - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac.addr); if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address\n"); @@ -2189,13 +3411,19 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } + igb_set_default_mac_filter(adapter); + /* get firmware version for ethtool -i */ igb_set_fw_version(adapter); - setup_timer(&adapter->watchdog_timer, igb_watchdog, - (unsigned long) adapter); - setup_timer(&adapter->phy_info_timer, igb_update_phy_info, - (unsigned long) adapter); + /* configure RXPBSIZE and TXPBSIZE */ + if (hw->mac.type == e1000_i210) { + wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); + wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); + } + + timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); + timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); @@ -2270,6 +3498,26 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->wol = 0; } + /* Some vendors want the ability to Use the EEPROM setting as + * enable/disable only, and not for capability + */ + if (((hw->mac.type == e1000_i350) || + (hw->mac.type == e1000_i354)) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (hw->mac.type == e1000_i350) { + if (((pdev->subsystem_device == 0x5001) || + (pdev->subsystem_device == 0x5002)) && + (hw->bus.func == 0)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (pdev->subsystem_device == 0x1F52) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + } + device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGB_FLAG_WOL_SUPPORTED); @@ -2284,7 +3532,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. + */ igb_get_hw_control(adapter); strcpy(netdev->name, "eth%d"); @@ -2316,6 +3565,12 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->ets = true; else adapter->ets = false; + /* Only enable I2C bit banging if an external thermal + * sensor is supported. + */ + if (adapter->ets) + igb_set_i2c_bb(hw); + hw->mac.ops.init_thermal_sensor_thresh(hw); if (igb_sysfs_init(adapter)) dev_err(&pdev->dev, "failed to allocate sysfs resources\n"); @@ -2323,6 +3578,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->ets = false; } #endif + /* Check if Media Autosense is enabled */ + adapter->ei = *ei; + if (hw->dev_spec._82575.mas_capable) + igb_init_mas(adapter); + /* do hw tstamp init after resetting */ igb_ptp_init(adapter); @@ -2342,32 +3602,57 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Width x1" : "unknown"), netdev->dev_addr); } - ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); + if ((hw->mac.type == e1000_82576 && + rd32(E1000_EECD) & E1000_EECD_PRES) || + (hw->mac.type >= e1000_i210 || + igb_get_flash_presence_i210(hw))) { + ret_val = igb_read_part_string(hw, part_str, + E1000_PBANUM_LENGTH); + } else { + ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND; + } + if (ret_val) strcpy(part_str, "Unknown"); dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); dev_info(&pdev->dev, "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", - adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" : (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", adapter->num_rx_queues, adapter->num_tx_queues); - switch (hw->mac.type) { - case e1000_i350: - case e1000_i210: - case e1000_i211: - igb_set_eee_i350(hw); - break; - case e1000_i354: - if (hw->phy.media_type == e1000_media_type_copper) { + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = igb_set_eee_i350(hw, true, true); + if ((!err) && + (!hw->dev_spec._82575.eee_disable)) { + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + adapter->flags |= IGB_FLAG_EEE; + } + break; + case e1000_i354: if ((rd32(E1000_CTRL_EXT) & - E1000_CTRL_EXT_LINK_MODE_SGMII)) - igb_set_eee_i354(hw); + E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = igb_set_eee_i354(hw, true, true); + if ((!err) && + (!hw->dev_spec._82575.eee_disable)) { + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + adapter->flags |= IGB_FLAG_EEE; + } + } + break; + default: + break; } - break; - default: - break; } + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + pm_runtime_put_noidle(&pdev->dev); return 0; @@ -2381,13 +3666,17 @@ err_eeprom: if (hw->flash_address) iounmap(hw->flash_address); err_sw_init: + kfree(adapter->mac_table); + kfree(adapter->shadow_vfta); igb_clear_interrupt_scheme(adapter); - iounmap(hw->hw_addr); +#ifdef CONFIG_PCI_IOV + igb_disable_sriov(pdev, false); +#endif + pci_iounmap(pdev, adapter->io_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -2395,11 +3684,43 @@ err_dma: } #ifdef CONFIG_PCI_IOV -static int igb_disable_sriov(struct pci_dev *pdev) +static int igb_sriov_reinit(struct pci_dev *dev) +{ + struct net_device *netdev = pci_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + rtnl_lock(); + + if (netif_running(netdev)) + igb_close(netdev); + else + igb_reset(adapter); + + igb_clear_interrupt_scheme(adapter); + + igb_init_queue_configuration(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + rtnl_unlock(); + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + igb_open(netdev); + + rtnl_unlock(); + + return 0; +} + +static int igb_disable_sriov(struct pci_dev *pdev, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + unsigned long flags; /* reclaim resources allocated to VFs */ if (adapter->vf_data) { @@ -2412,10 +3733,13 @@ static int igb_disable_sriov(struct pci_dev *pdev) pci_disable_sriov(pdev); msleep(500); } - + spin_lock_irqsave(&adapter->vfs_lock, flags); + kfree(adapter->vf_mac_list); + adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; adapter->vfs_allocated_count = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); wrfl(); msleep(100); @@ -2425,33 +3749,31 @@ static int igb_disable_sriov(struct pci_dev *pdev) adapter->flags |= IGB_FLAG_DMAC; } - return 0; + return reinit ? igb_sriov_reinit(pdev) : 0; } -static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) +static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); int old_vfs = pci_num_vf(pdev); + struct vf_mac_filter *mac_list; int err = 0; - int i; + int num_vf_mac_filters, i; - if (!num_vfs) - goto out; - else if (old_vfs && old_vfs == num_vfs) - goto out; - else if (old_vfs && old_vfs != num_vfs) - err = igb_disable_sriov(pdev); - - if (err) - goto out; - - if (num_vfs > 7) { + if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) { err = -EPERM; goto out; } + if (!num_vfs) + goto out; - adapter->vfs_allocated_count = num_vfs; + if (old_vfs) { + dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n", + old_vfs, max_vfs); + adapter->vfs_allocated_count = old_vfs; + } else + adapter->vfs_allocated_count = num_vfs; adapter->vf_data = kcalloc(adapter->vfs_allocated_count, sizeof(struct vf_data_storage), GFP_KERNEL); @@ -2459,15 +3781,41 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) /* if allocation failed then we do not support SR-IOV */ if (!adapter->vf_data) { adapter->vfs_allocated_count = 0; - dev_err(&pdev->dev, - "Unable to allocate memory for VF Data Storage\n"); err = -ENOMEM; goto out; } - err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); - if (err) - goto err_out; + /* Due to the limited number of RAR entries calculate potential + * number of MAC filters available for the VFs. Reserve entries + * for PF default MAC, PF MAC filters and at least one RAR entry + * for each VF for VF MAC. + */ + num_vf_mac_filters = adapter->hw.mac.rar_entry_count - + (1 + IGB_PF_MAC_FILTERS_RESERVED + + adapter->vfs_allocated_count); + + adapter->vf_mac_list = kcalloc(num_vf_mac_filters, + sizeof(struct vf_mac_filter), + GFP_KERNEL); + + mac_list = adapter->vf_mac_list; + INIT_LIST_HEAD(&adapter->vf_macs.l); + + if (adapter->vf_mac_list) { + /* Initialize list of VF MAC filters */ + for (i = 0; i < num_vf_mac_filters; i++) { + mac_list->vf = -1; + mac_list->free = true; + list_add(&mac_list->l, &adapter->vf_macs.l); + mac_list++; + } + } else { + /* If we could not allocate memory for the VF MAC filters + * we can continue without this feature but warn user. + */ + dev_err(&pdev->dev, + "Unable to allocate memory for VF MAC filter list\n"); + } dev_info(&pdev->dev, "%d VFs allocated\n", adapter->vfs_allocated_count); @@ -2476,9 +3824,25 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) /* DMA Coalescing is not supported in IOV mode. */ adapter->flags &= ~IGB_FLAG_DMAC; + + if (reinit) { + err = igb_sriov_reinit(pdev); + if (err) + goto err_out; + } + + /* only call pci_enable_sriov() if no VFs are allocated already */ + if (!old_vfs) { + err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + if (err) + goto err_out; + } + goto out; err_out: + kfree(adapter->vf_mac_list); + adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; adapter->vfs_allocated_count = 0; @@ -2522,8 +3886,8 @@ static void igb_remove(struct pci_dev *pdev) * disable watchdog from being rescheduled. */ set_bit(__IGB_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); @@ -2542,25 +3906,23 @@ static void igb_remove(struct pci_dev *pdev) */ igb_release_hw_control(adapter); +#ifdef CONFIG_PCI_IOV + igb_disable_sriov(pdev, false); +#endif + unregister_netdev(netdev); igb_clear_interrupt_scheme(adapter); -#ifdef CONFIG_PCI_IOV - igb_disable_sriov(pdev); -#endif - - iounmap(hw->hw_addr); + pci_iounmap(pdev, adapter->io_addr); if (hw->flash_address) iounmap(hw->flash_address); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); + kfree(adapter->mac_table); kfree(adapter->shadow_vfta); free_netdev(netdev); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); } @@ -2570,7 +3932,7 @@ static void igb_remove(struct pci_dev *pdev) * * This function initializes the vf specific data storage and then attempts to * allocate the VFs. The reason for ordering it this way is because it is much - * mor expensive time wise to disable SR-IOV than it is to allocate and free + * more expensive time wise to disable SR-IOV than it is to allocate and free * the memory for the VFs. **/ static void igb_probe_vfs(struct igb_adapter *adapter) @@ -2579,20 +3941,28 @@ static void igb_probe_vfs(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - /* Virtualization features not supported on i210 family. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) + /* Virtualization features not supported on i210 and 82580 family. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) || + (hw->mac.type == e1000_82580)) return; + /* Of the below we really only want the effect of getting + * IGB_FLAG_HAS_MSIX set (if available), without which + * igb_enable_sriov() has no effect. + */ + igb_set_interrupt_capability(adapter, true); + igb_reset_interrupt_capability(adapter); + pci_sriov_set_totalvfs(pdev, 7); - igb_enable_sriov(pdev, max_vfs); + igb_enable_sriov(pdev, max_vfs, false); #endif /* CONFIG_PCI_IOV */ } -static void igb_init_queue_configuration(struct igb_adapter *adapter) +unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 max_rss_queues; + unsigned int max_rss_queues; /* Determine the maximum number of RSS queues supported. */ switch (hw->mac.type) { @@ -2609,13 +3979,13 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) max_rss_queues = 1; break; } - /* fall through */ + fallthrough; case e1000_82576: if (!!adapter->vfs_allocated_count) { max_rss_queues = 2; break; } - /* fall through */ + fallthrough; case e1000_82580: case e1000_i354: default: @@ -2623,8 +3993,24 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) break; } + return max_rss_queues; +} + +static void igb_init_queue_configuration(struct igb_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igb_get_max_rss_queues(adapter); adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + igb_set_flag_queue_pairs(adapter, max_rss_queues); +} + +void igb_set_flag_queue_pairs(struct igb_adapter *adapter, + const u32 max_rss_queues) +{ + struct e1000_hw *hw = &adapter->hw; + /* Determine if we need to pair queues. */ switch (hw->mac.type) { case e1000_82575: @@ -2632,14 +4018,6 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* If VFs are going to be allocated with RSS queues then we - * should pair the queues in order to conserve interrupts due - * to limited supply. - */ - if ((adapter->rss_queues > 1) && - (adapter->vfs_allocated_count > 6)) - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; - /* fall through */ case e1000_82580: case e1000_i350: case e1000_i354: @@ -2650,6 +4028,8 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) */ if (adapter->rss_queues > (max_rss_queues / 2)) adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS; break; } } @@ -2681,11 +4061,14 @@ static int igb_sw_init(struct igb_adapter *adapter) /* set default work limits */ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; + adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); + + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -2705,11 +4088,24 @@ static int igb_sw_init(struct igb_adapter *adapter) } #endif /* CONFIG_PCI_IOV */ + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= IGB_FLAG_HAS_MSIX; + + adapter->mac_table = kcalloc(hw->mac.rar_entry_count, + sizeof(struct igb_mac_addr), + GFP_KERNEL); + if (!adapter->mac_table) + return -ENOMEM; + + igb_probe_vfs(adapter); + igb_init_queue_configuration(adapter); /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), - GFP_ATOMIC); + GFP_KERNEL); + if (!adapter->shadow_vfta) + return -ENOMEM; /* This call may decrease the number of queues */ if (igb_init_interrupt_scheme(adapter, true)) { @@ -2717,8 +4113,6 @@ static int igb_sw_init(struct igb_adapter *adapter) return -ENOMEM; } - igb_probe_vfs(adapter); - /* Explicitly disable IRQ since the NIC can be in any state. */ igb_irq_disable(adapter); @@ -2730,8 +4124,9 @@ static int igb_sw_init(struct igb_adapter *adapter) } /** - * igb_open - Called when a network interface is made active + * __igb_open - Called when a network interface is made active * @netdev: network interface device structure + * @resuming: indicates whether we are in a resume call * * Returns 0 on success, negative value on failure * @@ -2744,8 +4139,9 @@ static int igb_sw_init(struct igb_adapter *adapter) static int __igb_open(struct net_device *netdev, bool resuming) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int err; int i; @@ -2797,10 +4193,14 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } /* Clear any pending interrupts. */ + rd32(E1000_TSICR); rd32(E1000_ICR); igb_irq_enable(adapter); @@ -2808,6 +4208,7 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -2839,14 +4240,15 @@ err_setup_tx: return err; } -static int igb_open(struct net_device *netdev) +int igb_open(struct net_device *netdev) { return __igb_open(netdev, false); } /** - * igb_close - Disables a network interface + * __igb_close - Disables a network interface * @netdev: network interface device structure + * @suspending: indicates we are in a suspend call * * Returns 0, this is not allowed to fail * @@ -2876,9 +4278,11 @@ static int __igb_close(struct net_device *netdev, bool suspending) return 0; } -static int igb_close(struct net_device *netdev) +int igb_close(struct net_device *netdev) { - return __igb_close(netdev, false); + if (netif_device_present(netdev) || netdev->dismantle) + return __igb_close(netdev, false); + return 0; } /** @@ -2894,7 +4298,7 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc(size); + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2979,17 +4383,14 @@ void igb_setup_tctl(struct igb_adapter *adapter) * Configure a transmit ring after a reset. **/ void igb_configure_tx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) + struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; u32 txdctl = 0; u64 tdba = ring->dma; int reg_idx = ring->reg_idx; - /* disable the queue */ - wr32(E1000_TXDCTL(reg_idx), 0); - wrfl(); - mdelay(10); + WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring)); wr32(E1000_TDLEN(reg_idx), ring->count * sizeof(union e1000_adv_tx_desc)); @@ -2997,7 +4398,7 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, tdba & 0x00000000ffffffffULL); wr32(E1000_TDBAH(reg_idx), tdba >> 32); - ring->tail = hw->hw_addr + E1000_TDT(reg_idx); + ring->tail = adapter->io_addr + E1000_TDT(reg_idx); wr32(E1000_TDH(reg_idx), 0); writel(0, ring->tail); @@ -3005,6 +4406,10 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, txdctl |= IGB_TX_HTHRESH << 8; txdctl |= IGB_TX_WTHRESH << 16; + /* reinitialize tx_buffer_info */ + memset(ring->tx_buffer_info, 0, + sizeof(struct igb_tx_buffer) * ring->count); + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; wr32(E1000_TXDCTL(reg_idx), txdctl); } @@ -3017,8 +4422,16 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, **/ static void igb_configure_tx(struct igb_adapter *adapter) { + struct e1000_hw *hw = &adapter->hw; int i; + /* disable the queues */ + for (i = 0; i < adapter->num_tx_queues; i++) + wr32(E1000_TXDCTL(adapter->tx_ring[i]->reg_idx), 0); + + wrfl(); + usleep_range(10000, 20000); + for (i = 0; i < adapter->num_tx_queues; i++) igb_configure_tx_ring(adapter, adapter->tx_ring[i]); } @@ -3031,12 +4444,24 @@ static void igb_configure_tx(struct igb_adapter *adapter) **/ int igb_setup_rx_resources(struct igb_ring *rx_ring) { + struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc(size); + rx_ring->rx_buffer_info = vmalloc(size); if (!rx_ring->rx_buffer_info) goto err; @@ -3053,9 +4478,12 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->xdp_prog = adapter->xdp_prog; + return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); @@ -3096,48 +4524,32 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0; - static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, - 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, - 0xA32DCB77, 0x0CF23080, 0x3BB7426A, - 0xFA01ACBE }; + u32 j, num_rx_queues; + u32 rss_key[10]; - /* Fill out hash function seeds */ + netdev_rss_key_fill(rss_key, sizeof(rss_key)); for (j = 0; j < 10; j++) - wr32(E1000_RSSRK(j), rsskey[j]); + wr32(E1000_RSSRK(j), rss_key[j]); num_rx_queues = adapter->rss_queues; switch (hw->mac.type) { - case e1000_82575: - shift = 6; - break; case e1000_82576: /* 82576 supports 2 RSS queues for SR-IOV */ - if (adapter->vfs_allocated_count) { - shift = 3; + if (adapter->vfs_allocated_count) num_rx_queues = 2; - } break; default: break; } - /* Populate the indirection table 4 entries at a time. To do this - * we are generating the results for n and n+2 and then interleaving - * those with the results with n+1 and n+3. - */ - for (j = 0; j < 32; j++) { - /* first pass generates n and n+2 */ - u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; - u32 reta = (base & 0x07800780) >> (7 - shift); - - /* second pass generates n+1 and n+3 */ - base += 0x00010001 * num_rx_queues; - reta |= (base & 0x07800780) << (1 + shift); - - wr32(E1000_RETA(j), reta); + if (adapter->rss_indir_tbl_init != num_rx_queues) { + for (j = 0; j < IGB_RETA_SIZE; j++) + adapter->rss_indir_tbl[j] = + (j * num_rx_queues) / IGB_RETA_SIZE; + adapter->rss_indir_tbl_init = num_rx_queues; } + igb_write_rss_indir_tbl(adapter); /* Disable raw packet checksumming so that RSS hash is placed in * descriptor on writeback. No need to enable TCP/UDP/IP checksum @@ -3175,6 +4587,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) if (hw->mac.type > e1000_82575) { /* Set the default pool for the PF's first queue */ u32 vtctl = rd32(E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | E1000_VT_CTL_DISABLE_DEF_POOL); vtctl |= adapter->vfs_allocated_count << @@ -3182,12 +4595,11 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) wr32(E1000_VT_CTL, vtctl); } if (adapter->rss_queues > 1) - mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ; else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { - if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_4Q; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); @@ -3220,7 +4632,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) /* disable store bad packets and clear size bits. */ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); - /* enable LPE to prevent packets larger than max_frame_size */ + /* enable LPE to allow for reception of jumbo frames */ rctl |= E1000_RCTL_LPE; /* disable queue 0 to prevent tail write w/o re-config */ @@ -3244,8 +4656,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) E1000_RCTL_BAM | /* RX All Bcast Pkts */ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ - rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ - E1000_RCTL_DPF | /* Allow filtered pause */ + rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, * and that breaks VLANs. @@ -3256,17 +4667,13 @@ void igb_setup_rctl(struct igb_adapter *adapter) } static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, - int vfn) + int vfn) { struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags - */ - if (vfn < adapter->vfs_allocated_count && - adapter->vf_data[vfn].vlans_enabled) - size += VLAN_TAG_SIZE; + if (size > MAX_JUMBO_FRAME_SIZE) + size = MAX_JUMBO_FRAME_SIZE; vmolr = rd32(E1000_VMOLR(vfn)); vmolr &= ~E1000_VMOLR_RLPML_MASK; @@ -3276,30 +4683,26 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, return 0; } -/** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure - * - * Configure maximum receivable packet size. - **/ -static void igb_rlpml_set(struct igb_adapter *adapter) +static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, + int vfn, bool enable) { - u32 max_frame_size = adapter->max_frame_size; struct e1000_hw *hw = &adapter->hw; - u16 pf_id = adapter->vfs_allocated_count; + u32 val, reg; - if (pf_id) { - igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* If we're in VMDQ or SR-IOV mode, then set global RLPML - * to our max jumbo frame size, in case we need to enable - * jumbo frames on one of the rings later. - * This will not pass over-length frames into the default - * queue because it's gated by the VMOLR.RLPML. - */ - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } + if (hw->mac.type < e1000_82576) + return; - wr32(E1000_RLPML, max_frame_size); + if (hw->mac.type == e1000_i350) + reg = E1000_DVMOLR(vfn); + else + reg = E1000_VMOLR(vfn); + + val = rd32(reg); + if (enable) + val |= E1000_VMOLR_STRVLAN; + else + val &= ~(E1000_VMOLR_STRVLAN); + wr32(reg, val); } static inline void igb_set_vmolr(struct igb_adapter *adapter, @@ -3315,7 +4718,6 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, return; vmolr = rd32(E1000_VMOLR(vfn)); - vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ if (aupe) vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ else @@ -3336,6 +4738,42 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, } /** + * igb_setup_srrctl - configure the split and replication receive control + * registers + * @adapter: Board private structure + * @ring: receive ring to be configured + **/ +void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u32 srrctl = 0; + u32 buf_size; + + if (ring->xsk_pool) + buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); + else if (ring_uses_large_buffer(ring)) + buf_size = IGB_RXBUFFER_3072; + else + buf_size = IGB_RXBUFFER_2048; + + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; + /* Only set Drop Enable if VFs allocated, or we are supporting multiple + * queues and rx flow control is disabled + */ + if (adapter->vfs_allocated_count || + (!(hw->fc.current_mode & e1000_fc_rx_pause) && + adapter->num_rx_queues > 1)) + srrctl |= E1000_SRRCTL_DROP_EN; + + wr32(E1000_SRRCTL(reg_idx), srrctl); +} + +/** * igb_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure * @ring: receive ring to be configured @@ -3346,9 +4784,23 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; + union e1000_adv_rx_desc *rx_desc; u64 rdba = ring->dma; int reg_idx = ring->reg_idx; - u32 srrctl = 0, rxdctl = 0; + u32 rxdctl = 0; + + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring)); + if (ring->xsk_pool) { + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL)); + xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); + } else { + WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL)); + } /* disable the queue */ wr32(E1000_RXDCTL(reg_idx), 0); @@ -3361,21 +4813,12 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, ring->count * sizeof(union e1000_adv_rx_desc)); /* initialize head and tail */ - ring->tail = hw->hw_addr + E1000_RDT(reg_idx); + ring->tail = adapter->io_addr + E1000_RDT(reg_idx); wr32(E1000_RDH(reg_idx), 0); writel(0, ring->tail); /* set descriptor configuration */ - srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (hw->mac.type >= e1000_82580) - srrctl |= E1000_SRRCTL_TIMESTAMP; - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; - - wr32(E1000_SRRCTL(reg_idx), srrctl); + igb_setup_srrctl(adapter, ring); /* set filtering for VMDQ pools */ igb_set_vmolr(adapter, reg_idx & 0x7, true); @@ -3384,11 +4827,46 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; + if (ring->xsk_pool) + memset(ring->rx_buffer_info_zc, 0, + sizeof(*ring->rx_buffer_info_zc) * ring->count); + else + memset(ring->rx_buffer_info, 0, + sizeof(*ring->rx_buffer_info) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IGB_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + /* enable receive descriptor fetching */ rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; wr32(E1000_RXDCTL(reg_idx), rxdctl); } +static void igb_set_rx_buffer_len(struct igb_adapter *adapter, + struct igb_ring *rx_ring) +{ +#if (PAGE_SIZE < 8192) + struct e1000_hw *hw = &adapter->hw; +#endif + + /* set build_skb and buffer size flags */ + clear_ring_build_skb_enabled(rx_ring); + clear_ring_uses_large_buffer(rx_ring); + + if (adapter->flags & IGB_FLAG_RX_LEGACY) + return; + + set_ring_build_skb_enabled(rx_ring); + +#if (PAGE_SIZE < 8192) + if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || + rd32(E1000_RCTL) & E1000_RCTL_SBP) + set_ring_uses_large_buffer(rx_ring); +#endif +} + /** * igb_configure_rx - Configure receive Unit after Reset * @adapter: board private structure @@ -3399,18 +4877,18 @@ static void igb_configure_rx(struct igb_adapter *adapter) { int i; - /* set UTA to appropriate mode */ - igb_set_uta(adapter); - /* set the correct pool for the PF default MAC address in entry 0 */ - igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, - adapter->vfs_allocated_count); + igb_set_default_mac_filter(adapter); /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - igb_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *rx_ring = adapter->rx_ring[i]; + + igb_set_rx_buffer_len(adapter, rx_ring); + igb_configure_rx_ring(adapter, rx_ring); + } } /** @@ -3447,58 +4925,81 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_free_tx_resources(adapter->tx_ring[i]); -} - -void igb_unmap_and_free_tx_resource(struct igb_ring *ring, - struct igb_tx_buffer *tx_buffer) -{ - if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } - tx_buffer->next_to_watch = NULL; - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - /* buffer_info must be completely set up in the transmit path */ + if (adapter->tx_ring[i]) + igb_free_tx_resources(adapter->tx_ring[i]); } /** * igb_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ -static void igb_clean_tx_ring(struct igb_ring *tx_ring) -{ - struct igb_tx_buffer *buffer_info; - unsigned long size; - u16 i; +void igb_clean_tx_ring(struct igb_ring *tx_ring) +{ + u16 i = tx_ring->next_to_clean; + struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; + u32 xsk_frames = 0; + + while (i != tx_ring->next_to_use) { + union e1000_adv_tx_desc *eop_desc, *tx_desc; + + /* Free all the Tx ring sk_buffs or xdp frames */ + if (tx_buffer->type == IGB_TYPE_SKB) { + dev_kfree_skb_any(tx_buffer->skb); + } else if (tx_buffer->type == IGB_TYPE_XDP) { + xdp_return_frame(tx_buffer->xdpf); + } else if (tx_buffer->type == IGB_TYPE_XSK) { + xsk_frames++; + goto skip_for_xsk; + } - if (!tx_ring->tx_buffer_info) - return; - /* Free all the Tx ring sk_buffs */ + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); - for (i = 0; i < tx_ring->count; i++) { - buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IGB_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + +skip_for_xsk: + tx_buffer->next_to_watch = NULL; + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } } + /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); - size = sizeof(struct igb_tx_buffer) * tx_ring->count; - memset(tx_ring->tx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); + if (tx_ring->xsk_pool && xsk_frames) + xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; } @@ -3512,7 +5013,8 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_ring(adapter->tx_ring[i]); + if (adapter->tx_ring[i]) + igb_clean_tx_ring(adapter->tx_ring[i]); } /** @@ -3525,8 +5027,15 @@ void igb_free_rx_resources(struct igb_ring *rx_ring) { igb_clean_rx_ring(rx_ring); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (rx_ring->xsk_pool) { + vfree(rx_ring->rx_buffer_info_zc); + rx_ring->rx_buffer_info_zc = NULL; + } else { + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + } /* if not set, then don't free */ if (!rx_ring->desc) @@ -3549,47 +5058,54 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_free_rx_resources(adapter->rx_ring[i]); + if (adapter->rx_ring[i]) + igb_free_rx_resources(adapter->rx_ring[i]); } /** * igb_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ -static void igb_clean_rx_ring(struct igb_ring *rx_ring) +void igb_clean_rx_ring(struct igb_ring *rx_ring) { - unsigned long size; - u16 i; + u16 i = rx_ring->next_to_clean; - if (rx_ring->skb) - dev_kfree_skb(rx_ring->skb); + dev_kfree_skb(rx_ring->skb); rx_ring->skb = NULL; - if (!rx_ring->rx_buffer_info) - return; + if (rx_ring->xsk_pool) { + igb_clean_rx_ring_zc(rx_ring); + goto skip_for_xsk; + } /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { + while (i != rx_ring->next_to_alloc) { struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; - if (!buffer_info->page) - continue; - - dma_unmap_page(rx_ring->dev, - buffer_info->dma, - PAGE_SIZE, - DMA_FROM_DEVICE); - __free_page(buffer_info->page); + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + buffer_info->dma, + buffer_info->page_offset, + igb_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, + buffer_info->dma, + igb_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGB_RX_DMA_ATTR); + __page_frag_cache_drain(buffer_info->page, + buffer_info->pagecnt_bias); - buffer_info->page = NULL; + i++; + if (i == rx_ring->count) + i = 0; } - size = sizeof(struct igb_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - +skip_for_xsk: rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -3604,7 +5120,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_ring(adapter->rx_ring[i]); + if (adapter->rx_ring[i]) + igb_clean_rx_ring(adapter->rx_ring[i]); } /** @@ -3623,12 +5140,11 @@ static int igb_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); /* set the correct pool for the new PF MAC address in entry 0 */ - igb_rar_set_qsel(adapter, hw->mac.addr, 0, - adapter->vfs_allocated_count); + igb_set_default_mac_filter(adapter); return 0; } @@ -3657,7 +5173,7 @@ static int igb_write_mc_addr_list(struct net_device *netdev) return 0; } - mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); + mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); if (!mta_list) return -ENOMEM; @@ -3672,47 +5188,128 @@ static int igb_write_mc_addr_list(struct net_device *netdev) return netdev_mc_count(netdev); } -/** - * igb_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -static int igb_write_uc_addr_list(struct net_device *netdev) +static int igb_vlan_promisc_enable(struct igb_adapter *adapter) { - struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - unsigned int vfn = adapter->vfs_allocated_count; - unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); - int count = 0; + u32 i, pf_id; - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > rar_entries) - return -ENOMEM; + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + case e1000_i350: + /* VLAN filtering needed for VLAN prio filter */ + if (adapter->netdev->features & NETIF_F_NTUPLE) + break; + fallthrough; + case e1000_82576: + case e1000_82580: + case e1000_i354: + /* VLAN filtering needed for pool filtering */ + if (adapter->vfs_allocated_count) + break; + fallthrough; + default: + return 1; + } - if (!netdev_uc_empty(netdev) && rar_entries) { - struct netdev_hw_addr *ha; + /* We are already in VLAN promisc, nothing to do */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + return 0; - netdev_for_each_uc_addr(ha, netdev) { - if (!rar_entries) - break; - igb_rar_set_qsel(adapter, ha->addr, - rar_entries--, - vfn); - count++; + if (!adapter->vfs_allocated_count) + goto set_vfta; + + /* Add PF to all active pools */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + vlvf |= BIT(pf_id); + wr32(E1000_VLVF(i), vlvf); + } + +set_vfta: + /* Set all bits in the VLAN filter table array */ + for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;) + hw->mac.ops.write_vfta(hw, i, ~0U); + + /* Set flag so we don't redo unnecessary work */ + adapter->flags |= IGB_FLAG_VLAN_PROMISC; + + return 0; +} + +#define VFTA_BLOCK_SIZE 8 +static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; + u32 vid_start = vfta_offset * 32; + u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); + u32 i, vid, word, bits, pf_id; + + /* guarantee that we don't scrub out management VLAN */ + vid = adapter->mng_vlan_id; + if (vid >= vid_start && vid < vid_end) + vfta[(vid - vid_start) / 32] |= BIT(vid % 32); + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + /* pull VLAN ID from VLVF */ + vid = vlvf & VLAN_VID_MASK; + + /* only concern ourselves with a certain range */ + if (vid < vid_start || vid >= vid_end) + continue; + + if (vlvf & E1000_VLVF_VLANID_ENABLE) { + /* record VLAN ID in VFTA */ + vfta[(vid - vid_start) / 32] |= BIT(vid % 32); + + /* if PF is part of this then continue */ + if (test_bit(vid, adapter->active_vlans)) + continue; } + + /* remove PF from the pool */ + bits = ~BIT(pf_id); + bits &= rd32(E1000_VLVF(i)); + wr32(E1000_VLVF(i), bits); } - /* write the addresses in reverse order to avoid write combining */ - for (; rar_entries > 0 ; rar_entries--) { - wr32(E1000_RAH(rar_entries), 0); - wr32(E1000_RAL(rar_entries), 0); + +set_vfta: + /* extract values from active_vlans and write back to VFTA */ + for (i = VFTA_BLOCK_SIZE; i--;) { + vid = (vfta_offset + i) * 32; + word = vid / BITS_PER_LONG; + bits = vid % BITS_PER_LONG; + + vfta[i] |= adapter->active_vlans[word] >> bits; + + hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]); } - wrfl(); +} - return count; +static void igb_vlan_promisc_disable(struct igb_adapter *adapter) +{ + u32 i; + + /* We are not in VLAN promisc, nothing to do */ + if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + + for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE) + igb_scrub_vfta(adapter, i); } /** @@ -3729,22 +5326,17 @@ static void igb_set_rx_mode(struct net_device *netdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; unsigned int vfn = adapter->vfs_allocated_count; - u32 rctl, vmolr = 0; + u32 rctl = 0, vmolr = 0, rlpml = MAX_JUMBO_FRAME_SIZE; int count; /* Check for Promiscuous and All Multicast modes */ - rctl = rd32(E1000_RCTL); - - /* clear the effected bits */ - rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); - if (netdev->flags & IFF_PROMISC) { - u32 mrqc = rd32(E1000_MRQC); - /* retain VLAN HW filtering if in VT mode */ - if (mrqc & E1000_MRQC_ENABLE_VMDQ) - rctl |= E1000_RCTL_VFE; - rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + + /* enable use of UTA filter to force packets to default pool */ + if (hw->mac.type == e1000_82576) + vmolr |= E1000_VMOLR_ROPE; } else { if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; @@ -3762,19 +5354,43 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = igb_write_uc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_UPE; - vmolr |= E1000_VMOLR_ROPE; - } - rctl |= E1000_RCTL_VFE; } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (__dev_uc_sync(netdev, igb_uc_sync, igb_uc_unsync)) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } + + /* enable VLAN filtering by default */ + rctl |= E1000_RCTL_VFE; + + /* disable VLAN filtering for modes that require it */ + if ((netdev->flags & IFF_PROMISC) || + (netdev->features & NETIF_F_RXALL)) { + /* if we fail to set all rules then just clear VFE */ + if (igb_vlan_promisc_enable(adapter)) + rctl &= ~E1000_RCTL_VFE; + } else { + igb_vlan_promisc_disable(adapter); + } + + /* update state of unicast, multicast, and VLAN filtering modes */ + rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE | + E1000_RCTL_VFE); wr32(E1000_RCTL, rctl); +#if (PAGE_SIZE < 8192) + if (!adapter->vfs_allocated_count) { + if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) + rlpml = IGB_MAX_FRAME_BUILD_SKB; + } +#endif + wr32(E1000_RLPML, rlpml); + /* In order to support SR-IOV and eventually VMDq it is necessary to set * the VMOLR to enable the appropriate modes. Without this workaround * we will have issues with VLAN tag stripping not being done for frames @@ -3783,9 +5399,24 @@ static void igb_set_rx_mode(struct net_device *netdev) if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) return; + /* set UTA to appropriate mode */ + igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE)); + vmolr |= rd32(E1000_VMOLR(vfn)) & ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + + /* enable Rx jumbo frames, restrict as needed to support build_skb */ + vmolr &= ~E1000_VMOLR_RLPML_MASK; +#if (PAGE_SIZE < 8192) + if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) + vmolr |= IGB_MAX_FRAME_BUILD_SKB; + else +#endif + vmolr |= MAX_JUMBO_FRAME_SIZE; + vmolr |= E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + igb_restore_vf_multicasts(adapter); } @@ -3797,7 +5428,8 @@ static void igb_check_wvbr(struct igb_adapter *adapter) switch (hw->mac.type) { case e1000_82576: case e1000_i350: - if (!(wvbr = rd32(E1000_WVBR))) + wvbr = rd32(E1000_WVBR); + if (!wvbr) return; break; default: @@ -3816,14 +5448,14 @@ static void igb_spoof_check(struct igb_adapter *adapter) if (!adapter->wvbr) return; - for(j = 0; j < adapter->vfs_allocated_count; j++) { - if (adapter->wvbr & (1 << j) || - adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { + for (j = 0; j < adapter->vfs_allocated_count; j++) { + if (adapter->wvbr & BIT(j) || + adapter->wvbr & BIT(j + IGB_STAGGERED_QUEUE_OFFSET)) { dev_warn(&adapter->pdev->dev, "Spoof event(s) detected on VF %d\n", j); adapter->wvbr &= - ~((1 << j) | - (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); + ~(BIT(j) | + BIT(j + IGB_STAGGERED_QUEUE_OFFSET)); } } } @@ -3831,9 +5463,10 @@ static void igb_spoof_check(struct igb_adapter *adapter) /* Need to wait a few seconds after link up to get diagnostic information from * the phy */ -static void igb_update_phy_info(unsigned long data) +static void igb_update_phy_info(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *) data; + struct igb_adapter *adapter = timer_container_of(adapter, t, + phy_info_timer); igb_get_phy_info(&adapter->hw); } @@ -3845,7 +5478,6 @@ bool igb_has_link(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; bool link_active = false; - s32 ret_val = 0; /* get_link_status is set on LSC (link status) interrupt or * rx sequence error interrupt. get_link_status will stay @@ -3854,22 +5486,29 @@ bool igb_has_link(struct igb_adapter *adapter) */ switch (hw->phy.media_type) { case e1000_media_type_copper: - if (hw->mac.get_link_status) { - ret_val = hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - } else { - link_active = true; - } - break; + if (!hw->mac.get_link_status) + return true; + fallthrough; case e1000_media_type_internal_serdes: - ret_val = hw->mac.ops.check_for_link(hw); - link_active = hw->mac.serdes_has_link; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; break; default: case e1000_media_type_unknown: break; } + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) { + if (!netif_carrier_ok(adapter->netdev)) { + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { + adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; + adapter->link_check_timeout = jiffies; + } + } + return link_active; } @@ -3892,12 +5531,33 @@ static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) } /** + * igb_check_lvmmc - check for malformed packets received + * and indicated in LVMMC register + * @adapter: pointer to adapter + **/ +static void igb_check_lvmmc(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 lvmmc; + + lvmmc = rd32(E1000_LVMMC); + if (lvmmc) { + if (unlikely(net_ratelimit())) { + netdev_warn(adapter->netdev, + "malformed Tx packet detected and dropped, LVMMC:0x%08x\n", + lvmmc); + } + } +} + +/** * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list containing our private info pointer **/ -static void igb_watchdog(unsigned long data) +static void igb_watchdog(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = timer_container_of(adapter, t, + watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); } @@ -3912,22 +5572,47 @@ static void igb_watchdog_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; u32 link; int i; + u32 connsw; + u16 phy_data, retry_count = 20; link = igb_has_link(adapter); + + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { + if (time_after(jiffies, (adapter->link_check_timeout + HZ))) + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + else + link = false; + } + + /* Force link down if we have fiber to swap to */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (hw->phy.media_type == e1000_media_type_copper) { + connsw = rd32(E1000_CONNSW); + if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) + link = 0; + } + } if (link) { + /* Perform a reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + igb_reset(adapter); + } /* Cancel scheduled suspend requests. */ pm_runtime_resume(netdev->dev.parent); if (!netif_carrier_ok(netdev)) { u32 ctrl; + hw->mac.ops.get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); ctrl = rd32(E1000_CTRL); /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s " - "Duplex, Flow Control: %s\n", + netdev_info(netdev, + "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", netdev->name, adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? @@ -3937,6 +5622,15 @@ static void igb_watchdog_task(struct work_struct *work) (ctrl & E1000_CTRL_RFCE) ? "RX" : (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); + /* disable EEE if enabled */ + if ((adapter->flags & IGB_FLAG_EEE) && + (adapter->link_duplex == HALF_DUPLEX)) { + dev_info(&adapter->pdev->dev, + "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n"); + adapter->hw.dev_spec._82575.eee_disable = true; + adapter->flags &= ~IGB_FLAG_EEE; + } + /* check if SmartSpeed worked */ igb_check_downshift(hw); if (phy->speed_downgraded) @@ -3944,11 +5638,8 @@ static void igb_watchdog_task(struct work_struct *work) /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, - E1000_THSTAT_LINK_THROTTLE)) { - netdev_info(netdev, "The network adapter link " - "speed was downshifted because it " - "overheated\n"); - } + E1000_THSTAT_LINK_THROTTLE)) + netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n"); /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; @@ -3961,6 +5652,26 @@ static void igb_watchdog_task(struct work_struct *work) break; } + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) + goto no_wait; + + /* wait for Remote receiver status OK */ +retry_read_status: + if (!igb_read_phy_reg(hw, PHY_1000T_STATUS, + &phy_data)) { + if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && + retry_count) { + msleep(100); + retry_count--; + goto retry_read_status; + } else if (!retry_count) { + dev_err(&adapter->pdev->dev, "exceed max 2 second\n"); + } + } else { + dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n"); + } +no_wait: netif_carrier_on(netdev); igb_ping_all_vfs(adapter); @@ -3979,12 +5690,11 @@ static void igb_watchdog_task(struct work_struct *work) /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) { - netdev_err(netdev, "The network adapter was " - "stopped because it overheated\n"); + netdev_err(netdev, "The network adapter was stopped because it overheated\n"); } /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Down\n", + netdev_info(netdev, "igb: %s NIC Link is Down\n", netdev->name); netif_carrier_off(netdev); @@ -3995,13 +5705,32 @@ static void igb_watchdog_task(struct work_struct *work) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); + /* link is down, time to check for alternate media */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } pm_schedule_suspend(netdev->dev.parent, MSEC_PER_SEC * 5); + + /* also check for alternate media here */ + } else if (!netif_carrier_ok(netdev) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } } } spin_lock(&adapter->stats64_lock); - igb_update_stats(adapter, &adapter->stats64); + igb_update_stats(adapter); spin_unlock(&adapter->stats64_lock); for (i = 0; i < adapter->num_tx_queues; i++) { @@ -4025,22 +5754,52 @@ static void igb_watchdog_task(struct work_struct *work) } /* Cause software interrupt to ensure Rx ring is cleaned */ - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - wr32(E1000_EICS, eics); + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + struct igb_ring *rx_ring; + + if (!q_vector->rx.ring) + continue; + + rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + eics |= q_vector->eims_value; + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + } + } + if (eics) + wr32(E1000_EICS, eics); } else { - wr32(E1000_ICS, E1000_ICS_RXDMT0); + struct igb_ring *rx_ring = adapter->rx_ring[0]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + wr32(E1000_ICS, E1000_ICS_RXDMT0); + } } igb_spoof_check(adapter); igb_ptp_rx_hang(adapter); + igb_ptp_tx_hang(adapter); + + /* Check LVMMC register on i350/i354 only */ + if ((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) + igb_check_lvmmc(adapter); /* Reset the timer */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + HZ)); + else + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 2 * HZ)); + } } enum latency_range { @@ -4061,8 +5820,7 @@ enum latency_range { * were determined based on theoretical maximum wire speed and testing * data, in order to minimize response time while increasing bulk * throughput. - * This functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) + * This functionality is controlled by ethtool's coalescing settings. * NOTE: This function is called only when operating in a multiqueue * receive environment. **/ @@ -4136,8 +5894,7 @@ clear_counts: * based on theoretical maximum wire speed and thresholds were set based * on testing data as well as attempting to minimize response time * while increasing bulk throughput. - * this functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) + * This functionality is controlled by ethtool's coalescing settings. * NOTE: These calculations are only valid when operating in a single- * queue environment. **/ @@ -4163,13 +5920,12 @@ static void igb_update_itr(struct igb_q_vector *q_vector, case low_latency: /* 50 usec aka 20000 ints/s */ if (bytes > 10000) { /* this if handles the TSO accounting */ - if (bytes/packets > 8000) { + if (bytes/packets > 8000) itrval = bulk_latency; - } else if ((packets < 10) || ((bytes/packets) > 1200)) { + else if ((packets < 10) || ((bytes/packets) > 1200)) itrval = bulk_latency; - } else if ((packets > 35)) { + else if ((packets > 35)) itrval = lowest_latency; - } } else if (bytes/packets > 2000) { itrval = bulk_latency; } else if (packets <= 2 && bytes < 512) { @@ -4254,11 +6010,14 @@ set_itr_now: } } -static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, - u32 type_tucmd, u32 mss_l4len_idx) +static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + u32 vlan_macip_lens, u32 type_tucmd, + u32 mss_l4len_idx) { struct e1000_adv_tx_context_desc *context_desc; u16 i = tx_ring->next_to_use; + struct timespec64 ts; context_desc = IGB_TX_CTXTDESC(tx_ring, i); @@ -4273,18 +6032,39 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, mss_l4len_idx |= tx_ring->reg_idx << 4; context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); - context_desc->seqnum_seed = 0; context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); + + /* We assume there is always a valid tx time available. Invalid times + * should have been handled by the upper layers. + */ + if (tx_ring->launchtime_enable) { + ts = ktime_to_timespec64(first->skb->tstamp); + skb_txtime_consumed(first->skb); + context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); + } else { + context_desc->seqnum_seed = 0; + } } static int igb_tso(struct igb_ring *tx_ring, struct igb_tx_buffer *first, u8 *hdr_len) { + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; - u32 vlan_macip_lens, type_tucmd; - u32 mss_l4len_idx, l4len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; + int err; if (skb->ip_summed != CHECKSUM_PARTIAL) return 0; @@ -4292,54 +6072,72 @@ static int igb_tso(struct igb_ring *tx_ring, if (!skb_is_gso(skb)) return 0; - if (skb_header_cloned(skb)) { - int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; - - if (first->protocol == __constant_htons(ETH_P_IP)) { - struct iphdr *iph = ip_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, - 0); + type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? + E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP; + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + + /* IP header will have to cancel out any data that + * is not a part of the outer IP header + */ + ip.v4->check = csum_fold(csum_partial(trans_start, + csum_start - trans_start, + 0)); type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + + ip.v4->tot_len = 0; first->tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_IPV4; - } else if (skb_is_gso_v6(skb)) { - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); + } else { + ip.v6->payload_len = 0; first->tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM; } - /* compute header lengths */ - l4len = tcp_hdrlen(skb); - *hdr_len = skb_transport_offset(skb) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; first->bytecount += (first->gso_segs - 1) * *hdr_len; /* MSS L4LEN IDX */ - mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; /* VLAN MACLEN IPLEN */ - vlan_macip_lens = skb_network_header_len(skb); - vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens = l4.hdr - ip.hdr; + vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, + type_tucmd, mss_l4len_idx); return 1; } @@ -4348,65 +6146,43 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; - u32 mss_l4len_idx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) +csum_failed: + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) && + !tx_ring->launchtime_enable) return; - } else { - u8 l4_hdr = 0; - switch (first->protocol) { - case __constant_htons(ETH_P_IP): - vlan_macip_lens |= skb_network_header_len(skb); - type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; - l4_hdr = ip_hdr(skb)->protocol; - break; - case __constant_htons(ETH_P_IPV6): - vlan_macip_lens |= skb_network_header_len(skb); - l4_hdr = ipv6_hdr(skb)->nexthdr; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } - break; - } + goto no_csum; + } - switch (l4_hdr) { - case IPPROTO_TCP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - mss_l4len_idx = tcp_hdrlen(skb) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_SCTP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - mss_l4len_idx = sizeof(struct sctphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_UDP: - mss_l4len_idx = sizeof(struct udphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); - } + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + fallthrough; + case offsetof(struct udphdr, check): + break; + case offsetof(struct sctphdr, checksum): + /* validate that this is actually an SCTP request */ + if (skb_csum_is_sctp(skb)) { + type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } - - /* update TX checksum flag */ - first->tx_flags |= IGB_TX_FLAGS_CSUM; + fallthrough; + default: + skb_checksum_help(skb); + goto csum_failed; } + /* update TX checksum flag */ + first->tx_flags |= IGB_TX_FLAGS_CSUM; + vlan_macip_lens = skb_checksum_start_offset(skb) - + skb_network_offset(skb); +no_csum: vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); } #define IGB_SET_FLAG(_input, _flag, _result) \ @@ -4462,14 +6238,49 @@ static void igb_tx_olinfo_status(struct igb_ring *tx_ring, tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } -static void igb_tx_map(struct igb_ring *tx_ring, - struct igb_tx_buffer *first, - const u8 hdr_len) +static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + struct net_device *netdev = tx_ring->netdev; + + netif_stop_subqueue(netdev, tx_ring->queue_index); + + /* Herbert's original patch had: + * smp_mb__after_netif_stop_queue(); + * but since that doesn't exist yet, just open code it. + */ + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. + */ + if (igb_desc_unused(tx_ring) < size) + return -EBUSY; + + /* A reprieve! */ + netif_wake_subqueue(netdev, tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp2); + tx_ring->tx_stats.restart_queue2++; + u64_stats_update_end(&tx_ring->tx_syncp2); + + return 0; +} + +static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + if (igb_desc_unused(tx_ring) >= size) + return 0; + return __igb_maybe_stop_tx(tx_ring, size); +} + +static int igb_tx_map(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + const u8 hdr_len) { struct sk_buff *skb = first->skb; struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; + skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; u32 tx_flags = first->tx_flags; @@ -4546,6 +6357,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, /* set the timestamp */ first->time_stamp = jiffies; + skb_tx_timestamp(skb); + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). @@ -4553,7 +6366,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, * We also need this memory barrier to make certain all of the * status bits have been updated before next_to_watch is written. */ - wmb(); + dma_wmb(); /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -4564,65 +6377,153 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_ring->next_to_use = i; - writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); + /* Make sure there is space in the ring for the next send. */ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); - return; + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { + writel(i, tx_ring->tail); + } + return 0; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - for (;;) { + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i-- == 0) + i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); - if (tx_buffer == first) - break; - if (i == 0) - i = tx_ring->count; - i--; } + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(tx_buffer->skb); + tx_buffer->skb = NULL; + tx_ring->next_to_use = i; + + return -1; } -static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +int igb_xmit_xdp_ring(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + struct xdp_frame *xdpf) { - struct net_device *netdev = tx_ring->netdev; + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0; + u16 count, i, index = tx_ring->next_to_use; + struct igb_tx_buffer *tx_head = &tx_ring->tx_buffer_info[index]; + struct igb_tx_buffer *tx_buffer = tx_head; + union e1000_adv_tx_desc *tx_desc = IGB_TX_DESC(tx_ring, index); + u32 len = xdpf->len, cmd_type, olinfo_status; + void *data = xdpf->data; - netif_stop_subqueue(netdev, tx_ring->queue_index); + count = TXD_USE_COUNT(len); + for (i = 0; i < nr_frags; i++) + count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); - /* Herbert's original patch had: - * smp_mb__after_netif_stop_queue(); - * but since that doesn't exist yet, just open code it. - */ - smp_mb(); + if (igb_maybe_stop_tx(tx_ring, count + 3)) + return IGB_XDP_CONSUMED; - /* We need to check again in a case another CPU has just - * made room available. - */ - if (igb_desc_unused(tx_ring) < size) - return -EBUSY; + i = 0; + /* record the location of the first descriptor for this packet */ + tx_head->bytecount = xdp_get_frame_len(xdpf); + tx_head->type = IGB_TYPE_XDP; + tx_head->gso_segs = 1; + tx_head->xdpf = xdpf; - /* A reprieve! */ - netif_wake_subqueue(netdev, tx_ring->queue_index); + olinfo_status = tx_head->bytecount << E1000_ADVTXD_PAYLEN_SHIFT; + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + olinfo_status |= tx_ring->reg_idx << 4; + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); - u64_stats_update_begin(&tx_ring->tx_syncp2); - tx_ring->tx_stats.restart_queue2++; - u64_stats_update_end(&tx_ring->tx_syncp2); + for (;;) { + dma_addr_t dma; - return 0; -} + dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto unmap; -static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) -{ - if (igb_desc_unused(tx_ring) >= size) - return 0; - return __igb_maybe_stop_tx(tx_ring, size); + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + + /* put descriptor type bits */ + cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS | len; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + tx_buffer->protocol = 0; + + if (++index == tx_ring->count) + index = 0; + + if (i == nr_frags) + break; + + tx_buffer = &tx_ring->tx_buffer_info[index]; + tx_desc = IGB_TX_DESC(tx_ring, index); + tx_desc->read.olinfo_status = 0; + + data = skb_frag_address(&sinfo->frags[i]); + len = skb_frag_size(&sinfo->frags[i]); + i++; + } + tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD); + + netdev_tx_sent_queue(txring_txq(tx_ring), tx_head->bytecount); + /* set the timestamp */ + tx_head->time_stamp = jiffies; + + /* Avoid any potential race with xdp_xmit and cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + tx_head->next_to_watch = tx_desc; + tx_ring->next_to_use = index; + + /* Make sure there is space in the ring for the next send. */ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) + writel(index, tx_ring->tail); + + return IGB_XDP_TX; + +unmap: + for (;;) { + tx_buffer = &tx_ring->tx_buffer_info[index]; + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + if (tx_buffer == tx_head) + break; + + if (!index) + index += tx_ring->count; + index--; + } + + return IGB_XDP_CONSUMED; } netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, @@ -4631,6 +6532,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; + unsigned short f; u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; @@ -4641,31 +6543,31 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, * + 1 desc for context descriptor, * otherwise try next time */ - if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { - unsigned short f; - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - } else { - count += skb_shinfo(skb)->nr_frags; - } + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f])); if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ return NETDEV_TX_BUSY; } + if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))) + return NETDEV_TX_BUSY; + /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->type = IGB_TYPE_SKB; first->skb = skb; first->bytecount = skb->len; first->gso_segs = 1; - skb_tx_timestamp(skb); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); - if (!(adapter->ptp_tx_skb)) { + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS, + &adapter->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; @@ -4673,12 +6575,14 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, adapter->ptp_tx_start = jiffies; if (adapter->hw.mac.type == e1000_82576) schedule_work(&adapter->ptp_tx_work); + } else { + adapter->tx_hwtstamp_skipped++; } } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); } /* record initial flags and protocol */ @@ -4691,15 +6595,24 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, else if (!tso) igb_tx_csum(tx_ring, first); - igb_tx_map(tx_ring, first, hdr_len); - - /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + if (igb_tx_map(tx_ring, first, hdr_len)) + goto cleanup_tx_tstamp; return NETDEV_TX_OK; out_drop: - igb_unmap_and_free_tx_resource(tx_ring, first); + dev_kfree_skb_any(first->skb); + first->skb = NULL; +cleanup_tx_tstamp: + if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) { + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + if (adapter->hw.mac.type == e1000_82576) + cancel_work_sync(&adapter->ptp_tx_work); + clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state); + } return NETDEV_TX_OK; } @@ -4720,25 +6633,11 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, { struct igb_adapter *adapter = netdev_priv(netdev); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - if (skb->len <= 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } @@ -4746,8 +6645,9 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: number of the Tx queue that hung (unused) **/ -static void igb_tx_timeout(struct net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -4768,9 +6668,18 @@ static void igb_reset_task(struct work_struct *work) struct igb_adapter *adapter; adapter = container_of(work, struct igb_adapter, reset_task); + rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGB_DOWN, &adapter->state) || + test_bit(__IGB_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igb_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igb_reinit_locked(adapter); + rtnl_unlock(); } /** @@ -4778,17 +6687,15 @@ static void igb_reset_task(struct work_struct *work) * @netdev: network interface device structure * @stats: rtnl_link_stats64 pointer **/ -static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void igb_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct igb_adapter *adapter = netdev_priv(netdev); spin_lock(&adapter->stats64_lock); - igb_update_stats(adapter, &adapter->stats64); + igb_update_stats(adapter); memcpy(stats, &adapter->stats64, sizeof(*stats)); spin_unlock(&adapter->stats64_lock); - - return stats; } /** @@ -4801,22 +6708,29 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, static int igb_change_mtu(struct net_device *netdev, int new_mtu) { struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; - if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { - dev_err(&pdev->dev, "Invalid MTU setting\n"); - return -EINVAL; - } + if (igb_xdp_is_enabled(adapter)) { + int i; -#define MAX_STD_JUMBO_FRAME_SIZE 9238 - if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { - dev_err(&pdev->dev, "MTU > 9216 not supported.\n"); - return -EINVAL; + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + if (max_frame > igb_rx_bufsz(ring)) { + netdev_warn(adapter->netdev, + "Requested MTU size is not supported with XDP. Max frame size is %d\n", + max_frame); + return -EINVAL; + } + } } + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); /* igb_down has a dependency on max_frame_size */ adapter->max_frame_size = max_frame; @@ -4824,9 +6738,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) igb_down(adapter); - dev_info(&pdev->dev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) igb_up(adapter); @@ -4842,20 +6756,17 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) * igb_update_stats - Update the board statistics counters * @adapter: board private structure **/ -void igb_update_stats(struct igb_adapter *adapter, - struct rtnl_link_stats64 *net_stats) +void igb_update_stats(struct igb_adapter *adapter) { + struct rtnl_link_stats64 *net_stats = &adapter->stats64; struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; u32 reg, mpc; - u16 phy_tmp; int i; u64 bytes, packets; unsigned int start; u64 _bytes, _packets; -#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF - /* Prevent stats update while adapter is being reset, or if the pci * connection is down. */ @@ -4866,9 +6777,13 @@ void igb_update_stats(struct igb_adapter *adapter, bytes = 0; packets = 0; + + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { - u32 rqdpc = rd32(E1000_RQDPC(i)); struct igb_ring *ring = adapter->rx_ring[i]; + u32 rqdpc = rd32(E1000_RQDPC(i)); + if (hw->mac.type >= e1000_i210) + wr32(E1000_RQDPC(i), 0); if (rqdpc) { ring->rx_stats.drops += rqdpc; @@ -4876,10 +6791,10 @@ void igb_update_stats(struct igb_adapter *adapter, } do { - start = u64_stats_fetch_begin_bh(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4892,15 +6807,16 @@ void igb_update_stats(struct igb_adapter *adapter, for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_bh(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } net_stats->tx_bytes = bytes; net_stats->tx_packets = packets; + rcu_read_unlock(); /* read stats registers */ adapter->stats.crcerrs += rd32(E1000_CRCERRS); @@ -5011,15 +6927,6 @@ void igb_update_stats(struct igb_adapter *adapter, /* Tx Dropped needs to be maintained elsewhere */ - /* Phy Stats */ - if (hw->phy.media_type == e1000_media_type_copper) { - if ((adapter->link_speed == SPEED_1000) && - (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { - phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; - adapter->phy_stats.idle_errors += phy_tmp; - } - } - /* Management Stats */ adapter->stats.mgptc += rd32(E1000_MGTPTC); adapter->stats.mgprc += rd32(E1000_MGTPRC); @@ -5035,6 +6942,158 @@ void igb_update_stats(struct igb_adapter *adapter, } } +static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) +{ + int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_PEROUT, tsintr_tt); + struct e1000_hw *hw = &adapter->hw; + struct timespec64 ts; + u32 tsauxc; + + if (pin < 0 || pin >= IGB_N_SDP) + return; + + spin_lock(&adapter->tmreg_lock); + + if (hw->mac.type == e1000_82580 || + hw->mac.type == e1000_i354 || + hw->mac.type == e1000_i350) { + s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period); + u32 systiml, systimh, level_mask, level, rem; + u64 systim, now; + + /* read systim registers in sequence */ + rd32(E1000_SYSTIMR); + systiml = rd32(E1000_SYSTIML); + systimh = rd32(E1000_SYSTIMH); + systim = (((u64)(systimh & 0xFF)) << 32) | ((u64)systiml); + now = timecounter_cyc2time(&adapter->tc, systim); + + if (pin < 2) { + level_mask = (tsintr_tt == 1) ? 0x80000 : 0x40000; + level = (rd32(E1000_CTRL) & level_mask) ? 1 : 0; + } else { + level_mask = (tsintr_tt == 1) ? 0x80 : 0x40; + level = (rd32(E1000_CTRL_EXT) & level_mask) ? 1 : 0; + } + + div_u64_rem(now, ns, &rem); + systim = systim + (ns - rem); + + /* synchronize pin level with rising/falling edges */ + div_u64_rem(now, ns << 1, &rem); + if (rem < ns) { + /* first half of period */ + if (level == 0) { + /* output is already low, skip this period */ + systim += ns; + pr_notice("igb: periodic output on %s missed falling edge\n", + adapter->sdp_config[pin].name); + } + } else { + /* second half of period */ + if (level == 1) { + /* output is already high, skip this period */ + systim += ns; + pr_notice("igb: periodic output on %s missed rising edge\n", + adapter->sdp_config[pin].name); + } + } + + /* for this chip family tv_sec is the upper part of the binary value, + * so not seconds + */ + ts.tv_nsec = (u32)systim; + ts.tv_sec = ((u32)(systim >> 32)) & 0xFF; + } else { + ts = timespec64_add(adapter->perout[tsintr_tt].start, + adapter->perout[tsintr_tt].period); + } + + /* u32 conversion of tv_sec is safe until y2106 */ + wr32((tsintr_tt == 1) ? E1000_TRGTTIML1 : E1000_TRGTTIML0, ts.tv_nsec); + wr32((tsintr_tt == 1) ? E1000_TRGTTIMH1 : E1000_TRGTTIMH0, (u32)ts.tv_sec); + tsauxc = rd32(E1000_TSAUXC); + tsauxc |= TSAUXC_EN_TT0; + wr32(E1000_TSAUXC, tsauxc); + adapter->perout[tsintr_tt].start = ts; + + spin_unlock(&adapter->tmreg_lock); +} + +static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) +{ + int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_EXTTS, tsintr_tt); + int auxstmpl = (tsintr_tt == 1) ? E1000_AUXSTMPL1 : E1000_AUXSTMPL0; + int auxstmph = (tsintr_tt == 1) ? E1000_AUXSTMPH1 : E1000_AUXSTMPH0; + struct e1000_hw *hw = &adapter->hw; + struct ptp_clock_event event; + struct timespec64 ts; + unsigned long flags; + + if (pin < 0 || pin >= IGB_N_SDP) + return; + + if (hw->mac.type == e1000_82580 || + hw->mac.type == e1000_i354 || + hw->mac.type == e1000_i350) { + u64 ns = rd32(auxstmpl); + + ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32; + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->tc, ns); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + ts = ns_to_timespec64(ns); + } else { + ts.tv_nsec = rd32(auxstmpl); + ts.tv_sec = rd32(auxstmph); + } + + event.type = PTP_CLOCK_EXTTS; + event.index = tsintr_tt; + event.timestamp = ts.tv_sec * 1000000000ULL + ts.tv_nsec; + ptp_clock_event(adapter->ptp_clock, &event); +} + +static void igb_tsync_interrupt(struct igb_adapter *adapter) +{ + const u32 mask = (TSINTR_SYS_WRAP | E1000_TSICR_TXTS | + TSINTR_TT0 | TSINTR_TT1 | + TSINTR_AUTT0 | TSINTR_AUTT1); + struct e1000_hw *hw = &adapter->hw; + u32 tsicr = rd32(E1000_TSICR); + struct ptp_clock_event event; + + if (hw->mac.type == e1000_82580) { + /* 82580 has a hardware bug that requires an explicit + * write to clear the TimeSync interrupt cause. + */ + wr32(E1000_TSICR, tsicr & mask); + } + + if (tsicr & TSINTR_SYS_WRAP) { + event.type = PTP_CLOCK_PPS; + if (adapter->ptp_caps.pps) + ptp_clock_event(adapter->ptp_clock, &event); + } + + if (tsicr & E1000_TSICR_TXTS) { + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + + if (tsicr & TSINTR_TT0) + igb_perout(adapter, 0); + + if (tsicr & TSINTR_TT1) + igb_perout(adapter, 1); + + if (tsicr & TSINTR_AUTT0) + igb_extts(adapter, 0); + + if (tsicr & TSINTR_AUTT1) + igb_extts(adapter, 1); +} + static irqreturn_t igb_msix_other(int irq, void *data) { struct igb_adapter *adapter = data; @@ -5066,16 +7125,8 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); wr32(E1000_EIMS, adapter->eims_other); @@ -5211,7 +7262,7 @@ static int __igb_notify_dca(struct device *dev, void *data) igb_setup_dca(adapter); break; } - /* Fall Through since DCA is disabled. */ + fallthrough; /* since DCA is disabled. */ case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left @@ -5251,6 +7302,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf) /* By default spoof check is enabled for all VFs */ adapter->vf_data[vf].spoofchk_enabled = true; + /* By default VFs are not trusted */ + adapter->vf_data[vf].trusted = false; + return 0; } @@ -5292,6 +7346,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) vmolr |= E1000_VMOLR_MPME; } else if (vf_data->num_vf_mc_hashes) { int j; + vmolr |= E1000_VMOLR_ROMPE; for (j = 0; j < vf_data->num_vf_mc_hashes; j++) igb_mta_set(hw, vf_data->vf_mc_hashes[j]); @@ -5310,7 +7365,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) static int igb_set_vf_multicasts(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) { - int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); u16 *hash_list = (u16 *)&msgbuf[1]; struct vf_data_storage *vf_data = &adapter->vf_data[vf]; int i; @@ -5343,6 +7398,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) for (i = 0; i < adapter->vfs_allocated_count; i++) { u32 vmolr = rd32(E1000_VMOLR(i)); + vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); vf_data = &adapter->vf_data[i]; @@ -5362,123 +7418,132 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; - u32 pool_mask, reg, vid; - int i; + u32 pool_mask, vlvf_mask, i; + + /* create mask for VF and other pools */ + pool_mask = E1000_VLVF_POOLSEL_MASK; + vlvf_mask = BIT(E1000_VLVF_POOLSEL_SHIFT + vf); - pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + /* drop PF from pool bits */ + pool_mask &= ~BIT(E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); + for (i = E1000_VLVF_ARRAY_SIZE; i--;) { + u32 vlvf = rd32(E1000_VLVF(i)); + u32 vfta_mask, vid, vfta; /* remove the vf from the pool */ - reg &= ~pool_mask; - - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK) && - (reg & E1000_VLVF_VLANID_ENABLE)) { - reg = 0; - vid = reg & E1000_VLVF_VLANID_MASK; - igb_vfta_set(hw, vid, false); - } + if (!(vlvf & vlvf_mask)) + continue; + + /* clear out bit from VLVF */ + vlvf ^= vlvf_mask; + + /* if other pools are present, just remove ourselves */ + if (vlvf & pool_mask) + goto update_vlvfb; - wr32(E1000_VLVF(i), reg); + /* if PF is present, leave VFTA */ + if (vlvf & E1000_VLVF_POOLSEL_MASK) + goto update_vlvf; + + vid = vlvf & E1000_VLVF_VLANID_MASK; + vfta_mask = BIT(vid % 32); + + /* clear bit from VFTA */ + vfta = adapter->shadow_vfta[vid / 32]; + if (vfta & vfta_mask) + hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask); +update_vlvf: + /* clear pool selection enable */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + vlvf &= E1000_VLVF_POOLSEL_MASK; + else + vlvf = 0; +update_vlvfb: + /* clear pool bits */ + wr32(E1000_VLVF(i), vlvf); } +} - adapter->vf_data[vf].vlans_enabled = 0; +static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan) +{ + u32 vlvf; + int idx; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the VLAN id in the VLVF entries */ + for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) { + vlvf = rd32(E1000_VLVF(idx)); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + return idx; } -static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +static void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid) { struct e1000_hw *hw = &adapter->hw; - u32 reg, i; - - /* The vlvf table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return -1; + u32 bits, pf_id; + int idx; - /* we only need to do this if VMDq is enabled */ - if (!adapter->vfs_allocated_count) - return -1; + idx = igb_find_vlvf_entry(hw, vid); + if (!idx) + return; - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + bits = ~BIT(pf_id) & E1000_VLVF_POOLSEL_MASK; + bits &= rd32(E1000_VLVF(idx)); + + /* Disable the filter so this falls into the default pool. */ + if (!bits) { + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + wr32(E1000_VLVF(idx), BIT(pf_id)); + else + wr32(E1000_VLVF(idx), 0); } +} - if (add) { - if (i == E1000_VLVF_ARRAY_SIZE) { - /* Did not find a matching VLAN ID entry that was - * enabled. Search for a free filter entry, i.e. - * one without the enable bit set - */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if (!(reg & E1000_VLVF_VLANID_ENABLE)) - break; - } - } - if (i < E1000_VLVF_ARRAY_SIZE) { - /* Found an enabled/available entry */ - reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* if !enabled we need to set this up in vfta */ - if (!(reg & E1000_VLVF_VLANID_ENABLE)) { - /* add VID to filter table */ - igb_vfta_set(hw, vid, true); - reg |= E1000_VLVF_VLANID_ENABLE; - } - reg &= ~E1000_VLVF_VLANID_MASK; - reg |= vid; - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size += 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } +static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid, + bool add, u32 vf) +{ + int pf_id = adapter->vfs_allocated_count; + struct e1000_hw *hw = &adapter->hw; + int err; - adapter->vf_data[vf].vlans_enabled++; - } - } else { - if (i < E1000_VLVF_ARRAY_SIZE) { - /* remove vf from the pool */ - reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK)) { - reg = 0; - igb_vfta_set(hw, vid, false); - } - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - adapter->vf_data[vf].vlans_enabled--; - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size -= 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } - } + /* If VLAN overlaps with one the PF is currently monitoring make + * sure that we are able to allocate a VLVF entry. This may be + * redundant but it guarantees PF will maintain visibility to + * the VLAN. + */ + if (add && test_bit(vid, adapter->active_vlans)) { + err = igb_vfta_set(hw, vid, pf_id, true, false); + if (err) + return err; } - return 0; + + err = igb_vfta_set(hw, vid, vf, add, false); + + if (add && !err) + return err; + + /* If we failed to add the VF VLAN or we are removing the VF VLAN + * we may need to drop the PF pool bit in order to allow us to free + * up the VLVF resources. + */ + if (test_bit(vid, adapter->active_vlans) || + (adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_update_pf_vlvf(adapter, vid); + + return err; } static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) @@ -5491,130 +7556,107 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) wr32(E1000_VMVIR(vf), 0); } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf, + u16 vlan, u8 qos) { - int err = 0; - struct igb_adapter *adapter = netdev_priv(netdev); + int err; - if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) - return -EINVAL; - if (vlan || qos) { - err = igb_vlvf_set(adapter, vlan, !!vlan, vf); - if (err) - goto out; - igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); - igb_set_vmolr(adapter, vf, !vlan); - adapter->vf_data[vf].pf_vlan = vlan; - adapter->vf_data[vf].pf_qos = qos; - dev_info(&adapter->pdev->dev, - "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); - } - } else { - igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); - igb_set_vmvir(adapter, vlan, vf); - igb_set_vmolr(adapter, vf, true); - adapter->vf_data[vf].pf_vlan = 0; - adapter->vf_data[vf].pf_qos = 0; + err = igb_set_vf_vlan(adapter, vlan, true, vf); + if (err) + return err; + + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + + /* revoke access to previous VLAN */ + if (vlan != adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + igb_set_vf_vlan_strip(adapter, vf, true); + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } -out: + return err; } -static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) { - struct e1000_hw *hw = &adapter->hw; - int i; - u32 reg; + /* Restore tagless access via VLAN 0 */ + igb_set_vf_vlan(adapter, 0, true, vf); - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } + igb_set_vmvir(adapter, 0, vf); + igb_set_vmolr(adapter, vf, true); + + /* Remove any PF assigned VLAN */ + if (adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); - if (i >= E1000_VLVF_ARRAY_SIZE) - i = -1; + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; + igb_set_vf_vlan_strip(adapter, vf, false); - return i; + return 0; } -static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) { - struct e1000_hw *hw = &adapter->hw; - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); - /* If in promiscuous mode we need to make sure the PF also has - * the VLAN filter set. - */ - if (add && (adapter->netdev->flags & IFF_PROMISC)) - err = igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - if (err) - goto out; + if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) + return -EINVAL; - err = igb_vlvf_set(adapter, vid, add, vf); + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; - if (err) - goto out; + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : + igb_disable_port_vlan(adapter, vf); +} - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && (adapter->netdev->flags & IFF_PROMISC)) { - u32 vlvf, bits; - - int regndx = igb_find_vlvf_entry(adapter, vid); - if (regndx < 0) - goto out; - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - vlvf = bits = rd32(E1000_VLVF(regndx)); - bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + - adapter->vfs_allocated_count); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. - */ - if ((vlvf & VLAN_VID_MASK) == vid && - !test_bit(vid, adapter->active_vlans) && - !bits) - igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - } +static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int ret; -out: - return err; + if (adapter->vf_data[vf].pf_vlan) + return -1; + + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + + ret = igb_set_vf_vlan(adapter, vid, !!add, vf); + if (!ret) + igb_set_vf_vlan_strip(adapter, vf, !!vid); + return ret; } static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) { - /* clear flags - except flag that indicates PF has set the MAC */ - adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; - adapter->vf_data[vf].last_nack = jiffies; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - /* reset offloads to defaults */ - igb_set_vmolr(adapter, vf, true); + /* clear flags - except flag that indicates PF has set the MAC */ + vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC; + vf_data->last_nack = jiffies; /* reset vlans for device */ igb_clear_vf_vfta(adapter, vf); - if (adapter->vf_data[vf].pf_vlan) - igb_ndo_set_vf_vlan(adapter->netdev, vf, - adapter->vf_data[vf].pf_vlan, - adapter->vf_data[vf].pf_qos); - else - igb_clear_vf_vfta(adapter, vf); + igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf); + igb_set_vmvir(adapter, vf_data->pf_vlan | + (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vf_data->pf_vlan); + igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan)); /* reset multicast table array for vf */ adapter->vf_data[vf].num_vf_mc_hashes = 0; @@ -5639,42 +7681,347 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - int rar_entry = hw->mac.rar_entry_count - (vf + 1); - u32 reg, msgbuf[3]; + u32 reg, msgbuf[3] = {}; u8 *addr = (u8 *)(&msgbuf[1]); /* process all the same items cleared in a function level reset */ igb_vf_reset(adapter, vf); /* set vf mac address */ - igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf); + igb_set_vf_mac(adapter, vf, vf_mac); /* enable transmit and receive for vf */ reg = rd32(E1000_VFTE); - wr32(E1000_VFTE, reg | (1 << vf)); + wr32(E1000_VFTE, reg | BIT(vf)); reg = rd32(E1000_VFRE); - wr32(E1000_VFRE, reg | (1 << vf)); + wr32(E1000_VFRE, reg | BIT(vf)); adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; /* reply to reset with ack and vf mac address */ - msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, 6); + if (!is_zero_ether_addr(vf_mac)) { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, ETH_ALEN); + } else { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK; + } igb_write_mbx(hw, msgbuf, 3, vf); } +static void igb_flush_mac_table(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < hw->mac.rar_entry_count; i++) { + adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE; + eth_zero_addr(adapter->mac_table[i].addr); + adapter->mac_table[i].queue = 0; + igb_rar_set_index(adapter, i); + } +} + +static int igb_available_rars(struct igb_adapter *adapter, u8 queue) +{ + struct e1000_hw *hw = &adapter->hw; + /* do not count rar entries reserved for VFs MAC addresses */ + int rar_entries = hw->mac.rar_entry_count - + adapter->vfs_allocated_count; + int i, count = 0; + + for (i = 0; i < rar_entries; i++) { + /* do not count default entries */ + if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) + continue; + + /* do not count "in use" entries for different queues */ + if ((adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) && + (adapter->mac_table[i].queue != queue)) + continue; + + count++; + } + + return count; +} + +/* Set default MAC address for the PF in the first RAR entry */ +static void igb_set_default_mac_filter(struct igb_adapter *adapter) +{ + struct igb_mac_addr *mac_table = &adapter->mac_table[0]; + + ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); + mac_table->queue = adapter->vfs_allocated_count; + mac_table->state = IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; + + igb_rar_set_index(adapter, 0); +} + +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. + */ +static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGB_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGB_MAC_STATE_SRC_ADDR) != + (flags & IGB_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used. + */ +static int igb_add_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ + struct e1000_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count - + adapter->vfs_allocated_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for the first empty entry in the MAC table. + * Do not touch entries at the end of the table reserved for the VF MAC + * addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!igb_mac_entry_can_be_used(&adapter->mac_table[i], + addr, flags)) + continue; + + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags; + + igb_rar_set_index(adapter, i); + return i; + } + + return -ENOSPC; +} + +static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, + const u8 queue) +{ + return igb_add_mac_filter_flags(adapter, addr, queue, 0); +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGB_MAC_STATE_SRC_ADDR can be used. + */ +static int igb_del_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ + struct e1000_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count - + adapter->vfs_allocated_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for matching entry in the MAC table based on given address + * and queue. Do not touch entries at the end of the table reserved + * for the VF MAC addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)) + continue; + if ((adapter->mac_table[i].state & flags) != flags) + continue; + if (adapter->mac_table[i].queue != queue) + continue; + if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) + continue; + + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = + adapter->vfs_allocated_count; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + eth_zero_addr(adapter->mac_table[i].addr); + } + + igb_rar_set_index(adapter, i); + return 0; + } + + return -ENOENT; +} + +static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, + const u8 queue) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, 0); +} + +int igb_add_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + struct e1000_hw *hw = &adapter->hw; + + /* In theory, this should be supported on 82575 as well, but + * that part wasn't easily accessible during development. + */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + return igb_add_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + +int igb_del_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + +static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = igb_add_mac_filter(adapter, addr, adapter->vfs_allocated_count); + + return min_t(int, ret, 0); +} + +static int igb_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + igb_del_mac_filter(adapter, addr, adapter->vfs_allocated_count); + + return 0; +} + +static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, + const u32 info, const u8 *addr) +{ + struct pci_dev *pdev = adapter->pdev; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + struct vf_mac_filter *entry; + bool found = false; + int ret = 0; + + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + + switch (info) { + case E1000_VF_MAC_FILTER_CLR: + /* remove all unicast MAC filters related to the current VF */ + list_for_each_entry(entry, &adapter->vf_macs.l, l) { + if (entry->vf == vf) { + entry->vf = -1; + entry->free = true; + igb_del_mac_filter(adapter, entry->vf_mac, vf); + } + } + break; + case E1000_VF_MAC_FILTER_ADD: + /* try to find empty slot in the list */ + list_for_each_entry(entry, &adapter->vf_macs.l, l) { + if (entry->free) { + found = true; + break; + } + } + + if (found) { + entry->free = false; + entry->vf = vf; + ether_addr_copy(entry->vf_mac, addr); + + ret = igb_add_mac_filter(adapter, addr, vf); + ret = min_t(int, ret, 0); + } else { + ret = -ENOSPC; + } + + if (ret == -ENOSPC) + dev_warn(&pdev->dev, + "VF %d has requested MAC filter but there is no space for it\n", + vf); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) { + struct pci_dev *pdev = adapter->pdev; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + u32 info = msg[0] & E1000_VT_MSGINFO_MASK; + /* The VF MAC Address is stored in a packed array of bytes * starting at the second 32 bit word of the msg array */ - unsigned char *addr = (char *)&msg[1]; - int err = -1; + unsigned char *addr = (unsigned char *)&msg[1]; + int ret = 0; - if (is_valid_ether_addr(addr)) - err = igb_set_vf_mac(adapter, vf, addr); + if (!info) { + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", + vf); + return -EINVAL; + } - return err; + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC\n", + vf); + return -EINVAL; + } + + ret = igb_set_vf_mac(adapter, vf, addr); + } else { + ret = igb_set_vf_mac_filter(adapter, vf, info, addr); + } + + return ret; } static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) @@ -5699,45 +8046,40 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) struct vf_data_storage *vf_data = &adapter->vf_data[vf]; s32 retval; - retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); + retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf, false); if (retval) { /* if receive failed revoke VF CTS stats and restart init */ dev_err(&pdev->dev, "Error receiving message from VF\n"); vf_data->flags &= ~IGB_VF_FLAG_CTS; if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) - return; + goto unlock; goto out; } /* this is a message we already processed, do nothing */ if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) - return; + goto unlock; /* until the vf completes a reset it should not be * allowed to start any configuration. */ if (msgbuf[0] == E1000_VF_RESET) { + /* unlocks mailbox */ igb_vf_reset_msg(adapter, vf); return; } if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) - return; + goto unlock; retval = -1; goto out; } switch ((msgbuf[0] & 0xFFFF)) { case E1000_VF_SET_MAC_ADDR: - retval = -EINVAL; - if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) - retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); - else - dev_warn(&pdev->dev, - "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", - vf); + retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); break; case E1000_VF_SET_PROMISC: retval = igb_set_vf_promisc(adapter, msgbuf, vf); @@ -5755,7 +8097,7 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", vf); else - retval = igb_set_vf_vlan(adapter, msgbuf, vf); + retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf); break; default: dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); @@ -5771,14 +8113,21 @@ out: else msgbuf[0] |= E1000_VT_MSGTYPE_ACK; + /* unlocks mailbox */ igb_write_mbx(hw, msgbuf, 1, vf); + return; + +unlock: + igb_unlock_mbx(hw, vf); } static void igb_msg_task(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { /* process any reset requests */ if (!igb_check_for_rst(hw, vf)) @@ -5792,11 +8141,13 @@ static void igb_msg_task(struct igb_adapter *adapter) if (!igb_check_for_ack(hw, vf)) igb_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } /** * igb_set_uta - Set unicast filter table address * @adapter: board private structure + * @set: boolean indicating if we are setting or clearing bits * * The unicast table address is a register array of 32-bit registers. * The table is meant to be used in a way similar to how the MTA is used @@ -5804,21 +8155,18 @@ static void igb_msg_task(struct igb_adapter *adapter) * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous * enable bit to allow vlan tag stripping when promiscuous mode is enabled **/ -static void igb_set_uta(struct igb_adapter *adapter) +static void igb_set_uta(struct igb_adapter *adapter, bool set) { struct e1000_hw *hw = &adapter->hw; + u32 uta = set ? ~0 : 0; int i; - /* The UTA table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return; - /* we only need to do this if VMDq is enabled */ if (!adapter->vfs_allocated_count) return; - for (i = 0; i < hw->mac.uta_reg_count; i++) - array_wr32(E1000_UTA, i, ~0); + for (i = hw->mac.uta_reg_count; i--;) + array_wr32(E1000_UTA, i, uta); } /** @@ -5850,16 +8198,8 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); @@ -5904,16 +8244,8 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); @@ -5934,7 +8266,7 @@ static void igb_ring_irq_enable(struct igb_q_vector *q_vector) } if (!test_bit(__IGB_DOWN, &adapter->state)) { - if (adapter->msix_entries) + if (adapter->flags & IGB_FLAG_HAS_MSIX) wr32(E1000_EIMS, q_vector->eims_value); else igb_irq_enable(adapter); @@ -5951,44 +8283,64 @@ static int igb_poll(struct napi_struct *napi, int budget) struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi); + struct xsk_buff_pool *xsk_pool; bool clean_complete = true; + int work_done = 0; #ifdef CONFIG_IGB_DCA if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) igb_update_dca(q_vector); #endif if (q_vector->tx.ring) - clean_complete = igb_clean_tx_irq(q_vector); + clean_complete = igb_clean_tx_irq(q_vector, budget); - if (q_vector->rx.ring) - clean_complete &= igb_clean_rx_irq(q_vector, budget); + if (q_vector->rx.ring) { + int cleaned; + + xsk_pool = READ_ONCE(q_vector->rx.ring->xsk_pool); + cleaned = xsk_pool ? + igb_clean_rx_irq_zc(q_vector, xsk_pool, budget) : + igb_clean_rx_irq(q_vector, budget); + + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } /* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget; - /* If not enough Rx work done, exit the polling mode */ - napi_complete(napi); - igb_ring_irq_enable(q_vector); + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + igb_ring_irq_enable(q_vector); - return 0; + return work_done; } /** * igb_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info + * @napi_budget: Used to determine if we are in netpoll * * returns true if ring is completely cleaned **/ -static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) +static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget) { - struct igb_adapter *adapter = q_vector->adapter; - struct igb_ring *tx_ring = q_vector->tx.ring; - struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; + struct igb_adapter *adapter = q_vector->adapter; unsigned int budget = q_vector->tx.work_limit; + struct igb_ring *tx_ring = q_vector->tx.ring; unsigned int i = tx_ring->next_to_clean; + union e1000_adv_tx_desc *tx_desc; + struct igb_tx_buffer *tx_buffer; + struct xsk_buff_pool *xsk_pool; + int cpu = smp_processor_id(); + bool xsk_xmit_done = true; + struct netdev_queue *nq; + u32 xsk_frames = 0; if (test_bit(__IGB_DOWN, &adapter->state)) return true; @@ -6005,7 +8357,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) @@ -6019,7 +8371,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) total_packets += tx_buffer->gso_segs; /* free the skb */ - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->type == IGB_TYPE_SKB) { + napi_consume_skb(tx_buffer->skb, napi_budget); + } else if (tx_buffer->type == IGB_TYPE_XDP) { + xdp_return_frame(tx_buffer->xdpf); + } else if (tx_buffer->type == IGB_TYPE_XSK) { + xsk_frames++; + goto skip_for_xsk; + } /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -6028,7 +8387,6 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) DMA_TO_DEVICE); /* clear tx_buffer data */ - tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); /* clear last DMA location and unmap remaining buffers */ @@ -6052,6 +8410,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } } +skip_for_xsk: /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; tx_desc++; @@ -6080,6 +8439,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) q_vector->tx.total_bytes += total_bytes; q_vector->tx.total_packets += total_packets; + xsk_pool = READ_ONCE(tx_ring->xsk_pool); + if (xsk_pool) { + if (xsk_frames) + xsk_tx_completed(xsk_pool, xsk_frames); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool); + __netif_tx_unlock(nq); + } + if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct e1000_hw *hw = &adapter->hw; @@ -6142,7 +8516,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } } - return !!budget; + return !!budget && xsk_xmit_done; } /** @@ -6164,48 +8538,47 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, nta++; rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - /* transfer page from old buffer to new buffer */ - memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer)); - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, - old_buff->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls. + */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; } static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, - struct page *page, - unsigned int truesize) + int rx_buf_pgcnt) { - /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_node_id())) + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; - - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IGB_RX_BUFSZ; - - /* since we are the only owner of the page and we need to - * increment it, just set the value to 2 in order to avoid - * an unnecessary locked operation - */ - atomic_set(&page->_count, 2); #else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; +#define IGB_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGB_RXBUFFER_2048) - if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + if (rx_buffer->page_offset > IGB_LAST_OFFSET) return false; - - /* bump ref count on page before it is given to the stack */ - get_page(page); #endif + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + return true; } @@ -6213,114 +8586,197 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, * igb_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into + * @size: size of buffer to be added * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. **/ -static bool igb_add_rx_frag(struct igb_ring *rx_ring, +static void igb_add_rx_frag(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - unsigned int truesize = IGB_RX_BUFSZ; + unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IGB_SKB_PAD + size) : + SKB_DATA_ALIGN(size); #endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif +} - if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; +static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + struct xdp_buff *xdp, + ktime_t timestamp) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + unsigned int size = xdp->data_end - xdp->data; + unsigned int headlen; + struct sk_buff *skb; - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; - } + /* prefetch first cache line of first page */ + net_prefetch(xdp->data); - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN); + if (unlikely(!skb)) + return NULL; - /* we can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(page) == numa_node_id())) - return true; + if (timestamp) + skb_hwtstamps(skb)->hwtstamp = timestamp; - /* this page cannot be reused so discard it */ - put_page(page); - return false; - } + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IGB_RX_HDR_LEN) + headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN); - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (xdp->data + headlen) - page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + rx_buffer->pagecnt_bias++; + } - return igb_can_reuse_rx_page(rx_buffer, page, truesize); + return skb; } -static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) +static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + struct xdp_buff *xdp, + ktime_t timestamp) { - struct igb_rx_buffer *rx_buffer; - struct page *page; +#if (PAGE_SIZE < 8192) + unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + unsigned int metasize = xdp->data - xdp->data_meta; + struct sk_buff *skb; - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + /* prefetch first cache line of first page */ + net_prefetch(xdp->data_meta); + + /* build an skb around the page buffer */ + skb = napi_build_skb(xdp->data_hard_start, truesize); + if (unlikely(!skb)) + return NULL; - page = rx_buffer->page; - prefetchw(page); + /* update pointers within the skb to store the data */ + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + if (metasize) + skb_metadata_set(skb, metasize); - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); + if (timestamp) + skb_hwtstamps(skb)->hwtstamp = timestamp; + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; #endif - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IGB_RX_HDR_LEN); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_failed++; - return NULL; - } + return skb; +} - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - } +static int igb_run_xdp(struct igb_adapter *adapter, struct igb_ring *rx_ring, + struct xdp_buff *xdp) +{ + int err, result = IGB_XDP_PASS; + struct bpf_prog *xdp_prog; + u32 act; - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); - /* pull page into skb */ - if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { - /* hand second half of page back to the ring */ - igb_reuse_rx_page(rx_ring, rx_buffer); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + if (!xdp_prog) + goto xdp_out; + + prefetchw(xdp->data_hard_start); /* xdp_frame write */ + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + result = igb_xdp_xmit_back(adapter, xdp); + if (result == IGB_XDP_CONSUMED) + goto out_failure; + break; + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (err) + goto out_failure; + result = IGB_XDP_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: +out_failure: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + result = IGB_XDP_CONSUMED; + break; } +xdp_out: + return result; +} - /* clear contents of rx_buffer */ - rx_buffer->page = NULL; +static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring, + unsigned int size) +{ + unsigned int truesize; - return skb; +#if (PAGE_SIZE < 8192) + truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ +#else + truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IGB_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : + SKB_DATA_ALIGN(size); +#endif + return truesize; +} + +static void igb_rx_buffer_flip(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + unsigned int size) +{ + unsigned int truesize = igb_rx_frame_truesize(rx_ring, size); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } static inline void igb_rx_checksum(struct igb_ring *ring, @@ -6368,14 +8824,15 @@ static inline void igb_rx_hash(struct igb_ring *ring, struct sk_buff *skb) { if (ring->netdev->features & NETIF_F_RXHASH) - skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + skb_set_hash(skb, + le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); } /** * igb_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -6400,169 +8857,6 @@ static bool igb_is_non_eop(struct igb_ring *rx_ring, } /** - * igb_get_headlen - determine size of header for LRO/GRO - * @data: pointer to the start of the headers - * @max_len: total length of section to find headers in - * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, and GRO offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. - **/ -static unsigned int igb_get_headlen(unsigned char *data, - unsigned int max_len) -{ - union { - unsigned char *network; - /* l2 headers */ - struct ethhdr *eth; - struct vlan_hdr *vlan; - /* l3 headers */ - struct iphdr *ipv4; - struct ipv6hdr *ipv6; - } hdr; - __be16 protocol; - u8 nexthdr = 0; /* default to not TCP */ - u8 hlen; - - /* this should never happen, but better safe than sorry */ - if (max_len < ETH_HLEN) - return max_len; - - /* initialize network frame pointer */ - hdr.network = data; - - /* set first protocol and move network header forward */ - protocol = hdr.eth->h_proto; - hdr.network += ETH_HLEN; - - /* handle any vlan tag if present */ - if (protocol == __constant_htons(ETH_P_8021Q)) { - if ((hdr.network - data) > (max_len - VLAN_HLEN)) - return max_len; - - protocol = hdr.vlan->h_vlan_encapsulated_proto; - hdr.network += VLAN_HLEN; - } - - /* handle L3 protocols */ - if (protocol == __constant_htons(ETH_P_IP)) { - if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) - return max_len; - - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (hdr.network[0] & 0x0F) << 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct iphdr)) - return hdr.network - data; - - /* record next protocol if header is present */ - if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) - nexthdr = hdr.ipv4->protocol; - } else if (protocol == __constant_htons(ETH_P_IPV6)) { - if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) - return max_len; - - /* record next protocol */ - nexthdr = hdr.ipv6->nexthdr; - hlen = sizeof(struct ipv6hdr); - } else { - return hdr.network - data; - } - - /* relocate pointer to start of L4 header */ - hdr.network += hlen; - - /* finally sort out TCP */ - if (nexthdr == IPPROTO_TCP) { - if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) - return max_len; - - /* access doff as a u8 to avoid unaligned access on ia64 */ - hlen = (hdr.network[12] & 0xF0) >> 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct tcphdr)) - return hdr.network - data; - - hdr.network += hlen; - } else if (nexthdr == IPPROTO_UDP) { - if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) - return max_len; - - hdr.network += sizeof(struct udphdr); - } - - /* If everything has gone correctly hdr.network should be the - * data section of the packet and will be the end of the header. - * If not then it probably represents the end of the last recognized - * header. - */ - if ((hdr.network - data) < max_len) - return hdr.network - data; - else - return max_len; -} - -/** - * igb_pull_tail - igb specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being adjusted - * - * This function is an igb specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void igb_pull_tail(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - /* retrieve timestamp from buffer */ - igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - - /* update pointers to remove timestamp header */ - skb_frag_size_sub(frag, IGB_TS_HDR_LEN); - frag->page_offset += IGB_TS_HDR_LEN; - skb->data_len -= IGB_TS_HDR_LEN; - skb->len -= IGB_TS_HDR_LEN; - - /* move va to start of packet data */ - va += IGB_TS_HDR_LEN; - } - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * igb_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor @@ -6589,18 +8883,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, } } - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - igb_pull_tail(rx_ring, rx_desc, skb); - - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } @@ -6615,9 +8900,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, * order to populate the hash, checksum, VLAN, timestamp, protocol, and * other fields within the skb. **/ -static void igb_process_skb_fields(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) +void igb_process_skb_fields(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) { struct net_device *dev = rx_ring->netdev; @@ -6625,14 +8910,17 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, igb_rx_checksum(rx_ring, rx_desc, skb); - igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) && + !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) + igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { u16 vid; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) - vid = be16_to_cpu(rx_desc->wb.upper.vlan); + vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); else vid = le16_to_cpu(rx_desc->wb.upper.vlan); @@ -6644,15 +8932,116 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, skb->protocol = eth_type_trans(skb, rx_ring->netdev); } -static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) +static unsigned int igb_rx_offset(struct igb_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0; +} + +static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, + const unsigned int size, int *rx_buf_pgcnt) +{ + struct igb_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +static void igb_put_rx_buffer(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) +{ + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + igb_rx_pg_size(rx_ring), DMA_FROM_DEVICE, + IGB_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + +void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status) +{ + int cpu = smp_processor_id(); + struct netdev_queue *nq; + + if (status & IGB_XDP_REDIR) + xdp_do_flush(); + + if (status & IGB_XDP_TX) { + struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); + + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); + igb_xdp_ring_update_tail(tx_ring); + __netif_tx_unlock(nq); + } +} + +void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets, + unsigned int bytes) +{ + struct igb_ring *ring = q_vector->rx.ring; + + u64_stats_update_begin(&ring->rx_syncp); + ring->rx_stats.packets += packets; + ring->rx_stats.bytes += bytes; + u64_stats_update_end(&ring->rx_syncp); + + q_vector->rx.total_packets += packets; + q_vector->rx.total_bytes += bytes; +} + +static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { - struct igb_ring *rx_ring = q_vector->rx.ring; - struct sk_buff *skb = rx_ring->skb; unsigned int total_bytes = 0, total_packets = 0; + struct igb_adapter *adapter = q_vector->adapter; + struct igb_ring *rx_ring = q_vector->rx.ring; u16 cleaned_count = igb_desc_unused(rx_ring); + struct sk_buff *skb = rx_ring->skb; + unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + u32 frame_sz = 0; + int rx_buf_pgcnt; + int xdp_res = 0; - do { + /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ +#if (PAGE_SIZE < 8192) + frame_sz = igb_rx_frame_truesize(rx_ring, 0); +#endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + + while (likely(total_packets < budget)) { union e1000_adv_rx_desc *rx_desc; + struct igb_rx_buffer *rx_buffer; + ktime_t timestamp = 0; + int pkt_offset = 0; + unsigned int size; + void *pktbuf; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGB_RX_BUFFER_WRITE) { @@ -6661,23 +9050,71 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) } rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set + * descriptor has been written back */ - rmb(); + dma_rmb(); + + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); + pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; + + /* pull rx packet timestamp if available and valid */ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + int ts_hdr_len; + + ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector, + pktbuf, ×tamp); + + pkt_offset += ts_hdr_len; + size -= ts_hdr_len; + } /* retrieve a buffer from the ring */ - skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + if (!skb) { + unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring); + unsigned int offset = pkt_offset + igb_rx_offset(rx_ring); + + xdp_prepare_buff(&xdp, hard_start, offset, size, true); + xdp_buff_clear_frags_flag(&xdp); +#if (PAGE_SIZE > 4096) + /* At larger PAGE_SIZE, frame_sz depend on len size */ + xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); +#endif + xdp_res = igb_run_xdp(adapter, rx_ring, &xdp); + } + + if (xdp_res) { + if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) { + xdp_xmit |= xdp_res; + igb_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; + } + total_packets++; + total_bytes += size; + } else if (skb) + igb_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = igb_build_skb(rx_ring, rx_buffer, &xdp, + timestamp); + else + skb = igb_construct_skb(rx_ring, rx_buffer, + &xdp, timestamp); /* exit if we failed to retrieve a buffer */ - if (!skb) + if (!xdp_res && !skb) { + rx_ring->rx_stats.alloc_failed++; + rx_buffer->pagecnt_bias++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; + } + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -6685,7 +9122,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) continue; /* verify the packet layout is correct */ - if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || igb_cleanup_headers(rx_ring, rx_desc, skb)) { skb = NULL; continue; } @@ -6703,22 +9140,20 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) /* update budget accounting */ total_packets++; - } while (likely(total_packets < budget)); + } /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->rx_syncp); - rx_ring->rx_stats.packets += total_packets; - rx_ring->rx_stats.bytes += total_bytes; - u64_stats_update_end(&rx_ring->rx_syncp); - q_vector->rx.total_packets += total_packets; - q_vector->rx.total_bytes += total_bytes; + if (xdp_xmit) + igb_finalize_xdp(adapter, xdp_xmit); + + igb_update_rx_stats(q_vector, total_packets, total_bytes); if (cleaned_count) igb_alloc_rx_buffers(rx_ring, cleaned_count); - return (total_packets < budget); + return total_packets; } static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, @@ -6732,41 +9167,50 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, return true; /* alloc new page for storage */ - page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL); + page = dev_alloc_pages(igb_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + igb_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGB_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); + __free_pages(page, igb_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = igb_rx_offset(rx_ring); + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: rx descriptor ring to allocate new receive buffers + * @cleaned_count: count of buffers to allocate **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { union e1000_adv_rx_desc *rx_desc; struct igb_rx_buffer *bi; u16 i = rx_ring->next_to_use; + u16 bufsz; /* nothing to do */ if (!cleaned_count) @@ -6776,10 +9220,17 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; + bufsz = igb_rx_bufsz(rx_ring); + do { if (!igb_alloc_mapped_page(rx_ring, bi)) break; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, bufsz, + DMA_FROM_DEVICE); + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ @@ -6794,8 +9245,8 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->read.hdr_addr = 0; + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; cleaned_count--; } while (cleaned_count); @@ -6814,16 +9265,16 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, rx_ring->tail); } } /** * igb_mii_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -6839,10 +9290,14 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; case SIOCGMIIREG: if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, - &data->val_out)) + &data->val_out)) return -EIO; break; case SIOCSMIIREG: + if (igb_write_phy_reg(&adapter->hw, data->reg_num & 0x1F, + data->val_in)) + return -EIO; + break; default: return -EOPNOTSUPP; } @@ -6851,9 +9306,9 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) /** * igb_ioctl - - * @netdev: - * @ifreq: - * @cmd: + * @netdev: pointer to netdev struct + * @ifr: interface structure + * @cmd: ioctl command to execute **/ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -6862,13 +9317,25 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); - case SIOCSHWTSTAMP: - return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); default: return -EOPNOTSUPP; } } +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; @@ -6913,7 +9380,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) wr32(E1000_CTRL, ctrl); } - igb_rlpml_set(adapter); + igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable); } static int igb_vlan_rx_add_vid(struct net_device *netdev, @@ -6923,11 +9390,9 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, true, pf_id); - /* add the filter since PF can receive vlans w/o entry in vlvf */ - igb_vfta_set(hw, vid, true); + if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, true, !!vid); set_bit(vid, adapter->active_vlans); @@ -6938,16 +9403,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - s32 err; - - /* remove vlan from VLVF table array */ - err = igb_vlvf_set(adapter, vid, false, pf_id); + struct e1000_hw *hw = &adapter->hw; - /* if vid was not present in VLVF just remove it from table */ - if (err) - igb_vfta_set(hw, vid, false); + /* remove VID from filter table */ + if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, false, true); clear_bit(vid, adapter->active_vlans); @@ -6956,11 +9417,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, static void igb_restore_vlan(struct igb_adapter *adapter) { - u16 vid; + u16 vid = 1; igb_vlan_mode(adapter->netdev, adapter->netdev->features); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID) igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } @@ -7031,22 +9493,18 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl, status; u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif + bool wake; + rtnl_lock(); netif_device_detach(netdev); if (netif_running(netdev)) __igb_close(netdev, true); - igb_clear_interrupt_scheme(adapter); + igb_ptp_suspend(adapter); -#ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; -#endif + igb_clear_interrupt_scheme(adapter); + rtnl_unlock(); status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) @@ -7064,10 +9522,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = rd32(E1000_CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); @@ -7081,12 +9535,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, wr32(E1000_WUFC, 0); } - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) + wake = wufc || adapter->en_mng_pt; + if (!wake) igb_power_down_link(adapter); else igb_power_up_link(adapter); + if (enable_wake) + *enable_wake = wake; + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -7097,41 +9554,54 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, return 0; } -#ifdef CONFIG_PM -#ifdef CONFIG_PM_SLEEP -static int igb_suspend(struct device *dev) +static void igb_deliver_wake_packet(struct net_device *netdev) { - int retval; - bool wake; - struct pci_dev *pdev = to_pci_dev(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct sk_buff *skb; + u32 wupl; - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; + wupl = rd32(E1000_WUPL) & E1000_WUPL_MASK; - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } + /* WUPM stores only the first 128 bytes of the wake packet. + * Read the packet only if we have the whole thing. + */ + if ((wupl == 0) || (wupl > E1000_WUPM_BYTES)) + return; - return 0; + skb = netdev_alloc_skb_ip_align(netdev, E1000_WUPM_BYTES); + if (!skb) + return; + + skb_put(skb, wupl); + + /* Ensure reads are 32-bit aligned */ + wupl = roundup(wupl, 4); + + memcpy_fromio(skb->data, hw->hw_addr + E1000_WUPM_REG(0), wupl); + + skb->protocol = eth_type_trans(skb, netdev); + netif_rx(skb); } -#endif /* CONFIG_PM_SLEEP */ -static int igb_resume(struct device *dev) +static int igb_suspend(struct device *dev) +{ + return __igb_shutdown(to_pci_dev(dev), NULL, 0); +} + +static int __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 err; + u32 err, val; pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - pci_save_state(pdev); + if (!pci_device_is_present(pdev)) + return -ENODEV; err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, @@ -7155,25 +9625,33 @@ static int igb_resume(struct device *dev) */ igb_get_hw_control(adapter); + val = rd32(E1000_WUS); + if (val & WAKE_PKT_WUS) + igb_deliver_wake_packet(netdev); + wr32(E1000_WUS, ~0); - if (netdev->flags & IFF_UP) { + if (!rpm) rtnl_lock(); + if (!err && netif_running(netdev)) err = __igb_open(netdev, true); + + if (!err) + netif_device_attach(netdev); + if (!rpm) rtnl_unlock(); - if (err) - return err; - } - netif_device_attach(netdev); - return 0; + return err; +} + +static int igb_resume(struct device *dev) +{ + return __igb_resume(dev, false); } -#ifdef CONFIG_PM_RUNTIME static int igb_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct net_device *netdev = pci_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct igb_adapter *adapter = netdev_priv(netdev); if (!igb_has_link(adapter)) @@ -7184,30 +9662,13 @@ static int igb_runtime_idle(struct device *dev) static int igb_runtime_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 1); } static int igb_runtime_resume(struct device *dev) { - return igb_resume(dev); + return __igb_resume(dev, true); } -#endif /* CONFIG_PM_RUNTIME */ -#endif static void igb_shutdown(struct pci_dev *pdev) { @@ -7221,94 +9682,20 @@ static void igb_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PCI_IOV -static int igb_sriov_reinit(struct pci_dev *dev) -{ - struct net_device *netdev = pci_get_drvdata(dev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - - rtnl_lock(); - - if (netif_running(netdev)) - igb_close(netdev); - - igb_clear_interrupt_scheme(adapter); - - igb_init_queue_configuration(adapter); - - if (igb_init_interrupt_scheme(adapter, true)) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - - if (netif_running(netdev)) - igb_open(netdev); - - rtnl_unlock(); - - return 0; -} - -static int igb_pci_disable_sriov(struct pci_dev *dev) -{ - int err = igb_disable_sriov(dev); - - if (!err) - err = igb_sriov_reinit(dev); - - return err; -} - -static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs) -{ - int err = igb_enable_sriov(dev, num_vfs); - - if (err) - goto out; - - err = igb_sriov_reinit(dev); - if (!err) - return num_vfs; - -out: - return err; -} - -#endif static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) { #ifdef CONFIG_PCI_IOV - if (num_vfs == 0) - return igb_pci_disable_sriov(dev); - else - return igb_pci_enable_sriov(dev, num_vfs); -#endif - return 0; -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ -static void igb_netpoll(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct igb_q_vector *q_vector; - int i; + int err; - for (i = 0; i < adapter->num_q_vectors; i++) { - q_vector = adapter->q_vector[i]; - if (adapter->msix_entries) - wr32(E1000_EIMC, q_vector->eims_value); - else - igb_irq_disable(adapter); - napi_schedule(&q_vector->napi); + if (num_vfs == 0) { + return igb_disable_sriov(dev, true); + } else { + err = igb_enable_sriov(dev, num_vfs, true); + return err ? err : num_vfs; } +#endif + return 0; } -#endif /* CONFIG_NET_POLL_CONTROLLER */ /** * igb_io_error_detected - called when PCI error is detected @@ -7324,16 +9711,24 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + if (state == pci_channel_io_normal) { + dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); + return PCI_ERS_RESULT_CAN_RECOVER; + } + netif_device_detach(netdev); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); if (netif_running(netdev)) igb_down(adapter); + rtnl_unlock(); + pci_disable_device(pdev); - /* Request a slot slot reset. */ + /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } @@ -7342,7 +9737,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. + * resembles the first-half of the __igb_resume routine. **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { @@ -7350,7 +9745,6 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; pci_ers_result_t result; - int err; if (pci_enable_device_mem(pdev)) { dev_err(&pdev->dev, @@ -7359,24 +9753,20 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) } else { pci_set_master(pdev); pci_restore_state(pdev); - pci_save_state(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + /* In case of PCI error, adapter lose its HW address + * so we should re-assign it here. + */ + hw->hw_addr = adapter->io_addr; + igb_reset(adapter); wr32(E1000_WUS, ~0); result = PCI_ERS_RESULT_RECOVERED; } - err = pci_cleanup_aer_uncorrect_error_status(pdev); - if (err) { - dev_err(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", - err); - /* non-fatal, continue */ - } - return result; } @@ -7386,19 +9776,28 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * second-half of the __igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + rtnl_lock(); if (netif_running(netdev)) { + if (!test_bit(__IGB_DOWN, &adapter->state)) { + dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n"); + rtnl_unlock(); + return; + } + if (igb_up(adapter)) { dev_err(&pdev->dev, "igb_up failed after reset\n"); + rtnl_unlock(); return; } } + rtnl_unlock(); netif_device_attach(netdev); @@ -7408,26 +9807,49 @@ static void igb_io_resume(struct pci_dev *pdev) igb_get_hw_control(adapter); } -static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, - u8 qsel) +/** + * igb_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table + * @adapter: Pointer to adapter structure + * @index: Index of the RAR entry which need to be synced with MAC table + **/ +static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) { - u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; + u32 rar_low, rar_high; + u8 *addr = adapter->mac_table[index].addr; - /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian + /* HW expects these to be in network order when they are plugged + * into the registers which are little endian. In order to guarantee + * that ordering we need to do an leXX_to_cpup here in order to be + * ready for the byteswap that occurs with writel */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + rar_low = le32_to_cpup((__le32 *)(addr)); + rar_high = le16_to_cpup((__le16 *)(addr + 4)); /* Indicate to hardware the Address is Valid. */ - rar_high |= E1000_RAH_AV; - - if (hw->mac.type == e1000_82575) - rar_high |= E1000_RAH_POOL_1 * qsel; - else - rar_high |= E1000_RAH_POOL_1 << qsel; + if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) { + if (is_valid_ether_addr(addr)) + rar_high |= E1000_RAH_AV; + + if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR) + rar_high |= E1000_RAH_ASEL_SRC_ADDR; + + switch (hw->mac.type) { + case e1000_82575: + case e1000_i210: + if (adapter->mac_table[index].state & + IGB_MAC_STATE_QUEUE_STEERING) + rar_high |= E1000_RAH_QSEL_ENABLE; + + rar_high |= E1000_RAH_POOL_1 * + adapter->mac_table[index].queue; + break; + default: + rar_high |= E1000_RAH_POOL_1 << + adapter->mac_table[index].queue; + break; + } + } wr32(E1000_RAL(index), rar_low); wrfl(); @@ -7443,10 +9865,13 @@ static int igb_set_vf_mac(struct igb_adapter *adapter, * towards the first, as a result a collision should not be possible */ int rar_entry = hw->mac.rar_entry_count - (vf + 1); + unsigned char *vf_mac_addr = adapter->vf_data[vf].vf_mac_addresses; - memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); - - igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf); + ether_addr_copy(vf_mac_addr, mac_addr); + ether_addr_copy(adapter->mac_table[rar_entry].addr, mac_addr); + adapter->mac_table[rar_entry].queue = vf; + adapter->mac_table[rar_entry].state |= IGB_MAC_STATE_IN_USE; + igb_rar_set_index(adapter, rar_entry); return 0; } @@ -7454,17 +9879,36 @@ static int igb_set_vf_mac(struct igb_adapter *adapter, static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { struct igb_adapter *adapter = netdev_priv(netdev); - if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + /* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC + * flag and allows to overwrite the MAC via VF netdev. This + * is necessary to allow libvirt a way to restore the original + * MAC after unbinding vfio-pci and reloading igbvf after shutting + * down a VM. + */ + if (is_zero_ether_addr(mac)) { + adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, + "remove administratively set MAC on VF %d\n", + vf); + } else if (is_valid_ether_addr(mac)) { + adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", + mac, vf); + dev_info(&adapter->pdev->dev, + "Reload the VF driver to make this change effective."); + /* Generate additional warning if PF is down */ + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF MAC address has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); + } + } else { return -EINVAL; - adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; - dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); - dev_info(&adapter->pdev->dev, - "Reload the VF driver to make this change effective."); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF MAC address has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); } return igb_set_vf_mac(adapter, vf, mac); } @@ -7491,12 +9935,11 @@ static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, /* Calculate the rate factor values to set */ rf_int = link_speed / tx_rate; rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / + rf_dec = (rf_dec * BIT(E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; bcnrc_val = E1000_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) & - E1000_RTTBCNRC_RF_INT_MASK); + bcnrc_val |= FIELD_PREP(E1000_RTTBCNRC_RF_INT_MASK, rf_int); bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); } else { bcnrc_val = 0; @@ -7538,7 +9981,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) } } -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -7547,15 +9991,19 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (hw->mac.type != e1000_82576) return -EOPNOTSUPP; + if (min_tx_rate) + return -EINVAL; + actual_link_speed = igb_link_mbps(adapter->link_speed); if ((vf >= adapter->vfs_allocated_count) || (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || - (tx_rate < 0) || (tx_rate > actual_link_speed)) + (max_tx_rate < 0) || + (max_tx_rate > actual_link_speed)) return -EINVAL; adapter->vf_rate_link_speed = actual_link_speed; - adapter->vf_data[vf].tx_rate = (u16)tx_rate; - igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + adapter->vf_data[vf].tx_rate = (u16)max_tx_rate; + igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed); return 0; } @@ -7576,15 +10024,31 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC; reg_val = rd32(reg_offset); if (setting) - reg_val |= ((1 << vf) | - (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + reg_val |= (BIT(vf) | + BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)); else - reg_val &= ~((1 << vf) | - (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + reg_val &= ~(BIT(vf) | + BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)); wr32(reg_offset, reg_val); adapter->vf_data[vf].spoofchk_enabled = setting; - return E1000_SUCCESS; + return 0; +} + +static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + if (adapter->vf_data[vf].trusted == setting) + return 0; + + adapter->vf_data[vf].trusted = setting; + + dev_info(&adapter->pdev->dev, "VF %u is %strusted\n", + vf, setting ? "" : "not "); + return 0; } static int igb_ndo_get_vf_config(struct net_device *netdev, @@ -7595,10 +10059,12 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vf_data[vf].tx_rate; + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; + ivi->trusted = adapter->vf_data[vf].trusted; return 0; } @@ -7620,11 +10086,13 @@ static void igb_vmm_control(struct igb_adapter *adapter) reg = rd32(E1000_DTXCTL); reg |= E1000_DTXCTL_VLAN_ADDED; wr32(E1000_DTXCTL, reg); + fallthrough; case e1000_82580: /* enable replication vlan tag stripping */ reg = rd32(E1000_RPLOLR); reg |= E1000_RPLOLR_STRVLAN; wr32(E1000_RPLOLR, reg); + fallthrough; case e1000_i350: /* none of the above registers are supported by i350 */ break; @@ -7646,11 +10114,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; + u32 reg; if (hw->mac.type > e1000_82580) { if (adapter->flags & IGB_FLAG_DMAC) { - u32 reg; - /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); @@ -7658,25 +10125,19 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); + hwm = 64 * (pba - 6); reg = rd32(E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; - reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) - & E1000_FCRTC_RTH_COAL_MASK); + reg |= FIELD_PREP(E1000_FCRTC_RTH_COAL_MASK, hwm); wr32(E1000_FCRTC, reg); /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. */ - dmac_thr = pba - adapter->max_frame_size / 512; - if (dmac_thr < pba - 10) - dmac_thr = pba - 10; + dmac_thr = pba - 10; reg = rd32(E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; - reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) - & E1000_DMACR_DMACTHR_MASK); + reg |= FIELD_PREP(E1000_DMACR_DMACTHR_MASK, dmac_thr); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); @@ -7687,7 +10148,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - wr32(E1000_DMACR, reg); /* no lower threshold to disable @@ -7704,16 +10164,17 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + } - /* make low power state decision controlled - * by DMA coal - */ + if (hw->mac.type >= e1000_i210 || + (adapter->flags & IGB_FLAG_DMAC)) { reg = rd32(E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; + reg |= E1000_PCIEMISC_LX_DECISION; wr32(E1000_PCIEMISC, reg); } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { u32 reg = rd32(E1000_PCIEMISC); + wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); wr32(E1000_DMACR, 0); } @@ -7742,8 +10203,7 @@ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, swfw_mask = E1000_SWFW_PHY0_SM; - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) - != E1000_SUCCESS) + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) return E1000_ERR_SWFW_SYNC; status = i2c_smbus_read_byte_data(this_client, byte_offset); @@ -7753,7 +10213,7 @@ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, return E1000_ERR_I2C; else { *data = status; - return E1000_SUCCESS; + return 0; } } @@ -7778,7 +10238,7 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, if (!this_client) return E1000_ERR_I2C; - if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) return E1000_ERR_SWFW_SYNC; status = i2c_smbus_write_byte_data(this_client, byte_offset, data); hw->mac.ops.release_swfw_sync(hw, swfw_mask); @@ -7786,7 +10246,72 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, if (status) return E1000_ERR_I2C; else - return E1000_SUCCESS; + return 0; } + +int igb_reinit_queues(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + if (netif_running(netdev)) + igb_close(netdev); + + igb_reset_interrupt_capability(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + err = igb_open(netdev); + + return err; +} + +static void igb_nfc_filter_exit(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_erase_filter(adapter, rule); + + hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) + igb_erase_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +static void igb_nfc_filter_restore(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +static _DEFINE_DEV_PM_OPS(igb_pm_ops, igb_suspend, igb_resume, + igb_runtime_suspend, igb_runtime_resume, + igb_runtime_idle); + +static struct pci_driver igb_driver = { + .name = igb_driver_name, + .id_table = igb_pci_tbl, + .probe = igb_probe, + .remove = igb_remove, + .driver.pm = pm_ptr(&igb_pm_ops), + .shutdown = igb_shutdown, + .sriov_configure = igb_pci_sriov_configure, + .err_handler = &igb_err_handler +}; + /* igb_main.c */ |
