diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c')
| -rw-r--r-- | drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1938 |
1 files changed, 1264 insertions, 674 deletions
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 084c53582793..d5ce20f47def 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1,28 +1,5 @@ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2024 Intel Corporation. */ /****************************************************************************** Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code @@ -50,6 +27,10 @@ #include <linux/if_vlan.h> #include <linux/prefetch.h> #include <net/mpls.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/atomic.h> +#include <net/xfrm.h> #include "ixgbevf.h" @@ -57,10 +38,8 @@ const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; -#define DRV_VERSION "4.1.0-k" -const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2015 Intel Corporation."; + "Copyright (c) 2009 - 2024 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -72,6 +51,8 @@ static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info, [board_X550EM_x_vf_hv] = &ixgbevf_X550EM_x_vf_hv_info, [board_x550em_a_vf] = &ixgbevf_x550em_a_vf_info, + [board_e610_vf] = &ixgbevf_e610_vf_info, + [board_e610_vf_hv] = &ixgbevf_e610_vf_hv_info, }; /* ixgbevf_pci_tbl - PCI Device ID Table @@ -92,15 +73,16 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf }, + {PCI_VDEVICE_SUB(INTEL, IXGBE_DEV_ID_E610_VF, PCI_ANY_ID, + IXGBE_SUBDEV_ID_E610_VF_HV), board_e610_vf_hv}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_VF), board_e610_vf}, /* required last entry */ {0, } }; MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); -MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) static int debug = -1; @@ -130,6 +112,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); +static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -164,7 +149,7 @@ static void ixgbevf_check_remove(struct ixgbe_hw *hw, u32 reg) u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); u32 value; if (IXGBE_REMOVED(reg_addr)) @@ -206,28 +191,6 @@ static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction, } } -static void ixgbevf_unmap_and_free_tx_resource(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *tx_buffer) -{ - if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } - tx_buffer->next_to_watch = NULL; - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - /* tx_buffer must be completely set up in the transmit path */ -} - static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring *ring) { return ring->stats.packets; @@ -287,8 +250,9 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) /** * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: transmit queue hanging (unused) **/ -static void ixgbevf_tx_timeout(struct net_device *netdev) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); @@ -307,7 +271,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_adapter *adapter = q_vector->adapter; struct ixgbevf_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; + unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0; unsigned int budget = tx_ring->count / 2; unsigned int i = tx_ring->next_to_clean; @@ -326,7 +290,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) @@ -338,9 +302,14 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, /* update the statistics for this packet */ total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; + if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC) + total_ipsec++; /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -349,7 +318,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, DMA_TO_DEVICE); /* clear tx_buffer data */ - tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); /* unmap remaining buffers */ @@ -398,6 +366,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, u64_stats_update_end(&tx_ring->syncp); q_vector->tx.total_bytes += total_bytes; q_vector->tx.total_packets += total_packets; + adapter->tx_ipsec += total_ipsec; if (check_for_tx_hang(tx_ring) && ixgbevf_check_tx_hang(tx_ring)) { struct ixgbe_hw *hw = &adapter->hw; @@ -405,7 +374,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc = tx_ring->tx_buffer_info[i].next_to_watch; - pr_err("Detected Tx Unit Hang\n" + pr_err("Detected Tx Unit Hang%s\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" " next_to_use <%x>\n" @@ -415,6 +384,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, " eop_desc->wb.status <%x>\n" " time_stamp <%lx>\n" " jiffies <%lx>\n", + ring_is_xdp(tx_ring) ? " XDP" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)), @@ -422,7 +392,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (!ring_is_xdp(tx_ring)) + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -430,6 +402,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } + if (ring_is_xdp(tx_ring)) + return !!budget; + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -547,14 +522,61 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static +struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, + const unsigned int size) +{ + struct ixgbevf_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + struct sk_buff *skb) +{ + if (ixgbevf_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbevf_reuse_rx_page(rx_ring, rx_buffer); + } else { + if (IS_ERR(skb)) + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + /** * ixgbevf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -578,40 +600,48 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } +static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; +} + static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t dma = bi->dma; + dma_addr_t dma; /* since we are recycling buffers we should seldom need to alloc */ if (likely(page)) return true; /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, - PAGE_SIZE, DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); + __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_ring->rx_stats.alloc_rx_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ixgbevf_rx_offset(rx_ring); + bi->pagecnt_bias = 1; + rx_ring->rx_stats.alloc_rx_page++; return true; } @@ -640,6 +670,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, if (!ixgbevf_alloc_mapped_page(rx_ring, bi)) break; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, + ixgbevf_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + /* Refresh the desc even if pkt_addr didn't change * because each write-back erases this info. */ @@ -654,8 +690,8 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->read.hdr_addr = 0; + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; cleaned_count--; } while (cleaned_count); @@ -742,186 +778,363 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, new_buff->page = old_buff->page; new_buff->dma = old_buff->dma; new_buff->page_offset = old_buff->page_offset; - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, - IXGBEVF_RX_BUFSZ, - DMA_FROM_DEVICE); + new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static inline bool ixgbevf_page_is_reserved(struct page *page) +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + return false; +#else +#define IXGBEVF_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) + + if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) + return false; + +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; } /** * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into + * @size: size of buffer to be added * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. **/ -static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, +static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned char *va = page_address(page) + rx_buffer->page_offset; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - unsigned int truesize = IXGBEVF_RX_BUFSZ; + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + rx_buffer->page_offset += truesize; #endif - unsigned int pull_len; +} - if (unlikely(skb_is_nonlinear(skb))) - goto add_tail_frag; +static +struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) +{ + unsigned int size = xdp->data_end - xdp->data; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + unsigned int headlen; + struct sk_buff *skb; - if (likely(size <= IXGBEVF_RX_HDR_SIZE)) { - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + /* prefetch first cache line of first page */ + net_prefetch(xdp->data); - /* page is not reserved, we can reuse buffer as is */ - if (likely(!ixgbevf_page_is_reserved(page))) - return true; + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags <dev> legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbevf_build_skb(). + * + * For ixgbevf_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * changed in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ - /* this page cannot be reused so discard it */ - put_page(page); - return false; - } + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IXGBEVF_RX_HDR_SIZE) + headlen = eth_get_headlen(skb->dev, xdp->data, + IXGBEVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, + ALIGN(headlen, sizeof(long))); /* update all of the pointers */ - va += pull_len; - size -= pull_len; + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (xdp->data + headlen) - + page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + rx_buffer->pagecnt_bias++; + } -add_tail_frag: - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - (unsigned long)va & ~PAGE_MASK, size, truesize); + return skb; +} - /* avoid re-using remote pages */ - if (unlikely(ixgbevf_page_is_reserved(page))) - return false; +static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, + u32 qmask) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); +} +static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + struct xdp_buff *xdp, + union ixgbe_adv_rx_desc *rx_desc) +{ + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + struct sk_buff *skb; - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ; + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points to xdp->data, otherwise, we likely + * have a consumer accessing first few bytes of meta data, + * and then actual data. + */ + net_prefetch(xdp->data_meta); -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* build an skb around the page buffer */ + skb = napi_build_skb(xdp->data_hard_start, truesize); + if (unlikely(!skb)) + return NULL; - if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ)) - return false; + /* update pointers within the skb to store the data */ + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; #endif - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - page_ref_inc(page); - return true; + return skb; } -static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) +#define IXGBEVF_XDP_PASS 0 +#define IXGBEVF_XDP_CONSUMED 1 +#define IXGBEVF_XDP_TX 2 + +static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, + struct xdp_buff *xdp) { - struct ixgbevf_rx_buffer *rx_buffer; - struct page *page; + struct ixgbevf_tx_buffer *tx_buffer; + union ixgbe_adv_tx_desc *tx_desc; + u32 len, cmd_type; + dma_addr_t dma; + u16 i; - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); + len = xdp->data_end - xdp->data; - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + if (unlikely(!ixgbevf_desc_unused(ring))) + return IXGBEVF_XDP_CONSUMED; - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) + return IXGBEVF_XDP_CONSUMED; - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return NULL; - } + /* record the location of the first descriptor for this packet */ + i = ring->next_to_use; + tx_buffer = &ring->tx_buffer_info[i]; + + dma_unmap_len_set(tx_buffer, len, len); + dma_unmap_addr_set(tx_buffer, dma, dma); + tx_buffer->data = xdp->data; + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + /* Populate minimal context descriptor that will provide for the + * fact that we are expected to process Ethernet frames. + */ + if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { + struct ixgbe_adv_tx_context_desc *context_desc; - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + i = 1; } - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IXGBEVF_RX_BUFSZ, - DMA_FROM_DEVICE); + /* put descriptor type bits */ + cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS; + cmd_type |= len | IXGBE_TXD_CMD; - /* pull page into skb */ - if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + tx_desc = IXGBEVF_TX_DESC(ring, i); + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.olinfo_status = + cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + /* Avoid any potential race with cleanup */ + smp_wmb(); + + /* set next_to_watch value indicating a packet is present */ + i++; + if (i == ring->count) + i = 0; + + tx_buffer->next_to_watch = tx_desc; + ring->next_to_use = i; + + return IXGBEVF_XDP_TX; +} + +static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring, + struct xdp_buff *xdp) +{ + int result = IXGBEVF_XDP_PASS; + struct ixgbevf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 act; + + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; + result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + if (result == IXGBEVF_XDP_CONSUMED) + goto out_failure; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: +out_failure: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + fallthrough; /* handle aborts by dropping packet */ + case XDP_DROP: + result = IXGBEVF_XDP_CONSUMED; + break; } +xdp_out: + return result; +} - /* clear contents of buffer_info */ - rx_buffer->dma = 0; - rx_buffer->page = NULL; +static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, + unsigned int size) +{ + unsigned int truesize; - return skb; +#if (PAGE_SIZE < 8192) + truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ +#else + truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : + SKB_DATA_ALIGN(size); +#endif + return truesize; } -static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, - u32 qmask) +static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + unsigned int size) { - struct ixgbe_hw *hw = &adapter->hw; + unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size); - IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; + bool xdp_xmit = false; + struct xdp_buff xdp; + int xdp_res = 0; + + /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ +#if (PAGE_SIZE < 8192) + frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); +#endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { + struct ixgbevf_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; + unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) { @@ -930,8 +1143,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading @@ -940,13 +1153,51 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); + rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + /* retrieve a buffer from the ring */ - skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb); + if (!skb) { + unsigned int offset = ixgbevf_rx_offset(rx_ring); + unsigned char *hard_start; + + hard_start = page_address(rx_buffer->page) + + rx_buffer->page_offset - offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); +#if (PAGE_SIZE > 4096) + /* At larger PAGE_SIZE, frame_sz depend on len size */ + xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); +#endif + xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); + } + + if (xdp_res) { + if (xdp_res == IXGBEVF_XDP_TX) { + xdp_xmit = true; + ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, + size); + } else { + rx_buffer->pagecnt_bias++; + } + total_rx_packets++; + total_rx_bytes += size; + } else if (skb) { + ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); + } else if (ring_uses_build_skb(rx_ring)) { + skb = ixgbevf_build_skb(rx_ring, rx_buffer, + &xdp, rx_desc); + } else { + skb = ixgbevf_construct_skb(rx_ring, rx_buffer, + &xdp, rx_desc); + } /* exit if we failed to retrieve a buffer */ - if (!skb) + if (!xdp_res && !skb) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_buffer->pagecnt_bias++; break; + } + ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -954,7 +1205,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, continue; /* verify the packet layout is correct */ - if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { skb = NULL; continue; } @@ -988,6 +1239,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; + if (xdp_xmit) { + struct ixgbevf_ring *xdp_ring = + adapter->xdp_ring[rx_ring->queue_index]; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); + } + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1042,16 +1304,20 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) /* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget; - /* all work done, exit the polling mode */ - napi_complete_done(napi, work_done); - if (adapter->rx_itr_setting == 1) - ixgbevf_set_itr(q_vector); - if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && - !test_bit(__IXGBEVF_REMOVING, &adapter->state)) - ixgbevf_irq_enable_queues(adapter, - BIT(q_vector->v_idx)); - return 0; + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) { + if (adapter->rx_itr_setting == 1) + ixgbevf_set_itr(q_vector); + if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && + !test_bit(__IXGBEVF_REMOVING, &adapter->state)) + ixgbevf_irq_enable_queues(adapter, + BIT(q_vector->v_idx)); + } + + return min(work_done, budget - 1); } /** @@ -1167,6 +1433,9 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, */ /* what was last interrupt timeslice? */ timepassed_us = q_vector->itr >> 2; + if (timepassed_us == 0) + return; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ switch (itr_setting) { @@ -1261,85 +1530,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) return IRQ_HANDLED; } -static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx, - int r_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->rx_ring[r_idx]->next = q_vector->rx.ring; - q_vector->rx.ring = a->rx_ring[r_idx]; - q_vector->rx.count++; -} - -static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx, - int t_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->tx_ring[t_idx]->next = q_vector->tx.ring; - q_vector->tx.ring = a->tx_ring[t_idx]; - q_vector->tx.count++; -} - -/** - * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors - * @adapter: board private structure to initialize - * - * This function maps descriptor rings to the queue-specific vectors - * we were allotted through the MSI-X enabling code. Ideally, we'd have - * one vector per ring/queue, but on a constrained vector budget, we - * group the rings as "efficiently" as possible. You would add new - * mapping configurations in here. - **/ -static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) -{ - int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int i, j; - int rqpv, tqpv; - - q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - map_vector_to_rxq(adapter, v_start, rxr_idx); - - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - map_vector_to_txq(adapter, v_start, txr_idx); - return 0; - } - - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - */ - /* Re-adjusting *qpv takes care of the remainder. */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } - } - - return 0; -} - /** * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts * @adapter: board private structure @@ -1412,20 +1602,6 @@ free_queue_irqs: return err; } -static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) -{ - int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (i = 0; i < q_vectors; i++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[i]; - - q_vector->rx.ring = NULL; - q_vector->tx.ring = NULL; - q_vector->rx.count = 0; - q_vector->tx.count = 0; - } -} - /** * ixgbevf_request_irq - initialize interrupts * @adapter: board private structure @@ -1465,8 +1641,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter) free_irq(adapter->msix_entries[i].vector, adapter->q_vector[i]); } - - ixgbevf_reset_q_vectors(adapter); } /** @@ -1554,7 +1728,12 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, txdctl |= (1u << 8) | /* HTHRESH = 1 */ 32; /* PTHRESH = 32 */ + /* reinitialize tx_buffer_info */ + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbevf_tx_buffer) * ring->count); + clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); + clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), txdctl); @@ -1580,11 +1759,14 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) /* Setup the HW Tx Head and Tail descriptor pointers */ for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]); } #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 -static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) +static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring, int index) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; @@ -1592,7 +1774,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1722,6 +1907,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; + union ixgbe_adv_rx_desc *rx_desc; u64 rdba = ring->dma; u32 rxdctl; u8 reg_idx = ring->reg_idx; @@ -1750,15 +1936,34 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx); + /* initialize rx_buffer_info */ + memset(ring->rx_buffer_info, 0, + sizeof(struct ixgbevf_rx_buffer) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IXGBEVF_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; ring->next_to_alloc = 0; - ixgbevf_configure_srrctl(adapter, reg_idx); + ixgbevf_configure_srrctl(adapter, ring, reg_idx); - /* allow any size packet since we can handle overflow */ - rxdctl &= ~IXGBE_RXDCTL_RLPML_EN; + /* RXDCTL.RLPML does not work on 82599 */ + if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | + IXGBE_RXDCTL_RLPML_EN); + +#if (PAGE_SIZE < 8192) + /* Limit the maximum frame size so we don't overrun the skb */ + if (ring_uses_build_skb(ring) && + !ring_uses_large_buffer(ring)) + rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | + IXGBE_RXDCTL_RLPML_EN; +#endif + } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); @@ -1767,6 +1972,30 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) +{ + struct net_device *netdev = adapter->netdev; + unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* set build_skb and buffer size flags */ + clear_ring_build_skb_enabled(rx_ring); + clear_ring_uses_large_buffer(rx_ring); + + if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) + return; + + if (PAGE_SIZE < 8192) + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + set_ring_uses_large_buffer(rx_ring); + + /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) + return; + + set_ring_build_skb_enabled(rx_ring); +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1794,8 +2023,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; + + ixgbevf_set_rx_buffer_len(adapter, rx_ring); + ixgbevf_configure_rx_ring(adapter, rx_ring); + } } static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, @@ -1812,12 +2045,16 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, spin_unlock_bh(&adapter->mbx_lock); - /* translate error return types so error makes sense */ - if (err == IXGBE_ERR_MBX) - return -EIO; + if (err) { + netdev_err(netdev, "VF could not set VLAN %d\n", vid); + + /* translate error return types so error makes sense */ + if (err == IXGBE_ERR_MBX) + return -EIO; - if (err == IXGBE_ERR_INVALID_ARGUMENT) - return -EACCES; + if (err == IXGBE_ERR_INVALID_ARGUMENT) + return -EACCES; + } set_bit(vid, adapter->active_vlans); @@ -1838,6 +2075,9 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, spin_unlock_bh(&adapter->mbx_lock); + if (err) + netdev_err(netdev, "Could not remove VLAN %d\n", vid); + clear_bit(vid, adapter->active_vlans); return err; @@ -1858,11 +2098,6 @@ static int ixgbevf_write_uc_addr_list(struct net_device *netdev) struct ixgbe_hw *hw = &adapter->hw; int count = 0; - if ((netdev_uc_count(netdev)) > 10) { - pr_err("Too many unicast filters - No Space\n"); - return -ENOSPC; - } - if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha; @@ -1896,10 +2131,6 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) unsigned int flags = netdev->flags; int xcast_mode; - xcast_mode = (flags & IFF_ALLMULTI) ? IXGBEVF_XCAST_MODE_ALLMULTI : - (flags & (IFF_BROADCAST | IFF_MULTICAST)) ? - IXGBEVF_XCAST_MODE_MULTI : IXGBEVF_XCAST_MODE_NONE; - /* request the most inclusive mode we need */ if (flags & IFF_PROMISC) xcast_mode = IXGBEVF_XCAST_MODE_PROMISC; @@ -1996,6 +2227,7 @@ static void ixgbevf_configure(struct ixgbevf_adapter *adapter) ixgbevf_set_rx_mode(adapter->netdev); ixgbevf_restore_vlan(adapter); + ixgbevf_ipsec_restore(adapter); ixgbevf_configure_tx(adapter); ixgbevf_configure_rx(adapter); @@ -2039,14 +2271,44 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +/** + * ixgbevf_set_features - Set features supported by PF + * @adapter: pointer to the adapter struct + * + * Negotiate with PF supported features and then set pf_features accordingly. + */ +static void ixgbevf_set_features(struct ixgbevf_adapter *adapter) +{ + u32 *pf_features = &adapter->pf_features; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + err = hw->mac.ops.negotiate_features(hw, pf_features); + if (err && err != -EOPNOTSUPP) + netdev_dbg(adapter->netdev, + "PF feature negotiation failed.\n"); + + /* Address also pre API 1.7 cases */ + if (hw->api_version == ixgbe_mbox_api_14) + *pf_features |= IXGBEVF_PF_SUP_IPSEC; + else if (hw->api_version == ixgbe_mbox_api_15) + *pf_features |= IXGBEVF_PF_SUP_ESX_MBX; +} + static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int api[] = { ixgbe_mbox_api_13, - ixgbe_mbox_api_12, - ixgbe_mbox_api_11, - ixgbe_mbox_api_10, - ixgbe_mbox_api_unknown }; + static const int api[] = { + ixgbe_mbox_api_17, + ixgbe_mbox_api_16, + ixgbe_mbox_api_15, + ixgbe_mbox_api_14, + ixgbe_mbox_api_13, + ixgbe_mbox_api_12, + ixgbe_mbox_api_11, + ixgbe_mbox_api_10, + ixgbe_mbox_api_unknown + }; int err, idx = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2058,13 +2320,23 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } + ixgbevf_set_features(adapter); + + if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) { + hw->mbx.ops.init_params(hw); + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + sizeof(struct ixgbe_mbx_operations)); + } + spin_unlock_bh(&adapter->mbx_lock); } static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; struct ixgbe_hw *hw = &adapter->hw; + bool state; ixgbevf_configure_msix(adapter); @@ -2077,6 +2349,11 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) spin_unlock_bh(&adapter->mbx_lock); + state = adapter->link_state; + hw->mac.ops.get_link_state(hw, &adapter->link_state); + if (state && state != adapter->link_state) + dev_info(&pdev->dev, "VF is administratively disabled\n"); + smp_mb__before_atomic(); clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -2108,9 +2385,7 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) **/ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { - struct device *dev = rx_ring->dev; - unsigned long size; - unsigned int i; + u16 i = rx_ring->next_to_clean; /* Free Rx ring sk_buff */ if (rx_ring->skb) { @@ -2118,29 +2393,39 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) rx_ring->skb = NULL; } - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buffer_info) - return; - /* Free all the Rx ring pages */ - for (i = 0; i < rx_ring->count; i++) { + while (i != rx_ring->next_to_alloc) { struct ixgbevf_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[i]; - if (rx_buffer->dma) - dma_unmap_page(dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); - rx_buffer->dma = 0; - if (rx_buffer->page) - __free_page(rx_buffer->page); - rx_buffer->page = NULL; - } - size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbevf_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, + rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); + + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); + i++; + if (i == rx_ring->count) + i = 0; + } + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; } /** @@ -2149,23 +2434,60 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) **/ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { - struct ixgbevf_tx_buffer *tx_buffer_info; - unsigned long size; - unsigned int i; + u16 i = tx_ring->next_to_clean; + struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; - if (!tx_ring->tx_buffer_info) - return; + while (i != tx_ring->next_to_use) { + union ixgbe_adv_tx_desc *eop_desc, *tx_desc; + + /* Free all the Tx ring sk_buffs */ + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buffer->data); + else + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IXGBEVF_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); + } - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } } - size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; - memset(tx_ring->tx_buffer_info, 0, size); + /* reset next_to_use and next_to_clean */ + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; - memset(tx_ring->desc, 0, tx_ring->size); } /** @@ -2190,6 +2512,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); } void ixgbevf_down(struct ixgbevf_adapter *adapter) @@ -2218,7 +2542,7 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) ixgbevf_napi_disable_all(adapter); - del_timer_sync(&adapter->service_timer); + timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ for (i = 0; i < adapter->num_tx_queues; i++) { @@ -2228,6 +2552,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) IXGBE_TXDCTL_SWFLSH); } + for (i = 0; i < adapter->num_xdp_queues; i++) { + u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); + } + if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -2237,12 +2568,11 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) { - WARN_ON(in_interrupt()); - while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) msleep(1); ixgbevf_down(adapter); + pci_set_master(adapter->pdev); ixgbevf_up(adapter); clear_bit(__IXGBEVF_RESETTING, &adapter->state); @@ -2261,7 +2591,7 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) } if (is_valid_ether_addr(adapter->hw.mac.addr)) { - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } @@ -2312,7 +2642,7 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter, * important, starting with the "most" number of features turned on at once, * and ending with the smallest set of features. This way large combinations * can be allocated if they're turned on, and smaller combinations are the - * fallthrough conditions. + * fall through conditions. * **/ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) @@ -2325,6 +2655,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) /* Start with base case */ adapter->num_rx_queues = 1; adapter->num_tx_queues = 1; + adapter->num_xdp_queues = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2346,8 +2677,18 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 2 : 1; + adapter->num_rx_queues = rss; adapter->num_tx_queues = rss; + adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0; + break; default: break; } @@ -2355,105 +2696,209 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) } /** - * ixgbevf_alloc_queues - Allocate memory for all rings + * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported * @adapter: board private structure to initialize * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. The polling_netdev array is - * intended for Multiqueue, but should work fine with a single queue. + * Attempt to configure the interrupts using the best available + * capabilities of the hardware and the kernel. **/ -static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter) +static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { + int vector, v_budget; + + /* It's easy to be greedy for MSI-X vectors, but it really + * doesn't do us much good if we have a lot more vectors + * than CPU's. So let's be conservative and only ask for + * (roughly) the same number of vectors as there are CPU's. + * The default is to use pairs of vectors. + */ + v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); + v_budget = min_t(int, v_budget, num_online_cpus()); + v_budget += NON_Q_VECTORS; + + adapter->msix_entries = kcalloc(v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + if (!adapter->msix_entries) + return -ENOMEM; + + for (vector = 0; vector < v_budget; vector++) + adapter->msix_entries[vector].entry = vector; + + /* A failure in MSI-X entry allocation isn't fatal, but the VF driver + * does not support any other modes, so we will simply fail here. Note + * that we clean up the msix_entries pointer else-where. + */ + return ixgbevf_acquire_msix_vectors(adapter, v_budget); +} + +static void ixgbevf_add_ring(struct ixgbevf_ring *ring, + struct ixgbevf_ring_container *head) +{ + ring->next = head->ring; + head->ring = ring; + head->count++; +} + +/** + * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct + * @txr_count: number of Tx rings for q vector + * @txr_idx: index of first Tx ring to assign + * @xdp_count: total number of XDP rings to allocate + * @xdp_idx: index of first XDP ring to allocate + * @rxr_count: number of Rx rings for q vector + * @rxr_idx: index of first Rx ring to assign + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + **/ +static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, + int txr_count, int txr_idx, + int xdp_count, int xdp_idx, + int rxr_count, int rxr_idx) +{ + struct ixgbevf_q_vector *q_vector; + int reg_idx = txr_idx + xdp_idx; struct ixgbevf_ring *ring; - int rx = 0, tx = 0; + int ring_count, size; - for (; tx < adapter->num_tx_queues; tx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + ring_count = txr_count + xdp_count + rxr_count; + size = sizeof(*q_vector) + (sizeof(*ring) * ring_count); + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + q_vector->v_idx = v_idx; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + while (txr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; - ring->queue_index = tx; - ring->reg_idx = tx; + ring->queue_index = txr_idx; + ring->reg_idx = reg_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; - adapter->tx_ring[tx] = ring; + /* update count and index */ + txr_count--; + txr_idx++; + reg_idx++; + + /* push pointer to next ring */ + ring++; } - for (; rx < adapter->num_rx_queues; rx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + while (xdp_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = xdp_idx; + ring->reg_idx = reg_idx; + set_ring_xdp(ring); + + /* assign ring to adapter */ + adapter->xdp_ring[xdp_idx] = ring; + /* update count and index */ + xdp_count--; + xdp_idx++; + reg_idx++; + + /* push pointer to next ring */ + ring++; + } + + while (rxr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + ixgbevf_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ ring->count = adapter->rx_ring_count; - ring->queue_index = rx; - ring->reg_idx = rx; + ring->queue_index = rxr_idx; + ring->reg_idx = rxr_idx; - adapter->rx_ring[rx] = ring; - } + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; - return 0; + /* update count and index */ + rxr_count--; + rxr_idx++; -err_allocation: - while (tx) { - kfree(adapter->tx_ring[--tx]); - adapter->tx_ring[tx] = NULL; + /* push pointer to next ring */ + ring++; } - while (rx) { - kfree(adapter->rx_ring[--rx]); - adapter->rx_ring[rx] = NULL; - } - return -ENOMEM; + return 0; } /** - * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported + * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct * - * Attempt to configure the interrupts using the best available - * capabilities of the hardware and the kernel. + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ -static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) +static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx) { - struct net_device *netdev = adapter->netdev; - int err; - int vector, v_budget; - - /* It's easy to be greedy for MSI-X vectors, but it really - * doesn't do us much good if we have a lot more vectors - * than CPU's. So let's be conservative and only ask for - * (roughly) the same number of vectors as there are CPU's. - * The default is to use pairs of vectors. - */ - v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); - v_budget = min_t(int, v_budget, num_online_cpus()); - v_budget += NON_Q_VECTORS; + struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx]; + struct ixgbevf_ring *ring; - /* A failure in MSI-X entry allocation isn't fatal, but it does - * mean we disable MSI-X capabilities of the adapter. - */ - adapter->msix_entries = kcalloc(v_budget, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) - return -ENOMEM; + ixgbevf_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + adapter->xdp_ring[ring->queue_index] = NULL; + else + adapter->tx_ring[ring->queue_index] = NULL; + } - for (vector = 0; vector < v_budget; vector++) - adapter->msix_entries[vector].entry = vector; + ixgbevf_for_each_ring(ring, q_vector->rx) + adapter->rx_ring[ring->queue_index] = NULL; - err = ixgbevf_acquire_msix_vectors(adapter, v_budget); - if (err) - return err; + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - return err; - - return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + /* ixgbevf_get_stats() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + kfree_rcu(q_vector, rcu); } /** @@ -2465,35 +2910,58 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors; - struct ixgbevf_q_vector *q_vector; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int xdp_remaining = adapter->num_xdp_queues; + int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) { + for (; rxr_remaining; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + + err = ixgbevf_alloc_q_vector(adapter, v_idx, + 0, 0, 0, 0, rqpv, rxr_idx); + if (err) + goto err_out; - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + } + } + + for (; q_vectors; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors); + int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors); - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL); - if (!q_vector) + err = ixgbevf_alloc_q_vector(adapter, v_idx, + tqpv, txr_idx, + xqpv, xdp_idx, + rqpv, rxr_idx); + + if (err) goto err_out; - q_vector->adapter = adapter; - q_vector->v_idx = q_idx; - netif_napi_add(adapter->netdev, &q_vector->napi, - ixgbevf_poll, 64); - adapter->q_vector[q_idx] = q_vector; + + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + txr_remaining -= tqpv; + txr_idx += tqpv; + xdp_remaining -= xqpv; + xdp_idx += xqpv; } return 0; err_out: - while (q_idx) { - q_idx--; - q_vector = adapter->q_vector[q_idx]; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; + while (v_idx) { + v_idx--; + ixgbevf_free_q_vector(adapter, v_idx); } + return -ENOMEM; } @@ -2507,17 +2975,11 @@ err_out: **/ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx]; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - adapter->q_vector[q_idx] = NULL; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); + while (q_vectors) { + q_vectors--; + ixgbevf_free_q_vector(adapter, q_vectors); } } @@ -2561,21 +3023,14 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) goto err_alloc_q_vectors; } - err = ixgbevf_alloc_queues(adapter); - if (err) { - pr_err("Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - - hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", - (adapter->num_rx_queues > 1) ? "Enabled" : - "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); + hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n", + (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", + adapter->num_rx_queues, adapter->num_tx_queues, + adapter->num_xdp_queues); set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; -err_alloc_queues: - ixgbevf_free_q_vectors(adapter); err_alloc_q_vectors: ixgbevf_reset_interrupt_capability(adapter); err_set_interrupt: @@ -2591,18 +3046,8 @@ err_set_interrupt: **/ static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); - adapter->rx_ring[i] = NULL; - } - adapter->num_tx_queues = 0; + adapter->num_xdp_queues = 0; adapter->num_rx_queues = 0; ixgbevf_free_q_vectors(adapter); @@ -2663,7 +3108,7 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - ether_addr_copy(netdev->dev_addr, hw->mac.addr); + eth_hw_addr_set(netdev, hw->mac.addr); } if (!is_valid_ether_addr(netdev->dev_addr)) { @@ -2681,6 +3126,8 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) adapter->tx_ring_count = IXGBEVF_DEFAULT_TXD; adapter->rx_ring_count = IXGBEVF_DEFAULT_RXD; + adapter->link_state = true; + set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; @@ -2717,6 +3164,8 @@ out: void ixgbevf_update_stats(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; + u64 alloc_rx_page = 0, hw_csum_rx_error = 0; int i; if (test_bit(__IXGBEVF_DOWN, &adapter->state) || @@ -2737,19 +3186,28 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter) adapter->stats.vfmprc); for (i = 0; i < adapter->num_rx_queues; i++) { - adapter->hw_csum_rx_error += - adapter->rx_ring[i]->hw_csum_rx_error; - adapter->rx_ring[i]->hw_csum_rx_error = 0; + struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; + + hw_csum_rx_error += rx_ring->rx_stats.csum_err; + alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; + alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; + alloc_rx_page += rx_ring->rx_stats.alloc_rx_page; } + + adapter->hw_csum_rx_error = hw_csum_rx_error; + adapter->alloc_rx_page_failed = alloc_rx_page_failed; + adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; + adapter->alloc_rx_page = alloc_rx_page; } /** * ixgbevf_service_timer - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: pointer to timer_list struct **/ -static void ixgbevf_service_timer(unsigned long data) +static void ixgbevf_service_timer(struct timer_list *t) { - struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data; + struct ixgbevf_adapter *adapter = timer_container_of(adapter, t, + service_timer); /* Reset the timer */ mod_timer(&adapter->service_timer, (HZ * 2) + jiffies); @@ -2762,15 +3220,17 @@ static void ixgbevf_reset_subtask(struct ixgbevf_adapter *adapter) if (!test_and_clear_bit(__IXGBEVF_RESET_REQUESTED, &adapter->state)) return; + rtnl_lock(); /* If we're already down or resetting, just bail */ if (test_bit(__IXGBEVF_DOWN, &adapter->state) || test_bit(__IXGBEVF_REMOVING, &adapter->state) || - test_bit(__IXGBEVF_RESETTING, &adapter->state)) + test_bit(__IXGBEVF_RESETTING, &adapter->state)) { + rtnl_unlock(); return; + } adapter->tx_timeout_count++; - rtnl_lock(); ixgbevf_reinit_locked(adapter); rtnl_unlock(); } @@ -2799,6 +3259,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) if (netif_carrier_ok(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + set_check_for_tx_hang(adapter->xdp_ring[i]); } /* get one bit for every active Tx/Rx interrupt vector */ @@ -2887,7 +3349,7 @@ static void ixgbevf_watchdog_link_is_down(struct ixgbevf_adapter *adapter) /** * ixgbevf_watchdog_subtask - worker thread to bring link up - * @work: pointer to work_struct containing our data + * @adapter: board private structure **/ static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter) { @@ -2898,7 +3360,7 @@ static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter) ixgbevf_watchdog_update_link(adapter); - if (adapter->link_up) + if (adapter->link_up && adapter->link_state) ixgbevf_watchdog_link_is_up(adapter); else ixgbevf_watchdog_link_is_down(adapter); @@ -2970,6 +3432,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) if (adapter->tx_ring[i]->desc) ixgbevf_free_tx_resources(adapter->tx_ring[i]); + for (i = 0; i < adapter->num_xdp_queues; i++) + if (adapter->xdp_ring[i]->desc) + ixgbevf_free_tx_resources(adapter->xdp_ring[i]); } /** @@ -2984,10 +3449,12 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) int size; size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc(size); + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -3018,34 +3485,54 @@ err: **/ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) { - int i, err = 0; + int i, j = 0, err = 0; for (i = 0; i < adapter->num_tx_queues; i++) { err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]); if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i); - break; + goto err_setup_tx; } + for (j = 0; j < adapter->num_xdp_queues; j++) { + err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]); + if (!err) + continue; + hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j); + goto err_setup_tx; + } + + return 0; +err_setup_tx: + /* rewind the index freeing the rings as we go */ + while (j--) + ixgbevf_free_tx_resources(adapter->xdp_ring[j]); + while (i--) + ixgbevf_free_tx_resources(adapter->tx_ring[i]); + return err; } /** * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * @adapter: board private structure * @rx_ring: Rx descriptor ring (for a specific queue) to setup * * Returns 0 on success, negative on failure **/ -int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) { int size; size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc(size); + rx_ring->rx_buffer_info = vmalloc(size); if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); @@ -3056,6 +3543,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) if (!rx_ring->desc) goto err; + /* XDP RX-queue info */ + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0) < 0) + goto err; + + rx_ring->xdp_prog = adapter->xdp_prog; + return 0; err: vfree(rx_ring->rx_buffer_info); @@ -3079,12 +3573,18 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_rx_queues; i++) { - err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]); + err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]); if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i); - break; + goto err_setup_rx; } + + return 0; +err_setup_rx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbevf_free_rx_resources(adapter->rx_ring[i]); return err; } @@ -3098,6 +3598,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { ixgbevf_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -3179,28 +3681,31 @@ int ixgbevf_open(struct net_device *netdev) ixgbevf_configure(adapter); - /* Map the Tx/Rx rings to the vectors we were allotted. - * if request_irq will be called in this function map_rings - * must be called *before* up_complete - */ - ixgbevf_map_rings_to_vectors(adapter); - err = ixgbevf_request_irq(adapter); if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + if (err) + goto err_set_queues; + ixgbevf_up_complete(adapter); return 0; +err_set_queues: + ixgbevf_free_irq(adapter); err_req_irq: - ixgbevf_down(adapter); -err_setup_rx: ixgbevf_free_all_rx_resources(adapter); -err_setup_tx: +err_setup_rx: ixgbevf_free_all_tx_resources(adapter); +err_setup_tx: ixgbevf_reset(adapter); - err_setup_reset: return err; @@ -3274,8 +3779,8 @@ static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter) } static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring, - u32 vlan_macip_lens, u32 type_tucmd, - u32 mss_l4len_idx) + u32 vlan_macip_lens, u32 fceof_saidx, + u32 type_tucmd, u32 mss_l4len_idx) { struct ixgbe_adv_tx_context_desc *context_desc; u16 i = tx_ring->next_to_use; @@ -3289,14 +3794,15 @@ static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring, type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); - context_desc->seqnum_seed = 0; + context_desc->fceof_saidx = cpu_to_le32(fceof_saidx); context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, struct ixgbevf_tx_buffer *first, - u8 *hdr_len) + u8 *hdr_len, + struct ixgbevf_ipsec_tx_data *itd) { u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff *skb = first->skb; @@ -3310,6 +3816,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, unsigned char *hdr; } l4; u32 paylen, l4_offset; + u32 fceof_saidx = 0; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -3335,13 +3842,15 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, if (ip.v4->version == 4) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + int len = csum_start - trans_start; /* IP header will have to cancel out any data that - * is not a part of the outer IP header + * is not a part of the outer IP header, so set to + * a reverse csum if needed, else init check to 0. */ - ip.v4->check = csum_fold(csum_partial(trans_start, - csum_start - trans_start, - 0)); + ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ? + csum_fold(csum_partial(trans_start, + len, 0)) : 0; type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; ip.v4->tot_len = 0; @@ -3362,7 +3871,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; @@ -3373,31 +3882,27 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; mss_l4len_idx |= (1u << IXGBE_ADVTXD_IDX_SHIFT); + fceof_saidx |= itd->pfsa; + type_tucmd |= itd->flags | itd->trailer_len; + /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ vlan_macip_lens = l4.hdr - ip.hdr; vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, - type_tucmd, mss_l4len_idx); + ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, + mss_l4len_idx); return 1; } -static inline bool ixgbevf_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first) + struct ixgbevf_tx_buffer *first, + struct ixgbevf_ipsec_tx_data *itd) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; + u32 fceof_saidx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -3406,23 +3911,24 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, switch (skb->csum_offset) { case offsetof(struct tcphdr, check): type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; - /* fall through */ + fallthrough; case offsetof(struct udphdr, check): break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if (((first->protocol == htons(ETH_P_IP)) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - ((first->protocol == htons(ETH_P_IPV6)) && - ixgbevf_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; } - /* fall through */ + fallthrough; default: skb_checksum_help(skb); goto no_csum; } + + if (first->protocol == htons(ETH_P_IP)) + type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; vlan_macip_lens = skb_checksum_start_offset(skb) - @@ -3432,7 +3938,11 @@ no_csum: vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); + fceof_saidx |= itd->pfsa; + type_tucmd |= itd->flags | itd->trailer_len; + + ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, + fceof_saidx, type_tucmd, 0); } static __le32 ixgbevf_tx_cmd_type(u32 tx_flags) @@ -3466,8 +3976,12 @@ static void ixgbevf_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, if (tx_flags & IXGBE_TX_FLAGS_IPV4) olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); - /* use index 1 context for TSO/FSO/FCOE */ - if (tx_flags & IXGBE_TX_FLAGS_TSO) + /* enable IPsec */ + if (tx_flags & IXGBE_TX_FLAGS_IPSEC) + olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IPSEC); + + /* use index 1 context for TSO/FSO/FCOE/IPSEC */ + if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_IPSEC)) olinfo_status |= cpu_to_le32(1u << IXGBE_ADVTXD_IDX_SHIFT); /* Check Context must be set if Tx switch is enabled, which it @@ -3482,34 +3996,37 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, struct ixgbevf_tx_buffer *first, const u8 hdr_len) { - dma_addr_t dma; struct sk_buff *skb = first->skb; struct ixgbevf_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned int data_len = skb->data_len; - unsigned int size = skb_headlen(skb); - unsigned int paylen = skb->len - hdr_len; + skb_frag_t *frag; + dma_addr_t dma; + unsigned int data_len, size; u32 tx_flags = first->tx_flags; - __le32 cmd_type; + __le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags); u16 i = tx_ring->next_to_use; tx_desc = IXGBEVF_TX_DESC(tx_ring, i); - ixgbevf_tx_olinfo_status(tx_desc, tx_flags, paylen); - cmd_type = ixgbevf_tx_cmd_type(tx_flags); + ixgbevf_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; - /* record length, and DMA address */ - dma_unmap_len_set(first, len, size); - dma_unmap_addr_set(first, dma, dma); + tx_buffer = first; - tx_desc->read.buffer_addr = cpu_to_le64(dma); + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); - for (;;) { while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) { tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); @@ -3520,12 +4037,12 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); i = 0; } + tx_desc->read.olinfo_status = 0; dma += IXGBE_MAX_DATA_PER_TXD; size -= IXGBE_MAX_DATA_PER_TXD; tx_desc->read.buffer_addr = cpu_to_le64(dma); - tx_desc->read.olinfo_status = 0; } if (likely(!data_len)) @@ -3539,23 +4056,15 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); i = 0; } + tx_desc->read.olinfo_status = 0; size = skb_frag_size(frag); data_len -= size; dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; tx_buffer = &tx_ring->tx_buffer_info[i]; - dma_unmap_len_set(tx_buffer, len, size); - dma_unmap_addr_set(tx_buffer, dma, dma); - - tx_desc->read.buffer_addr = cpu_to_le64(dma); - tx_desc->read.olinfo_status = 0; - - frag++; } /* write last descriptor with RS and EOP bits */ @@ -3565,6 +4074,8 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, /* set the timestamp */ first->time_stamp = jiffies; + skb_tx_timestamp(skb); + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). @@ -3589,18 +4100,32 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, return; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - for (;;) { + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i-- == 0) + i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; - ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer); - if (tx_buffer == first) - break; - if (i == 0) - i = tx_ring->count; - i--; } + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(tx_buffer->skb); + tx_buffer->skb = NULL; + tx_ring->next_to_use = i; } @@ -3633,14 +4158,14 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) return __ixgbevf_maybe_stop_tx(tx_ring, size); } -static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, + struct ixgbevf_ring *tx_ring) { - struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_tx_buffer *first; - struct ixgbevf_ring *tx_ring; int tso; u32 tx_flags = 0; u16 count = TXD_USE_COUNT(skb_headlen(skb)); + struct ixgbevf_ipsec_tx_data ipsec_tx = { 0 }; #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD unsigned short f; #endif @@ -3652,8 +4177,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - tx_ring = adapter->tx_ring[skb->queue_mapping]; - /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, @@ -3661,8 +4184,11 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * otherwise try next time */ #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[f]; + + count += TXD_USE_COUNT(skb_frag_size(frag)); + } #else count += skb_shinfo(skb)->nr_frags; #endif @@ -3687,11 +4213,15 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) first->tx_flags = tx_flags; first->protocol = vlan_get_protocol(skb); - tso = ixgbevf_tso(tx_ring, first, &hdr_len); +#ifdef CONFIG_IXGBEVF_IPSEC + if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) + goto out_drop; +#endif + tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx); if (tso < 0) goto out_drop; else if (!tso) - ixgbevf_tx_csum(tx_ring, first); + ixgbevf_tx_csum(tx_ring, first, &ipsec_tx); ixgbevf_tx_map(tx_ring, first, hdr_len); @@ -3706,6 +4236,29 @@ out_drop: return NETDEV_TX_OK; } +static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + struct ixgbevf_ring *tx_ring; + + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + tx_ring = adapter->tx_ring[skb->queue_mapping]; + return ixgbevf_xmit_frame_ring(skb, tx_ring); +} + /** * ixgbevf_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure @@ -3733,7 +4286,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) return -EPERM; ether_addr_copy(hw->mac.addr, addr->sa_data); - ether_addr_copy(netdev->dev_addr, addr->sa_data); + ether_addr_copy(hw->mac.perm_addr, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); return 0; } @@ -3752,6 +4306,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int ret; + /* prevent MTU being changed to a size unsupported by XDP */ + if (adapter->xdp_prog) { + dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n"); + return -EPERM; + } + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); @@ -3763,36 +4323,18 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu, new_mtu); /* must set new MTU before calling down or up */ - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); - return 0; -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ -static void ixgbevf_netpoll(struct net_device *netdev) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - int i; + if (netif_running(netdev)) + ixgbevf_reinit_locked(adapter); - /* if interface is down do nothing */ - if (test_bit(__IXGBEVF_DOWN, &adapter->state)) - return; - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbevf_msix_clean_rings(0, adapter->q_vector[i]); + return 0; } -#endif /* CONFIG_NET_POLL_CONTROLLER */ -static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) +static int ixgbevf_suspend(struct device *dev_d) { - struct net_device *netdev = pci_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev_d); struct ixgbevf_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_PM - int retval = 0; -#endif rtnl_lock(); netif_device_detach(netdev); @@ -3803,36 +4345,15 @@ static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) ixgbevf_clear_interrupt_scheme(adapter); rtnl_unlock(); -#ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; - -#endif - if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state)) - pci_disable_device(pdev); - return 0; } -#ifdef CONFIG_PM -static int ixgbevf_resume(struct pci_dev *pdev) +static int ixgbevf_resume(struct device *dev_d) { + struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbevf_adapter *adapter = netdev_priv(netdev); - u32 err; - - pci_restore_state(pdev); - /* pci_restore_state clears dev->state_saved so call - * pci_save_state to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } + int err; adapter->hw.hw_addr = adapter->io_addr; smp_mb__before_atomic(); @@ -3843,27 +4364,37 @@ static int ixgbevf_resume(struct pci_dev *pdev) rtnl_lock(); err = ixgbevf_init_interrupt_scheme(adapter); + if (!err && netif_running(netdev)) + err = ixgbevf_open(netdev); rtnl_unlock(); - if (err) { - dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + if (err) return err; - } - - if (netif_running(netdev)) { - err = ixgbevf_open(netdev); - if (err) - return err; - } netif_device_attach(netdev); return err; } -#endif /* CONFIG_PM */ static void ixgbevf_shutdown(struct pci_dev *pdev) { - ixgbevf_suspend(pdev, PMSG_SUSPEND); + ixgbevf_suspend(&pdev->dev); +} + +static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, + const struct ixgbevf_ring *ring) +{ + u64 bytes, packets; + unsigned int start; + + if (ring) { + do { + start = u64_stats_fetch_begin(&ring->syncp); + bytes = ring->stats.bytes; + packets = ring->stats.packets; + } while (u64_stats_fetch_retry(&ring->syncp, start)); + stats->tx_bytes += bytes; + stats->tx_packets += packets; + } } static void ixgbevf_get_stats(struct net_device *netdev, @@ -3879,27 +4410,28 @@ static void ixgbevf_get_stats(struct net_device *netdev, stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc; + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_bytes += bytes; stats->rx_packets += packets; } for (i = 0; i < adapter->num_tx_queues; i++) { ring = adapter->tx_ring[i]; - do { - start = u64_stats_fetch_begin_irq(&ring->syncp); - bytes = ring->stats.bytes; - packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); - stats->tx_bytes += bytes; - stats->tx_packets += packets; + ixgbevf_get_tx_ring_stats(stats, ring); + } + + for (i = 0; i < adapter->num_xdp_queues; i++) { + ring = adapter->xdp_ring[i]; + ixgbevf_get_tx_ring_stats(stats, ring); } + rcu_read_unlock(); } #define IXGBEVF_MAX_MAC_HDR_LEN 127 @@ -3912,7 +4444,7 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | @@ -3936,6 +4468,57 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +{ + int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct bpf_prog *old_prog; + + /* verify ixgbevf ring attributes are sufficient for XDP */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *ring = adapter->rx_ring[i]; + + if (frame_size > ixgbevf_rx_bufsz(ring)) + return -EINVAL; + } + + old_prog = xchg(&adapter->xdp_prog, prog); + + /* If transitioning XDP modes reconfigure rings */ + if (!!prog != !!old_prog) { + /* Hardware has to reinitialize queues and interrupts to + * match packet buffer alignment. Unfortunately, the + * hardware is not flexible enough to do this dynamically. + */ + if (netif_running(dev)) + ixgbevf_close(dev); + + ixgbevf_clear_interrupt_scheme(adapter); + ixgbevf_init_interrupt_scheme(adapter); + + if (netif_running(dev)) + ixgbevf_open(dev); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return ixgbevf_xdp_setup(dev, xdp->prog); + default: + return -EINVAL; + } +} + static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, @@ -3948,10 +4531,8 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_tx_timeout = ixgbevf_tx_timeout, .ndo_vlan_rx_add_vid = ixgbevf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ixgbevf_netpoll, -#endif .ndo_features_check = ixgbevf_features_check, + .ndo_bpf = ixgbevf_xdp, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -3978,22 +4559,17 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgbevf_adapter *adapter = NULL; struct ixgbe_hw *hw = NULL; const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data]; - int err, pci_using_dac; bool disable_dev = false; + int err; err = pci_enable_device(pdev); if (err) return err; - if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); - goto err_dma; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_regions(pdev, ixgbevf_driver_name); @@ -4040,7 +4616,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops)); hw->mac.type = ii->mac; - memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops_legacy, sizeof(struct ixgbe_mbx_operations)); /* setup the private structure */ @@ -4073,10 +4649,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_PARTIAL | IXGBEVF_GSO_PARTIAL_FEATURES; - netdev->features = netdev->hw_features; - - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_SG | @@ -4092,6 +4665,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; @@ -4099,6 +4673,10 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); break; @@ -4116,8 +4694,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_sw_init; } - setup_timer(&adapter->service_timer, &ixgbevf_service_timer, - (unsigned long)adapter); + timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0); INIT_WORK(&adapter->service_task, ixgbevf_service_task); set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state); @@ -4135,6 +4712,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, netdev); netif_carrier_off(netdev); + ixgbevf_init_ipsec_offload(adapter); ixgbevf_init_last_counter_stats(adapter); @@ -4149,6 +4727,9 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mac_X540_vf: dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n"); break; + case ixgbe_mac_e610_vf: + dev_info(&pdev->dev, "Intel(R) E610 Virtual Function\n"); + break; case ixgbe_mac_82599_vf: default: dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n"); @@ -4201,6 +4782,7 @@ static void ixgbevf_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + ixgbevf_stop_ipsec_offload(adapter); ixgbevf_clear_interrupt_scheme(adapter); ixgbevf_reset_interrupt_capability(adapter); @@ -4237,19 +4819,19 @@ static pci_ers_result_t ixgbevf_io_error_detected(struct pci_dev *pdev, rtnl_lock(); netif_device_detach(netdev); + if (netif_running(netdev)) + ixgbevf_close_suspend(adapter); + if (state == pci_channel_io_perm_failure) { rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; } - if (netif_running(netdev)) - ixgbevf_close_suspend(adapter); - if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state)) pci_disable_device(pdev); rtnl_unlock(); - /* Request a slot slot reset. */ + /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } @@ -4308,16 +4890,17 @@ static const struct pci_error_handlers ixgbevf_err_handler = { .resume = ixgbevf_io_resume, }; +static DEFINE_SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume); + static struct pci_driver ixgbevf_driver = { .name = ixgbevf_driver_name, .id_table = ixgbevf_pci_tbl, .probe = ixgbevf_probe, .remove = ixgbevf_remove, -#ifdef CONFIG_PM + /* Power Management Hooks */ - .suspend = ixgbevf_suspend, - .resume = ixgbevf_resume, -#endif + .driver.pm = pm_sleep_ptr(&ixgbevf_pm_ops), + .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler }; @@ -4330,9 +4913,9 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { - pr_info("%s - version %s\n", ixgbevf_driver_string, - ixgbevf_driver_version); + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); if (!ixgbevf_wq) { @@ -4340,7 +4923,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); @@ -4364,6 +4953,7 @@ static void __exit ixgbevf_exit_module(void) /** * ixgbevf_get_hw_dev_name - return device name string * used by hardware layer to print debugging information + * @hw: pointer to private hardware struct **/ char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw) { |
