Diffstat (limited to 'drivers/net/vmxnet3/vmxnet3_drv.c')
 -rw-r--r--  drivers/net/vmxnet3/vmxnet3_drv.c | 2284
 1 file changed, 1784 insertions(+), 500 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 55a62cae2cb4..0572f6a9bdb6 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2024, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -20,14 +20,19 @@ * The full GNU General Public License is included in this distribution in * the file called "COPYING". * - * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * Maintained by: pv-drivers@vmware.com * */ #include <linux/module.h> #include <net/ip6_checksum.h> +#ifdef CONFIG_X86 +#include <asm/msr.h> +#endif + #include "vmxnet3_int.h" +#include "vmxnet3_xdp.h" char vmxnet3_driver_name[] = "vmxnet3"; #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver" @@ -36,7 +41,7 @@ char vmxnet3_driver_name[] = "vmxnet3"; * PCI Device ID Table * Last entry must be all 0s */ -static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = { +static const struct pci_device_id vmxnet3_pciid_table[] = { {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)}, {0} }; @@ -46,7 +51,7 @@ MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table); static int enable_mq = 1; static void -vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac); +vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, const u8 *mac); /* * Enable/Disable the given intr @@ -75,8 +80,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_enable_intr(adapter, i); - adapter->shared->devRead.intrConf.intrCtrl &= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl &= + cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl &= cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } } @@ -85,8 +96,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) { int i; - adapter->shared->devRead.intrConf.intrCtrl |= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl |= + cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_disable_intr(adapter, i); } @@ -130,6 +147,46 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue)); } +static u64 +vmxnet3_get_cycles(int pmc) +{ +#ifdef CONFIG_X86 + return native_read_pmc(pmc); +#else + return 0; +#endif +} + +static bool +vmxnet3_apply_timestamp(struct vmxnet3_tx_queue *tq, u16 rate) +{ +#ifdef CONFIG_X86 + if (rate > 0) { + if (tq->tsPktCount == 1) { + if (rate != 1) + tq->tsPktCount = rate; + return true; + } + tq->tsPktCount--; + } +#endif + return false; +} + +/* Check if capability is supported by UPT device or + * UPT is even requested + */ +bool +vmxnet3_check_ptcapability(u32 cap_supported, u32 cap) +{ + if (cap_supported & (1UL << VMXNET3_DCR_ERROR) || + cap_supported & (1UL << cap)) { + return true; + } + + return false; +} + /* * Check the link state. This may start or stop the tx queue. 
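Illustration (not part of the patch): the new vmxnet3_apply_timestamp() helper samples transmit packets at a configurable rate, timestamping one packet out of every `rate` sent on a queue and reloading the per-queue counter each time it fires. A minimal stand-alone C model of that counter logic, using hypothetical names (tx_queue_model, should_timestamp), is sketched below.

/* Stand-alone model (not kernel code) of the rate-based sampling done by
 * vmxnet3_apply_timestamp(): with sample rate N, one packet out of every
 * N consecutive packets is timestamped.
 */
#include <stdbool.h>
#include <stdio.h>

struct tx_queue_model {
	unsigned int ts_pkt_count;	/* models tq->tsPktCount, starts at 1 */
};

static bool should_timestamp(struct tx_queue_model *tq, unsigned int rate)
{
	if (rate > 0) {
		if (tq->ts_pkt_count == 1) {
			if (rate != 1)
				tq->ts_pkt_count = rate;	/* reload counter */
			return true;				/* sample this packet */
		}
		tq->ts_pkt_count--;
	}
	return false;
}

int main(void)
{
	struct tx_queue_model tq = { .ts_pkt_count = 1 };
	unsigned int pkt;

	for (pkt = 0; pkt < 10; pkt++)
		printf("pkt %u: %s\n", pkt,
		       should_timestamp(&tq, 4) ? "timestamp" : "skip");
	return 0;
}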
@@ -148,6 +205,14 @@ vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue) adapter->link_speed = ret >> 16; if (ret & 1) { /* Link is up. */ + /* + * From vmxnet3 v9, the hypervisor reports the speed in Gbps. + * Convert the speed to Mbps before rporting it to the kernel. + * Max link speed supported is 10000G. + */ + if (VMXNET3_VERSION_GE_9(adapter) && + adapter->link_speed < 10000) + adapter->link_speed = adapter->link_speed * 1000; netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n", adapter->link_speed); netif_carrier_on(adapter->netdev); @@ -312,14 +377,16 @@ static void vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi, struct pci_dev *pdev) { - if (tbi->map_type == VMXNET3_MAP_SINGLE) - pci_unmap_single(pdev, tbi->dma_addr, tbi->len, - PCI_DMA_TODEVICE); - else if (tbi->map_type == VMXNET3_MAP_PAGE) - pci_unmap_page(pdev, tbi->dma_addr, tbi->len, - PCI_DMA_TODEVICE); + u32 map_type = tbi->map_type; + + if (map_type & VMXNET3_MAP_SINGLE) + dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len, + DMA_TO_DEVICE); + else if (map_type & VMXNET3_MAP_PAGE) + dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len, + DMA_TO_DEVICE); else - BUG_ON(tbi->map_type != VMXNET3_MAP_NONE); + BUG_ON(map_type & ~VMXNET3_MAP_XDP); tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */ } @@ -327,19 +394,20 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi, static int vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq, - struct pci_dev *pdev, struct vmxnet3_adapter *adapter) + struct pci_dev *pdev, struct vmxnet3_adapter *adapter, + struct xdp_frame_bulk *bq) { - struct sk_buff *skb; + struct vmxnet3_tx_buf_info *tbi; int entries = 0; + u32 map_type; /* no out of order completion */ BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp); BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1); - skb = tq->buf_info[eop_idx].skb; - BUG_ON(skb == NULL); - tq->buf_info[eop_idx].skb = NULL; - + tbi = &tq->buf_info[eop_idx]; + BUG_ON(!tbi->skb); + map_type = tbi->map_type; VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size); while (tq->tx_ring.next2comp != eop_idx) { @@ -355,7 +423,14 @@ vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq, entries++; } - dev_kfree_skb_any(skb); + if (map_type & VMXNET3_MAP_XDP) + xdp_return_frame_bulk(tbi->xdpf, bq); + else + dev_kfree_skb_any(tbi->skb); + + /* xdpf and skb are in an anonymous union. */ + tbi->skb = NULL; + return entries; } @@ -364,18 +439,29 @@ static int vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { - int completed = 0; union Vmxnet3_GenericDesc *gdesc; + struct xdp_frame_bulk bq; + int completed = 0; + + xdp_frame_bulk_init(&bq); + rcu_read_lock(); gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { + /* Prevent any &gdesc->tcd field from being (speculatively) + * read before (&gdesc->tcd)->gen is read. 
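Illustration (not part of the patch): the completion-ring loop above only trusts a descriptor while its gen field matches the ring's current generation, and the added dma_rmb() keeps the other descriptor fields from being read before that check. A stand-alone model of the generation-bit ownership and wrap handling, using hypothetical types (comp_ring, comp_desc), is sketched below.

/* Stand-alone model (not kernel code) of the generation-bit ownership
 * scheme used by the vmxnet3 completion rings: a descriptor belongs to
 * the driver only while desc.gen matches the ring's current gen value;
 * each time next2proc wraps past the end of the ring, the expected gen
 * flips, so stale descriptors from the previous lap are ignored.  In the
 * real driver a dma_rmb() separates the gen check from the payload reads.
 */
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8
#define INIT_GEN  1

struct comp_desc { uint8_t gen; uint32_t data; };

struct comp_ring {
	struct comp_desc base[RING_SIZE];
	uint32_t next2proc;
	uint8_t gen;
};

static void ring_adv_next2proc(struct comp_ring *ring)
{
	if (++ring->next2proc == RING_SIZE) {
		ring->next2proc = 0;
		ring->gen ^= 1;		/* expected generation flips on wrap */
	}
}

static int ring_consume(struct comp_ring *ring, uint32_t *data)
{
	struct comp_desc *d = &ring->base[ring->next2proc];

	if (d->gen != ring->gen)
		return 0;		/* still owned by the device */
	/* real driver: dma_rmb() here before touching the payload */
	*data = d->data;
	ring_adv_next2proc(ring);
	return 1;
}

int main(void)
{
	struct comp_ring ring = { .gen = INIT_GEN };
	uint32_t v;

	ring.base[0] = (struct comp_desc){ .gen = INIT_GEN, .data = 42 };
	while (ring_consume(&ring, &v))
		printf("completed %u\n", v);
	return 0;
}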
+ */ + dma_rmb(); + completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( &gdesc->tcd), tq, adapter->pdev, - adapter); + adapter, &bq); vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring); gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; } + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); if (completed) { spin_lock(&tq->tx_lock); @@ -395,26 +481,36 @@ static void vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { + struct xdp_frame_bulk bq; + u32 map_type; int i; + xdp_frame_bulk_init(&bq); + rcu_read_lock(); + while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) { struct vmxnet3_tx_buf_info *tbi; tbi = tq->buf_info + tq->tx_ring.next2comp; + map_type = tbi->map_type; vmxnet3_unmap_tx_buf(tbi, adapter->pdev); if (tbi->skb) { - dev_kfree_skb_any(tbi->skb); + if (map_type & VMXNET3_MAP_XDP) + xdp_return_frame_bulk(tbi->xdpf, &bq); + else + dev_kfree_skb_any(tbi->skb); tbi->skb = NULL; } vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring); } - /* sanity check, verify all buffers are indeed unmapped and freed */ - for (i = 0; i < tq->tx_ring.size; i++) { - BUG_ON(tq->buf_info[i].skb != NULL || - tq->buf_info[i].map_type != VMXNET3_MAP_NONE); - } + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); + + /* sanity check, verify all buffers are indeed unmapped */ + for (i = 0; i < tq->tx_ring.size; i++) + BUG_ON(tq->buf_info[i].map_type != VMXNET3_MAP_NONE); tq->tx_ring.gen = VMXNET3_INIT_GEN; tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; @@ -429,21 +525,27 @@ vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { if (tq->tx_ring.base) { - pci_free_consistent(adapter->pdev, tq->tx_ring.size * - sizeof(struct Vmxnet3_TxDesc), - tq->tx_ring.base, tq->tx_ring.basePA); + dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size * + sizeof(struct Vmxnet3_TxDesc), + tq->tx_ring.base, tq->tx_ring.basePA); tq->tx_ring.base = NULL; } if (tq->data_ring.base) { - pci_free_consistent(adapter->pdev, tq->data_ring.size * - sizeof(struct Vmxnet3_TxDataDesc), - tq->data_ring.base, tq->data_ring.basePA); + dma_free_coherent(&adapter->pdev->dev, + tq->data_ring.size * tq->txdata_desc_size, + tq->data_ring.base, tq->data_ring.basePA); tq->data_ring.base = NULL; } + if (tq->ts_ring.base) { + dma_free_coherent(&adapter->pdev->dev, + tq->tx_ring.size * tq->tx_ts_desc_size, + tq->ts_ring.base, tq->ts_ring.basePA); + tq->ts_ring.base = NULL; + } if (tq->comp_ring.base) { - pci_free_consistent(adapter->pdev, tq->comp_ring.size * - sizeof(struct Vmxnet3_TxCompDesc), - tq->comp_ring.base, tq->comp_ring.basePA); + dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size * + sizeof(struct Vmxnet3_TxCompDesc), + tq->comp_ring.base, tq->comp_ring.basePA); tq->comp_ring.base = NULL; } kfree(tq->buf_info); @@ -474,8 +576,12 @@ vmxnet3_tq_init(struct vmxnet3_tx_queue *tq, tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; tq->tx_ring.gen = VMXNET3_INIT_GEN; - memset(tq->data_ring.base, 0, tq->data_ring.size * - sizeof(struct Vmxnet3_TxDataDesc)); + memset(tq->data_ring.base, 0, + tq->data_ring.size * tq->txdata_desc_size); + + if (tq->ts_ring.base) + memset(tq->ts_ring.base, 0, + tq->tx_ring.size * tq->tx_ts_desc_size); /* reset the tx comp ring contents to 0 and reset comp ring states */ memset(tq->comp_ring.base, 0, tq->comp_ring.size * @@ -499,34 +605,45 @@ vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, BUG_ON(tq->tx_ring.base || tq->data_ring.base || tq->comp_ring.base || tq->buf_info); - tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size - * 
sizeof(struct Vmxnet3_TxDesc), - &tq->tx_ring.basePA); + tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev, + tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc), + &tq->tx_ring.basePA, GFP_KERNEL); if (!tq->tx_ring.base) { netdev_err(adapter->netdev, "failed to allocate tx ring\n"); goto err; } - tq->data_ring.base = pci_alloc_consistent(adapter->pdev, - tq->data_ring.size * - sizeof(struct Vmxnet3_TxDataDesc), - &tq->data_ring.basePA); + tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev, + tq->data_ring.size * tq->txdata_desc_size, + &tq->data_ring.basePA, GFP_KERNEL); if (!tq->data_ring.base) { - netdev_err(adapter->netdev, "failed to allocate data ring\n"); + netdev_err(adapter->netdev, "failed to allocate tx data ring\n"); goto err; } - tq->comp_ring.base = pci_alloc_consistent(adapter->pdev, - tq->comp_ring.size * - sizeof(struct Vmxnet3_TxCompDesc), - &tq->comp_ring.basePA); + if (tq->tx_ts_desc_size != 0) { + tq->ts_ring.base = dma_alloc_coherent(&adapter->pdev->dev, + tq->tx_ring.size * tq->tx_ts_desc_size, + &tq->ts_ring.basePA, GFP_KERNEL); + if (!tq->ts_ring.base) { + netdev_err(adapter->netdev, "failed to allocate tx ts ring\n"); + tq->tx_ts_desc_size = 0; + } + } else { + tq->ts_ring.base = NULL; + } + + tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, + tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc), + &tq->comp_ring.basePA, GFP_KERNEL); if (!tq->comp_ring.base) { netdev_err(adapter->netdev, "failed to allocate tx comp ring\n"); goto err; } - tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]), - GFP_KERNEL); + tq->buf_info = kcalloc_node(tq->tx_ring.size, sizeof(tq->buf_info[0]), + GFP_KERNEL, + dev_to_node(&adapter->pdev->dev)); if (!tq->buf_info) goto err; @@ -567,8 +684,19 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, rbi = rbi_base + ring->next2fill; gd = ring->base + ring->next2fill; + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; - if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { + if (rbi->buf_type == VMXNET3_RX_BUF_XDP) { + void *data = vmxnet3_pp_get_buff(rq->page_pool, + &rbi->dma_addr, + GFP_KERNEL); + if (!data) { + rq->stats.rx_buf_alloc_failure++; + break; + } + rbi->page = virt_to_page(data); + val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT; + } else if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { if (rbi->skb == NULL) { rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev, rbi->len, @@ -578,9 +706,17 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, break; } - rbi->dma_addr = pci_map_single(adapter->pdev, + rbi->dma_addr = dma_map_single( + &adapter->pdev->dev, rbi->skb->data, rbi->len, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffer skipped by the device */ } @@ -595,24 +731,33 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, rq->stats.rx_buf_alloc_failure++; break; } - rbi->dma_addr = pci_map_page(adapter->pdev, + rbi->dma_addr = dma_map_page( + &adapter->pdev->dev, rbi->page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + put_page(rbi->page); + rbi->page = NULL; + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffers skipped by the device */ } val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT; } - BUG_ON(rbi->dma_addr == 0); gd->rxd.addr = cpu_to_le64(rbi->dma_addr); 
gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT) | val | rbi->len); /* Fill the last buffer but dont mark it ready, or else the * device will think that the queue is full */ - if (num_allocated == num_to_alloc) + if (num_allocated == num_to_alloc) { + rbi->comp_state = VMXNET3_RXD_COMP_DONE; break; + } gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT); num_allocated++; @@ -634,21 +779,18 @@ static void vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, struct vmxnet3_rx_buf_info *rbi) { - struct skb_frag_struct *frag = skb_shinfo(skb)->frags + - skb_shinfo(skb)->nr_frags; + skb_frag_t *frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); - __skb_frag_set_page(frag, rbi->page); - frag->page_offset = 0; - skb_frag_size_set(frag, rcd->len); + skb_frag_fill_page_desc(frag, rbi->page, 0, rcd->len); skb->data_len += rcd->len; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; } -static void +static int vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, struct vmxnet3_adapter *adapter) @@ -671,7 +813,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, if (ctx->copy_size) { ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA + tq->tx_ring.next2fill * - sizeof(struct Vmxnet3_TxDataDesc)); + tq->txdata_desc_size); ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size); ctx->sop_txd->dword[3] = 0; @@ -705,9 +847,11 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, tbi = tq->buf_info + tq->tx_ring.next2fill; tbi->map_type = VMXNET3_MAP_SINGLE; - tbi->dma_addr = pci_map_single(adapter->pdev, + tbi->dma_addr = dma_map_single(&adapter->pdev->dev, skb->data + buf_offset, buf_size, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -730,7 +874,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 buf_size; buf_offset = 0; @@ -748,6 +892,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag, buf_offset, buf_size, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -759,7 +905,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, gdesc->dword[3] = 0; netdev_dbg(adapter->netdev, - "txd[%u]: 0x%llu %u %u\n", + "txd[%u]: 0x%llx %u %u\n", tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr), le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]); vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); @@ -775,6 +921,13 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, /* set the last buf_info for the pkt */ tbi->skb = skb; tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base; + if (tq->tx_ts_desc_size != 0) { + ctx->ts_txd = (struct Vmxnet3_TxTSDesc *)((u8 *)tq->ts_ring.base + + tbi->sop_idx * tq->tx_ts_desc_size); + ctx->ts_txd->ts.tsi = 0; + } + + return 0; } @@ -790,7 +943,7 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter) /* - * parse and copy relevant protocol headers: + * parse relevant protocol headers: * For a tso pkt, relevant headers are L2/3/4 including options * For a pkt requesting csum offloading, they are L2/3 and may include L4 * if it's a 
TCP/UDP pkt @@ -803,72 +956,150 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter) * Other effects: * 1. related *ctx fields are updated. * 2. ctx->copy_size is # of bytes copied - * 3. the portion copied is guaranteed to be in the linear part + * 3. the portion to be copied is guaranteed to be in the linear part * */ static int -vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, - struct vmxnet3_tx_ctx *ctx, - struct vmxnet3_adapter *adapter) +vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, + struct vmxnet3_tx_ctx *ctx, + struct vmxnet3_adapter *adapter) { - struct Vmxnet3_TxDataDesc *tdd; + u8 protocol = 0; if (ctx->mss) { /* TSO */ - ctx->eth_ip_hdr_size = skb_transport_offset(skb); - ctx->l4_hdr_size = tcp_hdrlen(skb); - ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size; + if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { + ctx->l4_offset = skb_inner_transport_offset(skb); + ctx->l4_hdr_size = inner_tcp_hdrlen(skb); + ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size; + } else { + ctx->l4_offset = skb_transport_offset(skb); + ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size; + } } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb); + /* For encap packets, skb_checksum_start_offset refers + * to inner L4 offset. Thus, below works for encap as + * well as non-encap case + */ + ctx->l4_offset = skb_checksum_start_offset(skb); - if (ctx->ipv4) { - const struct iphdr *iph = ip_hdr(skb); + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation) { + struct iphdr *iph = inner_ip_hdr(skb); - if (iph->protocol == IPPROTO_TCP) - ctx->l4_hdr_size = tcp_hdrlen(skb); - else if (iph->protocol == IPPROTO_UDP) - ctx->l4_hdr_size = sizeof(struct udphdr); - else - ctx->l4_hdr_size = 0; + if (iph->version == 4) { + protocol = iph->protocol; + } else { + const struct ipv6hdr *ipv6h; + + ipv6h = inner_ipv6_hdr(skb); + protocol = ipv6h->nexthdr; + } } else { - /* for simplicity, don't copy L4 headers */ + if (ctx->ipv4) { + const struct iphdr *iph = ip_hdr(skb); + + protocol = iph->protocol; + } else if (ctx->ipv6) { + const struct ipv6hdr *ipv6h; + + ipv6h = ipv6_hdr(skb); + protocol = ipv6h->nexthdr; + } + } + + switch (protocol) { + case IPPROTO_TCP: + ctx->l4_hdr_size = skb->encapsulation ? 
inner_tcp_hdrlen(skb) : + tcp_hdrlen(skb); + break; + case IPPROTO_UDP: + ctx->l4_hdr_size = sizeof(struct udphdr); + break; + default: ctx->l4_hdr_size = 0; + break; } - ctx->copy_size = min(ctx->eth_ip_hdr_size + + + ctx->copy_size = min(ctx->l4_offset + ctx->l4_hdr_size, skb->len); } else { - ctx->eth_ip_hdr_size = 0; + ctx->l4_offset = 0; ctx->l4_hdr_size = 0; /* copy as much as allowed */ - ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE - , skb_headlen(skb)); + ctx->copy_size = min_t(unsigned int, + tq->txdata_desc_size, + skb_headlen(skb)); } + if (skb->len <= tq->txdata_desc_size) + ctx->copy_size = skb->len; + /* make sure headers are accessible directly */ if (unlikely(!pskb_may_pull(skb, ctx->copy_size))) goto err; } - if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) { + if (unlikely(ctx->copy_size > tq->txdata_desc_size)) { tq->stats.oversized_hdr++; ctx->copy_size = 0; return 0; } - tdd = tq->data_ring.base + tq->tx_ring.next2fill; + return 1; +err: + return -1; +} + +/* + * copy relevant protocol headers to the transmit ring: + * For a tso pkt, relevant headers are L2/3/4 including options + * For a pkt requesting csum offloading, they are L2/3 and may include L4 + * if it's a TCP/UDP pkt + * + * + * Note that this requires that vmxnet3_parse_hdr be called first to set the + * appropriate bits in ctx first + */ +static void +vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, + struct vmxnet3_tx_ctx *ctx, + struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_TxDataDesc *tdd; + + tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base + + tq->tx_ring.next2fill * + tq->txdata_desc_size); memcpy(tdd->data, skb->data, ctx->copy_size); netdev_dbg(adapter->netdev, "copy %u bytes to dataRing[%u]\n", ctx->copy_size, tq->tx_ring.next2fill); - return 1; - -err: - return -1; } static void +vmxnet3_prepare_inner_tso(struct sk_buff *skb, + struct vmxnet3_tx_ctx *ctx) +{ + struct tcphdr *tcph = inner_tcp_hdr(skb); + struct iphdr *iph = inner_ip_hdr(skb); + + if (iph->version == 4) { + iph->check = 0; + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + } else { + struct ipv6hdr *iph = inner_ipv6_hdr(skb); + + tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0, + IPPROTO_TCP, 0); + } +} + +static void vmxnet3_prepare_tso(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx) { @@ -880,11 +1111,8 @@ vmxnet3_prepare_tso(struct sk_buff *skb, iph->check = 0; tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, IPPROTO_TCP, 0); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); - - tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0, - IPPROTO_TCP, 0); + } else if (ctx->ipv6) { + tcp_v6_gso_csum_prep(skb); } } @@ -894,7 +1122,7 @@ static int txd_estimate(const struct sk_buff *skb) int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; count += VMXNET3_TXD_NEEDED(skb_frag_size(frag)); } @@ -920,6 +1148,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, { int ret; u32 count; + int num_pkts; + int tx_num_deferred; unsigned long flags; struct vmxnet3_tx_ctx ctx; union Vmxnet3_GenericDesc *gdesc; @@ -931,6 +1161,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, count = txd_estimate(skb); ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP)); + ctx.ipv6 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IPV6)); ctx.mss = skb_shinfo(skb)->gso_size; if 
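Illustration (not part of the patch): for TSO, vmxnet3_prepare_tso()/vmxnet3_prepare_inner_tso() seed tcph->check using the kernel's csum_tcpudp_magic()/csum_ipv6_magic() helpers, with the length term left at zero so the device can complete the checksum per segment. The sketch below only models the underlying IPv4 one's-complement pseudo-header arithmetic with simplified endianness conventions; all names are hypothetical.

/* Stand-alone sketch (not kernel code): un-inverted one's-complement sum
 * of an IPv4 pseudo header with the TCP length treated as 0.  The driver
 * derives the value it stores in tcph->check from csum_tcpudp_magic();
 * this only illustrates the arithmetic behind that helper.
 */
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit one's-complement accumulator down to 16 bits. */
static uint16_t csum_fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Addresses are taken in host order here purely for readability. */
static uint16_t tso_csum_seed(uint32_t saddr, uint32_t daddr)
{
	uint32_t sum = 0;

	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 6;			/* IPPROTO_TCP; length is 0 for TSO */
	return csum_fold16(sum);
}

int main(void)
{
	/* 192.0.2.1 -> 198.51.100.2 (TEST-NET example addresses) */
	printf("seed = 0x%04x\n", tso_csum_seed(0xc0000201u, 0xc6336402u));
	return 0;
}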
(ctx.mss) { @@ -942,7 +1173,28 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } - vmxnet3_prepare_tso(skb, &ctx); + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + /* tso pkts must not use more than + * VMXNET3_MAX_TSO_TXD_PER_PKT entries + */ + if (skb_linearize(skb) != 0) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + tq->stats.linearized++; + + /* recalculate the # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1; + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + } + if (skb->encapsulation) { + vmxnet3_prepare_inner_tso(skb, &ctx); + } else { + vmxnet3_prepare_tso(skb, &ctx); + } } else { if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) { @@ -960,46 +1212,51 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } } - spin_lock_irqsave(&tq->tx_lock, flags); - - if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) { - tq->stats.tx_ring_full++; - netdev_dbg(adapter->netdev, - "tx queue stopped on %s, next2comp %u" - " next2fill %u\n", adapter->netdev->name, - tq->tx_ring.next2comp, tq->tx_ring.next2fill); - - vmxnet3_tq_stop(tq, adapter); - spin_unlock_irqrestore(&tq->tx_lock, flags); - return NETDEV_TX_BUSY; - } - - - ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter); + ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter); if (ret >= 0) { BUG_ON(ret <= 0 && ctx.copy_size != 0); /* hdrs parsed, check against other limits */ if (ctx.mss) { - if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size > + if (unlikely(ctx.l4_offset + ctx.l4_hdr_size > VMXNET3_MAX_TX_BUF_SIZE)) { - goto hdr_too_big; + tq->stats.drop_oversized_hdr++; + goto drop_pkt; } } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (unlikely(ctx.eth_ip_hdr_size + + if (unlikely(ctx.l4_offset + skb->csum_offset > VMXNET3_MAX_CSUM_OFFSET)) { - goto hdr_too_big; + tq->stats.drop_oversized_hdr++; + goto drop_pkt; } } } } else { tq->stats.drop_hdr_inspect_err++; - goto unlock_drop_pkt; + goto drop_pkt; } + spin_lock_irqsave(&tq->tx_lock, flags); + + if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) { + tq->stats.tx_ring_full++; + netdev_dbg(adapter->netdev, + "tx queue stopped on %s, next2comp %u" + " next2fill %u\n", adapter->netdev->name, + tq->tx_ring.next2comp, tq->tx_ring.next2fill); + + vmxnet3_tq_stop(tq, adapter); + spin_unlock_irqrestore(&tq->tx_lock, flags); + return NETDEV_TX_BUSY; + } + + + vmxnet3_copy_hdr(skb, tq, &ctx, adapter); + /* fill tx descs related to addr & len */ - vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter); + if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter)) + goto unlock_drop_pkt; /* setup the EOP desc */ ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); @@ -1012,30 +1269,74 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, #else gdesc = ctx.sop_txd; #endif + tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); if (ctx.mss) { - gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_TSO; - gdesc->txd.msscof = ctx.mss; - le32_add_cpu(&tq->shared->txNumDeferred, (skb->len - - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss); + if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { + gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + } + gdesc->txd.msscof = ctx.mss; + + if 
(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + gdesc->txd.oco = 1; + } else { + gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.msscof = ctx.mss; + } + num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss; } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - gdesc->txd.hlen = ctx.eth_ip_hdr_size; - gdesc->txd.om = VMXNET3_OM_CSUM; - gdesc->txd.msscof = ctx.eth_ip_hdr_size + - skb->csum_offset; + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation) { + gdesc->txd.hlen = ctx.l4_offset + + ctx.l4_hdr_size; + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = 0; /* Reserved */ + } + } else { + gdesc->txd.hlen = ctx.l4_offset; + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + } } else { gdesc->txd.om = 0; gdesc->txd.msscof = 0; } - le32_add_cpu(&tq->shared->txNumDeferred, 1); + num_pkts = 1; } + le32_add_cpu(&tq->shared->txNumDeferred, num_pkts); + tx_num_deferred += num_pkts; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { gdesc->txd.ti = 1; - gdesc->txd.tci = vlan_tx_tag_get(skb); + gdesc->txd.tci = skb_vlan_tag_get(skb); } + if (tq->tx_ts_desc_size != 0 && + adapter->latencyConf->sampleRate != 0) { + if (vmxnet3_apply_timestamp(tq, adapter->latencyConf->sampleRate)) { + ctx.ts_txd->ts.tsData = vmxnet3_get_cycles(VMXNET3_PMC_PSEUDO_TSC); + ctx.ts_txd->ts.tsi = 1; + } + } + + /* Ensure that the write to (&gdesc->txd)->gen will be observed after + * all other writes to &gdesc->txd. + */ + dma_wmb(); + /* finally flips the GEN bit of the SOP desc. */ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); @@ -1055,26 +1356,80 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, spin_unlock_irqrestore(&tq->tx_lock, flags); - if (le32_to_cpu(tq->shared->txNumDeferred) >= - le32_to_cpu(tq->shared->txThreshold)) { + if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_TXPROD + tq->qid * 8, + adapter->tx_prod_offset + tq->qid * 8, tq->tx_ring.next2fill); } return NETDEV_TX_OK; -hdr_too_big: - tq->stats.drop_oversized_hdr++; unlock_drop_pkt: spin_unlock_irqrestore(&tq->tx_lock, flags); drop_pkt: tq->stats.drop_total++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } +static int +vmxnet3_create_pp(struct vmxnet3_adapter *adapter, + struct vmxnet3_rx_queue *rq, int size) +{ + bool xdp_prog = vmxnet3_xdp_enabled(adapter); + const struct page_pool_params pp_params = { + .order = 0, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .pool_size = size, + .nid = NUMA_NO_NODE, + .dev = &adapter->pdev->dev, + .offset = VMXNET3_XDP_RX_OFFSET, + .max_len = VMXNET3_XDP_MAX_FRSIZE, + .dma_dir = xdp_prog ? 
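Illustration (not part of the patch): the txNumDeferred accounting above counts a TSO send as DIV_ROUND_UP(skb->len - hlen, mss) packets and only rings the TXPROD doorbell once the deferred count reaches txThreshold. A stand-alone sketch of that arithmetic follows; the threshold value used here is made up for the example.

/* Stand-alone sketch (not kernel code) of the deferred-doorbell
 * accounting around txNumDeferred/txThreshold: each TSO packet counts as
 * DIV_ROUND_UP(payload, mss) packets, and the TXPROD doorbell is only
 * written once enough packets have been queued.
 */
#include <stdio.h>

static unsigned int tso_num_pkts(unsigned int skb_len, unsigned int hlen,
				 unsigned int mss)
{
	return (skb_len - hlen + mss - 1) / mss;	/* DIV_ROUND_UP */
}

int main(void)
{
	unsigned int tx_num_deferred = 0;
	/* made-up value; models le32_to_cpu(tq->shared->txThreshold) */
	const unsigned int tx_threshold = 32;

	/* 64KB TSO packet, 54-byte headers, MSS 1460 -> 45 segments */
	tx_num_deferred += tso_num_pkts(65535, 54, 1460);
	printf("deferred %u, ring doorbell: %s\n", tx_num_deferred,
	       tx_num_deferred >= tx_threshold ? "yes" : "not yet");
	return 0;
}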
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + }; + struct page_pool *pp; + int err; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) + return PTR_ERR(pp); + + err = xdp_rxq_info_reg(&rq->xdp_rxq, adapter->netdev, rq->qid, + rq->napi.napi_id); + if (err < 0) + goto err_free_pp; + + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, pp); + if (err) + goto err_unregister_rxq; + + rq->page_pool = pp; + + return 0; + +err_unregister_rxq: + xdp_rxq_info_unreg(&rq->xdp_rxq); +err_free_pp: + page_pool_destroy(pp); + + return err; +} + +void * +vmxnet3_pp_get_buff(struct page_pool *pp, dma_addr_t *dma_addr, + gfp_t gfp_mask) +{ + struct page *page; + + page = page_pool_alloc_pages(pp, gfp_mask | __GFP_NOWARN); + if (unlikely(!page)) + return NULL; + + *dma_addr = page_pool_get_dma_addr(page) + pp->p.offset; + + return page_address(page); +} static netdev_tx_t vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev) @@ -1094,13 +1449,33 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, union Vmxnet3_GenericDesc *gdesc) { if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) { - /* typical case: TCP/UDP over IP and both csums are correct */ - if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == - VMXNET3_RCD_CSUM_OK) { + if (gdesc->rcd.v4 && + (le32_to_cpu(gdesc->dword[3]) & + VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { skb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); - BUG_ON(!(gdesc->rcd.v4 || gdesc->rcd.v6)); - BUG_ON(gdesc->rcd.frg); + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); + WARN_ON_ONCE(gdesc->rcd.frg && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); + } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & + (1 << VMXNET3_RCD_TUC_SHIFT))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); + WARN_ON_ONCE(gdesc->rcd.frg && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); } else { if (gdesc->rcd.csum) { skb->csum = htons(gdesc->rcd.csum); @@ -1142,41 +1517,132 @@ vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd, } +static u32 +vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb, + union Vmxnet3_GenericDesc *gdesc) +{ + u32 hlen, maplen; + union { + void *ptr; + struct ethhdr *eth; + struct vlan_ethhdr *veth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + } hdr; + BUG_ON(gdesc->rcd.tcp == 0); + + maplen = skb_headlen(skb); + if (unlikely(sizeof(struct iphdr) + sizeof(struct tcphdr) > maplen)) + return 0; + + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) + hlen = sizeof(struct vlan_ethhdr); + else + hlen = sizeof(struct ethhdr); + + hdr.eth = eth_hdr(skb); + if (gdesc->rcd.v4) { + BUG_ON(hdr.eth->h_proto != htons(ETH_P_IP) && + hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IP)); + hdr.ptr += hlen; + BUG_ON(hdr.ipv4->protocol != IPPROTO_TCP); + hlen = hdr.ipv4->ihl << 2; + hdr.ptr += hdr.ipv4->ihl << 2; + } else if (gdesc->rcd.v6) { + BUG_ON(hdr.eth->h_proto != htons(ETH_P_IPV6) && + 
hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IPV6)); + hdr.ptr += hlen; + /* Use an estimated value, since we also need to handle + * TSO case. + */ + if (hdr.ipv6->nexthdr != IPPROTO_TCP) + return sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + hlen = sizeof(struct ipv6hdr); + hdr.ptr += sizeof(struct ipv6hdr); + } else { + /* Non-IP pkt, dont estimate header length */ + return 0; + } + + if (hlen + sizeof(struct tcphdr) > maplen) + return 0; + + return (hlen + (hdr.tcp->doff << 2)); +} + +static void +vmxnet3_lro_tunnel(struct sk_buff *skb, __be16 ip_proto) +{ + struct udphdr *uh = NULL; + + if (ip_proto == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + + if (iph->nexthdr == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } + if (uh) { + if (uh->check) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } +} + static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter, int quota) { - static const u32 rxprod_reg[2] = { - VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 + u32 rxprod_reg[2] = { + adapter->rx_prod_offset, adapter->rx_prod2_offset }; - u32 num_rxd = 0; + u32 num_pkts = 0; bool skip_page_frags = false; + bool encap_lro = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; + u16 segCnt = 0, mss = 0; + int comp_offset, fill_offset; #ifdef __BIG_ENDIAN_BITFIELD struct Vmxnet3_RxDesc rxCmdDesc; struct Vmxnet3_RxCompDesc rxComp; #endif + bool need_flush = false; + vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); while (rcd->gen == rq->comp_ring.gen) { struct vmxnet3_rx_buf_info *rbi; struct sk_buff *skb, *new_skb = NULL; struct page *new_page = NULL; + dma_addr_t new_dma_addr; int num_to_alloc; struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; struct vmxnet3_cmd_ring *ring = NULL; - if (num_rxd >= quota) { + if (num_pkts >= quota) { /* we may stop even before we see the EOP desc of * the current pkt */ break; } - num_rxd++; - BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); + + /* Prevent any rcd field from being (speculatively) read before + * rcd->gen is read. + */ + dma_rmb(); + + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && + rcd->rqID != rq->dataRingQid); idx = rcd->rxdIdx; - ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; + ring_idx = VMXNET3_GET_RING_IDX(adapter, rcd->rqID); ring = rq->rx_ring + ring_idx; vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd, &rxCmdDesc); @@ -1190,11 +1656,41 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, goto rcd_done; } + if (rcd->sop && rcd->eop && vmxnet3_xdp_enabled(adapter)) { + struct sk_buff *skb_xdp_pass; + int act; + + if (VMXNET3_RX_DATA_RING(adapter, rcd->rqID)) { + ctx->skb = NULL; + goto skip_xdp; /* Handle it later. 
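Illustration (not part of the patch): vmxnet3_get_hdr_len() above estimates the L2+L3+L4 header size of an LRO'd TCP packet by walking the Ethernet (optionally VLAN-tagged), IP and TCP headers with a union of typed pointers. The stand-alone sketch below models the same walk for IPv4 with simplified, hypothetical struct layouts; the driver uses the kernel's ethhdr/iphdr/tcphdr types and adds the bounds checks shown in the patch.

/* Stand-alone sketch (not kernel code): advance one pointer through
 * Ethernet, IPv4 and TCP headers and return the total header length.
 */
#include <stdint.h>
#include <stdio.h>

struct eth { uint8_t dst[6], src[6]; uint16_t proto; };	/* 14 bytes */
struct ip4 { uint8_t verlen; uint8_t rest[19]; };	/* 20-byte base header */
struct tcp { uint8_t rest[12]; uint8_t doff_flags; uint8_t tail[7]; };

static unsigned int hdr_len(const uint8_t *pkt, int vlan_tagged)
{
	const uint8_t *p = pkt;
	unsigned int ip_hlen, tcp_hlen;

	p += vlan_tagged ? sizeof(struct eth) + 4 : sizeof(struct eth);
	ip_hlen = ((const struct ip4 *)p)->verlen & 0x0f;	/* IHL, in 32-bit words */
	ip_hlen <<= 2;
	p += ip_hlen;
	tcp_hlen = ((const struct tcp *)p)->doff_flags >> 4;	/* TCP data offset */
	tcp_hlen <<= 2;
	return (unsigned int)(p - pkt) + tcp_hlen;
}

int main(void)
{
	uint8_t pkt[64] = { 0 };

	pkt[14] = 0x45;			/* IPv4, IHL = 5 (20 bytes) */
	pkt[14 + 20 + 12] = 0x50;	/* TCP data offset = 5 (20 bytes) */
	printf("header length = %u\n", hdr_len(pkt, 0));	/* 14+20+20 = 54 */
	return 0;
}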
*/ + } + + if (rbi->buf_type != VMXNET3_RX_BUF_XDP) + goto rcd_done; + + act = vmxnet3_process_xdp(adapter, rq, rcd, rbi, rxd, + &skb_xdp_pass); + if (act == XDP_PASS) { + ctx->skb = skb_xdp_pass; + goto sop_done; + } + ctx->skb = NULL; + need_flush |= act == XDP_REDIRECT; + + goto rcd_done; + } +skip_xdp: + if (rcd->sop) { /* first buf of the pkt */ + bool rxDataRingUsed; + u16 len; + BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD || - rcd->rqID != rq->qid); + (rcd->rqID != rq->qid && + rcd->rqID != rq->dataRingQid)); - BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB); + BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB && + rbi->buf_type != VMXNET3_RX_BUF_XDP); BUG_ON(ctx->skb != NULL || rbi->skb == NULL); if (unlikely(rcd->len == 0)) { @@ -1208,8 +1704,40 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skip_page_frags = false; ctx->skb = rbi->skb; + + if (rq->rx_ts_desc_size != 0 && rcd->ext2) { + struct Vmxnet3_RxTSDesc *ts_rxd; + + ts_rxd = (struct Vmxnet3_RxTSDesc *)((u8 *)rq->ts_ring.base + + idx * rq->rx_ts_desc_size); + ts_rxd->ts.tsData = vmxnet3_get_cycles(VMXNET3_PMC_PSEUDO_TSC); + ts_rxd->ts.tsi = 1; + } + + rxDataRingUsed = + VMXNET3_RX_DATA_RING(adapter, rcd->rqID); + len = rxDataRingUsed ? rcd->len : rbi->len; + + if (rxDataRingUsed && vmxnet3_xdp_enabled(adapter)) { + struct sk_buff *skb_xdp_pass; + size_t sz; + int act; + + sz = rcd->rxdIdx * rq->data_ring.desc_size; + act = vmxnet3_process_xdp_small(adapter, rq, + &rq->data_ring.base[sz], + rcd->len, + &skb_xdp_pass); + if (act == XDP_PASS) { + ctx->skb = skb_xdp_pass; + goto sop_done; + } + need_flush |= act == XDP_REDIRECT; + + goto rcd_done; + } new_skb = netdev_alloc_skb_ip_align(adapter->netdev, - rbi->len); + len); if (new_skb == NULL) { /* Skb allocation failed, do not handover this * skb to stack. Reuse it. Drop the existing pkt @@ -1221,24 +1749,69 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, goto rcd_done; } - pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, - PCI_DMA_FROMDEVICE); + if (rxDataRingUsed && adapter->rxdataring_enabled) { + size_t sz; -#ifdef VMXNET3_RSS - if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && - (adapter->netdev->features & NETIF_F_RXHASH)) - ctx->skb->rxhash = le32_to_cpu(rcd->rssHash); -#endif - skb_put(ctx->skb, rcd->len); + BUG_ON(rcd->len > rq->data_ring.desc_size); + + ctx->skb = new_skb; + sz = rcd->rxdIdx * rq->data_ring.desc_size; + memcpy(new_skb->data, + &rq->data_ring.base[sz], rcd->len); + } else { + ctx->skb = rbi->skb; + + new_dma_addr = + dma_map_single(&adapter->pdev->dev, + new_skb->data, rbi->len, + DMA_FROM_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + dev_kfree_skb(new_skb); + /* Skb allocation failed, do not + * handover this skb to stack. Reuse + * it. Drop the existing pkt. 
+ */ + rq->stats.rx_buf_alloc_failure++; + ctx->skb = NULL; + rq->stats.drop_total++; + skip_page_frags = true; + goto rcd_done; + } - /* Immediate refill */ - rbi->skb = new_skb; - rbi->dma_addr = pci_map_single(adapter->pdev, - rbi->skb->data, rbi->len, - PCI_DMA_FROMDEVICE); - rxd->addr = cpu_to_le64(rbi->dma_addr); - rxd->len = rbi->len; + dma_unmap_single(&adapter->pdev->dev, + rbi->dma_addr, + rbi->len, + DMA_FROM_DEVICE); + /* Immediate refill */ + rbi->skb = new_skb; + rbi->dma_addr = new_dma_addr; + rxd->addr = cpu_to_le64(rbi->dma_addr); + rxd->len = rbi->len; + } + + skb_record_rx_queue(ctx->skb, rq->qid); + skb_put(ctx->skb, rcd->len); + + if (VMXNET3_VERSION_GE_2(adapter) && + rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { + struct Vmxnet3_RxCompDescExt *rcdlro; + union Vmxnet3_GenericDesc *gdesc; + + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; + gdesc = (union Vmxnet3_GenericDesc *)rcd; + + segCnt = rcdlro->segCnt; + WARN_ON_ONCE(segCnt == 0); + mss = rcdlro->mss; + if (unlikely(segCnt <= 1)) + segCnt = 0; + encap_lro = (le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); + } else { + segCnt = 0; + } } else { BUG_ON(ctx->skb == NULL && !skip_page_frags); @@ -1252,75 +1825,179 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (skip_page_frags) goto rcd_done; - new_page = alloc_page(GFP_ATOMIC); - if (unlikely(new_page == NULL)) { + if (rcd->len) { + new_page = alloc_page(GFP_ATOMIC); /* Replacement page frag could not be allocated. * Reuse this page. Drop the pkt and free the * skb which contained this page as a frag. Skip * processing all the following non-sop frags. */ - rq->stats.rx_buf_alloc_failure++; - dev_kfree_skb(ctx->skb); - ctx->skb = NULL; - skip_page_frags = true; - goto rcd_done; - } + if (unlikely(!new_page)) { + rq->stats.rx_buf_alloc_failure++; + dev_kfree_skb(ctx->skb); + ctx->skb = NULL; + skip_page_frags = true; + goto rcd_done; + } + new_dma_addr = dma_map_page(&adapter->pdev->dev, + new_page, + 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + put_page(new_page); + rq->stats.rx_buf_alloc_failure++; + dev_kfree_skb(ctx->skb); + ctx->skb = NULL; + skip_page_frags = true; + goto rcd_done; + } - if (rcd->len) { - pci_unmap_page(adapter->pdev, + dma_unmap_page(&adapter->pdev->dev, rbi->dma_addr, rbi->len, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); vmxnet3_append_frag(ctx->skb, rcd, rbi); - } - /* Immediate refill */ - rbi->page = new_page; - rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page, - 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - rxd->addr = cpu_to_le64(rbi->dma_addr); - rxd->len = rbi->len; + /* Immediate refill */ + rbi->page = new_page; + rbi->dma_addr = new_dma_addr; + rxd->addr = cpu_to_le64(rbi->dma_addr); + rxd->len = rbi->len; + } } +sop_done: skb = ctx->skb; if (rcd->eop) { + u32 mtu = adapter->netdev->mtu; skb->len += skb->data_len; +#ifdef VMXNET3_RSS + if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && + (adapter->netdev->features & NETIF_F_RXHASH)) { + enum pkt_hash_types hash_type; + + switch (rcd->rssType) { + case VMXNET3_RCD_RSS_TYPE_IPV4: + case VMXNET3_RCD_RSS_TYPE_IPV6: + hash_type = PKT_HASH_TYPE_L3; + break; + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: + hash_type = PKT_HASH_TYPE_L4; + break; + default: + hash_type = PKT_HASH_TYPE_L3; + break; + } + skb_set_hash(skb, + le32_to_cpu(rcd->rssHash), + hash_type); + } +#endif vmxnet3_rx_csum(adapter, skb, (union 
Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - + if ((!rcd->tcp && !encap_lro) || + !(adapter->netdev->features & NETIF_F_LRO)) + goto not_lro; + + if (segCnt != 0 && mss != 0) { + skb_shinfo(skb)->gso_type = rcd->v4 ? + SKB_GSO_TCPV4 : SKB_GSO_TCPV6; + if (encap_lro) + vmxnet3_lro_tunnel(skb, skb->protocol); + skb_shinfo(skb)->gso_size = mss; + skb_shinfo(skb)->gso_segs = segCnt; + } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { + u32 hlen; + + hlen = vmxnet3_get_hdr_len(adapter, skb, + (union Vmxnet3_GenericDesc *)rcd); + if (hlen == 0) + goto not_lro; + + skb_shinfo(skb)->gso_type = + rcd->v4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6; + if (segCnt != 0) { + skb_shinfo(skb)->gso_segs = segCnt; + skb_shinfo(skb)->gso_size = + DIV_ROUND_UP(skb->len - + hlen, segCnt); + } else { + skb_shinfo(skb)->gso_size = mtu - hlen; + } + } +not_lro: if (unlikely(rcd->ts)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci); - if (adapter->netdev->features & NETIF_F_LRO) + /* Use GRO callback if UPT is enabled */ + if ((adapter->netdev->features & NETIF_F_LRO) && + !rq->shared->updateRxProd) netif_receive_skb(skb); else napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + encap_lro = false; + num_pkts++; } rcd_done: /* device may have skipped some rx descs */ - ring->next2comp = idx; - num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + rbi->comp_state = VMXNET3_RXD_COMP_DONE; + + comp_offset = vmxnet3_cmd_ring_desc_avail(ring); + fill_offset = (idx > ring->next2fill ? 0 : ring->size) + + idx - ring->next2fill - 1; + if (!ring->isOutOfOrder || fill_offset >= comp_offset) + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); + + /* Ensure that the writes to rxd->gen bits will be observed + * after all other writes to rxd objects. 
+ */ + dma_wmb(); + while (num_to_alloc) { - vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, - &rxCmdDesc); - BUG_ON(!rxd->addr); + rbi = rq->buf_info[ring_idx] + ring->next2fill; + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_OOORX_COMP))) + goto refill_buf; + if (ring_idx == 0) { + /* ring0 Type1 buffers can get skipped; re-fill them */ + if (rbi->buf_type != VMXNET3_RX_BUF_SKB) + goto refill_buf; + } + if (rbi->comp_state == VMXNET3_RXD_COMP_DONE) { +refill_buf: + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc); + WARN_ON(!rxd->addr); + + /* Recv desc is ready to be used by the device */ + rxd->gen = ring->gen; + vmxnet3_cmd_ring_adv_next2fill(ring); + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; + num_to_alloc--; + } else { + /* rx completion hasn't occurred */ + ring->isOutOfOrder = 1; + break; + } + } - /* Recv desc is ready to be used by the device */ - rxd->gen = ring->gen; - vmxnet3_cmd_ring_adv_next2fill(ring); - num_to_alloc--; + if (num_to_alloc == 0) { + ring->isOutOfOrder = 0; } /* if needed, update the register */ - if (unlikely(rq->shared->updateRxProd)) { + if (unlikely(rq->shared->updateRxProd) && (ring->next2fill & 0xf) == 0) { VMXNET3_WRITE_BAR0_REG(adapter, rxprod_reg[ring_idx] + rq->qid * 8, ring->next2fill); @@ -1330,8 +2007,10 @@ rcd_done: vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); } + if (need_flush) + xdp_do_flush(); - return num_rxd; + return num_pkts; } @@ -1342,26 +2021,38 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { + struct vmxnet3_rx_buf_info *rbi; #ifdef __BIG_ENDIAN_BITFIELD struct Vmxnet3_RxDesc rxDesc; #endif + + rbi = &rq->buf_info[ring_idx][i]; vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc); if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD && - rq->buf_info[ring_idx][i].skb) { - pci_unmap_single(adapter->pdev, rxd->addr, - rxd->len, PCI_DMA_FROMDEVICE); - dev_kfree_skb(rq->buf_info[ring_idx][i].skb); - rq->buf_info[ring_idx][i].skb = NULL; + rbi->page && rbi->buf_type == VMXNET3_RX_BUF_XDP) { + page_pool_recycle_direct(rq->page_pool, + rbi->page); + rbi->page = NULL; + } else if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD && + rbi->skb) { + dma_unmap_single(&adapter->pdev->dev, rxd->addr, + rxd->len, DMA_FROM_DEVICE); + dev_kfree_skb(rbi->skb); + rbi->skb = NULL; } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY && - rq->buf_info[ring_idx][i].page) { - pci_unmap_page(adapter->pdev, rxd->addr, - rxd->len, PCI_DMA_FROMDEVICE); - put_page(rq->buf_info[ring_idx][i].page); - rq->buf_info[ring_idx][i].page = NULL; + rbi->page) { + dma_unmap_page(&adapter->pdev->dev, rxd->addr, + rxd->len, DMA_FROM_DEVICE); + put_page(rbi->page); + rbi->page = NULL; } } @@ -1372,6 +2063,11 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, rq->comp_ring.gen = VMXNET3_INIT_GEN; rq->comp_ring.next2proc = 0; + + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); + rq->page_pool = NULL; } @@ -1382,6 +2078,7 @@ vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter); + rcu_assign_pointer(adapter->xdp_bpf_prog, NULL); } @@ -1400,40 +2097,77 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, } - 
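Illustration (not part of the patch): with VMXNET3_CAP_OOORX_COMP, receive completions can arrive out of order, so the refill loop above only hands a descriptor back to the device once its buffer's comp_state is DONE, and otherwise marks the ring isOutOfOrder and stops. A stand-alone model of that gate, with hypothetical names, is sketched below.

/* Stand-alone model (not kernel code) of the out-of-order completion
 * bookkeeping: a receive descriptor may only be returned to the device
 * once its completion has been seen; otherwise the refill loop stops and
 * the ring is flagged out-of-order until it catches up.
 */
#include <stdbool.h>
#include <stdio.h>

enum comp_state { COMP_PENDING, COMP_DONE };

struct rx_ring_model {
	enum comp_state comp[8];
	unsigned int next2fill;
	bool out_of_order;
};

static unsigned int refill(struct rx_ring_model *r, unsigned int num_to_alloc)
{
	unsigned int refilled = 0;

	while (num_to_alloc) {
		if (r->comp[r->next2fill] != COMP_DONE) {
			r->out_of_order = true;	/* completion not seen yet */
			break;
		}
		r->comp[r->next2fill] = COMP_PENDING;	/* desc given back to device */
		r->next2fill = (r->next2fill + 1) % 8;
		num_to_alloc--;
		refilled++;
	}
	if (num_to_alloc == 0)
		r->out_of_order = false;
	return refilled;
}

int main(void)
{
	struct rx_ring_model r = { .comp = { COMP_DONE, COMP_DONE, COMP_PENDING } };

	printf("refilled %u, out_of_order=%d\n", refill(&r, 3), r.out_of_order);
	return 0;
}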
kfree(rq->buf_info[0]); - for (i = 0; i < 2; i++) { if (rq->rx_ring[i].base) { - pci_free_consistent(adapter->pdev, rq->rx_ring[i].size - * sizeof(struct Vmxnet3_RxDesc), - rq->rx_ring[i].base, - rq->rx_ring[i].basePA); + dma_free_coherent(&adapter->pdev->dev, + rq->rx_ring[i].size + * sizeof(struct Vmxnet3_RxDesc), + rq->rx_ring[i].base, + rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; + } + + if (rq->data_ring.base) { + dma_free_coherent(&adapter->pdev->dev, + rq->rx_ring[0].size * rq->data_ring.desc_size, + rq->data_ring.base, rq->data_ring.basePA); + rq->data_ring.base = NULL; + } + + if (rq->ts_ring.base) { + dma_free_coherent(&adapter->pdev->dev, + rq->rx_ring[0].size * rq->rx_ts_desc_size, + rq->ts_ring.base, rq->ts_ring.basePA); + rq->ts_ring.base = NULL; } if (rq->comp_ring.base) { - pci_free_consistent(adapter->pdev, rq->comp_ring.size * - sizeof(struct Vmxnet3_RxCompDesc), - rq->comp_ring.base, rq->comp_ring.basePA); + dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size + * sizeof(struct Vmxnet3_RxCompDesc), + rq->comp_ring.base, rq->comp_ring.basePA); rq->comp_ring.base = NULL; } + + kfree(rq->buf_info[0]); + rq->buf_info[0] = NULL; + rq->buf_info[1] = NULL; } +static void +vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; + + if (rq->data_ring.base) { + dma_free_coherent(&adapter->pdev->dev, + (rq->rx_ring[0].size * + rq->data_ring.desc_size), + rq->data_ring.base, + rq->data_ring.basePA); + rq->data_ring.base = NULL; + } + rq->data_ring.desc_size = 0; + } +} static int vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) { - int i; + int i, err; /* initialize buf_info */ for (i = 0; i < rq->rx_ring[0].size; i++) { - /* 1st buf for a pkt is skbuff */ + /* 1st buf for a pkt is skbuff or xdp page */ if (i % adapter->rx_buf_per_pkt == 0) { - rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB; + rq->buf_info[0][i].buf_type = vmxnet3_xdp_enabled(adapter) ? 
+ VMXNET3_RX_BUF_XDP : + VMXNET3_RX_BUF_SKB; rq->buf_info[0][i].len = adapter->skb_buf_size; } else { /* subsequent bufs for a pkt is frag */ rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE; @@ -1452,14 +2186,29 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc)); rq->rx_ring[i].gen = VMXNET3_INIT_GEN; + rq->rx_ring[i].isOutOfOrder = 0; } + + err = vmxnet3_create_pp(adapter, rq, + rq->rx_ring[0].size + rq->rx_ring[1].size); + if (err) + return err; + if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, adapter) == 0) { + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); + rq->page_pool = NULL; + /* at least has 1 rx buffer for the 1st ring */ return -ENOMEM; } vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter); + if (rq->ts_ring.base) + memset(rq->ts_ring.base, 0, + rq->rx_ring[0].size * rq->rx_ts_desc_size); + /* reset the comp ring */ rq->comp_ring.next2proc = 0; memset(rq->comp_ring.base, 0, rq->comp_ring.size * @@ -1503,8 +2252,10 @@ vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) for (i = 0; i < 2; i++) { sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc); - rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz, - &rq->rx_ring[i].basePA); + rq->rx_ring[i].base = dma_alloc_coherent( + &adapter->pdev->dev, sz, + &rq->rx_ring[i].basePA, + GFP_KERNEL); if (!rq->rx_ring[i].base) { netdev_err(adapter->netdev, "failed to allocate rx ring %d\n", i); @@ -1512,17 +2263,49 @@ vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) } } + if ((adapter->rxdataring_enabled) && (rq->data_ring.desc_size != 0)) { + sz = rq->rx_ring[0].size * rq->data_ring.desc_size; + rq->data_ring.base = + dma_alloc_coherent(&adapter->pdev->dev, sz, + &rq->data_ring.basePA, + GFP_KERNEL); + if (!rq->data_ring.base) { + netdev_err(adapter->netdev, + "rx data ring will be disabled\n"); + adapter->rxdataring_enabled = false; + } + } else { + rq->data_ring.base = NULL; + rq->data_ring.desc_size = 0; + } + + if (rq->rx_ts_desc_size != 0) { + sz = rq->rx_ring[0].size * rq->rx_ts_desc_size; + rq->ts_ring.base = + dma_alloc_coherent(&adapter->pdev->dev, sz, + &rq->ts_ring.basePA, + GFP_KERNEL); + if (!rq->ts_ring.base) { + netdev_err(adapter->netdev, + "rx ts ring will be disabled\n"); + rq->rx_ts_desc_size = 0; + } + } else { + rq->ts_ring.base = NULL; + } + sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc); - rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz, - &rq->comp_ring.basePA); + rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz, + &rq->comp_ring.basePA, + GFP_KERNEL); if (!rq->comp_ring.base) { netdev_err(adapter->netdev, "failed to allocate rx comp ring\n"); goto err; } - sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size + - rq->rx_ring[1].size); - bi = kzalloc(sz, GFP_KERNEL); + bi = kcalloc_node(rq->rx_ring[0].size + rq->rx_ring[1].size, + sizeof(rq->buf_info[0][0]), GFP_KERNEL, + dev_to_node(&adapter->pdev->dev)); if (!bi) goto err; @@ -1537,11 +2320,13 @@ err: } -static int +int vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter) { int i, err = 0; + adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) { err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter); if (unlikely(err)) { @@ -1551,6 +2336,10 @@ vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter) goto err_out; } } + + if (!adapter->rxdataring_enabled) + 
vmxnet3_rq_destroy_all_rxdataring(adapter); + return err; err_out: vmxnet3_rq_destroy_all(adapter); @@ -1586,7 +2375,7 @@ vmxnet3_poll(struct napi_struct *napi, int budget) rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget); if (rxd_done < budget) { - napi_complete(napi); + napi_complete_done(napi, rxd_done); vmxnet3_enable_all_intrs(rx_queue->adapter); } return rxd_done; @@ -1617,7 +2406,7 @@ vmxnet3_poll_rx_only(struct napi_struct *napi, int budget) rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget); if (rxd_done < budget) { - napi_complete(napi); + napi_complete_done(napi, rxd_done); vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx); } return rxd_done; @@ -1741,11 +2530,20 @@ vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) - vmxnet3_disable_all_intrs(adapter); - - vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size); - vmxnet3_enable_all_intrs(adapter); + switch (adapter->intr.type) { +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: { + int i; + for (i = 0; i < adapter->num_rx_queues; i++) + vmxnet3_msix_rx(0, &adapter->rx_queue[i]); + break; + } +#endif + case VMXNET3_IT_MSI: + default: + vmxnet3_intr(0, adapter->netdev); + break; + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -1849,10 +2647,9 @@ vmxnet3_request_irqs(struct vmxnet3_adapter *adapter) struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; rq->qid = i; rq->qid2 = i + adapter->num_rx_queues; + rq->dataRingQid = i + 2 * adapter->num_rx_queues; } - - /* init our intr settings */ for (i = 0; i < intr->num_intrs; i++) intr->mod_levels[i] = UPT1_IML_ADAPTIVE; @@ -2005,6 +2802,8 @@ vmxnet3_set_mc(struct net_device *netdev) struct Vmxnet3_RxFilterConf *rxConf = &adapter->shared->devRead.rxFilterConf; u8 *new_table = NULL; + dma_addr_t new_table_pa = 0; + bool new_table_pa_valid = false; u32 new_mode = VMXNET3_RXM_UCAST; if (netdev->flags & IFF_PROMISC) { @@ -2025,19 +2824,29 @@ vmxnet3_set_mc(struct net_device *netdev) if (!netdev_mc_empty(netdev)) { new_table = vmxnet3_copy_mc(netdev); if (new_table) { - new_mode |= VMXNET3_RXM_MCAST; - rxConf->mfTableLen = cpu_to_le16( - netdev_mc_count(netdev) * ETH_ALEN); - rxConf->mfTablePA = cpu_to_le64(virt_to_phys( - new_table)); - } else { - netdev_info(netdev, "failed to copy mcast list" - ", setting ALL_MULTI\n"); + size_t sz = netdev_mc_count(netdev) * ETH_ALEN; + + rxConf->mfTableLen = cpu_to_le16(sz); + new_table_pa = dma_map_single( + &adapter->pdev->dev, + new_table, + sz, + DMA_TO_DEVICE); + if (!dma_mapping_error(&adapter->pdev->dev, + new_table_pa)) { + new_mode |= VMXNET3_RXM_MCAST; + new_table_pa_valid = true; + rxConf->mfTablePA = cpu_to_le64( + new_table_pa); + } + } + if (!new_table_pa_valid) { + netdev_info(netdev, + "failed to copy mcast list, setting ALL_MULTI\n"); new_mode |= VMXNET3_RXM_ALL_MULTI; } } - if (!(new_mode & VMXNET3_RXM_MCAST)) { rxConf->mfTableLen = 0; rxConf->mfTablePA = 0; @@ -2056,6 +2865,9 @@ vmxnet3_set_mc(struct net_device *netdev) VMXNET3_CMD_UPDATE_MAC_FILTERS); spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (new_table_pa_valid) + dma_unmap_single(&adapter->pdev->dev, new_table_pa, + rxConf->mfTableLen, DMA_TO_DEVICE); kfree(new_table); } @@ -2078,8 +2890,11 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) { struct Vmxnet3_DriverShared *shared = adapter->shared; struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt; struct 
Vmxnet3_TxQueueConf *tqc; struct Vmxnet3_RxQueueConf *rqc; + struct Vmxnet3_TxQueueTSConf *tqtsc; + struct Vmxnet3_RxQueueTSConf *rqtsc; int i; memset(shared, 0, sizeof(*shared)); @@ -2096,7 +2911,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1); devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1); - devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter)); + devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa); devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter)); /* set up feature flags */ @@ -2110,6 +2925,10 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) devRead->misc.uptFeatures |= UPT1_F_RXVLAN; + if (adapter->netdev->features & (NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM)) + devRead->misc.uptFeatures |= UPT1_F_RXINNEROFLD; + devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu); devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa); devRead->misc.queueDescLen = cpu_to_le32( @@ -2125,14 +2944,18 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) tqc->txRingBasePA = cpu_to_le64(tq->tx_ring.basePA); tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA); tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA); - tqc->ddPA = cpu_to_le64(virt_to_phys(tq->buf_info)); + tqc->ddPA = cpu_to_le64(~0ULL); tqc->txRingSize = cpu_to_le32(tq->tx_ring.size); tqc->dataRingSize = cpu_to_le32(tq->data_ring.size); + tqc->txDataRingDescSize = cpu_to_le32(tq->txdata_desc_size); tqc->compRingSize = cpu_to_le32(tq->comp_ring.size); - tqc->ddLen = cpu_to_le32( - sizeof(struct vmxnet3_tx_buf_info) * - tqc->txRingSize); + tqc->ddLen = cpu_to_le32(0); tqc->intrIdx = tq->comp_ring.intr_idx; + if (VMXNET3_VERSION_GE_9(adapter)) { + tqtsc = &adapter->tqd_start[i].tsConf; + tqtsc->txTSRingBasePA = cpu_to_le64(tq->ts_ring.basePA); + tqtsc->txTSRingDescSize = cpu_to_le16(tq->tx_ts_desc_size); + } } /* rx queue settings */ @@ -2143,16 +2966,23 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA); rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA); rqc->compRingBasePA = cpu_to_le64(rq->comp_ring.basePA); - rqc->ddPA = cpu_to_le64(virt_to_phys( - rq->buf_info)); + rqc->ddPA = cpu_to_le64(~0ULL); rqc->rxRingSize[0] = cpu_to_le32(rq->rx_ring[0].size); rqc->rxRingSize[1] = cpu_to_le32(rq->rx_ring[1].size); rqc->compRingSize = cpu_to_le32(rq->comp_ring.size); - rqc->ddLen = cpu_to_le32( - sizeof(struct vmxnet3_rx_buf_info) * - (rqc->rxRingSize[0] + - rqc->rxRingSize[1])); + rqc->ddLen = cpu_to_le32(0); rqc->intrIdx = rq->comp_ring.intr_idx; + if (VMXNET3_VERSION_GE_3(adapter)) { + rqc->rxDataRingBasePA = + cpu_to_le64(rq->data_ring.basePA); + rqc->rxDataRingDescSize = + cpu_to_le16(rq->data_ring.desc_size); + } + if (VMXNET3_VERSION_GE_9(adapter)) { + rqtsc = &adapter->rqd_start[i].tsConf; + rqtsc->rxTSRingBasePA = cpu_to_le64(rq->ts_ring.basePA); + rqtsc->rxTSRingDescSize = cpu_to_le16(rq->rx_ts_desc_size); + } } #ifdef VMXNET3_RSS @@ -2160,13 +2990,6 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) if (adapter->rss) { struct UPT1_RSSConf *rssConf = adapter->rss_conf; - static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { - 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, - 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, - 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, - 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, - 0x96, 0xa6, 0x9f, 
0x8f, 0x9e, 0x8c, 0x90, 0xc9, - }; devRead->misc.uptFeatures |= UPT1_F_RSS; devRead->misc.numRxQueues = adapter->num_rx_queues; @@ -2177,28 +3000,41 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ; rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE; rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE; - memcpy(rssConf->hashKey, rss_key, sizeof(rss_key)); + netdev_rss_key_fill(rssConf->hashKey, sizeof(rssConf->hashKey)); for (i = 0; i < rssConf->indTableSize; i++) rssConf->indTable[i] = ethtool_rxfh_indir_default( i, adapter->num_rx_queues); devRead->rssConfDesc.confVer = 1; - devRead->rssConfDesc.confLen = sizeof(*rssConf); - devRead->rssConfDesc.confPA = virt_to_phys(rssConf); + devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf)); + devRead->rssConfDesc.confPA = + cpu_to_le64(adapter->rss_conf_pa); } #endif /* VMXNET3_RSS */ /* intr settings */ - devRead->intrConf.autoMask = adapter->intr.mask_mode == - VMXNET3_IMM_AUTO; - devRead->intrConf.numIntrs = adapter->intr.num_intrs; - for (i = 0; i < adapter->intr.num_intrs; i++) - devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; - - devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; - devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + devRead->intrConf.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devRead->intrConf.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + + devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; + devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + devReadExt->intrConfExt.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devReadExt->intrConfExt.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devReadExt->intrConfExt.modLevels[i] = adapter->intr.mod_levels[i]; + + devReadExt->intrConfExt.eventIntrIdx = adapter->intr.event_intr_idx; + devReadExt->intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } /* rx filter settings */ devRead->rxFilterConf.rxMode = 0; @@ -2208,6 +3044,112 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) /* the rest are already zeroed */ } +static void +vmxnet3_init_bufsize(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_7(adapter)) + return; + + cmdInfo->ringBufSize = adapter->ringBufSize; + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RING_BUFFER_SIZE); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + +static void +vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_3(adapter)) + return; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + cmdInfo->varConf.confVer = 1; + cmdInfo->varConf.confLen = + cpu_to_le32(sizeof(*adapter->coal_conf)); + cmdInfo->varConf.confPA = cpu_to_le64(adapter->coal_conf_pa); + + if (adapter->default_coal_mode) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_COALESCE); + } else { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_COALESCE); + 
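/*
 * Illustrative sketch (not from the patch; helper name is made up): the
 * BAR1 command handshake that vmxnet3_init_coalesce() and
 * vmxnet3_init_rssfields() in this change rely on -- write a command to
 * VMXNET3_REG_CMD under cmd_lock and, for "GET"-style commands, read the
 * result back from the same register. Types and macros come from
 * vmxnet3_int.h.
 */
static u32 demo_issue_cmd(struct vmxnet3_adapter *adapter, u32 cmd)
{
	unsigned long flags;
	u32 ret;

	spin_lock_irqsave(&adapter->cmd_lock, flags);
	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, cmd);
	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
	spin_unlock_irqrestore(&adapter->cmd_lock, flags);

	return ret;
}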
} + + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + +static void +vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_4(adapter)) + return; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + + if (adapter->default_rss_fields) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + adapter->rss_fields = + VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } else { + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } + cmdInfo->setRssFields = adapter->rss_fields; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RSS_FIELDS); + /* Not all requested RSS may get applied, so get and + * cache what was actually applied. + */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + adapter->rss_fields = + VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } + + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} int vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) @@ -2257,11 +3199,15 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) goto activate_err; } + vmxnet3_init_bufsize(adapter); + vmxnet3_init_coalesce(adapter); + vmxnet3_init_rssfields(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) { VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN, + adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN, adapter->rx_queue[i].rx_ring[0].next2fill); - VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 + + VMXNET3_WRITE_BAR0_REG(adapter, (adapter->rx_prod2_offset + (i * VMXNET3_REG_ALIGN)), adapter->rx_queue[i].rx_ring[1].next2fill); } @@ -2274,6 +3220,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) * tx queue if the link is up. 
*/ vmxnet3_check_link(adapter, true); + netif_tx_wake_all_queues(adapter->netdev); for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); vmxnet3_enable_all_intrs(adapter); @@ -2331,7 +3278,7 @@ vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter) static void -vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac) +vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, const u8 *mac) { u32 tmp; @@ -2349,7 +3296,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p) struct sockaddr *addr = p; struct vmxnet3_adapter *adapter = netdev_priv(netdev); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + dev_addr_set(netdev, addr->sa_data); vmxnet3_write_mac_addr(adapter, addr->sa_data); return 0; @@ -2359,7 +3306,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p) /* ==================== initialization and cleanup routines ============ */ static int -vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) +vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter) { int err; unsigned long mmio_start, mmio_len; @@ -2371,30 +3318,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) return err; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { - dev_err(&pdev->dev, - "pci_set_consistent_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = true; - } else { - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { - dev_err(&pdev->dev, - "pci_set_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = false; - } - err = pci_request_selected_regions(pdev, (1 << 2) - 1, vmxnet3_driver_name); if (err) { dev_err(&pdev->dev, "Failed to request region for adapter: error %d\n", err); - goto err_set_mask; + goto err_enable_device; } pci_set_master(pdev); @@ -2422,7 +3351,7 @@ err_bar1: iounmap(adapter->hw_addr0); err_ioremap: pci_release_selected_regions(pdev, (1 << 2) - 1); -err_set_mask: +err_enable_device: pci_disable_device(pdev); return err; } @@ -2440,26 +3369,33 @@ vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter) } -static void +void vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) { size_t sz, i, ring0_size, ring1_size, comp_size; - struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0]; - - - if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - - VMXNET3_MAX_ETH_HDR_SIZE) { - adapter->skb_buf_size = adapter->netdev->mtu + - VMXNET3_MAX_ETH_HDR_SIZE; - if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) - adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; - - adapter->rx_buf_per_pkt = 1; + /* With version7 ring1 will have only T0 buffers */ + if (!VMXNET3_VERSION_GE_7(adapter)) { + if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - + VMXNET3_MAX_ETH_HDR_SIZE) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) + adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; + + adapter->rx_buf_per_pkt = 1; + } else { + adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; + sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + } } else { - adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; - sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + - VMXNET3_MAX_ETH_HDR_SIZE; - adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + adapter->skb_buf_size = min((int)adapter->netdev->mtu + 
VMXNET3_MAX_ETH_HDR_SIZE, + VMXNET3_MAX_SKB_BUF_SIZE); + adapter->rx_buf_per_pkt = 1; + adapter->ringBufSize.ring1BufSizeType0 = cpu_to_le16(adapter->skb_buf_size); + adapter->ringBufSize.ring1BufSizeType1 = 0; + adapter->ringBufSize.ring2BufSizeType1 = cpu_to_le16(PAGE_SIZE); } /* @@ -2472,10 +3408,19 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE / sz * sz); ring1_size = adapter->rx_queue[0].rx_ring[1].size; + ring1_size = (ring1_size + sz - 1) / sz * sz; + ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE / + sz * sz); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + ring0_size = rounddown_pow_of_two(ring0_size); + ring1_size = rounddown_pow_of_two(ring1_size); + } comp_size = ring0_size + ring1_size; for (i = 0; i < adapter->num_rx_queues; i++) { - rq = &adapter->rx_queue[i]; + struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; + rq->rx_ring[0].size = ring0_size; rq->rx_ring[1].size = ring1_size; rq->comp_ring.size = comp_size; @@ -2485,7 +3430,8 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) int vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, - u32 rx_ring_size, u32 rx_ring2_size) + u32 rx_ring_size, u32 rx_ring2_size, + u16 txdata_desc_size, u16 rxdata_desc_size) { int err = 0, i; @@ -2494,10 +3440,13 @@ vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, tq->tx_ring.size = tx_ring_size; tq->data_ring.size = tx_ring_size; tq->comp_ring.size = tx_ring_size; + tq->txdata_desc_size = txdata_desc_size; tq->shared = &adapter->tqd_start[i].ctrl; tq->stopped = true; tq->adapter = adapter; tq->qid = i; + tq->tx_ts_desc_size = adapter->tx_ts_desc_size; + tq->tsPktCount = 1; err = vmxnet3_tq_create(tq, adapter); /* * Too late to change num_tx_queues. 
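/*
 * Illustrative sketch (helper name made up): the ring-size arithmetic used
 * by vmxnet3_adjust_rx_ring_size() in this change -- round the requested
 * size up to a multiple of rx_buf_per_pkt, clamp it to the ring maximum,
 * and on version-7+ devices additionally round down to a power of two so
 * the size stays acceptable for UPT.
 */
#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/types.h>

static u32 demo_round_ring_size(u32 size, u32 align, u32 ring_max, bool pow2)
{
	size = (size + align - 1) / align * align;		/* round up to 'align' */
	size = min_t(u32, size, ring_max / align * align);	/* clamp to ring max */
	if (pow2)
		size = rounddown_pow_of_two(size);
	return size;
}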
We cannot do away with @@ -2510,12 +3459,16 @@ vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, adapter->rx_queue[0].rx_ring[0].size = rx_ring_size; adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size; vmxnet3_adjust_rx_ring_size(adapter); + + adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter); for (i = 0; i < adapter->num_rx_queues; i++) { struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; /* qid and qid2 for rx queues will be assigned later when num * of rx queues is finalized after allocating intrs */ rq->shared = &adapter->rqd_start[i].ctrl; rq->adapter = adapter; + rq->data_ring.desc_size = rxdata_desc_size; + rq->rx_ts_desc_size = adapter->rx_ts_desc_size; err = vmxnet3_rq_create(rq, adapter); if (err) { if (i == 0) { @@ -2533,6 +3486,10 @@ vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, } } } + + if (!adapter->rxdataring_enabled) + vmxnet3_rq_destroy_all_rxdataring(adapter); + return err; queue_err: vmxnet3_tq_destroy_all(adapter); @@ -2550,9 +3507,66 @@ vmxnet3_open(struct net_device *netdev) for (i = 0; i < adapter->num_tx_queues; i++) spin_lock_init(&adapter->tx_queue[i].tx_lock); - err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE, - VMXNET3_DEF_RX_RING_SIZE, - VMXNET3_DEF_RX_RING_SIZE); + if (VMXNET3_VERSION_GE_3(adapter)) { + unsigned long flags; + u16 txdata_desc_size; + u32 ret; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_TXDATA_DESC_SIZE); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + txdata_desc_size = ret & 0xffff; + if ((txdata_desc_size < VMXNET3_TXDATA_DESC_MIN_SIZE) || + (txdata_desc_size > VMXNET3_TXDATA_DESC_MAX_SIZE) || + (txdata_desc_size & VMXNET3_TXDATA_DESC_SIZE_MASK)) { + adapter->txdata_desc_size = + sizeof(struct Vmxnet3_TxDataDesc); + } else { + adapter->txdata_desc_size = txdata_desc_size; + } + if (VMXNET3_VERSION_GE_9(adapter)) + adapter->rxdata_desc_size = (ret >> 16) & 0xffff; + } else { + adapter->txdata_desc_size = sizeof(struct Vmxnet3_TxDataDesc); + } + + if (VMXNET3_VERSION_GE_9(adapter)) { + unsigned long flags; + u16 tx_ts_desc_size = 0; + u16 rx_ts_desc_size = 0; + u32 ret; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_TSRING_DESC_SIZE); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (ret > 0) { + tx_ts_desc_size = (ret & 0xff); + rx_ts_desc_size = ((ret >> 16) & 0xff); + } + if (tx_ts_desc_size > VMXNET3_TXTS_DESC_MAX_SIZE || + tx_ts_desc_size & VMXNET3_TXTS_DESC_SIZE_MASK) + tx_ts_desc_size = 0; + if (rx_ts_desc_size > VMXNET3_RXTS_DESC_MAX_SIZE || + rx_ts_desc_size & VMXNET3_RXTS_DESC_SIZE_MASK) + rx_ts_desc_size = 0; + adapter->tx_ts_desc_size = tx_ts_desc_size; + adapter->rx_ts_desc_size = rx_ts_desc_size; + } else { + adapter->tx_ts_desc_size = 0; + adapter->rx_ts_desc_size = 0; + } + + err = vmxnet3_create_queues(adapter, + adapter->tx_ring_size, + adapter->rx_ring_size, + adapter->rx_ring2_size, + adapter->txdata_desc_size, + adapter->rxdata_desc_size); if (err) goto queue_err; @@ -2580,7 +3594,7 @@ vmxnet3_close(struct net_device *netdev) * completion. 
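/*
 * Illustrative sketch (helper and 'fallback' are made up): how the
 * vmxnet3_open() hunk in this change validates the tx data-descriptor size
 * returned in the low 16 bits of VMXNET3_CMD_GET_TXDATA_DESC_SIZE, falling
 * back to sizeof(struct Vmxnet3_TxDataDesc) when the reported value is out
 * of range or misaligned. Constants come from vmxnet3_int.h.
 */
#include <linux/types.h>

static u16 demo_txdata_desc_size(u32 ret, u16 fallback)
{
	u16 sz = ret & 0xffff;	/* v9+ carries the rx size in the high 16 bits */

	if (sz < VMXNET3_TXDATA_DESC_MIN_SIZE ||
	    sz > VMXNET3_TXDATA_DESC_MAX_SIZE ||
	    (sz & VMXNET3_TXDATA_DESC_SIZE_MASK))
		return fallback;

	return sz;
}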
*/ while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); vmxnet3_quiesce_dev(adapter); @@ -2608,6 +3622,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter) /* we need to enable NAPI, otherwise dev_close will deadlock */ for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); + /* + * Need to clear the quiesce bit to ensure that vmxnet3_close + * can quiesce the device properly + */ + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); dev_close(adapter->netdev); } @@ -2618,17 +3637,12 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU) - return -EINVAL; - - netdev->mtu = new_mtu; - /* * Reset_work may be in the middle of resetting the device, wait for its * completion. */ while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); if (netif_running(netdev)) { vmxnet3_quiesce_dev(adapter); @@ -2636,6 +3650,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) /* we need to re-create the rx queue based on the new mtu */ vmxnet3_rq_destroy_all(adapter); + WRITE_ONCE(netdev->mtu, new_mtu); vmxnet3_adjust_rx_ring_size(adapter); err = vmxnet3_rq_create_all(adapter); if (err) { @@ -2652,6 +3667,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) "Closing it\n", err); goto out; } + } else { + WRITE_ONCE(netdev->mtu, new_mtu); } out: @@ -2664,16 +3681,93 @@ out: static void -vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) +vmxnet3_declare_features(struct vmxnet3_adapter *adapter) { struct net_device *netdev = adapter->netdev; + unsigned long flags; + + if (VMXNET3_VERSION_GE_9(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_DISABLED_OFFLOADS); + adapter->disabledOffloads = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_LRO; - if (dma64) - netdev->hw_features |= NETIF_F_HIGHDMA; + NETIF_F_LRO | NETIF_F_HIGHDMA; + + if (VMXNET3_VERSION_GE_4(adapter)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->hw_enc_features = NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + + if (adapter->disabledOffloads & VMXNET3_OFFLOAD_TSO) { + netdev->hw_features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + netdev->hw_enc_features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + } + + if (adapter->disabledOffloads & VMXNET3_OFFLOAD_LRO) { + netdev->hw_features &= ~(NETIF_F_LRO); + netdev->hw_enc_features &= ~(NETIF_F_LRO); + } + + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + 
VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_TSO)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_TSO))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; + } + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + } + netdev->vlan_features = netdev->hw_features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); @@ -2699,47 +3793,35 @@ vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac) /* * Enable MSIx vectors. * Returns : - * 0 on successful enabling of required vectors, * VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required - * could be enabled. - * number of vectors which can be enabled otherwise (this number is smaller + * were enabled. + * number of vectors which were enabled otherwise (this number is greater * than VMXNET3_LINUX_MIN_MSIX_VECT) */ static int -vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, - int vectors) -{ - int err = 0, vector_threshold; - vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT; - - while (vectors >= vector_threshold) { - err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries, - vectors); - if (!err) { - adapter->intr.num_intrs = vectors; - return 0; - } else if (err < 0) { - dev_err(&adapter->netdev->dev, - "Failed to enable MSI-X, error: %d\n", err); - vectors = 0; - } else if (err < vector_threshold) { - break; - } else { - /* If fails to enable required number of MSI-x vectors - * try enabling minimum number of vectors required. 
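/*
 * Illustrative sketch (helper name made up): the pci_enable_msix_range()
 * fallback used by the rewritten vmxnet3_acquire_msix_vectors() in this
 * change -- request the full vector count as an exact range, and retry
 * with the driver minimum only when the first request cannot be satisfied.
 * A positive return value is the number of vectors enabled, a negative one
 * an error.
 */
#include <linux/pci.h>

static int demo_enable_msix(struct pci_dev *pdev, struct msix_entry *entries,
			    int want, int min_vecs)
{
	int ret = pci_enable_msix_range(pdev, entries, want, want);

	if (ret == -ENOSPC && want > min_vecs)
		ret = pci_enable_msix_range(pdev, entries,
					    min_vecs, min_vecs);

	return ret;
}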
- */ - dev_err(&adapter->netdev->dev, - "Failed to enable %d MSI-X, trying %d instead\n", - vectors, vector_threshold); - vectors = vector_threshold; - } +vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec) +{ + int ret = pci_enable_msix_range(adapter->pdev, + adapter->intr.msix_entries, nvec, nvec); + + if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) { + dev_err(&adapter->netdev->dev, + "Failed to enable %d MSI-X, trying %d\n", + nvec, VMXNET3_LINUX_MIN_MSIX_VECT); + + ret = pci_enable_msix_range(adapter->pdev, + adapter->intr.msix_entries, + VMXNET3_LINUX_MIN_MSIX_VECT, + VMXNET3_LINUX_MIN_MSIX_VECT); } - dev_info(&adapter->pdev->dev, - "Number of MSI-X interrupts which can be allocated " - "is lower than min threshold required.\n"); - return err; + if (ret < 0) { + dev_err(&adapter->netdev->dev, + "Failed to enable MSI-X, error: %d\n", ret); + } + + return ret; } @@ -2766,56 +3848,51 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) #ifdef CONFIG_PCI_MSI if (adapter->intr.type == VMXNET3_IT_MSIX) { - int vector, err = 0; - - adapter->intr.num_intrs = (adapter->share_intr == - VMXNET3_INTR_TXSHARE) ? 1 : - adapter->num_tx_queues; - adapter->intr.num_intrs += (adapter->share_intr == - VMXNET3_INTR_BUDDYSHARE) ? 0 : - adapter->num_rx_queues; - adapter->intr.num_intrs += 1; /* for link event */ - - adapter->intr.num_intrs = (adapter->intr.num_intrs > - VMXNET3_LINUX_MIN_MSIX_VECT - ? adapter->intr.num_intrs : - VMXNET3_LINUX_MIN_MSIX_VECT); - - for (vector = 0; vector < adapter->intr.num_intrs; vector++) - adapter->intr.msix_entries[vector].entry = vector; - - err = vmxnet3_acquire_msix_vectors(adapter, - adapter->intr.num_intrs); + int i, nvec, nvec_allocated; + + nvec = adapter->share_intr == VMXNET3_INTR_TXSHARE ? + 1 : adapter->num_tx_queues; + nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ? + 0 : adapter->num_rx_queues; + nvec += 1; /* for link event */ + nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ? + nvec : VMXNET3_LINUX_MIN_MSIX_VECT; + + for (i = 0; i < nvec; i++) + adapter->intr.msix_entries[i].entry = i; + + nvec_allocated = vmxnet3_acquire_msix_vectors(adapter, nvec); + if (nvec_allocated < 0) + goto msix_err; + /* If we cannot allocate one MSIx vector per queue * then limit the number of rx queues to 1 */ - if (err == VMXNET3_LINUX_MIN_MSIX_VECT) { + if (nvec_allocated == VMXNET3_LINUX_MIN_MSIX_VECT && + nvec != VMXNET3_LINUX_MIN_MSIX_VECT) { if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE || adapter->num_rx_queues != 1) { adapter->share_intr = VMXNET3_INTR_TXSHARE; netdev_err(adapter->netdev, "Number of rx queues : 1\n"); adapter->num_rx_queues = 1; - adapter->intr.num_intrs = - VMXNET3_LINUX_MIN_MSIX_VECT; } - return; } - if (!err) - return; + adapter->intr.num_intrs = nvec_allocated; + return; + +msix_err: /* If we cannot allocate MSIx vectors use only one rx queue */ dev_info(&adapter->pdev->dev, "Failed to enable MSI-X, error %d. 
" - "Limiting #rx queues to 1, try MSI.\n", err); + "Limiting #rx queues to 1, try MSI.\n", nvec_allocated); adapter->intr.type = VMXNET3_IT_MSI; } if (adapter->intr.type == VMXNET3_IT_MSI) { - int err; - err = pci_enable_msi(adapter->pdev); - if (!err) { + if (!pci_enable_msi(adapter->pdev)) { adapter->num_rx_queues = 1; adapter->intr.num_intrs = 1; return; @@ -2846,14 +3923,13 @@ vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter) static void -vmxnet3_tx_timeout(struct net_device *netdev) +vmxnet3_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; netdev_err(adapter->netdev, "tx hang\n"); schedule_work(&adapter->work); - netif_wake_queue(adapter->netdev); } @@ -2880,6 +3956,7 @@ vmxnet3_reset_work(struct work_struct *data) } rtnl_unlock(); + netif_wake_queue(adapter->netdev); clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); } @@ -2894,7 +3971,9 @@ vmxnet3_probe_device(struct pci_dev *pdev, .ndo_start_xmit = vmxnet3_xmit_frame, .ndo_set_mac_address = vmxnet3_set_mac_addr, .ndo_change_mtu = vmxnet3_change_mtu, + .ndo_fix_features = vmxnet3_fix_features, .ndo_set_features = vmxnet3_set_features, + .ndo_features_check = vmxnet3_features_check, .ndo_get_stats64 = vmxnet3_get_stats64, .ndo_tx_timeout = vmxnet3_tx_timeout, .ndo_set_rx_mode = vmxnet3_set_mc, @@ -2903,16 +3982,19 @@ vmxnet3_probe_device(struct pci_dev *pdev, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = vmxnet3_netpoll, #endif + .ndo_bpf = vmxnet3_xdp, + .ndo_xdp_xmit = vmxnet3_xdp_xmit, }; int err; - bool dma64 = false; /* stupid gcc */ u32 ver; struct net_device *netdev; struct vmxnet3_adapter *adapter; u8 mac[ETH_ALEN]; - int size; + int size, i; int num_tx_queues; int num_rx_queues; + int queues; + unsigned long flags; if (!pci_msi_enabled()) enable_mq = 0; @@ -2924,7 +4006,6 @@ vmxnet3_probe_device(struct pci_dev *pdev, else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); if (enable_mq) num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES, @@ -2932,13 +4013,8 @@ vmxnet3_probe_device(struct pci_dev *pdev, else num_tx_queues = 1; - num_tx_queues = rounddown_pow_of_two(num_tx_queues); netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter), max(num_tx_queues, num_rx_queues)); - dev_info(&pdev->dev, - "# of Tx queues : %d, # of Rx queues : %d\n", - num_tx_queues, num_rx_queues); - if (!netdev) return -ENOMEM; @@ -2947,34 +4023,154 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->netdev = netdev; adapter->pdev = pdev; + adapter->tx_ring_size = VMXNET3_DEF_TX_RING_SIZE; + adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; + adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "dma_set_mask failed\n"); + goto err_set_mask; + } + spin_lock_init(&adapter->cmd_lock); - adapter->shared = pci_alloc_consistent(adapter->pdev, - sizeof(struct Vmxnet3_DriverShared), - &adapter->shared_pa); + adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, + sizeof(struct vmxnet3_adapter), + DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { + dev_err(&pdev->dev, "Failed to map dma\n"); + err = -EFAULT; + goto err_set_mask; + } + adapter->shared = dma_alloc_coherent( + &adapter->pdev->dev, + sizeof(struct Vmxnet3_DriverShared), + &adapter->shared_pa, GFP_KERNEL); if (!adapter->shared) { dev_err(&pdev->dev, "Failed to allocate memory\n"); err = 
-ENOMEM; goto err_alloc_shared; } - adapter->num_rx_queues = num_rx_queues; - adapter->num_tx_queues = num_tx_queues; + err = vmxnet3_alloc_pci_resources(adapter); + if (err < 0) + goto err_alloc_pci; + + ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); + for (i = VMXNET3_REV_9; i >= VMXNET3_REV_1; i--) { + if (ver & (1 << i)) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << i); + adapter->version = i + 1; + break; + } + } + if (i < VMXNET3_REV_1) { + dev_err(&pdev->dev, + "Incompatible h/w version (0x%x) for adapter\n", ver); + err = -EBUSY; + goto err_ver; + } + dev_dbg(&pdev->dev, "Using device version %d\n", adapter->version); + + ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS); + if (ver & 1) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1); + } else { + dev_err(&pdev->dev, + "Incompatible upt version (0x%x) for adapter\n", ver); + err = -EBUSY; + goto err_ver; + } + + if (VMXNET3_VERSION_GE_7(adapter)) { + adapter->devcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_DCR); + adapter->ptcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_PTCR); + if (adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->dev_caps[0] = adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_LARGE_BAR); + } + if (!(adapter->ptcap_supported[0] & (1UL << VMXNET3_DCR_ERROR)) && + adapter->ptcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP) && + adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP)) { + adapter->dev_caps[0] |= adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_OOORX_COMP); + } + if (adapter->dev_caps[0]) + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } + + if (VMXNET3_VERSION_GE_7(adapter) && + adapter->dev_caps[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->tx_prod_offset = VMXNET3_REG_LB_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_LB_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_LB_RXPROD2; + } else { + adapter->tx_prod_offset = VMXNET3_REG_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_RXPROD2; + } + + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (queues > 0) { + adapter->num_rx_queues = min(num_rx_queues, ((queues >> 8) & 0xff)); + adapter->num_tx_queues = min(num_tx_queues, (queues & 0xff)); + } else { + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + if (adapter->num_rx_queues > VMXNET3_MAX_RX_QUEUES || + adapter->num_tx_queues > VMXNET3_MAX_TX_QUEUES) { + adapter->queuesExtEnabled = true; + } else { + adapter->queuesExtEnabled = false; + } + } else { + adapter->queuesExtEnabled = false; + num_rx_queues = rounddown_pow_of_two(num_rx_queues); + num_tx_queues = rounddown_pow_of_two(num_tx_queues); + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + dev_info(&pdev->dev, + "# of Tx queues : 
%d, # of Rx queues : %d\n", + adapter->num_tx_queues, adapter->num_rx_queues); + adapter->rx_buf_per_pkt = 1; size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; - adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size, - &adapter->queue_desc_pa); + adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size, + &adapter->queue_desc_pa, + GFP_KERNEL); if (!adapter->tqd_start) { dev_err(&pdev->dev, "Failed to allocate memory\n"); err = -ENOMEM; - goto err_alloc_queue_desc; + goto err_ver; } adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + adapter->num_tx_queues); + if (VMXNET3_VERSION_GE_9(adapter)) + adapter->latencyConf = &adapter->tqd_start->tsConf.latencyConf; - adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL); + adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_PMConf), + &adapter->pm_conf_pa, + GFP_KERNEL); if (adapter->pm_conf == NULL) { err = -ENOMEM; goto err_alloc_pm; @@ -2982,39 +4178,43 @@ vmxnet3_probe_device(struct pci_dev *pdev, #ifdef VMXNET3_RSS - adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL); + adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct UPT1_RSSConf), + &adapter->rss_conf_pa, + GFP_KERNEL); if (adapter->rss_conf == NULL) { err = -ENOMEM; goto err_alloc_rss; } #endif /* VMXNET3_RSS */ - err = vmxnet3_alloc_pci_resources(adapter, &dma64); - if (err < 0) - goto err_alloc_pci; - - ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & 1) { - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1); - } else { - dev_err(&pdev->dev, - "Incompatible h/w version (0x%x) for adapter\n", ver); - err = -EBUSY; - goto err_ver; + if (VMXNET3_VERSION_GE_3(adapter)) { + adapter->coal_conf = + dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_CoalesceScheme) + , + &adapter->coal_conf_pa, + GFP_KERNEL); + if (!adapter->coal_conf) { + err = -ENOMEM; + goto err_coal_conf; + } + adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED; + adapter->default_coal_mode = true; } - ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS); - if (ver & 1) { - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1); - } else { - dev_err(&pdev->dev, - "Incompatible upt version (0x%x) for adapter\n", ver); - err = -EBUSY; - goto err_ver; + if (VMXNET3_VERSION_GE_4(adapter)) { + adapter->default_rss_fields = true; + adapter->rss_fields = VMXNET3_RSS_FIELDS_DEFAULT; } SET_NETDEV_DEV(netdev, &pdev->dev); - vmxnet3_declare_features(adapter, dma64); + vmxnet3_declare_features(adapter); + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + + adapter->rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ? 
+ VMXNET3_DEF_RXDATA_DESC_SIZE : 0; if (adapter->num_tx_queues == adapter->num_rx_queues) adapter->share_intr = VMXNET3_INTR_BUDDYSHARE; @@ -3036,12 +4236,19 @@ vmxnet3_probe_device(struct pci_dev *pdev, #endif vmxnet3_read_mac_addr(adapter, mac); - memcpy(netdev->dev_addr, mac, netdev->addr_len); + dev_addr_set(netdev, mac); netdev->netdev_ops = &vmxnet3_netdev_ops; vmxnet3_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; + /* MTU range: 60 - 9190 */ + netdev->min_mtu = VMXNET3_MIN_MTU; + if (VMXNET3_VERSION_GE_6(adapter)) + netdev->max_mtu = VMXNET3_V6_MAX_MTU; + else + netdev->max_mtu = VMXNET3_MAX_MTU; + INIT_WORK(&adapter->work, vmxnet3_reset_work); set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); @@ -3050,11 +4257,11 @@ vmxnet3_probe_device(struct pci_dev *pdev, for (i = 0; i < adapter->num_rx_queues; i++) { netif_napi_add(adapter->netdev, &adapter->rx_queue[i].napi, - vmxnet3_poll_rx_only, 64); + vmxnet3_poll_rx_only); } } else { netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi, - vmxnet3_poll, 64); + vmxnet3_poll); } netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); @@ -3072,23 +4279,33 @@ vmxnet3_probe_device(struct pci_dev *pdev, return 0; err_register: + if (VMXNET3_VERSION_GE_3(adapter)) { + dma_free_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_CoalesceScheme), + adapter->coal_conf, adapter->coal_conf_pa); + } vmxnet3_free_intr_resources(adapter); -err_ver: - vmxnet3_free_pci_resources(adapter); -err_alloc_pci: +err_coal_conf: #ifdef VMXNET3_RSS - kfree(adapter->rss_conf); + dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf), + adapter->rss_conf, adapter->rss_conf_pa); err_alloc_rss: #endif - kfree(adapter->pm_conf); + dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf), + adapter->pm_conf, adapter->pm_conf_pa); err_alloc_pm: - pci_free_consistent(adapter->pdev, size, adapter->tqd_start, - adapter->queue_desc_pa); -err_alloc_queue_desc: - pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared), - adapter->shared, adapter->shared_pa); + dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start, + adapter->queue_desc_pa); +err_ver: + vmxnet3_free_pci_resources(adapter); +err_alloc_pci: + dma_free_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_DriverShared), + adapter->shared, adapter->shared_pa); err_alloc_shared: - pci_set_drvdata(pdev, NULL); + dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, + sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE); +err_set_mask: free_netdev(netdev); return err; } @@ -3100,7 +4317,8 @@ vmxnet3_remove_device(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct vmxnet3_adapter *adapter = netdev_priv(netdev); int size = 0; - int num_rx_queues; + int num_rx_queues, rx_queues; + unsigned long flags; #ifdef VMXNET3_RSS if (enable_mq) @@ -3109,7 +4327,24 @@ vmxnet3_remove_device(struct pci_dev *pdev) else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); + if (!VMXNET3_VERSION_GE_6(adapter)) { + num_rx_queues = rounddown_pow_of_two(num_rx_queues); + } + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + rx_queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (rx_queues > 0) + rx_queues = (rx_queues >> 8) & 0xff; + else + rx_queues = min(num_rx_queues, VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + 
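/*
 * Illustrative sketch (helper name made up): advertising the supported MTU
 * bounds at probe time, as the hunk above does. With netdev->min_mtu and
 * netdev->max_mtu set, dev_set_mtu() rejects out-of-range values in the
 * networking core, which is why the explicit range check was dropped from
 * vmxnet3_change_mtu() earlier in this change.
 */
#include <linux/netdevice.h>

static void demo_set_mtu_range(struct net_device *netdev, bool version_ge_6)
{
	netdev->min_mtu = VMXNET3_MIN_MTU;
	netdev->max_mtu = version_ge_6 ? VMXNET3_V6_MAX_MTU
				       : VMXNET3_MAX_MTU;
}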
num_rx_queues = min(num_rx_queues, rx_queues); + } else { + num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + } cancel_work_sync(&adapter->work); @@ -3117,20 +4352,56 @@ vmxnet3_remove_device(struct pci_dev *pdev) vmxnet3_free_intr_resources(adapter); vmxnet3_free_pci_resources(adapter); + if (VMXNET3_VERSION_GE_3(adapter)) { + dma_free_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_CoalesceScheme), + adapter->coal_conf, adapter->coal_conf_pa); + } #ifdef VMXNET3_RSS - kfree(adapter->rss_conf); + dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf), + adapter->rss_conf, adapter->rss_conf_pa); #endif - kfree(adapter->pm_conf); + dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf), + adapter->pm_conf, adapter->pm_conf_pa); size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues; - pci_free_consistent(adapter->pdev, size, adapter->tqd_start, - adapter->queue_desc_pa); - pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared), - adapter->shared, adapter->shared_pa); + dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start, + adapter->queue_desc_pa); + dma_free_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_DriverShared), + adapter->shared, adapter->shared_pa); + dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, + sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE); free_netdev(netdev); } +static void vmxnet3_shutdown_device(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + unsigned long flags; + + /* Reset_work may be in the middle of resetting the device, wait for its + * completion. + */ + while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, + &adapter->state)) { + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); + return; + } + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_QUIESCE_DEV); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + vmxnet3_disable_all_intrs(adapter); + + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); +} + #ifdef CONFIG_PM @@ -3160,7 +4431,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; @@ -3177,13 +4447,19 @@ vmxnet3_suspend(struct device *device) } if (adapter->wol & WAKE_ARP) { - in_dev = in_dev_get(netdev); - if (!in_dev) + rcu_read_lock(); + + in_dev = __in_dev_get_rcu(netdev); + if (!in_dev) { + rcu_read_unlock(); goto skip_arp; + } - ifa = (struct in_ifaddr *)in_dev->ifa_list; - if (!ifa) + ifa = rcu_dereference(in_dev->ifa_list); + if (!ifa) { + rcu_read_unlock(); goto skip_arp; + } pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/ sizeof(struct arphdr) + /* ARP header */ @@ -3203,7 +4479,9 @@ vmxnet3_suspend(struct device *device) /* The Unicast IPv4 address in 'tip' field. */ arpreq += 2 * ETH_ALEN + sizeof(u32); - *(u32 *)arpreq = ifa->ifa_address; + *(__be32 *)arpreq = ifa->ifa_address; + + rcu_read_unlock(); /* The mask for the relevant bits. 
*/ pmConf->filters[i].mask[0] = 0x00; @@ -3212,7 +4490,6 @@ vmxnet3_suspend(struct device *device) pmConf->filters[i].mask[3] = 0x00; pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */ pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */ - in_dev_put(in_dev); pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER; i++; @@ -3227,8 +4504,8 @@ skip_arp: adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1); adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof( *pmConf)); - adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys( - pmConf)); + adapter->shared->devRead.pmConfDesc.confPA = + cpu_to_le64(adapter->pm_conf_pa); spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, @@ -3248,27 +4525,15 @@ skip_arp: static int vmxnet3_resume(struct device *device) { - int err, i = 0; + int err; unsigned long flags; struct pci_dev *pdev = to_pci_dev(device); struct net_device *netdev = pci_get_drvdata(pdev); struct vmxnet3_adapter *adapter = netdev_priv(netdev); - struct Vmxnet3_PMConf *pmConf; if (!netif_running(netdev)) return 0; - /* Destroy wake-up filters. */ - pmConf = adapter->pm_conf; - memset(pmConf, 0, sizeof(*pmConf)); - - adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1); - adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof( - *pmConf)); - adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys( - pmConf)); - - netif_device_attach(netdev); pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); err = pci_enable_device_mem(pdev); @@ -3277,15 +4542,31 @@ vmxnet3_resume(struct device *device) pci_enable_wake(pdev, PCI_D0, 0); + vmxnet3_alloc_intr_resources(adapter); + + /* During hibernate and suspend, device has to be reinitialized as the + * device state need not be preserved. + */ + + /* Need not check adapter state as other reset tasks cannot run during + * device resume. + */ spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, - VMXNET3_CMD_UPDATE_PMCFG); + VMXNET3_CMD_QUIESCE_DEV); spin_unlock_irqrestore(&adapter->cmd_lock, flags); - vmxnet3_alloc_intr_resources(adapter); - vmxnet3_request_irqs(adapter); - for (i = 0; i < adapter->num_rx_queues; i++) - napi_enable(&adapter->rx_queue[i].napi); - vmxnet3_enable_all_intrs(adapter); + vmxnet3_tq_cleanup_all(adapter); + vmxnet3_rq_cleanup_all(adapter); + + vmxnet3_reset_dev(adapter); + err = vmxnet3_activate_dev(adapter); + if (err != 0) { + netdev_err(netdev, + "failed to re-activate on resume, error: %d", err); + vmxnet3_force_close(adapter); + return err; + } + netif_device_attach(netdev); return 0; } @@ -3293,6 +4574,8 @@ vmxnet3_resume(struct device *device) static const struct dev_pm_ops vmxnet3_pm_ops = { .suspend = vmxnet3_suspend, .resume = vmxnet3_resume, + .freeze = vmxnet3_suspend, + .restore = vmxnet3_resume, }; #endif @@ -3301,6 +4584,7 @@ static struct pci_driver vmxnet3_driver = { .id_table = vmxnet3_pciid_table, .probe = vmxnet3_probe_device, .remove = vmxnet3_remove_device, + .shutdown = vmxnet3_shutdown_device, #ifdef CONFIG_PM .driver.pm = &vmxnet3_pm_ops, #endif |
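/*
 * Illustrative sketch (helper name made up): the RCU lookup pattern the
 * wake-on-ARP filter setup in vmxnet3_suspend() switches to in this change
 * -- __in_dev_get_rcu() plus rcu_dereference() under rcu_read_lock()
 * replaces the in_dev_get()/in_dev_put() pair, returning the first IPv4
 * address of the device if one is configured.
 */
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>

static bool demo_first_ipv4_addr(struct net_device *netdev, __be32 *addr)
{
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	bool found = false;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(netdev);
	if (in_dev) {
		ifa = rcu_dereference(in_dev->ifa_list);
		if (ifa) {
			*addr = ifa->ifa_address;
			found = true;
		}
	}
	rcu_read_unlock();

	return found;
}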
