diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_rx.c')
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 211 |
1 files changed, 119 insertions, 92 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 8f762fc170b3..13666d50b90f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,63 +42,49 @@ #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/irq.h> +#include <linux/skbuff_ref.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif +#include <net/page_pool/helpers.h> #include "mlx4_en.h" -static int mlx4_alloc_page(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frag, - gfp_t gfp) -{ - struct page *page; - dma_addr_t dma; - - page = alloc_page(gfp); - if (unlikely(!page)) - return -ENOMEM; - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); - if (unlikely(dma_mapping_error(priv->ddev, dma))) { - __free_page(page); - return -ENOMEM; - } - frag->page = page; - frag->dma = dma; - frag->page_offset = priv->rx_headroom; - return 0; -} - static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, gfp_t gfp) { + dma_addr_t dma; int i; for (i = 0; i < priv->num_frags; i++, frags++) { if (!frags->page) { - if (mlx4_alloc_page(priv, frags, gfp)) + frags->page = page_pool_alloc_pages(ring->pp, gfp); + if (!frags->page) { + ring->alloc_fail++; return -ENOMEM; + } + page_pool_fragment_page(frags->page, 1); + frags->page_offset = priv->rx_headroom; + ring->rx_alloc_pages++; } - rx_desc->data[i].addr = cpu_to_be64(frags->dma + - frags->page_offset); + dma = page_pool_get_dma_addr(frags->page); + rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset); } return 0; } static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_alloc *frag) { - if (frag->page) { - dma_unmap_page(priv->ddev, frag->dma, - PAGE_SIZE, priv->dma_dir); - __free_page(frag->page); - } + if (frag->page) + page_pool_put_full_page(ring->pp, frag->page, false); /* We need to clear all fields, otherwise a change of priv->log_rx_info * could lead to see garbage later in frag->page. */ @@ -138,18 +124,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, (index << ring->log_stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (likely(ring->page_cache.index > 0)) { - /* XDP uses a single page per frame */ - if (!frags->page) { - ring->page_cache.index--; - frags->page = ring->page_cache.buf[ring->page_cache.index].page; - frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; - } - frags->page_offset = XDP_PACKET_HEADROOM; - rx_desc->data[0].addr = cpu_to_be64(frags->dma + - XDP_PACKET_HEADROOM); - return 0; - } return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } @@ -175,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags + nr); + mlx4_en_free_frag(priv, ring, frags + nr); } } @@ -265,6 +239,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, u32 size, u16 stride, int node, int queue_index) { struct mlx4_en_dev *mdev = priv->mdev; + struct page_pool_params pp = {}; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; @@ -283,8 +258,27 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; - if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + pp.flags = PP_FLAG_DMA_MAP; + pp.pool_size = size * DIV_ROUND_UP(priv->rx_skb_size, PAGE_SIZE); + pp.nid = node; + pp.napi = &priv->rx_cq[queue_index]->napi; + pp.netdev = priv->dev; + pp.dev = &mdev->dev->persist->pdev->dev; + pp.dma_dir = priv->dma_dir; + + ring->pp = page_pool_create(&pp); + if (IS_ERR(ring->pp)) { + err = PTR_ERR(ring->pp); goto err_ring; + } + + if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + goto err_pp; + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_POOL, + ring->pp); + if (err) + goto err_xdp_info; tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); @@ -316,6 +310,8 @@ err_info: ring->rx_info = NULL; err_xdp_info: xdp_rxq_info_unreg(&ring->xdp_rxq); +err_pp: + page_pool_destroy(ring->pp); err_ring: kfree(ring); *pring = NULL; @@ -400,32 +396,12 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) for (ring = 0; ring < priv->rx_ring_num; ring++) { if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { local_bh_disable(); - napi_reschedule(&priv->rx_cq[ring]->napi); + napi_schedule(&priv->rx_cq[ring]->napi); local_bh_enable(); } } } -/* When the rx ring is running in page-per-packet mode, a released frame can go - * directly into a small cache, to avoid unmapping or touching the page - * allocator. In bpf prog performance scenarios, buffers are either forwarded - * or dropped, never converted to skbs, so every page can come directly from - * this cache when it is sized to be a multiple of the napi budget. - */ -bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, - struct mlx4_en_rx_alloc *frame) -{ - struct mlx4_en_page_cache *cache = &ring->page_cache; - - if (cache->index >= MLX4_EN_CACHE_SIZE) - return false; - - cache->buf[cache->index].page = frame->page; - cache->buf[cache->index].dma = frame->dma; - cache->index++; - return true; -} - void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) @@ -442,6 +418,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, xdp_rxq_info_unreg(&ring->xdp_rxq); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); kvfree(ring->rx_info); + page_pool_destroy(ring->pp); ring->rx_info = NULL; kfree(ring); *pring = NULL; @@ -450,14 +427,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { - int i; - - for (i = 0; i < ring->page_cache.index; i++) { - dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, - PAGE_SIZE, priv->dma_dir); - put_page(ring->page_cache.buf[i].page); - } - ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -484,7 +453,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, if (unlikely(!page)) goto fail; - dma = frags->dma; + dma = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, frag_size, priv->dma_dir); @@ -493,8 +462,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, truesize += frag_info->frag_stride; if (frag_info->frag_stride == PAGE_SIZE / 2) { + struct netmem_desc *desc = pp_page_to_nmdesc(page); + frags->page_offset ^= PAGE_SIZE / 2; release = page_count(page) != 1 || + atomic_long_read(&desc->pp_ref_count) != 1 || page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } else if (!priv->rx_headroom) { @@ -508,10 +480,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; } if (release) { - dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); frags->page = NULL; } else { - page_ref_inc(page); + page_pool_ref_page(page); } nr++; @@ -661,9 +632,59 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, #define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4) #endif +struct mlx4_en_xdp_buff { + struct xdp_buff xdp; + struct mlx4_cqe *cqe; + struct mlx4_en_dev *mdev; + struct mlx4_en_rx_ring *ring; + struct net_device *dev; +}; + +int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) +{ + struct mlx4_en_xdp_buff *_ctx = (void *)ctx; + + if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL)) + return -ENODATA; + + *timestamp = mlx4_en_get_hwtstamp(_ctx->mdev, + mlx4_en_get_cqe_ts(_ctx->cqe)); + return 0; +} + +int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + struct mlx4_en_xdp_buff *_ctx = (void *)ctx; + struct mlx4_cqe *cqe = _ctx->cqe; + enum xdp_rss_hash_type xht = 0; + __be16 status; + + if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) + return -ENODATA; + + *hash = be32_to_cpu(cqe->immed_rss_invalid); + status = cqe->status; + if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP)) + xht = XDP_RSS_L4_TCP; + if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP)) + xht = XDP_RSS_L4_UDP; + if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F)) + xht |= XDP_RSS_L3_IPV4; + if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) { + xht |= XDP_RSS_L3_IPV6; + if (cqe->ipv6_ext_mask) + xht |= XDP_RSS_L3_DYNHDR; + } + *rss_type = xht; + + return 0; +} + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_xdp_buff mxbuf = {}; int factor = priv->cqe_factor; struct mlx4_en_rx_ring *ring; struct bpf_prog *xdp_prog; @@ -671,7 +692,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud bool doorbell_pending; bool xdp_redir_flush; struct mlx4_cqe *cqe; - struct xdp_buff xdp; int polled = 0; int index; @@ -681,7 +701,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ring = priv->rx_ring[cq_ring]; xdp_prog = rcu_dereference_bh(ring->xdp_prog); - xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq); + xdp_init_buff(&mxbuf.xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq); doorbell_pending = false; xdp_redir_flush = false; @@ -713,7 +733,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { - en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", + en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n", ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); goto next; @@ -732,7 +752,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); @@ -771,29 +792,34 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp_prepare_buff(&xdp, va - frags[0].page_offset, - frags[0].page_offset, length, false); - orig_data = xdp.data; - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - length = xdp.data_end - xdp.data; - if (xdp.data != orig_data) { - frags[0].page_offset = xdp.data - - xdp.data_hard_start; - va = xdp.data; + xdp_prepare_buff(&mxbuf.xdp, va - frags[0].page_offset, + frags[0].page_offset, length, true); + orig_data = mxbuf.xdp.data; + mxbuf.cqe = cqe; + mxbuf.mdev = priv->mdev; + mxbuf.ring = ring; + mxbuf.dev = dev; + + act = bpf_prog_run_xdp(xdp_prog, &mxbuf.xdp); + + length = mxbuf.xdp.data_end - mxbuf.xdp.data; + if (mxbuf.xdp.data != orig_data) { + frags[0].page_offset = mxbuf.xdp.data - + mxbuf.xdp.data_hard_start; + va = mxbuf.xdp.data; } switch (act) { case XDP_PASS: break; case XDP_REDIRECT: - if (likely(!xdp_do_redirect(dev, &xdp, xdp_prog))) { + if (likely(!xdp_do_redirect(dev, &mxbuf.xdp, xdp_prog))) { ring->xdp_redirect++; xdp_redir_flush = true; frags[0].page = NULL; @@ -830,6 +856,7 @@ xdp_drop_no_cnt: skb = napi_get_frags(&cq->napi); if (unlikely(!skb)) goto next; + skb_mark_for_recycle(skb); if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { u64 timestamp = mlx4_en_get_cqe_ts(cqe); |
