summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx4/en_rx.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_rx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c211
1 files changed, 119 insertions, 92 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 8f762fc170b3..13666d50b90f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -42,63 +42,49 @@
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/irq.h>
+#include <linux/skbuff_ref.h>
#include <net/ip.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_checksum.h>
#endif
+#include <net/page_pool/helpers.h>
#include "mlx4_en.h"
-static int mlx4_alloc_page(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_alloc *frag,
- gfp_t gfp)
-{
- struct page *page;
- dma_addr_t dma;
-
- page = alloc_page(gfp);
- if (unlikely(!page))
- return -ENOMEM;
- dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
- if (unlikely(dma_mapping_error(priv->ddev, dma))) {
- __free_page(page);
- return -ENOMEM;
- }
- frag->page = page;
- frag->dma = dma;
- frag->page_offset = priv->rx_headroom;
- return 0;
-}
-
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring,
struct mlx4_en_rx_desc *rx_desc,
struct mlx4_en_rx_alloc *frags,
gfp_t gfp)
{
+ dma_addr_t dma;
int i;
for (i = 0; i < priv->num_frags; i++, frags++) {
if (!frags->page) {
- if (mlx4_alloc_page(priv, frags, gfp))
+ frags->page = page_pool_alloc_pages(ring->pp, gfp);
+ if (!frags->page) {
+ ring->alloc_fail++;
return -ENOMEM;
+ }
+ page_pool_fragment_page(frags->page, 1);
+ frags->page_offset = priv->rx_headroom;
+
ring->rx_alloc_pages++;
}
- rx_desc->data[i].addr = cpu_to_be64(frags->dma +
- frags->page_offset);
+ dma = page_pool_get_dma_addr(frags->page);
+ rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset);
}
return 0;
}
static void mlx4_en_free_frag(const struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring,
struct mlx4_en_rx_alloc *frag)
{
- if (frag->page) {
- dma_unmap_page(priv->ddev, frag->dma,
- PAGE_SIZE, priv->dma_dir);
- __free_page(frag->page);
- }
+ if (frag->page)
+ page_pool_put_full_page(ring->pp, frag->page, false);
/* We need to clear all fields, otherwise a change of priv->log_rx_info
* could lead to see garbage later in frag->page.
*/
@@ -138,18 +124,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
(index << ring->log_stride);
struct mlx4_en_rx_alloc *frags = ring->rx_info +
(index << priv->log_rx_info);
- if (likely(ring->page_cache.index > 0)) {
- /* XDP uses a single page per frame */
- if (!frags->page) {
- ring->page_cache.index--;
- frags->page = ring->page_cache.buf[ring->page_cache.index].page;
- frags->dma = ring->page_cache.buf[ring->page_cache.index].dma;
- }
- frags->page_offset = XDP_PACKET_HEADROOM;
- rx_desc->data[0].addr = cpu_to_be64(frags->dma +
- XDP_PACKET_HEADROOM);
- return 0;
- }
return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp);
}
@@ -175,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
frags = ring->rx_info + (index << priv->log_rx_info);
for (nr = 0; nr < priv->num_frags; nr++) {
en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
- mlx4_en_free_frag(priv, frags + nr);
+ mlx4_en_free_frag(priv, ring, frags + nr);
}
}
@@ -265,6 +239,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
u32 size, u16 stride, int node, int queue_index)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct page_pool_params pp = {};
struct mlx4_en_rx_ring *ring;
int err = -ENOMEM;
int tmp;
@@ -283,8 +258,27 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
- if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
+ pp.flags = PP_FLAG_DMA_MAP;
+ pp.pool_size = size * DIV_ROUND_UP(priv->rx_skb_size, PAGE_SIZE);
+ pp.nid = node;
+ pp.napi = &priv->rx_cq[queue_index]->napi;
+ pp.netdev = priv->dev;
+ pp.dev = &mdev->dev->persist->pdev->dev;
+ pp.dma_dir = priv->dma_dir;
+
+ ring->pp = page_pool_create(&pp);
+ if (IS_ERR(ring->pp)) {
+ err = PTR_ERR(ring->pp);
goto err_ring;
+ }
+
+ if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
+ goto err_pp;
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ ring->pp);
+ if (err)
+ goto err_xdp_info;
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
@@ -316,6 +310,8 @@ err_info:
ring->rx_info = NULL;
err_xdp_info:
xdp_rxq_info_unreg(&ring->xdp_rxq);
+err_pp:
+ page_pool_destroy(ring->pp);
err_ring:
kfree(ring);
*pring = NULL;
@@ -400,32 +396,12 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
for (ring = 0; ring < priv->rx_ring_num; ring++) {
if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) {
local_bh_disable();
- napi_reschedule(&priv->rx_cq[ring]->napi);
+ napi_schedule(&priv->rx_cq[ring]->napi);
local_bh_enable();
}
}
}
-/* When the rx ring is running in page-per-packet mode, a released frame can go
- * directly into a small cache, to avoid unmapping or touching the page
- * allocator. In bpf prog performance scenarios, buffers are either forwarded
- * or dropped, never converted to skbs, so every page can come directly from
- * this cache when it is sized to be a multiple of the napi budget.
- */
-bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
- struct mlx4_en_rx_alloc *frame)
-{
- struct mlx4_en_page_cache *cache = &ring->page_cache;
-
- if (cache->index >= MLX4_EN_CACHE_SIZE)
- return false;
-
- cache->buf[cache->index].page = frame->page;
- cache->buf[cache->index].dma = frame->dma;
- cache->index++;
- return true;
-}
-
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride)
@@ -442,6 +418,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
kvfree(ring->rx_info);
+ page_pool_destroy(ring->pp);
ring->rx_info = NULL;
kfree(ring);
*pring = NULL;
@@ -450,14 +427,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
- int i;
-
- for (i = 0; i < ring->page_cache.index; i++) {
- dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma,
- PAGE_SIZE, priv->dma_dir);
- put_page(ring->page_cache.buf[i].page);
- }
- ring->page_cache.index = 0;
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE;
@@ -484,7 +453,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
if (unlikely(!page))
goto fail;
- dma = frags->dma;
+ dma = page_pool_get_dma_addr(page);
dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
frag_size, priv->dma_dir);
@@ -493,8 +462,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
truesize += frag_info->frag_stride;
if (frag_info->frag_stride == PAGE_SIZE / 2) {
+ struct netmem_desc *desc = pp_page_to_nmdesc(page);
+
frags->page_offset ^= PAGE_SIZE / 2;
release = page_count(page) != 1 ||
+ atomic_long_read(&desc->pp_ref_count) != 1 ||
page_is_pfmemalloc(page) ||
page_to_nid(page) != numa_mem_id();
} else if (!priv->rx_headroom) {
@@ -508,10 +480,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
release = frags->page_offset + frag_info->frag_size > PAGE_SIZE;
}
if (release) {
- dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir);
frags->page = NULL;
} else {
- page_ref_inc(page);
+ page_pool_ref_page(page);
}
nr++;
@@ -661,9 +632,59 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
#endif
+struct mlx4_en_xdp_buff {
+ struct xdp_buff xdp;
+ struct mlx4_cqe *cqe;
+ struct mlx4_en_dev *mdev;
+ struct mlx4_en_rx_ring *ring;
+ struct net_device *dev;
+};
+
+int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+ struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
+
+ if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL))
+ return -ENODATA;
+
+ *timestamp = mlx4_en_get_hwtstamp(_ctx->mdev,
+ mlx4_en_get_cqe_ts(_ctx->cqe));
+ return 0;
+}
+
+int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
+{
+ struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
+ struct mlx4_cqe *cqe = _ctx->cqe;
+ enum xdp_rss_hash_type xht = 0;
+ __be16 status;
+
+ if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH)))
+ return -ENODATA;
+
+ *hash = be32_to_cpu(cqe->immed_rss_invalid);
+ status = cqe->status;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP))
+ xht = XDP_RSS_L4_TCP;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP))
+ xht = XDP_RSS_L4_UDP;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F))
+ xht |= XDP_RSS_L3_IPV4;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) {
+ xht |= XDP_RSS_L3_IPV6;
+ if (cqe->ipv6_ext_mask)
+ xht |= XDP_RSS_L3_DYNHDR;
+ }
+ *rss_type = xht;
+
+ return 0;
+}
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_xdp_buff mxbuf = {};
int factor = priv->cqe_factor;
struct mlx4_en_rx_ring *ring;
struct bpf_prog *xdp_prog;
@@ -671,7 +692,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
bool doorbell_pending;
bool xdp_redir_flush;
struct mlx4_cqe *cqe;
- struct xdp_buff xdp;
int polled = 0;
int index;
@@ -681,7 +701,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
ring = priv->rx_ring[cq_ring];
xdp_prog = rcu_dereference_bh(ring->xdp_prog);
- xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
+ xdp_init_buff(&mxbuf.xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
doorbell_pending = false;
xdp_redir_flush = false;
@@ -713,7 +733,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Drop packet on bad receive or bad checksum */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR)) {
- en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n",
+ en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
((struct mlx4_err_cqe *)cqe)->syndrome);
goto next;
@@ -732,7 +752,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Get pointer to first fragment since we haven't
* skb yet and cast it to ethhdr struct
*/
- dma = frags[0].dma + frags[0].page_offset;
+ dma = page_pool_get_dma_addr(frags[0].page);
+ dma += frags[0].page_offset;
dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
DMA_FROM_DEVICE);
@@ -771,29 +792,34 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
void *orig_data;
u32 act;
- dma = frags[0].dma + frags[0].page_offset;
+ dma = page_pool_get_dma_addr(frags[0].page);
+ dma += frags[0].page_offset;
dma_sync_single_for_cpu(priv->ddev, dma,
priv->frag_info[0].frag_size,
DMA_FROM_DEVICE);
- xdp_prepare_buff(&xdp, va - frags[0].page_offset,
- frags[0].page_offset, length, false);
- orig_data = xdp.data;
-
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
- length = xdp.data_end - xdp.data;
- if (xdp.data != orig_data) {
- frags[0].page_offset = xdp.data -
- xdp.data_hard_start;
- va = xdp.data;
+ xdp_prepare_buff(&mxbuf.xdp, va - frags[0].page_offset,
+ frags[0].page_offset, length, true);
+ orig_data = mxbuf.xdp.data;
+ mxbuf.cqe = cqe;
+ mxbuf.mdev = priv->mdev;
+ mxbuf.ring = ring;
+ mxbuf.dev = dev;
+
+ act = bpf_prog_run_xdp(xdp_prog, &mxbuf.xdp);
+
+ length = mxbuf.xdp.data_end - mxbuf.xdp.data;
+ if (mxbuf.xdp.data != orig_data) {
+ frags[0].page_offset = mxbuf.xdp.data -
+ mxbuf.xdp.data_hard_start;
+ va = mxbuf.xdp.data;
}
switch (act) {
case XDP_PASS:
break;
case XDP_REDIRECT:
- if (likely(!xdp_do_redirect(dev, &xdp, xdp_prog))) {
+ if (likely(!xdp_do_redirect(dev, &mxbuf.xdp, xdp_prog))) {
ring->xdp_redirect++;
xdp_redir_flush = true;
frags[0].page = NULL;
@@ -830,6 +856,7 @@ xdp_drop_no_cnt:
skb = napi_get_frags(&cq->napi);
if (unlikely(!skb))
goto next;
+ skb_mark_for_recycle(skb);
if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
u64 timestamp = mlx4_en_get_cqe_ts(cqe);