summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2020-05-22 18:30:34 -0700
committer David S. Miller <davem@davemloft.net> 2020-05-22 18:30:34 -0700
commit a152b85984a03e7f83b9d8bcf908c29597d898fc (patch)
tree 7269af03a60dc0df32f75998e8badc317c00c182 /drivers
parent 1e6a705266409e0dbe020ac10107d265b73e5ca9 (diff)
parent a5dfaa2ab94057dd75c7911143482a0a85593c14 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2020-05-23
The following pull-request contains BPF updates for your *net-next* tree.
We've added 50 non-merge commits during the last 8 day(s) which contain a total of 109 files changed, 2776 insertions(+), 2887 deletions(-).
The main changes are:
1) Add a new AF_XDP buffer allocation API to the core in order to help lower the bar for drivers adopting AF_XDP support. i40e, ice, ixgbe as well as mlx5 have been moved over to the new API and also gained a small improvement in performance, from Björn Töpel and Magnus Karlsson.
2) Add getpeername()/getsockname() attach types for BPF sock_addr programs in order to allow for e.g. reverse translation of load-balancer backend to service address/port tuple from a connected peer, from Daniel Borkmann.
3) Improve the BPF verifier is_branch_taken() logic to evaluate pointers being non-NULL, e.g. if after an initial test another non-NULL test on that pointer follows in a given path, then it can be pruned right away, from John Fastabend.
4) Larger rework of BPF sockmap selftests to make output easier to understand and to reduce overall runtime as well as adding new BPF kTLS selftests that run in combination with sockmap, also from John Fastabend.
5) Batch of misc updates to BPF selftests including fixing up test_align to match verifier output again and moving it under test_progs, allowing bpf_iter selftest to compile on machines with older vmlinux.h, and updating config options for lirc and v6 segment routing helpers, from Stanislav Fomichev, Andrii Nakryiko and Alan Maguire.
6) Conversion of the BPF tracing samples from their outdated internal BPF loader to the libbpf API, from Daniel T. Lee.
7) Follow-up to BPF kernel test infrastructure in order to fix a flake in the XDP selftests, from Jesper Dangaard Brouer.
8) Minor improvements to libbpf's internal hashmap implementation, from Ian Rogers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_main.c 28
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_txrx.c 134
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_txrx.h 17
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_txrx_common.h 40
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_type.h 5
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_xsk.c 379
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_xsk.h 3
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_base.c 16
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_txrx.h 8
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_xsk.c 378
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_xsk.h 13
-rw-r--r-- drivers/net/ethernet/intel/ixgbe/ixgbe.h 9
-rw-r--r-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 15
-rw-r--r-- drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h 2
-rw-r--r-- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c 309
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/params.c 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c 33
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c 113
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h 25
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c 51
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c 25
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 34
-rw-r--r-- drivers/net/hyperv/netvsc_bpf.c 1
27 files changed, 390 insertions, 1281 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2a037ec244b9..ea7395b391e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11,7 +11,7 @@
#include "i40e_diag.h"
#include "i40e_xsk.h"
#include <net/udp_tunnel.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* All i40e tracepoints are defined by the include below, which
* must be included exactly once across the whole kernel with
* CREATE_TRACE_POINTS defined
@@ -3260,26 +3260,31 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (ring->vsi->type == I40E_VSI_MAIN)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+ kfree(ring->rx_bi);
ring->xsk_umem = i40e_xsk_umem(ring);
if (ring->xsk_umem) {
- ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
- XDP_PACKET_HEADROOM;
+ ret = i40e_alloc_rx_bi_zc(ring);
+ if (ret)
+ return ret;
+ ring->rx_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
*/
chain_len = 1;
- ring->zca.free = i40e_zca_free;
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_ZERO_COPY,
- &ring->zca);
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL);
if (ret)
return ret;
dev_info(&vsi->back->pdev->dev,
- "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->queue_index);
} else {
+ ret = i40e_alloc_rx_bi(ring);
+ if (ret)
+ return ret;
ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == I40E_VSI_MAIN) {
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
@@ -3344,9 +3349,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
- ok = ring->xsk_umem ?
- i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
- !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+ if (ring->xsk_umem) {
+ xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
+ ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
+ } else {
+ ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+ }
if (!ok) {
/* Log this in case the user has forgotten to give the kernel
* any buffers, even later in the application.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a3772beffe02..f613782f2f56 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -521,28 +521,29 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
/**
* i40e_fd_handle_status - check the Programming Status for FD
* @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
* @prog_id: the id originally used for programming
*
* This is used to verify if the FD programming or invalidation
* requested by SW to the HW is successful or not and take actions accordingly.
**/
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1, u8 prog_id)
{
struct i40e_pf *pf = rx_ring->vsi->back;
struct pci_dev *pdev = pf->pdev;
+ struct i40e_32b_rx_wb_qw0 *qw0;
u32 fcnt_prog, fcnt_avail;
u32 error;
- u64 qw;
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+ qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
+ error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
- pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
- if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
+ pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+ if (qw0->hi_dword.fd_id != 0 ||
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
pf->fd_inv);
@@ -560,7 +561,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
/* store the current atr filter count */
pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
- if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
+ if (qw0->hi_dword.fd_id == 0 &&
test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
/* These set_bit() calls aren't atomic with the
* test_bit() here, but worse case we potentially
@@ -589,7 +590,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
- rx_desc->wb.qword0.hi_dword.fd_id);
+ qw0->hi_dword.fd_id);
}
}
@@ -1195,6 +1196,11 @@ clear_counts:
rc->total_packets = 0;
}
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+ return &rx_ring->rx_bi[idx];
+}
+
/**
* i40e_reuse_rx_page - page flip buffer and store it back on the ring
* @rx_ring: rx descriptor ring to store buffers on
@@ -1208,7 +1214,7 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *new_buff;
u16 nta = rx_ring->next_to_alloc;
- new_buff = &rx_ring->rx_bi[nta];
+ new_buff = i40e_rx_bi(rx_ring, nta);
/* update, and store next to alloc */
nta++;
@@ -1227,29 +1233,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
}
/**
- * i40e_rx_is_programming_status - check for programming status descriptor
- * @qw: qword representing status_error_len in CPU ordering
- *
- * The value of in the descriptor length field indicate if this
- * is a programming status descriptor for flow director or FCoE
- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
- * it is a packet descriptor.
- **/
-static inline bool i40e_rx_is_programming_status(u64 qw)
-{
- /* The Rx filter programming status and SPH bit occupy the same
- * spot in the descriptor. Since we don't support packet split we
- * can just reuse the bit as an indication that this is a
- * programming status descriptor.
- */
- return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
-}
-
-/**
- * i40e_clean_programming_status - try clean the programming status descriptor
+ * i40e_clean_programming_status - clean the programming status descriptor
* @rx_ring: the rx ring that has this descriptor
- * @rx_desc: the rx descriptor written back by HW
- * @qw: qword representing status_error_len in CPU ordering
+ * @qword0_raw: qword0
+ * @qword1: qword1 representing status_error_len in CPU ordering
*
* Flow director should handle FD_FILTER_STATUS to check its filter programming
* status being successful or not and take actions accordingly. FCoE should
@@ -1257,34 +1244,16 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
*
* Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
**/
-struct i40e_rx_buffer *i40e_clean_programming_status(
- struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- u64 qw)
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1)
{
- struct i40e_rx_buffer *rx_buffer;
- u32 ntc;
u8 id;
- if (!i40e_rx_is_programming_status(qw))
- return NULL;
-
- ntc = rx_ring->next_to_clean;
-
- /* fetch, update, and store next to clean */
- rx_buffer = &rx_ring->rx_bi[ntc++];
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(I40E_RX_DESC(rx_ring, ntc));
-
- id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+ id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
- i40e_fd_handle_status(rx_ring, rx_desc, id);
-
- return rx_buffer;
+ i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
}
/**
@@ -1336,13 +1305,25 @@ err:
return -ENOMEM;
}
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
+{
+ unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
+
+ rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
+ return rx_ring->rx_bi ? 0 : -ENOMEM;
+}
+
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+{
+ memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+}
+
/**
* i40e_clean_rx_ring - Free Rx buffers
* @rx_ring: ring to be cleaned
**/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
- unsigned long bi_size;
u16 i;
/* ring already cleared, nothing to do */
@@ -1361,7 +1342,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
- struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+ struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
if (!rx_bi->page)
continue;
@@ -1388,8 +1369,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
}
skip_free:
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_bi, 0, bi_size);
+ if (rx_ring->xsk_umem)
+ i40e_clear_rx_bi_zc(rx_ring);
+ else
+ i40e_clear_rx_bi(rx_ring);
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
@@ -1430,15 +1413,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
- int err = -ENOMEM;
- int bi_size;
-
- /* warn if we are about to overwrite the pointer */
- WARN_ON(rx_ring->rx_bi);
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
- if (!rx_ring->rx_bi)
- goto err;
+ int err;
u64_stats_init(&rx_ring->syncp);
@@ -1451,7 +1426,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
if (!rx_ring->desc) {
dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
rx_ring->size);
- goto err;
+ return -ENOMEM;
}
rx_ring->next_to_alloc = 0;
@@ -1463,16 +1438,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index);
if (err < 0)
- goto err;
+ return err;
}
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
return 0;
-err:
- kfree(rx_ring->rx_bi);
- rx_ring->rx_bi = NULL;
- return err;
}
/**
@@ -1592,7 +1563,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
return false;
rx_desc = I40E_RX_DESC(rx_ring, ntu);
- bi = &rx_ring->rx_bi[ntu];
+ bi = i40e_rx_bi(rx_ring, ntu);
do {
if (!i40e_alloc_mapped_page(rx_ring, bi))
@@ -1614,7 +1585,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
ntu++;
if (unlikely(ntu == rx_ring->count)) {
rx_desc = I40E_RX_DESC(rx_ring, 0);
- bi = rx_ring->rx_bi;
+ bi = i40e_rx_bi(rx_ring, 0);
ntu = 0;
}
@@ -1981,7 +1952,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
{
struct i40e_rx_buffer *rx_buffer;
- rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
prefetchw(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */
@@ -2382,9 +2353,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
*/
dma_rmb();
- rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
- qword);
- if (unlikely(rx_buffer)) {
+ if (i40e_rx_is_programming_status(qword)) {
+ i40e_clean_programming_status(rx_ring,
+ rx_desc->raw.qword[0],
+ qword);
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ i40e_inc_ntc(rx_ring);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 36d37f31a287..5c255977fd58 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -296,17 +296,9 @@ struct i40e_tx_buffer {
struct i40e_rx_buffer {
dma_addr_t dma;
- union {
- struct {
- struct page *page;
- __u32 page_offset;
- __u16 pagecnt_bias;
- };
- struct {
- void *addr;
- u64 handle;
- };
- };
+ struct page *page;
+ __u32 page_offset;
+ __u16 pagecnt_bias;
};
struct i40e_queue_stats {
@@ -358,6 +350,7 @@ struct i40e_ring {
union {
struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi;
+ struct xdp_buff **rx_bi_zc;
};
DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
u16 queue_index; /* Queue number of ring */
@@ -419,7 +412,6 @@ struct i40e_ring {
struct i40e_channel *ch;
struct xdp_rxq_info xdp_rxq;
struct xdp_umem *xsk_umem;
- struct zero_copy_allocator zca; /* ZC allocator anchor */
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
@@ -495,6 +487,7 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
/**
* i40e_get_head - Retrieve head from head writeback
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 8af0e99c6c0d..667c4dc4b39f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -4,13 +4,9 @@
#ifndef I40E_TXRX_COMMON_
#define I40E_TXRX_COMMON_
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id);
int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
-struct i40e_rx_buffer *i40e_clean_programming_status(
- struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- u64 qw);
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1);
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, struct sk_buff *skb);
void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
@@ -84,6 +80,38 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
}
}
+/**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qword1: qword1 representing status_error_len in CPU ordering
+ *
+ * The value in the descriptor length field indicates if this
+ * is a programming status descriptor for flow director or FCoE
+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qword1)
+{
+ /* The Rx filter programming status and SPH bit occupy the same
+ * spot in the descriptor. Since we don't support packet split we
+ * can just reuse the bit as an indication that this is a
+ * programming status descriptor.
+ */
+ return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
+}
+
+/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static inline void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 6ea2867ff60f..63e098f7cb63 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -689,7 +689,7 @@ union i40e_32byte_rx_desc {
__le64 rsvd2;
} read;
struct {
- struct {
+ struct i40e_32b_rx_wb_qw0 {
struct {
union {
__le16 mirroring_status;
@@ -727,6 +727,9 @@ union i40e_32byte_rx_desc {
} hi_dword;
} qword3;
} wb; /* writeback */
+ struct {
+ u64 qword[4];
+ } raw;
};
enum i40e_rx_desc_status_bits {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 2b9184aead5f..f3953744c505 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -2,68 +2,30 @@
/* Copyright(c) 2018 Intel Corporation. */
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "i40e.h"
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"
-/**
- * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev
- * @vsi: Current VSI
- * @umem: UMEM to DMA map
- *
- * Returns 0 on success, <0 on failure
- **/
-static int i40e_xsk_umem_dma_map(struct i40e_vsi *vsi, struct xdp_umem *umem)
+int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
{
- struct i40e_pf *pf = vsi->back;
- struct device *dev;
- unsigned int i, j;
- dma_addr_t dma;
-
- dev = &pf->pdev->dev;
- for (i = 0; i < umem->npgs; i++) {
- dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
- if (dma_mapping_error(dev, dma))
- goto out_unmap;
+ unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-out_unmap:
- for (j = 0; j < i; j++) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
- umem->pages[i].dma = 0;
- }
-
- return -1;
+ rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
+ return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
}
-/**
- * i40e_xsk_umem_dma_unmap - DMA unmaps all UMEM memory for the netdev
- * @vsi: Current VSI
- * @umem: UMEM to DMA map
- **/
-static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
{
- struct i40e_pf *pf = vsi->back;
- struct device *dev;
- unsigned int i;
-
- dev = &pf->pdev->dev;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
+ memset(rx_ring->rx_bi_zc, 0,
+ sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
+}
- umem->pages[i].dma = 0;
- }
+static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+ return &rx_ring->rx_bi_zc[idx];
}
/**
@@ -78,7 +40,6 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid)
{
struct net_device *netdev = vsi->netdev;
- struct xdp_umem_fq_reuse *reuseq;
bool if_running;
int err;
@@ -92,13 +53,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
qid >= netdev->real_num_tx_queues)
return -EINVAL;
- reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
- if (!reuseq)
- return -ENOMEM;
-
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- err = i40e_xsk_umem_dma_map(vsi, umem);
+ err = xsk_buff_dma_map(umem, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
if (err)
return err;
@@ -151,7 +106,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
}
clear_bit(qid, vsi->af_xdp_zc_qps);
- i40e_xsk_umem_dma_unmap(vsi, umem);
+ xsk_buff_dma_unmap(umem, I40E_RX_DMA_ATTR);
if (if_running) {
err = i40e_queue_pair_enable(vsi, qid);
@@ -190,11 +145,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
**/
static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{
- struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
- u64 offset;
u32 act;
rcu_read_lock();
@@ -203,9 +156,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
*/
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- offset = xdp->data - xdp->data_hard_start;
-
- xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
switch (act) {
case XDP_PASS:
@@ -232,107 +182,26 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
return result;
}
-/**
- * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer
- * @rx_ring: Rx ring
- * @bi: Rx buffer to populate
- *
- * This function allocates an Rx buffer. The buffer can come from fill
- * queue, or via the recycle queue (next_to_alloc).
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *bi)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- void *addr = bi->addr;
- u64 handle, hr;
-
- if (addr) {
- rx_ring->rx_stats.page_reuse_count++;
- return true;
- }
-
- if (!xsk_umem_peek_addr(umem, &handle)) {
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
-
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- bi->dma = xdp_umem_get_dma(umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
- xsk_umem_release_addr(umem);
- return true;
-}
-
-/**
- * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
- * @rx_ring: Rx ring
- * @bi: Rx buffer to populate
- *
- * This function allocates an Rx buffer. The buffer can come from fill
- * queue, or via the reuse queue.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *bi)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- u64 handle, hr;
-
- if (!xsk_umem_peek_addr_rq(umem, &handle)) {
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
-
- handle &= rx_ring->xsk_umem->chunk_mask;
-
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- bi->dma = xdp_umem_get_dma(umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
- xsk_umem_release_addr_rq(umem);
- return true;
-}
-
-static __always_inline bool
-__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
- bool alloc(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *bi))
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
{
u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
- struct i40e_rx_buffer *bi;
+ struct xdp_buff **bi, *xdp;
+ dma_addr_t dma;
bool ok = true;
rx_desc = I40E_RX_DESC(rx_ring, ntu);
- bi = &rx_ring->rx_bi[ntu];
+ bi = i40e_rx_bi(rx_ring, ntu);
do {
- if (!alloc(rx_ring, bi)) {
+ xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ if (!xdp) {
ok = false;
goto no_buffers;
}
-
- dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0,
- rx_ring->rx_buf_len,
- DMA_BIDIRECTIONAL);
-
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ *bi = xdp;
+ dma = xsk_buff_xdp_get_dma(xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
+ rx_desc->read.hdr_addr = 0;
rx_desc++;
bi++;
@@ -340,11 +209,10 @@ __i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
if (unlikely(ntu == rx_ring->count)) {
rx_desc = I40E_RX_DESC(rx_ring, 0);
- bi = rx_ring->rx_bi;
+ bi = i40e_rx_bi(rx_ring, 0);
ntu = 0;
}
- rx_desc->wb.qword1.status_error_len = 0;
count--;
} while (count);
@@ -356,127 +224,8 @@ no_buffers:
}
/**
- * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
- * @rx_ring: Rx ring
- * @count: The number of buffers to allocate
- *
- * This function allocates a number of Rx buffers from the reuse queue
- * or fill ring and places them on the Rx ring.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
-{
- return __i40e_alloc_rx_buffers_zc(rx_ring, count,
- i40e_alloc_buffer_slow_zc);
-}
-
-/**
- * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers
- * @rx_ring: Rx ring
- * @count: The number of buffers to allocate
- *
- * This function allocates a number of Rx buffers from the fill ring
- * or the internal recycle mechanism and places them on the Rx ring.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
-{
- return __i40e_alloc_rx_buffers_zc(rx_ring, count,
- i40e_alloc_buffer_zc);
-}
-
-/**
- * i40e_get_rx_buffer_zc - Return the current Rx buffer
- * @rx_ring: Rx ring
- * @size: The size of the rx buffer (read from descriptor)
- *
- * This function returns the current, received Rx buffer, and also
- * does DMA synchronization. the Rx ring.
- *
- * Returns the received Rx buffer
- **/
-static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
- const unsigned int size)
-{
- struct i40e_rx_buffer *bi;
-
- bi = &rx_ring->rx_bi[rx_ring->next_to_clean];
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- bi->dma, 0,
- size,
- DMA_BIDIRECTIONAL);
-
- return bi;
-}
-
-/**
- * i40e_reuse_rx_buffer_zc - Recycle an Rx buffer
- * @rx_ring: Rx ring
- * @old_bi: The Rx buffer to recycle
- *
- * This function recycles a finished Rx buffer, and places it on the
- * recycle queue (next_to_alloc).
- **/
-static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *old_bi)
-{
- struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
- u16 nta = rx_ring->next_to_alloc;
-
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- /* transfer page from old buffer to new buffer */
- new_bi->dma = old_bi->dma;
- new_bi->addr = old_bi->addr;
- new_bi->handle = old_bi->handle;
-
- old_bi->addr = NULL;
-}
-
-/**
- * i40e_zca_free - Free callback for MEM_TYPE_ZERO_COPY allocations
- * @alloc: Zero-copy allocator
- * @handle: Buffer handle
- **/
-void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
-{
- struct i40e_rx_buffer *bi;
- struct i40e_ring *rx_ring;
- u64 hr, mask;
- u16 nta;
-
- rx_ring = container_of(alloc, struct i40e_ring, zca);
- hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
- mask = rx_ring->xsk_umem->chunk_mask;
-
- nta = rx_ring->next_to_alloc;
- bi = &rx_ring->rx_bi[nta];
-
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- handle &= mask;
-
- bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
- rx_ring->xsk_umem->headroom);
-}
-
-/**
* i40e_construct_skb_zc - Create skbufff from zero-copy Rx buffer
* @rx_ring: Rx ring
- * @bi: Rx buffer
* @xdp: xdp_buff
*
* This functions allocates a new skb from a zero-copy Rx buffer.
@@ -484,7 +233,6 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
* Returns the skb, or NULL on failure.
**/
static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *bi,
struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
@@ -503,24 +251,11 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
if (metasize)
skb_metadata_set(skb, metasize);
- i40e_reuse_rx_buffer_zc(rx_ring, bi);
+ xsk_buff_free(xdp);
return skb;
}
/**
- * i40e_inc_ntc: Advance the next_to_clean index
- * @rx_ring: Rx ring
- **/
-static void i40e_inc_ntc(struct i40e_ring *rx_ring)
-{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(I40E_RX_DESC(rx_ring, ntc));
-}
-
-/**
* i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
* @rx_ring: Rx ring
* @budget: NAPI budget
@@ -531,25 +266,20 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
- struct xdp_buff xdp;
-
- xdp.rxq = &rx_ring->xdp_rxq;
- xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < (unsigned int)budget)) {
- struct i40e_rx_buffer *bi;
union i40e_rx_desc *rx_desc;
+ struct xdp_buff **bi;
unsigned int size;
u64 qword;
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
failure = failure ||
- !i40e_alloc_rx_buffers_fast_zc(rx_ring,
- cleaned_count);
+ !i40e_alloc_rx_buffers_zc(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -562,35 +292,36 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
*/
dma_rmb();
- bi = i40e_clean_programming_status(rx_ring, rx_desc,
- qword);
- if (unlikely(bi)) {
- i40e_reuse_rx_buffer_zc(rx_ring, bi);
+ if (i40e_rx_is_programming_status(qword)) {
+ i40e_clean_programming_status(rx_ring,
+ rx_desc->raw.qword[0],
+ qword);
+ bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ xsk_buff_free(*bi);
+ *bi = NULL;
cleaned_count++;
+ i40e_inc_ntc(rx_ring);
continue;
}
+ bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
if (!size)
break;
- bi = i40e_get_rx_buffer_zc(rx_ring, size);
- xdp.data = bi->addr;
- xdp.data_meta = xdp.data;
- xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
- xdp.data_end = xdp.data + size;
- xdp.handle = bi->handle;
+ bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ (*bi)->data_end = (*bi)->data + size;
+ xsk_buff_dma_sync_for_cpu(*bi);
- xdp_res = i40e_run_xdp_zc(rx_ring, &xdp);
+ xdp_res = i40e_run_xdp_zc(rx_ring, *bi);
if (xdp_res) {
- if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+ if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR))
xdp_xmit |= xdp_res;
- bi->addr = NULL;
- } else {
- i40e_reuse_rx_buffer_zc(rx_ring, bi);
- }
+ else
+ xsk_buff_free(*bi);
+ *bi = NULL;
total_rx_bytes += size;
total_rx_packets++;
@@ -606,7 +337,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
* BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that
* SBP is *not* set in PRT_SBPVSI (default not set).
*/
- skb = i40e_construct_skb_zc(rx_ring, bi, &xdp);
+ skb = i40e_construct_skb_zc(rx_ring, *bi);
+ *bi = NULL;
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
break;
@@ -664,10 +396,9 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
- dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
- DMA_BIDIRECTIONAL);
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+ desc.len);
tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
tx_bi->bytecount = desc.len;
@@ -826,13 +557,13 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
u16 i;
for (i = 0; i < rx_ring->count; i++) {
- struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+ struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, i);
- if (!rx_bi->addr)
+ if (!rx_bi)
continue;
- xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_bi->handle);
- rx_bi->addr = NULL;
+ xsk_buff_free(rx_bi);
+ rx_bi = NULL;
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 9ed59c14eb55..ea919a7d60ec 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -12,12 +12,13 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid);
-void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
#endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 4c835c144907..f066befadd39 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Intel Corporation. */
+#include <net/xdp_sock_drv.h>
#include "ice_base.h"
#include "ice_dcb_lib.h"
@@ -308,24 +309,23 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
if (ring->xsk_umem) {
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
- ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
- XDP_PACKET_HEADROOM;
+ ring->rx_buf_len =
+ xsk_umem_get_rx_frame_size(ring->xsk_umem);
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
*/
chain_len = 1;
- ring->zca.free = ice_zca_free;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_ZERO_COPY,
- &ring->zca);
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL);
if (err)
return err;
+ xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
- dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->q_index);
} else {
- ring->zca.free = NULL;
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq,
@@ -426,7 +426,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
writel(0, ring->tail);
err = ring->xsk_umem ?
- ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+ ice_alloc_rx_bufs_zc(ring, ICE_DESC_UNUSED(ring)) :
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
if (err)
dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 025dd642cf28..5f5a6ce2b7e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -158,17 +158,16 @@ struct ice_tx_offload_params {
};
struct ice_rx_buf {
- struct sk_buff *skb;
- dma_addr_t dma;
union {
struct {
+ struct sk_buff *skb;
+ dma_addr_t dma;
struct page *page;
unsigned int page_offset;
u16 pagecnt_bias;
};
struct {
- void *addr;
- u64 handle;
+ struct xdp_buff *xdp;
};
};
};
@@ -292,7 +291,6 @@ struct ice_ring {
struct rcu_head rcu; /* to avoid race on free */
struct bpf_prog *xdp_prog;
struct xdp_umem *xsk_umem;
- struct zero_copy_allocator zca;
/* CL3 - 3rd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 20ac54e3156d..b6f928c9e9c9 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019, Intel Corporation. */
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ice.h"
#include "ice_base.h"
@@ -280,28 +280,6 @@ static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
}
/**
- * ice_xsk_add_umem - add a UMEM region for XDP sockets
- * @vsi: VSI to which the UMEM will be added
- * @umem: pointer to a requested UMEM region
- * @qid: queue ID
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
-{
- int err;
-
- err = ice_xsk_alloc_umems(vsi);
- if (err)
- return err;
-
- vsi->xsk_umems[qid] = umem;
- vsi->num_xsk_umems_used++;
-
- return 0;
-}
-
-/**
* ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
* @vsi: VSI from which the VSI will be removed
* @qid: Ring/qid associated with the UMEM
@@ -318,65 +296,6 @@ static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
}
}
-/**
- * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
- * @vsi: VSI to map the UMEM region
- * @umem: UMEM to map
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
-{
- struct ice_pf *pf = vsi->back;
- struct device *dev;
- unsigned int i;
-
- dev = ice_pf_to_dev(pf);
- for (i = 0; i < umem->npgs; i++) {
- dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
- PAGE_SIZE,
- DMA_BIDIRECTIONAL,
- ICE_RX_DMA_ATTR);
- if (dma_mapping_error(dev, dma)) {
- dev_dbg(dev, "XSK UMEM DMA mapping error on page num %d\n",
- i);
- goto out_unmap;
- }
-
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-out_unmap:
- for (; i > 0; i--) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
- umem->pages[i].dma = 0;
- }
-
- return -EFAULT;
-}
-
-/**
- * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
- * @vsi: VSI from which the UMEM will be unmapped
- * @umem: UMEM to unmap
- */
-static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
-{
- struct ice_pf *pf = vsi->back;
- struct device *dev;
- unsigned int i;
-
- dev = ice_pf_to_dev(pf);
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
-
- umem->pages[i].dma = 0;
- }
-}
/**
* ice_xsk_umem_disable - disable a UMEM region
@@ -391,7 +310,7 @@ static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
!vsi->xsk_umems[qid])
return -EINVAL;
- ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+ xsk_buff_dma_unmap(vsi->xsk_umems[qid], ICE_RX_DMA_ATTR);
ice_xsk_remove_umem(vsi, qid);
return 0;
@@ -408,7 +327,6 @@ static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
static int
ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
{
- struct xdp_umem_fq_reuse *reuseq;
int err;
if (vsi->type != ICE_VSI_PF)
@@ -419,20 +337,18 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
if (qid >= vsi->num_xsk_umems)
return -EINVAL;
+ err = ice_xsk_alloc_umems(vsi);
+ if (err)
+ return err;
+
if (vsi->xsk_umems && vsi->xsk_umems[qid])
return -EBUSY;
- reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
- if (!reuseq)
- return -ENOMEM;
-
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- err = ice_xsk_umem_dma_map(vsi, umem);
- if (err)
- return err;
+ vsi->xsk_umems[qid] = umem;
+ vsi->num_xsk_umems_used++;
- err = ice_xsk_add_umem(vsi, umem, qid);
+ err = xsk_buff_dma_map(vsi->xsk_umems[qid], ice_pf_to_dev(vsi->back),
+ ICE_RX_DMA_ATTR);
if (err)
return err;
@@ -484,137 +400,22 @@ xsk_umem_if_up:
}
/**
- * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
- * @zca: zero-cpoy allocator
- * @handle: Buffer handle
- */
-void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
- struct ice_rx_buf *rx_buf;
- struct ice_ring *rx_ring;
- struct xdp_umem *umem;
- u64 hr, mask;
- u16 nta;
-
- rx_ring = container_of(zca, struct ice_ring, zca);
- umem = rx_ring->xsk_umem;
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- mask = umem->chunk_mask;
-
- nta = rx_ring->next_to_alloc;
- rx_buf = &rx_ring->rx_buf[nta];
-
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- handle &= mask;
-
- rx_buf->dma = xdp_umem_get_dma(umem, handle);
- rx_buf->dma += hr;
-
- rx_buf->addr = xdp_umem_get_data(umem, handle);
- rx_buf->addr += hr;
-
- rx_buf->handle = (u64)handle + umem->headroom;
-}
-
-/**
- * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
- * @rx_ring: ring with an xdp_umem bound to it
- * @rx_buf: buffer to which xsk page address will be assigned
- *
- * This function allocates an Rx buffer in the hot path.
- * The buffer can come from fill queue or recycle queue.
- *
- * Returns true if an assignment was successful, false if not.
- */
-static __always_inline bool
-ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- void *addr = rx_buf->addr;
- u64 handle, hr;
-
- if (addr) {
- rx_ring->rx_stats.page_reuse_count++;
- return true;
- }
-
- if (!xsk_umem_peek_addr(umem, &handle)) {
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
-
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- rx_buf->dma = xdp_umem_get_dma(umem, handle);
- rx_buf->dma += hr;
-
- rx_buf->addr = xdp_umem_get_data(umem, handle);
- rx_buf->addr += hr;
-
- rx_buf->handle = handle + umem->headroom;
-
- xsk_umem_release_addr(umem);
- return true;
-}
-
-/**
- * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
- * @rx_ring: ring with an xdp_umem bound to it
- * @rx_buf: buffer to which xsk page address will be assigned
- *
- * This function allocates an Rx buffer in the slow path.
- * The buffer can come from fill queue or recycle queue.
- *
- * Returns true if an assignment was successful, false if not.
- */
-static __always_inline bool
-ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- u64 handle, headroom;
-
- if (!xsk_umem_peek_addr_rq(umem, &handle)) {
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
-
- handle &= umem->chunk_mask;
- headroom = umem->headroom + XDP_PACKET_HEADROOM;
-
- rx_buf->dma = xdp_umem_get_dma(umem, handle);
- rx_buf->dma += headroom;
-
- rx_buf->addr = xdp_umem_get_data(umem, handle);
- rx_buf->addr += headroom;
-
- rx_buf->handle = handle + umem->headroom;
-
- xsk_umem_release_addr_rq(umem);
- return true;
-}
-
-/**
* ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
* @rx_ring: Rx ring
* @count: The number of buffers to allocate
- * @alloc: the function pointer to call for allocation
*
* This function allocates a number of Rx buffers from the fill ring
* or the internal recycle mechanism and places them on the Rx ring.
*
* Returns false if all allocations were successful, true if any fail.
*/
-static bool
-ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
- bool (*alloc)(struct ice_ring *, struct ice_rx_buf *))
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
{
union ice_32b_rx_flex_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
struct ice_rx_buf *rx_buf;
bool ret = false;
+ dma_addr_t dma;
if (!count)
return false;
@@ -623,16 +424,14 @@ ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
rx_buf = &rx_ring->rx_buf[ntu];
do {
- if (!alloc(rx_ring, rx_buf)) {
+ rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ if (!rx_buf->xdp) {
ret = true;
break;
}
- dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
- rx_ring->rx_buf_len,
- DMA_BIDIRECTIONAL);
-
- rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+ dma = xsk_buff_xdp_get_dma(rx_buf->xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.status_error0 = 0;
rx_desc++;
@@ -653,32 +452,6 @@ ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
}
/**
- * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
- * @rx_ring: Rx ring
- * @count: number of bufs to allocate
- *
- * Returns false on success, true on failure.
- */
-static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
-{
- return ice_alloc_rx_bufs_zc(rx_ring, count,
- ice_alloc_buf_fast_zc);
-}
-
-/**
- * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
- * @rx_ring: Rx ring
- * @count: number of bufs to allocate
- *
- * Returns false on success, true on failure.
- */
-bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
-{
- return ice_alloc_rx_bufs_zc(rx_ring, count,
- ice_alloc_buf_slow_zc);
-}
-
-/**
* ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
* @rx_ring: Rx ring
*/
@@ -692,76 +465,21 @@ static void ice_bump_ntc(struct ice_ring *rx_ring)
}
/**
- * ice_get_rx_buf_zc - Fetch the current Rx buffer
- * @rx_ring: Rx ring
- * @size: size of a buffer
- *
- * This function returns the current, received Rx buffer and does
- * DMA synchronization.
- *
- * Returns a pointer to the received Rx buffer.
- */
-static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
-{
- struct ice_rx_buf *rx_buf;
-
- rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
-
- dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
- size, DMA_BIDIRECTIONAL);
-
- return rx_buf;
-}
-
-/**
- * ice_reuse_rx_buf_zc - reuse an Rx buffer
- * @rx_ring: Rx ring
- * @old_buf: The buffer to recycle
- *
- * This function recycles a finished Rx buffer, and places it on the recycle
- * queue (next_to_alloc).
- */
-static void
-ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
-{
- unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
- u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
- u16 nta = rx_ring->next_to_alloc;
- struct ice_rx_buf *new_buf;
-
- new_buf = &rx_ring->rx_buf[nta++];
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- new_buf->dma = old_buf->dma & mask;
- new_buf->dma += hr;
-
- new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
- new_buf->addr += hr;
-
- new_buf->handle = old_buf->handle & mask;
- new_buf->handle += rx_ring->xsk_umem->headroom;
-
- old_buf->addr = NULL;
-}
-
-/**
* ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
* @rx_ring: Rx ring
* @rx_buf: zero-copy Rx buffer
- * @xdp: XDP buffer
*
* This function allocates a new skb from a zero-copy Rx buffer.
*
* Returns the skb on success, NULL on failure.
*/
static struct sk_buff *
-ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
- struct xdp_buff *xdp)
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
- unsigned int metasize = xdp->data - xdp->data_meta;
- unsigned int datasize = xdp->data_end - xdp->data;
- unsigned int datasize_hard = xdp->data_end -
- xdp->data_hard_start;
+ unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta;
+ unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data;
+ unsigned int datasize_hard = rx_buf->xdp->data_end -
+ rx_buf->xdp->data_hard_start;
struct sk_buff *skb;
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
@@ -769,13 +487,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, xdp->data - xdp->data_hard_start);
- memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start);
+ memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize);
if (metasize)
skb_metadata_set(skb, metasize);
- ice_reuse_rx_buf_zc(rx_ring, rx_buf);
-
+ xsk_buff_free(rx_buf->xdp);
+ rx_buf->xdp = NULL;
return skb;
}
@@ -802,7 +520,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
}
act = bpf_prog_run_xdp(xdp_prog, xdp);
- xdp->handle += xdp->data - xdp->data_hard_start;
switch (act) {
case XDP_PASS:
break;
@@ -840,13 +557,8 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
- struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_xmit = 0;
bool failure = false;
- struct xdp_buff xdp;
-
- xdp.rxq = &rx_ring->xdp_rxq;
- xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
@@ -858,8 +570,8 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
u8 rx_ptype;
if (cleaned_count >= ICE_RX_BUF_WRITE) {
- failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
- cleaned_count);
+ failure |= ice_alloc_rx_bufs_zc(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -880,25 +592,19 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
if (!size)
break;
- rx_buf = ice_get_rx_buf_zc(rx_ring, size);
- if (!rx_buf->addr)
- break;
- xdp.data = rx_buf->addr;
- xdp.data_meta = xdp.data;
- xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
- xdp.data_end = xdp.data + size;
- xdp.handle = rx_buf->handle;
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+ rx_buf->xdp->data_end = rx_buf->xdp->data + size;
+ xsk_buff_dma_sync_for_cpu(rx_buf->xdp);
- xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+ xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
if (xdp_res) {
- if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
xdp_xmit |= xdp_res;
- rx_buf->addr = NULL;
- } else {
- ice_reuse_rx_buf_zc(rx_ring, rx_buf);
- }
+ else
+ xsk_buff_free(rx_buf->xdp);
+ rx_buf->xdp = NULL;
total_rx_bytes += size;
total_rx_packets++;
cleaned_count++;
@@ -908,7 +614,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
}
/* XDP_PASS path */
- skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+ skb = ice_construct_skb_zc(rx_ring, rx_buf);
if (!skb) {
rx_ring->rx_stats.alloc_buf_failed++;
break;
@@ -979,10 +685,9 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
- dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
- DMA_BIDIRECTIONAL);
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+ desc.len);
tx_buf->bytecount = desc.len;
@@ -1165,11 +870,10 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
- if (!rx_buf->addr)
+ if (!rx_buf->xdp)
continue;
- xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
- rx_buf->addr = NULL;
+ rx_buf->xdp = NULL;
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 8a4ba7c6d549..fc1a06b4df36 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -10,11 +10,10 @@ struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
-void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
-bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count);
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
@@ -27,12 +26,6 @@ ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
return -EOPNOTSUPP;
}
-static inline void
-ice_zca_free(struct zero_copy_allocator __always_unused *zca,
- unsigned long __always_unused handle)
-{
-}
-
static inline int
ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
int __always_unused budget)
@@ -48,8 +41,8 @@ ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
}
static inline bool
-ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
- u16 __always_unused count)
+ice_alloc_rx_bufs_zc(struct ice_ring __always_unused *rx_ring,
+ u16 __always_unused count)
{
return false;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 2833e4f041ce..5ddfc83a1e46 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -224,17 +224,17 @@ struct ixgbe_tx_buffer {
};
struct ixgbe_rx_buffer {
- struct sk_buff *skb;
- dma_addr_t dma;
union {
struct {
+ struct sk_buff *skb;
+ dma_addr_t dma;
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
};
struct {
- void *addr;
- u64 handle;
+ bool discard;
+ struct xdp_buff *xdp;
};
};
};
@@ -351,7 +351,6 @@ struct ixgbe_ring {
};
struct xdp_rxq_info xdp_rxq;
struct xdp_umem *xsk_umem;
- struct zero_copy_allocator zca; /* ZC allocator anchor */
u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */
u16 rx_buf_len;
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index eab5934b04f5..45fc7ce1a543 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -35,7 +35,7 @@
#include <net/tc_act/tc_mirred.h>
#include <net/vxlan.h>
#include <net/mpls.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xfrm.h>
#include "ixgbe.h"
@@ -3745,8 +3745,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
/* configure the packet buffer length */
if (rx_ring->xsk_umem) {
- u32 xsk_buf_len = rx_ring->xsk_umem->chunk_size_nohr -
- XDP_PACKET_HEADROOM;
+ u32 xsk_buf_len = xsk_umem_get_rx_frame_size(rx_ring->xsk_umem);
/* If the MAC support setting RXDCTL.RLPML, the
* SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
@@ -4093,11 +4092,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
if (ring->xsk_umem) {
- ring->zca.free = ixgbe_zca_free;
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_ZERO_COPY,
- &ring->zca));
-
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL));
+ xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
} else {
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL));
@@ -4153,8 +4151,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
}
if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) {
- u32 xsk_buf_len = ring->xsk_umem->chunk_size_nohr -
- XDP_PACKET_HEADROOM;
+ u32 xsk_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
IXGBE_RXDCTL_RLPML_EN);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index 6d01700b46bc..7887ae4aaf4f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -35,7 +35,7 @@ int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
-void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring *rx_ring,
const int budget);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index a656ee9a1fae..86add9fbd36c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -2,7 +2,7 @@
/* Copyright(c) 2018 Intel Corporation. */
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ixgbe.h"
@@ -20,54 +20,11 @@ struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
return xdp_get_umem_from_qid(adapter->netdev, qid);
}
-static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter,
- struct xdp_umem *umem)
-{
- struct device *dev = &adapter->pdev->dev;
- unsigned int i, j;
- dma_addr_t dma;
-
- for (i = 0; i < umem->npgs; i++) {
- dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
- if (dma_mapping_error(dev, dma))
- goto out_unmap;
-
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-out_unmap:
- for (j = 0; j < i; j++) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
- umem->pages[i].dma = 0;
- }
-
- return -1;
-}
-
-static void ixgbe_xsk_umem_dma_unmap(struct ixgbe_adapter *adapter,
- struct xdp_umem *umem)
-{
- struct device *dev = &adapter->pdev->dev;
- unsigned int i;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
-
- umem->pages[i].dma = 0;
- }
-}
-
static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
struct xdp_umem *umem,
u16 qid)
{
struct net_device *netdev = adapter->netdev;
- struct xdp_umem_fq_reuse *reuseq;
bool if_running;
int err;
@@ -78,13 +35,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
qid >= netdev->real_num_tx_queues)
return -EINVAL;
- reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count);
- if (!reuseq)
- return -ENOMEM;
-
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- err = ixgbe_xsk_umem_dma_map(adapter, umem);
+ err = xsk_buff_dma_map(umem, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
if (err)
return err;
@@ -124,7 +75,7 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
ixgbe_txrx_ring_disable(adapter, qid);
clear_bit(qid, adapter->af_xdp_zc_qps);
- ixgbe_xsk_umem_dma_unmap(adapter, umem);
+ xsk_buff_dma_unmap(umem, IXGBE_RX_DMA_ATTR);
if (if_running)
ixgbe_txrx_ring_enable(adapter, qid);
@@ -143,19 +94,14 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
{
- struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
struct xdp_frame *xdpf;
- u64 offset;
u32 act;
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- offset = xdp->data - xdp->data_hard_start;
-
- xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
switch (act) {
case XDP_PASS:
@@ -186,140 +132,16 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
return result;
}
-static struct
-ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
- unsigned int size)
-{
- struct ixgbe_rx_buffer *bi;
-
- bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- bi->dma, 0,
- size,
- DMA_BIDIRECTIONAL);
-
- return bi;
-}
-
-static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *obi)
-{
- u16 nta = rx_ring->next_to_alloc;
- struct ixgbe_rx_buffer *nbi;
-
- nbi = &rx_ring->rx_buffer_info[rx_ring->next_to_alloc];
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- /* transfer page from old buffer to new buffer */
- nbi->dma = obi->dma;
- nbi->addr = obi->addr;
- nbi->handle = obi->handle;
-
- obi->addr = NULL;
- obi->skb = NULL;
-}
-
-void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
-{
- struct ixgbe_rx_buffer *bi;
- struct ixgbe_ring *rx_ring;
- u64 hr, mask;
- u16 nta;
-
- rx_ring = container_of(alloc, struct ixgbe_ring, zca);
- hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
- mask = rx_ring->xsk_umem->chunk_mask;
-
- nta = rx_ring->next_to_alloc;
- bi = rx_ring->rx_buffer_info;
-
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- handle &= mask;
-
- bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
- rx_ring->xsk_umem->headroom);
-}
-
-static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *bi)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- void *addr = bi->addr;
- u64 handle, hr;
-
- if (addr)
- return true;
-
- if (!xsk_umem_peek_addr(umem, &handle)) {
- rx_ring->rx_stats.alloc_rx_page_failed++;
- return false;
- }
-
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- bi->dma = xdp_umem_get_dma(umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
- xsk_umem_release_addr(umem);
- return true;
-}
-
-static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *bi)
-{
- struct xdp_umem *umem = rx_ring->xsk_umem;
- u64 handle, hr;
-
- if (!xsk_umem_peek_addr_rq(umem, &handle)) {
- rx_ring->rx_stats.alloc_rx_page_failed++;
- return false;
- }
-
- handle &= rx_ring->xsk_umem->chunk_mask;
-
- hr = umem->headroom + XDP_PACKET_HEADROOM;
-
- bi->dma = xdp_umem_get_dma(umem, handle);
- bi->dma += hr;
-
- bi->addr = xdp_umem_get_data(umem, handle);
- bi->addr += hr;
-
- bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
- xsk_umem_release_addr_rq(umem);
- return true;
-}
-
-static __always_inline bool
-__ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
- bool alloc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *bi))
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
{
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+ dma_addr_t dma;
bool ok = true;
/* nothing to do */
- if (!cleaned_count)
+ if (!count)
return true;
rx_desc = IXGBE_RX_DESC(rx_ring, i);
@@ -327,21 +149,18 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
i -= rx_ring->count;
do {
- if (!alloc(rx_ring, bi)) {
+ bi->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ if (!bi->xdp) {
ok = false;
break;
}
- /* sync the buffer for use by the device */
- dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
- bi->page_offset,
- rx_ring->rx_buf_len,
- DMA_BIDIRECTIONAL);
+ dma = xsk_buff_xdp_get_dma(bi->xdp);
/* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc++;
bi++;
@@ -355,17 +174,14 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
/* clear the length for the next_to_use descriptor */
rx_desc->wb.upper.length = 0;
- cleaned_count--;
- } while (cleaned_count);
+ count--;
+ } while (count);
i += rx_ring->count;
if (rx_ring->next_to_use != i) {
rx_ring->next_to_use = i;
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = i;
-
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
@@ -378,40 +194,27 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
return ok;
}
-void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
-{
- __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
- ixgbe_alloc_buffer_slow_zc);
-}
-
-static bool ixgbe_alloc_rx_buffers_fast_zc(struct ixgbe_ring *rx_ring,
- u16 count)
-{
- return __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
- ixgbe_alloc_buffer_zc);
-}
-
static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *bi,
- struct xdp_buff *xdp)
+ struct ixgbe_rx_buffer *bi)
{
- unsigned int metasize = xdp->data - xdp->data_meta;
- unsigned int datasize = xdp->data_end - xdp->data;
+ unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
+ unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
struct sk_buff *skb;
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- xdp->data_end - xdp->data_hard_start,
+ bi->xdp->data_end - bi->xdp->data_hard_start,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, xdp->data - xdp->data_hard_start);
- memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
+ memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
if (metasize)
skb_metadata_set(skb, metasize);
- ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+ xsk_buff_free(bi->xdp);
+ bi->xdp = NULL;
return skb;
}
@@ -431,14 +234,9 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct ixgbe_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
- struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
- struct xdp_buff xdp;
-
- xdp.rxq = &rx_ring->xdp_rxq;
- xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
@@ -448,8 +246,8 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
failure = failure ||
- !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
- cleaned_count);
+ !ixgbe_alloc_rx_buffers_zc(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -464,42 +262,40 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
*/
dma_rmb();
- bi = ixgbe_get_rx_buffer_zc(rx_ring, size);
+ bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
if (unlikely(!ixgbe_test_staterr(rx_desc,
IXGBE_RXD_STAT_EOP))) {
struct ixgbe_rx_buffer *next_bi;
- ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+ xsk_buff_free(bi->xdp);
+ bi->xdp = NULL;
ixgbe_inc_ntc(rx_ring);
next_bi =
&rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- next_bi->skb = ERR_PTR(-EINVAL);
+ next_bi->discard = true;
continue;
}
- if (unlikely(bi->skb)) {
- ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+ if (unlikely(bi->discard)) {
+ xsk_buff_free(bi->xdp);
+ bi->xdp = NULL;
+ bi->discard = false;
ixgbe_inc_ntc(rx_ring);
continue;
}
- xdp.data = bi->addr;
- xdp.data_meta = xdp.data;
- xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
- xdp.data_end = xdp.data + size;
- xdp.handle = bi->handle;
-
- xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);
+ bi->xdp->data_end = bi->xdp->data + size;
+ xsk_buff_dma_sync_for_cpu(bi->xdp);
+ xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
if (xdp_res) {
- if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+ if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))
xdp_xmit |= xdp_res;
- bi->addr = NULL;
- bi->skb = NULL;
- } else {
- ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
- }
+ else
+ xsk_buff_free(bi->xdp);
+
+ bi->xdp = NULL;
total_rx_packets++;
total_rx_bytes += size;
@@ -509,7 +305,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
}
/* XDP_PASS path */
- skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
+ skb = ixgbe_construct_skb_zc(rx_ring, bi);
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
break;
@@ -561,17 +357,17 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
{
- u16 i = rx_ring->next_to_clean;
- struct ixgbe_rx_buffer *bi = &rx_ring->rx_buffer_info[i];
+ struct ixgbe_rx_buffer *bi;
+ u16 i;
- while (i != rx_ring->next_to_alloc) {
- xsk_umem_fq_reuse(rx_ring->xsk_umem, bi->handle);
- i++;
- bi++;
- if (i == rx_ring->count) {
- i = 0;
- bi = rx_ring->rx_buffer_info;
- }
+ for (i = 0; i < rx_ring->count; i++) {
+ bi = &rx_ring->rx_buffer_info[i];
+
+ if (!bi->xdp)
+ continue;
+
+ xsk_buff_free(bi->xdp);
+ bi->xdp = NULL;
}
}
@@ -594,10 +390,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
- dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
- DMA_BIDIRECTIONAL);
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+ desc.len);
tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
tx_bi->bytecount = desc.len;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 81fd53569463..4906aee6798d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -365,10 +365,7 @@ struct mlx5e_dma_info {
dma_addr_t addr;
union {
struct page *page;
- struct {
- u64 handle;
- void *data;
- } xsk;
+ struct xdp_buff *xsk;
};
};
@@ -581,7 +578,6 @@ struct mlx5e_rq {
} mpwqe;
};
struct {
- u16 umem_headroom;
u16 headroom;
u32 frame0_sz;
u8 map_dir; /* dma map direction */
@@ -614,7 +610,6 @@ struct mlx5e_rq {
struct page_pool *page_pool;
/* AF_XDP zero-copy */
- struct zero_copy_allocator zca;
struct xdp_umem *umem;
struct work_struct recover_work;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index eb2e1f2138e4..38e4f19d69f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -12,15 +12,16 @@ static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u16 headroom = NET_IP_ALIGN;
+ u16 headroom;
- if (mlx5e_rx_is_xdp(params, xsk)) {
+ if (xsk)
+ return xsk->headroom;
+
+ headroom = NET_IP_ALIGN;
+ if (mlx5e_rx_is_xdp(params, xsk))
headroom += XDP_PACKET_HEADROOM;
- if (xsk)
- headroom += xsk->headroom;
- } else {
+ else
headroom += MLX5_RX_HEADROOM;
- }
return headroom;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 42202d19245c..3bea1d4be53b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,7 +31,7 @@
*/
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
@@ -71,7 +71,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
- if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
* allocated page. The UMEM page was returned via the ZCA, and
* this new page has to be mapped at this point and has to be
@@ -119,50 +119,33 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk)
+ u32 *len, struct xdp_buff *xdp)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
- struct xdp_umem *umem = rq->umem;
- struct xdp_buff xdp;
u32 act;
int err;
if (!prog)
return false;
- xdp.data = va + *rx_headroom;
- xdp_set_data_meta_invalid(&xdp);
- xdp.data_end = xdp.data + *len;
- xdp.data_hard_start = va;
- if (xsk)
- xdp.handle = di->xsk.handle;
- xdp.rxq = &rq->xdp_rxq;
- xdp.frame_sz = rq->buff.frame0_sz;
-
- act = bpf_prog_run_xdp(prog, &xdp);
- if (xsk) {
- u64 off = xdp.data - xdp.data_hard_start;
-
- xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
- }
+ act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
- *rx_headroom = xdp.data - xdp.data_hard_start;
- *len = xdp.data_end - xdp.data;
+ *len = xdp->data_end - xdp->data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
case XDP_REDIRECT:
/* When XDP enabled then page-refcnt==1 here */
- err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ err = xdp_do_redirect(rq->netdev, xdp, prog);
if (unlikely(err))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- if (!xsk)
+ if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index be64eb68f4e5..ca48c293151b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -61,7 +61,7 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk);
+ u32 *len, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 62fc8a128a8d..a33a1f762c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -3,71 +3,10 @@
#include "rx.h"
#include "en/xdp.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
-{
- /* Check in advance that we have enough frames, instead of allocating
- * one-by-one, failing and moving frames to the Reuse Ring.
- */
- return xsk_umem_has_addrs_rq(rq->umem, count);
-}
-
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct xdp_umem *umem = rq->umem;
- u64 handle;
-
- if (!xsk_umem_peek_addr_rq(umem, &handle))
- return -ENOMEM;
-
- dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
- rq->buff.umem_headroom);
- dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
-
- /* No need to add headroom to the DMA address. In striding RQ case, we
- * just provide pages for UMR, and headroom is counted at the setup
- * stage when creating a WQE. In non-striding RQ case, headroom is
- * accounted in mlx5e_alloc_rx_wqe.
- */
- dma_info->addr = xdp_umem_get_dma(umem, handle);
-
- xsk_umem_release_addr_rq(umem);
-
- dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
- return 0;
-}
-
-static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
-{
- xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
-}
-
-/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
- * the userspace if possible, and if not, this function is called to reuse them
- * in the driver.
- */
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
-}
-
-/* Return a frame back to the hardware to fill in again. It is used by XDP when
- * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
- */
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
- struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
-
- mlx5e_xsk_recycle_frame(rq, handle);
-}
-
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
u32 cqe_bcnt)
{
@@ -90,11 +29,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
u32 cqe_bcnt32 = cqe_bcnt;
- void *va, *data;
- u32 frag_size;
bool consumed;
/* Check packet size. Note LRO doesn't use linear SKB */
@@ -103,22 +39,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
return NULL;
}
- /* head_offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, UMR pages are mapped to XSK frames, so
+ /* head_offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, in
+ * the current implementation, UMR pages are mapped to XSK frames, so
* head_offset should always be 0.
*/
WARN_ON_ONCE(head_offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt32;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
+ xdp->data_end = xdp->data + cqe_bcnt32;
+ xdp_set_data_meta_invalid(xdp);
+ xsk_buff_dma_sync_for_cpu(xdp);
+ prefetch(xdp->data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+ consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
rcu_read_unlock();
/* Possible flows:
@@ -145,7 +79,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+ return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
@@ -153,25 +87,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di = wi->di;
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
- void *va, *data;
+ struct xdp_buff *xdp = wi->di->xsk;
bool consumed;
- u32 frag_size;
- /* wi->offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, one page = one packet = one frame, so
+ /* wi->offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, the
+ * XSK allocator allocates frames per packet, instead of pages, so
* wi->offset should always be 0.
*/
WARN_ON_ONCE(wi->offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
+ xdp->data_end = xdp->data + cqe_bcnt;
+ xdp_set_data_meta_invalid(xdp);
+ xsk_buff_dma_sync_for_cpu(xdp);
+ prefetch(xdp->data);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
@@ -179,7 +108,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
}
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+ consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
rcu_read_unlock();
if (likely(consumed))
@@ -189,5 +118,5 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
* will be handled by mlx5e_put_rx_frag.
* On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+ return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index cab0e93497ae..d147b2f13b54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,16 +5,10 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
@@ -25,6 +19,23 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
+static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ dma_info->xsk = xsk_buff_alloc(rq->umem);
+ if (!dma_info->xsk)
+ return -ENOMEM;
+
+ /* Store the DMA address without headroom. In striding RQ case, we just
+ * provide pages for UMR, and headroom is counted at the setup stage
+ * when creating a WQE. In non-striding RQ case, headroom is accounted
+ * in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
+
+ return 0;
+}
+
static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
{
if (!xsk_umem_uses_need_wakeup(rq->umem))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 3bcdb5b2fc20..83dce9cdb8c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -5,7 +5,7 @@
#include "umem.h"
#include "en/xdp.h"
#include "en/params.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
@@ -92,12 +92,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
- xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr);
xdptxd.len = desc.len;
- dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
- xdptxd.len, DMA_BIDIRECTIONAL);
+ xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len);
if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
if (sq->mpwqe.wqe)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 79b487d89757..39fa0a705856 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,7 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* TX data path */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
index 4baaa5788320..7b17fcd0a56d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "umem.h"
#include "setup.h"
#include "en/params.h"
@@ -10,40 +10,14 @@ static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
struct xdp_umem *umem)
{
struct device *dev = priv->mdev->device;
- u32 i;
- for (i = 0; i < umem->npgs; i++) {
- dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
- if (unlikely(dma_mapping_error(dev, dma)))
- goto err_unmap;
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-err_unmap:
- while (i--) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
-
- return -ENOMEM;
+ return xsk_buff_dma_map(umem, dev, 0);
}
static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
struct xdp_umem *umem)
{
- struct device *dev = priv->mdev->device;
- u32 i;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
+ return xsk_buff_dma_unmap(umem, 0);
}
static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
@@ -90,13 +64,14 @@ static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
{
- return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+ return xsk_umem_get_headroom(umem) <= 0xffff &&
+ xsk_umem_get_chunk_size(umem) <= 0xffff;
}
void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
{
- xsk->headroom = umem->headroom;
- xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+ xsk->headroom = xsk_umem_get_headroom(umem);
+ xsk->chunk_size = xsk_umem_get_chunk_size(umem);
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
@@ -241,18 +216,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
mlx5e_xsk_disable_umem(priv, ix);
}
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
-{
- struct xdp_umem_fq_reuse *reuseq;
-
- reuseq = xsk_reuseq_prepare(nentries);
- if (unlikely(!reuseq))
- return -ENOMEM;
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- return 0;
-}
-
u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
{
u16 res = xsk->refcnt ? params->num_channels : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 07823abe5557..8b86c6ded302 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -38,7 +38,7 @@
#include <linux/bpf.h>
#include <linux/if_bridge.h>
#include <net/page_pool.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "eswitch.h"
#include "en.h"
#include "en/txrx.h"
@@ -373,7 +373,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
- u32 num_xsk_frames = 0;
u32 rq_xdp_ix;
u32 pool_size;
int wq_sz;
@@ -413,7 +412,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
- rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
pool_size = 1 << params->log_rq_mtu_frames;
switch (rq->wq_type) {
@@ -427,10 +425,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- if (xsk)
- num_xsk_frames = wq_sz <<
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
-
pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
mlx5e_mpwqe_get_log_rq_size(params, xsk);
@@ -482,9 +476,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
- if (xsk)
- num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
-
rq->wqe.info = rqp->frags_info;
rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
@@ -525,19 +516,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
if (xsk) {
- rq->buff.frame0_sz = xsk_umem_xdp_frame_sz(umem);
-
- err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
- if (unlikely(err)) {
- mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
- num_xsk_frames);
- goto err_free;
- }
-
- rq->zca.free = mlx5e_xsk_zca_free;
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_ZERO_COPY,
- &rq->zca);
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq);
} else {
/* Create a page_pool and register it with rxq */
pp_params.order = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a514685fb560..fdba52136c5d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -300,7 +300,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq,
* put into the Reuse Ring, because there is no way to return
* the page to the userspace when the interface goes down.
*/
- mlx5e_xsk_page_release(rq, dma_info);
+ xsk_buff_free(dma_info->xsk);
else
mlx5e_page_release_dynamic(rq, dma_info, recycle);
}
@@ -385,7 +385,11 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
if (rq->umem) {
int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
- if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
+ /* Check in advance that we have enough frames, instead of
+ * allocating one-by-one, failing and moving frames to the
+ * Reuse Ring.
+ */
+ if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired)))
return -ENOMEM;
}
@@ -480,8 +484,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
int err;
int i;
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
if (rq->umem &&
- unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
+ unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) {
err = -ENOMEM;
goto err;
}
@@ -1044,12 +1051,24 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
return skb;
}
+static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
+ u32 len, struct xdp_buff *xdp)
+{
+ xdp->data_hard_start = va;
+ xdp_set_data_meta_invalid(xdp);
+ xdp->data = va + headroom;
+ xdp->data_end = xdp->data + len;
+ xdp->rxq = &rq->xdp_rxq;
+ xdp->frame_sz = rq->buff.frame0_sz;
+}
+
struct sk_buff *
mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
{
struct mlx5e_dma_info *di = wi->di;
u16 rx_headroom = rq->buff.headroom;
+ struct xdp_buff xdp;
struct sk_buff *skb;
void *va, *data;
bool consumed;
@@ -1065,11 +1084,13 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetch(data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp);
rcu_read_unlock();
if (consumed)
return NULL; /* page/packet was consumed by XDP */
+ rx_headroom = xdp.data - xdp.data_hard_start;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
if (unlikely(!skb))
@@ -1343,6 +1364,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
u16 rx_headroom = rq->buff.headroom;
u32 cqe_bcnt32 = cqe_bcnt;
+ struct xdp_buff xdp;
struct sk_buff *skb;
void *va, *data;
u32 frag_size;
@@ -1364,7 +1386,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
prefetch(data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+ consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp);
rcu_read_unlock();
if (consumed) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
@@ -1372,6 +1395,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL; /* page/packet was consumed by XDP */
}
+ rx_headroom = xdp.data - xdp.data_hard_start;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
if (unlikely(!skb))
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 1e0c024b0a93..8e4141552423 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -50,7 +50,6 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
xdp->data_end = xdp->data + len;
xdp->rxq = &nvchan->xdp_rxq;
xdp->frame_sz = PAGE_SIZE;
- xdp->handle = 0;
memcpy(xdp->data, data, len);