summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/i40e/i40e_txrx.c
diff options
context:
space:
mode:
authorMagnus Karlsson <magnus.karlsson@intel.com>2020-11-16 12:12:47 +0100
committerDaniel Borkmann <daniel@iogearbox.net>2020-11-17 22:07:40 +0100
commit3106c580fb7cf26691c1ce3aba2223f3ae56d846 (patch)
treefd8b06d91a34b79b6f9b456ed2d6509033f39e1b /drivers/net/ethernet/intel/i40e/i40e_txrx.c
parent9349eb3a9d2ae0151510dd98b6640dfaeebee9cc (diff)
i40e: Use batched xsk Tx interfaces to increase performance
Use the new batched xsk interfaces for the Tx path in the i40e driver to improve performance. On my machine, this yields a throughput increase of 4% for the l2fwd sample app in xdpsock. If we instead just look at the Tx part, this patch set increases throughput with above 20% for Tx. Note that I had to explicitly loop unroll the inner loop to get to this performance level, by using a pragma. It is honored by both clang and gcc and should be ignored by versions that do not support it. Using the -funroll-loops compiler command line switch on the source file resulted in a loop unrolling on a higher level that lead to a performance decrease instead of an increase. Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/1605525167-14450-6-git-send-email-magnus.karlsson@gmail.com
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index d43ce13a93c9..c21548c71bb1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -676,6 +676,8 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
i40e_clean_tx_ring(tx_ring);
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
+ kfree(tx_ring->xsk_descs);
+ tx_ring->xsk_descs = NULL;
if (tx_ring->desc) {
dma_free_coherent(tx_ring->dev, tx_ring->size,
@@ -1277,6 +1279,13 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!tx_ring->tx_bi)
goto err;
+ if (ring_is_xdp(tx_ring)) {
+ tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs),
+ GFP_KERNEL);
+ if (!tx_ring->xsk_descs)
+ goto err;
+ }
+
u64_stats_init(&tx_ring->syncp);
/* round up to nearest 4K */
@@ -1300,6 +1309,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return 0;
err:
+ kfree(tx_ring->xsk_descs);
+ tx_ring->xsk_descs = NULL;
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
return -ENOMEM;