summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
authorMaciej Fijalkowski <maciej.fijalkowski@intel.com>2022-08-30 14:17:05 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2022-08-31 20:52:09 +0200
commitc00c4461689e15ac2cc3b9a595a54e4d8afd3d77 (patch)
treeb4e8546be8db0cf7599ab421b2c749f9a04ca236 /net/xdp/xsk.c
parentc829dba797360d9a266cabfaac16d1cd80abfc2b (diff)
xsk: Fix backpressure mechanism on Tx
Commit d678cbd2f867 ("xsk: Fix handling of invalid descriptors in XSK TX batching API") fixed batch API usage against set of descriptors with invalid ones but introduced a problem when AF_XDP SW rings are smaller than HW ones. Mismatch of reported Tx'ed frames between HW generator and user space app was observed. It turned out that backpressure mechanism became a bottleneck when the amount of produced descriptors to CQ is lower than what we grabbed from XSK Tx ring. Say that 512 entries had been taken from XSK Tx ring but we had only 490 free entries in CQ. Then callsite (ZC driver) will produce only 490 entries onto HW Tx ring but 512 entries will be released from Tx ring and this is what will be seen by the user space. In order to fix this case, mix XSK Tx/CQ ring interractions by moving around internal functions and changing call order: * pull out xskq_prod_nb_free() from xskq_prod_reserve_addr_batch() up to xsk_tx_peek_release_desc_batch(); ** move xskq_cons_release_n() into xskq_cons_read_desc_batch() After doing so, algorithm can be described as follows: 1. lookup Tx entries 2. use value from 1. to reserve space in CQ (*) 3. Read from Tx ring as much descriptors as value from 2 3a. release descriptors from XSK Tx ring (**) 4. Finally produce addresses to CQ Fixes: d678cbd2f867 ("xsk: Fix handling of invalid descriptors in XSK TX batching API") Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20220830121705.8618-1-maciej.fijalkowski@intel.com
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c22
1 files changed, 11 insertions, 11 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5b4ce6ba1bc7..639b2c3beb69 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -355,16 +355,15 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entr
return nb_pkts;
}
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
{
struct xdp_sock *xs;
- u32 nb_pkts;
rcu_read_lock();
if (!list_is_singular(&pool->xsk_tx_list)) {
/* Fallback to the non-batched version */
rcu_read_unlock();
- return xsk_tx_peek_release_fallback(pool, max_entries);
+ return xsk_tx_peek_release_fallback(pool, nb_pkts);
}
xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
@@ -373,12 +372,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
goto out;
}
- max_entries = xskq_cons_nb_entries(xs->tx, max_entries);
- nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries);
- if (!nb_pkts) {
- xs->tx->queue_empty_descs++;
- goto out;
- }
+ nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);
/* This is the backpressure mechanism for the Tx path. Try to
* reserve space in the completion queue for all packets, but
@@ -386,12 +380,18 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
* packets. This avoids having to implement any buffering in
* the Tx path.
*/
- nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
+ nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
if (!nb_pkts)
goto out;
- xskq_cons_release_n(xs->tx, max_entries);
+ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
+ if (!nb_pkts) {
+ xs->tx->queue_empty_descs++;
+ goto out;
+ }
+
__xskq_cons_release(xs->tx);
+ xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
xs->sk.sk_write_space(&xs->sk);
out: