Diffstat (limited to 'net/xdp/xsk_buff_pool.c')
 -rw-r--r--  net/xdp/xsk_buff_pool.c  253
 1 file changed, 176 insertions(+), 77 deletions(-)
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index fd39bb660ebc..51526034c42a 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/netdevice.h>
+#include <net/netdev_lock.h>
#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>
@@ -10,26 +12,22 @@
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
- unsigned long flags;
-
if (!xs->tx)
return;
- spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
+ spin_lock(&pool->xsk_tx_list_lock);
list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
- spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
+ spin_unlock(&pool->xsk_tx_list_lock);
}
void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
- unsigned long flags;
-
if (!xs->tx)
return;
- spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
+ spin_lock(&pool->xsk_tx_list_lock);
list_del_rcu(&xs->tx_list);
- spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
+ spin_unlock(&pool->xsk_tx_list_lock);
}
void xp_destroy(struct xsk_buff_pool *pool)
@@ -37,10 +35,21 @@ void xp_destroy(struct xsk_buff_pool *pool)
if (!pool)
return;
+ kvfree(pool->tx_descs);
kvfree(pool->heads);
kvfree(pool);
}
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+ GFP_KERNEL);
+ if (!pool->tx_descs)
+ return -ENOMEM;
+
+ return 0;
+}
+
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
@@ -58,6 +67,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
+ if (xs->tx)
+ if (xp_alloc_tx_descs(pool, xs))
+ goto out;
+
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
pool->heads_cnt = umem->chunks;
@@ -70,10 +83,15 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
+ pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
+ spin_lock_init(&pool->rx_lock);
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
- spin_lock_init(&pool->cq_lock);
+ spin_lock_init(&pool->cq_prod_lock);
+ spin_lock_init(&pool->cq_cached_prod_lock);
refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp;
@@ -83,11 +101,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- INIT_LIST_HEAD(&xskb->free_list_node);
+ INIT_LIST_HEAD(&xskb->list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
- xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
+ xp_init_xskb_addr(xskb, pool, (u64)i * pool->chunk_size);
}
return pool;
@@ -106,6 +124,18 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
}
EXPORT_SYMBOL(xp_set_rxq_info);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
+{
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
+ }
+}
+EXPORT_SYMBOL(xp_fill_cb);
+
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
struct netdev_bpf bpf;
@@ -149,6 +179,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
if (err)
return err;
+ if (flags & XDP_USE_SG)
+ pool->umem->flags |= XDP_UMEM_SG_FLAG;
+
if (flags & XDP_USE_NEED_WAKEUP)
pool->uses_need_wakeup = true;
/* Tx needs to be explicitly woken up the first time. Also
@@ -163,16 +196,26 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
/* For copy-mode, we are done. */
return 0;
- if (!netdev->netdev_ops->ndo_bpf ||
- !netdev->netdev_ops->ndo_xsk_wakeup) {
+ if ((netdev->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) {
err = -EOPNOTSUPP;
goto err_unreg_pool;
}
+ if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
+ if (dev_get_min_mp_channel_count(netdev)) {
+ err = -EBUSY;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
+ netdev_ops_assert_locked(netdev);
err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
if (err)
goto err_unreg_pool;
@@ -183,6 +226,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_xsk;
}
pool->umem->zc = true;
+ pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
return 0;
err_unreg_xsk:
@@ -197,17 +241,18 @@ err_unreg_pool:
return err;
}
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
struct net_device *dev, u16 queue_id)
{
u16 flags;
+ struct xdp_umem *umem = umem_xs->umem;
/* One fill and completion ring required for each queue id. */
if (!pool->fq || !pool->cq)
return -EINVAL;
flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
- if (pool->uses_need_wakeup)
+ if (umem_xs->pool->uses_need_wakeup)
flags |= XDP_USE_NEED_WAKEUP;
return xp_assign_dev(pool, dev, queue_id, flags);
@@ -215,13 +260,17 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
void xp_clear_dev(struct xsk_buff_pool *pool)
{
+ struct net_device *netdev = pool->netdev;
+
if (!pool->netdev)
return;
+ netdev_lock_ops(netdev);
xp_disable_drv_zc(pool);
xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
- dev_put(pool->netdev);
pool->netdev = NULL;
+ netdev_unlock_ops(netdev);
+ dev_put(netdev);
}
static void xp_release_deferred(struct work_struct *work)
@@ -295,7 +344,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->netdev = netdev;
dma_map->dev = dev;
- dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
@@ -317,6 +365,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = &dma_map->dma_pages[i];
if (*dma) {
+ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;
@@ -330,7 +379,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
struct xsk_dma_map *dma_map;
- if (pool->dma_pages_cnt == 0)
+ if (!pool->dma_pages)
return;
dma_map = xp_find_dma_map(pool);
@@ -339,11 +388,11 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
return;
}
- if (!refcount_dec_and_test(&dma_map->users))
- return;
+ if (refcount_dec_and_test(&dma_map->users))
+ __xp_dma_unmap(dma_map, attrs);
- __xp_dma_unmap(dma_map, attrs);
kvfree(pool->dma_pages);
+ pool->dma_pages = NULL;
pool->dma_pages_cnt = 0;
pool->dev = NULL;
}
@@ -363,13 +412,24 @@ static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
+ if (!pool->unaligned) {
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+ u64 orig_addr;
+
+ orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
+ }
+ }
+
pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
if (!pool->dma_pages)
return -ENOMEM;
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
- pool->dma_need_sync = dma_map->dma_need_sync;
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
@@ -405,19 +465,11 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
__xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
- if (dma_need_sync(dev, dma))
- dma_map->dma_need_sync = true;
dma_map->dma_pages[i] = dma;
}
if (pool->unaligned)
xp_check_dma_contiguity(dma_map);
- else
- for (i = 0; i < pool->heads_cnt; i++) {
- struct xdp_buff_xsk *xskb = &pool->heads[i];
-
- xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
- }
err = xp_init_dma_info(pool, dma_map);
if (err) {
@@ -451,6 +503,22 @@ static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
return *addr < pool->addrs_cnt;
}
+static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xdp_buff_xsk *xskb;
+
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+ }
+
+ return xskb;
+}
+
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb;
@@ -476,14 +544,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
break;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages_cnt)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
xskq_cons_release(pool->fq);
return xskb;
@@ -500,18 +561,17 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
} else {
pool->free_list_cnt--;
xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
- free_list_node);
- list_del_init(&xskb->free_list_node);
+ list_node);
+ list_del_init(&xskb->list_node);
}
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
+ xskb->xdp.flags = 0;
+
+ if (pool->dev)
+ xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
- if (pool->dma_need_sync) {
- dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
- pool->frame_len,
- DMA_BIDIRECTIONAL);
- }
return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
@@ -542,14 +602,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
continue;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages_cnt)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
*xdp = &xskb->xdp;
xdp++;
@@ -568,8 +621,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
i = nb_entries;
while (i--) {
- xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
- list_del_init(&xskb->free_list_node);
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
+ list_del_init(&xskb->list_node);
*xdp = &xskb->xdp;
xdp++;
@@ -579,15 +632,30 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
return nb_entries;
}
+static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ u32 max)
+{
+ int i;
+
+ for (i = 0; i < max; i++) {
+ struct xdp_buff *buff;
+
+ buff = xp_alloc(pool);
+ if (unlikely(!buff))
+ return i;
+ *xdp = buff;
+ xdp++;
+ }
+
+ return max;
+}
+
u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
u32 nb_entries1 = 0, nb_entries2;
- if (unlikely(pool->dma_need_sync)) {
- /* Slow path */
- *xdp = xp_alloc(pool);
- return !!*xdp;
- }
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
+ return xp_alloc_slow(pool, xdp, max);
if (unlikely(pool->free_list_cnt)) {
nb_entries1 = xp_alloc_reused(pool, xdp, max);
@@ -608,49 +676,80 @@ EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
+ u32 req_count, avail_count;
+
if (pool->free_list_cnt >= count)
return true;
- return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+
+ req_count = count - pool->free_list_cnt;
+ avail_count = xskq_cons_nb_entries(pool->fq, req_count);
+ if (!avail_count)
+ pool->fq->queue_empty_descs++;
+
+ return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
void xp_free(struct xdp_buff_xsk *xskb)
{
- if (!list_empty(&xskb->free_list_node))
+ if (!list_empty(&xskb->list_node))
return;
xskb->pool->free_list_cnt++;
- list_add(&xskb->free_list_node, &xskb->pool->free_list);
+ list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);
-void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+static u64 __xp_raw_get_addr(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+}
+
+static void *__xp_raw_get_data(const struct xsk_buff_pool *pool, u64 addr)
{
- addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
return pool->addrs + addr;
}
+
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xp_raw_get_data(pool, __xp_raw_get_addr(pool, addr));
+}
EXPORT_SYMBOL(xp_raw_get_data);
-dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
+static dma_addr_t __xp_raw_get_dma(const struct xsk_buff_pool *pool, u64 addr)
{
- addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
return (pool->dma_pages[addr >> PAGE_SHIFT] &
~XSK_NEXT_PG_CONTIG_MASK) +
(addr & ~PAGE_MASK);
}
-EXPORT_SYMBOL(xp_raw_get_dma);
-void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
- dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
- xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+ return __xp_raw_get_dma(pool, __xp_raw_get_addr(pool, addr));
}
-EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
+EXPORT_SYMBOL(xp_raw_get_dma);
-void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
- size_t size)
+/**
+ * xp_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Helper for getting desc's DMA address and metadata pointer, if present.
+ * Saves one call on hotpath, double calculation of the actual address,
+ * and inline checks for metadata presence and sanity.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
{
- dma_sync_single_range_for_device(pool->dev, dma, 0,
- size, DMA_BIDIRECTIONAL);
+ struct xdp_desc_ctx ret;
+
+ addr = __xp_raw_get_addr(pool, addr);
+
+ ret.dma = __xp_raw_get_dma(pool, addr);
+ ret.meta = __xsk_buff_get_metadata(pool, __xp_raw_get_data(pool, addr));
+
+ return ret;
}
-EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
+EXPORT_SYMBOL(xp_raw_get_ctx);
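
For reference, below is a minimal sketch of how a zero-copy driver's Tx path might consume the xp_raw_get_ctx() helper introduced at the end of this diff. It is illustrative only: struct my_ring, my_drv_program_tx_desc() and my_drv_xsk_xmit() are hypothetical placeholders, and in-tree drivers would normally go through the xsk_buff_* wrappers in <net/xdp_sock_drv.h> rather than call the pool-level helpers directly; they are used here only to show the functions touched by this patch. xsk_tx_peek_desc(), xsk_tx_release() and xp_dma_sync_for_device() are existing AF_XDP driver API.

/* Sketch only: my_ring and my_drv_program_tx_desc() are hypothetical
 * driver-side placeholders, not part of the kernel API.
 */
#include <net/xdp_sock_drv.h>
#include <net/xsk_buff_pool.h>

struct my_ring;						/* hypothetical HW Tx ring */
void my_drv_program_tx_desc(struct my_ring *ring, dma_addr_t dma,
			    u32 len, void *meta);	/* hypothetical */

static u32 my_drv_xsk_xmit(struct my_ring *ring, struct xsk_buff_pool *pool,
			   u32 budget)
{
	struct xdp_desc desc;
	u32 sent = 0;

	while (sent < budget && xsk_tx_peek_desc(pool, &desc)) {
		/* One call resolves both the DMA address and the Tx
		 * metadata pointer (NULL when absent or invalid).
		 */
		struct xdp_desc_ctx ctx = xp_raw_get_ctx(pool, desc.addr);

		/* No-op on DMA-coherent platforms. */
		xp_dma_sync_for_device(pool, ctx.dma, desc.len);

		my_drv_program_tx_desc(ring, ctx.dma, desc.len, ctx.meta);
		sent++;
	}

	if (sent)
		xsk_tx_release(pool);	/* let user space reuse the consumed
					 * Tx ring entries */

	return sent;
}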