From 1f20a5769446a1acae67ac9e63d07a594829a789 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:23 +0200 Subject: page_pool: make sure frag API fields don't span between cachelines After commit 5027ec19f104 ("net: page_pool: split the page_pool_params into fast and slow") that made &page_pool contain only "hot" params at the start, cacheline boundary chops frag API fields group in the middle again. To not bother with this each time fast params get expanded or shrunk, let's just align them to `4 * sizeof(long)`, the closest upper pow-2 to their actual size (2 longs + 1 int). This ensures 16-byte alignment for the 32-bit architectures and 32-byte alignment for the 64-bit ones, excluding unnecessary false-sharing. ::page_state_hold_cnt is used quite intensively on hotpath no matter if frag API is used, so move it to the newly created hole in the first cacheline. Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig --- net/core/page_pool.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dd364d738c00..95eac12e8790 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -172,12 +172,22 @@ static void page_pool_producer_unlock(struct page_pool *pool, spin_unlock_bh(&pool->ring.producer_lock); } +static void page_pool_struct_check(void) +{ + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); + CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long)); +} + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params, int cpuid) { unsigned int ring_qsize = 1024; /* Default */ + page_pool_struct_check(); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); -- cgit From 403f11ac9ab72fc3bee0b8c80c16e33212ea8cd9 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:24 +0200 Subject: page_pool: don't use driver-set flags field directly page_pool::p is driver-defined params, copied directly from the structure passed to page_pool_create(). The structure isn't meant to be modified by the Page Pool core code and this even might look confusing[0][1]. In order to be able to alter some flags, let's define our own, internal fields the same way as the already existing one (::has_init_callback). They are defined as bits in the driver-set params, leave them so here as well, to not waste byte-per-bit or so. Almost 30 bits are still free for future extensions. We could've defined only new flags here or only the ones we may need to alter, but checking some flags in one place while others in another doesn't sound convenient or intuitive. ::flags passed by the driver can now go to the "slow" PP params. Suggested-by: Jakub Kicinski Link[0]: https://lore.kernel.org/netdev/20230703133207.4f0c54ce@kernel.org Suggested-by: Alexander Duyck Link[1]: https://lore.kernel.org/netdev/CAKgT0UfZCGnWgOH96E4GV3ZP6LLbROHM7SHE8NKwq+exX+Gk_Q@mail.gmail.com Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig --- net/core/page_pool.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 95eac12e8790..c2819ff03dd2 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -194,7 +194,7 @@ static int page_pool_init(struct page_pool *pool, pool->cpuid = cpuid; /* Validate only known flags were used */ - if (pool->p.flags & ~(PP_FLAG_ALL)) + if (pool->slow.flags & ~PP_FLAG_ALL) return -EINVAL; if (pool->p.pool_size) @@ -208,22 +208,26 @@ static int page_pool_init(struct page_pool *pool, * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, * which is the XDP_TX use-case. */ - if (pool->p.flags & PP_FLAG_DMA_MAP) { + if (pool->slow.flags & PP_FLAG_DMA_MAP) { if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; + + pool->dma_map = true; } - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { + if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped */ - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!(pool->slow.flags & PP_FLAG_DMA_MAP)) return -EINVAL; if (!pool->p.max_len) return -EINVAL; + pool->dma_sync = true; + /* pool->p.offset has to be set according to the address * offset used by the DMA engine to start copying rx data */ @@ -232,7 +236,7 @@ static int page_pool_init(struct page_pool *pool, pool->has_init_callback = !!pool->slow.init_callback; #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) { + if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) { pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) return -ENOMEM; @@ -242,12 +246,13 @@ static int page_pool_init(struct page_pool *pool, * (also percpu) page pool instance. */ pool->recycle_stats = &pp_system_recycle_stats; + pool->system = true; } #endif if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + if (!pool->system) free_percpu(pool->recycle_stats); #endif return -ENOMEM; @@ -258,7 +263,7 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->p.flags & PP_FLAG_DMA_MAP) + if (pool->dma_map) get_device(pool->p.dev); return 0; @@ -268,11 +273,11 @@ static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - if (pool->p.flags & PP_FLAG_DMA_MAP) + if (pool->dma_map) put_device(pool->p.dev); #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + if (!pool->system) free_percpu(pool->recycle_stats); #endif } @@ -424,7 +429,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (page_pool_set_dma_addr(page, dma)) goto unmap_failed; - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + if (pool->dma_sync) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; @@ -470,8 +475,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if ((pool->p.flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) { put_page(page); return NULL; } @@ -491,8 +495,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; - unsigned int pp_flags = pool->p.flags; unsigned int pp_order = pool->p.order; + bool dma_map = pool->dma_map; struct page *page; int i, nr_pages; @@ -517,8 +521,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { page = pool->alloc.cache[i]; - if ((pp_flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, page))) { put_page(page); continue; } @@ -590,7 +593,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) { dma_addr_t dma; - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!pool->dma_map) /* Always account for inflight pages, even if we didn't * map them */ @@ -673,7 +676,7 @@ static bool __page_pool_page_can_be_recycled(const struct page *page) } /* If the page refcnt == 1, this will try to recycle the page. - * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for + * If pool->dma_sync is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). * If the page refcnt != 1, then the page will be returned to memory * subsystem. @@ -696,7 +699,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, if (likely(__page_pool_page_can_be_recycled(page))) { /* Read barrier done in page_ref_count / READ_ONCE */ - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + if (pool->dma_sync) page_pool_dma_sync_for_device(pool, page, dma_sync_size); @@ -809,7 +812,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, return NULL; if (__page_pool_page_can_be_recycled(page)) { - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + if (pool->dma_sync) page_pool_dma_sync_for_device(pool, page, -1); return page; -- cgit From 4321de4497b24fbf22389331f4ecd4039a451aa9 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:25 +0200 Subject: page_pool: check for DMA sync shortcut earlier We can save a couple more function calls in the Page Pool code if we check for dma_need_sync() earlier, just when we test pp->p.dma_sync. Move both these checks into an inline wrapper and call the PP wrapper over the generic DMA sync function only when both are true. You can't cache the result of dma_need_sync() in &page_pool, as it may change anytime if an SWIOTLB buffer is allocated or mapped. Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig --- net/core/page_pool.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index c2819ff03dd2..4f9d1bd7f4d1 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -398,16 +398,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) return page; } -static void page_pool_dma_sync_for_device(struct page_pool *pool, - struct page *page, - unsigned int dma_sync_size) +static void __page_pool_dma_sync_for_device(const struct page_pool *pool, + struct page *page, + u32 dma_sync_size) { +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) dma_addr_t dma_addr = page_pool_get_dma_addr(page); dma_sync_size = min(dma_sync_size, pool->p.max_len); - dma_sync_single_range_for_device(pool->p.dev, dma_addr, - pool->p.offset, dma_sync_size, - pool->p.dma_dir); + __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, + dma_sync_size, pool->p.dma_dir); +#endif +} + +static __always_inline void +page_pool_dma_sync_for_device(const struct page_pool *pool, + struct page *page, + u32 dma_sync_size) +{ + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) + __page_pool_dma_sync_for_device(pool, page, dma_sync_size); } static bool page_pool_dma_map(struct page_pool *pool, struct page *page) @@ -429,8 +439,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (page_pool_set_dma_addr(page, dma)) goto unmap_failed; - if (pool->dma_sync) - page_pool_dma_sync_for_device(pool, page, pool->p.max_len); + page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; @@ -699,9 +708,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, if (likely(__page_pool_page_can_be_recycled(page))) { /* Read barrier done in page_ref_count / READ_ONCE */ - if (pool->dma_sync) - page_pool_dma_sync_for_device(pool, page, - dma_sync_size); + page_pool_dma_sync_for_device(pool, page, dma_sync_size); if (allow_direct && in_softirq() && page_pool_recycle_in_cache(page, pool)) @@ -812,9 +819,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, return NULL; if (__page_pool_page_can_be_recycled(page)) { - if (pool->dma_sync) - page_pool_dma_sync_for_device(pool, page, -1); - + page_pool_dma_sync_for_device(pool, page, -1); return page; } -- cgit From 163943ac00cb31ac1a88ce5f78a7e2ead37329ec Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:26 +0200 Subject: xsk: use generic DMA sync shortcut instead of a custom one XSk infra's been using its own DMA sync shortcut to try avoiding redundant function calls. Now that there is a generic one, remove the custom implementation and rely on the generic helpers. xsk_buff_dma_sync_for_cpu() doesn't need the second argument anymore, remove it. Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig --- net/xdp/xsk_buff_pool.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index ce60ecd48a4d..c0e0204b9630 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi dma_map->netdev = netdev; dma_map->dev = dev; - dma_map->dma_need_sync = false; dma_map->dma_pages_cnt = nr_pages; refcount_set(&dma_map->users, 1); list_add(&dma_map->list, &umem->xsk_dma_list); @@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_ pool->dev = dma_map->dev; pool->dma_pages_cnt = dma_map->dma_pages_cnt; - pool->dma_need_sync = dma_map->dma_need_sync; memcpy(pool->dma_pages, dma_map->dma_pages, pool->dma_pages_cnt * sizeof(*pool->dma_pages)); @@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, __xp_dma_unmap(dma_map, attrs); return -ENOMEM; } - if (dma_need_sync(dev, dma)) - dma_map->dma_need_sync = true; dma_map->dma_pages[i] = dma; } @@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data_meta = xskb->xdp.data; xskb->xdp.flags = 0; - if (pool->dma_need_sync) { - dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, - pool->frame_len, - DMA_BIDIRECTIONAL); - } + if (pool->dev) + xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len); + return &xskb->xdp; } EXPORT_SYMBOL(xp_alloc); @@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { u32 nb_entries1 = 0, nb_entries2; - if (unlikely(pool->dma_need_sync)) { + if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) { struct xdp_buff *buff; /* Slow path */ @@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) (addr & ~PAGE_MASK); } EXPORT_SYMBOL(xp_raw_get_dma); - -void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb) -{ - dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0, - xskb->pool->frame_len, DMA_BIDIRECTIONAL); -} -EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow); - -void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, - size_t size) -{ - dma_sync_single_range_for_device(pool->dev, dma, 0, - size, DMA_BIDIRECTIONAL); -} -EXPORT_SYMBOL(xp_dma_sync_for_device_slow); -- cgit