diff options
Diffstat (limited to 'net/core/page_pool.c')
| -rw-r--r-- | net/core/page_pool.c | 537 |
1 files changed, 395 insertions, 142 deletions
diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 2abe6e919224..265a729431bb 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,7 +11,10 @@ #include <linux/slab.h> #include <linux/device.h> +#include <net/netdev_lock.h> +#include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/xdp.h> #include <linux/dma-direction.h> @@ -24,8 +27,13 @@ #include <trace/events/page_pool.h> +#include "dev.h" +#include "mp_dmabuf_devmem.h" +#include "netmem_priv.h" #include "page_pool_priv.h" +DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); + #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) @@ -145,9 +153,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else -#define alloc_stat_inc(pool, __stat) -#define recycle_stat_inc(pool, __stat) -#define recycle_stat_add(pool, __stat, val) +#define alloc_stat_inc(...) do { } while (0) +#define recycle_stat_inc(...) do { } while (0) +#define recycle_stat_add(...) do { } while (0) #endif static bool page_pool_producer_lock(struct page_pool *pool) @@ -187,6 +195,8 @@ static int page_pool_init(struct page_pool *pool, int cpuid) { unsigned int ring_qsize = 1024; /* Default */ + struct netdev_rx_queue *rxq; + int err; page_pool_struct_check(); @@ -194,17 +204,14 @@ static int page_pool_init(struct page_pool *pool, memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) return -EINVAL; if (pool->p.pool_size) - ring_qsize = pool->p.pool_size; - - /* Sanity limit mem that can be pinned down */ - if (ring_qsize > 32768) - return -E2BIG; + ring_qsize = min(pool->p.pool_size, 16384); /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, @@ -265,18 +272,56 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->dma_map) - get_device(pool->p.dev); + xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1); + + if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { + netdev_assert_locked(pool->slow.netdev); + rxq = __netif_get_rx_queue(pool->slow.netdev, + pool->slow.queue_idx); + pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; + } + + if (pool->mp_ops) { + if (!pool->dma_map || !pool->dma_sync) { + err = -EOPNOTSUPP; + goto free_ptr_ring; + } + + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); + if (err) { + pr_warn("%s() mem-provider init failed %d\n", __func__, + err); + goto free_ptr_ring; + } + + static_branch_inc(&page_pool_mem_providers); + } else if (pool->p.order > MAX_PAGE_ORDER) { + err = -EINVAL; + goto free_ptr_ring; + } return 0; + +free_ptr_ring: + ptr_ring_cleanup(&pool->ring, NULL); + xa_destroy(&pool->dma_mapped); +#ifdef CONFIG_PAGE_POOL_STATS + if (!pool->system) + free_percpu(pool->recycle_stats); +#endif + return err; } static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - - if (pool->dma_map) - put_device(pool->p.dev); + xa_destroy(&pool->dma_mapped); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) @@ -328,7 +373,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem); static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { @@ -358,7 +403,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) if (unlikely(!netmem)) break; - if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) { + if (likely(netmem_is_pref_nid(netmem, pref_nid))) { pool->alloc.cache[pool->alloc.count++] = netmem; } else { /* NUMA mismatch; @@ -366,7 +411,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); alloc_stat_inc(pool, waive); netmem = 0; break; @@ -417,13 +462,70 @@ page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { - if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) { + rcu_read_lock(); + /* re-check under rcu_read_lock() to sync with page_pool_scrub() */ + if (pool->dma_sync) + __page_pool_dma_sync_for_device(pool, netmem, + dma_sync_size); + rcu_read_unlock(); + } +} + +static int page_pool_register_dma_index(struct page_pool *pool, + netmem_ref netmem, gfp_t gfp) +{ + int err = 0; + u32 id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + goto out; + + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto out; + } + + netmem_set_dma_index(netmem, id); +out: + return err; +} + +static int page_pool_release_dma_index(struct page_pool *pool, + netmem_ref netmem) +{ + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + return 0; + + id = netmem_get_dma_index(netmem); + if (!id) + return -1; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return -1; + + netmem_set_dma_index(netmem, 0); + + return 0; } -static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; + int err; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit @@ -437,47 +539,28 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr_netmem(netmem, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) { + WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); goto unmap_failed; + } + + err = page_pool_register_dma_index(pool, netmem, gfp); + if (err) + goto unset_failed; page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; +unset_failed: + page_pool_set_dma_addr_netmem(netmem, 0); unmap_failed: - WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); return false; } -static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) -{ - struct page *page = netmem_to_page(netmem); - - page->pp = pool; - page->pp_magic |= PP_SIGNATURE; - - /* Ensuring all pages have been split into one fragment initially: - * page_pool_set_pp_info() is only called once for every page when it - * is allocated from the page allocator and page_pool_fragment_page() - * is dirtying the same cache line as the page->pp_magic above, so - * the overhead is negligible. - */ - page_pool_fragment_netmem(netmem, 1); - if (pool->has_init_callback) - pool->slow.init_callback(netmem, pool->slow.init_arg); -} - -static void page_pool_clear_pp_info(netmem_ref netmem) -{ - struct page *page = netmem_to_page(netmem); - - page->pp_magic = 0; - page->pp = NULL; -} - static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { @@ -488,7 +571,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) { put_page(page); return NULL; } @@ -504,8 +587,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, } /* slow path */ -static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; @@ -513,6 +596,12 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, netmem_ref netmem; int i, nr_pages; + /* Unconditionally set NOWARN if allocating from NAPI. + * Drivers forget to set it, and OOM reports on packet Rx are useless. + */ + if ((gfp & GFP_ATOMIC) == GFP_ATOMIC) + gfp |= __GFP_NOWARN; + /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); @@ -521,12 +610,11 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, if (unlikely(pool->alloc.count > 0)) return pool->alloc.cache[--pool->alloc.count]; - /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ + /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, - pool->p.nid, bulk, - (struct page **)pool->alloc.cache); + nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) return 0; @@ -535,7 +623,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { netmem = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) { put_page(netmem_to_page(netmem)); continue; } @@ -563,7 +651,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem; @@ -573,17 +661,20 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) return netmem; /* Slow-path: cache empty, do real allocation */ - netmem = __page_pool_alloc_pages_slow(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); + else + netmem = __page_pool_alloc_netmems_slow(pool, gfp); return netmem; } -EXPORT_SYMBOL(page_pool_alloc_netmem); +EXPORT_SYMBOL(page_pool_alloc_netmems); +ALLOW_ERROR_INJECTION(page_pool_alloc_netmems, NULL); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { - return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); -ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); /* Calculate distance between two u32 values, valid if distance is below 2^(31) * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution @@ -609,8 +700,30 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict) return inflight; } -static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, - netmem_ref netmem) +void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) +{ + netmem_set_pp(netmem, pool); + netmem_or_pp_magic(netmem, PP_SIGNATURE); + + /* Ensuring all pages have been split into one fragment initially: + * page_pool_set_pp_info() is only called once for every page when it + * is allocated from the page allocator and page_pool_fragment_page() + * is dirtying the same cache line as the page->pp_magic above, so + * the overhead is negligible. + */ + page_pool_fragment_netmem(netmem, 1); + if (pool->has_init_callback) + pool->slow.init_callback(netmem, pool->slow.init_arg); +} + +void page_pool_clear_pp_info(netmem_ref netmem) +{ + netmem_clear_pp_magic(netmem); + netmem_set_pp(netmem, NULL); +} + +static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, + netmem_ref netmem) { dma_addr_t dma; @@ -620,6 +733,9 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, */ return; + if (page_pool_release_dma_index(pool, netmem)) + return; + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ @@ -634,11 +750,16 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, * a regular page (that will eventually be returned to the normal * page-allocator via put_page). */ -void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem) { int count; + bool put; - __page_pool_release_page_dma(pool, netmem); + put = true; + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); + else + __page_pool_release_netmem_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -646,8 +767,10 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); trace_page_pool_state_release(pool, netmem, count); - page_pool_clear_pp_info(netmem); - put_page(netmem_to_page(netmem)); + if (put) { + page_pool_clear_pp_info(netmem); + put_page(netmem_to_page(netmem)); + } /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). @@ -656,19 +779,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { - int ret; - /* BH protection not needed if current is softirq */ - if (in_softirq()) - ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); - else - ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); + bool in_softirq, ret; - if (!ret) { + /* BH protection not needed if current is softirq */ + in_softirq = page_pool_producer_lock(pool); + ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem); + if (ret) recycle_stat_inc(pool, ring); - return true; - } + page_pool_producer_unlock(pool, in_softirq); - return false; + return ret; } /* Only allow direct recycling in special circumstances, into the @@ -692,8 +812,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem, static bool __page_pool_page_can_be_recycled(netmem_ref netmem) { - return page_ref_count(netmem_to_page(netmem)) == 1 && - !page_is_pfmemalloc(netmem_to_page(netmem)); + return netmem_is_net_iov(netmem) || + (page_ref_count(netmem_to_page(netmem)) == 1 && + !page_is_pfmemalloc(netmem_to_page(netmem))); } /* If the page refcnt == 1, this will try to recycle the page. @@ -728,6 +849,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem, /* Page found as candidate for recycling */ return netmem; } + /* Fallback/non-XDP mode: API user have elevated refcnt. * * Many drivers split up the page into fragments, and some @@ -742,7 +864,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem, * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -752,6 +874,10 @@ static bool page_pool_napi_local(const struct page_pool *pool) const struct napi_struct *napi; u32 cpuid; + /* On PREEMPT_RT the softirq can be preempted by the consumer */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return false; + if (unlikely(!in_softirq())) return false; @@ -776,12 +902,12 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, if (!allow_direct) allow_direct = page_pool_napi_local(pool); - netmem = - __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct); + netmem = __page_pool_put_page(pool, netmem, dma_sync_size, + allow_direct); if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_put_unrefed_netmem); @@ -794,69 +920,104 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, } EXPORT_SYMBOL(page_pool_put_unrefed_page); -/** - * page_pool_put_page_bulk() - release references on multiple pages - * @pool: pool from which pages were allocated - * @data: array holding page pointers - * @count: number of pages in @data - * - * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring - * producer lock. If the ptr_ring is full, page_pool_put_page_bulk() - * will release leftover pages to the page allocator. - * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx - * completion loop for the XDP_REDIRECT use case. - * - * Please note the caller must not use data area after running - * page_pool_put_page_bulk(), as this function overwrites it. - */ -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +static void page_pool_recycle_ring_bulk(struct page_pool *pool, + netmem_ref *bulk, + u32 bulk_len) { - int i, bulk_len = 0; - bool allow_direct; bool in_softirq; + u32 i; - allow_direct = page_pool_napi_local(pool); - - for (i = 0; i < count; i++) { - netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); - - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(netmem)) - continue; - - netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); - /* Approved for bulk recycling in ptr_ring cache */ - if (netmem) - data[bulk_len++] = (__force void *)netmem; - } - - if (!bulk_len) - return; - - /* Bulk producer into ptr_ring page_pool cache */ + /* Bulk produce into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); + for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; } } - recycle_stat_add(pool, ring, i); + page_pool_producer_unlock(pool, in_softirq); + recycle_stat_add(pool, ring, i); - /* Hopefully all pages was return into ptr_ring */ + /* Hopefully all pages were returned into ptr_ring */ if (likely(i == bulk_len)) return; - /* ptr_ring cache full, free remaining pages outside producer lock - * since put_page() with refcnt == 1 can be an expensive operation + /* + * ptr_ring cache is full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation. */ for (; i < bulk_len; i++) - page_pool_return_page(pool, (__force netmem_ref)data[i]); + page_pool_return_netmem(pool, bulk[i]); +} + +/** + * page_pool_put_netmem_bulk() - release references on multiple netmems + * @data: array holding netmem references + * @count: number of entries in @data + * + * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring + * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() + * will release leftover netmems to the memory provider. + * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx + * completion loop for the XDP_REDIRECT use case. + * + * Please note the caller must not use data area after running + * page_pool_put_netmem_bulk(), as this function overwrites it. + */ +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) +{ + u32 bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + netmem_ref netmem = netmem_compound_head(data[i]); + + if (page_pool_unref_and_test(netmem)) + data[bulk_len++] = netmem; + } + + count = bulk_len; + while (count) { + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + struct page_pool *pool = NULL; + bool allow_direct; + u32 foreign = 0; + + bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + struct page_pool *netmem_pp; + netmem_ref netmem = data[i]; + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different + * page_pool, save it for another round. + */ + data[foreign++] = netmem; + continue; + } + + netmem = __page_pool_put_page(pool, netmem, -1, + allow_direct); + /* Approved for bulk recycling in ptr_ring cache */ + if (netmem) + bulk[bulk_len++] = netmem; + } + + if (bulk_len) + page_pool_recycle_ring_bulk(pool, bulk, bulk_len); + + count = foreign; + } } -EXPORT_SYMBOL(page_pool_put_page_bulk); +EXPORT_SYMBOL(page_pool_put_netmem_bulk); static netmem_ref page_pool_drain_frag(struct page_pool *pool, netmem_ref netmem) @@ -872,7 +1033,7 @@ static netmem_ref page_pool_drain_frag(struct page_pool *pool, return netmem; } - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -886,7 +1047,7 @@ static void page_pool_free_frag(struct page_pool *pool) if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, @@ -905,13 +1066,14 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, if (netmem && *offset + size > max_size) { netmem = page_pool_drain_frag(pool, netmem); if (netmem) { + recycle_stat_inc(pool, cached); alloc_stat_inc(pool, fast); goto frag_reset; } } if (!netmem) { - netmem = page_pool_alloc_netmem(pool, gfp); + netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0; @@ -929,7 +1091,6 @@ frag_reset: pool->frag_users++; pool->frag_offset = *offset + size; - alloc_stat_inc(pool, fast); return netmem; } EXPORT_SYMBOL(page_pool_alloc_frag_netmem); @@ -949,11 +1110,11 @@ static void page_pool_empty_ring(struct page_pool *pool) /* Empty recycle ring */ while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ - if (!(page_ref_count(netmem_to_page(netmem)) == 1)) + if (!(netmem_ref_count(netmem) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } @@ -964,6 +1125,12 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); + + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } + kfree(pool); } @@ -980,14 +1147,35 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } static void page_pool_scrub(struct page_pool *pool) { + unsigned long id; + void *ptr; + page_pool_empty_alloc_cache_once(pool); - pool->destroy_cnt++; + if (!pool->destroy_cnt++ && pool->dma_map) { + if (pool->dma_sync) { + /* Disable page_pool_dma_sync_for_device() */ + pool->dma_sync = false; + + /* Make sure all concurrent returns that may see the old + * value of dma_sync (and thus perform a sync) have + * finished before doing the unmapping below. Skip the + * wait if the device doesn't actually need syncing, or + * if there are no outstanding mapped pages. + */ + if (dma_dev_need_sync(pool->p.dev) && + !xa_empty(&pool->dma_mapped)) + synchronize_net(); + } + + xa_for_each(&pool->dma_mapped, id, ptr) + __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr)); + } /* No more consumers should exist, but producers could still * be in-flight. @@ -997,10 +1185,14 @@ static void page_pool_scrub(struct page_pool *pool) static int page_pool_release(struct page_pool *pool) { + bool in_softirq; int inflight; page_pool_scrub(pool); inflight = page_pool_inflight(pool, true); + /* Acquire producer lock to make sure producers have exited. */ + in_softirq = page_pool_producer_lock(pool); + page_pool_producer_unlock(pool, in_softirq); if (!inflight) __page_pool_destroy(pool); @@ -1015,7 +1207,13 @@ static void page_pool_release_retry(struct work_struct *wq) int inflight; inflight = page_pool_release(pool); - if (!inflight) + /* In rare cases, a driver bug may cause inflight to go negative. + * Don't reschedule release if inflight is 0 or negative. + * - If 0, the page_pool has been destroyed + * - if negative, we will never recover + * in both cases no reschedule is necessary. + */ + if (inflight <= 0) return; /* Periodic warning for page pools the user can't see */ @@ -1041,6 +1239,35 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } +/** + * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI + * @pool: page pool to modify + * @napi: NAPI instance to associate the page pool with + * + * Associate a page pool with a NAPI instance for lockless page recycling. + * This is useful when a new page pool has to be added to a NAPI instance + * without disabling that NAPI instance, to mark the point at which control + * path "hands over" the page pool to the NAPI instance. In most cases driver + * can simply set the @napi field in struct page_pool_params, and does not + * have to call this helper. + * + * The function is idempotent, but does not implement any refcounting. + * Single page_pool_disable_direct_recycling() will disable recycling, + * no matter how many times enable was called. + */ +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi) +{ + if (READ_ONCE(pool->p.napi) == napi) + return; + WARN_ON(!napi || pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, napi); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_enable_direct_recycling); + void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. @@ -1051,13 +1278,11 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) if (!pool->p.napi) return; - /* To avoid races with recycling and additional barriers make sure - * pool and NAPI are unlinked when NAPI is disabled. - */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); - WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + napi_assert_will_not_race(pool->p.napi); + mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); @@ -1095,7 +1320,35 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr); +} + +/* Associate a niov with a page pool. Should follow with a matching + * net_mp_niov_clear_page_pool() + */ +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pool, netmem); + + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); +} + +/* Disassociate a niov from a page pool. Should only be used in the + * ->release_netmem() path. + */ +void net_mp_niov_clear_page_pool(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_clear_pp_info(netmem); +} |
