Diffstat (limited to 'net/core/page_pool.c')
| -rw-r--r-- | net/core/page_pool.c | 1012 |
1 file changed, 731 insertions(+), 281 deletions(-)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9b203d8660e4..265a729431bb 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -5,12 +5,16 @@ * Copyright (C) 2016 Red Hat, Inc. */ +#include <linux/error-injection.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/device.h> -#include <net/page_pool.h> +#include <net/netdev_lock.h> +#include <net/netdev_rx_queue.h> +#include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/xdp.h> #include <linux/dma-direction.h> @@ -19,15 +23,25 @@ #include <linux/mm.h> /* for put_page() */ #include <linux/poison.h> #include <linux/ethtool.h> +#include <linux/netdevice.h> #include <trace/events/page_pool.h> +#include "dev.h" +#include "mp_dmabuf_devmem.h" +#include "netmem_priv.h" +#include "page_pool_priv.h" + +DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); + #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) -#define BIAS_MAX LONG_MAX +#define BIAS_MAX (LONG_MAX >> 1) #ifdef CONFIG_PAGE_POOL_STATS +static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats); + /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) /* recycle_stat_inc is safe to use when preemption is possible. */ @@ -57,7 +71,18 @@ static const char pp_stats[][ETH_GSTRING_LEN] = { "rx_pp_recycle_released_ref", }; -bool page_pool_get_stats(struct page_pool *pool, +/** + * page_pool_get_stats() - fetch page pool stats + * @pool: pool from which page was allocated + * @stats: struct page_pool_stats to fill in + * + * Retrieve statistics about the page_pool. This API is only available + * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``. + * A pointer to a caller allocated struct page_pool_stats structure + * is passed to this API which is filled in. The caller can then report + * those stats to the user (perhaps via ethtool, debugfs, etc.). + */ +bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats) { int cpu = 0; @@ -107,9 +132,9 @@ int page_pool_ethtool_stats_get_count(void) } EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { - struct page_pool_stats *pool_stats = stats; + const struct page_pool_stats *pool_stats = stats; *data++ = pool_stats->alloc_stats.fast; *data++ = pool_stats->alloc_stats.slow; @@ -128,79 +153,189 @@ u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else -#define alloc_stat_inc(pool, __stat) -#define recycle_stat_inc(pool, __stat) -#define recycle_stat_add(pool, __stat, val) +#define alloc_stat_inc(...) do { } while (0) +#define recycle_stat_inc(...) do { } while (0) +#define recycle_stat_add(...) 
do { } while (0) #endif +static bool page_pool_producer_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) +{ + bool in_softirq = in_softirq(); + + if (in_softirq) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); + + return in_softirq; +} + +static void page_pool_producer_unlock(struct page_pool *pool, + bool in_softirq) + __releases(&pool->ring.producer_lock) +{ + if (in_softirq) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +} + +static void page_pool_struct_check(void) +{ + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); + CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, + PAGE_POOL_FRAG_GROUP_ALIGN); +} + static int page_pool_init(struct page_pool *pool, - const struct page_pool_params *params) + const struct page_pool_params *params, + int cpuid) { unsigned int ring_qsize = 1024; /* Default */ + struct netdev_rx_queue *rxq; + int err; + + page_pool_struct_check(); - memcpy(&pool->p, params, sizeof(pool->p)); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); + memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); + + pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ - if (pool->p.flags & ~(PP_FLAG_ALL)) + if (pool->slow.flags & ~PP_FLAG_ALL) return -EINVAL; if (pool->p.pool_size) - ring_qsize = pool->p.pool_size; - - /* Sanity limit mem that can be pinned down */ - if (ring_qsize > 32768) - return -E2BIG; + ring_qsize = min(pool->p.pool_size, 16384); /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, * which is the XDP_TX use-case. */ - if (pool->p.flags & PP_FLAG_DMA_MAP) { + if (pool->slow.flags & PP_FLAG_DMA_MAP) { if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; + + pool->dma_map = true; } - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { + if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped */ - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!(pool->slow.flags & PP_FLAG_DMA_MAP)) return -EINVAL; if (!pool->p.max_len) return -EINVAL; + pool->dma_sync = true; + /* pool->p.offset has to be set according to the address * offset used by the DMA engine to start copying rx data */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; + pool->has_init_callback = !!pool->slow.init_callback; #ifdef CONFIG_PAGE_POOL_STATS - pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); - if (!pool->recycle_stats) - return -ENOMEM; + if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) { + pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); + if (!pool->recycle_stats) + return -ENOMEM; + } else { + /* For system page pool instance we use a singular stats object + * instead of allocating a separate percpu variable for each + * (also percpu) page pool instance. 
+ */ + pool->recycle_stats = &pp_system_recycle_stats; + pool->system = true; + } #endif - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { +#ifdef CONFIG_PAGE_POOL_STATS + if (!pool->system) + free_percpu(pool->recycle_stats); +#endif return -ENOMEM; + } atomic_set(&pool->pages_state_release_cnt, 0); /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->p.flags & PP_FLAG_DMA_MAP) - get_device(pool->p.dev); + xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1); + + if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { + netdev_assert_locked(pool->slow.netdev); + rxq = __netif_get_rx_queue(pool->slow.netdev, + pool->slow.queue_idx); + pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; + } + + if (pool->mp_ops) { + if (!pool->dma_map || !pool->dma_sync) { + err = -EOPNOTSUPP; + goto free_ptr_ring; + } + + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); + if (err) { + pr_warn("%s() mem-provider init failed %d\n", __func__, + err); + goto free_ptr_ring; + } + + static_branch_inc(&page_pool_mem_providers); + } else if (pool->p.order > MAX_PAGE_ORDER) { + err = -EINVAL; + goto free_ptr_ring; + } return 0; + +free_ptr_ring: + ptr_ring_cleanup(&pool->ring, NULL); + xa_destroy(&pool->dma_mapped); +#ifdef CONFIG_PAGE_POOL_STATS + if (!pool->system) + free_percpu(pool->recycle_stats); +#endif + return err; } -struct page_pool *page_pool_create(const struct page_pool_params *params) +static void page_pool_uninit(struct page_pool *pool) +{ + ptr_ring_cleanup(&pool->ring, NULL); + xa_destroy(&pool->dma_mapped); + +#ifdef CONFIG_PAGE_POOL_STATS + if (!pool->system) + free_percpu(pool->recycle_stats); +#endif +} + +/** + * page_pool_create_percpu() - create a page pool for a given cpu. 
+ * @params: parameters, see struct page_pool_params + * @cpuid: cpu identifier + */ +struct page_pool * +page_pool_create_percpu(const struct page_pool_params *params, int cpuid) { struct page_pool *pool; int err; @@ -209,30 +344,47 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) if (!pool) return ERR_PTR(-ENOMEM); - err = page_pool_init(pool, params); - if (err < 0) { - pr_warn("%s() gave up with errno %d\n", __func__, err); - kfree(pool); - return ERR_PTR(err); - } + err = page_pool_init(pool, params, cpuid); + if (err < 0) + goto err_free; + + err = page_pool_list(pool); + if (err) + goto err_uninit; return pool; + +err_uninit: + page_pool_uninit(pool); +err_free: + pr_warn("%s() gave up with errno %d\n", __func__, err); + kfree(pool); + return ERR_PTR(err); +} +EXPORT_SYMBOL(page_pool_create_percpu); + +/** + * page_pool_create() - create a page pool + * @params: parameters, see struct page_pool_params + */ +struct page_pool *page_pool_create(const struct page_pool_params *params) +{ + return page_pool_create_percpu(params, -1); } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, struct page *page); +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem); -noinline -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) +static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; - struct page *page; + netmem_ref netmem; int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ if (__ptr_ring_empty(r)) { alloc_stat_inc(pool, empty); - return NULL; + return 0; } /* Softirq guarantee CPU and thus NUMA node is stable. This, @@ -247,99 +399,166 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) /* Refill alloc array, but only if NUMA match */ do { - page = __ptr_ring_consume(r); - if (unlikely(!page)) + netmem = (__force netmem_ref)__ptr_ring_consume(r); + if (unlikely(!netmem)) break; - if (likely(page_to_nid(page) == pref_nid)) { - pool->alloc.cache[pool->alloc.count++] = page; + if (likely(netmem_is_pref_nid(netmem, pref_nid))) { + pool->alloc.cache[pool->alloc.count++] = netmem; } else { /* NUMA mismatch; * (1) release 1 page to page-allocator and * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - page_pool_return_page(pool, page); + page_pool_return_netmem(pool, netmem); alloc_stat_inc(pool, waive); - page = NULL; + netmem = 0; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, refill); } - return page; + return netmem; } /* fast path */ -static struct page *__page_pool_get_cached(struct page_pool *pool) +static netmem_ref __page_pool_get_cached(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Caller MUST guarantee safe non-concurrent access, e.g. 
softirq */ if (likely(pool->alloc.count)) { /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, fast); } else { - page = page_pool_refill_alloc_cache(pool); + netmem = page_pool_refill_alloc_cache(pool); } - return page; + return netmem; } -static void page_pool_dma_sync_for_device(struct page_pool *pool, - struct page *page, - unsigned int dma_sync_size) +static void __page_pool_dma_sync_for_device(const struct page_pool *pool, + netmem_ref netmem, + u32 dma_sync_size) { - dma_addr_t dma_addr = page_pool_get_dma_addr(page); +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) + dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_size = min(dma_sync_size, pool->p.max_len); - dma_sync_single_range_for_device(pool->p.dev, dma_addr, - pool->p.offset, dma_sync_size, - pool->p.dma_dir); + __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, + dma_sync_size, pool->p.dma_dir); +#endif } -static bool page_pool_dma_map(struct page_pool *pool, struct page *page) +static __always_inline void +page_pool_dma_sync_for_device(const struct page_pool *pool, + netmem_ref netmem, + u32 dma_sync_size) +{ + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) { + rcu_read_lock(); + /* re-check under rcu_read_lock() to sync with page_pool_scrub() */ + if (pool->dma_sync) + __page_pool_dma_sync_for_device(pool, netmem, + dma_sync_size); + rcu_read_unlock(); + } +} + +static int page_pool_register_dma_index(struct page_pool *pool, + netmem_ref netmem, gfp_t gfp) +{ + int err = 0; + u32 id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + goto out; + + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto out; + } + + netmem_set_dma_index(netmem, id); +out: + return err; +} + +static int page_pool_release_dma_index(struct page_pool *pool, + netmem_ref netmem) +{ + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + return 0; + + id = netmem_get_dma_index(netmem); + if (!id) + return -1; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return -1; + + netmem_set_dma_index(netmem, 0); + + return 0; +} + +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; + int err; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. 
*/ - dma = dma_map_page_attrs(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); + dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0, + (PAGE_SIZE << pool->p.order), pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(pool->p.dev, dma)) return false; - page_pool_set_dma_addr(page, dma); + if (page_pool_set_dma_addr_netmem(netmem, dma)) { + WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); + goto unmap_failed; + } - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, pool->p.max_len); + err = page_pool_register_dma_index(pool, netmem, gfp); + if (err) + goto unset_failed; - return true; -} + page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); -static void page_pool_set_pp_info(struct page_pool *pool, - struct page *page) -{ - page->pp = pool; - page->pp_magic |= PP_SIGNATURE; - if (pool->p.init_callback) - pool->p.init_callback(page, pool->p.init_arg); -} + return true; -static void page_pool_clear_pp_info(struct page *page) -{ - page->pp_magic = 0; - page->pp = NULL; +unset_failed: + page_pool_set_dma_addr_netmem(netmem, 0); +unmap_failed: + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return false; } static struct page *__page_pool_alloc_page_order(struct page_pool *pool, @@ -352,94 +571,108 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if ((pool->p.flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) { put_page(page); return NULL; } alloc_stat_inc(pool, slow_high_order); - page_pool_set_pp_info(pool, page); + page_pool_set_pp_info(pool, page_to_netmem(page)); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); + trace_page_pool_state_hold(pool, page_to_netmem(page), + pool->pages_state_hold_cnt); return page; } /* slow path */ -noinline -static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; - unsigned int pp_flags = pool->p.flags; unsigned int pp_order = pool->p.order; - struct page *page; + bool dma_map = pool->dma_map; + netmem_ref netmem; int i, nr_pages; + /* Unconditionally set NOWARN if allocating from NAPI. + * Drivers forget to set it, and OOM reports on packet Rx are useless. 
+ */ + if ((gfp & GFP_ATOMIC) == GFP_ATOMIC) + gfp |= __GFP_NOWARN; + /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) - return __page_pool_alloc_page_order(pool, gfp); + return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); /* Unnecessary as alloc cache is empty, but guarantees zero count */ if (unlikely(pool->alloc.count > 0)) return pool->alloc.cache[--pool->alloc.count]; - /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ + /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, - pool->alloc.cache); + nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) - return NULL; + return 0; /* Pages have been filled into alloc.cache array, but count is zero and * page element have not been (possibly) DMA mapped. */ for (i = 0; i < nr_pages; i++) { - page = pool->alloc.cache[i]; - if ((pp_flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { - put_page(page); + netmem = pool->alloc.cache[i]; + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) { + put_page(netmem_to_page(netmem)); continue; } - page_pool_set_pp_info(pool, page); - pool->alloc.cache[pool->alloc.count++] = page; + page_pool_set_pp_info(pool, netmem); + pool->alloc.cache[pool->alloc.count++] = netmem; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); } /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, slow); } else { - page = NULL; + netmem = 0; } /* When page just alloc'ed is should/must have refcnt 1. */ - return page; + return netmem; } /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. 
*/ -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { - struct page *page; + netmem_ref netmem; /* Fast-path: Get a page from cache */ - page = __page_pool_get_cached(pool); - if (page) - return page; + netmem = __page_pool_get_cached(pool); + if (netmem) + return netmem; /* Slow-path: cache empty, do real allocation */ - page = __page_pool_alloc_pages_slow(pool, gfp); - return page; + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); + else + netmem = __page_pool_alloc_netmems_slow(pool, gfp); + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_netmems); +ALLOW_ERROR_INJECTION(page_pool_alloc_netmems, NULL); + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); @@ -448,7 +681,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages); */ #define _distance(a, b) (s32)((a) - (b)) -static s32 page_pool_inflight(struct page_pool *pool) +s32 page_pool_inflight(const struct page_pool *pool, bool strict) { u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); @@ -456,73 +689,106 @@ static s32 page_pool_inflight(struct page_pool *pool) inflight = _distance(hold_cnt, release_cnt); - trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); - WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); + if (strict) { + trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); + WARN(inflight < 0, "Negative(%d) inflight packet-pages", + inflight); + } else { + inflight = max(0, inflight); + } return inflight; } -/* Disconnects a page (from a page_pool). API users can have a need - * to disconnect a page (from a page_pool), to allow it to be used as - * a regular page (that will eventually be returned to the normal - * page-allocator via put_page). - */ -void page_pool_release_page(struct page_pool *pool, struct page *page) +void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) +{ + netmem_set_pp(netmem, pool); + netmem_or_pp_magic(netmem, PP_SIGNATURE); + + /* Ensuring all pages have been split into one fragment initially: + * page_pool_set_pp_info() is only called once for every page when it + * is allocated from the page allocator and page_pool_fragment_page() + * is dirtying the same cache line as the page->pp_magic above, so + * the overhead is negligible. 
+ */ + page_pool_fragment_netmem(netmem, 1); + if (pool->has_init_callback) + pool->slow.init_callback(netmem, pool->slow.init_arg); +} + +void page_pool_clear_pp_info(netmem_ref netmem) +{ + netmem_clear_pp_magic(netmem); + netmem_set_pp(netmem, NULL); +} + +static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, + netmem_ref netmem) { dma_addr_t dma; - int count; - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!pool->dma_map) /* Always account for inflight pages, even if we didn't * map them */ - goto skip_dma_unmap; + return; + + if (page_pool_release_dma_index(pool, netmem)) + return; - dma = page_pool_get_dma_addr(page); + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, - DMA_ATTR_SKIP_CPU_SYNC); - page_pool_set_dma_addr(page, 0); -skip_dma_unmap: - page_pool_clear_pp_info(page); + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + page_pool_set_dma_addr_netmem(netmem, 0); +} + +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). + */ +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem) +{ + int count; + bool put; + + put = true; + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); + else + __page_pool_release_netmem_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. */ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); - trace_page_pool_state_release(pool, page, count); -} -EXPORT_SYMBOL(page_pool_release_page); + trace_page_pool_state_release(pool, netmem, count); -/* Return a page to the page allocator, cleaning up our state */ -static void page_pool_return_page(struct page_pool *pool, struct page *page) -{ - page_pool_release_page(pool, page); - - put_page(page); + if (put) { + page_pool_clear_pp_info(netmem); + put_page(netmem_to_page(netmem)); + } /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). */ } -static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) +static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { - int ret; - /* BH protection not needed if current is serving softirq */ - if (in_serving_softirq()) - ret = ptr_ring_produce(&pool->ring, page); - else - ret = ptr_ring_produce_bh(&pool->ring, page); + bool in_softirq, ret; - if (!ret) { + /* BH protection not needed if current is softirq */ + in_softirq = page_pool_producer_lock(pool); + ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem); + if (ret) recycle_stat_inc(pool, ring); - return true; - } + page_pool_producer_unlock(pool, in_softirq); - return false; + return ret; } /* Only allow direct recycling in special circumstances, into the @@ -530,7 +796,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) * * Caller must provide appropriate safe context. 
*/ -static bool page_pool_recycle_in_cache(struct page *page, +static bool page_pool_recycle_in_cache(netmem_ref netmem, struct page_pool *pool) { if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { @@ -539,21 +805,30 @@ static bool page_pool_recycle_in_cache(struct page *page, } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ - pool->alloc.cache[pool->alloc.count++] = page; + pool->alloc.cache[pool->alloc.count++] = netmem; recycle_stat_inc(pool, cached); return true; } +static bool __page_pool_page_can_be_recycled(netmem_ref netmem) +{ + return netmem_is_net_iov(netmem) || + (page_ref_count(netmem_to_page(netmem)) == 1 && + !page_is_pfmemalloc(netmem_to_page(netmem))); +} + /* If the page refcnt == 1, this will try to recycle the page. - * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for + * If pool->dma_sync is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). * If the page refcnt != 1, then the page will be returned to memory * subsystem. */ -static __always_inline struct page * -__page_pool_put_page(struct page_pool *pool, struct page *page, +static __always_inline netmem_ref +__page_pool_put_page(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { + lockdep_assert_no_hardirq(); + /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. @@ -563,20 +838,18 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * page is NOT reusable when allocated when system is under * some pressure. (page_is_pfmemalloc) */ - if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) { + if (likely(__page_pool_page_can_be_recycled(netmem))) { /* Read barrier done in page_ref_count / READ_ONCE */ - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, - dma_sync_size); + page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); - if (allow_direct && in_serving_softirq() && - page_pool_recycle_in_cache(page, pool)) - return NULL; + if (allow_direct && page_pool_recycle_in_cache(netmem, pool)) + return 0; /* Page found as candidate for recycling */ - return page; + return netmem; } + /* Fallback/non-XDP mode: API user have elevated refcnt. * * Many drivers split up the page into fragments, and some @@ -591,184 +864,279 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); - /* Do not replace this with page_pool_return_page() */ - page_pool_release_page(pool, page); - put_page(page); + page_pool_return_netmem(pool, netmem); - return NULL; + return 0; } -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, +static bool page_pool_napi_local(const struct page_pool *pool) +{ + const struct napi_struct *napi; + u32 cpuid; + + /* On PREEMPT_RT the softirq can be preempted by the consumer */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return false; + + if (unlikely(!in_softirq())) + return false; + + /* Allow direct recycle if we have reasons to believe that we are + * in the same context as the consumer would run, so there's + * no possible race. + * __page_pool_put_page() makes sure we're not in hardirq context + * and interrupts are enabled prior to accessing the cache. 
+ */ + cpuid = smp_processor_id(); + if (READ_ONCE(pool->cpuid) == cpuid) + return true; + + napi = READ_ONCE(pool->p.napi); + + return napi && READ_ONCE(napi->list_owner) == cpuid; +} + +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { - page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); - if (page && !page_pool_recycle_in_ring(pool, page)) { + if (!allow_direct) + allow_direct = page_pool_napi_local(pool); + + netmem = __page_pool_put_page(pool, netmem, dma_sync_size, + allow_direct); + if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, page); + page_pool_return_netmem(pool, netmem); } } -EXPORT_SYMBOL(page_pool_put_defragged_page); +EXPORT_SYMBOL(page_pool_put_unrefed_netmem); -/* Caller must not use data area after call, as this function overwrites it */ -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) { - int i, bulk_len = 0; - - for (i = 0; i < count; i++) { - struct page *page = virt_to_head_page(data[i]); - - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_frag(pool, page)) - continue; + page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} +EXPORT_SYMBOL(page_pool_put_unrefed_page); - page = __page_pool_put_page(pool, page, -1, false); - /* Approved for bulk recycling in ptr_ring cache */ - if (page) - data[bulk_len++] = page; - } +static void page_pool_recycle_ring_bulk(struct page_pool *pool, + netmem_ref *bulk, + u32 bulk_len) +{ + bool in_softirq; + u32 i; - if (unlikely(!bulk_len)) - return; + /* Bulk produce into ptr_ring page_pool cache */ + in_softirq = page_pool_producer_lock(pool); - /* Bulk producer into ptr_ring page_pool cache */ - page_pool_ring_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; } } + + page_pool_producer_unlock(pool, in_softirq); recycle_stat_add(pool, ring, i); - page_pool_ring_unlock(pool); - /* Hopefully all pages was return into ptr_ring */ + /* Hopefully all pages were returned into ptr_ring */ if (likely(i == bulk_len)) return; - /* ptr_ring cache full, free remaining pages outside producer lock - * since put_page() with refcnt == 1 can be an expensive operation + /* + * ptr_ring cache is full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation. */ for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + page_pool_return_netmem(pool, bulk[i]); } -EXPORT_SYMBOL(page_pool_put_page_bulk); -static struct page *page_pool_drain_frag(struct page_pool *pool, - struct page *page) +/** + * page_pool_put_netmem_bulk() - release references on multiple netmems + * @data: array holding netmem references + * @count: number of entries in @data + * + * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring + * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() + * will release leftover netmems to the memory provider. + * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx + * completion loop for the XDP_REDIRECT use case. 
+ * + * Please note the caller must not use data area after running + * page_pool_put_netmem_bulk(), as this function overwrites it. + */ +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) +{ + u32 bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + netmem_ref netmem = netmem_compound_head(data[i]); + + if (page_pool_unref_and_test(netmem)) + data[bulk_len++] = netmem; + } + + count = bulk_len; + while (count) { + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + struct page_pool *pool = NULL; + bool allow_direct; + u32 foreign = 0; + + bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + struct page_pool *netmem_pp; + netmem_ref netmem = data[i]; + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different + * page_pool, save it for another round. + */ + data[foreign++] = netmem; + continue; + } + + netmem = __page_pool_put_page(pool, netmem, -1, + allow_direct); + /* Approved for bulk recycling in ptr_ring cache */ + if (netmem) + bulk[bulk_len++] = netmem; + } + + if (bulk_len) + page_pool_recycle_ring_bulk(pool, bulk, bulk_len); + + count = foreign; + } +} +EXPORT_SYMBOL(page_pool_put_netmem_bulk); + +static netmem_ref page_pool_drain_frag(struct page_pool *pool, + netmem_ref netmem) { long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ - if (likely(page_pool_defrag_page(page, drain_count))) - return NULL; - - if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, -1); + if (likely(page_pool_unref_netmem(netmem, drain_count))) + return 0; - return page; + if (__page_pool_page_can_be_recycled(netmem)) { + page_pool_dma_sync_for_device(pool, netmem, -1); + return netmem; } - page_pool_return_page(pool, page); - return NULL; + page_pool_return_netmem(pool, netmem); + return 0; } static void page_pool_free_frag(struct page_pool *pool) { long drain_count = BIAS_MAX - pool->frag_users; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; - pool->frag_page = NULL; + pool->frag_page = 0; - if (!page || page_pool_defrag_page(page, drain_count)) + if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - page_pool_return_page(pool, page); + page_pool_return_netmem(pool, netmem); } -struct page *page_pool_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size, gfp_t gfp) +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || - size > max_size)) - return NULL; + if (WARN_ON(size > max_size)) + return 0; size = ALIGN(size, dma_get_cache_alignment()); *offset = pool->frag_offset; - if (page && *offset + size > max_size) { - page = page_pool_drain_frag(pool, page); - if (page) { + if (netmem && *offset + size > max_size) { + netmem = page_pool_drain_frag(pool, netmem); + if (netmem) { + recycle_stat_inc(pool, cached); alloc_stat_inc(pool, fast); goto frag_reset; } } - if (!page) { - page = page_pool_alloc_pages(pool, gfp); - if (unlikely(!page)) { - pool->frag_page = NULL; - return NULL; + if (!netmem) { + netmem = page_pool_alloc_netmems(pool, gfp); + if (unlikely(!netmem)) { + pool->frag_page = 0; + return 0; } 
- pool->frag_page = page; + pool->frag_page = netmem; frag_reset: pool->frag_users = 1; *offset = 0; pool->frag_offset = size; - page_pool_fragment_page(page, BIAS_MAX); - return page; + page_pool_fragment_netmem(netmem, BIAS_MAX); + return netmem; } pool->frag_users++; pool->frag_offset = *offset + size; - alloc_stat_inc(pool, fast); - return page; + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_frag_netmem); + +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size, + gfp)); } EXPORT_SYMBOL(page_pool_alloc_frag); static void page_pool_empty_ring(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Empty recycle ring */ - while ((page = ptr_ring_consume_bh(&pool->ring))) { + while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ - if (!(page_ref_count(page) == 1)) + if (!(netmem_ref_count(netmem) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", - __func__, page_ref_count(page)); + __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, page); + page_pool_return_netmem(pool, netmem); } } -static void page_pool_free(struct page_pool *pool) +static void __page_pool_destroy(struct page_pool *pool) { if (pool->disconnect) pool->disconnect(pool); - ptr_ring_cleanup(&pool->ring, NULL); + page_pool_unlist(pool); + page_pool_uninit(pool); - if (pool->p.flags & PP_FLAG_DMA_MAP) - put_device(pool->p.dev); + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } -#ifdef CONFIG_PAGE_POOL_STATS - free_percpu(pool->recycle_stats); -#endif kfree(pool); } static void page_pool_empty_alloc_cache_once(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; if (pool->destroy_cnt) return; @@ -778,15 +1146,36 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) * call concurrently. */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_netmem(pool, netmem); } } static void page_pool_scrub(struct page_pool *pool) { + unsigned long id; + void *ptr; + page_pool_empty_alloc_cache_once(pool); - pool->destroy_cnt++; + if (!pool->destroy_cnt++ && pool->dma_map) { + if (pool->dma_sync) { + /* Disable page_pool_dma_sync_for_device() */ + pool->dma_sync = false; + + /* Make sure all concurrent returns that may see the old + * value of dma_sync (and thus perform a sync) have + * finished before doing the unmapping below. Skip the + * wait if the device doesn't actually need syncing, or + * if there are no outstanding mapped pages. + */ + if (dma_dev_need_sync(pool->p.dev) && + !xa_empty(&pool->dma_mapped)) + synchronize_net(); + } + + xa_for_each(&pool->dma_mapped, id, ptr) + __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr)); + } /* No more consumers should exist, but producers could still * be in-flight. @@ -796,12 +1185,16 @@ static void page_pool_scrub(struct page_pool *pool) static int page_pool_release(struct page_pool *pool) { + bool in_softirq; int inflight; page_pool_scrub(pool); - inflight = page_pool_inflight(pool); + inflight = page_pool_inflight(pool, true); + /* Acquire producer lock to make sure producers have exited. 
*/ + in_softirq = page_pool_producer_lock(pool); + page_pool_producer_unlock(pool, in_softirq); if (!inflight) - page_pool_free(pool); + __page_pool_destroy(pool); return inflight; } @@ -810,18 +1203,27 @@ static void page_pool_release_retry(struct work_struct *wq) { struct delayed_work *dwq = to_delayed_work(wq); struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw); + void *netdev; int inflight; inflight = page_pool_release(pool); - if (!inflight) + /* In rare cases, a driver bug may cause inflight to go negative. + * Don't reschedule release if inflight is 0 or negative. + * - If 0, the page_pool has been destroyed + * - if negative, we will never recover + * in both cases no reschedule is necessary. + */ + if (inflight <= 0) return; - /* Periodic warning */ - if (time_after_eq(jiffies, pool->defer_warn)) { + /* Periodic warning for page pools the user can't see */ + netdev = READ_ONCE(pool->slow.netdev); + if (time_after_eq(jiffies, pool->defer_warn) && + (!netdev || netdev == NET_PTR_POISON)) { int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ; - pr_warn("%s() stalled pool shutdown %d inflight %d sec\n", - __func__, inflight, sec); + pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n", + __func__, pool->user.id, inflight, sec); pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; } @@ -830,13 +1232,60 @@ static void page_pool_release_retry(struct work_struct *wq) } void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem) + const struct xdp_mem_info *mem) { refcount_inc(&pool->user_cnt); pool->disconnect = disconnect; pool->xdp_mem_id = mem->id; } +/** + * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI + * @pool: page pool to modify + * @napi: NAPI instance to associate the page pool with + * + * Associate a page pool with a NAPI instance for lockless page recycling. + * This is useful when a new page pool has to be added to a NAPI instance + * without disabling that NAPI instance, to mark the point at which control + * path "hands over" the page pool to the NAPI instance. In most cases driver + * can simply set the @napi field in struct page_pool_params, and does not + * have to call this helper. + * + * The function is idempotent, but does not implement any refcounting. + * Single page_pool_disable_direct_recycling() will disable recycling, + * no matter how many times enable was called. + */ +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi) +{ + if (READ_ONCE(pool->p.napi) == napi) + return; + WARN_ON(!napi || pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, napi); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_enable_direct_recycling); + +void page_pool_disable_direct_recycling(struct page_pool *pool) +{ + /* Disable direct recycling based on pool->cpuid. + * Paired with READ_ONCE() in page_pool_napi_local(). 
+ */ + WRITE_ONCE(pool->cpuid, -1); + + if (!pool->p.napi) + return; + + napi_assert_will_not_race(pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_disable_direct_recycling); + void page_pool_destroy(struct page_pool *pool) { if (!pool) @@ -845,11 +1294,13 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; + page_pool_disable_direct_recycling(pool); page_pool_free_frag(pool); if (!page_pool_release(pool)) return; + page_pool_detached(pool); pool->defer_start = jiffies; pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; @@ -861,44 +1312,43 @@ EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. */ void page_pool_update_nid(struct page_pool *pool, int new_nid) { - struct page *page; + netmem_ref netmem; trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); -bool page_pool_return_skb_page(struct page *page) +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr) { - struct page_pool *pp; + return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr); +} - page = compound_head(page); +/* Associate a niov with a page pool. Should follow with a matching + * net_mp_niov_clear_page_pool() + */ +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); - /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation - * in order to preserve any existing bits, such as bit 0 for the - * head page of compound page and bit 1 for pfmemalloc page, so - * mask those bits for freeing side when doing below checking, - * and page_is_pfmemalloc() is checked in __page_pool_put_page() - * to avoid recycling the pfmemalloc page. - */ - if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) - return false; + page_pool_set_pp_info(pool, netmem); - pp = page->pp; + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); +} - /* Driver set this to memory recycling info. Reset it on recycle. - * This will *not* work for NIC using a split-page memory model. - * The page will be returned to the pool here regardless of the - * 'flipped' fragment being in use or not. - */ - page_pool_put_full_page(pp, page, false); +/* Disassociate a niov from a page pool. Should only be used in the + * ->release_netmem() path. + */ +void net_mp_niov_clear_page_pool(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); - return true; + page_pool_clear_pp_info(netmem); } -EXPORT_SYMBOL(page_pool_return_skb_page); |
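Usage note (not part of the commit): the sketch below shows how a driver might sit on top of the reworked API after this patch, using page_pool_create(), page_pool_alloc_netmems() and page_pool_put_unrefed_netmem() as they appear in the diff above. The my_drv_* names and the parameter values are invented placeholders, not code from any real driver.

/* Illustrative only -- assumes the post-patch netmem API shown above. */
#include <net/page_pool/helpers.h>

static struct page_pool *my_drv_create_pool(struct device *dev,
                                            struct napi_struct *napi)
{
        struct page_pool_params pp = {
                .flags          = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
                .order          = 0,
                .pool_size      = 256,                  /* placeholder ring size */
                .nid            = NUMA_NO_NODE,
                .dev            = dev,
                .napi           = napi,                 /* enables direct recycling */
                .dma_dir        = DMA_FROM_DEVICE,
                .max_len        = PAGE_SIZE,
                .offset         = 0,
        };

        return page_pool_create(&pp);                   /* ERR_PTR() on failure */
}

static void my_drv_rx_refill(struct page_pool *pool)
{
        netmem_ref netmem;

        netmem = page_pool_alloc_netmems(pool, GFP_ATOMIC | __GFP_NOWARN);
        if (!netmem)
                return;

        /* ... post page_pool_get_dma_addr_netmem(netmem) to the NIC
         * descriptor ring ...
         */
}

static void my_drv_rx_complete(struct page_pool *pool, netmem_ref netmem)
{
        /* dma_sync_size of -1 syncs up to pool->p.max_len; allow_direct=true
         * is only valid from the pool's own NAPI/softirq context.
         */
        page_pool_put_unrefed_netmem(pool, netmem, -1, true);
}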
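A second sketch, also illustrative and assuming CONFIG_PAGE_POOL_STATS=y: reporting the pool counters through ethtool with the helpers whose kernel-doc this patch adds (page_pool_get_stats(), page_pool_ethtool_stats_get(), page_pool_ethtool_stats_get_count()). struct my_drv_priv and MY_DRV_NUM_STATS are hypothetical.

/* Illustrative only -- not part of this commit. */
static void my_drv_get_ethtool_stats(struct net_device *dev,
                                     struct ethtool_stats *stats, u64 *data)
{
        struct my_drv_priv *priv = netdev_priv(dev);    /* hypothetical priv */
        struct page_pool_stats pp_stats = { };

        /* Fill a caller-allocated struct and emit it in ethtool order. */
        if (page_pool_get_stats(priv->page_pool, &pp_stats))
                data = page_pool_ethtool_stats_get(data, &pp_stats);

        /* ... driver-specific counters follow ... */
}

static int my_drv_get_sset_count(struct net_device *dev, int sset)
{
        if (sset != ETH_SS_STATS)
                return -EOPNOTSUPP;

        /* Reserve room for the page_pool counters after the driver's own. */
        return MY_DRV_NUM_STATS + page_pool_ethtool_stats_get_count();
}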
