Diffstat (limited to 'mm/memremap.c')
| -rw-r--r-- | mm/memremap.c | 201 |
1 files changed, 74 insertions, 127 deletions
diff --git a/mm/memremap.c b/mm/memremap.c
index 08cbf54fe037..4c2e0d68eb27 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -5,8 +5,8 @@
 #include <linux/kasan.h>
 #include <linux/memory_hotplug.h>
 #include <linux/memremap.h>
-#include <linux/pfn_t.h>
 #include <linux/swap.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/swapops.h>
 #include <linux/types.h>
@@ -38,30 +38,6 @@ unsigned long memremap_compat_align(void)
 EXPORT_SYMBOL_GPL(memremap_compat_align);
 #endif
 
-#ifdef CONFIG_FS_DAX
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
-	if (pgmap->type == MEMORY_DEVICE_FS_DAX)
-		static_branch_dec(&devmap_managed_key);
-}
-
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
-	if (pgmap->type == MEMORY_DEVICE_FS_DAX)
-		static_branch_inc(&devmap_managed_key);
-}
-#else
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
-}
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
-}
-#endif /* CONFIG_FS_DAX */
-
 static void pgmap_array_delete(struct range *range)
 {
 	xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
@@ -129,7 +105,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
 	}
 	mem_hotplug_done();
 
-	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
+	pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
 	pgmap_array_delete(range);
 }
 
@@ -150,7 +126,6 @@ void memunmap_pages(struct dev_pagemap *pgmap)
 	percpu_ref_exit(&pgmap->ref);
 
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
-	devmap_managed_enable_put(pgmap);
 }
 EXPORT_SYMBOL_GPL(memunmap_pages);
 
@@ -178,14 +153,14 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
 			"altmap not supported for multiple ranges\n"))
 		return -EINVAL;
 
-	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
+	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start));
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
 		return -ENOMEM;
 	}
 
-	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
+	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end));
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
@@ -210,8 +185,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
-			range_len(range));
+	error = pfnmap_track(PHYS_PFN(range->start), range_len(range),
+			&params->pgprot);
 	if (error)
 		goto err_pfn_remap;
 
@@ -253,7 +228,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
 		zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
 		move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
 				PHYS_PFN(range_len(range)), params->altmap,
-				MIGRATE_MOVABLE);
+				MIGRATE_MOVABLE, false);
 	}
 	mem_hotplug_done();
 
@@ -276,7 +251,7 @@ err_add_memory:
 	if (!is_private)
 		kasan_remove_zero_shadow(__va(range->start), range_len(range));
 err_kasan:
-	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
+	pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
 err_pfn_remap:
 	pgmap_array_delete(range);
 	return error;
@@ -300,6 +275,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 
 	if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
 		return ERR_PTR(-EINVAL);
+	if (WARN_ONCE(pgmap->vmemmap_shift > MAX_FOLIO_ORDER,
+		      "requested folio size unsupported\n"))
+		return ERR_PTR(-EINVAL);
 
 	switch (pgmap->type) {
 	case MEMORY_DEVICE_PRIVATE:
@@ -311,8 +289,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 			WARN(1, "Missing migrate_to_ram method\n");
 			return ERR_PTR(-EINVAL);
 		}
-		if (!pgmap->ops->page_free) {
-			WARN(1, "Missing page_free method\n");
+		if (!pgmap->ops->folio_free) {
+			WARN(1, "Missing folio_free method\n");
 			return ERR_PTR(-EINVAL);
 		}
 		if (!pgmap->owner) {
@@ -321,8 +299,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		}
 		break;
 	case MEMORY_DEVICE_COHERENT:
-		if (!pgmap->ops->page_free) {
-			WARN(1, "Missing page_free method\n");
+		if (!pgmap->ops->folio_free) {
+			WARN(1, "Missing folio_free method\n");
 			return ERR_PTR(-EINVAL);
 		}
 		if (!pgmap->owner) {
@@ -331,10 +309,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		}
 		break;
 	case MEMORY_DEVICE_FS_DAX:
-		if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
-			WARN(1, "File system DAX not supported\n");
-			return ERR_PTR(-EINVAL);
-		}
 		params.pgprot = pgprot_decrypted(params.pgprot);
 		break;
 	case MEMORY_DEVICE_GENERIC:
@@ -353,8 +327,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 	if (error)
 		return ERR_PTR(error);
 
-	devmap_managed_enable_get(pgmap);
-
 	/*
 	 * Clear the pgmap nr_range as it will be incremented for each
 	 * successfully processed range. This communicates how many
@@ -385,7 +357,7 @@ EXPORT_SYMBOL_GPL(memremap_pages);
  * @pgmap: pointer to a struct dev_pagemap
 *
  * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
+ * 1/ At a minimum the range and type members of @pgmap must be initialized
  *    by the caller before passing it to this function
 *
  * 2/ The altmap field may optionally be initialized, in which case
@@ -422,42 +394,15 @@ void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
 }
 EXPORT_SYMBOL_GPL(devm_memunmap_pages);
 
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
-	/* number of pfns from base where pfn_to_page() is valid */
-	if (altmap)
-		return altmap->reserve + altmap->free;
-	return 0;
-}
-
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
-{
-	altmap->alloc -= nr_pfns;
-}
-
 /**
  * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
  * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
- * is non-NULL but does not cover @pfn the reference to it will be released.
  */
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-		struct dev_pagemap *pgmap)
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn)
 {
+	struct dev_pagemap *pgmap;
 	resource_size_t phys = PFN_PHYS(pfn);
 
-	/*
-	 * In the cached case we're already holding a live reference.
-	 */
-	if (pgmap) {
-		if (phys >= pgmap->range.start && phys <= pgmap->range.end)
-			return pgmap;
-		put_dev_pagemap(pgmap);
-	}
-
-	/* fall back to slow path lookup */
 	rcu_read_lock();
 	pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
 	if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
@@ -468,83 +413,85 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 }
 EXPORT_SYMBOL_GPL(get_dev_pagemap);
 
-void free_zone_device_page(struct page *page)
+void free_zone_device_folio(struct folio *folio)
 {
-	if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
+	struct dev_pagemap *pgmap = folio->pgmap;
+	unsigned long nr = folio_nr_pages(folio);
+	int i;
+
+	if (WARN_ON_ONCE(!pgmap))
 		return;
 
-	mem_cgroup_uncharge(page_folio(page));
+	mem_cgroup_uncharge(folio);
 
-	/*
-	 * Note: we don't expect anonymous compound pages yet. Once supported
-	 * and we could PTE-map them similar to THP, we'd have to clear
-	 * PG_anon_exclusive on all tail pages.
-	 */
-	VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
-	if (PageAnon(page))
-		__ClearPageAnonExclusive(page);
+	if (folio_test_anon(folio)) {
+		for (i = 0; i < nr; i++)
+			__ClearPageAnonExclusive(folio_page(folio, i));
+	} else {
+		VM_WARN_ON_ONCE(folio_test_large(folio));
+	}
 
 	/*
-	 * When a device managed page is freed, the page->mapping field
+	 * When a device managed page is freed, the folio->mapping field
 	 * may still contain a (stale) mapping value. For example, the
-	 * lower bits of page->mapping may still identify the page as an
-	 * anonymous page. Ultimately, this entire field is just stale
-	 * and wrong, and it will cause errors if not cleared. One
-	 * example is:
-	 *
-	 *  migrate_vma_pages()
-	 *    migrate_vma_insert_page()
-	 *      page_add_new_anon_rmap()
-	 *        __page_set_anon_rmap()
-	 *          ...checks page->mapping, via PageAnon(page) call,
-	 *            and incorrectly concludes that the page is an
-	 *            anonymous page. Therefore, it incorrectly,
-	 *            silently fails to set up the new anon rmap.
+	 * lower bits of folio->mapping may still identify the folio as an
+	 * anonymous folio. Ultimately, this entire field is just stale
+	 * and wrong, and it will cause errors if not cleared.
 	 *
 	 * For other types of ZONE_DEVICE pages, migration is either
 	 * handled differently or not done at all, so there is no need
-	 * to clear page->mapping.
+	 * to clear folio->mapping.
+	 *
+	 * FS DAX pages clear the mapping when the folio->share count hits
+	 * zero which indicating the page has been removed from the file
+	 * system mapping.
 	 */
-	page->mapping = NULL;
-	page->pgmap->ops->page_free(page);
+	if (pgmap->type != MEMORY_DEVICE_FS_DAX &&
+	    pgmap->type != MEMORY_DEVICE_GENERIC)
+		folio->mapping = NULL;
 
-	if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
-	    page->pgmap->type != MEMORY_DEVICE_COHERENT)
+	switch (pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_COHERENT:
+		if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+			break;
+		pgmap->ops->folio_free(folio);
+		percpu_ref_put_many(&folio->pgmap->ref, nr);
+		break;
+
+	case MEMORY_DEVICE_GENERIC:
 		/*
-		 * Reset the page count to 1 to prepare for handing out the page
+		 * Reset the refcount to 1 to prepare for handing out the page
 		 * again.
 		 */
-		set_page_count(page, 1);
-	else
-		put_dev_pagemap(page->pgmap);
+		folio_set_count(folio, 1);
+		break;
+
+	case MEMORY_DEVICE_FS_DAX:
+		wake_up_var(&folio->page);
+		break;
+
+	case MEMORY_DEVICE_PCI_P2PDMA:
+		if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+			break;
+		pgmap->ops->folio_free(folio);
+		break;
+	}
 }
 
-void zone_device_page_init(struct page *page)
+void zone_device_page_init(struct page *page, unsigned int order)
 {
+	VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES);
+
 	/*
 	 * Drivers shouldn't be allocating pages after calling
 	 * memunmap_pages().
 	 */
-	WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
+	WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
 	set_page_count(page, 1);
 	lock_page(page);
-}
-EXPORT_SYMBOL_GPL(zone_device_page_init);
-
-#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page_refs(struct page *page, int refs)
-{
-	if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
-		return false;
-
-	/*
-	 * fsdax page refcounts are 1-based, rather than 0-based: if
-	 * refcount is 1, then the page is free and the refcount is
-	 * stable because nobody holds a reference on the page.
-	 */
-	if (page_ref_sub_return(page, refs) == 1)
-		wake_up_var(&page->_refcount);
-	return true;
+
+	if (order)
+		prep_compound_page(page, order);
 }
-EXPORT_SYMBOL(__put_devmap_managed_page_refs);
-#endif /* CONFIG_FS_DAX */
+EXPORT_SYMBOL_GPL(zone_device_page_init);
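For callers, the visible changes in this diff are: get_dev_pagemap() no longer takes a cached pgmap argument, dev_pagemap_ops wants a folio_free callback in place of page_free, and zone_device_page_init() now takes the allocation order so it can pin 1 << order pgmap references and build the compound page up front. Below is a minimal sketch of how a hypothetical MEMORY_DEVICE_PRIVATE driver might use the reworked entry points; the my_devmem_* names are illustrative only, and the folio_free prototype is assumed from how free_zone_device_folio() invokes it, not taken verbatim from this patch.

/*
 * Illustrative sketch only, not part of the patch.
 */
#include <linux/memremap.h>
#include <linux/mm.h>

static void my_devmem_folio_free(struct folio *folio)
{
	/* return the backing device memory to the driver's own allocator */
}

static vm_fault_t my_devmem_migrate_to_ram(struct vm_fault *vmf)
{
	/* placeholder: a real driver migrates the data back to system RAM */
	return VM_FAULT_SIGBUS;
}

static const struct dev_pagemap_ops my_devmem_ops = {
	.folio_free	= my_devmem_folio_free,		/* was .page_free */
	.migrate_to_ram	= my_devmem_migrate_to_ram,
};

static struct page *my_devmem_alloc(unsigned long pfn, unsigned int order)
{
	struct page *page = pfn_to_page(pfn);

	/*
	 * zone_device_page_init() now takes the allocation order: it grabs
	 * 1 << order references on the pgmap and builds the compound page,
	 * so the whole folio is handed out locked with a refcount of 1.
	 */
	zone_device_page_init(page, order);
	return page;
}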