summaryrefslogtreecommitdiff
path: root/mm/memremap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memremap.c')
-rw-r--r--mm/memremap.c201
1 files changed, 74 insertions, 127 deletions
diff --git a/mm/memremap.c b/mm/memremap.c
index 08cbf54fe037..4c2e0d68eb27 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -5,8 +5,8 @@
#include <linux/kasan.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
-#include <linux/pfn_t.h>
#include <linux/swap.h>
+#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/swapops.h>
#include <linux/types.h>
@@ -38,30 +38,6 @@ unsigned long memremap_compat_align(void)
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif
-#ifdef CONFIG_FS_DAX
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
- if (pgmap->type == MEMORY_DEVICE_FS_DAX)
- static_branch_dec(&devmap_managed_key);
-}
-
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
- if (pgmap->type == MEMORY_DEVICE_FS_DAX)
- static_branch_inc(&devmap_managed_key);
-}
-#else
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
-}
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
-}
-#endif /* CONFIG_FS_DAX */
-
static void pgmap_array_delete(struct range *range)
{
xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
@@ -129,7 +105,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
}
mem_hotplug_done();
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
pgmap_array_delete(range);
}
@@ -150,7 +126,6 @@ void memunmap_pages(struct dev_pagemap *pgmap)
percpu_ref_exit(&pgmap->ref);
WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
- devmap_managed_enable_put(pgmap);
}
EXPORT_SYMBOL_GPL(memunmap_pages);
@@ -178,14 +153,14 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
"altmap not supported for multiple ranges\n"))
return -EINVAL;
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start));
if (conflict_pgmap) {
WARN(1, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return -ENOMEM;
}
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end));
if (conflict_pgmap) {
WARN(1, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
@@ -210,8 +185,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
- range_len(range));
+ error = pfnmap_track(PHYS_PFN(range->start), range_len(range),
+ &params->pgprot);
if (error)
goto err_pfn_remap;
@@ -253,7 +228,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), params->altmap,
- MIGRATE_MOVABLE);
+ MIGRATE_MOVABLE, false);
}
mem_hotplug_done();
@@ -276,7 +251,7 @@ err_add_memory:
if (!is_private)
kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
err_pfn_remap:
pgmap_array_delete(range);
return error;
@@ -300,6 +275,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
return ERR_PTR(-EINVAL);
+ if (WARN_ONCE(pgmap->vmemmap_shift > MAX_FOLIO_ORDER,
+ "requested folio size unsupported\n"))
+ return ERR_PTR(-EINVAL);
switch (pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
@@ -311,8 +289,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
WARN(1, "Missing migrate_to_ram method\n");
return ERR_PTR(-EINVAL);
}
- if (!pgmap->ops->page_free) {
- WARN(1, "Missing page_free method\n");
+ if (!pgmap->ops->folio_free) {
+ WARN(1, "Missing folio_free method\n");
return ERR_PTR(-EINVAL);
}
if (!pgmap->owner) {
@@ -321,8 +299,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_COHERENT:
- if (!pgmap->ops->page_free) {
- WARN(1, "Missing page_free method\n");
+ if (!pgmap->ops->folio_free) {
+ WARN(1, "Missing folio_free method\n");
return ERR_PTR(-EINVAL);
}
if (!pgmap->owner) {
@@ -331,10 +309,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_FS_DAX:
- if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
- WARN(1, "File system DAX not supported\n");
- return ERR_PTR(-EINVAL);
- }
params.pgprot = pgprot_decrypted(params.pgprot);
break;
case MEMORY_DEVICE_GENERIC:
@@ -353,8 +327,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
if (error)
return ERR_PTR(error);
- devmap_managed_enable_get(pgmap);
-
/*
* Clear the pgmap nr_range as it will be incremented for each
* successfully processed range. This communicates how many
@@ -385,7 +357,7 @@ EXPORT_SYMBOL_GPL(memremap_pages);
* @pgmap: pointer to a struct dev_pagemap
*
* Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
+ * 1/ At a minimum the range and type members of @pgmap must be initialized
* by the caller before passing it to this function
*
* 2/ The altmap field may optionally be initialized, in which case
@@ -422,42 +394,15 @@ void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
}
EXPORT_SYMBOL_GPL(devm_memunmap_pages);
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
- /* number of pfns from base where pfn_to_page() is valid */
- if (altmap)
- return altmap->reserve + altmap->free;
- return 0;
-}
-
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
-{
- altmap->alloc -= nr_pfns;
-}
-
/**
* get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
* @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
- * is non-NULL but does not cover @pfn the reference to it will be released.
*/
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
- struct dev_pagemap *pgmap)
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn)
{
+ struct dev_pagemap *pgmap;
resource_size_t phys = PFN_PHYS(pfn);
- /*
- * In the cached case we're already holding a live reference.
- */
- if (pgmap) {
- if (phys >= pgmap->range.start && phys <= pgmap->range.end)
- return pgmap;
- put_dev_pagemap(pgmap);
- }
-
- /* fall back to slow path lookup */
rcu_read_lock();
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
@@ -468,83 +413,85 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);
-void free_zone_device_page(struct page *page)
+void free_zone_device_folio(struct folio *folio)
{
- if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
+ struct dev_pagemap *pgmap = folio->pgmap;
+ unsigned long nr = folio_nr_pages(folio);
+ int i;
+
+ if (WARN_ON_ONCE(!pgmap))
return;
- mem_cgroup_uncharge(page_folio(page));
+ mem_cgroup_uncharge(folio);
- /*
- * Note: we don't expect anonymous compound pages yet. Once supported
- * and we could PTE-map them similar to THP, we'd have to clear
- * PG_anon_exclusive on all tail pages.
- */
- VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
- if (PageAnon(page))
- __ClearPageAnonExclusive(page);
+ if (folio_test_anon(folio)) {
+ for (i = 0; i < nr; i++)
+ __ClearPageAnonExclusive(folio_page(folio, i));
+ } else {
+ VM_WARN_ON_ONCE(folio_test_large(folio));
+ }
/*
- * When a device managed page is freed, the page->mapping field
+ * When a device managed page is freed, the folio->mapping field
* may still contain a (stale) mapping value. For example, the
- * lower bits of page->mapping may still identify the page as an
- * anonymous page. Ultimately, this entire field is just stale
- * and wrong, and it will cause errors if not cleared. One
- * example is:
- *
- * migrate_vma_pages()
- * migrate_vma_insert_page()
- * page_add_new_anon_rmap()
- * __page_set_anon_rmap()
- * ...checks page->mapping, via PageAnon(page) call,
- * and incorrectly concludes that the page is an
- * anonymous page. Therefore, it incorrectly,
- * silently fails to set up the new anon rmap.
+ * lower bits of folio->mapping may still identify the folio as an
+ * anonymous folio. Ultimately, this entire field is just stale
+ * and wrong, and it will cause errors if not cleared.
*
* For other types of ZONE_DEVICE pages, migration is either
* handled differently or not done at all, so there is no need
- * to clear page->mapping.
+ * to clear folio->mapping.
+ *
+ * FS DAX pages clear the mapping when the folio->share count hits
+ * zero which indicating the page has been removed from the file
+ * system mapping.
*/
- page->mapping = NULL;
- page->pgmap->ops->page_free(page);
+ if (pgmap->type != MEMORY_DEVICE_FS_DAX &&
+ pgmap->type != MEMORY_DEVICE_GENERIC)
+ folio->mapping = NULL;
- if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
- page->pgmap->type != MEMORY_DEVICE_COHERENT)
+ switch (pgmap->type) {
+ case MEMORY_DEVICE_PRIVATE:
+ case MEMORY_DEVICE_COHERENT:
+ if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+ break;
+ pgmap->ops->folio_free(folio);
+ percpu_ref_put_many(&folio->pgmap->ref, nr);
+ break;
+
+ case MEMORY_DEVICE_GENERIC:
/*
- * Reset the page count to 1 to prepare for handing out the page
+ * Reset the refcount to 1 to prepare for handing out the page
* again.
*/
- set_page_count(page, 1);
- else
- put_dev_pagemap(page->pgmap);
+ folio_set_count(folio, 1);
+ break;
+
+ case MEMORY_DEVICE_FS_DAX:
+ wake_up_var(&folio->page);
+ break;
+
+ case MEMORY_DEVICE_PCI_P2PDMA:
+ if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+ break;
+ pgmap->ops->folio_free(folio);
+ break;
+ }
}
-void zone_device_page_init(struct page *page)
+void zone_device_page_init(struct page *page, unsigned int order)
{
+ VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES);
+
/*
* Drivers shouldn't be allocating pages after calling
* memunmap_pages().
*/
- WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
+ WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
set_page_count(page, 1);
lock_page(page);
-}
-EXPORT_SYMBOL_GPL(zone_device_page_init);
-
-#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page_refs(struct page *page, int refs)
-{
- if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
- return false;
- /*
- * fsdax page refcounts are 1-based, rather than 0-based: if
- * refcount is 1, then the page is free and the refcount is
- * stable because nobody holds a reference on the page.
- */
- if (page_ref_sub_return(page, refs) == 1)
- wake_up_var(&page->_refcount);
- return true;
+ if (order)
+ prep_compound_page(page, order);
}
-EXPORT_SYMBOL(__put_devmap_managed_page_refs);
-#endif /* CONFIG_FS_DAX */
+EXPORT_SYMBOL_GPL(zone_device_page_init);