summaryrefslogtreecommitdiff
path: root/mm/memremap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memremap.c')
-rw-r--r--mm/memremap.c155
1 files changed, 61 insertions, 94 deletions
diff --git a/mm/memremap.c b/mm/memremap.c
index 40d4547ce514..4c2e0d68eb27 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -5,7 +5,6 @@
#include <linux/kasan.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
-#include <linux/pfn_t.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
@@ -39,30 +38,6 @@ unsigned long memremap_compat_align(void)
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif
-#ifdef CONFIG_FS_DAX
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
- if (pgmap->type == MEMORY_DEVICE_FS_DAX)
- static_branch_dec(&devmap_managed_key);
-}
-
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
- if (pgmap->type == MEMORY_DEVICE_FS_DAX)
- static_branch_inc(&devmap_managed_key);
-}
-#else
-static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
-{
-}
-static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
-{
-}
-#endif /* CONFIG_FS_DAX */
-
static void pgmap_array_delete(struct range *range)
{
xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
@@ -130,7 +105,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
}
mem_hotplug_done();
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
pgmap_array_delete(range);
}
@@ -151,7 +126,6 @@ void memunmap_pages(struct dev_pagemap *pgmap)
percpu_ref_exit(&pgmap->ref);
WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
- devmap_managed_enable_put(pgmap);
}
EXPORT_SYMBOL_GPL(memunmap_pages);
@@ -179,14 +153,14 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
"altmap not supported for multiple ranges\n"))
return -EINVAL;
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start));
if (conflict_pgmap) {
WARN(1, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return -ENOMEM;
}
- conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
+ conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end));
if (conflict_pgmap) {
WARN(1, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
@@ -211,8 +185,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
- range_len(range));
+ error = pfnmap_track(PHYS_PFN(range->start), range_len(range),
+ &params->pgprot);
if (error)
goto err_pfn_remap;
@@ -254,7 +228,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), params->altmap,
- MIGRATE_MOVABLE);
+ MIGRATE_MOVABLE, false);
}
mem_hotplug_done();
@@ -277,7 +251,7 @@ err_add_memory:
if (!is_private)
kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
err_pfn_remap:
pgmap_array_delete(range);
return error;
@@ -301,6 +275,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
return ERR_PTR(-EINVAL);
+ if (WARN_ONCE(pgmap->vmemmap_shift > MAX_FOLIO_ORDER,
+ "requested folio size unsupported\n"))
+ return ERR_PTR(-EINVAL);
switch (pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
@@ -312,8 +289,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
WARN(1, "Missing migrate_to_ram method\n");
return ERR_PTR(-EINVAL);
}
- if (!pgmap->ops->page_free) {
- WARN(1, "Missing page_free method\n");
+ if (!pgmap->ops->folio_free) {
+ WARN(1, "Missing folio_free method\n");
return ERR_PTR(-EINVAL);
}
if (!pgmap->owner) {
@@ -322,8 +299,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_COHERENT:
- if (!pgmap->ops->page_free) {
- WARN(1, "Missing page_free method\n");
+ if (!pgmap->ops->folio_free) {
+ WARN(1, "Missing folio_free method\n");
return ERR_PTR(-EINVAL);
}
if (!pgmap->owner) {
@@ -332,10 +309,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_FS_DAX:
- if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
- WARN(1, "File system DAX not supported\n");
- return ERR_PTR(-EINVAL);
- }
params.pgprot = pgprot_decrypted(params.pgprot);
break;
case MEMORY_DEVICE_GENERIC:
@@ -354,8 +327,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
if (error)
return ERR_PTR(error);
- devmap_managed_enable_get(pgmap);
-
/*
* Clear the pgmap nr_range as it will be incremented for each
* successfully processed range. This communicates how many
@@ -426,26 +397,12 @@ EXPORT_SYMBOL_GPL(devm_memunmap_pages);
/**
* get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
* @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
- * is non-NULL but does not cover @pfn the reference to it will be released.
*/
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
- struct dev_pagemap *pgmap)
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn)
{
+ struct dev_pagemap *pgmap;
resource_size_t phys = PFN_PHYS(pfn);
- /*
- * In the cached case we're already holding a live reference.
- */
- if (pgmap) {
- if (phys >= pgmap->range.start && phys <= pgmap->range.end)
- return pgmap;
- put_dev_pagemap(pgmap);
- }
-
- /* fall back to slow path lookup */
rcu_read_lock();
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
@@ -458,20 +415,20 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap);
void free_zone_device_folio(struct folio *folio)
{
- if (WARN_ON_ONCE(!folio->page.pgmap->ops ||
- !folio->page.pgmap->ops->page_free))
+ struct dev_pagemap *pgmap = folio->pgmap;
+ unsigned long nr = folio_nr_pages(folio);
+ int i;
+
+ if (WARN_ON_ONCE(!pgmap))
return;
mem_cgroup_uncharge(folio);
- /*
- * Note: we don't expect anonymous compound pages yet. Once supported
- * and we could PTE-map them similar to THP, we'd have to clear
- * PG_anon_exclusive on all tail pages.
- */
if (folio_test_anon(folio)) {
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
- __ClearPageAnonExclusive(folio_page(folio, 0));
+ for (i = 0; i < nr; i++)
+ __ClearPageAnonExclusive(folio_page(folio, i));
+ } else {
+ VM_WARN_ON_ONCE(folio_test_large(folio));
}
/*
@@ -484,47 +441,57 @@ void free_zone_device_folio(struct folio *folio)
* For other types of ZONE_DEVICE pages, migration is either
* handled differently or not done at all, so there is no need
* to clear folio->mapping.
+ *
+ * FS DAX pages clear the mapping when the folio->share count hits
+ * zero which indicating the page has been removed from the file
+ * system mapping.
*/
- folio->mapping = NULL;
- folio->page.pgmap->ops->page_free(folio_page(folio, 0));
+ if (pgmap->type != MEMORY_DEVICE_FS_DAX &&
+ pgmap->type != MEMORY_DEVICE_GENERIC)
+ folio->mapping = NULL;
+
+ switch (pgmap->type) {
+ case MEMORY_DEVICE_PRIVATE:
+ case MEMORY_DEVICE_COHERENT:
+ if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+ break;
+ pgmap->ops->folio_free(folio);
+ percpu_ref_put_many(&folio->pgmap->ref, nr);
+ break;
- if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE &&
- folio->page.pgmap->type != MEMORY_DEVICE_COHERENT)
+ case MEMORY_DEVICE_GENERIC:
/*
* Reset the refcount to 1 to prepare for handing out the page
* again.
*/
folio_set_count(folio, 1);
- else
- put_dev_pagemap(folio->page.pgmap);
+ break;
+
+ case MEMORY_DEVICE_FS_DAX:
+ wake_up_var(&folio->page);
+ break;
+
+ case MEMORY_DEVICE_PCI_P2PDMA:
+ if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->folio_free))
+ break;
+ pgmap->ops->folio_free(folio);
+ break;
+ }
}
-void zone_device_page_init(struct page *page)
+void zone_device_page_init(struct page *page, unsigned int order)
{
+ VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES);
+
/*
* Drivers shouldn't be allocating pages after calling
* memunmap_pages().
*/
- WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
+ WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
set_page_count(page, 1);
lock_page(page);
-}
-EXPORT_SYMBOL_GPL(zone_device_page_init);
-
-#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_folio_refs(struct folio *folio, int refs)
-{
- if (folio->page.pgmap->type != MEMORY_DEVICE_FS_DAX)
- return false;
- /*
- * fsdax page refcounts are 1-based, rather than 0-based: if
- * refcount is 1, then the page is free and the refcount is
- * stable because nobody holds a reference on the page.
- */
- if (folio_ref_sub_return(folio, refs) == 1)
- wake_up_var(&folio->_refcount);
- return true;
+ if (order)
+ prep_compound_page(page, order);
}
-EXPORT_SYMBOL(__put_devmap_managed_folio_refs);
-#endif /* CONFIG_FS_DAX */
+EXPORT_SYMBOL_GPL(zone_device_page_init);