diff options
Diffstat (limited to 'mm/migrate_device.c')
-rw-r--r-- | mm/migrate_device.c | 300 |
1 files changed, 176 insertions, 124 deletions
diff --git a/mm/migrate_device.c b/mm/migrate_device.c index b6c27c76e1a0..3158afe7eb23 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -60,6 +60,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, struct mm_walk *walk) { struct migrate_vma *migrate = walk->private; + struct folio *fault_folio = migrate->fault_page ? + page_folio(migrate->fault_page) : NULL; struct vm_area_struct *vma = walk->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = start, unmapped = 0; @@ -71,7 +73,7 @@ again: return migrate_vma_collect_hole(start, end, -1, walk); if (pmd_trans_huge(*pmdp)) { - struct page *page; + struct folio *folio; ptl = pmd_lock(mm, pmdp); if (unlikely(!pmd_trans_huge(*pmdp))) { @@ -79,21 +81,26 @@ again: goto again; } - page = pmd_page(*pmdp); - if (is_huge_zero_page(page)) { + folio = pmd_folio(*pmdp); + if (is_huge_zero_folio(folio)) { spin_unlock(ptl); split_huge_pmd(vma, pmdp, addr); } else { int ret; - get_page(page); + folio_get(folio); spin_unlock(ptl); - if (unlikely(!trylock_page(page))) + /* FIXME: we don't expect THP for fault_folio */ + if (WARN_ON_ONCE(fault_folio == folio)) + return migrate_vma_collect_skip(start, end, + walk); + if (unlikely(!folio_trylock(folio))) return migrate_vma_collect_skip(start, end, walk); - ret = split_huge_page(page); - unlock_page(page); - put_page(page); + ret = split_folio(folio); + if (fault_folio != folio) + folio_unlock(folio); + folio_put(folio); if (ret) return migrate_vma_collect_skip(start, end, walk); @@ -106,6 +113,7 @@ again: arch_enter_lazy_mmu_mode(); for (; addr < end; addr += PAGE_SIZE, ptep++) { + struct dev_pagemap *pgmap; unsigned long mpfn = 0, pfn; struct folio *folio; struct page *page; @@ -133,9 +141,10 @@ again: goto next; page = pfn_swap_entry_to_page(entry); + pgmap = page_pgmap(page); if (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) || - page->pgmap->owner != migrate->pgmap_owner) + pgmap->owner != migrate->pgmap_owner) goto next; mpfn = 
migrate_pfn(page_to_pfn(page)) | @@ -152,12 +161,16 @@ again: } page = vm_normal_page(migrate->vma, addr, pte); if (page && !is_zone_device_page(page) && - !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) - goto next; - else if (page && is_device_coherent_page(page) && - (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) || - page->pgmap->owner != migrate->pgmap_owner)) + !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) { goto next; + } else if (page && is_device_coherent_page(page)) { + pgmap = page_pgmap(page); + + if (!(migrate->flags & + MIGRATE_VMA_SELECT_DEVICE_COHERENT) || + pgmap->owner != migrate->pgmap_owner) + goto next; + } mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; } @@ -192,7 +205,7 @@ again: * optimisation to avoid walking the rmap later with * try_to_migrate(). */ - if (folio_trylock(folio)) { + if (fault_folio == folio || folio_trylock(folio)) { bool anon_exclusive; pte_t swp_pte; @@ -204,7 +217,8 @@ again: if (folio_try_share_anon_rmap_pte(folio, page)) { set_pte_at(mm, addr, ptep, pte); - folio_unlock(folio); + if (fault_folio != folio) + folio_unlock(folio); folio_put(folio); mpfn = 0; goto next; @@ -324,10 +338,12 @@ static void migrate_vma_collect(struct migrate_vma *migrate) */ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) { + struct folio *folio = page_folio(page); + /* * One extra ref because caller holds an extra reference, either from - * isolate_lru_page() for a regular page, or migrate_vma_collect() for - * a device page. + * folio_isolate_lru() for a regular folio, or migrate_vma_collect() for + * a device folio. */ int extra = 1 + (page == fault_page); @@ -336,18 +352,18 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) * check them than regular pages, because they can be mapped with a pmd * or with a pte (split pte mapping). 
*/ - if (PageCompound(page)) + if (folio_test_large(folio)) return false; /* Page from ZONE_DEVICE have one extra reference */ - if (is_zone_device_page(page)) + if (folio_is_zone_device(folio)) extra++; /* For file back page */ - if (page_mapping(page)) - extra += 1 + page_has_private(page); + if (folio_mapping(folio)) + extra += 1 + folio_has_private(folio); - if ((page_count(page) - extra) > page_mapcount(page)) + if ((folio_ref_count(folio) - extra) > folio_mapcount(folio)) return false; return true; @@ -361,6 +377,8 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, unsigned long npages, struct page *fault_page) { + struct folio *fault_folio = fault_page ? + page_folio(fault_page) : NULL; unsigned long i, restore = 0; bool allow_drain = true; unsigned long unmapped = 0; @@ -377,33 +395,33 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, continue; } - /* ZONE_DEVICE pages are not on LRU */ - if (!is_zone_device_page(page)) { - if (!PageLRU(page) && allow_drain) { + folio = page_folio(page); + /* ZONE_DEVICE folios are not on LRU */ + if (!folio_is_zone_device(folio)) { + if (!folio_test_lru(folio) && allow_drain) { /* Drain CPU's lru cache */ lru_add_drain_all(); allow_drain = false; } - if (!isolate_lru_page(page)) { + if (!folio_isolate_lru(folio)) { src_pfns[i] &= ~MIGRATE_PFN_MIGRATE; restore++; continue; } /* Drop the reference we took in collect */ - put_page(page); + folio_put(folio); } - folio = page_folio(page); if (folio_mapped(folio)) try_to_migrate(folio, 0); - if (page_mapped(page) || + if (folio_mapped(folio) || !migrate_vma_check_page(page, fault_page)) { - if (!is_zone_device_page(page)) { - get_page(page); - putback_lru_page(page); + if (!folio_is_zone_device(folio)) { + folio_get(folio); + folio_putback_lru(folio); } src_pfns[i] &= ~MIGRATE_PFN_MIGRATE; @@ -422,10 +440,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, continue; folio = page_folio(page); - 
remove_migration_ptes(folio, folio, false); + remove_migration_ptes(folio, folio, 0); src_pfns[i] = 0; - folio_unlock(folio); + if (fault_folio != folio) + folio_unlock(folio); folio_put(folio); restore--; } @@ -534,6 +553,8 @@ int migrate_vma_setup(struct migrate_vma *args) return -EINVAL; if (args->fault_page && !is_device_private_page(args->fault_page)) return -EINVAL; + if (args->fault_page && !PageLocked(args->fault_page)) + return -EINVAL; memset(args->src, 0, sizeof(*args->src) * nr_pages); args->cpages = 0; @@ -656,7 +677,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, goto unlock_abort; inc_mm_counter(mm, MM_ANONPAGES); - folio_add_new_anon_rmap(folio, vma, addr); + folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); if (!folio_is_zone_device(folio)) folio_add_lru_vma(folio, vma); folio_get(folio); @@ -664,13 +685,9 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (flush) { flush_cache_page(vma, addr, pte_pfn(orig_pte)); ptep_clear_flush(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, entry); - update_mmu_cache(vma, addr, ptep); - } else { - /* No need to invalidate - it was non-present before */ - set_pte_at(mm, addr, ptep, entry); - update_mmu_cache(vma, addr, ptep); } + set_pte_at(mm, addr, ptep, entry); + update_mmu_cache(vma, addr, ptep); pte_unmap_unlock(ptep, ptl); *src = MIGRATE_PFN_MIGRATE; @@ -694,7 +711,8 @@ static void __migrate_device_pages(unsigned long *src_pfns, struct page *newpage = migrate_pfn_to_page(dst_pfns[i]); struct page *page = migrate_pfn_to_page(src_pfns[i]); struct address_space *mapping; - int r; + struct folio *newfolio, *folio; + int r, extra_cnt = 0; if (!newpage) { src_pfns[i] &= ~MIGRATE_PFN_MIGRATE; @@ -709,7 +727,7 @@ static void __migrate_device_pages(unsigned long *src_pfns, /* * The only time there is no vma is when called from - * migrate_device_coherent_page(). However this isn't + * migrate_device_coherent_folio(). 
However this isn't * called if the page could not be unmapped. */ VM_BUG_ON(!migrate); @@ -728,15 +746,13 @@ static void __migrate_device_pages(unsigned long *src_pfns, continue; } - mapping = page_mapping(page); + newfolio = page_folio(newpage); + folio = page_folio(page); + mapping = folio_mapping(folio); - if (is_device_private_page(newpage) || - is_device_coherent_page(newpage)) { + if (folio_is_device_private(newfolio) || + folio_is_device_coherent(newfolio)) { if (mapping) { - struct folio *folio; - - folio = page_folio(page); - /* * For now only support anonymous memory migrating to * device private or coherent memory. @@ -749,7 +765,7 @@ static void __migrate_device_pages(unsigned long *src_pfns, continue; } } - } else if (is_zone_device_page(newpage)) { + } else if (folio_is_zone_device(newfolio)) { /* * Other types of ZONE_DEVICE page are not supported. */ @@ -757,15 +773,15 @@ static void __migrate_device_pages(unsigned long *src_pfns, continue; } + BUG_ON(folio_test_writeback(folio)); + if (migrate && migrate->fault_page == page) - r = migrate_folio_extra(mapping, page_folio(newpage), - page_folio(page), - MIGRATE_SYNC_NO_COPY, 1); - else - r = migrate_folio(mapping, page_folio(newpage), - page_folio(page), MIGRATE_SYNC_NO_COPY); + extra_cnt = 1; + r = folio_migrate_mapping(mapping, newfolio, folio, extra_cnt); if (r != MIGRATEPAGE_SUCCESS) src_pfns[i] &= ~MIGRATE_PFN_MIGRATE; + else + folio_migrate_flags(newfolio, folio); } if (notified) @@ -802,61 +818,73 @@ void migrate_vma_pages(struct migrate_vma *migrate) } EXPORT_SYMBOL(migrate_vma_pages); -/* - * migrate_device_finalize() - complete page migration - * @src_pfns: src_pfns returned from migrate_device_range() - * @dst_pfns: array of pfns allocated by the driver to migrate memory to - * @npages: number of pages in the range - * - * Completes migration of the page by removing special migration entries. 
- * Drivers must ensure copying of page data is complete and visible to the CPU - * before calling this. - */ -void migrate_device_finalize(unsigned long *src_pfns, - unsigned long *dst_pfns, unsigned long npages) +static void __migrate_device_finalize(unsigned long *src_pfns, + unsigned long *dst_pfns, + unsigned long npages, + struct page *fault_page) { + struct folio *fault_folio = fault_page ? + page_folio(fault_page) : NULL; unsigned long i; for (i = 0; i < npages; i++) { - struct folio *dst, *src; + struct folio *dst = NULL, *src = NULL; struct page *newpage = migrate_pfn_to_page(dst_pfns[i]); struct page *page = migrate_pfn_to_page(src_pfns[i]); + if (newpage) + dst = page_folio(newpage); + if (!page) { - if (newpage) { - unlock_page(newpage); - put_page(newpage); + if (dst) { + WARN_ON_ONCE(fault_folio == dst); + folio_unlock(dst); + folio_put(dst); } continue; } - if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE) || !newpage) { - if (newpage) { - unlock_page(newpage); - put_page(newpage); - } - newpage = page; - } - src = page_folio(page); - dst = page_folio(newpage); - remove_migration_ptes(src, dst, false); - folio_unlock(src); - if (is_zone_device_page(page)) - put_page(page); - else - putback_lru_page(page); + if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE) || !dst) { + if (dst) { + WARN_ON_ONCE(fault_folio == dst); + folio_unlock(dst); + folio_put(dst); + } + dst = src; + } - if (newpage != page) { - unlock_page(newpage); - if (is_zone_device_page(newpage)) - put_page(newpage); - else - putback_lru_page(newpage); + if (!folio_is_zone_device(dst)) + folio_add_lru(dst); + remove_migration_ptes(src, dst, 0); + if (fault_folio != src) + folio_unlock(src); + folio_put(src); + + if (dst != src) { + WARN_ON_ONCE(fault_folio == dst); + folio_unlock(dst); + folio_put(dst); } } } + +/* + * migrate_device_finalize() - complete page migration + * @src_pfns: src_pfns returned from migrate_device_range() + * @dst_pfns: array of pfns allocated by the driver to migrate memory to + * 
@npages: number of pages in the range + * + * Completes migration of the page by removing special migration entries. + * Drivers must ensure copying of page data is complete and visible to the CPU + * before calling this. + */ +void migrate_device_finalize(unsigned long *src_pfns, + unsigned long *dst_pfns, unsigned long npages) +{ + return __migrate_device_finalize(src_pfns, dst_pfns, npages, NULL); +} EXPORT_SYMBOL(migrate_device_finalize); /** @@ -872,10 +900,27 @@ EXPORT_SYMBOL(migrate_device_finalize); */ void migrate_vma_finalize(struct migrate_vma *migrate) { - migrate_device_finalize(migrate->src, migrate->dst, migrate->npages); + __migrate_device_finalize(migrate->src, migrate->dst, migrate->npages, + migrate->fault_page); } EXPORT_SYMBOL(migrate_vma_finalize); +static unsigned long migrate_device_pfn_lock(unsigned long pfn) +{ + struct folio *folio; + + folio = folio_get_nontail_page(pfn_to_page(pfn)); + if (!folio) + return 0; + + if (!folio_trylock(folio)) { + folio_put(folio); + return 0; + } + + return migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; +} + /** * migrate_device_range() - migrate device private pfns to normal memory. * @src_pfns: array large enough to hold migrating source device private pfns. @@ -900,62 +945,69 @@ int migrate_device_range(unsigned long *src_pfns, unsigned long start, { unsigned long i, pfn; - for (pfn = start, i = 0; i < npages; pfn++, i++) { - struct page *page = pfn_to_page(pfn); + for (pfn = start, i = 0; i < npages; pfn++, i++) + src_pfns[i] = migrate_device_pfn_lock(pfn); - if (!get_page_unless_zero(page)) { - src_pfns[i] = 0; - continue; - } + migrate_device_unmap(src_pfns, npages, NULL); - if (!trylock_page(page)) { - src_pfns[i] = 0; - put_page(page); - continue; - } + return 0; +} +EXPORT_SYMBOL(migrate_device_range); - src_pfns[i] = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; - } +/** + * migrate_device_pfns() - migrate device private pfns to normal memory. 
+ * @src_pfns: pre-populated array of source device private pfns to migrate. + * @npages: number of pages to migrate. + * + * Similar to migrate_device_range() but supports non-contiguous pre-populated + * array of device pages to migrate. + */ +int migrate_device_pfns(unsigned long *src_pfns, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; i++) + src_pfns[i] = migrate_device_pfn_lock(src_pfns[i]); migrate_device_unmap(src_pfns, npages, NULL); return 0; } -EXPORT_SYMBOL(migrate_device_range); +EXPORT_SYMBOL(migrate_device_pfns); /* - * Migrate a device coherent page back to normal memory. The caller should have - * a reference on page which will be copied to the new page if migration is + * Migrate a device coherent folio back to normal memory. The caller should have + * a reference on folio which will be copied to the new folio if migration is * successful or dropped on failure. */ -int migrate_device_coherent_page(struct page *page) +int migrate_device_coherent_folio(struct folio *folio) { unsigned long src_pfn, dst_pfn = 0; - struct page *dpage; + struct folio *dfolio; - WARN_ON_ONCE(PageCompound(page)); + WARN_ON_ONCE(folio_test_large(folio)); - lock_page(page); - src_pfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; + folio_lock(folio); + src_pfn = migrate_pfn(folio_pfn(folio)) | MIGRATE_PFN_MIGRATE; /* * We don't have a VMA and don't need to walk the page tables to find - * the source page. So call migrate_vma_unmap() directly to unmap the - * page as migrate_vma_setup() will fail if args.vma == NULL. + * the source folio. So call migrate_vma_unmap() directly to unmap the + * folio as migrate_vma_setup() will fail if args.vma == NULL. 
*/ migrate_device_unmap(&src_pfn, 1, NULL); if (!(src_pfn & MIGRATE_PFN_MIGRATE)) return -EBUSY; - dpage = alloc_page(GFP_USER | __GFP_NOWARN); - if (dpage) { - lock_page(dpage); - dst_pfn = migrate_pfn(page_to_pfn(dpage)); + dfolio = folio_alloc(GFP_USER | __GFP_NOWARN, 0); + if (dfolio) { + folio_lock(dfolio); + dst_pfn = migrate_pfn(folio_pfn(dfolio)); } migrate_device_pages(&src_pfn, &dst_pfn, 1); if (src_pfn & MIGRATE_PFN_MIGRATE) - copy_highpage(dpage, page); + folio_copy(dfolio, folio); migrate_device_finalize(&src_pfn, &dst_pfn, 1); if (src_pfn & MIGRATE_PFN_MIGRATE) |