From 8dd1e896735f6e5abf66525dfd39bbd7b8c0c6d6 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:27 -0700 Subject: mm/khugepaged: convert __collapse_huge_page_isolate() to use folios Patch series "Some khugepaged folio conversions", v3. This patchset converts a number of functions to use folios. This cleans up some khugepaged code and removes a large number of hidden compound_head() calls. This patch (of 5): Replaces 11 calls to compound_head() with 1, and removes 1348 bytes of kernel text. Link: https://lkml.kernel.org/r/20231020183331.10770-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20231020183331.10770-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Kefeng Wang Signed-off-by: Andrew Morton --- mm/khugepaged.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'mm/khugepaged.c') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 88433cc25d8a..500756604488 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -542,6 +542,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, struct list_head *compound_pagelist) { struct page *page = NULL; + struct folio *folio = NULL; pte_t *_pte; int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0; bool writable = false; @@ -576,7 +577,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } - VM_BUG_ON_PAGE(!PageAnon(page), page); + folio = page_folio(page); + VM_BUG_ON_FOLIO(!folio_test_anon(folio), folio); if (page_mapcount(page) > 1) { ++shared; @@ -588,16 +590,15 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, } } - if (PageCompound(page)) { - struct page *p; - page = compound_head(page); + if (folio_test_large(folio)) { + struct folio *f; /* * Check if we have dealt with the compound page * already */ - list_for_each_entry(p, compound_pagelist, lru) { - if (page == p) + list_for_each_entry(f, compound_pagelist, lru) { + if (folio == f) goto next; } } @@ -608,7 +609,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * is needed to serialize against split_huge_page * when invoked from the VM. */ - if (!trylock_page(page)) { + if (!folio_trylock(folio)) { result = SCAN_PAGE_LOCK; goto out; } @@ -624,8 +625,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * but not from this process. The other process cannot write to * the page, only trigger CoW. */ - if (!is_refcount_suitable(page)) { - unlock_page(page); + if (!is_refcount_suitable(&folio->page)) { + folio_unlock(folio); result = SCAN_PAGE_COUNT; goto out; } @@ -634,27 +635,27 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * Isolate the page to avoid collapsing an hugepage * currently in use by the VM. */ - if (!isolate_lru_page(page)) { - unlock_page(page); + if (!folio_isolate_lru(folio)) { + folio_unlock(folio); result = SCAN_DEL_PAGE_LRU; goto out; } - mod_node_page_state(page_pgdat(page), - NR_ISOLATED_ANON + page_is_file_lru(page), - compound_nr(page)); - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageLRU(page), page); + node_stat_mod_folio(folio, + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - if (PageCompound(page)) - list_add_tail(&page->lru, compound_pagelist); + if (folio_test_large(folio)) + list_add_tail(&folio->lru, compound_pagelist); next: /* * If collapse was initiated by khugepaged, check that there is * enough young pte to justify collapsing the page */ if (cc->is_khugepaged && - (pte_young(pteval) || page_is_young(page) || - PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, + (pte_young(pteval) || folio_test_young(folio) || + folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, address))) referenced++; @@ -668,13 +669,13 @@ next: result = SCAN_LACK_REFERENCED_PAGE; } else { result = SCAN_SUCCEED; - trace_mm_collapse_huge_page_isolate(page, none_or_zero, + trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero, referenced, writable, result); return result; } out: release_pte_pages(pte, _pte, compound_pagelist); - trace_mm_collapse_huge_page_isolate(page, none_or_zero, + trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero, referenced, writable, result); return result; } -- cgit From 5c07ebb372d66423e508ecfb8e00324f8797f072 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:28 -0700 Subject: mm/khugepaged: convert hpage_collapse_scan_pmd() to use folios Replaces 5 calls to compound_head(), and removes 1385 bytes of kernel text. Link: https://lkml.kernel.org/r/20231020183331.10770-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Rik van Riel Reviewed-by: Yang Shi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/khugepaged.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'mm/khugepaged.c') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 500756604488..6c4b5af43371 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1248,6 +1248,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, int result = SCAN_FAIL, referenced = 0; int none_or_zero = 0, shared = 0; struct page *page = NULL; + struct folio *folio = NULL; unsigned long _address; spinlock_t *ptl; int node = NUMA_NO_NODE, unmapped = 0; @@ -1334,29 +1335,28 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, } } - page = compound_head(page); - + folio = page_folio(page); /* * Record which node the original page is from and save this * information to cc->node_load[]. * Khugepaged will allocate hugepage from the node has the max * hit record. */ - node = page_to_nid(page); + node = folio_nid(folio); if (hpage_collapse_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; goto out_unmap; } cc->node_load[node]++; - if (!PageLRU(page)) { + if (!folio_test_lru(folio)) { result = SCAN_PAGE_LRU; goto out_unmap; } - if (PageLocked(page)) { + if (folio_test_locked(folio)) { result = SCAN_PAGE_LOCK; goto out_unmap; } - if (!PageAnon(page)) { + if (!folio_test_anon(folio)) { result = SCAN_PAGE_ANON; goto out_unmap; } @@ -1371,7 +1371,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, * has excessive GUP pins (i.e. 512). Anyway the same check * will be done again later the risk seems low. */ - if (!is_refcount_suitable(page)) { + if (!is_refcount_suitable(&folio->page)) { result = SCAN_PAGE_COUNT; goto out_unmap; } @@ -1381,8 +1381,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, * enough young pte to justify collapsing the page */ if (cc->is_khugepaged && - (pte_young(pteval) || page_is_young(page) || - PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, + (pte_young(pteval) || folio_test_young(folio) || + folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, address))) referenced++; } @@ -1404,7 +1404,7 @@ out_unmap: *mmap_locked = false; } out: - trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, + trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced, none_or_zero, result, unmapped); return result; } -- cgit From dbf85c21e4aff90912b5d7755d2b25611f9191e9 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:29 -0700 Subject: mm/khugepaged: convert is_refcount_suitable() to use folios Both callers of is_refcount_suitable() have been converted to use folios, so convert it to take in a folio. Both callers only operate on head pages of folios so mapcount/refcount conversions here are trivial. Removes 3 calls to compound head, and removes 315 bytes of kernel text. Link: https://lkml.kernel.org/r/20231020183331.10770-4-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/khugepaged.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/khugepaged.c') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 6c4b5af43371..9efd8ff68f06 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -524,15 +524,15 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte, } } -static bool is_refcount_suitable(struct page *page) +static bool is_refcount_suitable(struct folio *folio) { int expected_refcount; - expected_refcount = total_mapcount(page); - if (PageSwapCache(page)) - expected_refcount += compound_nr(page); + expected_refcount = folio_mapcount(folio); + if (folio_test_swapcache(folio)) + expected_refcount += folio_nr_pages(folio); - return page_count(page) == expected_refcount; + return folio_ref_count(folio) == expected_refcount; } static int __collapse_huge_page_isolate(struct vm_area_struct *vma, @@ -625,7 +625,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * but not from this process. The other process cannot write to * the page, only trigger CoW. */ - if (!is_refcount_suitable(&folio->page)) { + if (!is_refcount_suitable(folio)) { folio_unlock(folio); result = SCAN_PAGE_COUNT; goto out; @@ -1371,7 +1371,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, * has excessive GUP pins (i.e. 512). Anyway the same check * will be done again later the risk seems low. */ - if (!is_refcount_suitable(&folio->page)) { + if (!is_refcount_suitable(folio)) { result = SCAN_PAGE_COUNT; goto out_unmap; } -- cgit From b455f39d228935f88eebcd1f7c1a6981093c6a3b Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:30 -0700 Subject: mm/khugepaged: convert alloc_charge_hpage() to use folios Also remove count_memcg_page_event now that its last caller no longer uses it and reword hpage_collapse_alloc_page() to hpage_collapse_alloc_folio(). This removes 1 call to compound_head() and helps convert khugepaged to use folios throughout. Link: https://lkml.kernel.org/r/20231020183331.10770-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Rik van Riel Reviewed-by: Yang Shi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/khugepaged.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'mm/khugepaged.c') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 9efd8ff68f06..6a7184cd291b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -888,16 +888,16 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, +static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node, nodemask_t *nmask) { - *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); - if (unlikely(!*hpage)) { + *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask); + + if (unlikely(!*folio)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; } - folio_prep_large_rmappable((struct folio *)*hpage); count_vm_event(THP_COLLAPSE_ALLOC); return true; } @@ -1064,17 +1064,20 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, int node = hpage_collapse_find_target_node(cc); struct folio *folio; - if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) + if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) { + *hpage = NULL; return SCAN_ALLOC_HUGE_PAGE_FAIL; + } - folio = page_folio(*hpage); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); *hpage = NULL; return SCAN_CGROUP_CHARGE_FAIL; } - count_memcg_page_event(*hpage, THP_COLLAPSE_ALLOC); + count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1); + + *hpage = folio_page(folio, 0); return SCAN_SUCCEED; } -- cgit From 98b32d296d95d7aa0516c36b72406277412268cd Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:31 -0700 Subject: mm/khugepaged: convert collapse_pte_mapped_thp() to use folios This removes 2 calls to compound_head() and helps convert khugepaged to use folios throughout. Previously, if the address passed to collapse_pte_mapped_thp() corresponded to a tail page, the scan would fail immediately. Using filemap_lock_folio() we get the corresponding folio back and try to operate on the folio instead. Link: https://lkml.kernel.org/r/20231020183331.10770-6-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Rik van Riel Reviewed-by: Yang Shi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/khugepaged.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'mm/khugepaged.c') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 6a7184cd291b..bc2d8ff269c7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1477,7 +1477,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool notified = false; unsigned long haddr = addr & HPAGE_PMD_MASK; struct vm_area_struct *vma = vma_lookup(mm, haddr); - struct page *hpage; + struct folio *folio; pte_t *start_pte, *pte; pmd_t *pmd, pgt_pmd; spinlock_t *pml = NULL, *ptl; @@ -1510,19 +1510,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; - hpage = find_lock_page(vma->vm_file->f_mapping, + folio = filemap_lock_folio(vma->vm_file->f_mapping, linear_page_index(vma, haddr)); - if (!hpage) + if (IS_ERR(folio)) return SCAN_PAGE_NULL; - if (!PageHead(hpage)) { - result = SCAN_FAIL; - goto drop_hpage; - } - - if (compound_order(hpage) != HPAGE_PMD_ORDER) { + if (folio_order(folio) != HPAGE_PMD_ORDER) { result = SCAN_PAGE_COMPOUND; - goto drop_hpage; + goto drop_folio; } result = find_pmd_or_thp_or_none(mm, haddr, &pmd); @@ -1536,13 +1531,13 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, */ goto maybe_install_pmd; default: - goto drop_hpage; + goto drop_folio; } result = SCAN_FAIL; start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); if (!start_pte) /* mmap_lock + page lock should prevent this */ - goto drop_hpage; + goto drop_folio; /* step 1: check all mapped PTEs are to the right huge page */ for (i = 0, addr = haddr, pte = start_pte; @@ -1567,7 +1562,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, * Note that uprobe, debugger, or MAP_PRIVATE may change the * page table, but the new page will not be a subpage of hpage. */ - if (hpage + i != page) + if (folio_page(folio, i) != page) goto abort; } @@ -1582,7 +1577,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, * page_table_lock) ptl nests inside pml. The less time we hold pml, * the better; but userfaultfd's mfill_atomic_pte() on a private VMA * inserts a valid as-if-COWed PTE without even looking up page cache. - * So page lock of hpage does not protect from it, so we must not drop + * So page lock of folio does not protect from it, so we must not drop * ptl before pgt_pmd is removed, so uffd private needs pml taken now. */ if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED)) @@ -1606,7 +1601,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, continue; /* * We dropped ptl after the first scan, to do the mmu_notifier: - * page lock stops more PTEs of the hpage being faulted in, but + * page lock stops more PTEs of the folio being faulted in, but * does not stop write faults COWing anon copies from existing * PTEs; and does not stop those being swapped out or migrated. */ @@ -1615,7 +1610,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto abort; } page = vm_normal_page(vma, addr, ptent); - if (hpage + i != page) + if (folio_page(folio, i) != page) goto abort; /* @@ -1634,8 +1629,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 3: set proper refcount and mm_counters. */ if (nr_ptes) { - page_ref_sub(hpage, nr_ptes); - add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes); + folio_ref_sub(folio, nr_ptes); + add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes); } /* step 4: remove empty page table */ @@ -1659,14 +1654,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd - ? set_huge_pmd(vma, haddr, pmd, hpage) + ? set_huge_pmd(vma, haddr, pmd, &folio->page) : SCAN_SUCCEED; - goto drop_hpage; + goto drop_folio; abort: if (nr_ptes) { flush_tlb_mm(mm); - page_ref_sub(hpage, nr_ptes); - add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes); + folio_ref_sub(folio, nr_ptes); + add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes); } if (start_pte) pte_unmap_unlock(start_pte, ptl); @@ -1674,9 +1669,9 @@ abort: spin_unlock(pml); if (notified) mmu_notifier_invalidate_range_end(&range); -drop_hpage: - unlock_page(hpage); - put_page(hpage); +drop_folio: + folio_unlock(folio); + folio_put(folio); return result; } -- cgit