From 68aa2fdbf57f769e552f472ddb762aba028a207e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Nov 2025 22:21:20 +0000 Subject: mm: introduce leaf entry type and use to simplify leaf entry logic The kernel maintains leaf page table entries which contain either: The kernel maintains leaf page table entries which contain either: - Nothing ('none' entries) - Present entries* - Everything else that will cause a fault which the kernel handles * Present entries are either entries the hardware can navigate without page fault or special cases like NUMA hint protnone or PMD with cleared present bit which contain hardware-valid entries modulo the present bit. In the 'everything else' group we include swap entries, but we also include a number of other things such as migration entries, device private entries and marker entries. Unfortunately this 'everything else' group expresses everything through a swp_entry_t type, and these entries are referred to swap entries even though they may well not contain a... swap entry. This is compounded by the rather mind-boggling concept of a non-swap swap entry (checked via non_swap_entry()) and the means by which we twist and turn to satisfy this. This patch lays the foundation for reducing this confusion. We refer to 'everything else' as a 'software-define leaf entry' or 'softleaf'. for short And in fact we scoop up the 'none' entries into this concept also so we are left with: - Present entries. - Softleaf entries (which may be empty). This allows for radical simplification across the board - one can simply convert any leaf page table entry to a leaf entry via softleaf_from_pte(). If the entry is present, we return an empty leaf entry, so it is assumed the caller is aware that they must differentiate between the two categories of page table entries, checking for the former via pte_present(). As a result, we can eliminate a number of places where we would otherwise need to use predicates to see if we can proceed with leaf page table entry conversion and instead just go ahead and do it unconditionally. We do so where we can, adjusting surrounding logic as necessary to integrate the new softleaf_t logic as far as seems reasonable at this stage. We typedef swp_entry_t to softleaf_t for the time being until the conversion can be complete, meaning everything remains compatible regardless of which type is used. We will eventually remove swp_entry_t when the conversion is complete. We introduce a new header file to keep things clear - leafops.h - this imports swapops.h so can direct replace swapops imports without issue, and we do so in all the files that require it. Additionally, add new leafops.h file to core mm maintainers entry. Link: https://lkml.kernel.org/r/c879383aac77d96a03e4d38f7daba893cd35fc76.1762812360.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: Alexander Gordeev Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Byungchul Park Cc: Chengming Zhou Cc: Chris Li Cc: Christian Borntraeger Cc: Christian Brauner Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: Dev Jain Cc: Gerald Schaefer Cc: Gregory Price Cc: Heiko Carstens Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Janosch Frank Cc: Jason Gunthorpe Cc: Joshua Hahn Cc: Kairui Song Cc: Kemeng Shi Cc: Lance Yang Cc: Leon Romanovsky Cc: Liam Howlett Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nhat Pham Cc: Nico Pache Cc: Oscar Salvador Cc: Pasha Tatashin Cc: Peter Xu Cc: Rakie Kim Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Wei Xu Cc: xu xin Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- mm/memory.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 732414852570..0caf8c5c8c68 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ static __always_inline bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return false; - return pte_marker_uffd_wp(vmf->orig_pte); + return pte_is_uffd_wp_marker(vmf->orig_pte); } /* @@ -927,10 +927,10 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, { vm_flags_t vm_flags = dst_vma->vm_flags; pte_t orig_pte = ptep_get(src_pte); + softleaf_t entry = softleaf_from_pte(orig_pte); pte_t pte = orig_pte; struct folio *folio; struct page *page; - swp_entry_t entry = pte_to_swp_entry(orig_pte); if (likely(!non_swap_entry(entry))) { if (swap_duplicate(entry) < 0) @@ -1016,7 +1016,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (try_restore_exclusive_pte(src_vma, addr, src_pte, orig_pte)) return -EBUSY; return -ENOENT; - } else if (is_pte_marker_entry(entry)) { + } else if (softleaf_is_marker(entry)) { pte_marker marker = copy_pte_marker(entry, dst_vma); if (marker) @@ -1711,14 +1711,14 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, unsigned int max_nr, unsigned long addr, struct zap_details *details, int *rss, bool *any_skipped) { - swp_entry_t entry; + softleaf_t entry; int nr = 1; *any_skipped = true; - entry = pte_to_swp_entry(ptent); - if (is_device_private_entry(entry) || - is_device_exclusive_entry(entry)) { - struct page *page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pte(ptent); + if (softleaf_is_device_private(entry) || + softleaf_is_device_exclusive(entry)) { + struct page *page = softleaf_to_page(entry); struct folio *folio = page_folio(page); if (unlikely(!should_zap_folio(details, folio))) @@ -1733,7 +1733,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, rss[mm_counter(folio)]--; folio_remove_rmap_pte(folio, page, vma); folio_put(folio); - } else if (!non_swap_entry(entry)) { + } else if (softleaf_is_swap(entry)) { /* Genuine swap entries, hence a private anon pages */ if (!should_zap_cows(details)) return 1; @@ -1741,20 +1741,20 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, nr = swap_pte_batch(pte, max_nr, ptent); rss[MM_SWAPENTS] -= nr; free_swap_and_cache_nr(entry, nr); - } else if (is_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + } else if (softleaf_is_migration(entry)) { + struct folio *folio = softleaf_to_folio(entry); if (!should_zap_folio(details, folio)) return 1; rss[mm_counter(folio)]--; - } else if (pte_marker_entry_uffd_wp(entry)) { + } else if (softleaf_is_uffd_wp_marker(entry)) { /* * For anon: always drop the marker; for file: only * drop the marker if explicitly requested. */ if (!vma_is_anonymous(vma) && !zap_drop_markers(details)) return 1; - } else if (is_guard_swp_entry(entry)) { + } else if (softleaf_is_guard_marker(entry)) { /* * Ordinary zapping should not remove guard PTE * markers. Only do so if we should remove PTE markers @@ -1762,7 +1762,8 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, */ if (!zap_drop_markers(details)) return 1; - } else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) { + } else if (softleaf_is_hwpoison(entry) || + softleaf_is_poison_marker(entry)) { if (!should_zap_cows(details)) return 1; } else { @@ -4380,7 +4381,7 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf) * * This should also cover the case where e.g. the pte changed * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED. - * So is_pte_marker() check is not enough to safely drop the pte. + * So pte_is_marker() check is not enough to safely drop the pte. */ if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); @@ -4414,8 +4415,8 @@ static vm_fault_t pte_marker_handle_uffd_wp(struct vm_fault *vmf) static vm_fault_t handle_pte_marker(struct vm_fault *vmf) { - swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); - unsigned long marker = pte_marker_get(entry); + const softleaf_t entry = softleaf_from_pte(vmf->orig_pte); + const pte_marker marker = softleaf_to_marker(entry); /* * PTE markers should never be empty. If anything weird happened, @@ -4432,7 +4433,7 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) if (marker & PTE_MARKER_GUARD) return VM_FAULT_SIGSEGV; - if (pte_marker_entry_uffd_wp(entry)) + if (softleaf_is_uffd_wp_marker(entry)) return pte_marker_handle_uffd_wp(vmf); /* This is an unknown pte marker */ @@ -4680,7 +4681,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; - } else if (is_pte_marker_entry(entry)) { + } else if (softleaf_is_marker(entry)) { ret = handle_pte_marker(vmf); } else { print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); -- cgit