From 073ebebd186250038a04be9693240f90f398c4b1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Fri, 26 Jul 2024 17:07:28 +0200
Subject: powerpc/8xx: document and enforce that split PT locks are not used

Right now, we cannot have split PT locks because 8xx does not support
SMP.  But for the sake of documenting *why* 8xx is fine regarding what
we documented in huge_pte_lockptr(), let's just add code to enforce it
at the same time as documenting it.

This should also make everybody who wants to copy the 8xx approach of
supporting such unusual ways of mapping hugetlb folios aware that it
gets tricky once multiple page tables are involved.

Link: https://lkml.kernel.org/r/20240726150728.3159964-4-david@redhat.com
Signed-off-by: David Hildenbrand
Cc: Alexander Viro
Cc: Borislav Petkov
Cc: Boris Ostrovsky
Cc: Christian Brauner
Cc: Christophe Leroy
Cc: Dave Hansen
Cc: "H. Peter Anvin"
Cc: Ingo Molnar
Cc: Juergen Gross
Cc: Michael Ellerman
Cc: Mike Rapoport (Microsoft)
Cc: Muchun Song
Cc: "Naveen N. Rao"
Cc: Nicholas Piggin
Cc: Oscar Salvador
Cc: Peter Xu
Cc: Russell King
Cc: Thomas Gleixner
Signed-off-by: Andrew Morton
---
 arch/powerpc/mm/pgtable.c | 6 ++++++
 1 file changed, 6 insertions(+)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ab0656115424..7316396e452d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -297,6 +297,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 }
 
 #if defined(CONFIG_PPC_8xx)
+
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS) || defined(CONFIG_SPLIT_PMD_PTLOCKS)
+/* We need the same lock to protect the PMD table and the two PTE tables. */
+#error "8M hugetlb folios are incompatible with split page table locks"
+#endif
+
 static void __set_huge_pte_at(pmd_t *pmd, pte_t *ptep, pte_basic_t val)
 {
         pte_basic_t *entry = (pte_basic_t *)ptep;
--
cgit
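For context: the invariant enforced above is the one huge_pte_lockptr()
relies on.  An 8M folio on 8xx spans one PMD table and two PTE tables, which
is only safe if every *_lockptr() variant collapses to the same per-MM lock,
i.e. if split PT locks are off.  A paraphrased sketch of that lock selection
(simplified from include/linux/hugetlb.h; not the verbatim kernel code):

    static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
                                               struct mm_struct *mm, pte_t *pte)
    {
            const unsigned long size = huge_page_size(h);

            /* Use the exact same PT lock a core-mm page table walker would. */
            if (size >= PUD_SIZE)
                    return pud_lockptr(mm, (pud_t *)pte);
            else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
                    return pmd_lockptr(mm, (pmd_t *)pte);
            /*
             * With split PT locks disabled (as the #error above enforces for
             * 8xx), all of these return &mm->page_table_lock, so one lock
             * covers the PMD table and both PTE tables of an 8M folio.
             */
            return ptep_lockptr(mm, pte);
    }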
From 4dd7724f02abe48ff3bbbe873568754e011cfe4c Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Mon, 12 Aug 2024 14:12:21 -0400
Subject: mm/powerpc: add missing pud helpers

Some new helpers will be needed for pud entry updates soon.  Introduce
them by referencing the pmd ones.  Namely:

- pudp_invalidate(): this helper invalidates a huge pud before a split
  happens, so that the invalidated pud entry ensures no race can happen
  (either with software, like a concurrent zap, or with hardware, like
  a/d bits getting lost).

- pud_modify(): this helper applies a new pgprot to an existing huge
  pud mapping.

For more information on why we need these two helpers, please refer to
the corresponding pmd helpers in the mprotect() code path.

Link: https://lkml.kernel.org/r/20240812181225.1360970-4-peterx@redhat.com
Signed-off-by: Peter Xu
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Christophe Leroy
Cc: Aneesh Kumar K.V
Cc: Borislav Petkov
Cc: Dan Williams
Cc: Dave Hansen
Cc: Dave Jiang
Cc: David Hildenbrand
Cc: David Rientjes
Cc: "Edgecombe, Rick P"
Cc: Hugh Dickins
Cc: Ingo Molnar
Cc: Kirill A. Shutemov
Cc: Matthew Wilcox
Cc: Oscar Salvador
Cc: Paolo Bonzini
Cc: Rik van Riel
Cc: Sean Christopherson
Cc: Thomas Gleixner
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 arch/powerpc/mm/book3s64/pgtable.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f4d8d3c40e5c..5a4a75369043 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -176,6 +176,17 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
         return __pmd(old_pmd);
 }
 
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+                      pud_t *pudp)
+{
+        unsigned long old_pud;
+
+        VM_WARN_ON_ONCE(!pud_present(*pudp));
+        old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID);
+        flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+        return __pud(old_pud);
+}
+
 pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
                                    unsigned long addr, pmd_t *pmdp, int full)
 {
@@ -259,6 +270,15 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
         pmdv &= _HPAGE_CHG_MASK;
         return pmd_set_protbits(__pmd(pmdv), newprot);
 }
+
+pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+        unsigned long pudv;
+
+        pudv = pud_val(pud);
+        pudv &= _HPAGE_CHG_MASK;
+        return pud_set_protbits(__pud(pudv), newprot);
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /* For use by kexec, called with MMU off */
--
cgit
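To see how the two helpers compose, here is a hedged sketch of the huge-pud
mprotect step they enable, modeled on the existing change_huge_pmd() flow
(the function shape and locking are simplified; treat names other than the
two new helpers and set_pud_at() as illustrative):

    /* Sketch only: modeled on change_huge_pmd(), not the exact code. */
    static void change_huge_pud_sketch(struct vm_area_struct *vma,
                                       unsigned long addr, pud_t *pudp,
                                       pgprot_t newprot)
    {
            pud_t oldpud, newpud;

            /*
             * Invalidate first so neither software (a concurrent zap) nor
             * hardware (a/d bit updates) can race the entry rewrite.
             */
            oldpud = pudp_invalidate(vma, addr, pudp);
            /* Recompute the protection bits on the old entry value... */
            newpud = pud_modify(oldpud, newprot);
            /* ...and install the updated entry. */
            set_pud_at(vma->vm_mm, addr, pudp, newpud);
    }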
From 46bcce503197d1019ee5c49ccde978e31298e35f Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)"
Date: Wed, 7 Aug 2024 09:40:51 +0300
Subject: arch, mm: move definition of node_data to generic code

Every architecture that supports NUMA defines node_data in the same
way:

  struct pglist_data *node_data[MAX_NUMNODES];

There is no reason to keep multiple copies of this definition and its
forward declarations, especially when such a forward declaration is the
only thing in include/asm/mmzone.h for many architectures.

Add the definition and declaration of node_data to generic code and
drop the architecture-specific versions.

Link: https://lkml.kernel.org/r/20240807064110.1003856-8-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft)
Acked-by: David Hildenbrand
Reviewed-by: Jonathan Cameron
Acked-by: Davidlohr Bueso
Tested-by: Zi Yan # for x86_64 and arm64
Tested-by: Jonathan Cameron [arm64 + CXL via QEMU]
Acked-by: Dan Williams
Cc: Alexander Gordeev
Cc: Andreas Larsson
Cc: Arnd Bergmann
Cc: Borislav Petkov
Cc: Catalin Marinas
Cc: Christophe Leroy
Cc: Dave Hansen
Cc: David S. Miller
Cc: Greg Kroah-Hartman
Cc: Heiko Carstens
Cc: Huacai Chen
Cc: Ingo Molnar
Cc: Jiaxun Yang
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Michael Ellerman
Cc: Palmer Dabbelt
Cc: Rafael J. Wysocki
Cc: Rob Herring (Arm)
Cc: Samuel Holland
Cc: Thomas Bogendoerfer
Cc: Thomas Gleixner
Cc: Vasily Gorbik
Cc: Will Deacon
Signed-off-by: Andrew Morton
---
 arch/powerpc/mm/numa.c | 2 --
 1 file changed, 2 deletions(-)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index aa89899f0c1a..0744a9a2944b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -43,11 +43,9 @@ static char *cmdline __initdata;
 
 int numa_cpu_lookup_table[NR_CPUS];
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-struct pglist_data *node_data[MAX_NUMNODES];
 
 EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(node_to_cpumask_map);
-EXPORT_SYMBOL(node_data);
 
 static int primary_domain_index;
 static int n_mem_addr_cells, n_mem_size_cells;
--
cgit
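For context, the generic side that replaces all the per-arch copies (added
to common mm code elsewhere in this series; paraphrased here, so take the
exact file placement as an assumption) amounts to roughly:

    /* One shared definition in a generic mm source file... */
    struct pglist_data *node_data[MAX_NUMNODES];
    EXPORT_SYMBOL(node_data);

    /* ...and one shared declaration replacing the <asm/mmzone.h> copies. */
    extern struct pglist_data *node_data[];
    #define NODE_DATA(nid)  (node_data[nid])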
From 3515863d9f29dd476cdad875fbd558fc85b4c511 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)"
Date: Wed, 7 Aug 2024 09:40:53 +0300
Subject: arch, mm: pull out allocation of NODE_DATA to generic code

Architectures that support NUMA duplicate the code that allocates
NODE_DATA on node-local memory, with slight variations in how they
report the addresses where the memory was allocated.

Use the x86 version as the basis for the generic alloc_node_data()
function and call this function from the architecture-specific NUMA
initialization.

Round up the node data size to SMP_CACHE_BYTES rather than to
PAGE_SIZE, as x86 had done since the bootmem era when the allocation
granularity was PAGE_SIZE anyway.

Link: https://lkml.kernel.org/r/20240807064110.1003856-10-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft)
Acked-by: David Hildenbrand
Reviewed-by: Jonathan Cameron
Tested-by: Zi Yan # for x86_64 and arm64
Tested-by: Jonathan Cameron [arm64 + CXL via QEMU]
Acked-by: Dan Williams
Cc: Alexander Gordeev
Cc: Andreas Larsson
Cc: Arnd Bergmann
Cc: Borislav Petkov
Cc: Catalin Marinas
Cc: Christophe Leroy
Cc: Dave Hansen
Cc: Davidlohr Bueso
Cc: David S. Miller
Cc: Greg Kroah-Hartman
Cc: Heiko Carstens
Cc: Huacai Chen
Cc: Ingo Molnar
Cc: Jiaxun Yang
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Michael Ellerman
Cc: Palmer Dabbelt
Cc: Rafael J. Wysocki
Cc: Rob Herring (Arm)
Cc: Samuel Holland
Cc: Thomas Bogendoerfer
Cc: Thomas Gleixner
Cc: Vasily Gorbik
Cc: Will Deacon
Signed-off-by: Andrew Morton
---
 arch/powerpc/mm/numa.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0744a9a2944b..3c1da08304d0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1093,27 +1093,9 @@ void __init dump_numa_cpu_topology(void)
 static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
         u64 spanned_pages = end_pfn - start_pfn;
-        const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
-        u64 nd_pa;
-        void *nd;
-        int tnid;
-
-        nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
-        if (!nd_pa)
-                panic("Cannot allocate %zu bytes for node %d data\n",
-                      nd_size, nid);
-
-        nd = __va(nd_pa);
-
-        /* report and initialize */
-        pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
-                nd_pa, nd_pa + nd_size - 1);
-        tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
-        if (tnid != nid)
-                pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
-
-        node_data[nid] = nd;
-        memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+
+        alloc_node_data(nid);
+
         NODE_DATA(nid)->node_id = nid;
         NODE_DATA(nid)->node_start_pfn = start_pfn;
         NODE_DATA(nid)->node_spanned_pages = spanned_pages;
--
cgit
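For reference, the generic alloc_node_data() that this now calls is, in
rough paraphrase of the x86 code it generalizes (error reporting and the
remote-node diagnostics trimmed; not the verbatim implementation):

    /* Rough paraphrase of the generic helper; details simplified. */
    void __init alloc_node_data(int nid)
    {
            const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
            u64 nd_pa;

            /* Try node-local memory first; memblock falls back if needed. */
            nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
            if (!nd_pa)
                    panic("Cannot allocate %zu bytes for node %d data\n",
                          nd_size, nid);

            node_data[nid] = __va(nd_pa);
            memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
    }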
From bf03c806993091d8fb2fc0e98f07a8ef251c78f3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Wed, 21 Aug 2024 20:34:34 +0100
Subject: mm: remove PageActive

Patch series "Simplify the page flags a little".

In the course of our folio conversions, we have made many page flags
only used on folios, so we can now remove the page-based accessors.
This should cut down compile time a little, and prevent new users from
cropping up.

There is more that could be done in this area, but it would produce
merge conflicts, so I'll sit on those patches until the next merge
window.  We now have line of sight to removing PG_private_2 and
PG_private.

This patch (of 10):

This flag is now only used on folios, so we can remove all the
page-based accessors.

[akpm@linux-foundation.org: fix arch/powerpc/mm/pgtable-frag.c]
Link: https://lkml.kernel.org/r/20240821193445.2294269-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20240821193445.2294269-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 arch/powerpc/mm/pgtable-frag.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 8c31802f97e8..e89f64a0f24a 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -136,10 +136,10 @@ void pte_fragment_free(unsigned long *table, int kernel)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
 {
-        struct page *page;
+        struct folio *folio;
 
-        page = virt_to_page(pgtable);
-        SetPageActive(page);
+        folio = virt_to_folio(pgtable);
+        folio_set_active(folio);
         pte_fragment_free((unsigned long *)pgtable, 0);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
--
cgit
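Why set the active flag on a page table at all?  The consumer side in
pte_fragment_free() treats it as a "defer the free via RCU" marker.  In
rough paraphrase, from memory (see arch/powerpc/mm/pgtable-frag.c for the
real logic; field and helper names here are best-effort, not verbatim):

    /* Paraphrased consumer side; details simplified. */
    if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
            if (kernel)
                    pagetable_free(ptdesc);
            else if (folio_test_clear_active(ptdesc_folio(ptdesc)))
                    /* pte_free_defer() marked it: wait out a grace period. */
                    call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
            else
                    pte_free_now(&ptdesc->pt_rcu_head);
    }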
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/powerpc/mm/book3s64/slice.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index ef3ce37f1bb3..ada6bf896ef8 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -637,7 +637,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags) + unsigned long flags, + vm_flags_t vm_flags) { if (radix_enabled()) return generic_get_unmapped_area(filp, addr, len, pgoff, flags); @@ -650,7 +651,8 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, - const unsigned long flags) + const unsigned long flags, + vm_flags_t vm_flags) { if (radix_enabled()) return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags); -- cgit From 540e00a729dfbefe0aa0d64b6dac1d1b620a1787 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2024 17:58:00 +0100 Subject: mm: pass vm_flags to generic_get_unmapped_area() In preparation for using vm_flags to ensure guard pages for shadow stacks supply them as an argument to generic_get_unmapped_area(). The only user outside of the core code is the PowerPC book3s64 implementation which is trivially wrapping the generic implementation in the radix_enabled() case. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-2-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: Michael Ellerman Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: "Edgecombe, Rick P" Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/powerpc/mm/book3s64/slice.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index ada6bf896ef8..87307d0fc3b8 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -641,7 +641,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, vm_flags_t vm_flags) { if (radix_enabled()) - return generic_get_unmapped_area(filp, addr, len, pgoff, flags); + return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags); return slice_get_unmapped_area(addr, len, flags, mm_ctx_user_psize(¤t->mm->context), 0); @@ -655,7 +655,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, vm_flags_t vm_flags) { if (radix_enabled()) - return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags); + return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags); return slice_get_unmapped_area(addr0, len, flags, mm_ctx_user_psize(¤t->mm->context), 1); -- cgit