From 7966a2b76f117b650b42a94522e748889ce0794c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:03:56 -0400 Subject: mm/arm: use macros to define pmd/pud helpers It's already confusing that ARM 2-level v.s. 3-level defines SECT bit differently on pmd/puds. Always use a macro which is much clearer. Link: https://lkml.kernel.org/r/20240318200404.448346-7-peterx@redhat.com Signed-off-by: Peter Xu Cc: Russell King Cc: Shawn Guo Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Cc: Arnd Bergmann Cc: Konrad Dybcio Cc: Fabio Estevam Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Lucas Stach Cc: Mark Salter Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-2level.h | 4 ++-- arch/arm/include/asm/pgtable-3level-hwdef.h | 1 + arch/arm/include/asm/pgtable-3level.h | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index b0a262566eb9..4245c2e74720 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -213,8 +213,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_pfn(pmd) (__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) -#define pmd_leaf(pmd) (pmd_val(pmd) & 2) -#define pmd_bad(pmd) (pmd_val(pmd) & 2) +#define pmd_leaf(pmd) (pmd_val(pmd) & PMD_TYPE_SECT) +#define pmd_bad(pmd) pmd_leaf(pmd) #define pmd_present(pmd) (pmd_val(pmd)) #define copy_pmd(pmdpd,pmdps) \ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 2f35b4eddaa8..e7b666cf0060 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -14,6 +14,7 @@ * + Level 1/2 descriptor * - common */ +#define PUD_TABLE_BIT (_AT(pmdval_t, 1) << 1) #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 4b1d9eb3908a..e7aecbef75c9 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -112,7 +112,7 @@ #ifndef __ASSEMBLY__ #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!(pud_val(pud) & 2)) +#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) (pud_val(pud)) #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_TABLE) @@ -137,7 +137,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -#define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) +#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT)) #define copy_pmd(pmdpd,pmdps) \ do { \ -- cgit From 6818135dea59f085335737629c410c2b8c6b2eb7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:03:57 -0400 Subject: mm/arm: redefine pmd_huge() with pmd_leaf() Most of the archs already define these two APIs the same way. ARM is more complicated in two aspects: - For pXd_huge() it's always checking against !PXD_TABLE_BIT, while for pXd_leaf() it's always checking against PXD_TYPE_SECT. - SECT/TABLE bits are defined differently on 2-level v.s. 3-level ARM pgtables, which makes the whole thing even harder to follow. Luckily, the second complexity should be hidden by the pmd_leaf() implementation against 2-level v.s. 3-level headers. Invoke pmd_leaf() directly for pmd_huge(), to remove the first part of complexity. This prepares to drop pXd_huge() API globally. When at it, drop the obsolete comments - it's outdated. Link: https://lkml.kernel.org/r/20240318200404.448346-8-peterx@redhat.com Signed-off-by: Peter Xu Cc: Russell King Cc: Shawn Guo Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Cc: Arnd Bergmann Cc: Konrad Dybcio Cc: Fabio Estevam Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Lucas Stach Cc: Mark Salter Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/hugetlbpage.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index dd7a0277c5c0..c2fa643f6bb5 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -18,11 +18,6 @@ #include #include -/* - * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot - * of type casting from pmd_t * to pte_t *. - */ - int pud_huge(pud_t pud) { return 0; @@ -30,5 +25,5 @@ int pud_huge(pud_t pud) int pmd_huge(pmd_t pmd) { - return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_leaf(pmd); } -- cgit From 1965e933ddeba5e27e68ed80cfc7241931f70d4c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:04:01 -0400 Subject: mm/treewide: replace pXd_huge() with pXd_leaf() Now after we're sure all pXd_huge() definitions are the same as pXd_leaf(), reuse it. Luckily, pXd_huge() isn't widely used. Link: https://lkml.kernel.org/r/20240318200404.448346-12-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Fabio Estevam Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Konrad Dybcio Cc: Krzysztof Kozlowski Cc: Lucas Stach Cc: Mark Salter Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Russell King Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index e7aecbef75c9..9e3c44f0aea2 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -190,7 +190,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) -#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) +#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) -- cgit From 9636f055dae19ffae9d8bf21a68261b58c769987 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:04:02 -0400 Subject: mm/treewide: remove pXd_huge() This API is not used anymore, drop it for the whole tree. Link: https://lkml.kernel.org/r/20240318200404.448346-13-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Fabio Estevam Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Konrad Dybcio Cc: Krzysztof Kozlowski Cc: Lucas Stach Cc: Mark Salter Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Russell King Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/Makefile | 1 - arch/arm/mm/hugetlbpage.c | 29 ----------------------------- 2 files changed, 30 deletions(-) delete mode 100644 arch/arm/mm/hugetlbpage.c (limited to 'arch/arm') diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 71b858c9b10c..1779e12db085 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -21,7 +21,6 @@ KASAN_SANITIZE_physaddr.o := n obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c deleted file mode 100644 index c2fa643f6bb5..000000000000 --- a/arch/arm/mm/hugetlbpage.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/arm/mm/hugetlbpage.c - * - * Copyright (C) 2012 ARM Ltd. - * - * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int pud_huge(pud_t pud) -{ - return 0; -} - -int pmd_huge(pmd_t pmd) -{ - return pmd_leaf(pmd); -} -- cgit From 502016e33ae6474177097bc43ee26bad8f4d6919 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:04:03 -0400 Subject: mm/arm: remove pmd_thp_or_huge() ARM/ARM64 used to define pmd_thp_or_huge(). Now this macro is completely redundant. Remove it and use pmd_leaf(). Link: https://lkml.kernel.org/r/20240318200404.448346-14-peterx@redhat.com Signed-off-by: Peter Xu Cc: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: Shawn Guo Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Cc: Arnd Bergmann Cc: Konrad Dybcio Cc: Fabio Estevam Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Lucas Stach Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-2level.h | 1 - arch/arm/include/asm/pgtable-3level.h | 1 - arch/arm/lib/uaccess_with_memcpy.c | 4 ++-- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 4245c2e74720..6b5392e20f41 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -241,7 +241,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) * define empty stubs for use by pin_page_for_write. */ #define pmd_hugewillfault(pmd) (0) -#define pmd_thp_or_huge(pmd) (0) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 9e3c44f0aea2..fa5939eb9864 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -190,7 +190,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) -#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 2f6163f05e93..c0ac7796d775 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -56,10 +56,10 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) * to see that it's still huge and whether or not we will * need to fault on write. */ - if (unlikely(pmd_thp_or_huge(*pmd))) { + if (unlikely(pmd_leaf(*pmd))) { ptl = ¤t->mm->page_table_lock; spin_lock(ptl); - if (unlikely(!pmd_thp_or_huge(*pmd) + if (unlikely(!pmd_leaf(*pmd) || pmd_hugewillfault(*pmd))) { spin_unlock(ptl); return 0; -- cgit From 0069455bcbf9ea73ffe4553ed6d2b4e4cad703de Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Mar 2024 09:36:23 -0700 Subject: fix missing vmalloc.h includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Memory allocation profiling", v6. Overview: Low overhead [1] per-callsite memory allocation profiling. Not just for debug kernels, overhead low enough to be deployed in production. Example output: root@moria-kvm:~# sort -rn /proc/allocinfo 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext 56373248 4737 mm/slub.c:2259 func:alloc_slab_page 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start 3940352 962 mm/memory.c:4214 func:alloc_anon_folio 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node ... Usage: kconfig options: - CONFIG_MEM_ALLOC_PROFILING - CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT - CONFIG_MEM_ALLOC_PROFILING_DEBUG adds warnings for allocations that weren't accounted because of a missing annotation sysctl: /proc/sys/vm/mem_profiling Runtime info: /proc/allocinfo Notes: [1]: Overhead To measure the overhead we are comparing the following configurations: (1) Baseline with CONFIG_MEMCG_KMEM=n (2) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) (3) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) (4) Enabled at runtime (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n && /proc/sys/vm/mem_profiling=1) (5) Baseline with CONFIG_MEMCG_KMEM=y && allocating with __GFP_ACCOUNT (6) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) && CONFIG_MEMCG_KMEM=y (7) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) && CONFIG_MEMCG_KMEM=y Performance overhead: To evaluate performance we implemented an in-kernel test executing multiple get_free_page/free_page and kmalloc/kfree calls with allocation sizes growing from 8 to 240 bytes with CPU frequency set to max and CPU affinity set to a specific CPU to minimize the noise. Below are results from running the test on Ubuntu 22.04.2 LTS with 6.8.0-rc1 kernel on 56 core Intel Xeon: kmalloc pgalloc (1 baseline) 6.764s 16.902s (2 default disabled) 6.793s (+0.43%) 17.007s (+0.62%) (3 default enabled) 7.197s (+6.40%) 23.666s (+40.02%) (4 runtime enabled) 7.405s (+9.48%) 23.901s (+41.41%) (5 memcg) 13.388s (+97.94%) 48.460s (+186.71%) (6 def disabled+memcg) 13.332s (+97.10%) 48.105s (+184.61%) (7 def enabled+memcg) 13.446s (+98.78%) 54.963s (+225.18%) Memory overhead: Kernel size: text data bss dec diff (1) 26515311 18890222 17018880 62424413 (2) 26524728 19423818 16740352 62688898 264485 (3) 26524724 19423818 16740352 62688894 264481 (4) 26524728 19423818 16740352 62688898 264485 (5) 26541782 18964374 16957440 62463596 39183 Memory consumption on a 56 core Intel CPU with 125GB of memory: Code tags: 192 kB PageExts: 262144 kB (256MB) SlabExts: 9876 kB (9.6MB) PcpuExts: 512 kB (0.5MB) Total overhead is 0.2% of total memory. Benchmarks: Hackbench tests run 100 times: hackbench -s 512 -l 200 -g 15 -f 25 -P baseline disabled profiling enabled profiling avg 0.3543 0.3559 (+0.0016) 0.3566 (+0.0023) stdev 0.0137 0.0188 0.0077 hackbench -l 10000 baseline disabled profiling enabled profiling avg 6.4218 6.4306 (+0.0088) 6.5077 (+0.0859) stdev 0.0933 0.0286 0.0489 stress-ng tests: stress-ng --class memory --seq 4 -t 60 stress-ng --class cpu --seq 4 -t 60 Results posted at: https://evilpiepirate.org/~kent/memalloc_prof_v4_stress-ng/ [2] https://lore.kernel.org/all/20240306182440.2003814-1-surenb@google.com/ This patch (of 37): The next patch drops vmalloc.h from a system header in order to fix a circular dependency; this adds it to all the files that were pulling it in implicitly. [kent.overstreet@linux.dev: fix arch/alpha/lib/memcpy.c] Link: https://lkml.kernel.org/r/20240327002152.3339937-1-kent.overstreet@linux.dev [surenb@google.com: fix arch/x86/mm/numa_32.c] Link: https://lkml.kernel.org/r/20240402180933.1663992-1-surenb@google.com [kent.overstreet@linux.dev: a few places were depending on sizes.h] Link: https://lkml.kernel.org/r/20240404034744.1664840-1-kent.overstreet@linux.dev [arnd@arndb.de: fix mm/kasan/hw_tags.c] Link: https://lkml.kernel.org/r/20240404124435.3121534-1-arnd@kernel.org [surenb@google.com: fix arc build] Link: https://lkml.kernel.org/r/20240405225115.431056-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-2-surenb@google.com Signed-off-by: Kent Overstreet Signed-off-by: Suren Baghdasaryan Signed-off-by: Arnd Bergmann Reviewed-by: Pasha Tatashin Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- arch/arm/kernel/irq.c | 1 + arch/arm/kernel/traps.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index fe28fc1f759d..dab42d066d06 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 72c82a4d63ac..480e307501bb 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include -- cgit From 51718e25c53f58fe9fa515f219367632cce4914d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 17:10:29 +0000 Subject: mm: convert arch_clear_hugepage_flags to take a folio All implementations that aren't no-ops just set a bit in the flags, and we want to use the folio flags rather than the page flags for that. Rename it to arch_clear_hugetlb_flags() while we're touching it so nobody thinks it's used for THP. [willy@infradead.org: fix arm64 build] Link: https://lkml.kernel.org/r/ZgQvNKGdlDkwhQEX@casper.infradead.org Link: https://lkml.kernel.org/r/20240326171045.410737-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- arch/arm/include/asm/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index a3a82b7158d4..b766c4b373f6 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -15,10 +15,10 @@ #include #include -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif /* _ASM_ARM_HUGETLB_H */ -- cgit From b80fa3cbb78c0fbe5039682919d97a0dbe05ae7c Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 25 Mar 2024 19:16:52 -0700 Subject: treewide: use initializer for struct vm_unmapped_area_info Future changes will need to add a new member to struct vm_unmapped_area_info. This would cause trouble for any call site that doesn't initialize the struct. Currently every caller sets each member manually, so if new ones are added they will be uninitialized and the core code parsing the struct will see garbage in the new member. It could be possible to initialize the new member manually to 0 at each call site. This and a couple other options were discussed. Having some struct vm_unmapped_area_info instances not zero initialized will put those sites at risk of feeding garbage into vm_unmapped_area(), if the convention is to zero initialize the struct and any new field addition missed a call site that initializes each field manually. So it is useful to do things similar across the kernel. The consensus (see links) was that in general the best way to accomplish taking into account both code cleanliness and minimizing the chance of introducing bugs, was to do C99 static initialization. As in: struct vm_unmapped_area_info info = {}; With this method of initialization, the whole struct will be zero initialized, and any statements setting fields to zero will be unneeded. The change should not leave cleanup at the call sides. While iterating though the possible solutions a few archs kindly acked other variations that still zero initialized the struct. These sites have been modified in previous changes using the pattern acked by the respective arch. So to be reduce the chance of bugs via uninitialized fields, perform a tree wide change using the consensus for the best general way to do this change. Use C99 static initializing to zero the struct and remove and statements that simply set members to zero. Link: https://lkml.kernel.org/r/20240326021656.202649-11-rick.p.edgecombe@intel.com Link: https://lore.kernel.org/lkml/202402280912.33AEE7A9CF@keescook/#t Link: https://lore.kernel.org/lkml/j7bfvig3gew3qruouxrh7z7ehjjafrgkbcmg6tcghhfh3rhmzi@wzlcoecgy5rs/ Link: https://lore.kernel.org/lkml/ec3e377a-c0a0-4dd3-9cb9-96517e54d17e@csgroup.eu/ Signed-off-by: Rick Edgecombe Reviewed-by: Kees Cook Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov (AMD) Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Hansen Cc: Deepak Gupta Cc: Guo Ren Cc: Helge Deller Cc: H. Peter Anvin (Intel) Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Kirill A. Shutemov Cc: Liam R. Howlett Cc: Mark Brown Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/arm/mm/mmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index a0f8a0ca0788..d65d0e6ed10a 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -34,7 +34,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; int do_align = 0; int aliasing = cache_is_vipt_aliasing(); - struct vm_unmapped_area_info info; + struct vm_unmapped_area_info info = {}; /* * We only need to do colour alignment if either the I or D @@ -68,7 +68,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } - info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; @@ -87,7 +86,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long addr = addr0; int do_align = 0; int aliasing = cache_is_vipt_aliasing(); - struct vm_unmapped_area_info info; + struct vm_unmapped_area_info info = {}; /* * We only need to do colour alignment if either the I or D -- cgit From 15e4a5f5d8c9438530574d10241b8ece9f5edb61 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 3 Apr 2024 16:38:01 +0800 Subject: arm: mm: accelerate pagefault when VM_FAULT_BADACCESS The vm_flags of vma already checked under per-VMA lock, if it is a bad access, directly set fault to VM_FAULT_BADACCESS and handle error, no need to retry with mmap_lock again. Since the page faut is handled under per-VMA lock, count it as a vma lock event with VMA_LOCK_SUCCESS. Link: https://lkml.kernel.org/r/20240403083805.1818160-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Suren Baghdasaryan Cc: Albert Ou Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Gerald Schaefer Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 439dc6a26bb9..5c4b417e24f9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -294,7 +294,9 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!(vma->vm_flags & vm_flags)) { vma_end_read(vma); - goto lock_mmap; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + fault = VM_FAULT_BADACCESS; + goto bad_area; } fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) -- cgit From 25176ad09ca395fcc83b1fc78adf25c8eb1bd964 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 2 Apr 2024 14:55:15 +0200 Subject: mm/treewide: rename CONFIG_HAVE_FAST_GUP to CONFIG_HAVE_GUP_FAST Nowadays, we call it "GUP-fast", the external interface includes functions like "get_user_pages_fast()", and we renamed all internal functions to reflect that as well. Let's make the config option reflect that. Link: https://lkml.kernel.org/r/20240402125516.223131-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Mike Rapoport (IBM) Reviewed-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Peter Xu Signed-off-by: Andrew Morton --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b14aed3a17ab..817918f6635a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -99,7 +99,7 @@ config ARM select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD - select HAVE_FAST_GUP if ARM_LPAE + select HAVE_GUP_FAST if ARM_LPAE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER -- cgit From e9016174621112624f957c8138bd3c34692e2e93 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 11 Apr 2024 21:09:25 +0800 Subject: arm: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS If bad map or access, directly set code to SEGV_MAPRR or SEGV_ACCERR, also set fault to 0 and goto error handling, which make us to drop the arch's special vm fault reason. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20240411130925.73281-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Aishwarya TCV Cc: Catalin Marinas Cc: Cristian Marussi Cc: Mark Brown Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/fault.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5c4b417e24f9..bf07afdb2dd9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -226,9 +226,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } #ifdef CONFIG_MMU -#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) -#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) - static inline bool is_permission_fault(unsigned int fsr) { int fs = fsr_fs(fsr); @@ -295,7 +292,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!(vma->vm_flags & vm_flags)) { vma_end_read(vma); count_vm_vma_lock_event(VMA_LOCK_SUCCESS); - fault = VM_FAULT_BADACCESS; + fault = 0; + code = SEGV_ACCERR; goto bad_area; } fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); @@ -321,7 +319,8 @@ lock_mmap: retry: vma = lock_mm_and_find_vma(mm, addr, regs); if (unlikely(!vma)) { - fault = VM_FAULT_BADMAP; + fault = 0; + code = SEGV_MAPERR; goto bad_area; } @@ -329,10 +328,14 @@ retry: * ok, we have a good vm_area for this memory access, check the * permissions on the VMA allow for the fault which occurred. */ - if (!(vma->vm_flags & vm_flags)) - fault = VM_FAULT_BADACCESS; - else - fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); + if (!(vma->vm_flags & vm_flags)) { + mmap_read_unlock(mm); + fault = 0; + code = SEGV_ACCERR; + goto bad_area; + } + + fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because @@ -358,12 +361,11 @@ retry: mmap_read_unlock(mm); done: - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + /* Handle the "normal" case first */ + if (likely(!(fault & VM_FAULT_ERROR))) return 0; + code = SEGV_MAPERR; bad_area: /* * If we are in kernel mode at this point, we @@ -395,8 +397,6 @@ bad_area: * isn't in our memory map.. */ sig = SIGSEGV; - code = fault == VM_FAULT_BADACCESS ? - SEGV_ACCERR : SEGV_MAPERR; } __do_user_fault(addr, fsr, sig, code, regs); -- cgit