From 3a72917ccfbb2eecabefc86aade8acd4a1e85888 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Wed, 23 Mar 2022 16:05:20 -0700 Subject: proc: alloc PATH_MAX bytes for /proc/${pid}/fd/ symlinks It's not a standard approach that use __get_free_page() to alloc path buffer directly. We'd better use kmalloc and PATH_MAX. PAGE_SIZE is different on different archs. An unlinked file with very long canonical pathname will readlink differently because "(deleted)" eats into a buffer. --adobriyan [akpm@linux-foundation.org: remove now-unneeded cast] Link: https://lkml.kernel.org/r/Ye1fCxyZZ0I5lgOL@localhost.localdomain Signed-off-by: Hao Lee Signed-off-by: Alexey Dobriyan Cc: Christian Brauner Cc: Kees Cook Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index d654ce7150fd..76bf1aa3cfe8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1764,25 +1764,25 @@ out: static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - char *tmp = (char *)__get_free_page(GFP_KERNEL); + char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); char *pathname; int len; if (!tmp) return -ENOMEM; - pathname = d_path(path, tmp, PAGE_SIZE); + pathname = d_path(path, tmp, PATH_MAX); len = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - pathname; + len = tmp + PATH_MAX - 1 - pathname; if (len > buflen) len = buflen; if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: - free_page((unsigned long)tmp); + kfree(tmp); return len; } -- cgit From 5039b170369d22613ebc07e81410891f52280a45 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 23 Mar 2022 16:05:23 -0700 Subject: proc/vmcore: fix possible deadlock on concurrent mmap and read Lockdep noticed that there is chance for a deadlock if we have concurrent mmap, concurrent read, and the addition/removal of a callback. As nicely explained by Boqun: "Lockdep warned about the above sequences because rw_semaphore is a fair read-write lock, and the following can cause a deadlock: TASK 1 TASK 2 TASK 3 ====== ====== ====== down_write(mmap_lock); down_read(vmcore_cb_rwsem) down_write(vmcore_cb_rwsem); // blocked down_read(vmcore_cb_rwsem); // cannot get the lock because of the fairness down_read(mmap_lock); // blocked IOW, a reader can block another read if there is a writer queued by the second reader and the lock is fair" To fix this, convert to srcu to make this deadlock impossible. We need srcu as our callbacks can sleep. With this change, I cannot trigger any lockdep warnings. ====================================================== WARNING: possible circular locking dependency detected 5.17.0-0.rc0.20220117git0c947b893d69.68.test.fc36.x86_64 #1 Not tainted ------------------------------------------------------ makedumpfile/542 is trying to acquire lock: ffffffff832d2eb8 (vmcore_cb_rwsem){.+.+}-{3:3}, at: mmap_vmcore+0x340/0x580 but task is already holding lock: ffff8880af226438 (&mm->mmap_lock#2){++++}-{3:3}, at: vm_mmap_pgoff+0x84/0x150 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_lock#2){++++}-{3:3}: lock_acquire+0xc3/0x1a0 __might_fault+0x4e/0x70 _copy_to_user+0x1f/0x90 __copy_oldmem_page+0x72/0xc0 read_from_oldmem+0x77/0x1e0 read_vmcore+0x2c2/0x310 proc_reg_read+0x47/0xa0 vfs_read+0x101/0x340 __x64_sys_pread64+0x5d/0xa0 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (vmcore_cb_rwsem){.+.+}-{3:3}: validate_chain+0x9f4/0x2670 __lock_acquire+0x8f7/0xbc0 lock_acquire+0xc3/0x1a0 down_read+0x4a/0x140 mmap_vmcore+0x340/0x580 proc_reg_mmap+0x3e/0x90 mmap_region+0x504/0x880 do_mmap+0x38a/0x520 vm_mmap_pgoff+0xc1/0x150 ksys_mmap_pgoff+0x178/0x200 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_lock#2); lock(vmcore_cb_rwsem); lock(&mm->mmap_lock#2); lock(vmcore_cb_rwsem); *** DEADLOCK *** 1 lock held by makedumpfile/542: #0: ffff8880af226438 (&mm->mmap_lock#2){++++}-{3:3}, at: vm_mmap_pgoff+0x84/0x150 stack backtrace: CPU: 0 PID: 542 Comm: makedumpfile Not tainted 5.17.0-0.rc0.20220117git0c947b893d69.68.test.fc36.x86_64 #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Call Trace: __lock_acquire+0x8f7/0xbc0 lock_acquire+0xc3/0x1a0 down_read+0x4a/0x140 mmap_vmcore+0x340/0x580 proc_reg_mmap+0x3e/0x90 mmap_region+0x504/0x880 do_mmap+0x38a/0x520 vm_mmap_pgoff+0xc1/0x150 ksys_mmap_pgoff+0x178/0x200 do_syscall_64+0x43/0x90 Link: https://lkml.kernel.org/r/20220119193417.100385-1-david@redhat.com Fixes: cc5f2704c934 ("proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks") Signed-off-by: David Hildenbrand Reported-by: Baoquan He Acked-by: Baoquan He Cc: Vivek Goyal Cc: Dave Young Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Peter Zijlstra Cc: Boqun Feng Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 702754dd1daf..edeb01dfe05d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -62,7 +62,8 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0); /* Device Dump Size */ static size_t vmcoredd_orig_sz; -static DECLARE_RWSEM(vmcore_cb_rwsem); +static DEFINE_SPINLOCK(vmcore_cb_lock); +DEFINE_STATIC_SRCU(vmcore_cb_srcu); /* List of registered vmcore callbacks. */ static LIST_HEAD(vmcore_cb_list); /* Whether the vmcore has been opened once. */ @@ -70,8 +71,8 @@ static bool vmcore_opened; void register_vmcore_cb(struct vmcore_cb *cb) { - down_write(&vmcore_cb_rwsem); INIT_LIST_HEAD(&cb->next); + spin_lock(&vmcore_cb_lock); list_add_tail(&cb->next, &vmcore_cb_list); /* * Registering a vmcore callback after the vmcore was opened is @@ -79,14 +80,14 @@ void register_vmcore_cb(struct vmcore_cb *cb) */ if (vmcore_opened) pr_warn_once("Unexpected vmcore callback registration\n"); - up_write(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); } EXPORT_SYMBOL_GPL(register_vmcore_cb); void unregister_vmcore_cb(struct vmcore_cb *cb) { - down_write(&vmcore_cb_rwsem); - list_del(&cb->next); + spin_lock(&vmcore_cb_lock); + list_del_rcu(&cb->next); /* * Unregistering a vmcore callback after the vmcore was opened is * very unusual (e.g., forced driver removal), but we cannot stop @@ -94,7 +95,9 @@ void unregister_vmcore_cb(struct vmcore_cb *cb) */ if (vmcore_opened) pr_warn_once("Unexpected vmcore callback unregistration\n"); - up_write(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); + + synchronize_srcu(&vmcore_cb_srcu); } EXPORT_SYMBOL_GPL(unregister_vmcore_cb); @@ -103,9 +106,8 @@ static bool pfn_is_ram(unsigned long pfn) struct vmcore_cb *cb; bool ret = true; - lockdep_assert_held_read(&vmcore_cb_rwsem); - - list_for_each_entry(cb, &vmcore_cb_list, next) { + list_for_each_entry_srcu(cb, &vmcore_cb_list, next, + srcu_read_lock_held(&vmcore_cb_srcu)) { if (unlikely(!cb->pfn_is_ram)) continue; ret = cb->pfn_is_ram(cb, pfn); @@ -118,9 +120,9 @@ static bool pfn_is_ram(unsigned long pfn) static int open_vmcore(struct inode *inode, struct file *file) { - down_read(&vmcore_cb_rwsem); + spin_lock(&vmcore_cb_lock); vmcore_opened = true; - up_read(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); return 0; } @@ -133,6 +135,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, unsigned long pfn, offset; size_t nr_bytes; ssize_t read = 0, tmp; + int idx; if (!count) return 0; @@ -140,7 +143,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = (unsigned long)(*ppos % PAGE_SIZE); pfn = (unsigned long)(*ppos / PAGE_SIZE); - down_read(&vmcore_cb_rwsem); + idx = srcu_read_lock(&vmcore_cb_srcu); do { if (count > (PAGE_SIZE - offset)) nr_bytes = PAGE_SIZE - offset; @@ -165,7 +168,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset, userbuf); } if (tmp < 0) { - up_read(&vmcore_cb_rwsem); + srcu_read_unlock(&vmcore_cb_srcu, idx); return tmp; } @@ -176,8 +179,8 @@ ssize_t read_from_oldmem(char *buf, size_t count, ++pfn; offset = 0; } while (count); + srcu_read_unlock(&vmcore_cb_srcu, idx); - up_read(&vmcore_cb_rwsem); return read; } @@ -568,18 +571,18 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { - int ret; + int ret, idx; /* - * Check if oldmem_pfn_is_ram was registered to avoid - * looping over all pages without a reason. + * Check if a callback was registered to avoid looping over all + * pages without a reason. */ - down_read(&vmcore_cb_rwsem); + idx = srcu_read_lock(&vmcore_cb_srcu); if (!list_empty(&vmcore_cb_list)) ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot); else ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot); - up_read(&vmcore_cb_rwsem); + srcu_read_unlock(&vmcore_cb_srcu, idx); return ret; } -- cgit From e9f5d1017c50db2ec3976d026d48daa5800b0ee1 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 23 Mar 2022 16:05:26 -0700 Subject: proc/vmcore: fix vmcore_alloc_buf() kernel-doc comment Fix a spelling problem to remove warnings found by running scripts/kernel-doc, which is caused by using 'make W=1'. fs/proc/vmcore.c:492: warning: Function parameter or member 'size' not described in 'vmcore_alloc_buf' fs/proc/vmcore.c:492: warning: Excess function parameter 'sizez' description in 'vmcore_alloc_buf' Link: https://lkml.kernel.org/r/20220129011449.105278-1-yang.lee@linux.alibaba.com Signed-off-by: Yang Li Reported-by: Abaci Robot Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index edeb01dfe05d..6f1b8ddc6f7a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -480,7 +480,7 @@ static const struct vm_operations_struct vmcore_mmap_ops = { /** * vmcore_alloc_buf - allocate buffer in vmalloc memory - * @sizez: size of buffer + * @size: size of buffer * * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap * the buffer to user-space by means of remap_vmalloc_range(). -- cgit From c724c866bb70cb8c607081a26823a1f0ebde4387 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:29 -0700 Subject: linux/types.h: remove unnecessary __bitwise__ There are no users of "__bitwise__" except the definition of "__bitwise". Remove __bitwise__ and define __bitwise directly. This is a follow-up to 05de97003c77 ("linux/types.h: enable endian checks for all sparse builds"). [akpm@linux-foundation.org: change the tools/include/linux/types.h definition also] Link: https://lkml.kernel.org/r/20220310220927.245704-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 5 ++--- tools/include/linux/types.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index f6d2f83cbe29..71696f424ac8 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -20,11 +20,10 @@ */ #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 6e14a533ab4e..6c18c54e7d7f 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -43,11 +43,10 @@ typedef __u8 u8; typedef __s8 s8; #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ #define __force #define __user -- cgit From 179fd6ba3bacbf7b19cbdf9b14be109d54318394 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:32 -0700 Subject: Documentation/sparse: add hints about __CHECKER__ Several attributes depend on __CHECKER__, but previously there was no clue in the tree about when __CHECKER__ might be defined. Add hints at the most common places (__kernel, __user, __iomem, __bitwise) and in the sparse documentation. Link: https://lkml.kernel.org/r/20220310220927.245704-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Michael S . Tsirkin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/sparse.rst | 2 ++ include/linux/compiler_types.h | 1 + include/uapi/linux/types.h | 1 + 3 files changed, 4 insertions(+) diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst index 02102be7ff49..dc791c8d84d1 100644 --- a/Documentation/dev-tools/sparse.rst +++ b/Documentation/dev-tools/sparse.rst @@ -100,3 +100,5 @@ have already built it. The optional make variable CF can be used to pass arguments to sparse. The build system passes -Wbitwise to sparse automatically. + +Note that sparse defines the __CHECKER__ preprocessor symbol. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..4b3915ce38a4 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 71696f424ac8..c4dc597f3dcf 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -19,6 +19,7 @@ * any application/library that wants linux/types.h. */ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else -- cgit From a7cd9a5376aa188529393a802329645dc8c5a63c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 23 Mar 2022 16:05:35 -0700 Subject: kernel/ksysfs.c: use helper macro __ATTR_RW Use helper macro __ATTR_RW to define kobj_attribute to make code more clear. Minor readability improvement. Link: https://lkml.kernel.org/r/20220222112034.48298-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ksysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 35859da8bd4f..b1292a57c2a5 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -24,8 +24,7 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) #define KERNEL_ATTR_RW(_name) \ -static struct kobj_attribute _name##_attr = \ - __ATTR(_name, 0644, _name##_show, _name##_store) +static struct kobj_attribute _name##_attr = __ATTR_RW(_name) /* current uevent sequence number */ static ssize_t uevent_seqnum_show(struct kobject *kobj, -- cgit From f9b3cd24578401e7a392974b3353277286e49cee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Mar 2022 16:05:38 -0700 Subject: Kconfig.debug: make DEBUG_INFO selectable from a choice Currently it's not possible to enable DEBUG_INFO for an all*config build, since it is marked as "depends on !COMPILE_TEST". This generally makes sense because a debug build of an all*config target ends up taking much longer and the output is much larger. Having this be "default off" makes sense. However, there are cases where enabling DEBUG_INFO for such builds is useful for doing treewide A/B comparisons of build options, etc. Make DEBUG_INFO selectable from any of the DWARF version choice options, with DEBUG_INFO_NONE being the default for COMPILE_TEST. The mutually exclusive relationship between DWARF5 and BTF must be inverted, but the result remains the same. Additionally moves DEBUG_KERNEL and DEBUG_MISC up to the top of the menu because they were enabling features _above_ it, making it weird to navigate menuconfig. [keescook@chromium.org: make DEBUG_INFO always default=n] Link: https://lkml.kernel.org/r/20220128214131.580131-1-keescook@chromium.org Link: https://lore.kernel.org/lkml/YfRY6+CaQxX7O8vF@dev-arch.archlinux-ax161 Link: https://lkml.kernel.org/r/20220125075126.891825-1-keescook@chromium.org Signed-off-by: Kees Cook Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Reviewed-by: Masahiro Yamada Cc: Nick Desaulniers Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 139 +++++++++++++++++++++++++++++------------------------- 1 file changed, 74 insertions(+), 65 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c..6c84ef66ceb8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -208,20 +208,87 @@ config DEBUG_BUGVERBOSE endmenu # "printk and dmesg options" +config DEBUG_KERNEL + bool "Kernel debugging" + help + Say Y here if you are developing drivers or trying to debug and + identify kernel problems. + +config DEBUG_MISC + bool "Miscellaneous debug code" + default DEBUG_KERNEL + depends on DEBUG_KERNEL + help + Say Y here if you need to enable miscellaneous debug code that should + be under a more specific debug option but isn't. + menu "Compile-time checks and compiler options" config DEBUG_INFO - bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL && !COMPILE_TEST + bool help - If you say Y here the resulting kernel image will include - debugging info resulting in a larger kernel image. + A kernel debug info option other than "None" has been selected + in the "Debug information" choice below, indicating that debug + information will be generated for build targets. + +choice + prompt "Debug information" + depends on DEBUG_KERNEL + help + Selecting something other than "None" results in a kernel image + that will include debugging info resulting in a larger kernel image. This adds debug symbols to the kernel and modules (gcc -g), and is needed if you intend to use kernel crashdump or binary object tools like crash, kgdb, LKCD, gdb, etc on the kernel. - Say Y here only if you plan to debug the kernel. - If unsure, say N. + Choose which version of DWARF debug info to emit. If unsure, + select "Toolchain default". + +config DEBUG_INFO_NONE + bool "Disable debug information" + help + Do not build the kernel with debugging information, which will + result in a faster and smaller build. + +config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT + bool "Rely on the toolchain's implicit default DWARF version" + select DEBUG_INFO + help + The implicit default version of DWARF debug info produced by a + toolchain changes over time. + + This can break consumers of the debug info that haven't upgraded to + support newer revisions, and prevent testing newer versions, but + those should be less common scenarios. + +config DEBUG_INFO_DWARF4 + bool "Generate DWARF Version 4 debuginfo" + select DEBUG_INFO + help + Generate DWARF v4 debug info. This requires gcc 4.5+ and gdb 7.0+. + + If you have consumers of DWARF debug info that are not ready for + newer revisions of DWARF, you may wish to choose this or have your + config select this. + +config DEBUG_INFO_DWARF5 + bool "Generate DWARF Version 5 debuginfo" + select DEBUG_INFO + depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + help + Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc + 5.0+ accepts the -gdwarf-5 flag but only had partial support for some + draft features until 7.0), and gdb 8.0+. + + Changes to the structure of debug info in Version 5 allow for around + 15-18% savings in resulting image and debug info section sizes as + compared to DWARF Version 4. DWARF Version 5 standardizes previous + extensions such as accelerators for symbol indexing and the format + for fission (.dwo/.dwp) files. Users may not want to select this + config if they rely on tooling that has not yet been updated to + support DWARF Version 5. + +endchoice # "Debug information" if DEBUG_INFO @@ -267,56 +334,12 @@ config DEBUG_INFO_SPLIT to know about the .dwo files and include them. Incompatible with older versions of ccache. -choice - prompt "DWARF version" - help - Which version of DWARF debug info to emit. - -config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT - bool "Rely on the toolchain's implicit default DWARF version" - help - The implicit default version of DWARF debug info produced by a - toolchain changes over time. - - This can break consumers of the debug info that haven't upgraded to - support newer revisions, and prevent testing newer versions, but - those should be less common scenarios. - - If unsure, say Y. - -config DEBUG_INFO_DWARF4 - bool "Generate DWARF Version 4 debuginfo" - help - Generate DWARF v4 debug info. This requires gcc 4.5+ and gdb 7.0+. - - If you have consumers of DWARF debug info that are not ready for - newer revisions of DWARF, you may wish to choose this or have your - config select this. - -config DEBUG_INFO_DWARF5 - bool "Generate DWARF Version 5 debuginfo" - depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) - depends on !DEBUG_INFO_BTF - help - Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc - 5.0+ accepts the -gdwarf-5 flag but only had partial support for some - draft features until 7.0), and gdb 8.0+. - - Changes to the structure of debug info in Version 5 allow for around - 15-18% savings in resulting image and debug info section sizes as - compared to DWARF Version 4. DWARF Version 5 standardizes previous - extensions such as accelerators for symbol indexing and the format - for fission (.dwo/.dwp) files. Users may not want to select this - config if they rely on tooling that has not yet been updated to - support DWARF Version 5. - -endchoice # "DWARF version" - config DEBUG_INFO_BTF bool "Generate BTF typeinfo" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL + depends on !DEBUG_INFO_DWARF5 help Generate deduplicated BTF type information from DWARF debug info. Turning this on expects presence of pahole tool, which will convert @@ -585,20 +608,6 @@ source "lib/Kconfig.kcsan" endmenu -config DEBUG_KERNEL - bool "Kernel debugging" - help - Say Y here if you are developing drivers or trying to debug and - identify kernel problems. - -config DEBUG_MISC - bool "Miscellaneous debug code" - default DEBUG_KERNEL - depends on DEBUG_KERNEL - help - Say Y here if you need to enable miscellaneous debug code that should - be under a more specific debug option but isn't. - menu "Networking Debugging" source "net/Kconfig.debug" -- cgit From 14e83077d55ff4b88fe39f5e98fb8230c2ccb4fb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Mar 2022 16:05:41 -0700 Subject: include: drop pointless __compiler_offsetof indirection (1) compiler_types.h is unconditionally included via an -include flag (see scripts/Makefile.lib), and it defines __compiler_offsetof unconditionally. So testing for definedness of __compiler_offsetof is mostly pointless. (2) Every relevant compiler provides __builtin_offsetof (even sparse has had that for 14 years), and if for whatever reason one would end up picking up the poor man's fallback definition (C file compiler with completely custom CFLAGS?), newer clang versions won't treat the result as an Integer Constant Expression, so if used in place where such is required (static initializer or static_assert), one would get errors like t.c:11:16: error: static_assert expression is not an integral constant expression t.c:11:16: note: cast that performs the conversions of a reinterpret_cast is not allowed in a constant expression t.c:4:33: note: expanded from macro 'offsetof' #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) So just define offsetof unconditionally and directly in terms of __builtin_offsetof. Link: https://lkml.kernel.org/r/20220202102147.326672-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Acked-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 2 -- include/linux/stddef.h | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4b3915ce38a4..232dbd97f8b1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -138,8 +138,6 @@ struct ftrace_likely_data { */ #define __naked __attribute__((__naked__)) notrace -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - /* * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 diff --git a/include/linux/stddef.h b/include/linux/stddef.h index ca507bd5f808..929d67710cc5 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -13,11 +13,7 @@ enum { }; #undef offsetof -#ifdef __compiler_offsetof -#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) -#else -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) /** * sizeof_field() - Report the size of a struct field in bytes -- cgit From f334f5668bedf7307f6df1d98b14f55902931926 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Mar 2022 16:05:44 -0700 Subject: ilog2: force inlining of __ilog2_u32() and __ilog2_u64() Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to __ilog2_u32() being duplicated 50 times and __ilog2_u64() 3 times in vmlinux on a tiny powerpc32 config. __ilog2_u32() being 2 instructions it is not worth being kept out of line, so force inlining. Allthough the u64 version is a bit bigger, there is still a small benefit in keeping it inlined. On a 64 bits config there's a real benefit. With this change the size of vmlinux text is reduced by 1 kbytes, which is approx 50% more than the size of the removed functions. Before the patch there is for instance: c00d2a94 <__ilog2_u32>: c00d2a94: 7c 63 00 34 cntlzw r3,r3 c00d2a98: 20 63 00 1f subfic r3,r3,31 c00d2a9c: 4e 80 00 20 blr c00d36d8 <__order_base_2>: c00d36d8: 28 03 00 01 cmplwi r3,1 c00d36dc: 40 81 00 2c ble c00d3708 <__order_base_2+0x30> c00d36e0: 94 21 ff f0 stwu r1,-16(r1) c00d36e4: 7c 08 02 a6 mflr r0 c00d36e8: 38 63 ff ff addi r3,r3,-1 c00d36ec: 90 01 00 14 stw r0,20(r1) c00d36f0: 4b ff f3 a5 bl c00d2a94 <__ilog2_u32> c00d36f4: 80 01 00 14 lwz r0,20(r1) c00d36f8: 38 63 00 01 addi r3,r3,1 c00d36fc: 7c 08 03 a6 mtlr r0 c00d3700: 38 21 00 10 addi r1,r1,16 c00d3704: 4e 80 00 20 blr c00d3708: 38 60 00 00 li r3,0 c00d370c: 4e 80 00 20 blr With the patch it has become: c00d356c <__order_base_2>: c00d356c: 28 03 00 01 cmplwi r3,1 c00d3570: 40 81 00 14 ble c00d3584 <__order_base_2+0x18> c00d3574: 38 63 ff ff addi r3,r3,-1 c00d3578: 7c 63 00 34 cntlzw r3,r3 c00d357c: 20 63 00 20 subfic r3,r3,32 c00d3580: 4e 80 00 20 blr c00d3584: 38 60 00 00 li r3,0 c00d3588: 4e 80 00 20 blr No more need for __order_base_2() to setup a stack frame and save/restore caller address. And the following 'add 1' is merged in the subtract. Another typical use of it: c080ff28 : ... c080fff8: 7f c3 f3 78 mr r3,r30 c080fffc: 4b 8f 81 f1 bl c01081ec <__ilog2_u32> c0810000: 38 63 ff f2 addi r3,r3,-14 ... Becomes c080ff1c : ... c080ffec: 7f c3 00 34 cntlzw r3,r30 c080fff0: 20 63 00 11 subfic r3,r3,17 ... Here no need to move r30 argument to r3 then substract 14 to result. Just work on r30 and merge the 'sub 14' with the 'sub from 31'. Link: https://lkml.kernel.org/r/803a2ac3d923ebcfd0dd40f5886b05cae7bb0aba.1644243860.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/log2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index df0b155c2141..9f30d087a128 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -18,7 +18,7 @@ * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; @@ -26,7 +26,7 @@ int __ilog2_u32(u32 n) #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; -- cgit From 25cb5b7ac6a7f7943e22a6831b74a6aa964d2a0c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Mar 2022 16:05:47 -0700 Subject: bitfield: add explicit inclusions to the example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not obvious that bitfield.h doesn't guarantee the bits.h inclusion and the example in the former is confusing. Some developers think that it's okay to just include bitfield.h to get it working. Change example to explicitly include necessary headers in order to avoid confusion. Link: https://lkml.kernel.org/r/20220207123341.47533-1-andriy.shevchenko@linux.intel.com Fixes: 3e9b3112ec74 ("add basic register-field manipulation macros") Depends-on: 8bd9cb51daac ("locking/atomics, asm-generic: Move some macros from to a new file") Signed-off-by: Andy Shevchenko Reported-by: Jan Dąbroś Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6093fa6db260..c9be1657f03d 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -19,6 +19,9 @@ * * Example: * + * #include + * #include + * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) -- cgit From 1bf18da62106225dbc47aab41efee2aeb99caccd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 23 Mar 2022 16:05:50 -0700 Subject: lib/Kconfig.debug: add ARCH dependency for FUNCTION_ALIGN option 0Day robots reported there is compiling issue for 'csky' ARCH when CONFIG_DEBUG_FORCE_DATA_SECTION_ALIGNED is enabled [1]: All errors (new ones prefixed by >>): {standard input}: Assembler messages: >> {standard input}:2277: Error: pcrel offset for branch to .LS000B too far (0x3c) Which was discussed in [2]. And as there is no solution for csky yet, add some dependency for this config to limit it to several ARCHs which have no compiling issue so far. [1]. https://lore.kernel.org/lkml/202202271612.W32UJAj2-lkp@intel.com/ [2]. https://www.spinics.net/lists/linux-kbuild/msg30298.html Link: https://lkml.kernel.org/r/20220304021100.GN4548@shbuild999.sh.intel.com Reported-by: kernel test robot Signed-off-by: Feng Tang Cc: Guo Ren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6c84ef66ceb8..b84c2462fe8d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -439,7 +439,8 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. config DEBUG_FORCE_FUNCTION_ALIGN_64B - bool "Force all function address 64B aligned" if EXPERT + bool "Force all function address 64B aligned" + depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC) help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance -- cgit From 2699e5143c9b071d68b12b9a86b6312866a6160b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Mar 2022 16:05:53 -0700 Subject: lib: bitmap: fix many kernel-doc warnings Fix kernel-doc warings in lib/bitmap.c: lib/bitmap.c:498: warning: Function parameter or member 'buf' not described in 'bitmap_print_to_buf' lib/bitmap.c:498: warning: Function parameter or member 'maskp' not described in 'bitmap_print_to_buf' lib/bitmap.c:498: warning: Function parameter or member 'nmaskbits' not described in 'bitmap_print_to_buf' lib/bitmap.c:498: warning: Function parameter or member 'off' not described in 'bitmap_print_to_buf' lib/bitmap.c:498: warning: Function parameter or member 'count' not described in 'bitmap_print_to_buf' lib/bitmap.c:561: warning: contents before sections lib/bitmap.c:606: warning: Function parameter or member 'buf' not described in 'bitmap_print_list_to_buf' lib/bitmap.c:606: warning: Function parameter or member 'maskp' not described in 'bitmap_print_list_to_buf' lib/bitmap.c:606: warning: Function parameter or member 'nmaskbits' not described in 'bitmap_print_list_to_buf' lib/bitmap.c:606: warning: Function parameter or member 'off' not described in 'bitmap_print_list_to_buf' lib/bitmap.c:606: warning: Function parameter or member 'count' not described in 'bitmap_print_list_to_buf' lib/bitmap.c:819: warning: missing initial short description on line: * bitmap_parselist_user() This still leaves 15 warnings for function return values not described, similar to this one: bitmap.c:890: warning: No description found for return value of 'bitmap_parse' Link: https://lkml.kernel.org/r/20220306065823.5153-1-rdunlap@infradead.org Fixes: 1fae562983ca ("cpumask: introduce cpumap_print_list/bitmask_to_buf to support large bitmask and list") Fixes: 4b060420a596 ("bitmap, irq: add smp_affinity_list interface to /proc/irq") Signed-off-by: Randy Dunlap Cc: Yury Norov Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Tian Tao Cc: Mike Travis Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 926408883456..0d5c2ece0bcb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -492,6 +492,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf); * @list: indicates whether the bitmap must be list * true: print in decimal list format * false: print in hexadecimal bitmask format + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print */ static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count) @@ -512,6 +517,11 @@ static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, /** * bitmap_print_bitmask_to_buf - convert bitmap to hex bitmask format ASCII string + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print * * The bitmap_print_to_pagebuf() is used indirectly via its cpumap wrapper * cpumap_print_to_pagebuf() or directly by drivers to export hexadecimal @@ -553,12 +563,6 @@ static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, * move to use bin_attribute. In result, we have to pass the corresponding * parameters such as off, count from bin_attribute show entry to this API. * - * @buf: buffer into which string is placed - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * @off: in the string from which we are copying, We copy to @buf - * @count: the maximum number of bytes to print - * * The role of cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf() * is similar with cpumap_print_to_pagebuf(), the difference is that * bitmap_print_to_pagebuf() mainly serves sysfs attribute with the assumption @@ -597,6 +601,11 @@ EXPORT_SYMBOL(bitmap_print_bitmask_to_buf); /** * bitmap_print_list_to_buf - convert bitmap to decimal list format ASCII string + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print * * Everything is same with the above bitmap_print_bitmask_to_buf() except * the print format. @@ -807,7 +816,8 @@ EXPORT_SYMBOL(bitmap_parselist); /** - * bitmap_parselist_user() + * bitmap_parselist_user() - convert user buffer's list format ASCII + * string to bitmap * * @ubuf: pointer to user buffer containing string. * @ulen: buffer size in bytes. If string is smaller than this -- cgit From 6e8f42dc9c8548c6b37566f3b8bda1873700c4a6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 23 Mar 2022 16:05:56 -0700 Subject: checkpatch: prefer MODULE_LICENSE("GPL") over MODULE_LICENSE("GPL v2") There is no effective difference. Given the large number of uses of "GPL v2", emit this message only for patches as a trivial treeside sed could be done one day. Ref: commit bf7fbeeae6db ("module: Cure the MODULE_LICENSE "GPL" vs. "GPL v2" bogosity") Link: https://lkml.kernel.org/r/20220128185924.80137-1-joe@perches.com Signed-off-by: Joe Perches Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b01c36a15d9d..b7c181ea0ac5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -7418,6 +7418,13 @@ sub process { WARN("MODULE_LICENSE", "unknown module license " . $extracted_string . "\n" . $herecurr); } + if (!$file && $extracted_string eq '"GPL v2"') { + if (WARN("MODULE_LICENSE", + "Prefer \"GPL\" over \"GPL v2\" - see commit bf7fbeeae6db (\"module: Cure the MODULE_LICENSE \"GPL\" vs. \"GPL v2\" bogosity\")\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bMODULE_LICENSE\s*\(\s*"GPL v2"\s*\)/MODULE_LICENSE("GPL")/; + } + } } # check for sysctl duplicate constants -- cgit From 481efd7bd6f28febddf189e7a970bb0bc5a53de8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 23 Mar 2022 16:05:59 -0700 Subject: checkpatch: add --fix option for some TRAILING_STATEMENTS Single line code like: if (foo) bar; should generally be written: if (foo) bar; Add a --fix test to do so. This fix is not done when an ASSIGN_IN_IF in the same line exists. Link: https://lkml.kernel.org/r/20220128185924.80137-2-joe@perches.com Signed-off-by: Joe Perches Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b7c181ea0ac5..046a018093a7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5551,6 +5551,7 @@ sub process { defined($stat) && defined($cond) && $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { my ($s, $c) = ($stat, $cond); + my $fixed_assign_in_if = 0; if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { if (ERROR("ASSIGN_IN_IF", @@ -5575,6 +5576,7 @@ sub process { $newline .= ')'; $newline .= " {" if (defined($brace)); fix_insert_line($fixlinenr + 1, $newline); + $fixed_assign_in_if = 1; } } } @@ -5598,8 +5600,20 @@ sub process { $stat_real = "[...]\n$stat_real"; } - ERROR("TRAILING_STATEMENTS", - "trailing statements should be on next line\n" . $herecurr . $stat_real); + if (ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr . $stat_real) && + !$fixed_assign_in_if && + $cond_lines == 0 && + $fix && $perl_version_ok && + $fixed[$fixlinenr] =~ /^\+(\s*)((?:if|while|for)\s*$balanced_parens)\s*(.*)$/) { + my $indent = $1; + my $test = $2; + my $rest = rtrim($4); + if ($rest =~ /;$/) { + $fixed[$fixlinenr] = "\+$indent$test"; + fix_insert_line($fixlinenr + 1, "$indent\t$rest"); + } + } } } -- cgit From 05dc40e694e0ff527011d0b09542f08da60e28cc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 23 Mar 2022 16:06:02 -0700 Subject: checkpatch: add early_param exception to blank line after struct/function test Add early_param as another exception to the blank line preferred after function/struct/union declaration or definition test. Link: https://lkml.kernel.org/r/3bd6ada59f411a7685d7e64eeb670540d4bfdcde.camel@perches.com Signed-off-by: Joe Perches Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 046a018093a7..2653177f52d9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3926,7 +3926,7 @@ sub process { if ($prevline =~ /^[\+ ]};?\s*$/ && $line =~ /^\+/ && !($line =~ /^\+\s*$/ || - $line =~ /^\+\s*EXPORT_SYMBOL/ || + $line =~ /^\+\s*(?:EXPORT_SYMBOL|early_param)/ || $line =~ /^\+\s*MODULE_/i || $line =~ /^\+\s*\#\s*(?:end|elif|else)/ || $line =~ /^\+[a-z_]*init/ || -- cgit From c882c6b1cb3105d378768aa168d2283f50b6e304 Mon Sep 17 00:00:00 2001 From: Sagar Patel Date: Wed, 23 Mar 2022 16:06:05 -0700 Subject: checkpatch: use python3 to find codespell dictionary Commit 0ee3e7b8893e ("checkpatch: get default codespell dictionary path from package location") introduced the ability to search for the codespell dictionary rather than hardcoding its path. codespell requires Python 3.6 or above, but on some systems, the python executable is a Python 2.7 interpreter. In this case, searching for the dictionary fails, subsequently making codespell fail: No codespell typos will be found - file '/usr/share/codespell/dictionary.txt': No such file or directory So, use python3 to remove ambiguity. In addition, when searching for dictionary.txt, do not check if the codespell executable exists since, - checkpatch.pl only uses dictionary.txt, not the codespell executable. - codespell can be installed via a Python package manager, in which case the codespell executable may not be present in a typical $PATH, but a dictionary does exist. Link: https://lkml.kernel.org/r/20220309180048.147672-1-sagarmp@cs.unc.edu Signed-off-by: Sagar Patel Reviewed-by: Peter Ujfalusi Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2653177f52d9..577e02998701 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -334,7 +334,7 @@ if ($user_codespellfile) { } elsif (!(-f $codespellfile)) { # If /usr/share/codespell/dictionary.txt is not present, try to find it # under codespell's install directory: /data/dictionary.txt - if (($codespell || $help) && which("codespell") ne "" && which("python") ne "") { + if (($codespell || $help) && which("python3") ne "") { my $python_codespell_dict = << "EOF"; import os.path as op @@ -344,7 +344,7 @@ codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt') print(codespell_file, end='') EOF - my $codespell_dict = `python -c "$python_codespell_dict" 2> /dev/null`; + my $codespell_dict = `python3 -c "$python_codespell_dict" 2> /dev/null`; $codespellfile = $codespell_dict if (-f $codespell_dict); } } -- cgit From 105e8c2e47788a612bbc0fc135a3c75ef25f29e4 Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Wed, 23 Mar 2022 16:06:08 -0700 Subject: init: use ktime_us_delta() to make initcall_debug log more precise Use ktime_us_delta() to make the initcall_debug log more precise than right shifting the result of ktime_to_ns() by 10 bits. Link: https://lkml.kernel.org/r/20220209053350.15771-1-mark-pk.tsai@mediatek.com Signed-off-by: Mark-PK Tsai Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney Cc: Steven Rostedt Cc: Matthias Brugger Cc: Masami Hiramatsu Cc: Vlastimil Babka Cc: Kefeng Wang Cc: Rasmus Villemoes Cc: Kees Cook Cc: Valentin Schneider Cc: Peter Zijlstra Cc: YJ Chiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/init/main.c b/init/main.c index 65fa2e41a9c0..c8edcc3029b1 100644 --- a/init/main.c +++ b/init/main.c @@ -1246,15 +1246,11 @@ trace_initcall_start_cb(void *data, initcall_t fn) static __init_or_module void trace_initcall_finish_cb(void *data, initcall_t fn, int ret) { - ktime_t *calltime = (ktime_t *)data; - ktime_t delta, rettime; - unsigned long long duration; + ktime_t rettime, *calltime = (ktime_t *)data; rettime = ktime_get(); - delta = ktime_sub(rettime, *calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, duration); + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } static ktime_t initcall_calltime; -- cgit From abc7da58c4b388ab9f6a756c0f297c29d3af665f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Mar 2022 16:06:11 -0700 Subject: init.h: improve __setup and early_param documentation Igor noted in [1] that there are quite a few __setup() handling functions that return incorrect values. Doing this can be harmless, but it can also cause strings to be added to init's argument or environment list, polluting them. Since __setup() handling and return values are not documented, first add documentation for that. Also add more documentation for early_param() handling and return values. For __setup() functions, returning 0 (not handled) has questionable value if it is just a malformed option value, as in rodata=junk since returning 0 would just cause "rodata=junk" to be added to init's environment unnecessarily: Run /sbin/init as init process with arguments: /sbin/init with environment: HOME=/ TERM=linux splash=native rodata=junk Also, there are no recommendations on whether to print a warning when an unknown parameter value is seen. I am not addressing that here. [1] lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Link: https://lkml.kernel.org/r/20220221050852.1147-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Cc: Ingo Molnar Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index d82b4b2e1d25..baf0b29a7010 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -320,12 +320,19 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } +/* + * NOTE: __setup functions return values: + * @fn returns 1 (or non-zero) if the option argument is "handled" + * and returns 0 if the option argument is "not handled". + */ #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) /* - * NOTE: fn is as per module_param, not __setup! - * Emits warning if fn returns non-zero. + * NOTE: @fn is as per module_param, not __setup! + * I.e., @fn returns 0 for no error or non-zero for error + * (possibly @fn returns a -errno value, but it does not matter). + * Emits warning if @fn returns non-zero. */ #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) -- cgit From f9a40b0890658330c83c95511f9d6b396610defc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Mar 2022 16:06:14 -0700 Subject: init/main.c: return 1 from handled __setup() functions initcall_blacklist() should return 1 to indicate that it handled its cmdline arguments. set_debug_rodata() should return 1 to indicate that it handled its cmdline arguments. Print a warning if the option string is invalid. This prevents these strings from being added to the 'init' program's environment as they are not init arguments/parameters. Link: https://lkml.kernel.org/r/20220221050901.23985-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Cc: Ingo Molnar Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index c8edcc3029b1..4a12c9546f69 100644 --- a/init/main.c +++ b/init/main.c @@ -1190,7 +1190,7 @@ static int __init initcall_blacklist(char *str) } } while (str_entry); - return 0; + return 1; } static bool __init_or_module initcall_blacklisted(initcall_t fn) @@ -1448,7 +1448,9 @@ static noinline void __init kernel_init_freeable(void); bool rodata_enabled __ro_after_init = true; static int __init set_debug_rodata(char *str) { - return strtobool(str, &rodata_enabled); + if (strtobool(str, &rodata_enabled)) + pr_warn("Invalid option string for rodata: '%s'\n", str); + return 1; } __setup("rodata=", set_debug_rodata); #endif -- cgit From 5a519c8fe4d620912385f94372fc8472fa98c662 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 23 Mar 2022 16:06:17 -0700 Subject: fs/pipe: use kvcalloc to allocate a pipe_buffer array Right now, kcalloc is used to allocate a pipe_buffer array. The size of the pipe_buffer struct is 40 bytes. kcalloc allows allocating reliably chunks with sizes less or equal to PAGE_ALLOC_COSTLY_ORDER (3). It means that the maximum pipe size is 3.2MB in this case. In CRIU, we use pipes to dump processes memory. CRIU freezes a target process, injects a parasite code into it and then this code splices memory into pipes. If a maximum pipe size is small, we need to do many iterations or create many pipes. kvcalloc attempt to allocate physically contiguous memory, but upon failure, fall back to non-contiguous (vmalloc) allocation and so it isn't limited by PAGE_ALLOC_COSTLY_ORDER. The maximum pipe size for non-root users is limited by the /proc/sys/fs/pipe-max-size sysctl that is 1MB by default, so only the root user will be able to trigger vmalloc allocations. Link: https://lkml.kernel.org/r/20220104171058.22580-1-avagin@gmail.com Signed-off-by: Andrei Vagin Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 2667db9506e2..ba0841b6674b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -804,7 +804,7 @@ struct pipe_inode_info *alloc_pipe_info(void) if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) goto out_revert_acct; - pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), + pipe->bufs = kvcalloc(pipe_bufs, sizeof(struct pipe_buffer), GFP_KERNEL_ACCOUNT); if (pipe->bufs) { @@ -849,7 +849,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) #endif if (pipe->tmp_page) __free_page(pipe->tmp_page); - kfree(pipe->bufs); + kvfree(pipe->bufs); kfree(pipe); } @@ -1264,8 +1264,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) if (nr_slots < n) return -EBUSY; - bufs = kcalloc(nr_slots, sizeof(*bufs), - GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + bufs = kvcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT); if (unlikely(!bufs)) return -ENOMEM; @@ -1292,7 +1291,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) head = n; tail = 0; - kfree(pipe->bufs); + kvfree(pipe->bufs); pipe->bufs = bufs; pipe->ring_size = nr_slots; if (pipe->max_usage > nr_slots) -- cgit From aeb213cddeb5c31f8d418180e0b9956bfd53b018 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 23 Mar 2022 16:06:20 -0700 Subject: fs/pipe.c: local vars have to match types of proper pipe_inode_info fields head, tail, ring_size are declared as unsigned int, so all local variables that operate with these fields have to be unsigned to avoid signed integer overflow. Right now, it isn't an issue because the maximum pipe size is limited by 1U<<31. Link: https://lkml.kernel.org/r/20220106171946.36128-1-avagin@gmail.com Signed-off-by: Andrei Vagin Suggested-by: Dmitry Safonov <0x7f454c46@gmail.com> Acked-by: Christian Brauner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index ba0841b6674b..9648ac15164a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -607,7 +607,7 @@ out: static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; - int count, head, tail, mask; + unsigned int count, head, tail, mask; switch (cmd) { case FIONREAD: @@ -829,7 +829,7 @@ out_free_uid: void free_pipe_info(struct pipe_inode_info *pipe) { - int i; + unsigned int i; #ifdef CONFIG_WATCH_QUEUE if (pipe->watch_queue) -- cgit From 9ce3c0d26c42d279b6c378a03cd6a61d828f19ca Mon Sep 17 00:00:00 2001 From: Qinghua Jin Date: Wed, 23 Mar 2022 16:06:23 -0700 Subject: minix: fix bug when opening a file with O_DIRECT Testcase: 1. create a minix file system and mount it 2. open a file on the file system with O_RDWR|O_CREAT|O_TRUNC|O_DIRECT 3. open fails with -EINVAL but leaves an empty file behind. All other open() failures don't leave the failed open files behind. It is hard to check the direct_IO op before creating the inode. Just as ext4 and btrfs do, this patch will resolve the issue by allowing to create the file with O_DIRECT but returning error when writing the file. Link: https://lkml.kernel.org/r/20220107133626.413379-1-qhjin.dev@gmail.com Signed-off-by: Qinghua Jin Reported-by: Colin Ian King Reviewed-by: Jan Kara Acked-by: Christian Brauner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index a71f1cf894b9..d4bd94234ef7 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { -- cgit From 2cd50532ce5c798c029a39234ad7f48a174e15ec Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 23 Mar 2022 16:06:26 -0700 Subject: fat: use pointer to simple type in put_user() The put_user(val,ptr) macro wants a pointer to a simple type, but in fat_ioctl_filldir() the d_name field references an "array of chars". Be more accurate and explicitly give the pointer to the first character of the d_name[] array. I noticed that issue while trying to optimize the parisc put_user() macro and used an intermediate variable to store the pointer. In that case I got this error: In file included from include/linux/uaccess.h:11, from include/linux/compat.h:17, from fs/fat/dir.c:18: fs/fat/dir.c: In function `fat_ioctl_filldir': fs/fat/dir.c:725:33: error: invalid initializer 725 | if (put_user(0, d2->d_name) || \ | ^~ include/asm/uaccess.h:152:33: note: in definition of macro `__put_user' 152 | __typeof__(ptr) __ptr = ptr; \ | ^~~ fs/fat/dir.c:759:1: note: in expansion of macro `FAT_IOCTL_FILLDIR_FUNC' 759 | FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) Andreas Schwab suggested to use __typeof__(&*(ptr)) __ptr = ptr; instead. This works, but nevertheless it's probably reasonable to fix the original caller too. Link: https://lkml.kernel.org/r/Ygo+A9MREmC1H3kr@p100 Signed-off-by: Helge Deller Acked-by: OGAWA Hirofumi Cc: David Laight Cc: Andreas Schwab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index c4a274285858..249825017da7 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -722,7 +722,7 @@ static int func(struct dir_context *ctx, const char *name, int name_len, \ if (name_len >= sizeof(d1->d_name)) \ name_len = sizeof(d1->d_name) - 1; \ \ - if (put_user(0, d2->d_name) || \ + if (put_user(0, &d2->d_name[0]) || \ put_user(0, &d2->d_reclen) || \ copy_to_user(d1->d_name, name, name_len) || \ put_user(0, d1->d_name + name_len) || \ -- cgit From b1e2c8df0f007f34c373925471c7b493c9e16620 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 23 Mar 2022 16:06:29 -0700 Subject: cgroup: use irqsave in cgroup_rstat_flush_locked(). All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock either with spin_lock_irq() or spin_lock_irqsave(). cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated() in IRQ context and therefore requires _irqsave() locking suffix in cgroup_rstat_flush_locked(). Since there is no difference between spin_lock_t and raw_spin_lock_t on !RT lockdep does not complain here. On RT lockdep complains because the interrupts were not disabled here and a deadlock is possible. Acquire the raw_spin_lock_t with disabled interrupts. Link: https://lkml.kernel.org/r/20220301122143.1521823-2-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Cc: Johannes Weiner Cc: Thomas Gleixner Cc: Zefan Li From: Sebastian Andrzej Siewior Subject: cgroup: add a comment to cgroup_rstat_flush_locked(). Add a comment why spin_lock_irq() -> raw_spin_lock_irqsave() is needed. Link: https://lkml.kernel.org/r/Yh+DOK73hfVV5ThX@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Cc: Johannes Weiner Cc: Thomas Gleixner Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup/rstat.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 9d331ba44870..ba7a660184e4 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -153,8 +153,17 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); struct cgroup *pos = NULL; + unsigned long flags; - raw_spin_lock(cpu_lock); + /* + * The _irqsave() is needed because cgroup_rstat_lock is + * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring + * this lock with the _irq() suffix only disables interrupts on + * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables + * interrupts on both configurations. The _irqsave() ensures + * that interrupts are always disabled and later restored. + */ + raw_spin_lock_irqsave(cpu_lock, flags); while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { struct cgroup_subsys_state *css; @@ -166,7 +175,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) css->ss->css_rstat_flush(css, cpu); rcu_read_unlock(); } - raw_spin_unlock(cpu_lock); + raw_spin_unlock_irqrestore(cpu_lock, flags); /* if @may_sleep, play nice and yield if necessary */ if (may_sleep && (need_resched() || -- cgit From f05fa10901aa4b11a84257ecb7ac7f8fe7c4ee1b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Mar 2022 16:06:33 -0700 Subject: kexec: make crashk_res, crashk_low_res and crash_notes symbols always visible Patch series "kexec: use IS_ENABLED(CONFIG_KEXEC_CORE) instead of #ifdef", v2. Replace the conditional compilation using "#ifdef CONFIG_KEXEC_CORE" by a check for "IS_ENABLED(CONFIG_KEXEC_CORE)", to simplify the code and increase compile coverage. I only modified x86, arm, arm64 and riscv, other architectures such as sh, powerpc and s390 are better to be kept kexec code as-is so they are not touched. This patch (of 5): Make the forward declarations of crashk_res, crashk_low_res and crash_notes always visible. Code referring to these symbols can then just check for IS_ENABLED(CONFIG_KEXEC_CORE), instead of requiring conditional compilation using an #ifdef, thus preparing to increase compile coverage and simplify the code. Link: https://lkml.kernel.org/r/20211206160514.2000-1-jszhang@kernel.org Link: https://lkml.kernel.org/r/20211206160514.2000-2-jszhang@kernel.org Signed-off-by: Jisheng Zhang Acked-by: Baoquan He Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Eric W. Biederman Cc: Alexandre Ghiti Cc: Palmer Dabbelt Cc: Russell King (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0c994ae37729..58d1b58a971e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -20,6 +20,12 @@ #include +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; +extern note_buf_t __percpu *crash_notes; + #ifdef CONFIG_KEXEC_CORE #include #include @@ -350,12 +356,6 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; -extern note_buf_t __percpu *crash_notes; - /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit From d414cb379ac35e3523484e1d866b27832c2218c7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Mar 2022 16:06:36 -0700 Subject: riscv: mm: init: use IS_ENABLED(CONFIG_KEXEC_CORE) instead of #ifdef Replace the conditional compilation using "#ifdef CONFIG_KEXEC_CORE" by a check for "IS_ENABLED(CONFIG_KEXEC_CORE)", to simplify the code and increase compile coverage. Link: https://lkml.kernel.org/r/20211206160514.2000-3-jszhang@kernel.org Signed-off-by: Jisheng Zhang Acked-by: Palmer Dabbelt Acked-by: Baoquan He Cc: Albert Ou Cc: Alexandre Ghiti Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Eric W. Biederman Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Russell King Cc: Russell King (Oracle) Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/mm/init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0d588032d6e6..f6275b317129 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -957,7 +957,6 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -974,6 +973,8 @@ static void __init reserve_crashkernel(void) int ret = 0; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; /* * Don't reserve a region for a crash kernel on a crash kernel * since it doesn't make much sense and we have limited memory @@ -1023,7 +1024,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#endif /* CONFIG_KEXEC_CORE */ void __init paging_init(void) { @@ -1037,9 +1037,7 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); -#ifdef CONFIG_KEXEC_CORE reserve_crashkernel(); -#endif memblock_dump_all(); } -- cgit From 4ece09be9913a87ff99ea347fd7e7adad5bdbc8f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Mar 2022 16:06:39 -0700 Subject: x86/setup: use IS_ENABLED(CONFIG_KEXEC_CORE) instead of #ifdef Replace the conditional compilation using "#ifdef CONFIG_KEXEC_CORE" by a check for "IS_ENABLED(CONFIG_KEXEC_CORE)", to simplify the code and increase compile coverage. Link: https://lkml.kernel.org/r/20211206160514.2000-4-jszhang@kernel.org Signed-off-by: Jisheng Zhang Acked-by: Baoquan He Cc: Albert Ou Cc: Alexandre Ghiti Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Eric W. Biederman Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Russell King Cc: Russell King (Oracle) Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d7e1788c91..c95b9ac5a457 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -411,8 +411,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) * --------- Crashkernel reservation ------------------------------ */ -#ifdef CONFIG_KEXEC_CORE - /* 16M alignment for crash kernel regions */ #define CRASH_ALIGN SZ_16M @@ -490,6 +488,9 @@ static void __init reserve_crashkernel(void) bool high = false; int ret; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + total_mem = memblock_phys_mem_size(); /* crashkernel=XM */ @@ -555,11 +556,6 @@ static void __init reserve_crashkernel(void) crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); } -#else -static void __init reserve_crashkernel(void) -{ -} -#endif static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, -- cgit From d339f1584f0acf32b32326627fa3b48e6e65c599 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Mar 2022 16:06:42 -0700 Subject: arm64: mm: use IS_ENABLED(CONFIG_KEXEC_CORE) instead of #ifdef Replace the conditional compilation using "#ifdef CONFIG_KEXEC_CORE" by a check for "IS_ENABLED(CONFIG_KEXEC_CORE)", to simplify the code and increase compile coverage. Link: https://lkml.kernel.org/r/20211206160514.2000-5-jszhang@kernel.org Signed-off-by: Jisheng Zhang Acked-by: Catalin Marinas Acked-by: Baoquan He Cc: Albert Ou Cc: Alexandre Ghiti Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric W. Biederman Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Russell King Cc: Russell King (Oracle) Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/init.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771..3973e305adc8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -64,7 +64,6 @@ EXPORT_SYMBOL(memstart_addr); */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -78,6 +77,9 @@ static void __init reserve_crashkernel(void) unsigned long long crash_max = arm64_dma_phys_limit; int ret; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); /* no crashkernel= or invalid value specified */ @@ -110,11 +112,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#else -static void __init reserve_crashkernel(void) -{ -} -#endif /* CONFIG_KEXEC_CORE */ /* * Return the maximum physical address for a zone accessible by the given bits -- cgit From b2377d4b94519dd84364a82ba14c79c50ac34fa9 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Mar 2022 16:06:45 -0700 Subject: docs: kdump: update description about sysfs file system support Patch series "Update doc and fix some issues about kdump", v2. This patch (of 5): After commit 6a108a14fa35 ("kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT"), "Configure standard kernel features (for small systems)" is not exist, we should use "Configure standard kernel features (expert users)" now. Link: https://lkml.kernel.org/r/1644324666-15947-1-git-send-email-yangtiezhu@loongson.cn Link: https://lkml.kernel.org/r/1644324666-15947-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Acked-by: Baoquan He Cc: Baoquan He Cc: Jonathan Corbet Cc: Marco Elver Cc: Andrey Ryabinin Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kdump/kdump.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index cb30ca3df27c..d187df2f76ae 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -146,9 +146,9 @@ System kernel config options CONFIG_SYSFS=y Note that "sysfs file system support" might not appear in the "Pseudo - filesystems" menu if "Configure standard kernel features (for small - systems)" is not enabled in "General Setup." In this case, check the - .config file itself to ensure that sysfs is turned on, as follows:: + filesystems" menu if "Configure standard kernel features (expert users)" + is not enabled in "General Setup." In this case, check the .config file + itself to ensure that sysfs is turned on, as follows:: grep 'CONFIG_SYSFS' .config -- cgit From ae6694c1aa8f97c20b448622e4d6ba9adeca082d Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Mar 2022 16:06:48 -0700 Subject: docs: kdump: add scp example to write out the dump file Except cp and makedumpfile, add scp example to write out the dump file. Link: https://lkml.kernel.org/r/1644324666-15947-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Acked-by: Baoquan He Cc: Andrey Ryabinin Cc: Jonathan Corbet Cc: Marco Elver Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kdump/kdump.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index d187df2f76ae..a748e7eb4429 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -533,6 +533,10 @@ the following command:: cp /proc/vmcore +or use scp to write out the dump file between hosts on a network, e.g:: + + scp /proc/vmcore remote_username@remote_ip: + You can also use makedumpfile utility to write out the dump file with specified options to filter out unwanted contents, e.g:: -- cgit From 1a2383e8b84c0451fd9b1eec3b9aab16f30b597c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Mar 2022 16:06:51 -0700 Subject: panic: unset panic_on_warn inside panic() In the current code, the following three places need to unset panic_on_warn before calling panic() to avoid recursive panics: kernel/kcsan/report.c: print_report() kernel/sched/core.c: __schedule_bug() mm/kfence/report.c: kfence_report_error() In order to avoid copy-pasting "panic_on_warn = 0" all over the places, it is better to move it inside panic() and then remove it from the other places. Link: https://lkml.kernel.org/r/1644324666-15947-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 55b50e052ec3..95ba825522dd 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -185,6 +185,16 @@ void panic(const char *fmt, ...) int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from panicking the + * system on this thread. Other threads are blocked by the + * panic_mutex in panic(). + */ + panic_on_warn = 0; + } + /* * Disable local interrupts. This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since @@ -576,16 +586,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (regs) show_regs(regs); - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn) panic("panic_on_warn set ...\n"); - } if (!regs) dump_stack(); -- cgit From d83ce027a54068fabb70d2c252e1ce2da86784a4 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Mar 2022 16:06:54 -0700 Subject: ubsan: no need to unset panic_on_warn in ubsan_epilogue() panic_on_warn is unset inside panic(), so no need to unset it before calling panic() in ubsan_epilogue(). Link: https://lkml.kernel.org/r/1644324666-15947-5-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index bdc380ff5d5c..36bd75e33426 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -154,16 +154,8 @@ static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn) panic("panic_on_warn set ...\n"); - } } void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) -- cgit From e7ce7500375a63348e1d3a703c8d5003cbe3fea6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Mar 2022 16:06:57 -0700 Subject: kasan: no need to unset panic_on_warn in end_report() panic_on_warn is unset inside panic(), so no need to unset it before calling panic() in end_report(). Link: https://lkml.kernel.org/r/1644324666-15947-6-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/report.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3ad9624dcc56..f14146563d41 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -117,16 +117,8 @@ static void end_report(unsigned long *flags, unsigned long addr) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) panic("panic_on_warn set ...\n"); - } if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) panic("kasan.fault=panic set ...\n"); kasan_enable_current(); -- cgit From 92333baaceb399c7878c7f868a0746d7d22f9b77 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 23 Mar 2022 16:07:00 -0700 Subject: taskstats: remove unneeded dead assignment make clang-analyzer on x86_64 defconfig caught my attention with: kernel/taskstats.c:120:2: warning: Value stored to 'rc' is never read \ [clang-analyzer-deadcode.DeadStores] rc = 0; ^ Commit d94a041519f3 ("taskstats: free skb, avoid returns in send_cpu_listeners") made send_cpu_listeners() not return a value and hence, the rc variable remained only to be used within the loop where it is always assigned before read and it does not need any other initialisation. So, simply remove this unneeded dead initializing assignment. As compilers will detect this unneeded assignment and optimize this anyway, the resulting object code is identical before and after this change. No functional change. No change to object code. [akpm@linux-foundation.org: reduce scope of `rc'] Link: https://lkml.kernel.org/r/20220307093942.21310-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Reviewed-by: Nick Desaulniers Cc: Balbir Singh Cc: Tom Rix Cc: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 2b4898b4752e..bcac5a9043aa 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -113,13 +113,14 @@ static void send_cpu_listeners(struct sk_buff *skb, struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); - int rc, delcount = 0; + int delcount = 0; genlmsg_end(skb, reply); - rc = 0; down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { + int rc; + skb_next = NULL; if (!list_is_last(&s->list, &listeners->list)) { skb_next = skb_clone(skb_cur, GFP_KERNEL); -- cgit From a1ff1de00db21ecb956213f046b79741b64c6b65 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 23 Mar 2022 16:07:03 -0700 Subject: docs: sysctl/kernel: add missing bit to panic_print Patch series "Some improvements on panic_print". This is a mix of a documentation fix with some additions to the "panic_print" syscall / parameter. The goal here is being able to collect all CPUs backtraces during a panic event and also to enable "panic_print" in a kdump event - details of the reasoning and design choices in the patches. This patch (of 3): Commit de6da1e8bcf0 ("panic: add an option to replay all the printk message in buffer") added a new bit to the sysctl/kernel parameter "panic_print", but the documentation was added only in kernel-parameters.txt, not in the sysctl guide. Fix it here by adding bit 5 to sysctl admin-guide documentation. [rdunlap@infradead.org: fix table format warning] Link: https://lkml.kernel.org/r/20220109055635.6999-1-rdunlap@infradead.org Link: https://lkml.kernel.org/r/20211109202848.610874-1-gpiccoli@igalia.com Link: https://lkml.kernel.org/r/20211109202848.610874-2-gpiccoli@igalia.com Fixes: de6da1e8bcf0 ("panic: add an option to replay all the printk message in buffer") Signed-off-by: Guilherme G. Piccoli Reviewed-by: Feng Tang Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Samuel Iglesias Gonsalvez Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/kernel.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d359bcfadd39..0f86e9f93129 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -795,6 +795,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer +bit 5 print all printk messages in buffer ===== ============================================ So for example to print tasks and memory info on panic, user can:: -- cgit From 8d470a45d1a65e6a308aeee5da7f5b37d3303c04 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 23 Mar 2022 16:07:06 -0700 Subject: panic: add option to dump all CPUs backtraces in panic_print Currently the "panic_print" parameter/sysctl allows some interesting debug information to be printed during a panic event. This is useful for example in cases the user cannot kdump due to resource limits, or if the user collects panic logs in a serial output (or pstore) and prefers a fast reboot instead of a kdump. Happens that currently there's no way to see all CPUs backtraces in a panic using "panic_print" on architectures that support that. We do have "oops_all_cpu_backtrace" sysctl, but although partially overlapping in the functionality, they are orthogonal in nature: "panic_print" is a panic tuning (and we have panics without oopses, like direct calls to panic() or maybe other paths that don't go through oops_enter() function), and the original purpose of "oops_all_cpu_backtrace" is to provide more information on oopses for cases in which the users desire to continue running the kernel even after an oops, i.e., used in non-panic scenarios. So, we hereby introduce an additional bit for "panic_print" to allow dumping the CPUs backtraces during a panic event. Link: https://lkml.kernel.org/r/20211109202848.610874-3-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Reviewed-by: Feng Tang Cc: Iurii Zaikin Cc: Kees Cook Cc: Luis Chamberlain Cc: Samuel Iglesias Gonsalvez Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 1 + Documentation/admin-guide/sysctl/kernel.rst | 1 + kernel/panic.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7123524a86b8..6287f6be4a98 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3726,6 +3726,7 @@ bit 3: print locks info if CONFIG_LOCKDEP is on bit 4: print ftrace buffer bit 5: print all printk messages in buffer + bit 6: print all CPUs backtrace (if available in the arch) panic_on_taint= Bitmask for conditionally calling panic() in add_taint() Format: [,nousertaint] diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0f86e9f93129..cfd6c20ce42e 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -796,6 +796,7 @@ bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer bit 5 print all printk messages in buffer +bit 6 print all CPUs backtrace (if available in the arch) ===== ============================================ So for example to print tasks and memory info on panic, user can:: diff --git a/kernel/panic.c b/kernel/panic.c index 95ba825522dd..3c3fb36d8d41 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(panic_timeout); #define PANIC_PRINT_LOCK_INFO 0x00000008 #define PANIC_PRINT_FTRACE_INFO 0x00000010 #define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 +#define PANIC_PRINT_ALL_CPU_BT 0x00000040 unsigned long panic_print; ATOMIC_NOTIFIER_HEAD(panic_notifier_list); @@ -152,6 +153,9 @@ static void panic_print_sys_info(void) if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) console_flush_on_panic(CONSOLE_REPLAY_ALL); + if (panic_print & PANIC_PRINT_ALL_CPU_BT) + trigger_all_cpu_backtrace(); + if (panic_print & PANIC_PRINT_TASK_INFO) show_state(); -- cgit From f953f140f318a641c443b0b8c618155ed90a7a10 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 23 Mar 2022 16:07:09 -0700 Subject: panic: move panic_print before kmsg dumpers The panic_print setting allows users to collect more information in a panic event, like memory stats, tasks, CPUs backtraces, etc. This is an interesting debug mechanism, but currently the print event happens *after* kmsg_dump(), meaning that pstore, for example, cannot collect a dmesg with the panic_print extra information. This patch changes that in 2 steps: (a) The panic_print setting allows to replay the existing kernel log buffer to the console (bit 5), besides the extra information dump. This functionality makes sense only at the end of the panic() function. So, we hereby allow to distinguish the two situations by a new boolean parameter in the function panic_print_sys_info(). (b) With the above change, we can safely call panic_print_sys_info() before kmsg_dump(), allowing to dump the extra information when using pstore or other kmsg dumpers. The additional messages from panic_print could overwrite the oldest messages when the buffer is full. The only reasonable solution is to use a large enough log buffer, hence we added an advice into the kernel parameters documentation about that. Link: https://lkml.kernel.org/r/20220214141308.841525-1-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Acked-by: Baoquan He Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Cc: Feng Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ kernel/panic.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6287f6be4a98..a18e30a95931 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3727,6 +3727,10 @@ bit 4: print ftrace buffer bit 5: print all printk messages in buffer bit 6: print all CPUs backtrace (if available in the arch) + *Be aware* that this option may print a _lot_ of lines, + so there are risks of losing older messages in the log. + Use this option carefully, maybe worth to setup a + bigger log buffer with "log_buf_len" along with this. panic_on_taint= Bitmask for conditionally calling panic() in add_taint() Format: [,nousertaint] diff --git a/kernel/panic.c b/kernel/panic.c index 3c3fb36d8d41..eb4dfb932c85 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -148,10 +148,13 @@ void nmi_panic(struct pt_regs *regs, const char *msg) } EXPORT_SYMBOL(nmi_panic); -static void panic_print_sys_info(void) +static void panic_print_sys_info(bool console_flush) { - if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) - console_flush_on_panic(CONSOLE_REPLAY_ALL); + if (console_flush) { + if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) + console_flush_on_panic(CONSOLE_REPLAY_ALL); + return; + } if (panic_print & PANIC_PRINT_ALL_CPU_BT) trigger_all_cpu_backtrace(); @@ -286,6 +289,8 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + panic_print_sys_info(false); + kmsg_dump(KMSG_DUMP_PANIC); /* @@ -316,7 +321,7 @@ void panic(const char *fmt, ...) debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); - panic_print_sys_info(); + panic_print_sys_info(true); if (!panic_blink) panic_blink = no_blink; -- cgit From 17581aa13680e1c38759cbb0ed32d04aa8849bea Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 23 Mar 2022 16:07:12 -0700 Subject: kcov: split ioctl handling into locked and unlocked parts Patch series "kcov: improve mmap processing", v3. Subsequent mmaps of the same kcov descriptor currently do not update the virtual memory of the task and yet return 0 (success). This is counter-intuitive and may lead to unexpected memory access errors. Also, this unnecessarily limits the functionality of kcov to only the simplest usage scenarios. Kcov instances are effectively forever attached to their first address spaces and it becomes impossible to e.g. reuse the same kcov handle in forked child processes without mmapping the memory first. This is exactly what we tried to do in syzkaller and inadvertently came upon this behavior. This patch series addresses the problem described above. This patch (of 3): Currently all ioctls are de facto processed under a spinlock in order to serialise them. This, however, prohibits the use of vmalloc and other memory management functions in the implementations of those ioctls, unnecessary complicating any further changes to the code. Let all ioctls first be processed inside the kcov_ioctl() function which should execute the ones that are not compatible with spinlock and then pass control to kcov_ioctl_locked() for all other ones. KCOV_REMOTE_ENABLE is processed both in kcov_ioctl() and kcov_ioctl_locked() as the steps are easily separable. Although it is still compatible with a spinlock, move KCOV_INIT_TRACE handling to kcov_ioctl(), so that the changes from the next commit are easier to follow. Link: https://lkml.kernel.org/r/20220117153634.150357-1-nogikh@google.com Link: https://lkml.kernel.org/r/20220117153634.150357-2-nogikh@google.com Signed-off-by: Aleksandr Nogikh Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Cc: Marco Elver Cc: Alexander Potapenko Cc: Taras Madan Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 68 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 36ca640c4f8e..e1be7301500b 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -564,31 +564,12 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; - unsigned long size, unused; + unsigned long flags, unused; int mode, i; struct kcov_remote_arg *remote_arg; struct kcov_remote *remote; - unsigned long flags; switch (cmd) { - case KCOV_INIT_TRACE: - /* - * Enable kcov in trace mode and setup buffer size. - * Must happen before anything else. - */ - if (kcov->mode != KCOV_MODE_DISABLED) - return -EBUSY; - /* - * Size must be at least 2 to hold current position and one PC. - * Later we allocate size * sizeof(unsigned long) memory, - * that must not overflow. - */ - size = arg; - if (size < 2 || size > INT_MAX / sizeof(unsigned long)) - return -EINVAL; - kcov->size = size; - kcov->mode = KCOV_MODE_INIT; - return 0; case KCOV_ENABLE: /* * Enable coverage for the current task. @@ -692,9 +673,32 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) struct kcov_remote_arg *remote_arg = NULL; unsigned int remote_num_handles; unsigned long remote_arg_size; - unsigned long flags; + unsigned long size, flags; - if (cmd == KCOV_REMOTE_ENABLE) { + kcov = filep->private_data; + switch (cmd) { + case KCOV_INIT_TRACE: + /* + * Enable kcov in trace mode and setup buffer size. + * Must happen before anything else. + * + * First check the size argument - it must be at least 2 + * to hold the current position and one PC. Later we allocate + * size * sizeof(unsigned long) memory, that must not overflow. + */ + size = arg; + if (size < 2 || size > INT_MAX / sizeof(unsigned long)) + return -EINVAL; + spin_lock_irqsave(&kcov->lock, flags); + if (kcov->mode != KCOV_MODE_DISABLED) { + spin_unlock_irqrestore(&kcov->lock, flags); + return -EBUSY; + } + kcov->size = size; + kcov->mode = KCOV_MODE_INIT; + spin_unlock_irqrestore(&kcov->lock, flags); + return 0; + case KCOV_REMOTE_ENABLE: if (get_user(remote_num_handles, (unsigned __user *)(arg + offsetof(struct kcov_remote_arg, num_handles)))) return -EFAULT; @@ -710,16 +714,18 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) return -EINVAL; } arg = (unsigned long)remote_arg; + fallthrough; + default: + /* + * All other commands can be normally executed under a spin lock, so we + * obtain and release it here in order to simplify kcov_ioctl_locked(). + */ + spin_lock_irqsave(&kcov->lock, flags); + res = kcov_ioctl_locked(kcov, cmd, arg); + spin_unlock_irqrestore(&kcov->lock, flags); + kfree(remote_arg); + return res; } - - kcov = filep->private_data; - spin_lock_irqsave(&kcov->lock, flags); - res = kcov_ioctl_locked(kcov, cmd, arg); - spin_unlock_irqrestore(&kcov->lock, flags); - - kfree(remote_arg); - - return res; } static const struct file_operations kcov_fops = { -- cgit From b3d7fe86fbd06638c71dd851ba921adf50d912ce Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 23 Mar 2022 16:07:15 -0700 Subject: kcov: properly handle subsequent mmap calls Allocate the kcov buffer during KCOV_MODE_INIT in order to untie mmapping of a kcov instance and the actual coverage collection process. Modify kcov_mmap, so that it can be reliably used any number of times once KCOV_MODE_INIT has succeeded. These changes to the user-facing interface of the tool only weaken the preconditions, so all existing user space code should remain compatible with the new version. Link: https://lkml.kernel.org/r/20220117153634.150357-3-nogikh@google.com Signed-off-by: Aleksandr Nogikh Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Marco Elver Cc: Sebastian Andrzej Siewior Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index e1be7301500b..475524bd900a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -459,37 +459,28 @@ void kcov_task_exit(struct task_struct *t) static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) { int res = 0; - void *area; struct kcov *kcov = vma->vm_file->private_data; unsigned long size, off; struct page *page; unsigned long flags; - area = vmalloc_user(vma->vm_end - vma->vm_start); - if (!area) - return -ENOMEM; - spin_lock_irqsave(&kcov->lock, flags); size = kcov->size * sizeof(unsigned long); - if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || + if (kcov->area == NULL || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; } - if (!kcov->area) { - kcov->area = area; - vma->vm_flags |= VM_DONTEXPAND; - spin_unlock_irqrestore(&kcov->lock, flags); - for (off = 0; off < size; off += PAGE_SIZE) { - page = vmalloc_to_page(kcov->area + off); - if (vm_insert_page(vma, vma->vm_start + off, page)) - WARN_ONCE(1, "vm_insert_page() failed"); - } - return 0; + spin_unlock_irqrestore(&kcov->lock, flags); + vma->vm_flags |= VM_DONTEXPAND; + for (off = 0; off < size; off += PAGE_SIZE) { + page = vmalloc_to_page(kcov->area + off); + if (vm_insert_page(vma, vma->vm_start + off, page)) + WARN_ONCE(1, "vm_insert_page() failed"); } + return 0; exit: spin_unlock_irqrestore(&kcov->lock, flags); - vfree(area); return res; } @@ -674,6 +665,7 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) unsigned int remote_num_handles; unsigned long remote_arg_size; unsigned long size, flags; + void *area; kcov = filep->private_data; switch (cmd) { @@ -683,17 +675,21 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) * Must happen before anything else. * * First check the size argument - it must be at least 2 - * to hold the current position and one PC. Later we allocate - * size * sizeof(unsigned long) memory, that must not overflow. + * to hold the current position and one PC. */ size = arg; if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; + area = vmalloc_user(size * sizeof(unsigned long)); + if (area == NULL) + return -ENOMEM; spin_lock_irqsave(&kcov->lock, flags); if (kcov->mode != KCOV_MODE_DISABLED) { spin_unlock_irqrestore(&kcov->lock, flags); + vfree(area); return -EBUSY; } + kcov->area = area; kcov->size = size; kcov->mode = KCOV_MODE_INIT; spin_unlock_irqrestore(&kcov->lock, flags); -- cgit From 0cbcc92917c5de80f15c24d033566539ad696892 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 23 Mar 2022 16:07:18 -0700 Subject: kernel/resource: fix kfree() of bootmem memory again Since commit ebff7d8f270d ("mem hotunplug: fix kfree() of bootmem memory"), we could get a resource allocated during boot via alloc_resource(). And it's required to release the resource using free_resource(). Howerver, many people use kfree directly which will result in kernel BUG. In order to fix this without fixing every call site, just leak a couple of bytes in such corner case. Link: https://lkml.kernel.org/r/20220217083619.19305-1-linmiaohe@huawei.com Fixes: ebff7d8f270d ("mem hotunplug: fix kfree() of bootmem memory") Signed-off-by: Miaohe Lin Suggested-by: David Hildenbrand Cc: Dan Williams Cc: Alistair Popple Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 41 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 9c08d6e9eef2..34eaee179689 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -56,14 +56,6 @@ struct resource_constraint { static DEFINE_RWLOCK(resource_lock); -/* - * For memory hotplug, there is no way to free resource entries allocated - * by boot mem after the system is up. So for reusing the resource entry - * we need to remember the resource. - */ -static struct resource *bootmem_resource_free; -static DEFINE_SPINLOCK(bootmem_resource_lock); - static struct resource *next_resource(struct resource *p) { if (p->child) @@ -160,36 +152,19 @@ __initcall(ioresources_init); static void free_resource(struct resource *res) { - if (!res) - return; - - if (!PageSlab(virt_to_head_page(res))) { - spin_lock(&bootmem_resource_lock); - res->sibling = bootmem_resource_free; - bootmem_resource_free = res; - spin_unlock(&bootmem_resource_lock); - } else { + /** + * If the resource was allocated using memblock early during boot + * we'll leak it here: we can only return full pages back to the + * buddy and trying to be smart and reusing them eventually in + * alloc_resource() overcomplicates resource handling. + */ + if (res && PageSlab(virt_to_head_page(res))) kfree(res); - } } static struct resource *alloc_resource(gfp_t flags) { - struct resource *res = NULL; - - spin_lock(&bootmem_resource_lock); - if (bootmem_resource_free) { - res = bootmem_resource_free; - bootmem_resource_free = res->sibling; - } - spin_unlock(&bootmem_resource_lock); - - if (res) - memset(res, 0, sizeof(struct resource)); - else - res = kzalloc(sizeof(struct resource), flags); - - return res; + return kzalloc(sizeof(struct resource), flags); } /* Return the conflict entry if you can't request it */ -- cgit From b027471adaf955efde6153d67f391fe1604b7292 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 23 Mar 2022 16:07:21 -0700 Subject: Revert "ubsan, kcsan: Don't combine sanitizer with kcov on clang" This reverts commit ea91a1d45d19469001a4955583187b0d75915759. Since df05c0e9496c ("Documentation: Raise the minimum supported version of LLVM to 11.0.0") the minimum Clang version is now 11.0, which fixed the UBSAN/KCSAN vs. KCOV incompatibilities. Link: https://bugs.llvm.org/show_bug.cgi?id=45831 Link: https://lkml.kernel.org/r/YaodyZzu0MTCJcvO@elver.google.com Link: https://lkml.kernel.org/r/20220128105631.509772-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kcsan | 11 ----------- lib/Kconfig.ubsan | 12 ------------ 2 files changed, 23 deletions(-) diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 63b70b8c5551..de022445fbba 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -10,21 +10,10 @@ config HAVE_KCSAN_COMPILER For the list of compilers that support KCSAN, please see . -config KCSAN_KCOV_BROKEN - def_bool KCOV && CC_HAS_SANCOV_TRACE_PC - depends on CC_IS_CLANG - depends on !$(cc-option,-Werror=unused-command-line-argument -fsanitize=thread -fsanitize-coverage=trace-pc) - help - Some versions of clang support either KCSAN and KCOV but not the - combination of the two. - See https://bugs.llvm.org/show_bug.cgi?id=45831 for the status - in newer releases. - menuconfig KCSAN bool "KCSAN: dynamic data race detector" depends on HAVE_ARCH_KCSAN && HAVE_KCSAN_COMPILER depends on DEBUG_KERNEL && !KASAN - depends on !KCSAN_KCOV_BROKEN select STACKTRACE help The Kernel Concurrency Sanitizer (KCSAN) is a dynamic diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 236c5cefc4cc..f3c57ed51838 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -27,16 +27,6 @@ config UBSAN_TRAP the system. For some system builders this is an acceptable trade-off. -config UBSAN_KCOV_BROKEN - def_bool KCOV && CC_HAS_SANCOV_TRACE_PC - depends on CC_IS_CLANG - depends on !$(cc-option,-Werror=unused-command-line-argument -fsanitize=bounds -fsanitize-coverage=trace-pc) - help - Some versions of clang support either UBSAN or KCOV but not the - combination of the two. - See https://bugs.llvm.org/show_bug.cgi?id=45831 for the status - in newer releases. - config CC_HAS_UBSAN_BOUNDS def_bool $(cc-option,-fsanitize=bounds) @@ -46,7 +36,6 @@ config CC_HAS_UBSAN_ARRAY_BOUNDS config UBSAN_BOUNDS bool "Perform array index bounds checking" default UBSAN - depends on !UBSAN_KCOV_BROKEN depends on CC_HAS_UBSAN_ARRAY_BOUNDS || CC_HAS_UBSAN_BOUNDS help This option enables detection of directly indexed out of bounds @@ -72,7 +61,6 @@ config UBSAN_ARRAY_BOUNDS config UBSAN_LOCAL_BOUNDS bool "Perform array local bounds checking" depends on UBSAN_TRAP - depends on !UBSAN_KCOV_BROKEN depends on $(cc-option,-fsanitize=local-bounds) help This option enables -fsanitize=local-bounds which traps when an -- cgit