From ce6616724fb425d6043d0dad6af996cd7c79bcc4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 5 Jul 2023 23:51:27 +0200 Subject: ubsan: Clarify Kconfig text for CONFIG_UBSAN_TRAP Make it clearer in the one-line description and the verbose description text that CONFIG_UBSAN_TRAP as currently implemented involves a tradeoff of much less helpful oops messages in exchange for a smaller kernel image. (With the additional effect of turning UBSAN warnings into crashes, which may or may not be desired.) Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20230705215128.486054-1-jannh@google.com Signed-off-by: Kees Cook --- lib/Kconfig.ubsan | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index efae7e011956..59e21bfec188 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -13,7 +13,7 @@ menuconfig UBSAN if UBSAN config UBSAN_TRAP - bool "On Sanitizer warnings, abort the running kernel code" + bool "Abort on Sanitizer warnings (smaller kernel but less verbose)" depends on !COMPILE_TEST help Building kernels with Sanitizer features enabled tends to grow @@ -26,6 +26,14 @@ config UBSAN_TRAP the system. For some system builders this is an acceptable trade-off. + Also note that selecting Y will cause your kernel to Oops + with an "illegal instruction" error with no further details + when a UBSAN violation occurs. (Except on arm64, which will + report which Sanitizer failed.) This may make it hard to + determine whether an Oops was caused by UBSAN or to figure + out the details of a UBSAN violation. It makes the kernel log + output less useful for bug reports. + config CC_HAS_UBSAN_BOUNDS_STRICT def_bool $(cc-option,-fsanitize=bounds-strict) help -- cgit From 8453e7924a1a9130f2b4d2c507de2cdc3892a5b5 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Tue, 23 May 2023 02:14:25 +0000 Subject: soc: fsl: qe: Replace all non-returning strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230523021425.2406309-1-azeemshaikh38@gmail.com Signed-off-by: Kees Cook --- drivers/soc/fsl/qe/qe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c index b3c226eb5292..58746e570d14 100644 --- a/drivers/soc/fsl/qe/qe.c +++ b/drivers/soc/fsl/qe/qe.c @@ -524,7 +524,7 @@ int qe_upload_firmware(const struct qe_firmware *firmware) * saved microcode information and put in the new. 
*/ memset(&qe_firmware_info, 0, sizeof(qe_firmware_info)); - strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id)); + strscpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id)); qe_firmware_info.extended_modes = be64_to_cpu(firmware->extended_modes); memcpy(qe_firmware_info.vtraps, firmware->vtraps, sizeof(firmware->vtraps)); @@ -599,7 +599,7 @@ struct qe_firmware_info *qe_get_firmware_info(void) /* Copy the data into qe_firmware_info*/ sprop = of_get_property(fw, "id", NULL); if (sprop) - strlcpy(qe_firmware_info.id, sprop, + strscpy(qe_firmware_info.id, sprop, sizeof(qe_firmware_info.id)); of_property_read_u64(fw, "extended-modes", -- cgit From 630fdd592912614a72d00026fdadad72d9ef62eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 26 Jul 2023 14:59:57 -0700 Subject: seq_file: seq_show_option_n() is used for precise sizes When seq_show_option_n() is used, it is for non-string memory that happens to be printable bytes. As such, we must use memcpy() to copy the bytes and then explicitly NUL-terminate the result. Cc: Andy Shevchenko Cc: Andrew Morton Cc: Al Viro Cc: Muchun Song Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230726215957.never.619-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/seq_file.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index bd023dd38ae6..386ab580b839 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -249,18 +249,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name, /** * seq_show_option_n - display mount options with appropriate escapes - * where @value must be a specific length. + * where @value must be a specific length (i.e. + * not NUL-terminated). * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL - * @length: the length of @value to display + * @length: the exact length of @value to display, must be constant expression * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ - strncpy(val_buf, value, length); \ + memcpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } -- cgit From 61ce78f29a694772c3b2c5c749589682dbdfec2d Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Mon, 3 Jul 2023 16:06:41 +0000 Subject: um: Remove strlcpy declaration strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230703160641.1790935-1-azeemshaikh38@gmail.com Signed-off-by: Kees Cook --- arch/um/include/shared/user.h | 1 - arch/um/os-Linux/umid.c | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 0347a190429c..981e11d8e025 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -50,7 +50,6 @@ static inline int printk(const char *fmt, ...) 
#endif extern int in_aton(char *str); -extern size_t strlcpy(char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); extern size_t strscpy(char *, const char *, size_t); diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 7a1abb829930..288c422bfa96 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -40,7 +40,7 @@ static int __init make_uml_dir(void) __func__); goto err; } - strlcpy(dir, home, sizeof(dir)); + strscpy(dir, home, sizeof(dir)); uml_dir++; } strlcat(dir, uml_dir, sizeof(dir)); @@ -243,7 +243,7 @@ int __init set_umid(char *name) if (strlen(name) > UMID_LEN - 1) return -E2BIG; - strlcpy(umid, name, sizeof(umid)); + strscpy(umid, name, sizeof(umid)); return 0; } @@ -262,7 +262,7 @@ static int __init make_umid(void) make_uml_dir(); if (*umid == '\0') { - strlcpy(tmp, uml_dir, sizeof(tmp)); + strscpy(tmp, uml_dir, sizeof(tmp)); strlcat(tmp, "XXXXXX", sizeof(tmp)); fd = mkstemp(tmp); if (fd < 0) { -- cgit From c9732f1461f947429f8ee9289792c5ffea793350 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Mon, 3 Jul 2023 16:58:16 +0000 Subject: perf: Replace strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230703165817.2840457-1-azeemshaikh38@gmail.com Signed-off-by: Kees Cook --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 78ae7b6f90fd..2554f5fc70dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8249,7 +8249,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) unsigned int size; memset(comm, 0, sizeof(comm)); - strlcpy(comm, comm_event->task->comm, sizeof(comm)); + strscpy(comm, comm_event->task->comm, sizeof(comm)); size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; @@ -8704,7 +8704,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) } cpy_name: - strlcpy(tmp, name, sizeof(tmp)); + strscpy(tmp, name, sizeof(tmp)); name = tmp; got_name: /* @@ -9128,7 +9128,7 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) goto err; - strlcpy(name, sym, KSYM_NAME_LEN); + strscpy(name, sym, KSYM_NAME_LEN); name_len = strlen(name) + 1; while (!IS_ALIGNED(name_len, sizeof(u64))) name[name_len++] = '\0'; -- cgit From b6ed2f7758a558485ff889b4a3912fcb6abcf689 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Thu, 6 Jul 2023 17:58:04 +0000 Subject: EISA: Replace all non-returning strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. 
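A minimal sketch of the behavioral difference (the helper and its names are hypothetical, not from the patch): strlcpy() always returns strlen(src), so it must read the source to its terminating NUL even when that lies beyond the destination size, while strscpy() bounds both the read and the write and reports truncation.

	#include <linux/printk.h>
	#include <linux/string.h>

	/* 'id' may be longer than 'dst' or even unterminated. */
	static void copy_id(char *dst, size_t dst_size, const char *id)
	{
		/* strlcpy(dst, id, dst_size) would run strlen(id) first,
		 * possibly reading 'id' far past dst_size bytes. */
		if (strscpy(dst, id, dst_size) < 0)	/* -E2BIG on truncation */
			pr_warn("id was truncated\n");	/* dst is still NUL-terminated */
	}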
[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230706175804.2249018-1-azeemshaikh38@gmail.com Signed-off-by: Kees Cook --- drivers/eisa/eisa-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c index 713582cc27d1..33f0ba11c6ad 100644 --- a/drivers/eisa/eisa-bus.c +++ b/drivers/eisa/eisa-bus.c @@ -60,7 +60,7 @@ static void __init eisa_name_device(struct eisa_device *edev) int i; for (i = 0; i < EISA_INFOS; i++) { if (!strcmp(edev->id.sig, eisa_table[i].id.sig)) { - strlcpy(edev->pretty_name, + strscpy(edev->pretty_name, eisa_table[i].name, sizeof(edev->pretty_name)); return; -- cgit From fcce1c6cb156d79debaf84f63233f146540031b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 26 Jul 2023 16:11:43 -0700 Subject: x86/paravirt: Fix tlb_remove_table function callback prototype warning Under W=1, this warning is visible in Clang 16 and newer: arch/x86/kernel/paravirt.c:337:4: warning: cast from 'void (*)(struct mmu_gather *, struct page *)' to 'void (*)(struct mmu_gather *, void *)' converts to incompatible function type [-Wcast-function-type-strict] (void (*)(struct mmu_gather *, void *))tlb_remove_page, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Add a direct wrapper instead, which will make this warning (and potential KCFI failures) go away.
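A sketch of why the cast is more than cosmetic (hypothetical types; the real fix is the wrapper in the diff below): under kCFI, each indirect call site checks a hash derived from the exact callee prototype, so a callee cast to a different function pointer type fails the check at run time.

	struct mmu_ctx;
	struct page;

	typedef void (*remove_cb_t)(struct mmu_ctx *ctx, void *table);

	void remove_page(struct mmu_ctx *ctx, struct page *page);

	/* The cast silences the type system at the assignment, but an
	 * indirect call through 'cb' still expects a callee whose kCFI
	 * hash matches remove_cb_t; remove_page's hash is derived from
	 * its real prototype, so the call would trap. */
	remove_cb_t cb = (remove_cb_t)remove_page;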
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307260332.pJntWR6o-lkp@intel.com/ Cc: Juergen Gross Cc: Peter Zijlstra Cc: Sami Tolvanen Cc: Nathan Chancellor Cc: Ajay Kaher Cc: Alexey Makhalov Cc: VMware PV-Drivers Reviewers Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: virtualization@lists.linux-foundation.org Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20230726231139.never.601-kees@kernel.org Signed-off-by: Kees Cook --- arch/x86/kernel/paravirt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ac10b46c5832..23d4d7114473 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -79,6 +79,11 @@ void __init native_pv_lock_init(void) static_branch_disable(&virt_spin_lock_key); } +static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + tlb_remove_page(tlb, table); +} + unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len) { @@ -295,8 +300,7 @@ struct paravirt_patch_template pv_ops = { .mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_multi = native_flush_tlb_multi, - .mmu.tlb_remove_table = - (void (*)(struct mmu_gather *, void *))tlb_remove_page, + .mmu.tlb_remove_table = native_tlb_remove_table, .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -- cgit From b3d46e11fec0c5a8972e5061bb1462119ae5736d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 7 Aug 2023 10:43:58 -0700 Subject: selftests/harness: Actually report SKIP for signal tests Tests that were expecting a signal were not correctly checking for a SKIP condition. Move the check before the signal checking when processing test result. Cc: Shuah Khan Cc: Andy Lutomirski Cc: Will Drewry Cc: linux-kselftest@vger.kernel.org Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Kees Cook --- tools/testing/selftests/kselftest_harness.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5fd49ad0c696..e05ac8261046 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -938,7 +938,11 @@ void __wait_for_test(struct __test_metadata *t) fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - if (t->termsig != -1) { + if (WEXITSTATUS(status) == 255) { + /* SKIP */ + t->passed = 1; + t->skip = 1; + } else if (t->termsig != -1) { t->passed = 0; fprintf(TH_LOG_STREAM, "# %s: Test exited normally instead of by signal (code: %d)\n", @@ -950,11 +954,6 @@ void __wait_for_test(struct __test_metadata *t) case 0: t->passed = 1; break; - /* SKIP */ - case 255: - t->passed = 1; - t->skip = 1; - break; /* Other failure, assume step report. */ default: t->passed = 0; -- cgit From 2e3f65ccfe6b0778b261ad69c9603ae85f210334 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 7 Aug 2023 09:41:19 -0700 Subject: gcc-plugins: Rename last_stmt() for GCC 14+ In GCC 14, last_stmt() was renamed to last_nondebug_stmt(). Add a helper macro to handle the renaming. 
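A sketch of how a plugin pass keeps working across the rename (hypothetical helper; the compat macro simply forwards to last_nondebug_stmt() on GCC 14+):

	#include "gcc-common.h"

	/* Does this basic block end in a return statement? */
	static bool block_ends_in_return(basic_block bb)
	{
		gimple *stmt = last_stmt(bb);	/* last_nondebug_stmt(bb) on GCC >= 14 */

		return stmt && gimple_code(stmt) == GIMPLE_RETURN;
	}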
Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/gcc-common.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 84c730da36dd..1ae39b9f4a95 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -440,4 +440,8 @@ static inline void debug_gimple_stmt(const_gimple s) #define SET_DECL_MODE(decl, mode) DECL_MODE(decl) = (mode) #endif +#if BUILDING_GCC_VERSION >= 14000 +#define last_stmt(x) last_nondebug_stmt(x) +#endif + #endif -- cgit From 7a0fd5e1678505534573b3c14c6ff69ed8592596 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:38 +0200 Subject: compiler_types: Introduce the Clang __preserve_most function attribute [1]: "On X86-64 and AArch64 targets, this attribute changes the calling convention of a function. The preserve_most calling convention attempts to make the code in the caller as unintrusive as possible. This convention behaves identically to the C calling convention on how arguments and return values are passed, but it uses a different set of caller/callee-saved registers. This alleviates the burden of saving and recovering a large register set before and after the call in the caller. If the arguments are passed in callee-saved registers, then they will be preserved by the callee across the call. This doesn't apply for values returned in callee-saved registers. * On X86-64 the callee preserves all general purpose registers, except for R11. R11 can be used as a scratch register. Floating-point registers (XMMs/YMMs) are not preserved and need to be saved by the caller. * On AArch64 the callee preserve all general purpose registers, except x0-X8 and X16-X18." [1] https://clang.llvm.org/docs/AttributeReference.html#preserve-most Introduce the attribute to compiler_types.h as __preserve_most. Use of this attribute results in better code generation for calls to very rarely called functions, such as error-reporting functions, or rarely executed slow paths. Beware that the attribute conflicts with instrumentation calls inserted on function entry which do not use __preserve_most themselves. Notably, function tracing which assumes the normal C calling convention for the given architecture. Where the attribute is supported, __preserve_most will imply notrace. It is recommended to restrict use of the attribute to functions that should or already disable tracing. Note: The additional preprocessor check against architecture should not be necessary if __has_attribute() only returns true where supported; also see https://github.com/ClangBuiltLinux/linux/issues/1908. But until __has_attribute() does the right thing, we also guard by known-supported architectures to avoid build warnings on other architectures. The attribute may be supported by a future GCC version (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110899). 
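A minimal sketch of the intended usage pattern (hypothetical names): only the rarely executed, out-of-line report function carries the attribute, so its hot inline callers stay small.

	/* Slow path: taken only on failure, so callers may keep nearly all
	 * registers live across this call. */
	void __preserve_most __cold report_bad_value(unsigned long addr);

	static __always_inline void check_value(unsigned long addr, unsigned long val)
	{
		if (likely(val != 0))
			return;
		report_bad_value(addr);	/* minimal register spilling at the call site */
	}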
Signed-off-by: Marco Elver Reviewed-by: Miguel Ojeda Reviewed-by: Nick Desaulniers Acked-by: "Steven Rostedt (Google)" Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230811151847.1594958-1-elver@google.com Signed-off-by: Kees Cook --- include/linux/compiler_types.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806e..c523c6683789 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -106,6 +106,34 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } #define __cold #endif +/* + * On x86-64 and arm64 targets, __preserve_most changes the calling convention + * of a function to make the code in the caller as unintrusive as possible. This + * convention behaves identically to the C calling convention on how arguments + * and return values are passed, but uses a different set of caller- and callee- + * saved registers. + * + * The purpose is to alleviate the burden of saving and recovering a large + * register set before and after the call in the caller. This is beneficial for + * rarely taken slow paths, such as error-reporting functions that may be called + * from hot paths. + * + * Note: This may conflict with instrumentation inserted on function entry which + * does not use __preserve_most or equivalent convention (if in assembly). Since + * function tracing assumes the normal C calling convention, where the attribute + * is supported, __preserve_most implies notrace. It is recommended to restrict + * use of the attribute to functions that should or already disable tracing. + * + * Optional: not supported by gcc. + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most + */ +#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64)) +# define __preserve_most notrace __attribute__((__preserve_most__)) +#else +# define __preserve_most +#endif + /* Builtins */ /* -- cgit From b16c42c8fde808b4f047d94f1f2aeda93487670d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:39 +0200 Subject: list_debug: Introduce inline wrappers for debug checks Turn the list debug checking functions __list_*_valid() into inline functions that wrap the out-of-line functions. Care is taken to ensure the inline wrappers are always inlined, so that additional compiler instrumentation (such as sanitizers) does not result in redundant outlining. This change is preparation for performing checks in the inline wrappers. No functional change intended. Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20230811151847.1594958-2-elver@google.com Signed-off-by: Kees Cook --- arch/arm64/kvm/hyp/nvhe/list_debug.c | 6 +++--- include/linux/list.h | 37 ++++++++++++++++++++++++++++++++---- lib/list_debug.c | 11 +++++------ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index d68abd7ea124..16266a939a4c 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -26,8 +26,8 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) /* The predicates checked here are taken from lib/list_debug.c.
*/ -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) +bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, + struct list_head *next) { if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) || NVHE_CHECK_DATA_CORRUPTION(prev->next != next) || @@ -37,7 +37,7 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, return true; } -bool __list_del_entry_valid(struct list_head *entry) +bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/include/linux/list.h b/include/linux/list.h index f10344dbad4d..130c6a1bb45c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -39,10 +39,39 @@ static inline void INIT_LIST_HEAD(struct list_head *list) } #ifdef CONFIG_DEBUG_LIST -extern bool __list_add_valid(struct list_head *new, - struct list_head *prev, - struct list_head *next); -extern bool __list_del_entry_valid(struct list_head *entry); +/* + * Performs the full set of list corruption checks before __list_add(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); + +/* + * Performs list corruption checks before __list_add(). Returns false if a + * corruption is detected, true otherwise. + */ +static __always_inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + return __list_add_valid_or_report(new, prev, next); +} + +/* + * Performs the full set of list corruption checks before __list_del_entry(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_del_entry_valid_or_report(struct list_head *entry); + +/* + * Performs list corruption checks before __list_del_entry(). Returns false if a + * corruption is detected, true otherwise. + */ +static __always_inline bool __list_del_entry_valid(struct list_head *entry) +{ + return __list_del_entry_valid_or_report(entry); +} #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, diff --git a/lib/list_debug.c b/lib/list_debug.c index d98d43f80958..2def33b1491f 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -17,8 +17,8 @@ * attempt). */ -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) +bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, + struct list_head *next) { if (CHECK_DATA_CORRUPTION(prev == NULL, "list_add corruption. prev is NULL.\n") || @@ -37,9 +37,9 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, return true; } -EXPORT_SYMBOL(__list_add_valid); +EXPORT_SYMBOL(__list_add_valid_or_report); -bool __list_del_entry_valid(struct list_head *entry) +bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; @@ -65,6 +65,5 @@ bool __list_del_entry_valid(struct list_head *entry) return false; return true; - } -EXPORT_SYMBOL(__list_del_entry_valid); +EXPORT_SYMBOL(__list_del_entry_valid_or_report); -- cgit From aebc7b0d8d91bbc69e976909963046bc48bca4fd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:40 +0200 Subject: list: Introduce CONFIG_LIST_HARDENED Numerous production kernel configs (see [1, 2]) are choosing to enable CONFIG_DEBUG_LIST, which is also being recommended by KSPP for hardened configs [3]. The motivation behind this is that the option can be used as a security hardening feature (e.g. 
CVE-2019-2215 and CVE-2019-2025 are mitigated by the option [4]). The feature has never been designed with performance in mind, yet common list manipulation is happening across hot paths all over the kernel. Introduce CONFIG_LIST_HARDENED, which performs list pointer checking inline, and only upon list corruption calls the reporting slow path. To generate optimal machine code with CONFIG_LIST_HARDENED: 1. Elide checking for pointer values which upon dereference would result in an immediate access fault (i.e. minimal hardening checks). The trade-off is lower-quality error reports. 2. Use the __preserve_most function attribute (available with Clang, but not yet with GCC) to minimize the code footprint for calling the reporting slow path. As a result, function size of callers is reduced by avoiding saving registers before calling the rarely called reporting slow path. Note that all TUs in lib/Makefile already disable function tracing, including list_debug.c, and __preserve_most's implied notrace has no effect in this case. 3. Because the inline checks are a subset of the full set of checks in __list_*_valid_or_report(), always return false if the inline checks failed. This avoids redundant compare and conditional branch right after return from the slow path. As a side-effect of the checks being inline, if the compiler can prove some condition to always be true, it can completely elide some checks. Since DEBUG_LIST is functionally a superset of LIST_HARDENED, the Kconfig variables are changed to reflect that: DEBUG_LIST selects LIST_HARDENED, whereas LIST_HARDENED itself has no dependency on DEBUG_LIST. Running netperf with CONFIG_LIST_HARDENED (using a Clang compiler with "preserve_most") shows throughput improvements, in my case of ~7% on average (up to 20-30% on some test cases). 
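Points 1-3 combine into the following shape (an abbreviated editorial sketch of the list.h change in the diff below, with shortened names):

	/* Out-of-line reporting slow path; __preserve_most per point 2. */
	bool __cold __preserve_most check_or_report(struct list_head *entry);

	static __always_inline bool check(struct list_head *entry)
	{
		/* Point 1: no NULL/poison tests; such values fault on these
		 * dereferences anyway. */
		if (likely(entry->prev->next == entry && entry->next->prev == entry))
			return true;
		/* Point 3: the inline checks failed, so return false without
		 * re-testing the slow path's result; it only reports. */
		check_or_report(entry);
		return false;
	}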
Link: https://r.android.com/1266735 [1] Link: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/blob/main/config [2] Link: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings [3] Link: https://googleprojectzero.blogspot.com/2019/11/bad-binder-android-in-wild-exploit.html [4] Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20230811151847.1594958-3-elver@google.com Signed-off-by: Kees Cook --- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/list_debug.c | 2 ++ drivers/misc/lkdtm/bugs.c | 4 +-- include/linux/list.h | 64 ++++++++++++++++++++++++++++++++---- lib/Kconfig.debug | 9 +++-- lib/Makefile | 2 +- lib/list_debug.c | 5 ++- security/Kconfig.hardening | 13 ++++++++ 8 files changed, 88 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 9ddc025e4b86..2250253a6429 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o -hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o +hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) ## diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index 16266a939a4c..46a2d4f2b3c6 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -26,6 +26,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) /* The predicates checked here are taken from lib/list_debug.c. */ +__list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -37,6 +38,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, return true; } +__list_valid_slowpath bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 3c95600ab2f7..963b4dee6a7d 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -393,7 +393,7 @@ static void lkdtm_CORRUPT_LIST_ADD(void) pr_err("Overwrite did not happen, but no BUG?!\n"); else { pr_err("list_add() corruption not detected!\n"); - pr_expected_config(CONFIG_DEBUG_LIST); + pr_expected_config(CONFIG_LIST_HARDENED); } } @@ -420,7 +420,7 @@ static void lkdtm_CORRUPT_LIST_DEL(void) pr_err("Overwrite did not happen, but no BUG?!\n"); else { pr_err("list_del() corruption not detected!\n"); - pr_expected_config(CONFIG_DEBUG_LIST); + pr_expected_config(CONFIG_LIST_HARDENED); } } diff --git a/include/linux/list.h b/include/linux/list.h index 130c6a1bb45c..164b4d0e9d2a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -38,39 +38,91 @@ static inline void INIT_LIST_HEAD(struct list_head *list) WRITE_ONCE(list->prev, list); } +#ifdef CONFIG_LIST_HARDENED + #ifdef CONFIG_DEBUG_LIST +# define __list_valid_slowpath +#else +# define __list_valid_slowpath __cold __preserve_most +#endif + /* * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. 
*/ -extern bool __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_add_valid_or_report(). */ static __always_inline bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { - return __list_add_valid_or_report(new, prev, next); + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + /* + * With the hardening version, elide checking if next and prev + * are NULL, since the immediate dereference of them below would + * result in a fault if NULL. + * + * With the reduced set of checks, we can afford to inline the + * checks, which also gives the compiler a chance to elide some + * of them completely if they can be proven at compile-time. If + * one of the pre-conditions does not hold, the slow-path will + * show a report which pre-condition failed. + */ + if (likely(next->prev == prev && prev->next == next && new != prev && new != next)) + return true; + ret = false; + } + + ret &= __list_add_valid_or_report(new, prev, next); + return ret; } /* * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_del_entry_valid_or_report(struct list_head *entry); +extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_del_entry_valid_or_report(). */ static __always_inline bool __list_del_entry_valid(struct list_head *entry) { - return __list_del_entry_valid_or_report(entry); + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + struct list_head *prev = entry->prev; + struct list_head *next = entry->next; + + /* + * With the hardening version, elide checking if next and prev + * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate + * dereference of them below would result in a fault. + */ + if (likely(prev->next == entry && next->prev == entry)) + return true; + ret = false; + } + + ret &= __list_del_entry_valid_or_report(entry); + return ret; } #else static inline bool __list_add_valid(struct list_head *new, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fbc89baf7de6..c38745ad46eb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1674,9 +1674,14 @@ menu "Debug kernel data structures" config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION + select LIST_HARDENED help - Enable this to turn on extended checks in the linked-list - walking routines. + Enable this to turn on extended checks in the linked-list walking + routines. + + This option trades better quality error reports for performance, and + is more suitable for kernel debugging. 
If you care about performance, + you should only enable CONFIG_LIST_HARDENED instead. If unsure, say N. diff --git a/lib/Makefile b/lib/Makefile index 42d307ade225..110936c9a68b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -161,7 +161,7 @@ obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o -obj-$(CONFIG_DEBUG_LIST) += list_debug.o +obj-$(CONFIG_LIST_HARDENED) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o diff --git a/lib/list_debug.c b/lib/list_debug.c index 2def33b1491f..db602417febf 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -2,7 +2,8 @@ * Copyright 2006, Red Hat, Inc., Dave Jones * Released under the General Public License (GPL). * - * This file contains the linked list validation for DEBUG_LIST. + * This file contains the linked list validation and error reporting for + * LIST_HARDENED and DEBUG_LIST. */ #include @@ -17,6 +18,7 @@ * attempt). */ +__list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -39,6 +41,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, } EXPORT_SYMBOL(__list_add_valid_or_report); +__list_valid_slowpath bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 0f295961e773..ffc3c702b461 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -279,6 +279,19 @@ config ZERO_CALL_USED_REGS endmenu +menu "Hardening of kernel data structures" + +config LIST_HARDENED + bool "Check integrity of linked list manipulation" + help + Minimal integrity checking in the linked-list manipulation routines + to catch memory corruptions that are not guaranteed to result in an + immediate access fault. + + If unsure, say N. + +endmenu + config CC_HAS_RANDSTRUCT def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null) # Randstruct was first added in Clang 15, but it isn't safe to use until -- cgit From aa9f10d57056cea51d41283d3785bccbbb9f459e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:41 +0200 Subject: hardening: Move BUG_ON_DATA_CORRUPTION to hardening options BUG_ON_DATA_CORRUPTION is turning detected corruptions of list data structures from WARNings into BUGs. This can be useful to stop further corruptions or even exploitation attempts. However, the option has less to do with debugging than with hardening. With the introduction of LIST_HARDENED, it makes more sense to move it to the hardening options, where it selects LIST_HARDENED instead. Without this change, combining BUG_ON_DATA_CORRUPTION with LIST_HARDENED alone wouldn't be possible, because DEBUG_LIST would always be selected by BUG_ON_DATA_CORRUPTION. 
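For reference, the BUG-vs-WARN decision is centralized in CHECK_DATA_CORRUPTION() in include/linux/bug.h; a simplified sketch of its logic:

	#define CHECK_DATA_CORRUPTION(condition, fmt, ...)			\
		({								\
			bool corruption = unlikely(condition);			\
			if (corruption) {					\
				if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) {\
					pr_err(fmt, ##__VA_ARGS__);		\
					BUG();	/* stop before more damage */	\
				} else						\
					WARN(1, fmt, ##__VA_ARGS__);		\
			}							\
			corruption;						\
		})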
Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20230811151847.1594958-4-elver@google.com Signed-off-by: Kees Cook --- lib/Kconfig.debug | 12 +----------- security/Kconfig.hardening | 10 ++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c38745ad46eb..c7348d1fabe5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1673,7 +1673,7 @@ menu "Debug kernel data structures" config DEBUG_LIST bool "Debug linked list manipulation" - depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION + depends on DEBUG_KERNEL select LIST_HARDENED help Enable this to turn on extended checks in the linked-list walking @@ -1715,16 +1715,6 @@ config DEBUG_NOTIFIERS This is a relatively cheap check but if you care about maximum performance, say N. -config BUG_ON_DATA_CORRUPTION - bool "Trigger a BUG when data corruption is detected" - select DEBUG_LIST - help - Select this option if the kernel should BUG when it encounters - data corruption in kernel memory structures when they get checked - for validity. - - If unsure, say N. - config DEBUG_MAPLE_TREE bool "Debug maple trees" depends on DEBUG_KERNEL diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index ffc3c702b461..2cff851ebfd7 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -290,6 +290,16 @@ config LIST_HARDENED If unsure, say N. +config BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + select LIST_HARDENED + help + Select this option if the kernel should BUG when it encounters + data corruption in kernel memory structures when they get checked + for validity. + + If unsure, say N. + endmenu config CC_HAS_RANDSTRUCT -- cgit From 967afdf808cf66908a55c55b8ec5937cc20676ce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 2 Aug 2023 07:25:56 -0600 Subject: alpha: Replace one-element array with flexible-array member One-element and zero-length arrays are deprecated. So, replace one-element array in struct osf_dirent with flexible-array member. This results in no differences in binary output. Signed-off-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/ZMpZZBShlLqyD3ax@work Signed-off-by: Kees Cook --- arch/alpha/kernel/osf_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d98701ee36c6..5db88b627439 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -97,7 +97,7 @@ struct osf_dirent { unsigned int d_ino; unsigned short d_reclen; unsigned short d_namlen; - char d_name[1]; + char d_name[]; }; struct osf_dirent_callback { -- cgit From 30bed99e0c6335b711119b9fda806da7b4031dfb Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 7 Aug 2023 18:22:30 +0000 Subject: um: vector: refactor deprecated strncpy `strncpy` is deprecated for use on NUL-terminated destination strings [1]. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on its destination buffer argument which is _not_ the case for `strncpy`! In this case, we are able to drop the now superfluous `... - 1` instances because `strscpy` will automatically truncate the last byte by setting it to a NUL byte if the source size exceeds the destination size or if the source string is not NUL-terminated. I've also opted to remove the seemingly useless char* casts. I'm not sure why they're present at all since (after expanding the `ifr_name` macro) `ifr.ifr_ifrn.ifrn_name` is a char* already. 
All in all, `strscpy` is a more robust and less ambiguous interface while also letting us remove some `... -1`'s which cleans things up a bit. [1]: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [2]: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Acked-by: Anton Ivanov Link: https://lore.kernel.org/r/20230807-arch-um-drivers-v1-1-10d602c5577a@google.com Signed-off-by: Kees Cook --- arch/um/drivers/vector_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index c650e428432b..c719e1ec4645 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -141,7 +141,7 @@ static int create_tap_fd(char *iface) } memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); err = ioctl(fd, TUNSETIFF, (void *) &ifr); if (err != 0) { @@ -171,7 +171,7 @@ static int create_raw_fd(char *iface, int flags, int proto) goto raw_fd_cleanup; } memset(&ifr, 0, sizeof(ifr)); - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { err = -errno; goto raw_fd_cleanup; -- cgit From be8dffa04de3894bad0bf31d1531a2f06353b70e Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Wed, 9 Aug 2023 18:19:32 +0000 Subject: um: refactor deprecated strncpy to memcpy Use `memcpy` since `console_buf` is not expected to be NUL-terminated and it more accurately describes what is happening with the buffers `console_buf` and `string` as per Kees' analysis [1]. Also mark char buffer as `__nonstring` as per Kees' suggestion [2]. This change now makes it more clear what this code does and that `console_buf` is not expected to be NUL-terminated. 
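The underlying rule, as a small sketch (hypothetical names): a buffer holding a counted run of bytes is not a C string, so copy it with memcpy() under an explicit length and mark it __nonstring to document (and tell the compiler) that no NUL terminator is expected.

	#include <linux/minmax.h>
	#include <linux/string.h>

	void emit_bytes(const char *buf, size_t n);	/* hypothetical sink */

	static char chunk[64] __nonstring;	/* byte buffer, not a C string */

	static void consume(const char *src, size_t len)
	{
		while (len > 0) {
			size_t n = min(len, sizeof(chunk));

			memcpy(chunk, src, n);	/* exact bytes, no NUL appended */
			emit_bytes(chunk, n);
			src += n;
			len -= n;
		}
	}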
Link: https://lore.kernel.org/all/202308081708.D5ADC80F@keescook/ [1] Link: https://github.com/KSPP/linux/issues/90 [2] Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings Cc: linux-hardening@vger.kernel.org Suggested-by: Kees Cook Signed-off-by: Justin Stitt Link: https://lore.kernel.org/r/20230809-arch-um-v3-1-f63e1122d77e@google.com Signed-off-by: Kees Cook --- arch/um/drivers/mconsole_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 5026e7b9adfe..ff4bda95b9c7 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -554,7 +554,7 @@ struct mconsole_output { static DEFINE_SPINLOCK(client_lock); static LIST_HEAD(clients); -static char console_buf[MCONSOLE_MAX_DATA]; +static char console_buf[MCONSOLE_MAX_DATA] __nonstring; static void console_write(struct console *console, const char *string, unsigned int len) @@ -567,7 +567,7 @@ static void console_write(struct console *console, const char *string, while (len > 0) { n = min((size_t) len, ARRAY_SIZE(console_buf)); - strncpy(console_buf, string, n); + memcpy(console_buf, string, n); string += n; len -= n; -- cgit From c8248faf3ca276ebdf60f003b3e04bf764daba91 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Aug 2023 13:06:03 -0700 Subject: Compiler Attributes: counted_by: Adjust name and identifier expansion GCC and Clang's current RFCs name this attribute "counted_by", and have moved away from using a string for the member name. Update the kernel's macros to match. Additionally provide a UAPI no-op macro for UAPI structs that will gain annotations. Cc: Miguel Ojeda Cc: Nick Desaulniers Fixes: dd06e72e68bc ("Compiler Attributes: Add __counted_by macro") Acked-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230817200558.never.077-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler_attributes.h | 26 +++++++++++++------------- include/uapi/linux/stddef.h | 4 ++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 00efa35c350f..28566624f008 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -94,6 +94,19 @@ # define __copy(symbol) #endif +/* + * Optional: only supported since gcc >= 14 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://reviews.llvm.org/D148381 + */ +#if __has_attribute(__counted_by__) +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 @@ -129,19 +142,6 @@ # define __designated_init #endif -/* - * Optional: only supported since gcc >= 14 - * Optional: only supported since clang >= 17 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 - */ -#if __has_attribute(__element_count__) -# define __counted_by(member) __attribute__((__element_count__(#member))) -#else -# define __counted_by(member) -#endif - /* * Optional: only supported since clang >= 14.0 * diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 7837ba4fe728..7c3fc3980881 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -45,3 +45,7 @@ TYPE NAME[]; \ } #endif + +#ifndef 
__counted_by +#define __counted_by(m) +#endif -- cgit From 5d207e83ca41206e75c2cd414d40b451ef04c259 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Aug 2023 21:27:35 -0700 Subject: lkdtm: Add FAM_BOUNDS test for __counted_by Add new CONFIG_UBSAN_BOUNDS test for __counted_by attribute. Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Signed-off-by: Kees Cook --- drivers/misc/lkdtm/bugs.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 963b4dee6a7d..c66cc05a68c4 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -273,8 +273,8 @@ static void lkdtm_HUNG_TASK(void) schedule(); } -volatile unsigned int huge = INT_MAX - 2; -volatile unsigned int ignored; +static volatile unsigned int huge = INT_MAX - 2; +static volatile unsigned int ignored; static void lkdtm_OVERFLOW_SIGNED(void) { @@ -305,7 +305,7 @@ static void lkdtm_OVERFLOW_UNSIGNED(void) ignored = value; } -/* Intentionally using old-style flex array definition of 1 byte. */ +/* Intentionally using unannotated flex array definition. */ struct array_bounds_flex_array { int one; int two; @@ -357,6 +357,46 @@ static void lkdtm_ARRAY_BOUNDS(void) pr_expected_config(CONFIG_UBSAN_BOUNDS); } +struct lkdtm_annotated { + unsigned long flags; + int count; + int array[] __counted_by(count); +}; + +static volatile int fam_count = 4; + +static void lkdtm_FAM_BOUNDS(void) +{ + struct lkdtm_annotated *inst; + + inst = kzalloc(struct_size(inst, array, fam_count + 1), GFP_KERNEL); + if (!inst) { + pr_err("FAIL: could not allocate test struct!\n"); + return; + } + + inst->count = fam_count; + pr_info("Array access within bounds ...\n"); + inst->array[1] = fam_count; + ignored = inst->array[1]; + + pr_info("Array access beyond bounds ...\n"); + inst->array[fam_count] = fam_count; + ignored = inst->array[fam_count]; + + kfree(inst); + + pr_err("FAIL: survived access of invalid flexible array member index!\n"); + + if (!__has_attribute(__counted_by__)) + pr_warn("This is expected since this %s was built a compiler supporting __counted_by\n", + lkdtm_kernel_info); + else if (IS_ENABLED(CONFIG_UBSAN_BOUNDS)) + pr_expected_config(CONFIG_UBSAN_TRAP); + else + pr_expected_config(CONFIG_UBSAN_BOUNDS); +} + static void lkdtm_CORRUPT_LIST_ADD(void) { /* @@ -616,6 +656,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(OVERFLOW_SIGNED), CRASHTYPE(OVERFLOW_UNSIGNED), CRASHTYPE(ARRAY_BOUNDS), + CRASHTYPE(FAM_BOUNDS), CRASHTYPE(CORRUPT_LIST_ADD), CRASHTYPE(CORRUPT_LIST_DEL), CRASHTYPE(STACK_GUARD_PAGE_LEADING), -- cgit From a4b35d4d05b9f2e84c1dd6301d3dca65719334ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Aug 2023 14:03:28 -0700 Subject: integrity: Annotate struct ima_rule_opt_list with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ima_rule_opt_list. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Mimi Zohar Cc: Dmitry Kasatkin Cc: Paul Moore Cc: James Morris Cc: "Serge E. 
Hallyn" Cc: linux-integrity@vger.kernel.org Cc: linux-security-module@vger.kernel.org Acked-by: Mimi Zohar Reviewed-by: "Gustavo A. R. Silva" Acked-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20230817210327.never.598-kees@kernel.org Signed-off-by: Kees Cook --- security/integrity/ima/ima_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index c9b3bd8f1bb9..7a0420cf1a6a 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -68,7 +68,7 @@ enum policy_rule_list { IMA_DEFAULT_POLICY = 1, IMA_CUSTOM_POLICY }; struct ima_rule_opt_list { size_t count; - char *items[]; + char *items[] __counted_by(count); }; /* @@ -342,6 +342,7 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src) kfree(src_copy); return ERR_PTR(-ENOMEM); } + opt_list->count = count; /* * strsep() has already replaced all instances of '|' with '\0', @@ -357,7 +358,6 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src) opt_list->items[i] = cur; cur = strchr(cur, '\0') + 1; } - opt_list->count = count; return opt_list; } -- cgit From 2ddd3cac1fa988323684cee567356e970e6750bd Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 17 Aug 2023 21:13:32 -0700 Subject: nsproxy: Convert nsproxy.count to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nsproxy.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in refcount.h have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the nsproxy.count it might make a difference in following places: - put_nsproxy() and switch_task_namespaces(): decrement in refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE ordering on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20230818041327.gonna.210-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/nsproxy.h | 7 +++---- kernel/nsproxy.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index fee881cded01..771cb0285872 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -29,7 +29,7 @@ struct fs_struct; * nsproxy is copied. 
*/ struct nsproxy { - atomic_t count; + refcount_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; @@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { - if (atomic_dec_and_test(&ns->count)) { + if (refcount_dec_and_test(&ns->count)) free_nsproxy(ns); - } } static inline void get_nsproxy(struct nsproxy *ns) { - atomic_inc(&ns->count); + refcount_inc(&ns->count); } #endif diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 80d9c6d77a45..15781acaac1c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -30,7 +30,7 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) .ipc_ns = &init_ipc_ns, @@ -55,7 +55,7 @@ static inline struct nsproxy *create_nsproxy(void) nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); if (nsproxy) - atomic_set(&nsproxy->count, 1); + refcount_set(&nsproxy->count, 1); return nsproxy; } -- cgit From 33f0467fe06934d5e4ea6e24ce2b9c65ce618e26 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 Aug 2023 20:46:59 -0700 Subject: kallsyms: Fix kallsyms_selftest failure Kernel test robot reported a kallsyms_test failure when clang lto is enabled (thin or full) and CONFIG_KALLSYMS_SELFTEST is also enabled. I can reproduce it in my local environment with the following error message with thin lto: [ 1.877897] kallsyms_selftest: Test for 1750th symbol failed: (tsc_cs_mark_unstable) addr=ffffffff81038090 [ 1.877901] kallsyms_selftest: abort It appears that commit 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") caused the failure. Commit 8cc32a9bbf29 changed cleanup_symbol_name() to key on ".llvm." instead of '.', where ".llvm." is the suffix appended to a local symbol name before LTO optimization. That knowledge needs to be propagated into kallsyms_selftest.c as well, and compare_symbol_name() in kallsyms.c needs to change too. In scripts/kallsyms.c, kallsyms_names records the symbol names themselves and kallsyms_seqs_of_names records indices into them. For example: kallsyms_names: ... __amd_smn_rw._entry <== seq 1000 __amd_smn_rw._entry.5 <== seq 1001 __amd_smn_rw.llvm. <== seq 1002 ... kallsyms_seqs_of_names, though, is sorted on the names after cleanup_symbol_name(), so the actual order is index 1000: seq 1002 <== __amd_smn_rw.llvm. (actual symbol comparison using '__amd_smn_rw') index 1001: seq 1000 <== __amd_smn_rw._entry index 1002: seq 1001 <== __amd_smn_rw._entry.5 Say that at index 1000 the symbol '__amd_smn_rw.llvm.' is compared against '__amd_smn_rw._entry', where '__amd_smn_rw._entry' is the name being searched for, e.g. via kallsyms_on_each_match_symbol(). The current implementation finds that '__amd_smn_rw._entry' is less than '__amd_smn_rw.llvm.' and continues searching downward, e.g. at index 999, never finding a match even though index 1001 actually is one. To fix this issue, do cleanup_symbol_name() first and compare afterwards. In the above case, '__amd_smn_rw' is compared with '__amd_smn_rw._entry'; since '__amd_smn_rw._entry' is greater than '__amd_smn_rw', the search moves above index 1000, eventually hits index 1001, and a match is found. For any symbol not containing the '.llvm.' substring, there is no functional change in compare_symbol_name().
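The invariant the fix restores, as a standalone model (userspace sketch, hypothetical names): a binary search must compare in the same key space the table was sorted in.

	#include <string.h>

	static void clean(char *s)	/* models cleanup_symbol_name() */
	{
		char *p = strstr(s, ".llvm.");

		if (p)
			*p = '\0';
	}

	static int compare(const char *name, char *namebuf)
	{
		clean(namebuf);	/* sort key and search key now match */
		return strcmp(name, namebuf);
	}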
Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202308232200.1c932a90-oliver.sang@intel.com Signed-off-by: Yonghong Song Reviewed-by: Song Liu Reviewed-by: Zhen Lei Link: https://lore.kernel.org/r/20230825034659.1037627-1-yonghong.song@linux.dev Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- kernel/kallsyms.c | 17 +++++++---------- kernel/kallsyms_selftest.c | 23 +---------------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 016d997131d4..e12d26c10dba 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -188,16 +188,13 @@ static bool cleanup_symbol_name(char *s) static int compare_symbol_name(const char *name, char *namebuf) { - int ret; - - ret = strcmp(name, namebuf); - if (!ret) - return ret; - - if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf)) - return 0; - - return ret; + /* The kallsyms_seqs_of_names is sorted based on names after + * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. + * To ensure correct bisection in kallsyms_lookup_names(), do + * cleanup_symbol_name(namebuf) before comparing name and namebuf. + */ + cleanup_symbol_name(namebuf); + return strcmp(name, namebuf); } static unsigned int get_symbol_seq(int index) diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index a2e3745d15c4..e05ddc33a752 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -196,7 +196,7 @@ static bool match_cleanup_name(const char *s, const char *name) if (!IS_ENABLED(CONFIG_LTO_CLANG)) return false; - p = strchr(s, '.'); + p = strstr(s, ".llvm."); if (!p) return false; @@ -344,27 +344,6 @@ static int test_kallsyms_basic_function(void) goto failed; } - /* - * The first '.' may be the initial letter, in which case the - * entire symbol name will be truncated to an empty string in - * cleanup_symbol_name(). Do not test these symbols. - * - * For example: - * cat /proc/kallsyms | awk '{print $3}' | grep -E "^\." | head - * .E_read_words - * .E_leading_bytes - * .E_trailing_bytes - * .E_write_words - * .E_copy - * .str.292.llvm.12122243386960820698 - * .str.24.llvm.12122243386960820698 - * .str.29.llvm.12122243386960820698 - * .str.75.llvm.12122243386960820698 - * .str.99.llvm.12122243386960820698 - */ - if (IS_ENABLED(CONFIG_LTO_CLANG) && !namebuf[0]) - continue; - lookup_addr = kallsyms_lookup_name(namebuf); memset(stat, 0, sizeof(*stat)); -- cgit From 76903a9648744c081547c91f31ec3917204b74e5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 25 Aug 2023 13:20:36 -0700 Subject: kallsyms: Change func signature for cleanup_symbol_name() All users of cleanup_symbol_name() do not use the return value. So let us change the return value of cleanup_symbol_name() to 'void' to reflect its usage pattern. 
Suggested-by: Nick Desaulniers Signed-off-by: Yonghong Song Reviewed-by: Nick Desaulniers Reviewed-by: Song Liu Link: https://lore.kernel.org/r/20230825202036.441212-1-yonghong.song@linux.dev Signed-off-by: Kees Cook --- kernel/kallsyms.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index e12d26c10dba..18edd57b5fe8 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -163,12 +163,12 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static bool cleanup_symbol_name(char *s) +static void cleanup_symbol_name(char *s) { char *res; if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; + return; /* * LLVM appends various suffixes for local functions and variables that @@ -178,12 +178,10 @@ static bool cleanup_symbol_name(char *s) * - foo.llvm.[0-9a-f]+ */ res = strstr(s, ".llvm."); - if (res) { + if (res) *res = '\0'; - return true; - } - return false; + return; } static int compare_symbol_name(const char *name, char *namebuf) -- cgit From 5f536ac6a5a7b67351e4e5ae4f9e1e57d31268e6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Aug 2023 16:59:56 -0700 Subject: LoadPin: Annotate struct dm_verity_loadpin_trusted_root_digest with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct dm_verity_loadpin_trusted_root_digest. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Paul Moore Cc: James Morris Cc: "Serge E. Hallyn" Cc: linux-security-module@vger.kernel.org Link: https://lore.kernel.org/r/20230817235955.never.762-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/dm-verity-loadpin.h | 2 +- security/loadpin/loadpin.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h index 552b817ab102..3ac6dbaeaa37 100644 --- a/include/linux/dm-verity-loadpin.h +++ b/include/linux/dm-verity-loadpin.h @@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests; struct dm_verity_loadpin_trusted_root_digest { struct list_head node; unsigned int len; - u8 data[]; + u8 data[] __counted_by(len); }; #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY) diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index ebae964f7cc9..a9d40456a064 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -336,6 +336,7 @@ static int read_trusted_verity_root_digests(unsigned int fd) rc = -ENOMEM; goto err; } + trd->len = len; if (hex2bin(trd->data, d, len)) { kfree(trd); @@ -343,8 +344,6 @@ static int read_trusted_verity_root_digests(unsigned int fd) goto err; } - trd->len = len; - list_add_tail(&trd->node, &dm_verity_loadpin_trusted_root_digests); } -- cgit
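Taken together, the __counted_by patches above follow one ordering rule, sketched here with hypothetical names: allocate, assign the counter, and only then touch the annotated flexible array, so that UBSAN_BOUNDS and FORTIFY_SOURCE always see a valid bound.

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	struct digest {
		unsigned int len;
		u8 data[] __counted_by(len);
	};

	static struct digest *digest_alloc(const u8 *src, unsigned int len)
	{
		struct digest *d = kzalloc(struct_size(d, data, len), GFP_KERNEL);

		if (!d)
			return NULL;
		d->len = len;			/* set the bound first ... */
		memcpy(d->data, src, len);	/* ... then the access is provably in range */
		return d;
	}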