diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2024-04-22 13:35:18 -0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2024-04-22 13:35:18 -0300 |
commit | 173b0b5b0e865348684c02bd9cb1d22b5d46e458 (patch) | |
tree | a42a2cc8b724b94a9478aedfcbfabcec0197b246 /arch | |
parent | 61ba075d9911d2a6db181fbb5602762a24d5d0a9 (diff) | |
parent | ed30a4a51bb196781c8058073ea720133a65596f (diff) |
Merge remote-tracking branch 'torvalds/master' into perf-tools-next
To pick up fixes sent via perf-tools, by Namhyung Kim.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'arch')
318 files changed, 7524 insertions, 2119 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index fd18b7db2c77..65afb1de48b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -799,7 +799,7 @@ config CFI_CLANG depends on ARCH_SUPPORTS_CFI_CLANG depends on $(cc-option,-fsanitize=kcfi) help - This option enables Clang’s forward-edge Control Flow Integrity + This option enables Clang's forward-edge Control Flow Integrity (CFI) checking, where the compiler injects a runtime check to each indirect function call to ensure the target is a valid function with the correct static type. This restricts possible call targets and @@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB config PAGE_SHIFT int - default 12 if PAGE_SIZE_4KB - default 13 if PAGE_SIZE_8KB - default 14 if PAGE_SIZE_16KB - default 15 if PAGE_SIZE_32KB - default 16 if PAGE_SIZE_64KB - default 18 if PAGE_SIZE_256KB + default 12 if PAGE_SIZE_4KB + default 13 if PAGE_SIZE_8KB + default 14 if PAGE_SIZE_16KB + default 15 if PAGE_SIZE_32KB + default 16 if PAGE_SIZE_64KB + default 18 if PAGE_SIZE_256KB # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process @@ -1597,4 +1597,16 @@ config FUNCTION_ALIGNMENT default 4 if FUNCTION_ALIGNMENT_4B default 0 +config CC_HAS_MIN_FUNCTION_ALIGNMENT + # Detect availability of the GCC option -fmin-function-alignment which + # guarantees minimal alignment for all functions, unlike + # -falign-functions which the compiler ignores for cold functions. + def_bool $(cc-option, -fmin-function-alignment=8) + +config CC_HAS_SANE_FUNCTION_ALIGNMENT + # Set if the guaranteed alignment with -fmin-function-alignment is + # available or extra care is required in the kernel. Clang provides + # strict alignment always, even with -falign-functions. + def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG + endmenu diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 4f490250d323..3afd042150f8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -339,6 +339,7 @@ config ALPHA_EV4 bool depends on ALPHA_JENSEN || (ALPHA_SABLE && !ALPHA_GAMMA) || ALPHA_LYNX || ALPHA_NORITAKE && !ALPHA_PRIMO || ALPHA_MIKASA && !ALPHA_PRIMO || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL || ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K default y if !ALPHA_LYNX + default y if !ALPHA_EV5 config ALPHA_LCA bool @@ -366,10 +367,6 @@ config ALPHA_EV5 bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_LYNX default y if ALPHA_RX164 || ALPHA_RAWHIDE || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_SABLE && ALPHA_GAMMA || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR -config ALPHA_EV4 - bool - default y if ALPHA_LYNX && !ALPHA_EV5 - config ALPHA_CIA bool depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR @@ -394,16 +391,12 @@ config ALPHA_PRIMO Say Y if you have an AS 1000 5/xxx or an AS 1000A 5/xxx. config ALPHA_GAMMA - bool "EV5 CPU(s) (model 5/xxx)?" - depends on ALPHA_SABLE + bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_SABLE + depends on ALPHA_SABLE || ALPHA_LYNX + default ALPHA_LYNX help Say Y if you have an AS 2000 5/xxx or an AS 2100 5/xxx. -config ALPHA_GAMMA - bool - depends on ALPHA_LYNX - default y - config ALPHA_T2 bool depends on ALPHA_SABLE || ALPHA_LYNX diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 702d97a9c304..b14aed3a17ab 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -505,8 +505,8 @@ source "arch/arm/mm/Kconfig" config IWMMXT bool "Enable iWMMXt support" - depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 || CPU_PJ4B - default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 || CPU_PJ4B + depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK + default y if PXA27x || PXA3xx || ARCH_MMP help Enable support for iWMMXt context switching at run time if running on a CPU that supports it. diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index f1fc278081d0..7f47b4f335c3 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -90,9 +90,6 @@ config BACKTRACE_VERBOSE In most cases, say N here, unless you are intending to debug the kernel and have access to the kernel binary image. -config FRAME_POINTER - bool - config DEBUG_USER bool "Verbose user fault messages" help diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi index 1235a71c6abe..52869e68f833 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi @@ -666,7 +666,7 @@ bus-width = <4>; no-1-8-v; no-sdio; - no-emmc; + no-mmc; status = "okay"; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts index ba7231b364bb..7bab113ca6da 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts @@ -210,6 +210,7 @@ remote-endpoint = <&mipi_from_sensor>; clock-lanes = <0>; data-lanes = <1>; + link-frequencies = /bits/ 64 <330000000>; }; }; }; diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h new file mode 100644 index 000000000000..2189e507c8e0 --- /dev/null +++ b/arch/arm/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <asm/system_info.h> +#include <uapi/asm/mman.h> + +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 7f44e88d1f25..14a38cc67e0b 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -10,6 +10,7 @@ #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLY__ +#include <linux/bitfield.h> #include <linux/types.h> struct pt_regs { @@ -35,8 +36,8 @@ struct svc_pt_regs { #ifndef CONFIG_CPU_V7M #define isa_mode(regs) \ - ((((regs)->ARM_cpsr & PSR_J_BIT) >> (__ffs(PSR_J_BIT) - 1)) | \ - (((regs)->ARM_cpsr & PSR_T_BIT) >> (__ffs(PSR_T_BIT)))) + (FIELD_GET(PSR_J_BIT, (regs)->ARM_cpsr) << 1 | \ + FIELD_GET(PSR_T_BIT, (regs)->ARM_cpsr)) #else #define isa_mode(regs) 1 /* Thumb */ #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 6a9de826ffd3..89a77e3f51d2 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -76,8 +76,6 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o -obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o -obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index a0218c4867b9..4a335d3c5969 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -18,18 +18,6 @@ #include <asm/assembler.h> #include "iwmmxt.h" -#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) -#define PJ4(code...) code -#define XSC(code...) -#elif defined(CONFIG_CPU_MOHAWK) || \ - defined(CONFIG_CPU_XSC3) || \ - defined(CONFIG_CPU_XSCALE) -#define PJ4(code...) -#define XSC(code...) code -#else -#error "Unsupported iWMMXt architecture" -#endif - #define MMX_WR0 (0x00) #define MMX_WR1 (0x08) #define MMX_WR2 (0x10) @@ -81,17 +69,13 @@ ENDPROC(iwmmxt_undef_handler) ENTRY(iwmmxt_task_enable) inc_preempt_count r10, r3 - XSC(mrc p15, 0, r2, c15, c1, 0) - PJ4(mrc p15, 0, r2, c1, c0, 2) + mrc p15, 0, r2, c15, c1, 0 @ CP0 and CP1 accessible? - XSC(tst r2, #0x3) - PJ4(tst r2, #0xf) + tst r2, #0x3 bne 4f @ if so no business here @ enable access to CP0 and CP1 - XSC(orr r2, r2, #0x3) - XSC(mcr p15, 0, r2, c15, c1, 0) - PJ4(orr r2, r2, #0xf) - PJ4(mcr p15, 0, r2, c1, c0, 2) + orr r2, r2, #0x3 + mcr p15, 0, r2, c15, c1, 0 ldr r3, =concan_owner ldr r2, [r0, #S_PC] @ current task pc value @@ -218,12 +202,9 @@ ENTRY(iwmmxt_task_disable) bne 1f @ no: quit @ enable access to CP0 and CP1 - XSC(mrc p15, 0, r4, c15, c1, 0) - XSC(orr r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(mrc p15, 0, r4, c1, c0, 2) - PJ4(orr r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) + mrc p15, 0, r4, c15, c1, 0 + orr r4, r4, #0x3 + mcr p15, 0, r4, c15, c1, 0 mov r0, #0 @ nothing to load str r0, [r3] @ no more current owner @@ -232,10 +213,8 @@ ENTRY(iwmmxt_task_disable) bl concan_save @ disable access to CP0 and CP1 - XSC(bic r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(bic r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) + bic r4, r4, #0x3 + mcr p15, 0, r4, c15, c1, 0 mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait @@ -330,11 +309,9 @@ ENDPROC(iwmmxt_task_restore) */ ENTRY(iwmmxt_task_switch) - XSC(mrc p15, 0, r1, c15, c1, 0) - PJ4(mrc p15, 0, r1, c1, c0, 2) + mrc p15, 0, r1, c15, c1, 0 @ CP0 and CP1 accessible? - XSC(tst r1, #0x3) - PJ4(tst r1, #0xf) + tst r1, #0x3 bne 1f @ yes: block them for next task ldr r2, =concan_owner @@ -344,10 +321,8 @@ ENTRY(iwmmxt_task_switch) retne lr @ no: leave Concan disabled 1: @ flip Concan access - XSC(eor r1, r1, #0x3) - XSC(mcr p15, 0, r1, c15, c1, 0) - PJ4(eor r1, r1, #0xf) - PJ4(mcr p15, 0, r1, c1, c0, 2) + eor r1, r1, #0x3 + mcr p15, 0, r1, c15, c1, 0 mrc p15, 0, r1, c2, c0, 0 sub pc, lr, r1, lsr #32 @ cpwait and return diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c deleted file mode 100644 index 4bca8098c4ff..000000000000 --- a/arch/arm/kernel/pj4-cp0.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/arch/arm/kernel/pj4-cp0.c - * - * PJ4 iWMMXt coprocessor context switching and handling - * - * Copyright (c) 2010 Marvell International Inc. - */ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/io.h> -#include <asm/thread_notify.h> -#include <asm/cputype.h> - -static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t) -{ - struct thread_info *thread = t; - - switch (cmd) { - case THREAD_NOTIFY_FLUSH: - /* - * flush_thread() zeroes thread->fpstate, so no need - * to do anything here. - * - * FALLTHROUGH: Ensure we don't try to overwrite our newly - * initialised state information on the first fault. - */ - - case THREAD_NOTIFY_EXIT: - iwmmxt_task_release(thread); - break; - - case THREAD_NOTIFY_SWITCH: - iwmmxt_task_switch(thread); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block __maybe_unused iwmmxt_notifier_block = { - .notifier_call = iwmmxt_do, -}; - - -static u32 __init pj4_cp_access_read(void) -{ - u32 value; - - __asm__ __volatile__ ( - "mrc p15, 0, %0, c1, c0, 2\n\t" - : "=r" (value)); - return value; -} - -static void __init pj4_cp_access_write(u32 value) -{ - u32 temp; - - __asm__ __volatile__ ( - "mcr p15, 0, %1, c1, c0, 2\n\t" -#ifdef CONFIG_THUMB2_KERNEL - "isb\n\t" -#else - "mrc p15, 0, %0, c1, c0, 2\n\t" - "mov %0, %0\n\t" - "sub pc, pc, #4\n\t" -#endif - : "=r" (temp) : "r" (value)); -} - -static int __init pj4_get_iwmmxt_version(void) -{ - u32 cp_access, wcid; - - cp_access = pj4_cp_access_read(); - pj4_cp_access_write(cp_access | 0xf); - - /* check if coprocessor 0 and 1 are available */ - if ((pj4_cp_access_read() & 0xf) != 0xf) { - pj4_cp_access_write(cp_access); - return -ENODEV; - } - - /* read iWMMXt coprocessor id register p1, c0 */ - __asm__ __volatile__ ("mrc p1, 0, %0, c0, c0, 0\n" : "=r" (wcid)); - - pj4_cp_access_write(cp_access); - - /* iWMMXt v1 */ - if ((wcid & 0xffffff00) == 0x56051000) - return 1; - /* iWMMXt v2 */ - if ((wcid & 0xffffff00) == 0x56052000) - return 2; - - return -EINVAL; -} - -/* - * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy - * switch code handle iWMMXt context switching. - */ -static int __init pj4_cp0_init(void) -{ - u32 __maybe_unused cp_access; - int vers; - - if (!cpu_is_pj4()) - return 0; - - vers = pj4_get_iwmmxt_version(); - if (vers < 0) - return 0; - -#ifndef CONFIG_IWMMXT - pr_info("PJ4 iWMMXt coprocessor detected, but kernel support is missing.\n"); -#else - cp_access = pj4_cp_access_read() & ~0xf; - pj4_cp_access_write(cp_access); - - pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers); - elf_hwcap |= HWCAP_IWMMXT; - thread_register_notifier(&iwmmxt_notifier_block); - register_iwmmxt_undef_handler(); -#endif - - return 0; -} - -late_initcall(pj4_cp0_init); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3bad79db5d6e..72c82a4d63ac 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -220,7 +220,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, unsigned int fp, mode; int ok = 1; - printk("%sBacktrace: ", loglvl); + printk("%sCall trace: ", loglvl); if (!tsk) tsk = current; diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 9d2192156087..f60547dadc93 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -524,6 +524,8 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, { struct stackframe frame; + printk("%sCall trace: ", loglvl); + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 31755a378c73..ff2a4a4d8220 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = { static struct gpiod_lookup_table tusb_gpio_table = { .dev_id = "musb-tusb", .table = { - GPIO_LOOKUP("gpio-0-15", 0, "enable", - GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-48-63", 10, "int", - GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH), { } }, }; @@ -140,12 +138,11 @@ static int slot1_cover_open; static int slot2_cover_open; static struct device *mmc_device; -static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { +static struct gpiod_lookup_table nokia800_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { /* Slot switch, GPIO 96 */ - GPIO_LOOKUP("gpio-80-111", 16, - "switch", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), { } }, }; @@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { static struct gpiod_lookup_table nokia810_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { + /* Slot switch, GPIO 96 */ + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), /* Slot index 1, VSD power, GPIO 23 */ - GPIO_LOOKUP_IDX("gpio-16-31", 7, - "vsd", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH), /* Slot index 1, VIO power, GPIO 9 */ - GPIO_LOOKUP_IDX("gpio-0-15", 9, - "vio", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH), { } }, }; @@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC]; static void __init n8x0_mmc_init(void) { - gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table); - if (board_is_n810()) { mmc1_data.slots[0].name = "external"; @@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void) mmc1_data.slots[1].name = "internal"; mmc1_data.slots[1].ban_openended = 1; gpiod_add_lookup_table(&nokia810_mmc_gpio_table); + } else { + gpiod_add_lookup_table(&nokia800_mmc_gpio_table); } mmc1_data.nr_slots = 2; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 07565b593ed6..439dc6a26bb9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -25,6 +25,13 @@ #include "fault.h" +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + unsigned long addr = (unsigned long)unsafe_src; + + return addr >= TASK_SIZE && ULONG_MAX - addr >= size; +} + #ifdef CONFIG_MMU /* @@ -588,6 +595,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; + pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index d19d140a10c7..0749cf8a6637 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -296,6 +296,9 @@ void __sync_icache_dcache(pte_t pteval) return; folio = page_folio(pfn_to_page(pfn)); + if (folio_test_reserved(folio)) + return; + if (cache_is_vipt_aliasing()) mapping = folio_flush_mapping(folio); else diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4c3d78691279..e8c6f4be0ce1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -418,7 +418,7 @@ static void set_section_perms(struct section_perm *perms, int n, bool set, } -/** +/* * update_sections_early intended to be called only through stop_machine * framework and executed by only one CPU while all other CPUs will spin and * wait, so no locking is required in this function. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 77e05d4959f2..7b11c98b3e84 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -120,6 +120,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CPUMASK_OFFSTACK if NR_CPUS > 256 select CRC32 select DCACHE_WORD_ACCESS select DYNAMIC_FTRACE if FUNCTION_TRACER @@ -1425,7 +1426,7 @@ config SCHED_SMT config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - default "256" + default "512" config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 3c42240e78e2..4aaf5a0c1ed8 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>; fsl,usbphy = <&usbphy1>; fsl,usbmisc = <&usbmisc1 0>; - clocks = <&usb2_lpcg 0>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_6>; ahb-burst-config = <0x0>; tx-burst-size-dword = <0x10>; rx-burst-size-dword = <0x10>; @@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 { usbphy1: usbphy@5b100000 { compatible = "fsl,imx7ulp-usbphy"; reg = <0x5b100000 0x1000>; - clocks = <&usb2_lpcg 1>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_7>; power-domains = <&pd IMX_SC_R_USB_0_PHY>; status = "disabled"; }; @@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index cab3468b1875..f7a91d43a0ff 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi0_lpcg 0>, - <&spi0_lpcg 1>; + clocks = <&spi0_lpcg IMX_LPCG_CLK_0>, + <&spi0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi1_lpcg 0>, - <&spi1_lpcg 1>; + clocks = <&spi1_lpcg IMX_LPCG_CLK_0>, + <&spi1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi2_lpcg 0>, - <&spi2_lpcg 1>; + clocks = <&spi2_lpcg IMX_LPCG_CLK_0>, + <&spi2_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi3_lpcg 0>, - <&spi3_lpcg 1>; + clocks = <&spi3_lpcg IMX_LPCG_CLK_0>, + <&spi3_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 { compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm"; reg = <0x5a190000 0x1000>; interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&adma_pwm_lpcg 1>, - <&adma_pwm_lpcg 0>; + clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>, + <&adma_pwm_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a880000 0x10000>; interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&adc0_lpcg 0>, - <&adc0_lpcg 1>; + clocks = <&adc0_lpcg IMX_LPCG_CLK_0>, + <&adc0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a890000 0x10000>; interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&adc1_lpcg 0>, - <&adc1_lpcg 1>; + clocks = <&adc1_lpcg IMX_LPCG_CLK_0>, + <&adc1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a8d0000 0x10000>; interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 { * CAN1 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 { * CAN2 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 7e510b21bbac..764c1a08e3b1 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d000000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm0_lpcg 4>, - <&pwm0_lpcg 1>; + clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>, + <&pwm0_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d010000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm1_lpcg 4>, - <&pwm1_lpcg 1>; + clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>, + <&pwm1_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d020000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm2_lpcg 4>, - <&pwm2_lpcg 1>; + clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>, + <&pwm2_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d030000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm3_lpcg 4>, - <&pwm3_lpcg 1>; + clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>, + <&pwm3_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 41c79d2ebdd6..f24b14744799 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -183,7 +184,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index d5c400b355af..f5491a608b2f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -202,7 +203,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 11626fae5f97..aa9f28c4431d 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -153,15 +153,15 @@ }; &flexcan2 { - clocks = <&can1_lpcg 1>, - <&can1_lpcg 0>; + clocks = <&can1_lpcg IMX_LPCG_CLK_4>, + <&can1_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; &flexcan3 { - clocks = <&can2_lpcg 1>, - <&can2_lpcg 0>; + clocks = <&can2_lpcg IMX_LPCG_CLK_4>, + <&can2_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; diff --git a/arch/arm64/boot/dts/qcom/pm6150.dtsi b/arch/arm64/boot/dts/qcom/pm6150.dtsi index ddbaf7280b03..11158c2bd524 100644 --- a/arch/arm64/boot/dts/qcom/pm6150.dtsi +++ b/arch/arm64/boot/dts/qcom/pm6150.dtsi @@ -63,6 +63,52 @@ }; }; + pm6150_vbus: usb-vbus-regulator@1100 { + compatible = "qcom,pm6150-vbus-reg, + qcom,pm8150b-vbus-reg"; + reg = <0x1100>; + status = "disabled"; + }; + + pm6150_typec: typec@1500 { + compatible = "qcom,pm6150-typec, + qcom,pm8150b-typec"; + reg = <0x1500>, <0x1700>; + interrupts = <0x0 0x15 0x00 IRQ_TYPE_EDGE_RISING>, + <0x0 0x15 0x01 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x15 0x02 IRQ_TYPE_EDGE_RISING>, + <0x0 0x15 0x03 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x15 0x04 IRQ_TYPE_EDGE_RISING>, + <0x0 0x15 0x05 IRQ_TYPE_EDGE_RISING>, + <0x0 0x15 0x06 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x15 0x07 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x00 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x01 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x02 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x03 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x04 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x05 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x06 IRQ_TYPE_EDGE_RISING>, + <0x0 0x17 0x07 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "or-rid-detect-change", + "vpd-detect", + "cc-state-change", + "vconn-oc", + "vbus-change", + "attach-detach", + "legacy-cable-detect", + "try-snk-src-detect", + "sig-tx", + "sig-rx", + "msg-tx", + "msg-rx", + "msg-tx-failed", + "msg-tx-discarded", + "msg-rx-discarded", + "fr-swap"; + status = "disabled"; + }; + pm6150_temp: temp-alarm@2400 { compatible = "qcom,spmi-temp-alarm"; reg = <0x2400>; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index f3a6da8b2890..5260c63db007 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -944,6 +944,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c index b54c34793701..f1ebc025e1df 100644 --- a/arch/arm64/hyperv/hv_core.c +++ b/arch/arm64/hyperv/hv_core.c @@ -160,22 +160,22 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) return; panic_reported = true; - guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OSID); + guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OS_ID); /* * Hyper-V provides the ability to store only 5 values. * Pick the passed in error value, the guest_id, the PC, * and the SP. */ - hv_set_vpreg(HV_REGISTER_CRASH_P0, err); - hv_set_vpreg(HV_REGISTER_CRASH_P1, guest_id); - hv_set_vpreg(HV_REGISTER_CRASH_P2, regs->pc); - hv_set_vpreg(HV_REGISTER_CRASH_P3, regs->sp); - hv_set_vpreg(HV_REGISTER_CRASH_P4, 0); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P0, err); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P1, guest_id); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P2, regs->pc); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P3, regs->sp); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P4, 0); /* * Let Hyper-V know there is crash data available */ - hv_set_vpreg(HV_REGISTER_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); + hv_set_vpreg(HV_REGISTER_GUEST_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); } EXPORT_SYMBOL_GPL(hyperv_report_panic); diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index f1b8a04ee9f2..b1a4de4eee29 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -19,10 +19,17 @@ static bool hyperv_initialized; +int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) +{ + hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, + (struct hv_get_vp_registers_output *)info); + + return 0; +} + static int __init hyperv_init(void) { struct hv_get_vp_registers_output result; - u32 a, b, c, d; u64 guest_id; int ret; @@ -39,7 +46,7 @@ static int __init hyperv_init(void) /* Setup the guest ID */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); - hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); + hv_set_vpreg(HV_REGISTER_GUEST_OS_ID, guest_id); /* Get the features and hints from Hyper-V */ hv_get_vpreg_128(HV_REGISTER_FEATURES, &result); @@ -54,15 +61,6 @@ static int __init hyperv_init(void) ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, ms_hyperv.misc_features); - /* Get information about the Hyper-V host version */ - hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result); - a = result.as32.a; - b = result.as32.b; - c = result.as32.c; - d = result.as32.d; - pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", - b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24); - ret = hv_common_init(); if (ret) return ret; @@ -74,6 +72,8 @@ static int __init hyperv_init(void) return ret; } + ms_hyperv_late_init(); + hyperv_initialized = true; return 0; } diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h index bc6c7ac934a1..bc30aadedfe9 100644 --- a/arch/arm64/include/asm/hyperv-tlfs.h +++ b/arch/arm64/include/asm/hyperv-tlfs.h @@ -22,14 +22,6 @@ */ /* - * These Hyper-V registers provide information equivalent to the CPUID - * instruction on x86/x64. - */ -#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ -#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ -#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ - -/* * Group C Features. See the asm-generic version of hyperv-tlfs.h * for a description of Feature Groups. */ @@ -41,28 +33,29 @@ #define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) /* - * Synthetic register definitions equivalent to MSRs on x86/x64 + * To support arch-generic code calling hv_set/get_register: + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl + * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall */ -#define HV_REGISTER_CRASH_P0 0x00000210 -#define HV_REGISTER_CRASH_P1 0x00000211 -#define HV_REGISTER_CRASH_P2 0x00000212 -#define HV_REGISTER_CRASH_P3 0x00000213 -#define HV_REGISTER_CRASH_P4 0x00000214 -#define HV_REGISTER_CRASH_CTL 0x00000215 +#define HV_MSR_CRASH_P0 (HV_REGISTER_GUEST_CRASH_P0) +#define HV_MSR_CRASH_P1 (HV_REGISTER_GUEST_CRASH_P1) +#define HV_MSR_CRASH_P2 (HV_REGISTER_GUEST_CRASH_P2) +#define HV_MSR_CRASH_P3 (HV_REGISTER_GUEST_CRASH_P3) +#define HV_MSR_CRASH_P4 (HV_REGISTER_GUEST_CRASH_P4) +#define HV_MSR_CRASH_CTL (HV_REGISTER_GUEST_CRASH_CTL) -#define HV_REGISTER_GUEST_OSID 0x00090002 -#define HV_REGISTER_VP_INDEX 0x00090003 -#define HV_REGISTER_TIME_REF_COUNT 0x00090004 -#define HV_REGISTER_REFERENCE_TSC 0x00090017 +#define HV_MSR_VP_INDEX (HV_REGISTER_VP_INDEX) +#define HV_MSR_TIME_REF_COUNT (HV_REGISTER_TIME_REF_COUNT) +#define HV_MSR_REFERENCE_TSC (HV_REGISTER_REFERENCE_TSC) -#define HV_REGISTER_SINT0 0x000A0000 -#define HV_REGISTER_SCONTROL 0x000A0010 -#define HV_REGISTER_SIEFP 0x000A0012 -#define HV_REGISTER_SIMP 0x000A0013 -#define HV_REGISTER_EOM 0x000A0014 +#define HV_MSR_SINT0 (HV_REGISTER_SINT0) +#define HV_MSR_SCONTROL (HV_REGISTER_SCONTROL) +#define HV_MSR_SIEFP (HV_REGISTER_SIEFP) +#define HV_MSR_SIMP (HV_REGISTER_SIMP) +#define HV_MSR_EOM (HV_REGISTER_EOM) -#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 -#define HV_REGISTER_STIMER0_COUNT 0x000B0001 +#define HV_MSR_STIMER0_CONFIG (HV_REGISTER_STIMER0_CONFIG) +#define HV_MSR_STIMER0_COUNT (HV_REGISTER_STIMER0_COUNT) union hv_msi_entry { u64 as_uint64[2]; diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h index 20070a847304..a975e1a689dd 100644 --- a/arch/arm64/include/asm/mshyperv.h +++ b/arch/arm64/include/asm/mshyperv.h @@ -31,12 +31,12 @@ void hv_set_vpreg(u32 reg, u64 value); u64 hv_get_vpreg(u32 reg); void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); -static inline void hv_set_register(unsigned int reg, u64 value) +static inline void hv_set_msr(unsigned int reg, u64 value) { hv_set_vpreg(reg, value); } -static inline u64 hv_get_register(unsigned int reg) +static inline u64 hv_get_msr(unsigned int reg) { return hv_get_vpreg(reg); } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3b0e8248e1a4..a75de2665d84 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 3. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. We save this for last to * ensure 64KB start alignment is maintained for the LPA2 case. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ce08b744aaab..cb68adcabe07 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) adr_l x1, __hyp_text_end adr_l x2, dcache_clean_poc blr x2 + + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el2, x0 + isb 0: mov_q x0, HCR_HOST_NVHE_FLAGS + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be + * RES1 in that case. Publish the E2H bit early so that + * it can be picked up by the init_el2_state macro. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f + + orr x0, x0, #HCR_E2H +1: msr hcr_el2, x0 isb @@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) mov_q x1, INIT_SCTLR_EL1_MMU_OFF - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x0, SYS_ID_AA64MMFR4_EL1 - ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH - tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 2f -1: + /* Set a sane SCTLR_EL1, the VHE way */ - pre_disable_mmu_workaround msr_s SYS_SCTLR_EL12, x1 mov x2, #BOOT_CPU_FLAG_E2H b 3f 2: - pre_disable_mmu_workaround msr sctlr_el1, x1 mov x2, xzr 3: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 162b030ab9da..0d022599eb61 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, { unsigned int vq; bool active; - bool fpsimd_only; enum vec_type task_type; memset(header, 0, sizeof(*header)); @@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = false; break; default: WARN_ON_ONCE(1); @@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, } if (active) { - if (fpsimd_only) { + if (target->thread.fp_type == FP_STATE_FPSIMD) { header->flags |= SVE_PT_REGS_FPSIMD; } else { header->flags |= SVE_PT_REGS_SVE; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..c4a0a35e02c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) { - kvm_info("Protected nVHE mode initialized successfully\n"); - } else if (in_hyp_mode) { - kvm_info("VHE mode initialized successfully\n"); - } else { - char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n'; - kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode); - } + kvm_info("%s%sVHE mode initialized successfully\n", + in_hyp_mode ? "" : (is_protected_kvm_enabled() ? + "Protected " : "Hyp "), + in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? + "h" : "n")); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..2fc68da4036d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt, false); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..5a59ef88b646 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return -EINVAL; @@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * Perform the appropriate TLB invalidation based on the * evicted pte value (if any). */ - if (kvm_pte_table(ctx->old, ctx->level)) - kvm_tlb_flush_vmid_range(mmu, ctx->addr, - kvm_granule_size(ctx->level)); - else if (kvm_pte_valid(ctx->old)) + if (kvm_pte_table(ctx->old, ctx->level)) { + u64 size = kvm_granule_size(ctx->level); + u64 addr = ALIGN_DOWN(ctx->addr, size); + + kvm_tlb_flush_vmid_range(mmu, addr, size); + } else if (kvm_pte_valid(ctx->old)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } if (stage2_pte_is_counted(ctx->old)) @@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt)) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, - ctx->addr, ctx->level); + if (kvm_pte_table(ctx->old, ctx->level)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + TLBI_TTL_UNKNOWN); + } else if (!stage2_unmap_defer_tlb_flush(pgt)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + ctx->level); + } } mm_ops->put_page(ctx->ptep); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..1a60b95381e8 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..dc04bc767865 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (esr_fsc_is_permission_fault(esr)) { + if (esr_fsc_is_translation_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0f0e10bb0a95..b872b003a55f 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -276,7 +276,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *ptep = NULL; pgdp = pgd_offset(mm, addr); - p4dp = p4d_offset(pgdp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (!p4dp) + return NULL; + pudp = pud_alloc(mm, p4dp, addr); if (!pudp) return NULL; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0c4e3ecf989d..0e270a1c51e6 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c5b461dda438..122021f9bdfc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: @@ -1256,7 +1256,7 @@ emit_cond_jmp: } else { emit_a64_mov_i(1, tmp, off, ctx); if (sign_extend) - emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + emit(A64_LDRSW(dst, src, tmp), ctx); else emit(A64_LDR32(dst, src, tmp), ctx); } diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 1414052e7d6b..e233b5efa276 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -7,11 +7,13 @@ config HEXAGON select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT + select ARCH_WANT_FRAME_POINTERS select DMA_GLOBAL_POOL select HAVE_PAGE_SIZE_4KB select HAVE_PAGE_SIZE_16KB select HAVE_PAGE_SIZE_64KB select HAVE_PAGE_SIZE_256KB + select FRAME_POINTER # Other pending projects/to-do items. # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS @@ -23,6 +25,7 @@ config HEXAGON select HAVE_PERF_EVENTS # GENERIC_ALLOCATOR is used by dma_alloc_coherent() select GENERIC_ALLOCATOR + select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK @@ -47,9 +50,6 @@ config HEXAGON_PHYS_OFFSET help Platforms that don't load the kernel at zero set this. -config FRAME_POINTER - def_bool y - config LOCKDEP_SUPPORT def_bool y @@ -62,12 +62,6 @@ config MMU config GENERIC_CSUM def_bool y -# -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_IRQ_PROBE - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1140051a0c45..1150b77fa281 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index c139d0d72802..a5f300ec6f28 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -15,6 +15,7 @@ config LOONGARCH select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS @@ -104,6 +105,7 @@ config LOONGARCH select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT @@ -133,20 +135,24 @@ config LOONGARCH select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES + select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC select HAVE_RETHOOK select HAVE_RSEQ select HAVE_RUST select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SETUP_PER_CPU_AREA if NUMA + select HAVE_STACK_VALIDATION if HAVE_OBJTOOL select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ @@ -624,6 +630,8 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. +source "kernel/livepatch/Kconfig" + endmenu config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 8d36aab53008..98d60630c3d4 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE Some of the addresses it reports may be incorrect (but better than the Guess unwinder). +config UNWINDER_ORC + bool "ORC unwinder" + select OBJTOOL + help + This option enables the ORC (Oops Rewind Capability) unwinder for + unwinding kernel stack traces. It uses a custom data format which is + a simplified version of the DWARF Call Frame Information standard. + + Enabling this option will increase the kernel's runtime memory usage + by roughly 2-4MB, depending on your kernel config. + endchoice diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index fa4fb09909ae..df6caf79537a 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -26,6 +26,18 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + ifdef CONFIG_DYNAMIC_FTRACE KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY CC_FLAGS_FTRACE := -fpatchable-function-entry=2 @@ -72,8 +84,6 @@ KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) -KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) -KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel @@ -82,6 +92,15 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs endif +KBUILD_AFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) +KBUILD_CFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) +KBUILD_AFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub) +KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub) + +ifdef CONFIG_OBJTOOL +KBUILD_CFLAGS += -fno-jump-tables +endif + KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 49a70f8c3cab..b6aeb1f70e2a 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -100,6 +100,13 @@ #size-cells = <2>; dma-coherent; + isa@18000000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18000000 0x4000>; + }; + liointc0: interrupt-controller@1fe01400 { compatible = "loongson,liointc-2.0"; reg = <0x0 0x1fe01400 0x0 0x40>, diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts index dca91caf895e..74b99bd234cc 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -61,12 +61,45 @@ &gmac0 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy0>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <2>; + }; + }; }; &gmac1 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy1>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy1: ethernet-phy@1 { + reg = <2>; + }; + }; }; &gmac2 { status = "okay"; + + phy-mode = "rgmii"; + phy-handle = <&phy2>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy2: ethernet-phy@2 { + reg = <0>; + }; + }; }; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index a231949b5f55..9eab2d02cbe8 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -51,6 +51,13 @@ #address-cells = <2>; #size-cells = <2>; + isa@18400000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18400000 0x4000>; + }; + pmc: power-management@100d0000 { compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x100d0000 0x0 0x58>; @@ -109,6 +116,8 @@ msi: msi-controller@1fe01140 { compatible = "loongson,pch-msi-1.0"; reg = <0x0 0x1fe01140 0x0 0x8>; + interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <192>; @@ -140,27 +149,34 @@ #address-cells = <3>; #size-cells = <2>; device_type = "pci"; + msi-parent = <&msi>; bus-range = <0x0 0xff>; - ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>, + ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>, <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>; gmac0: ethernet@3,0 { reg = <0x1800 0x0 0x0 0x0 0x0>; - interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, + <13 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac1: ethernet@3,1 { reg = <0x1900 0x0 0x0 0x0 0x0>; - interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac2: ethernet@3,2 { reg = <0x1a00 0x0 0x0 0x0 0x0>; - interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>, + <18 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c index a49e507af38c..3eebea3a7b47 100644 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -44,7 +44,6 @@ static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) CRC32(crc, value, w); p += sizeof(u32); - len -= sizeof(u32); } if (len & sizeof(u16)) { @@ -80,7 +79,6 @@ static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) CRC32C(crc, value, w); p += sizeof(u32); - len -= sizeof(u32); } if (len & sizeof(u16)) { diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 93783fa24f6e..2dbec7853ae8 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h + generic-y += dma-contiguous.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h +generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b24437e28c6e..7bd47d65bf7a 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -11,6 +11,7 @@ #define _ASM_ADDRSPACE_H #include <linux/const.h> +#include <linux/sizes.h> #include <asm/loongarch.h> diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index d4ca3ba25418..08388876ade4 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -44,6 +44,7 @@ do { \ instrumentation_begin(); \ __BUG_FLAGS(BUGFLAG_WARNING|(flags)); \ + annotate_reachable(); \ instrumentation_end(); \ } while (0) diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 80bd74106985..f8754d08a31a 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -37,8 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 - #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_dup_mm(mm) do { } while (0) @@ -47,7 +45,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #define flush_icache_user_page(vma, page, addr, len) do { } while (0) -#define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h index af74a3fdcad1..c6d20736fd92 100644 --- a/arch/loongarch/include/asm/exception.h +++ b/arch/loongarch/include/asm/exception.h @@ -6,6 +6,8 @@ #include <asm/ptrace.h> #include <linux/kprobes.h> +extern void *exception_table[]; + void show_registers(struct pt_regs *regs); asmlinkage void cache_parity_error(void); diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index c486c2341b66..c2f9979b2979 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,11 +14,6 @@ #include <asm/pgtable-bits.h> #include <asm/string.h> -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) - extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); @@ -71,6 +66,23 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) #define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) +#define __io_aw() mmiowb() + +#ifdef CONFIG_KFENCE +#define virt_to_phys(kaddr) \ +({ \ + (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \ + page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\ +}) + +#define phys_to_virt(paddr) \ +({ \ + extern char *__kfence_pool; \ + (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \ + page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\ +}) +#endif + #include <asm-generic/io.h> #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 6c82aea1c993..a6a5760da3a3 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -16,6 +16,7 @@ static inline bool arch_kfence_init_pool(void) { int err; + char *kaddr, *vaddr; char *kfence_pool = __kfence_pool; struct vm_struct *area; @@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void) return false; } + kaddr = kfence_pool; + vaddr = __kfence_pool; + while (kaddr < kfence_pool + KFENCE_POOL_SIZE) { + set_page_address(virt_to_page(kaddr), vaddr); + kaddr += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + return true; } diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 2ecd82bb64e1..f33f3fd32ecc 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -6,6 +6,7 @@ #define _ASM_MODULE_H #include <asm/inst.h> +#include <asm/orc_types.h> #include <asm-generic/module.h> #define RELA_STACK_DEPTH 16 @@ -21,6 +22,12 @@ struct mod_arch_specific { struct mod_section plt; struct mod_section plt_idx; +#ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif + /* For CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h new file mode 100644 index 000000000000..f9d509c3fd70 --- /dev/null +++ b/arch/loongarch/include/asm/orc_header.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h new file mode 100644 index 000000000000..b02e6357def4 --- /dev/null +++ b/arch/loongarch/include/asm/orc_lookup.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ORC_LOOKUP_H +#define _ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP (unsigned long)_stext +#define LOOKUP_STOP_IP (unsigned long)_etext + +#endif /* LINKER_SCRIPT */ + +#endif /* _ORC_LOOKUP_H */ diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h new file mode 100644 index 000000000000..caf1f71a1057 --- /dev/null +++ b/arch/loongarch/include/asm/orc_types.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ORC_TYPES_H +#define _ORC_TYPES_H + +#include <linux/types.h> + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and FP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is + * usually based on. + * + * The rest of the base registers are needed for special cases like entry code + * and GCC realigned stacks. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_SP 2 +#define ORC_REG_FP 3 +#define ORC_REG_MAX 4 + +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + +#ifndef __ASSEMBLY__ +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and FP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + s16 sp_offset; + s16 fp_offset; + s16 ra_offset; + unsigned int sp_reg:4; + unsigned int fp_reg:4; + unsigned int ra_reg:4; + unsigned int type:3; + unsigned int signal:1; +}; +#endif /* __ASSEMBLY__ */ + +#endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index afb6fa16b826..e85df33f11c7 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -75,7 +75,29 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) -#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) +struct page *dmw_virt_to_page(unsigned long kaddr); +struct page *tlb_virt_to_page(unsigned long kaddr); + +#define pfn_to_phys(pfn) __pfn_to_phys(pfn) +#define phys_to_pfn(paddr) __phys_to_pfn(paddr) + +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr)) + +#ifndef CONFIG_KFENCE + +#define page_to_virt(page) __va(page_to_phys(page)) +#define virt_to_page(kaddr) phys_to_page(__pa(kaddr)) + +#else + +#define WANT_PAGE_VIRTUAL + +#define page_to_virt(page) \ +({ \ + extern char *__kfence_pool; \ + (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \ +}) #define virt_to_page(kaddr) \ ({ \ @@ -83,6 +105,11 @@ typedef struct { unsigned long pgprot; } pgprot_t; dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ }) +#endif + +#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn)) +#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr)) + extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 9b36ac003f89..8f290e5546cf 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -29,7 +29,12 @@ static inline void set_my_cpu_offset(unsigned long off) __my_cpu_offset = off; csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset + +#define __my_cpu_offset \ +({ \ + __asm__ __volatile__("":"+r"(__my_cpu_offset)); \ + __my_cpu_offset; \ +}) #define PERCPU_OP(op, asm_op, c_op) \ static __always_inline unsigned long __percpu_##op(void *ptr, \ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 8b5df1bbf9e9..af3acdf3481a 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -363,9 +363,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt extern pgd_t swapper_pg_dir[]; extern pgd_t invalid_pg_dir[]; -struct page *dmw_virt_to_page(unsigned long kaddr); -struct page *tlb_virt_to_page(unsigned long kaddr); - /* * The following only work if pte_present() is true. * Undefined behaviour if not.. diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h deleted file mode 100644 index 34f43f8ad591..000000000000 --- a/arch/loongarch/include/asm/qspinlock.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_QSPINLOCK_H -#define _ASM_QSPINLOCK_H - -#include <asm-generic/qspinlock_types.h> - -#define queued_spin_unlock queued_spin_unlock - -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - compiletime_assert_atomic_type(lock->locked); - c_sync(); - WRITE_ONCE(lock->locked, 0); -} - -#include <asm-generic/qspinlock.h> - -#endif /* _ASM_QSPINLOCK_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 4fb1e6408b98..45b507a7b06f 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -13,6 +13,7 @@ #include <asm/asm-offsets.h> #include <asm/loongarch.h> #include <asm/thread_info.h> +#include <asm/unwind_hints.h> /* Make the addition of cfi info a little easier. */ .macro cfi_rel_offset reg offset=0 docfi=0 @@ -162,6 +163,7 @@ li.w t0, CSR_CRMD_WE csrxchg t0, t0, LOONGARCH_CSR_CRMD #endif + UNWIND_HINT_REGS .endm .macro SAVE_ALL docfi=0 @@ -219,6 +221,7 @@ .macro RESTORE_SP_AND_RET docfi=0 cfi_ld sp, PT_R3, \docfi + UNWIND_HINT_FUNC ertn .endm diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 8cb653d49a54..8bf0e6f51546 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -86,6 +86,7 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ #define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ #define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ +#define TIF_PATCH_PENDING 21 /* pending live patching update */ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) @@ -105,6 +106,7 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE) #define _TIF_USEDLBT (1<<TIF_USEDLBT) #define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE) +#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index b9dce87afd2e..40a6763c5aec 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -16,6 +16,7 @@ enum unwinder_type { UNWINDER_GUESS, UNWINDER_PROLOGUE, + UNWINDER_ORC, }; struct unwind_state { @@ -24,7 +25,7 @@ struct unwind_state { struct task_struct *task; bool first, error, reset; int graph_idx; - unsigned long sp, pc, ra; + unsigned long sp, fp, pc, ra; }; bool default_next_frame(struct unwind_state *state); @@ -61,14 +62,17 @@ static __always_inline void __unwind_start(struct unwind_state *state, state->sp = regs->regs[3]; state->pc = regs->csr_era; state->ra = regs->regs[1]; + state->fp = regs->regs[22]; } else if (task && task != current) { state->sp = thread_saved_fp(task); state->pc = thread_saved_ra(task); state->ra = 0; + state->fp = 0; } else { state->sp = (unsigned long)__builtin_frame_address(0); state->pc = (unsigned long)__builtin_return_address(0); state->ra = 0; + state->fp = 0; } state->task = task; get_stack_info(state->sp, state->task, &state->stack_info); @@ -77,6 +81,18 @@ static __always_inline void __unwind_start(struct unwind_state *state, static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state) { - return unwind_done(state) ? 0 : state->pc; + if (unwind_done(state)) + return 0; + + return __kernel_text_address(state->pc) ? state->pc : 0; } + +#ifdef CONFIG_UNWINDER_ORC +void unwind_init(void); +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); +#else +static inline void unwind_init(void) {} +static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} +#endif + #endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h new file mode 100644 index 000000000000..a01086ad9dde --- /dev/null +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H +#define _ASM_LOONGARCH_UNWIND_HINTS_H + +#include <linux/objtool.h> +#include <asm/orc_types.h> + +#ifdef __ASSEMBLY__ + +.macro UNWIND_HINT_UNDEFINED + UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED +.endm + +.macro UNWIND_HINT_END_OF_STACK + UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK +.endm + +.macro UNWIND_HINT_REGS + UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS +.endm + +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 3c808c680370..3a7620b66bc6 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -3,6 +3,8 @@ # Makefile for the Linux/LoongArch kernel. # +OBJECT_FILES_NON_STANDARD_head.o := y + extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ @@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o CFLAGS_module.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) +CFLAGS_traps.o += $(call cc-option,-Wno-override-init,) CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_FUNCTION_TRACER @@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 1ec8e4c4cc2b..48e7e34e355e 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -14,11 +14,13 @@ #include <asm/regdef.h> #include <asm/stackframe.h> #include <asm/thread_info.h> +#include <asm/unwind_hints.h> .text .cfi_sections .debug_frame .align 5 SYM_CODE_START(handle_syscall) + UNWIND_HINT_UNDEFINED csrrd t0, PERCPU_BASE_KS la.pcrel t1, kernelsp add.d t1, t1, t0 @@ -57,6 +59,7 @@ SYM_CODE_START(handle_syscall) cfi_st fp, PT_R22 SAVE_STATIC + UNWIND_HINT_REGS #ifdef CONFIG_KGDB li.w t1, CSR_CRMD_WE @@ -75,6 +78,7 @@ SYM_CODE_END(handle_syscall) _ASM_NOKPROBE(handle_syscall) SYM_CODE_START(ret_from_fork) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, sp bl syscall_exit_to_user_mode @@ -84,6 +88,7 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) SYM_CODE_START(ret_from_kernel_thread) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, s1 jirl ra, s0, 0 diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 4382e36ae3d4..69a85f2479fb 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -15,6 +15,7 @@ #include <asm/fpregdef.h> #include <asm/loongarch.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> #define FPU_REG_WIDTH 8 #define LSX_REG_WIDTH 16 @@ -526,3 +527,9 @@ SYM_FUNC_END(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure jr ra + +#ifdef CONFIG_CPU_HAS_LBT +STACK_FRAME_NON_STANDARD _restore_fp +STACK_FRAME_NON_STANDARD _restore_lsx +STACK_FRAME_NON_STANDARD _restore_lasx +#endif diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 2bb3aa2dcfcb..86d5d90ebefe 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -32,6 +32,7 @@ SYM_FUNC_START(__arch_cpu_idle) SYM_FUNC_END(__arch_cpu_idle) SYM_CODE_START(handle_vint) + UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL la_abs t1, __arch_cpu_idle @@ -49,6 +50,7 @@ SYM_CODE_START(handle_vint) SYM_CODE_END(handle_vint) SYM_CODE_START(except_vec_cex) + UNWIND_HINT_UNDEFINED b cache_parity_error SYM_CODE_END(except_vec_cex) @@ -67,6 +69,7 @@ SYM_CODE_END(except_vec_cex) .macro BUILD_HANDLER exception handler prep .align 5 SYM_CODE_START(handle_\exception) + UNWIND_HINT_UNDEFINED 666: BACKUP_T0T1 SAVE_ALL @@ -77,7 +80,9 @@ SYM_CODE_END(except_vec_cex) 668: RESTORE_ALL_AND_RET SYM_CODE_END(handle_\exception) + .pushsection ".data", "aw", %progbits SYM_DATA(unwind_hint_\exception, .word 668b - 666b) + .popsection .endm BUILD_HANDLER ade ade badv @@ -94,6 +99,7 @@ SYM_CODE_END(except_vec_cex) BUILD_HANDLER reserved reserved none /* others */ SYM_CODE_START(handle_sys) + UNWIND_HINT_UNDEFINED la_abs t0, handle_syscall jr t0 SYM_CODE_END(handle_sys) diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 9c75120a26d8..001f061d226a 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -11,6 +11,7 @@ #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> #define SCR_REG_WIDTH 8 @@ -153,3 +154,5 @@ SYM_FUNC_END(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure jr ra + +STACK_FRAME_NON_STANDARD _restore_ftop_context diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index 482aa553aa2d..0c65cf09110c 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -73,6 +73,7 @@ SYM_FUNC_START(ftrace_stub) SYM_FUNC_END(ftrace_stub) SYM_CODE_START(ftrace_common) + UNWIND_HINT_UNDEFINED PTR_ADDI a0, ra, -8 /* arg0: ip */ move a1, t0 /* arg1: parent_ip */ la.pcrel t1, function_trace_op @@ -113,12 +114,14 @@ ftrace_common_return: SYM_CODE_END(ftrace_common) SYM_CODE_START(ftrace_caller) + UNWIND_HINT_UNDEFINED ftrace_regs_entry allregs=0 b ftrace_common SYM_CODE_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS SYM_CODE_START(ftrace_regs_caller) + UNWIND_HINT_UNDEFINED ftrace_regs_entry allregs=1 b ftrace_common SYM_CODE_END(ftrace_regs_caller) @@ -126,6 +129,7 @@ SYM_CODE_END(ftrace_regs_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) + UNWIND_HINT_UNDEFINED PTR_L a0, sp, PT_ERA PTR_ADDI a0, a0, -8 /* arg0: self_addr */ PTR_ADDI a1, sp, PT_R1 /* arg1: parent */ @@ -134,6 +138,7 @@ SYM_CODE_START(ftrace_graph_caller) SYM_CODE_END(ftrace_graph_caller) SYM_CODE_START(return_to_handler) + UNWIND_HINT_UNDEFINED /* Save return value regs */ PTR_ADDI sp, sp, -FGRET_REGS_SIZE PTR_S a0, sp, FGRET_REGS_A0 @@ -155,6 +160,7 @@ SYM_CODE_END(return_to_handler) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) + UNWIND_HINT_UNDEFINED jr t0 SYM_CODE_END(ftrace_stub_direct_tramp) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index b13b2858fe39..c7d0338d12c1 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <asm/alternative.h> #include <asm/inst.h> +#include <asm/unwind.h> static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr, int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL; - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrs + s->sh_name)) - apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + alt = s; + if (!strcmp(".orc_unwind", secstrs + s->sh_name)) + orc = s; + if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name)) + orc_ip = s; if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name)) - module_init_ftrace_plt(hdr, s, mod); + ftrace = s; } + if (alt) + apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size); + + if (orc && orc_ip) + unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size); + + if (ftrace) + module_init_ftrace_plt(hdr, ftrace, mod); + return 0; } diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index f49f6b053763..84e6de2fd973 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -15,6 +15,7 @@ #include <asm/addrspace.h> SYM_CODE_START(relocate_new_kernel) + UNWIND_HINT_UNDEFINED /* * a0: EFI boot flag for the new kernel * a1: Command line pointer for the new kernel @@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel) * then start at the entry point from LOONGARCH_IOCSR_MBUF0. */ SYM_CODE_START(kexec_smp_wait) + UNWIND_HINT_UNDEFINED 1: li.w t0, 0x100 /* wait for init loop */ 2: addi.w t0, t0, -1 /* limit mailbox access */ bnez t0, 2b @@ -106,6 +108,5 @@ SYM_CODE_END(kexec_smp_wait) relocate_new_kernel_end: -SYM_DATA_START(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel -SYM_DATA_END(relocate_new_kernel_size) + .section ".data" +SYM_DATA(relocate_new_kernel_size, .long relocate_new_kernel_end - relocate_new_kernel) diff --git a/arch/loongarch/kernel/rethook_trampoline.S b/arch/loongarch/kernel/rethook_trampoline.S index bd5772c96338..d4ceb2fa2a5c 100644 --- a/arch/loongarch/kernel/rethook_trampoline.S +++ b/arch/loongarch/kernel/rethook_trampoline.S @@ -76,6 +76,7 @@ .endm SYM_CODE_START(arch_rethook_trampoline) + UNWIND_HINT_UNDEFINED addi.d sp, sp, -PT_SIZE save_all_base_regs diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 2b72eb326b44..60e0fe97f61a 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -47,6 +47,7 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/time.h> +#include <asm/unwind.h> #define SMBIOS_BIOSSIZE_OFFSET 0x09 #define SMBIOS_BIOSEXTERN_OFFSET 0x13 @@ -587,6 +588,7 @@ static void __init prefill_possible_map(void) void __init setup_arch(char **cmdline_p) { cpu_probe(); + unwind_init(); init_environ(); efi_init(); diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index f623feb2129f..9a038d1070d7 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, regs->csr_era = thread_saved_ra(task); } regs->regs[1] = 0; + regs->regs[22] = 0; } for (unwind_start(&state, task, regs); @@ -39,6 +40,46 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, } } +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) +{ + unsigned long addr; + struct pt_regs dummyregs; + struct pt_regs *regs = &dummyregs; + struct unwind_state state; + + if (task == current) { + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + regs->csr_era = (unsigned long)__builtin_return_address(0); + } else { + regs->regs[3] = thread_saved_fp(task); + regs->csr_era = thread_saved_ra(task); + } + regs->regs[1] = 0; + regs->regs[22] = 0; + + for (unwind_start(&state, task, regs); + !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + + /* + * A NULL or invalid return address probably means there's some + * generated code which __kernel_text_address() doesn't know about. + */ + if (!addr) + return -EINVAL; + + if (!consume_entry(cookie, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + + return 0; +} + static int copy_stack_frame(unsigned long fp, struct stack_frame *frame) { diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index aebfc3733a76..f9f4eb00c92e 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -53,6 +53,32 @@ #include "access-helper.h" +void *exception_table[EXCCODE_INT_START] = { + [0 ... EXCCODE_INT_START - 1] = handle_reserved, + + [EXCCODE_TLBI] = handle_tlb_load, + [EXCCODE_TLBL] = handle_tlb_load, + [EXCCODE_TLBS] = handle_tlb_store, + [EXCCODE_TLBM] = handle_tlb_modify, + [EXCCODE_TLBNR] = handle_tlb_protect, + [EXCCODE_TLBNX] = handle_tlb_protect, + [EXCCODE_TLBPE] = handle_tlb_protect, + [EXCCODE_ADE] = handle_ade, + [EXCCODE_ALE] = handle_ale, + [EXCCODE_BCE] = handle_bce, + [EXCCODE_SYS] = handle_sys, + [EXCCODE_BP] = handle_bp, + [EXCCODE_INE] = handle_ri, + [EXCCODE_IPE] = handle_ri, + [EXCCODE_FPDIS] = handle_fpu, + [EXCCODE_LSXDIS] = handle_lsx, + [EXCCODE_LASXDIS] = handle_lasx, + [EXCCODE_FPE] = handle_fpe, + [EXCCODE_WATCH] = handle_watch, + [EXCCODE_BTDIS] = handle_lbt, +}; +EXPORT_SYMBOL_GPL(exception_table); + static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -1150,19 +1176,9 @@ void __init trap_init(void) for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); - set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); - set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); - set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE); - set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); - set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); - set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); - set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); - set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); - set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); - set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); - set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + /* Set exception vector handler */ + for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); cache_error_setup(); diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c new file mode 100644 index 000000000000..b25722876331 --- /dev/null +++ b/arch/loongarch/kernel/unwind_orc.c @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/objtool.h> +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/exception.h> +#include <asm/orc_header.h> +#include <asm/orc_lookup.h> +#include <asm/orc_types.h> +#include <asm/ptrace.h> +#include <asm/setup.h> +#include <asm/stacktrace.h> +#include <asm/tlb.h> +#include <asm/unwind.h> + +ORC_HEADER; + +#define orc_warn(fmt, ...) \ + printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) + +extern int __start_orc_unwind_ip[]; +extern int __stop_orc_unwind_ip[]; +extern struct orc_entry __start_orc_unwind[]; +extern struct orc_entry __stop_orc_unwind[]; + +static bool orc_init __ro_after_init; +static unsigned int lookup_num_blocks __ro_after_init; + +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .sp_reg = ORC_REG_FP, + .sp_offset = 16, + .fp_reg = ORC_REG_PREV_SP, + .fp_offset = -16, + .ra_reg = ORC_REG_PREV_SP, + .ra_offset = -8, + .type = ORC_TYPE_CALL +}; + +/* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer, + * and we don't have unwind information for NULL. + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function + * pointer into its parent and then continue normally from there. + */ +static struct orc_entry orc_null_entry = { + .sp_reg = ORC_REG_SP, + .sp_offset = sizeof(long), + .fp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL +}; + +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, + unsigned int num_entries, unsigned long ip) +{ + int *first = ip_table; + int *mid = first, *found = first; + int *last = ip_table + num_entries - 1; + + if (!num_entries) + return NULL; + + /* + * Do a binary range search to find the rightmost duplicate of a given + * starting address. Some entries are section terminators which are + * "weak" entries for ensuring there are no gaps. They should be + * ignored when they conflict with a real entry. + */ + while (first <= last) { + mid = first + ((last - first) / 2); + + if (orc_ip(mid) <= ip) { + found = mid; + first = mid + 1; + } else + last = mid - 1; + } + + return u_table + (found - ip_table); +} + +#ifdef CONFIG_MODULES +static struct orc_entry *orc_module_find(unsigned long ip) +{ + struct module *mod; + + mod = __module_address(ip); + if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip) + return NULL; + + return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip); +} +#else +static struct orc_entry *orc_module_find(unsigned long ip) +{ + return NULL; +} +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static struct orc_entry *orc_find(unsigned long ip); + +/* + * Ftrace dynamic trampolines do not have orc entries of their own. + * But they are copies of the ftrace entries that are static and + * defined in ftrace_*.S, which do have orc entries. + * + * If the unwinder comes across a ftrace trampoline, then find the + * ftrace function that was used to create it, and use that ftrace + * function's orc entry, as the placement of the return code in + * the stack will be identical. + */ +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + struct ftrace_ops *ops; + unsigned long tramp_addr, offset; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + + /* Set tramp_addr to the start of the code copied by the trampoline */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + tramp_addr = (unsigned long)ftrace_regs_caller; + else + tramp_addr = (unsigned long)ftrace_caller; + + /* Now place tramp_addr to the location within the trampoline ip is at */ + offset = ip - ops->trampoline; + tramp_addr += offset; + + /* Prevent unlikely recursion */ + if (ip == tramp_addr) + return NULL; + + return orc_find(tramp_addr); +} +#else +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + return NULL; +} +#endif + +static struct orc_entry *orc_find(unsigned long ip) +{ + static struct orc_entry *orc; + + if (ip == 0) + return &orc_null_entry; + + /* For non-init vmlinux addresses, use the fast lookup table: */ + if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { + unsigned int idx, start, stop; + + idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; + + if (unlikely((idx >= lookup_num_blocks-1))) { + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); + return NULL; + } + + start = orc_lookup[idx]; + stop = orc_lookup[idx + 1] + 1; + + if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || + (__start_orc_unwind + stop > __stop_orc_unwind))) { + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); + return NULL; + } + + return __orc_find(__start_orc_unwind_ip + start, + __start_orc_unwind + start, stop - start, ip); + } + + /* vmlinux .init slow lookup: */ + if (is_kernel_inittext(ip)) + return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); + + /* Module lookup: */ + orc = orc_module_find(ip); + if (orc) + return orc; + + return orc_ftrace_find(ip); +} + +#ifdef CONFIG_MODULES + +static DEFINE_MUTEX(sort_mutex); +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; + +static void orc_sort_swap(void *_a, void *_b, int size) +{ + int delta = _b - _a; + int *a = _a, *b = _b, tmp; + struct orc_entry *orc_a, *orc_b; + + /* Swap the .orc_unwind_ip entries: */ + tmp = *a; + *a = *b + delta; + *b = tmp - delta; + + /* Swap the corresponding .orc_unwind entries: */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + orc_b = cur_orc_table + (b - cur_orc_ip_table); + swap(*orc_a, *orc_b); +} + +static int orc_sort_cmp(const void *_a, const void *_b) +{ + const int *a = _a, *b = _b; + unsigned long a_val = orc_ip(a); + unsigned long b_val = orc_ip(b); + struct orc_entry *orc_a; + + if (a_val > b_val) + return 1; + if (a_val < b_val) + return -1; + + /* + * The "weak" section terminator entries need to always be first + * to ensure the lookup code skips them in favor of real entries. + * These terminator entries exist to handle any gaps created by + * whitelisted .o files which didn't get objtool generation. + */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + + return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1; +} + +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, + void *_orc, size_t orc_size) +{ + int *orc_ip = _orc_ip; + struct orc_entry *orc = _orc; + unsigned int num_entries = orc_ip_size / sizeof(int); + + WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(*orc) != 0 || + num_entries != orc_size / sizeof(*orc)); + + /* + * The 'cur_orc_*' globals allow the orc_sort_swap() callback to + * associate an .orc_unwind_ip table entry with its corresponding + * .orc_unwind entry so they can both be swapped. + */ + mutex_lock(&sort_mutex); + cur_orc_ip_table = orc_ip; + cur_orc_table = orc; + sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); + mutex_unlock(&sort_mutex); + + mod->arch.orc_unwind_ip = orc_ip; + mod->arch.orc_unwind = orc; + mod->arch.num_orcs = num_entries; +} +#endif + +void __init unwind_init(void) +{ + int i; + size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; + size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; + size_t num_entries = orc_ip_size / sizeof(int); + struct orc_entry *orc; + + if (!num_entries || orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(struct orc_entry) != 0 || + num_entries != orc_size / sizeof(struct orc_entry)) { + orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); + return; + } + + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ + + /* Initialize the fast lookup table: */ + lookup_num_blocks = orc_lookup_end - orc_lookup; + for (i = 0; i < lookup_num_blocks-1; i++) { + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + + orc_lookup[i] = orc - __start_orc_unwind; + } + + /* Initialize the ending block: */ + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; + + orc_init = true; +} + +static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len) +{ + unsigned long begin = info->begin; + unsigned long end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && addr + len > begin && addr + len <= end); +} + +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len) +{ + struct stack_info *info = &state->stack_info; + + if (on_stack(info, addr, len)) + return true; + + return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len); +} + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + return __unwind_get_return_address(state); +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + __unwind_start(state, task, regs); + state->type = UNWINDER_ORC; + if (!unwind_done(state) && !__kernel_text_address(state->pc)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(unwind_start); + +static bool is_entry_func(unsigned long addr) +{ + extern u32 kernel_entry; + extern u32 kernel_entry_end; + + return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end; +} + +static inline unsigned long bt_address(unsigned long ra) +{ + extern unsigned long eentry; + + if (__kernel_text_address(ra)) + return ra; + + if (__module_text_address(ra)) + return ra; + + if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) { + unsigned long func; + unsigned long type = (ra - eentry) / VECSIZE; + unsigned long offset = (ra - eentry) % VECSIZE; + + switch (type) { + case 0 ... EXCCODE_INT_START - 1: + func = (unsigned long)exception_table[type]; + break; + case EXCCODE_INT_START ... EXCCODE_INT_END: + func = (unsigned long)handle_vint; + break; + default: + func = (unsigned long)handle_reserved; + break; + } + + return func + offset; + } + + return ra; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + unsigned long *p, pc; + struct pt_regs *regs; + struct orc_entry *orc; + struct stack_info *info = &state->stack_info; + + if (unwind_done(state)) + return false; + + /* Don't let modules unload while we're reading their ORC data. */ + preempt_disable(); + + if (is_entry_func(state->pc)) + goto end; + + orc = orc_find(state->pc); + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } else { + if (orc->type == ORC_TYPE_UNDEFINED) + goto err; + + if (orc->type == ORC_TYPE_END_OF_STACK) + goto end; + } + + switch (orc->sp_reg) { + case ORC_REG_SP: + if (info->type == STACK_TYPE_IRQ && state->sp == info->end) + orc->type = ORC_TYPE_REGS; + else + state->sp = state->sp + orc->sp_offset; + break; + case ORC_REG_FP: + state->sp = state->fp; + break; + default: + orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc); + goto err; + } + + switch (orc->fp_reg) { + case ORC_REG_PREV_SP: + p = (unsigned long *)(state->sp + orc->fp_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + state->fp = *p; + break; + case ORC_REG_UNDEFINED: + /* Nothing. */ + break; + default: + orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc); + goto err; + } + + switch (orc->type) { + case ORC_TYPE_CALL: + if (orc->ra_reg == ORC_REG_PREV_SP) { + p = (unsigned long *)(state->sp + orc->ra_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + pc = unwind_graph_addr(state, *p, state->sp); + pc -= LOONGARCH_INSN_SIZE; + } else if (orc->ra_reg == ORC_REG_UNDEFINED) { + if (!state->ra || state->ra == state->pc) + goto err; + + pc = unwind_graph_addr(state, state->ra, state->sp); + pc -= LOONGARCH_INSN_SIZE; + state->ra = 0; + } else { + orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc); + goto err; + } + break; + case ORC_TYPE_REGS: + if (info->type == STACK_TYPE_IRQ && state->sp == info->end) + regs = (struct pt_regs *)info->next_sp; + else + regs = (struct pt_regs *)state->sp; + + if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs))) + goto err; + + if ((info->end == (unsigned long)regs + sizeof(*regs)) && + !regs->regs[3] && !regs->regs[1]) + goto end; + + if (user_mode(regs)) + goto end; + + pc = regs->csr_era; + if (!__kernel_text_address(pc)) + goto err; + + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->fp = regs->regs[22]; + get_stack_info(state->sp, state->task, info); + + break; + default: + orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc); + goto err; + } + + state->pc = bt_address(pc); + if (!state->pc) { + pr_err("cannot find unwind pc at %pK\n", (void *)pc); + goto err; + } + + if (!__kernel_text_address(state->pc)) + goto err; + + preempt_enable(); + return true; + +err: + state->error = true; + +end: + preempt_enable(); + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index a5d0cd2035da..e8e97dbf9ca4 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -2,6 +2,7 @@ #include <linux/sizes.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#include <asm/orc_lookup.h> #define PAGE_SIZE _PAGE_SIZE #define RO_EXCEPTION_TABLE_ALIGN 4 @@ -122,6 +123,8 @@ SECTIONS } #endif + ORC_UNWIND_TABLE + .sdata : { *(.sdata) } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 3634431db18a..80e988985a6a 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -8,7 +8,7 @@ #include <asm/asmmacro.h> #include <asm/loongarch.h> #include <asm/regdef.h> -#include <asm/stackframe.h> +#include <asm/unwind_hints.h> #define HGPR_OFFSET(x) (PT_R0 + 8*x) #define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x) @@ -112,6 +112,7 @@ .text .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) + UNWIND_HINT_UNDEFINED csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH @@ -273,3 +274,9 @@ SYM_FUNC_END(kvm_restore_lasx) .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) + +#ifdef CONFIG_CPU_HAS_LBT +STACK_FRAME_NON_STANDARD kvm_restore_fpu +STACK_FRAME_NON_STANDARD kvm_restore_lsx +STACK_FRAME_NON_STANDARD kvm_restore_lasx +#endif diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index be741544e62b..7a0db643b286 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -10,6 +10,7 @@ #include <asm/asm-extable.h> #include <asm/cpu.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> SYM_FUNC_START(__clear_user) /* @@ -204,3 +205,5 @@ SYM_FUNC_START(__clear_user_fast) _asm_extable 28b, .Lsmall_fixup _asm_extable 29b, .Lexit SYM_FUNC_END(__clear_user_fast) + +STACK_FRAME_NON_STANDARD __clear_user_fast diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index feec3d362803..095ce9181c6c 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -10,6 +10,7 @@ #include <asm/asm-extable.h> #include <asm/cpu.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> SYM_FUNC_START(__copy_user) /* @@ -278,3 +279,5 @@ SYM_FUNC_START(__copy_user_fast) _asm_extable 58b, .Lexit _asm_extable 59b, .Lexit SYM_FUNC_END(__copy_user_fast) + +STACK_FRAME_NON_STANDARD __copy_user_fast diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index fa1148878d2b..9517a2f961af 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -9,6 +9,7 @@ #include <asm/asmmacro.h> #include <asm/cpu.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> .section .noinstr.text, "ax" @@ -197,3 +198,5 @@ SYM_FUNC_START(__memcpy_fast) jr ra SYM_FUNC_END(__memcpy_fast) _ASM_NOKPROBE(__memcpy_fast) + +STACK_FRAME_NON_STANDARD __memcpy_small diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 06d3ca54cbfe..df3846620553 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -9,6 +9,7 @@ #include <asm/asmmacro.h> #include <asm/cpu.h> #include <asm/regdef.h> +#include <asm/unwind_hints.h> .macro fill_to_64 r0 bstrins.d \r0, \r0, 15, 8 @@ -166,3 +167,5 @@ SYM_FUNC_START(__memset_fast) jr ra SYM_FUNC_END(__memset_fast) _ASM_NOKPROBE(__memset_fast) + +STACK_FRAME_NON_STANDARD __memset_fast diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index a9630a81b38a..89af7c12e8c0 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -4,6 +4,7 @@ */ #include <linux/export.h> #include <linux/io.h> +#include <linux/kfence.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/mman.h> @@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr) { unsigned long vaddr = (unsigned long)kaddr; + if (is_kfence_address((void *)kaddr)) + return 1; + if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 2aae72e63871..bda018150000 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -11,13 +11,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { - return pfn_to_page(virt_to_pfn(kaddr)); + return phys_to_page(__pa(kaddr)); } EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { - return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); + return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr)))); } EXPORT_SYMBOL(tlb_virt_to_page); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 0b95d32b30c9..5ac9beb5f093 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -9,8 +9,9 @@ #include <linux/hugetlb.h> #include <linux/export.h> -#include <asm/cpu.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> +#include <asm/exception.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/tlb.h> @@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu) setup_ptwalker(); local_flush_tlb_all(); + if (cpu_has_ptw) { + exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw; + exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw; + } + /* The tlb handlers are generated only once */ if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - if (!cpu_has_ptw) { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); - } else { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); - } - set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + + for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); } else { int vec_sz __maybe_unused; void *addr __maybe_unused; diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d5d682f3d29f..a44387b838af 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -18,6 +18,7 @@ .macro tlb_do_page_fault, write SYM_CODE_START(tlb_do_page_fault_\write) + UNWIND_HINT_UNDEFINED SAVE_ALL csrrd a2, LOONGARCH_CSR_BADV move a0, sp @@ -32,6 +33,7 @@ tlb_do_page_fault 1 SYM_CODE_START(handle_tlb_protect) + UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL move a0, sp @@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect) SYM_CODE_END(handle_tlb_protect) SYM_CODE_START(handle_tlb_load) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -190,6 +193,7 @@ nopage_tlb_load: SYM_CODE_END(handle_tlb_load) SYM_CODE_START(handle_tlb_load_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_0 @@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw) SYM_CODE_END(handle_tlb_load_ptw) SYM_CODE_START(handle_tlb_store) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -346,6 +351,7 @@ nopage_tlb_store: SYM_CODE_END(handle_tlb_store) SYM_CODE_START(handle_tlb_store_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 @@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw) SYM_CODE_END(handle_tlb_store_ptw) SYM_CODE_START(handle_tlb_modify) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -500,6 +507,7 @@ nopage_tlb_modify: SYM_CODE_END(handle_tlb_modify) SYM_CODE_START(handle_tlb_modify_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 @@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw) SYM_CODE_END(handle_tlb_modify_ptw) SYM_CODE_START(handle_tlb_refill) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD lddir t0, t0, 3 diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index f597cd08a96b..75c6726382c3 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -4,6 +4,7 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n +OBJECT_FILES_NON_STANDARD := y # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 7791673e547b..99718f3dc686 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -846,6 +846,6 @@ static void amiga_get_hardware_list(struct seq_file *m) * The Amiga keyboard driver needs key_maps, but we cannot export it in * drivers/char/defkeymap.c, as it is autogenerated */ -#ifdef CONFIG_HW_CONSOLE +#ifdef CONFIG_VT EXPORT_SYMBOL_GPL(key_maps); #endif diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index e4bd6913f50e..1a2739852351 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/serial_8250.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/console.h> @@ -67,9 +68,6 @@ static char *hp300_models[] __initdata = { static char hp300_model_name[13] = "HP9000/"; extern void hp300_reset(void); -#ifdef CONFIG_SERIAL_8250_CONSOLE -extern int hp300_setup_serial_console(void) __init; -#endif int __init hp300_parse_bootinfo(const struct bi_record *record) { @@ -263,7 +261,5 @@ void __init config_hp300(void) } else { panic("Unknown HP9000 Model"); } -#ifdef CONFIG_SERIAL_8250_CONSOLE hp300_setup_serial_console(); -#endif } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 06ef440d16ce..516dc7022bd7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -619,15 +619,6 @@ config MACH_EYEQ5 bool -config FIT_IMAGE_FDT_EPM5 - bool "Include FDT for Mobileye EyeQ5 development platforms" - depends on MACH_EYEQ5 - default n - help - Enable this to include the FDT for the EyeQ5 development platforms - from Mobileye in the FIT kernel image. - This requires u-boot on the platform. - config MACH_NINTENDO64 bool "Nintendo 64 console" select CEVT_R4K @@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC endchoice +config FIT_IMAGE_FDT_EPM5 + bool "Include FDT for Mobileye EyeQ5 development platforms" + depends on MACH_EYEQ5 + default n + help + Enable this to include the FDT for the EyeQ5 development platforms + from Mobileye in the FIT kernel image. + This requires u-boot on the platform. + source "arch/mips/alchemy/Kconfig" source "arch/mips/ath25/Kconfig" source "arch/mips/ath79/Kconfig" diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index d14d0e37ad02..4a2b40ce39e0 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs); #define exception_ip(regs) exception_ip(regs) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern void die(const char *, struct pt_regs *) __noreturn; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index d1b11f66f748..cb1045ebab06 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -101,6 +101,7 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_REGS, thread_info, regs); + OFFSET(TI_SYSCALL, thread_info, syscall); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 59288c13b581..61503a36067e 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request, * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) +asmlinkage long syscall_trace_enter(struct pt_regs *regs) { user_exit(); - current_thread_info()->syscall = syscall; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (ptrace_report_syscall_entry(regs)) return -1; - syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP @@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) struct seccomp_data sd; unsigned long args[6]; - sd.nr = syscall; + sd.nr = current_thread_info()->syscall; sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) @@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) ret = __secure_computing(&sd); if (ret == -1) return ret; - syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], + audit_syscall_entry(current_thread_info()->syscall, + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ - if (syscall < 0) + if (current_thread_info()->syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); - return syscall; + return current_thread_info()->syscall; } /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 18dc9b345056..2c604717e630 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -77,6 +77,18 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + */ + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + lw t0, TI_FLAGS($28) # syscall tracing enabled? li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 @@ -114,16 +126,7 @@ syscall_trace_entry: SAVE_STATIC move a0, sp - /* - * syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - */ - move a1, v0 - subu t2, v0, __NR_O32_Linux - bnez t2, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 97456b2ca7dc..97788859238c 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -72,7 +74,6 @@ syscall_common: n32_syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index e6264aa62e45..be11ea5cc67e 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -82,7 +84,6 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d3c2616cba22..7a5abb73e531 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -79,6 +79,22 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * absolute syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + * note: NR_syscall is the first O32 syscall but the macro is + * only defined when compiling with -mabi=32 (CONFIG_32BIT) + * therefore __NR_O32_Linux is used (4000) + */ + + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -113,22 +129,7 @@ trace_a_syscall: sd a7, PT_R11(sp) # For indirect syscalls move a0, sp - /* - * absolute syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - * note: NR_syscall is the first O32 syscall but the macro is - * only defined when compiling with -mabi=32 (CONFIG_32BIT) - * therefore __NR_O32_Linux is used (4000) - */ - .set push - .set reorder - subu t1, v0, __NR_O32_Linux - move a1, v0 - bnez t1, 1f /* __NR_syscall at offset 0 */ - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ - .set pop - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 8d98af5c7201..9a8393e6b4a8 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -21,7 +21,8 @@ void __init early_init_devtree(void *params) { - __be32 *dtb = (u32 *)__dtb_start; + __be32 __maybe_unused *dtb = (u32 *)__dtb_start; + #if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR) if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) == OF_DT_HEADER) { @@ -30,8 +31,11 @@ void __init early_init_devtree(void *params) return; } #endif + +#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER) params = (void *)__dtb_start; +#endif early_init_dt_scan(params); } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index ee29c4c8d7c1..daafeb20f993 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -238,9 +238,9 @@ config PARISC_HUGE_KERNEL def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST config MLONGCALLS - def_bool y if PARISC_HUGE_KERNEL bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL depends on PA8X00 + default PARISC_HUGE_KERNEL help If you configure the kernel to include many drivers built-in instead as modules, the kernel executable may become too big, so that the @@ -255,9 +255,9 @@ config MLONGCALLS Enabling this option will probably slow down your kernel. config 64BIT - def_bool y if "$(ARCH)" = "parisc64" bool "64-bit kernel" if "$(ARCH)" = "parisc" depends on PA8X00 + default "$(ARCH)" = "parisc64" help Enable this if you want to support 64bit kernel on PA-RISC platform. diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <uapi/asm/mman.h> + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a68b9e637eda..1c4be3373686 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -607,11 +607,6 @@ config PPC64_SUPPORTS_MEMORY_FAILURE config ARCH_SUPPORTS_KEXEC def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP) -config ARCH_SELECTS_KEXEC - def_bool y - depends on KEXEC - select CRASH_DUMP - config ARCH_SUPPORTS_KEXEC_FILE def_bool PPC64 @@ -622,7 +617,6 @@ config ARCH_SELECTS_KEXEC_FILE def_bool y depends on KEXEC_FILE select KEXEC_ELF - select CRASH_DUMP select HAVE_IMA_KEXEC if IMA config PPC64_BIG_ENDIAN_ELF_ABI_V2 @@ -694,8 +688,7 @@ config ARCH_SELECTS_CRASH_DUMP config FA_DUMP bool "Firmware-assisted dump" - depends on PPC64 && (PPC_RTAS || PPC_POWERNV) - select CRASH_DUMP + depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV) help A robust mechanism to get reliable kernel crash dump with assistance from firmware. This approach does not use kexec, diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index df18f8dc4642..343326c30380 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -126,7 +126,7 @@ interrupts = <93 2>; }; - EHCI0: ehci@30010000000 { + EHCI0: usb@30010000000 { compatible = "ibm,476gtr-ehci", "generic-ehci"; reg = <0x300 0x10000000 0x0 0x10000>; interrupt-parent = <&MPIC>; @@ -140,14 +140,14 @@ interrupt-parent = <&MPIC>; }; - OHCI0: ohci@30010010000 { + OHCI0: usb@30010010000 { compatible = "ibm,476gtr-ohci", "generic-ohci"; reg = <0x300 0x10010000 0x0 0x10000>; interrupt-parent = <&MPIC>; interrupts = <89 1>; }; - OHCI1: ohci@30010020000 { + OHCI1: usb@30010020000 { compatible = "ibm,476gtr-ohci", "generic-ohci"; reg = <0x300 0x10020000 0x0 0x10000>; interrupt-parent = <&MPIC>; diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c index 74fb86b0d209..7c728755852e 100644 --- a/arch/powerpc/crypto/chacha-p10-glue.c +++ b/arch/powerpc/crypto/chacha-p10-glue.c @@ -197,6 +197,9 @@ static struct skcipher_alg algs[] = { static int __init chacha_p10_init(void) { + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + static_branch_enable(&have_p10); return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); @@ -204,10 +207,13 @@ static int __init chacha_p10_init(void) static void __exit chacha_p10_exit(void) { + if (!static_branch_likely(&have_p10)) + return; + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init); +module_init(chacha_p10_init); module_exit(chacha_p10_exit); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)"); diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index e1b43aa12175..fdb90e24dc74 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -55,59 +55,18 @@ typedef void (*crash_shutdown_t)(void); #ifdef CONFIG_KEXEC_CORE - -/* - * This function is responsible for capturing register states if coming - * via panic or invoking dump using sysrq-trigger. - */ -static inline void crash_setup_regs(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ - if (oldregs) - memcpy(newregs, oldregs, sizeof(*newregs)); - else - ppc_save_regs(newregs); -} +struct kimage; +struct pt_regs; extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ -extern int crashing_cpu; -extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); -extern void crash_ipi_callback(struct pt_regs *); -extern int crash_wake_offline; - -struct kimage; -struct pt_regs; extern void default_machine_kexec(struct kimage *image); -extern void default_machine_crash_shutdown(struct pt_regs *regs); -extern int crash_shutdown_register(crash_shutdown_t handler); -extern int crash_shutdown_unregister(crash_shutdown_t handler); - -extern void crash_kexec_prepare(void); -extern void crash_kexec_secondary(struct pt_regs *regs); -int __init overlaps_crashkernel(unsigned long start, unsigned long size); -extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); -static inline bool kdump_in_progress(void) -{ - return crashing_cpu >= 0; -} - void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; - void kexec_copy_flush(struct kimage *image); -#if defined(CONFIG_CRASH_DUMP) -bool is_kdump_kernel(void); -#define is_kdump_kernel is_kdump_kernel -#if defined(CONFIG_PPC_RTAS) -void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -#define crash_free_reserved_phys_range crash_free_reserved_phys_range -#endif /* CONFIG_PPC_RTAS */ -#endif /* CONFIG_CRASH_DUMP */ - #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; @@ -152,15 +111,56 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, #endif /* CONFIG_KEXEC_FILE */ -#else /* !CONFIG_KEXEC_CORE */ -static inline void crash_kexec_secondary(struct pt_regs *regs) { } +#endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_CRASH_RESERVE +int __init overlaps_crashkernel(unsigned long start, unsigned long size); +extern void reserve_crashkernel(void); +#else +static inline void reserve_crashkernel(void) {} +static inline int overlaps_crashkernel(unsigned long start, unsigned long size) { return 0; } +#endif -static inline int overlaps_crashkernel(unsigned long start, unsigned long size) +#if defined(CONFIG_CRASH_DUMP) +/* + * This function is responsible for capturing register states if coming + * via panic or invoking dump using sysrq-trigger. + */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) { - return 0; + if (oldregs) + memcpy(newregs, oldregs, sizeof(*newregs)); + else + ppc_save_regs(newregs); +} + +extern int crashing_cpu; +extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); +extern void crash_ipi_callback(struct pt_regs *regs); +extern int crash_wake_offline; + +extern int crash_shutdown_register(crash_shutdown_t handler); +extern int crash_shutdown_unregister(crash_shutdown_t handler); +extern void default_machine_crash_shutdown(struct pt_regs *regs); + +extern void crash_kexec_prepare(void); +extern void crash_kexec_secondary(struct pt_regs *regs); + +static inline bool kdump_in_progress(void) +{ + return crashing_cpu >= 0; } -static inline void reserve_crashkernel(void) { ; } +bool is_kdump_kernel(void); +#define is_kdump_kernel is_kdump_kernel +#if defined(CONFIG_PPC_RTAS) +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); +#define crash_free_reserved_phys_range crash_free_reserved_phys_range +#endif /* CONFIG_PPC_RTAS */ + +#else /* !CONFIG_CRASH_DUMP */ +static inline void crash_kexec_secondary(struct pt_regs *regs) { } static inline int crash_shutdown_register(crash_shutdown_t handler) { @@ -183,7 +183,7 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { } -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_CRASH_DUMP */ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kexec.h> diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <asm/page.h> #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1185efebf032..29a8c8e18585 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1285,15 +1285,14 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; + struct iommu_group *grp; /* At first attach the ownership is already set */ - if (!domain) { - iommu_group_put(grp); + if (!domain) return 0; - } + grp = iommu_group_get(dev); table_group = iommu_group_get_iommudata(grp); /* * The domain being set to PLATFORM from earlier diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1dc32a058156..cd8d8883de90 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -475,7 +475,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_RESERVE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2add292da494..01ed1263e1a9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -110,7 +110,7 @@ int ppc_do_canonicalize_irqs; EXPORT_SYMBOL(ppc_do_canonicalize_irqs); #endif -#ifdef CONFIG_VMCORE_INFO +#ifdef CONFIG_CRASH_DUMP /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; #endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a60e4139214b..12e53b3d7923 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -588,7 +588,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { int cpu; @@ -631,7 +631,7 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP if (kexec_crash_image) { crash_kexec_prepare(); return; diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 91e96f5168b7..8e469c4da3f8 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -3,12 +3,13 @@ # Makefile for the linux kernel. # -obj-y += core.o crash.o core_$(BITS).o +obj-y += core.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o +obj-$(CONFIG_CRASH_DUMP) += crash.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_core_$(BITS).o := n diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 3ff4411ed496..b8333a49ea5d 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -44,10 +44,12 @@ void machine_kexec_mask_interrupts(void) { } } +#ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) { default_machine_crash_shutdown(regs); } +#endif void machine_kexec_cleanup(struct kimage *image) { @@ -77,6 +79,7 @@ void machine_kexec(struct kimage *image) for(;;); } +#ifdef CONFIG_CRASH_RESERVE void __init reserve_crashkernel(void) { unsigned long long crash_size, crash_base, total_mem_sz; @@ -251,3 +254,4 @@ static int __init kexec_setup(void) return 0; } late_initcall(kexec_setup); +#endif /* CONFIG_CRASH_RESERVE */ diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 904016cf89ea..6d8951e8e966 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -47,7 +47,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) return ERR_PTR(ret); - if (image->type == KEXEC_TYPE_CRASH) { + if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) { /* min & max buffer values for kdump case */ kbuf.buf_min = pbuf.buf_min = crashk_res.start; kbuf.buf_max = pbuf.buf_max = @@ -70,7 +70,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem); /* Load additional segments needed for panic kernel */ - if (image->type == KEXEC_TYPE_CRASH) { + if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) { ret = load_crashdump_segments_ppc64(image, &kbuf); if (ret) { pr_err("Failed to load kdump kernel segments\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 5b4c5cb23354..1bc65de6174f 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -97,119 +97,6 @@ out: } /** - * get_usable_memory_ranges - Get usable memory ranges. This list includes - * regions like crashkernel, opal/rtas & tce-table, - * that kdump kernel could use. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_usable_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - /* - * Early boot failure observed on guests when low memory (first memory - * block?) is not added to usable memory. So, add [0, crashk_res.end] - * instead of [crashk_res.start, crashk_res.end] to workaround it. - * Also, crashed kernel's memory must be added to reserve map to - * avoid kdump kernel from using it. - */ - ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); - if (ret) - goto out; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_tce_mem_ranges(mem_ranges); -out: - if (ret) - pr_err("Failed to setup usable memory ranges\n"); - return ret; -} - -/** - * get_crash_memory_ranges - Get crash memory ranges. This list includes - * first/crashing kernel's memory regions that - * would be exported via an elfcore. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_crash_memory_ranges(struct crash_mem **mem_ranges) -{ - phys_addr_t base, end; - struct crash_mem *tmem; - u64 i; - int ret; - - for_each_mem_range(i, &base, &end) { - u64 size = end - base; - - /* Skip backup memory region, which needs a separate entry */ - if (base == BACKUP_SRC_START) { - if (size > BACKUP_SRC_SIZE) { - base = BACKUP_SRC_END + 1; - size -= BACKUP_SRC_SIZE; - } else - continue; - } - - ret = add_mem_range(mem_ranges, base, size); - if (ret) - goto out; - - /* Try merging adjacent ranges before reallocation attempt */ - if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) - sort_memory_ranges(*mem_ranges, true); - } - - /* Reallocate memory ranges if there is no space to split ranges */ - tmem = *mem_ranges; - if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { - tmem = realloc_mem_ranges(mem_ranges); - if (!tmem) - goto out; - } - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - /* - * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL - * regions are exported to save their context at the time of - * crash, they should actually be backed up just like the - * first 64K bytes of memory. - */ - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - /* create a separate program header for the backup region */ - ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); - if (ret) - goto out; - - sort_memory_ranges(*mem_ranges, false); -out: - if (ret) - pr_err("Failed to setup crash memory ranges\n"); - return ret; -} - -/** * get_reserved_memory_ranges - Get reserve memory ranges. This list includes * memory regions that should be added to the * memory reserve map to ensure the region is @@ -434,6 +321,120 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, return ret; } +#ifdef CONFIG_CRASH_DUMP +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} + +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + phys_addr_t base, end; + struct crash_mem *tmem; + u64 i; + int ret; + + for_each_mem_range(i, &base, &end) { + u64 size = end - base; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + /** * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries * @um_info: Usable memory buffer and ranges info. @@ -863,6 +864,7 @@ int load_crashdump_segments_ppc64(struct kimage *image, return 0; } +#endif /** * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global @@ -972,26 +974,14 @@ static unsigned int cpu_node_size(void) return size; } -/** - * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to - * setup FDT for kexec/kdump kernel. - * @image: kexec image being loaded. - * - * Returns the estimated extra size needed for kexec/kdump kernel FDT. - */ -unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) +static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) { unsigned int cpu_nodes, extra_size = 0; struct device_node *dn; u64 usm_entries; - // Budget some space for the password blob. There's already extra space - // for the key name - if (plpks_is_available()) - extra_size += (unsigned int)plpks_get_passwordlen(); - - if (image->type != KEXEC_TYPE_CRASH) - return extra_size; + if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH) + return 0; /* * For kdump kernel, account for linux,usable-memory and @@ -1020,6 +1010,25 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) } /** + * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to + * setup FDT for kexec/kdump kernel. + * @image: kexec image being loaded. + * + * Returns the estimated extra size needed for kexec/kdump kernel FDT. + */ +unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) +{ + unsigned int extra_size = 0; + + // Budget some space for the password blob. There's already extra space + // for the key name + if (plpks_is_available()) + extra_size += (unsigned int)plpks_get_passwordlen(); + + return extra_size + kdump_extra_fdt_size_ppc64(image); +} + +/** * add_node_props - Reads node properties from device node structure and add * them to fdt. * @fdt: Flattened device tree of the kernel @@ -1171,6 +1180,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem *umem = NULL, *rmem = NULL; int i, nr_ranges, ret; +#ifdef CONFIG_CRASH_DUMP /* * Restrict memory usage for kdump kernel by setting up * usable memory ranges and memory reserve map. @@ -1207,6 +1217,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, goto out; } } +#endif /* Update cpus nodes information to account hotplug CPUs. */ ret = update_cpus_node(fdt); @@ -1278,7 +1289,7 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) buf_min = kbuf->buf_min; buf_max = kbuf->buf_max; /* Segments for kdump kernel should be within crashkernel region */ - if (kbuf->image->type == KEXEC_TYPE_CRASH) { + if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) { buf_min = (buf_min < crashk_res.start ? crashk_res.start : buf_min); buf_max = (buf_max > crashk_res.end ? diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 5445587bfe84..100f999871bc 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -193,7 +193,7 @@ static bool is_module_segment(unsigned long addr) return true; } -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; @@ -230,9 +230,10 @@ void mmu_mark_initmem_nx(void) mtsr(mfsr(i << 28) | 0x10000000, i << 28); } + return 0; } -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; @@ -245,6 +246,8 @@ void mmu_mark_rodata_ro(void) } update_bats(); + + return 0; } /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 8e84bc214d13..6949c2c937e7 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -160,11 +160,11 @@ static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500) -void mmu_mark_initmem_nx(void); -void mmu_mark_rodata_ro(void); +int mmu_mark_initmem_nx(void); +int mmu_mark_rodata_ro(void); #else -static inline void mmu_mark_initmem_nx(void) { } -static inline void mmu_mark_rodata_ro(void) { } +static inline int mmu_mark_initmem_nx(void) { return 0; } +static inline int mmu_mark_rodata_ro(void) { return 0; } #endif #ifdef CONFIG_PPC_8xx diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 6be6421086ed..43d4842bb1c7 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -119,23 +119,26 @@ void __init mmu_mapin_immr(void) PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, - pgprot_t prot, bool new) +static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { unsigned long v = PAGE_OFFSET + offset; unsigned long p = offset; + int err = 0; WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); - for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); - for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); - for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); if (!new) flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); + + return err; } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) @@ -166,27 +169,33 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return top; } -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + int err = 0; if (!debug_pagealloc_enabled_or_kfence()) - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); + + return err; } #ifdef CONFIG_STRICT_KERNEL_RWX -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { unsigned long sinittext = __pa(_sinittext); + int err; - mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + err = mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) mmu_pin_tlb(block_mapped_ram, true); + + return err; } #endif diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c index 921c3521ec11..266fb22131fc 100644 --- a/arch/powerpc/mm/nohash/e500.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -285,19 +285,23 @@ void __init adjust_total_lowmem(void) } #ifdef CONFIG_STRICT_KERNEL_RWX -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { unsigned long remapped; remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false); - WARN_ON(__max_low_memory != remapped); + if (WARN_ON(__max_low_memory != remapped)) + return -EINVAL; + + return 0; } #endif -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { /* Everything is done in mmu_mark_rodata_ro() */ + return 0; } void setup_initial_memory_limit(phys_addr_t first_memblock_base, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index face94977cb2..cfd622ebf774 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -130,31 +130,41 @@ void __init mapin_ram(void) } } -void mark_initmem_nx(void) +static int __mark_initmem_nx(void) { unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); + int err; - mmu_mark_initmem_nx(); + err = mmu_mark_initmem_nx(); if (!v_block_mapped((unsigned long)_sinittext)) { - set_memory_nx((unsigned long)_sinittext, numpages); - set_memory_rw((unsigned long)_sinittext, numpages); + err = set_memory_nx((unsigned long)_sinittext, numpages); + if (err) + return err; + err = set_memory_rw((unsigned long)_sinittext, numpages); } + return err; +} + +void mark_initmem_nx(void) +{ + int err = __mark_initmem_nx(); + + if (err) + panic("%s() failed, err = %d\n", __func__, err); } #ifdef CONFIG_STRICT_KERNEL_RWX -void mark_rodata_ro(void) +static int __mark_rodata_ro(void) { unsigned long numpages; if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); - if (v_block_mapped((unsigned long)_stext + 1)) { - mmu_mark_rodata_ro(); - return; - } + if (v_block_mapped((unsigned long)_stext + 1)) + return mmu_mark_rodata_ro(); /* * mark text and rodata as read only. __end_rodata is set by @@ -164,6 +174,14 @@ void mark_rodata_ro(void) numpages = PFN_UP((unsigned long)__end_rodata) - PFN_DOWN((unsigned long)_stext); - set_memory_ro((unsigned long)_stext, numpages); + return set_memory_ro((unsigned long)_stext, numpages); +} + +void mark_rodata_ro(void) +{ + int err = __mark_rodata_ro(); + + if (err) + panic("%s() failed, err = %d\n", __func__, err); } #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 9e1a25398f98..8f14f0581a21 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -434,7 +434,7 @@ void __init pnv_smp_init(void) smp_ops = &pnv_smp_ops; #ifdef CONFIG_HOTPLUG_CPU -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP crash_wake_offline = 1; #endif #endif diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index d25ad1c19f88..2c585f7a0b6e 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -2,6 +2,7 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ +obj-$(CONFIG_CRYPTO) += crypto/ obj-y += errata/ obj-$(CONFIG_KVM) += kvm/ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 92b1dbf55176..be09c8836d56 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -27,14 +27,18 @@ config RISCV select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API + select ARCH_HAS_PREPARE_SYNC_CORE_CMD select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL + select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN @@ -47,6 +51,9 @@ config RISCV select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU + # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 + select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 + select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000 select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK @@ -106,6 +113,7 @@ config RISCV select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK @@ -124,6 +132,7 @@ config RISCV select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU + select HAVE_FAST_GUP if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION select HAVE_GCC_PLUGINS @@ -155,6 +164,7 @@ config RISCV select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA + select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF @@ -576,6 +586,13 @@ config TOOLCHAIN_HAS_ZBB depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 depends on AS_HAS_OPTION_ARCH +# This symbol indicates that the toolchain supports all v1.0 vector crypto +# extensions, including Zvk*, Zvbb, and Zvbc. LLVM added all of these at once. +# binutils added all except Zvkb, then added Zvkb. So we just check for Zvkb. +config TOOLCHAIN_HAS_VECTOR_CRYPTO + def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb) + depends on AS_HAS_OPTION_ARCH + config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB @@ -686,27 +703,61 @@ config THREAD_SIZE_ORDER affects irq stack size, which is equal to thread stack size. config RISCV_MISALIGNED - bool "Support misaligned load/store traps for kernel and userspace" + bool select SYSCTL_ARCH_UNALIGN_ALLOW - default y help - Say Y here if you want the kernel to embed support for misaligned - load/store for both kernel and userspace. When disable, misaligned - accesses will generate SIGBUS in userspace and panic in kernel. + Embed support for emulating misaligned loads and stores. + +choice + prompt "Unaligned Accesses Support" + default RISCV_PROBE_UNALIGNED_ACCESS + help + This determines the level of support for unaligned accesses. This + information is used by the kernel to perform optimizations. It is also + exposed to user space via the hwprobe syscall. The hardware will be + probed at boot by default. + +config RISCV_PROBE_UNALIGNED_ACCESS + bool "Probe for hardware unaligned access support" + select RISCV_MISALIGNED + help + During boot, the kernel will run a series of tests to determine the + speed of unaligned accesses. This probing will dynamically determine + the speed of unaligned accesses on the underlying system. If unaligned + memory accesses trap into the kernel as they are not supported by the + system, the kernel will emulate the unaligned accesses to preserve the + UABI. + +config RISCV_EMULATED_UNALIGNED_ACCESS + bool "Emulate unaligned access where system support is missing" + select RISCV_MISALIGNED + help + If unaligned memory accesses trap into the kernel as they are not + supported by the system, the kernel will emulate the unaligned + accesses to preserve the UABI. When the underlying system does support + unaligned accesses, the unaligned accesses are assumed to be slow. + +config RISCV_SLOW_UNALIGNED_ACCESS + bool "Assume the system supports slow unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports slow unaligned memory accesses. The + kernel and userspace programs may not be able to run at all on systems + that do not support unaligned memory accesses. config RISCV_EFFICIENT_UNALIGNED_ACCESS - bool "Assume the CPU supports fast unaligned memory accesses" + bool "Assume the system supports fast unaligned memory accesses" depends on NONPORTABLE select DCACHE_WORD_ACCESS if MMU select HAVE_EFFICIENT_UNALIGNED_ACCESS help - Say Y here if you want the kernel to assume that the CPU supports - efficient unaligned memory accesses. When enabled, this option - improves the performance of the kernel on such CPUs. However, the - kernel will run much more slowly, or will not be able to run at all, - on CPUs that do not support efficient unaligned memory accesses. + Assume that the system supports fast unaligned memory accesses. When + enabled, this option improves the performance of the kernel on such + systems. However, the kernel and userspace programs will run much more + slowly, or will not be able to run at all, on systems that do not + support efficient unaligned memory accesses. - If unsure what to do here, say N. +endchoice endmenu # "Platform type" @@ -1011,11 +1062,8 @@ menu "Power management options" source "kernel/power/Kconfig" -# Hibernation is only possible on systems where the SBI implementation has -# marked its reserved memory as not accessible from, or does not run -# from the same memory as, Linux config ARCH_HIBERNATION_POSSIBLE - def_bool NONPORTABLE + def_bool y config ARCH_HIBERNATION_HEADER def_bool HIBERNATION diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0b7d109258e7..5b3115a19852 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -50,6 +50,11 @@ ifndef CONFIG_AS_IS_LLVM KBUILD_CFLAGS += -Wa,-mno-relax KBUILD_AFLAGS += -Wa,-mno-relax endif +# LLVM has an issue with target-features and LTO: https://github.com/llvm/llvm-project/issues/59350 +# Ensure it is aware of linker relaxation with LTO, otherwise relocations may +# be incorrect: https://github.com/llvm/llvm-project/issues/65090 +else ifeq ($(CONFIG_LTO_CLANG),y) + KBUILD_LDFLAGS += -mllvm -mattr=+c -mllvm -mattr=+relax endif ifeq ($(CONFIG_SHADOW_CALL_STACK),y) @@ -146,7 +151,7 @@ endif endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg -vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so +vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) diff --git a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi index 09ef10b39f46..f35324b9173c 100644 --- a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi +++ b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi @@ -27,7 +27,7 @@ riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", "zifencei", - "zihpm"; + "zihpm", "xandespmu"; mmu-type = "riscv,sv39"; i-cache-size = <0x8000>; i-cache-line-size = <0x40>; @@ -39,7 +39,7 @@ cpu0_intc: interrupt-controller { #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; + compatible = "andestech,cpu-intc", "riscv,cpu-intc"; interrupt-controller; }; }; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index eaf34e871e30..fc0ec2ee13bc 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m CONFIG_CPUFREQ_DT=y +CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_ACPI=y @@ -215,6 +216,7 @@ CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y +CONFIG_MMC_SDHCI_OF_DWCMSHC=y CONFIG_MMC_SPI=y CONFIG_MMC_DW=y CONFIG_MMC_DW_STARFIVE=y @@ -224,6 +226,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m +CONFIG_DW_AXI_DMAC=y CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig new file mode 100644 index 000000000000..ad58dad9a580 --- /dev/null +++ b/arch/riscv/crypto/Kconfig @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (riscv)" + +config CRYPTO_AES_RISCV64 + tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTS" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_ALGAPI + select CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + help + Block cipher: AES cipher algorithms + Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTS + + Architecture: riscv64 using: + - Zvkned vector crypto extension + - Zvbb vector extension (XTS) + - Zvkb vector crypto extension (CTR) + - Zvkg vector crypto extension (XTS) + +config CRYPTO_CHACHA_RISCV64 + tristate "Ciphers: ChaCha" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + help + Length-preserving ciphers: ChaCha20 stream cipher algorithm + + Architecture: riscv64 using: + - Zvkb vector crypto extension + +config CRYPTO_GHASH_RISCV64 + tristate "Hash functions: GHASH" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_GCM + help + GCM GHASH function (NIST SP 800-38D) + + Architecture: riscv64 using: + - Zvkg vector crypto extension + +config CRYPTO_SHA256_RISCV64 + tristate "Hash functions: SHA-224 and SHA-256" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_SHA256 + help + SHA-224 and SHA-256 secure hash algorithm (FIPS 180) + + Architecture: riscv64 using: + - Zvknha or Zvknhb vector crypto extensions + - Zvkb vector crypto extension + +config CRYPTO_SHA512_RISCV64 + tristate "Hash functions: SHA-384 and SHA-512" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_SHA512 + help + SHA-384 and SHA-512 secure hash algorithm (FIPS 180) + + Architecture: riscv64 using: + - Zvknhb vector crypto extension + - Zvkb vector crypto extension + +config CRYPTO_SM3_RISCV64 + tristate "Hash functions: SM3 (ShangMi 3)" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) + + Architecture: riscv64 using: + - Zvksh vector crypto extension + - Zvkb vector crypto extension + +config CRYPTO_SM4_RISCV64 + tristate "Ciphers: SM4 (ShangMi 4)" + depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + select CRYPTO_ALGAPI + select CRYPTO_SM4 + help + SM4 block cipher algorithm (OSCCA GB/T 32907-2016, + ISO/IEC 18033-3:2010/Amd 1:2021) + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithm for use within China. + + Architecture: riscv64 using: + - Zvksed vector crypto extension + - Zvkb vector crypto extension + +endmenu diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile new file mode 100644 index 000000000000..247c7bc7288c --- /dev/null +++ b/arch/riscv/crypto/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o +aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \ + aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o + +obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o +chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o + +obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o +ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o + +obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o +sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o + +obj-$(CONFIG_CRYPTO_SHA512_RISCV64) += sha512-riscv64.o +sha512-riscv64-y := sha512-riscv64-glue.o sha512-riscv64-zvknhb-zvkb.o + +obj-$(CONFIG_CRYPTO_SM3_RISCV64) += sm3-riscv64.o +sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o + +obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o +sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o diff --git a/arch/riscv/crypto/aes-macros.S b/arch/riscv/crypto/aes-macros.S new file mode 100644 index 000000000000..d1a258d04bc7 --- /dev/null +++ b/arch/riscv/crypto/aes-macros.S @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file contains macros that are shared by the other aes-*.S files. The +// generated code of these macros depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector AES block cipher extension ('Zvkned') + +// Loads the AES round keys from \keyp into vector registers and jumps to code +// specific to the length of the key. Specifically: +// - If AES-128, loads round keys into v1-v11 and jumps to \label128. +// - If AES-192, loads round keys into v1-v13 and jumps to \label192. +// - If AES-256, loads round keys into v1-v15 and continues onwards. +// +// Also sets vl=4 and vtype=e32,m1,ta,ma. Clobbers t0 and t1. +.macro aes_begin keyp, label128, label192 + lwu t0, 480(\keyp) // t0 = key length in bytes + li t1, 24 // t1 = key length for AES-192 + vsetivli zero, 4, e32, m1, ta, ma + vle32.v v1, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v2, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v3, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v4, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v5, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v6, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v7, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v8, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v9, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v10, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v11, (\keyp) + blt t0, t1, \label128 // If AES-128, goto label128. + addi \keyp, \keyp, 16 + vle32.v v12, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v13, (\keyp) + beq t0, t1, \label192 // If AES-192, goto label192. + // Else, it's AES-256. + addi \keyp, \keyp, 16 + vle32.v v14, (\keyp) + addi \keyp, \keyp, 16 + vle32.v v15, (\keyp) +.endm + +// Encrypts \data using zvkned instructions, using the round keys loaded into +// v1-v11 (for AES-128), v1-v13 (for AES-192), or v1-v15 (for AES-256). \keylen +// is the AES key length in bits. vl and vtype must already be set +// appropriately. Note that if vl > 4, multiple blocks are encrypted. +.macro aes_encrypt data, keylen + vaesz.vs \data, v1 + vaesem.vs \data, v2 + vaesem.vs \data, v3 + vaesem.vs \data, v4 + vaesem.vs \data, v5 + vaesem.vs \data, v6 + vaesem.vs \data, v7 + vaesem.vs \data, v8 + vaesem.vs \data, v9 + vaesem.vs \data, v10 +.if \keylen == 128 + vaesef.vs \data, v11 +.elseif \keylen == 192 + vaesem.vs \data, v11 + vaesem.vs \data, v12 + vaesef.vs \data, v13 +.else + vaesem.vs \data, v11 + vaesem.vs \data, v12 + vaesem.vs \data, v13 + vaesem.vs \data, v14 + vaesef.vs \data, v15 +.endif +.endm + +// Same as aes_encrypt, but decrypts instead of encrypts. +.macro aes_decrypt data, keylen +.if \keylen == 128 + vaesz.vs \data, v11 +.elseif \keylen == 192 + vaesz.vs \data, v13 + vaesdm.vs \data, v12 + vaesdm.vs \data, v11 +.else + vaesz.vs \data, v15 + vaesdm.vs \data, v14 + vaesdm.vs \data, v13 + vaesdm.vs \data, v12 + vaesdm.vs \data, v11 +.endif + vaesdm.vs \data, v10 + vaesdm.vs \data, v9 + vaesdm.vs \data, v8 + vaesdm.vs \data, v7 + vaesdm.vs \data, v6 + vaesdm.vs \data, v5 + vaesdm.vs \data, v4 + vaesdm.vs \data, v3 + vaesdm.vs \data, v2 + vaesdf.vs \data, v1 +.endm + +// Expands to aes_encrypt or aes_decrypt according to \enc, which is 1 or 0. +.macro aes_crypt data, enc, keylen +.if \enc + aes_encrypt \data, \keylen +.else + aes_decrypt \data, \keylen +.endif +.endm diff --git a/arch/riscv/crypto/aes-riscv64-glue.c b/arch/riscv/crypto/aes-riscv64-glue.c new file mode 100644 index 000000000000..f814ee048555 --- /dev/null +++ b/arch/riscv/crypto/aes-riscv64-glue.c @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AES using the RISC-V vector crypto extensions. Includes the bare block + * cipher and the ECB, CBC, CBC-CTS, CTR, and XTS modes. + * + * Copyright (C) 2023 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + * + * Copyright 2024 Google LLC + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/aes.h> +#include <crypto/internal/cipher.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <crypto/xts.h> +#include <linux/linkage.h> +#include <linux/module.h> + +asmlinkage void aes_encrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 in[AES_BLOCK_SIZE], + u8 out[AES_BLOCK_SIZE]); +asmlinkage void aes_decrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 in[AES_BLOCK_SIZE], + u8 out[AES_BLOCK_SIZE]); + +asmlinkage void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len); +asmlinkage void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len); + +asmlinkage void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + u8 iv[AES_BLOCK_SIZE]); +asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + u8 iv[AES_BLOCK_SIZE]); + +asmlinkage void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + const u8 iv[AES_BLOCK_SIZE], bool enc); + +asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + u8 iv[AES_BLOCK_SIZE]); + +asmlinkage void aes_xts_encrypt_zvkned_zvbb_zvkg( + const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + u8 tweak[AES_BLOCK_SIZE]); + +asmlinkage void aes_xts_decrypt_zvkned_zvbb_zvkg( + const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + u8 tweak[AES_BLOCK_SIZE]); + +static int riscv64_aes_setkey(struct crypto_aes_ctx *ctx, + const u8 *key, unsigned int keylen) +{ + /* + * For now we just use the generic key expansion, for these reasons: + * + * - zvkned's key expansion instructions don't support AES-192. + * So, non-zvkned fallback code would be needed anyway. + * + * - Users of AES in Linux usually don't change keys frequently. + * So, key expansion isn't performance-critical. + * + * - For single-block AES exposed as a "cipher" algorithm, it's + * necessary to use struct crypto_aes_ctx and initialize its 'key_dec' + * field with the round keys for the Equivalent Inverse Cipher. This + * is because with "cipher", decryption can be requested from a + * context where the vector unit isn't usable, necessitating a + * fallback to aes_decrypt(). But, zvkned can only generate and use + * the normal round keys. Of course, it's preferable to not have + * special code just for "cipher", as e.g. XTS also uses a + * single-block AES encryption. It's simplest to just use + * struct crypto_aes_ctx and aes_expandkey() everywhere. + */ + return aes_expandkey(ctx, key, keylen); +} + +static int riscv64_aes_setkey_cipher(struct crypto_tfm *tfm, + const u8 *key, unsigned int keylen) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + return riscv64_aes_setkey(ctx, key, keylen); +} + +static int riscv64_aes_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + + return riscv64_aes_setkey(ctx, key, keylen); +} + +/* Bare AES, without a mode of operation */ + +static void riscv64_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + aes_encrypt_zvkned(ctx, src, dst); + kernel_vector_end(); + } else { + aes_encrypt(ctx, dst, src); + } +} + +static void riscv64_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + aes_decrypt_zvkned(ctx, src, dst); + kernel_vector_end(); + } else { + aes_decrypt(ctx, dst, src); + } +} + +/* AES-ECB */ + +static inline int riscv64_aes_ecb_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { + kernel_vector_begin(); + if (enc) + aes_ecb_encrypt_zvkned(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & ~(AES_BLOCK_SIZE - 1)); + else + aes_ecb_decrypt_zvkned(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & ~(AES_BLOCK_SIZE - 1)); + kernel_vector_end(); + err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1)); + } + + return err; +} + +static int riscv64_aes_ecb_encrypt(struct skcipher_request *req) +{ + return riscv64_aes_ecb_crypt(req, true); +} + +static int riscv64_aes_ecb_decrypt(struct skcipher_request *req) +{ + return riscv64_aes_ecb_crypt(req, false); +} + +/* AES-CBC */ + +static int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { + kernel_vector_begin(); + if (enc) + aes_cbc_encrypt_zvkned(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & ~(AES_BLOCK_SIZE - 1), + walk.iv); + else + aes_cbc_decrypt_zvkned(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & ~(AES_BLOCK_SIZE - 1), + walk.iv); + kernel_vector_end(); + err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1)); + } + + return err; +} + +static int riscv64_aes_cbc_encrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_crypt(req, true); +} + +static int riscv64_aes_cbc_decrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_crypt(req, false); +} + +/* AES-CBC-CTS */ + +static int riscv64_aes_cbc_cts_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; + struct skcipher_walk walk; + unsigned int cbc_len; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + /* + * If the full message is available in one step, decrypt it in one call + * to the CBC-CTS assembly function. This reduces overhead, especially + * on short messages. Otherwise, fall back to doing CBC up to the last + * two blocks, then invoke CTS just for the ciphertext stealing. + */ + if (unlikely(walk.nbytes != req->cryptlen)) { + cbc_len = round_down(req->cryptlen - AES_BLOCK_SIZE - 1, + AES_BLOCK_SIZE); + skcipher_walk_abort(&walk); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_len, req->iv); + err = riscv64_aes_cbc_crypt(&subreq, enc); + if (err) + return err; + dst = src = scatterwalk_ffwd(sg_src, req->src, cbc_len); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, cbc_len); + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_len, req->iv); + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + } + kernel_vector_begin(); + aes_cbc_cts_crypt_zvkned(ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv, enc); + kernel_vector_end(); + return skcipher_walk_done(&walk, 0); +} + +static int riscv64_aes_cbc_cts_encrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_cts_crypt(req, true); +} + +static int riscv64_aes_cbc_cts_decrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_cts_crypt(req, false); +} + +/* AES-CTR */ + +static int riscv64_aes_ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned int nbytes, p1_nbytes; + struct skcipher_walk walk; + u32 ctr32, nblocks; + int err; + + /* Get the low 32-bit word of the 128-bit big endian counter. */ + ctr32 = get_unaligned_be32(req->iv + 12); + + err = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { + if (nbytes < walk.total) { + /* Not the end yet, so keep the length block-aligned. */ + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + nblocks = nbytes / AES_BLOCK_SIZE; + } else { + /* It's the end, so include any final partial block. */ + nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE); + } + ctr32 += nblocks; + + kernel_vector_begin(); + if (ctr32 >= nblocks) { + /* The low 32-bit word of the counter won't overflow. */ + aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, + req->iv); + } else { + /* + * The low 32-bit word of the counter will overflow. + * The assembly doesn't handle this case, so split the + * operation into two at the point where the overflow + * will occur. After the first part, add the carry bit. + */ + p1_nbytes = min_t(unsigned int, nbytes, + (nblocks - ctr32) * AES_BLOCK_SIZE); + aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + p1_nbytes, req->iv); + crypto_inc(req->iv, 12); + + if (ctr32) { + aes_ctr32_crypt_zvkned_zvkb( + ctx, + walk.src.virt.addr + p1_nbytes, + walk.dst.virt.addr + p1_nbytes, + nbytes - p1_nbytes, req->iv); + } + } + kernel_vector_end(); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +/* AES-XTS */ + +struct riscv64_aes_xts_ctx { + struct crypto_aes_ctx ctx1; + struct crypto_aes_ctx ctx2; +}; + +static int riscv64_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + + return xts_verify_key(tfm, key, keylen) ?: + riscv64_aes_setkey(&ctx->ctx1, key, keylen / 2) ?: + riscv64_aes_setkey(&ctx->ctx2, key + keylen / 2, keylen / 2); +} + +static int riscv64_aes_xts_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; + struct skcipher_walk walk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + /* Encrypt the IV with the tweak key to get the first tweak. */ + kernel_vector_begin(); + aes_encrypt_zvkned(&ctx->ctx2, req->iv, req->iv); + kernel_vector_end(); + + err = skcipher_walk_virt(&walk, req, false); + + /* + * If the message length isn't divisible by the AES block size and the + * full message isn't available in one step of the scatterlist walk, + * then separate off the last full block and the partial block. This + * ensures that they are processed in the same call to the assembly + * function, which is required for ciphertext stealing. + */ + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail - AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + while (walk.nbytes) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + + kernel_vector_begin(); + if (enc) + aes_xts_encrypt_zvkned_zvbb_zvkg( + &ctx->ctx1, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, req->iv); + else + aes_xts_decrypt_zvkned_zvbb_zvkg( + &ctx->ctx1, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, req->iv); + kernel_vector_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + if (err || likely(!tail)) + return err; + + /* Do ciphertext stealing with the last full block and partial block. */ + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + kernel_vector_begin(); + if (enc) + aes_xts_encrypt_zvkned_zvbb_zvkg( + &ctx->ctx1, walk.src.virt.addr, + walk.dst.virt.addr, walk.nbytes, req->iv); + else + aes_xts_decrypt_zvkned_zvbb_zvkg( + &ctx->ctx1, walk.src.virt.addr, + walk.dst.virt.addr, walk.nbytes, req->iv); + kernel_vector_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int riscv64_aes_xts_encrypt(struct skcipher_request *req) +{ + return riscv64_aes_xts_crypt(req, true); +} + +static int riscv64_aes_xts_decrypt(struct skcipher_request *req) +{ + return riscv64_aes_xts_crypt(req, false); +} + +/* Algorithm definitions */ + +static struct crypto_alg riscv64_zvkned_aes_cipher_alg = { + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "aes", + .cra_driver_name = "aes-riscv64-zvkned", + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = riscv64_aes_setkey_cipher, + .cia_encrypt = riscv64_aes_encrypt, + .cia_decrypt = riscv64_aes_decrypt, + }, + .cra_module = THIS_MODULE, +}; + +static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = { + { + .setkey = riscv64_aes_setkey_skcipher, + .encrypt = riscv64_aes_ecb_encrypt, + .decrypt = riscv64_aes_ecb_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, /* matches LMUL=8 */ + .base = { + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-riscv64-zvkned", + .cra_module = THIS_MODULE, + }, + }, { + .setkey = riscv64_aes_setkey_skcipher, + .encrypt = riscv64_aes_cbc_encrypt, + .decrypt = riscv64_aes_cbc_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-riscv64-zvkned", + .cra_module = THIS_MODULE, + }, + }, { + .setkey = riscv64_aes_setkey_skcipher, + .encrypt = riscv64_aes_cbc_cts_encrypt, + .decrypt = riscv64_aes_cbc_cts_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */ + .base = { + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "cts(cbc(aes))", + .cra_driver_name = "cts-cbc-aes-riscv64-zvkned", + .cra_module = THIS_MODULE, + }, + } +}; + +static struct skcipher_alg riscv64_zvkned_zvkb_aes_skcipher_alg = { + .setkey = riscv64_aes_setkey_skcipher, + .encrypt = riscv64_aes_ctr_crypt, + .decrypt = riscv64_aes_ctr_crypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */ + .base = { + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-riscv64-zvkned-zvkb", + .cra_module = THIS_MODULE, + }, +}; + +static struct skcipher_alg riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg = { + .setkey = riscv64_aes_xts_setkey, + .encrypt = riscv64_aes_xts_encrypt, + .decrypt = riscv64_aes_xts_decrypt, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */ + .base = { + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct riscv64_aes_xts_ctx), + .cra_priority = 300, + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-riscv64-zvkned-zvbb-zvkg", + .cra_module = THIS_MODULE, + }, +}; + +static inline bool riscv64_aes_xts_supported(void) +{ + return riscv_isa_extension_available(NULL, ZVBB) && + riscv_isa_extension_available(NULL, ZVKG) && + riscv_vector_vlen() < 2048 /* Implementation limitation */; +} + +static int __init riscv64_aes_mod_init(void) +{ + int err = -ENODEV; + + if (riscv_isa_extension_available(NULL, ZVKNED) && + riscv_vector_vlen() >= 128) { + err = crypto_register_alg(&riscv64_zvkned_aes_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers( + riscv64_zvkned_aes_skcipher_algs, + ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs)); + if (err) + goto unregister_zvkned_cipher_alg; + + if (riscv_isa_extension_available(NULL, ZVKB)) { + err = crypto_register_skcipher( + &riscv64_zvkned_zvkb_aes_skcipher_alg); + if (err) + goto unregister_zvkned_skcipher_algs; + } + + if (riscv64_aes_xts_supported()) { + err = crypto_register_skcipher( + &riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg); + if (err) + goto unregister_zvkned_zvkb_skcipher_alg; + } + } + + return err; + +unregister_zvkned_zvkb_skcipher_alg: + if (riscv_isa_extension_available(NULL, ZVKB)) + crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg); +unregister_zvkned_skcipher_algs: + crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs, + ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs)); +unregister_zvkned_cipher_alg: + crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg); + return err; +} + +static void __exit riscv64_aes_mod_exit(void) +{ + if (riscv64_aes_xts_supported()) + crypto_unregister_skcipher(&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg); + if (riscv_isa_extension_available(NULL, ZVKB)) + crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg); + crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs, + ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs)); + crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg); +} + +module_init(riscv64_aes_mod_init); +module_exit(riscv64_aes_mod_exit); + +MODULE_DESCRIPTION("AES-ECB/CBC/CTS/CTR/XTS (RISC-V accelerated)"); +MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cts(cbc(aes))"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S new file mode 100644 index 000000000000..146fc9cfb268 --- /dev/null +++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048 +// - RISC-V Vector AES block cipher extension ('Zvkned') +// - RISC-V Vector Bit-manipulation extension ('Zvbb') +// - RISC-V Vector GCM/GMAC extension ('Zvkg') + +#include <linux/linkage.h> + +.text +.option arch, +zvkned, +zvbb, +zvkg + +#include "aes-macros.S" + +#define KEYP a0 +#define INP a1 +#define OUTP a2 +#define LEN a3 +#define TWEAKP a4 + +#define LEN32 a5 +#define TAIL_LEN a6 +#define VL a7 +#define VLMAX t4 + +// v1-v15 contain the AES round keys, but they are used for temporaries before +// the AES round keys have been loaded. +#define TWEAKS v16 // LMUL=4 (most of the time) +#define TWEAKS_BREV v20 // LMUL=4 (most of the time) +#define MULTS_BREV v24 // LMUL=4 (most of the time) +#define TMP0 v28 +#define TMP1 v29 +#define TMP2 v30 +#define TMP3 v31 + +// xts_init initializes the following values: +// +// TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1) +// TWEAKS_BREV: same as TWEAKS, but bit-reversed +// MULTS_BREV: N 128-bit values x^N, bit-reversed. Only if N > 1. +// +// N is the maximum number of blocks that will be processed per loop iteration, +// computed using vsetvli. +// +// The field convention used by XTS is the same as that of GHASH, but with the +// bits reversed within each byte. The zvkg extension provides the vgmul +// instruction which does multiplication in this field. Therefore, for tweak +// computation we use vgmul to do multiplications in parallel, instead of +// serially multiplying by x using shifting+xoring. Note that for this to work, +// the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8). +.macro xts_init + + // Load the first tweak T. + vsetivli zero, 4, e32, m1, ta, ma + vle32.v TWEAKS, (TWEAKP) + + // If there's only one block (or no blocks at all), then skip the tweak + // sequence computation because (at most) T itself is needed. + li t0, 16 + ble LEN, t0, .Linit_single_block\@ + + // Save a copy of T bit-reversed in v12. + vbrev8.v v12, TWEAKS + + // + // Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming + // that N <= 128. Though, this code actually requires N < 64 (or + // equivalently VLEN < 2048) due to the use of 64-bit intermediate + // values here and in the x^N computation later. + // + vsetvli VL, LEN32, e32, m4, ta, ma + srli t0, VL, 2 // t0 = N (num blocks) + // Generate two sequences, each with N 32-bit values: + // v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...]. + vsetvli zero, t0, e32, m1, ta, ma + vmv.v.i v0, 1 + vid.v v1 + // Use vzext to zero-extend the sequences to 64 bits. Reinterpret them + // as two sequences, each with 2*N 32-bit values: + // v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...]. + vsetvli zero, t0, e64, m2, ta, ma + vzext.vf2 v2, v0 + vzext.vf2 v4, v1 + slli t1, t0, 1 // t1 = 2*N + vsetvli zero, t1, e32, m2, ta, ma + // Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...], + // widening to 64 bits per element. When reinterpreted as N 128-bit + // values, this is the needed sequence of 128-bit values 1 << i (x^i). + vwsll.vv v8, v2, v4 + + // Copy the bit-reversed T to all N elements of TWEAKS_BREV, then + // multiply by x^i. This gives the sequence T*(x^i), bit-reversed. + vsetvli zero, LEN32, e32, m4, ta, ma + vmv.v.i TWEAKS_BREV, 0 + vaesz.vs TWEAKS_BREV, v12 + vbrev8.v v8, v8 + vgmul.vv TWEAKS_BREV, v8 + + // Save a copy of the sequence T*(x^i) with the bit reversal undone. + vbrev8.v TWEAKS, TWEAKS_BREV + + // Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed. + li t1, 1 + sll t1, t1, t0 // t1 = 1 << N + vsetivli zero, 2, e64, m1, ta, ma + vmv.v.i v0, 0 + vsetivli zero, 1, e64, m1, tu, ma + vmv.v.x v0, t1 + vbrev8.v v0, v0 + vsetvli zero, LEN32, e32, m4, ta, ma + vmv.v.i MULTS_BREV, 0 + vaesz.vs MULTS_BREV, v0 + + j .Linit_done\@ + +.Linit_single_block\@: + vbrev8.v TWEAKS_BREV, TWEAKS +.Linit_done\@: +.endm + +// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed. This is +// the multiplier required to advance the tweak by one. +.macro load_x + li t0, 0x40 + vsetivli zero, 4, e32, m1, ta, ma + vmv.v.i MULTS_BREV, 0 + vsetivli zero, 1, e8, m1, tu, ma + vmv.v.x MULTS_BREV, t0 +.endm + +.macro __aes_xts_crypt enc, keylen + // With 16 < len <= 31, there's no main loop, just ciphertext stealing. + beqz LEN32, .Lcts_without_main_loop\@ + + vsetvli VLMAX, zero, e32, m4, ta, ma +1: + vsetvli VL, LEN32, e32, m4, ta, ma +2: + // Encrypt or decrypt VL/4 blocks. + vle32.v TMP0, (INP) + vxor.vv TMP0, TMP0, TWEAKS + aes_crypt TMP0, \enc, \keylen + vxor.vv TMP0, TMP0, TWEAKS + vse32.v TMP0, (OUTP) + + // Update the pointers and the remaining length. + slli t0, VL, 2 + add INP, INP, t0 + add OUTP, OUTP, t0 + sub LEN32, LEN32, VL + + // Check whether more blocks remain. + beqz LEN32, .Lmain_loop_done\@ + + // Compute the next sequence of tweaks by multiplying the previous + // sequence by x^N. Store the result in both bit-reversed order and + // regular order (i.e. with the bit reversal undone). + vgmul.vv TWEAKS_BREV, MULTS_BREV + vbrev8.v TWEAKS, TWEAKS_BREV + + // Since we compute the tweak multipliers x^N in advance, we require + // that each iteration process the same length except possibly the last. + // This conflicts slightly with the behavior allowed by RISC-V Vector + // Extension, where CPUs can select a lower length for both of the last + // two iterations. E.g., vl might take the sequence of values + // [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we + // can use x^4 again instead of computing x^3. Therefore, we explicitly + // keep the vl at VLMAX if there is at least VLMAX remaining. + bge LEN32, VLMAX, 2b + j 1b + +.Lmain_loop_done\@: + load_x + + // Compute the next tweak. + addi t0, VL, -4 + vsetivli zero, 4, e32, m4, ta, ma + vslidedown.vx TWEAKS_BREV, TWEAKS_BREV, t0 // Extract last tweak + vsetivli zero, 4, e32, m1, ta, ma + vgmul.vv TWEAKS_BREV, MULTS_BREV // Advance to next tweak + + bnez TAIL_LEN, .Lcts\@ + + // Update *TWEAKP to contain the next tweak. + vbrev8.v TWEAKS, TWEAKS_BREV + vse32.v TWEAKS, (TWEAKP) + ret + +.Lcts_without_main_loop\@: + load_x +.Lcts\@: + // TWEAKS_BREV now contains the next tweak. Compute the one after that. + vsetivli zero, 4, e32, m1, ta, ma + vmv.v.v TMP0, TWEAKS_BREV + vgmul.vv TMP0, MULTS_BREV + // Undo the bit reversal of the next two tweaks and store them in TMP1 + // and TMP2, such that TMP1 is the first needed and TMP2 the second. +.if \enc + vbrev8.v TMP1, TWEAKS_BREV + vbrev8.v TMP2, TMP0 +.else + vbrev8.v TMP1, TMP0 + vbrev8.v TMP2, TWEAKS_BREV +.endif + + // Encrypt/decrypt the last full block. + vle32.v TMP0, (INP) + vxor.vv TMP0, TMP0, TMP1 + aes_crypt TMP0, \enc, \keylen + vxor.vv TMP0, TMP0, TMP1 + + // Swap the first TAIL_LEN bytes of the above result with the tail. + // Note that to support in-place encryption/decryption, the load from + // the input tail must happen before the store to the output tail. + addi t0, INP, 16 + addi t1, OUTP, 16 + vmv.v.v TMP3, TMP0 + vsetvli zero, TAIL_LEN, e8, m1, tu, ma + vle8.v TMP0, (t0) + vse8.v TMP3, (t1) + + // Encrypt/decrypt again and store the last full block. + vsetivli zero, 4, e32, m1, ta, ma + vxor.vv TMP0, TMP0, TMP2 + aes_crypt TMP0, \enc, \keylen + vxor.vv TMP0, TMP0, TMP2 + vse32.v TMP0, (OUTP) + + ret +.endm + +.macro aes_xts_crypt enc + + // Check whether the length is a multiple of the AES block size. + andi TAIL_LEN, LEN, 15 + beqz TAIL_LEN, 1f + + // The length isn't a multiple of the AES block size, so ciphertext + // stealing will be required. Ciphertext stealing involves special + // handling of the partial block and the last full block, so subtract + // the length of both from the length to be processed in the main loop. + sub LEN, LEN, TAIL_LEN + addi LEN, LEN, -16 +1: + srli LEN32, LEN, 2 + // LEN and LEN32 now contain the total length of the blocks that will be + // processed in the main loop, in bytes and 32-bit words respectively. + + xts_init + aes_begin KEYP, 128f, 192f + __aes_xts_crypt \enc, 256 +128: + __aes_xts_crypt \enc, 128 +192: + __aes_xts_crypt \enc, 192 +.endm + +// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len, +// u8 tweak[16]); +// +// |key| is the data key. |tweak| contains the next tweak; the encryption of +// the original IV with the tweak key was already done. This function supports +// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and +// |len| must be a multiple of 16 except on the last call. If |len| is a +// multiple of 16, then this function updates |tweak| to contain the next tweak. +SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg) + aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg) + +// Same prototype and calling convention as the encryption function +SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg) + aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg) diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S new file mode 100644 index 000000000000..9962d4500587 --- /dev/null +++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector AES block cipher extension ('Zvkned') +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/linkage.h> + +.text +.option arch, +zvkned, +zvkb + +#include "aes-macros.S" + +#define KEYP a0 +#define INP a1 +#define OUTP a2 +#define LEN a3 +#define IVP a4 + +#define LEN32 a5 +#define VL_E32 a6 +#define VL_BLOCKS a7 + +.macro aes_ctr32_crypt keylen + // LEN32 = number of blocks, rounded up, in 32-bit words. + addi t0, LEN, 15 + srli t0, t0, 4 + slli LEN32, t0, 2 + + // Create a mask that selects the last 32-bit word of each 128-bit + // block. This is the word that contains the (big-endian) counter. + li t0, 0x88 + vsetvli t1, zero, e8, m1, ta, ma + vmv.v.x v0, t0 + + // Load the IV into v31. The last 32-bit word contains the counter. + vsetivli zero, 4, e32, m1, ta, ma + vle32.v v31, (IVP) + + // Convert the big-endian counter into little-endian. + vsetivli zero, 4, e32, m1, ta, mu + vrev8.v v31, v31, v0.t + + // Splat the IV to v16 (with LMUL=4). The number of copies is the + // maximum number of blocks that will be processed per iteration. + vsetvli zero, LEN32, e32, m4, ta, ma + vmv.v.i v16, 0 + vaesz.vs v16, v31 + + // v20 = [x, x, x, 0, x, x, x, 1, ...] + viota.m v20, v0, v0.t + // v16 = [IV0, IV1, IV2, counter+0, IV0, IV1, IV2, counter+1, ...] + vsetvli VL_E32, LEN32, e32, m4, ta, mu + vadd.vv v16, v16, v20, v0.t + + j 2f +1: + // Set the number of blocks to process in this iteration. vl=VL_E32 is + // the length in 32-bit words, i.e. 4 times the number of blocks. + vsetvli VL_E32, LEN32, e32, m4, ta, mu + + // Increment the counters by the number of blocks processed in the + // previous iteration. + vadd.vx v16, v16, VL_BLOCKS, v0.t +2: + // Prepare the AES inputs into v24. + vmv.v.v v24, v16 + vrev8.v v24, v24, v0.t // Convert counters back to big-endian. + + // Encrypt the AES inputs to create the next portion of the keystream. + aes_encrypt v24, \keylen + + // XOR the data with the keystream. + vsetvli t0, LEN, e8, m4, ta, ma + vle8.v v20, (INP) + vxor.vv v20, v20, v24 + vse8.v v20, (OUTP) + + // Advance the pointers and update the remaining length. + add INP, INP, t0 + add OUTP, OUTP, t0 + sub LEN, LEN, t0 + sub LEN32, LEN32, VL_E32 + srli VL_BLOCKS, VL_E32, 2 + + // Repeat if more data remains. + bnez LEN, 1b + + // Update *IVP to contain the next counter. + vsetivli zero, 4, e32, m1, ta, mu + vadd.vx v16, v16, VL_BLOCKS, v0.t + vrev8.v v16, v16, v0.t // Convert counters back to big-endian. + vse32.v v16, (IVP) + + ret +.endm + +// void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len, +// u8 iv[16]); +SYM_FUNC_START(aes_ctr32_crypt_zvkned_zvkb) + aes_begin KEYP, 128f, 192f + aes_ctr32_crypt 256 +128: + aes_ctr32_crypt 128 +192: + aes_ctr32_crypt 192 +SYM_FUNC_END(aes_ctr32_crypt_zvkned_zvkb) diff --git a/arch/riscv/crypto/aes-riscv64-zvkned.S b/arch/riscv/crypto/aes-riscv64-zvkned.S new file mode 100644 index 000000000000..23d063f94ce6 --- /dev/null +++ b/arch/riscv/crypto/aes-riscv64-zvkned.S @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector AES block cipher extension ('Zvkned') + +#include <linux/linkage.h> + +.text +.option arch, +zvkned + +#include "aes-macros.S" + +#define KEYP a0 +#define INP a1 +#define OUTP a2 +#define LEN a3 +#define IVP a4 + +.macro __aes_crypt_zvkned enc, keylen + vle32.v v16, (INP) + aes_crypt v16, \enc, \keylen + vse32.v v16, (OUTP) + ret +.endm + +.macro aes_crypt_zvkned enc + aes_begin KEYP, 128f, 192f + __aes_crypt_zvkned \enc, 256 +128: + __aes_crypt_zvkned \enc, 128 +192: + __aes_crypt_zvkned \enc, 192 +.endm + +// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key, +// const u8 in[16], u8 out[16]); +SYM_FUNC_START(aes_encrypt_zvkned) + aes_crypt_zvkned 1 +SYM_FUNC_END(aes_encrypt_zvkned) + +// Same prototype and calling convention as the encryption function +SYM_FUNC_START(aes_decrypt_zvkned) + aes_crypt_zvkned 0 +SYM_FUNC_END(aes_decrypt_zvkned) + +.macro __aes_ecb_crypt enc, keylen + srli t0, LEN, 2 + // t0 is the remaining length in 32-bit words. It's a multiple of 4. +1: + vsetvli t1, t0, e32, m8, ta, ma + sub t0, t0, t1 // Subtract number of words processed + slli t1, t1, 2 // Words to bytes + vle32.v v16, (INP) + aes_crypt v16, \enc, \keylen + vse32.v v16, (OUTP) + add INP, INP, t1 + add OUTP, OUTP, t1 + bnez t0, 1b + + ret +.endm + +.macro aes_ecb_crypt enc + aes_begin KEYP, 128f, 192f + __aes_ecb_crypt \enc, 256 +128: + __aes_ecb_crypt \enc, 128 +192: + __aes_ecb_crypt \enc, 192 +.endm + +// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len); +// +// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE). +SYM_FUNC_START(aes_ecb_encrypt_zvkned) + aes_ecb_crypt 1 +SYM_FUNC_END(aes_ecb_encrypt_zvkned) + +// Same prototype and calling convention as the encryption function +SYM_FUNC_START(aes_ecb_decrypt_zvkned) + aes_ecb_crypt 0 +SYM_FUNC_END(aes_ecb_decrypt_zvkned) + +.macro aes_cbc_encrypt keylen + vle32.v v16, (IVP) // Load IV +1: + vle32.v v17, (INP) // Load plaintext block + vxor.vv v16, v16, v17 // XOR with IV or prev ciphertext block + aes_encrypt v16, \keylen // Encrypt + vse32.v v16, (OUTP) // Store ciphertext block + addi INP, INP, 16 + addi OUTP, OUTP, 16 + addi LEN, LEN, -16 + bnez LEN, 1b + + vse32.v v16, (IVP) // Store next IV + ret +.endm + +.macro aes_cbc_decrypt keylen + srli LEN, LEN, 2 // Convert LEN from bytes to words + vle32.v v16, (IVP) // Load IV +1: + vsetvli t0, LEN, e32, m4, ta, ma + vle32.v v20, (INP) // Load ciphertext blocks + vslideup.vi v16, v20, 4 // Setup prev ciphertext blocks + addi t1, t0, -4 + vslidedown.vx v24, v20, t1 // Save last ciphertext block + aes_decrypt v20, \keylen // Decrypt the blocks + vxor.vv v20, v20, v16 // XOR with prev ciphertext blocks + vse32.v v20, (OUTP) // Store plaintext blocks + vmv.v.v v16, v24 // Next "IV" is last ciphertext block + slli t1, t0, 2 // Words to bytes + add INP, INP, t1 + add OUTP, OUTP, t1 + sub LEN, LEN, t0 + bnez LEN, 1b + + vsetivli zero, 4, e32, m1, ta, ma + vse32.v v16, (IVP) // Store next IV + ret +.endm + +// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len, u8 iv[16]); +// +// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE). +SYM_FUNC_START(aes_cbc_encrypt_zvkned) + aes_begin KEYP, 128f, 192f + aes_cbc_encrypt 256 +128: + aes_cbc_encrypt 128 +192: + aes_cbc_encrypt 192 +SYM_FUNC_END(aes_cbc_encrypt_zvkned) + +// Same prototype and calling convention as the encryption function +SYM_FUNC_START(aes_cbc_decrypt_zvkned) + aes_begin KEYP, 128f, 192f + aes_cbc_decrypt 256 +128: + aes_cbc_decrypt 128 +192: + aes_cbc_decrypt 192 +SYM_FUNC_END(aes_cbc_decrypt_zvkned) + +.macro aes_cbc_cts_encrypt keylen + + // CBC-encrypt all blocks except the last. But don't store the + // second-to-last block to the output buffer yet, since it will be + // handled specially in the ciphertext stealing step. Exception: if the + // message is single-block, still encrypt the last (and only) block. + li t0, 16 + j 2f +1: + vse32.v v16, (OUTP) // Store ciphertext block + addi OUTP, OUTP, 16 +2: + vle32.v v17, (INP) // Load plaintext block + vxor.vv v16, v16, v17 // XOR with IV or prev ciphertext block + aes_encrypt v16, \keylen // Encrypt + addi INP, INP, 16 + addi LEN, LEN, -16 + bgt LEN, t0, 1b // Repeat if more than one block remains + + // Special case: if the message is a single block, just do CBC. + beqz LEN, .Lcts_encrypt_done\@ + + // Encrypt the last two blocks using ciphertext stealing as follows: + // C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n]) + // C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN] + // + // C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th + // plaintext block. Block n, the last block, may be partial; its length + // is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV. + // + // v16 already contains Encrypt(P[n-1] ^ C[n-2]). + // INP points to P[n]. OUTP points to where C[n-1] should go. + // To support in-place encryption, load P[n] before storing C[n]. + addi t0, OUTP, 16 // Get pointer to where C[n] should go + vsetvli zero, LEN, e8, m1, tu, ma + vle8.v v17, (INP) // Load P[n] + vse8.v v16, (t0) // Store C[n] + vxor.vv v16, v16, v17 // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n] + vsetivli zero, 4, e32, m1, ta, ma + aes_encrypt v16, \keylen +.Lcts_encrypt_done\@: + vse32.v v16, (OUTP) // Store C[n-1] (or C[n] in single-block case) + ret +.endm + +#define LEN32 t4 // Length of remaining full blocks in 32-bit words +#define LEN_MOD16 t5 // Length of message in bytes mod 16 + +.macro aes_cbc_cts_decrypt keylen + andi LEN32, LEN, ~15 + srli LEN32, LEN32, 2 + andi LEN_MOD16, LEN, 15 + + // Save C[n-2] in v28 so that it's available later during the ciphertext + // stealing step. If there are fewer than three blocks, C[n-2] means + // the IV, otherwise it means the third-to-last ciphertext block. + vmv.v.v v28, v16 // IV + add t0, LEN, -33 + bltz t0, .Lcts_decrypt_loop\@ + andi t0, t0, ~15 + add t0, t0, INP + vle32.v v28, (t0) + + // CBC-decrypt all full blocks. For the last full block, or the last 2 + // full blocks if the message is block-aligned, this doesn't write the + // correct output blocks (unless the message is only a single block), + // because it XORs the wrong values with the raw AES plaintexts. But we + // fix this after this loop without redoing the AES decryptions. This + // approach allows more of the AES decryptions to be parallelized. +.Lcts_decrypt_loop\@: + vsetvli t0, LEN32, e32, m4, ta, ma + addi t1, t0, -4 + vle32.v v20, (INP) // Load next set of ciphertext blocks + vmv.v.v v24, v16 // Get IV or last ciphertext block of prev set + vslideup.vi v24, v20, 4 // Setup prev ciphertext blocks + vslidedown.vx v16, v20, t1 // Save last ciphertext block of this set + aes_decrypt v20, \keylen // Decrypt this set of blocks + vxor.vv v24, v24, v20 // XOR prev ciphertext blocks with decrypted blocks + vse32.v v24, (OUTP) // Store this set of plaintext blocks + sub LEN32, LEN32, t0 + slli t0, t0, 2 // Words to bytes + add INP, INP, t0 + add OUTP, OUTP, t0 + bnez LEN32, .Lcts_decrypt_loop\@ + + vsetivli zero, 4, e32, m4, ta, ma + vslidedown.vx v20, v20, t1 // Extract raw plaintext of last full block + addi t0, OUTP, -16 // Get pointer to last full plaintext block + bnez LEN_MOD16, .Lcts_decrypt_non_block_aligned\@ + + // Special case: if the message is a single block, just do CBC. + li t1, 16 + beq LEN, t1, .Lcts_decrypt_done\@ + + // Block-aligned message. Just fix up the last 2 blocks. We need: + // + // P[n-1] = Decrypt(C[n]) ^ C[n-2] + // P[n] = Decrypt(C[n-1]) ^ C[n] + // + // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28. + // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this + // is everything needed to fix the output without re-decrypting blocks. + addi t1, OUTP, -32 // Get pointer to where P[n-1] should go + vxor.vv v20, v20, v28 // Decrypt(C[n]) ^ C[n-2] == P[n-1] + vle32.v v24, (t1) // Decrypt(C[n-1]) ^ C[n-2] + vse32.v v20, (t1) // Store P[n-1] + vxor.vv v20, v24, v16 // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2] + j .Lcts_decrypt_finish\@ + +.Lcts_decrypt_non_block_aligned\@: + // Decrypt the last two blocks using ciphertext stealing as follows: + // + // P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2] + // P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16] + // + // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28. + vmv.v.v v16, v20 // v16 = Decrypt(C[n-1]) + vsetvli zero, LEN_MOD16, e8, m1, tu, ma + vle8.v v20, (INP) // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16] + vxor.vv v16, v16, v20 // v16 = Decrypt(C[n-1]) ^ C[n] + vse8.v v16, (OUTP) // Store P[n] + vsetivli zero, 4, e32, m1, ta, ma + aes_decrypt v20, \keylen // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) +.Lcts_decrypt_finish\@: + vxor.vv v20, v20, v28 // XOR with C[n-2] + vse32.v v20, (t0) // Store last full plaintext block +.Lcts_decrypt_done\@: + ret +.endm + +.macro aes_cbc_cts_crypt keylen + vle32.v v16, (IVP) // Load IV + beqz a5, .Lcts_decrypt\@ + aes_cbc_cts_encrypt \keylen +.Lcts_decrypt\@: + aes_cbc_cts_decrypt \keylen +.endm + +// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len, +// const u8 iv[16], bool enc); +// +// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS. +// This is the variant that unconditionally swaps the last two blocks. +SYM_FUNC_START(aes_cbc_cts_crypt_zvkned) + aes_begin KEYP, 128f, 192f + aes_cbc_cts_crypt 256 +128: + aes_cbc_cts_crypt 128 +192: + aes_cbc_cts_crypt 192 +SYM_FUNC_END(aes_cbc_cts_crypt_zvkned) diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c new file mode 100644 index 000000000000..10b46f36375a --- /dev/null +++ b/arch/riscv/crypto/chacha-riscv64-glue.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ChaCha20 using the RISC-V vector crypto extensions + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/skcipher.h> +#include <linux/linkage.h> +#include <linux/module.h> + +asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, + size_t len, const u32 iv[4]); + +static int riscv64_chacha20_crypt(struct skcipher_request *req) +{ + u32 iv[CHACHA_IV_SIZE / sizeof(u32)]; + u8 block_buffer[CHACHA_BLOCK_SIZE]; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + unsigned int tail_bytes; + int err; + + iv[0] = get_unaligned_le32(req->iv); + iv[1] = get_unaligned_le32(req->iv + 4); + iv[2] = get_unaligned_le32(req->iv + 8); + iv[3] = get_unaligned_le32(req->iv + 12); + + err = skcipher_walk_virt(&walk, req, false); + while (walk.nbytes) { + nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1); + tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1); + kernel_vector_begin(); + if (nbytes) { + chacha20_zvkb(ctx->key, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, iv); + iv[0] += nbytes / CHACHA_BLOCK_SIZE; + } + if (walk.nbytes == walk.total && tail_bytes > 0) { + memcpy(block_buffer, walk.src.virt.addr + nbytes, + tail_bytes); + chacha20_zvkb(ctx->key, block_buffer, block_buffer, + CHACHA_BLOCK_SIZE, iv); + memcpy(walk.dst.virt.addr + nbytes, block_buffer, + tail_bytes); + tail_bytes = 0; + } + kernel_vector_end(); + + err = skcipher_walk_done(&walk, tail_bytes); + } + + return err; +} + +static struct skcipher_alg riscv64_chacha_alg = { + .setkey = chacha20_setkey, + .encrypt = riscv64_chacha20_crypt, + .decrypt = riscv64_chacha20_crypt, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .base = { + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chacha_ctx), + .cra_priority = 300, + .cra_name = "chacha20", + .cra_driver_name = "chacha20-riscv64-zvkb", + .cra_module = THIS_MODULE, + }, +}; + +static int __init riscv64_chacha_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + return crypto_register_skcipher(&riscv64_chacha_alg); + + return -ENODEV; +} + +static void __exit riscv64_chacha_mod_exit(void) +{ + crypto_unregister_skcipher(&riscv64_chacha_alg); +} + +module_init(riscv64_chacha_mod_init); +module_exit(riscv64_chacha_mod_exit); + +MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)"); +MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/crypto/chacha-riscv64-zvkb.S new file mode 100644 index 000000000000..bf057737ac69 --- /dev/null +++ b/arch/riscv/crypto/chacha-riscv64-zvkb.S @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/linkage.h> + +.text +.option arch, +zvkb + +#define KEYP a0 +#define INP a1 +#define OUTP a2 +#define LEN a3 +#define IVP a4 + +#define CONSTS0 a5 +#define CONSTS1 a6 +#define CONSTS2 a7 +#define CONSTS3 t0 +#define TMP t1 +#define VL t2 +#define STRIDE t3 +#define NROUNDS t4 +#define KEY0 s0 +#define KEY1 s1 +#define KEY2 s2 +#define KEY3 s3 +#define KEY4 s4 +#define KEY5 s5 +#define KEY6 s6 +#define KEY7 s7 +#define COUNTER s8 +#define NONCE0 s9 +#define NONCE1 s10 +#define NONCE2 s11 + +.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \ + a2, b2, c2, d2, a3, b3, c3, d3 + // a += b; d ^= a; d = rol(d, 16); + vadd.vv \a0, \a0, \b0 + vadd.vv \a1, \a1, \b1 + vadd.vv \a2, \a2, \b2 + vadd.vv \a3, \a3, \b3 + vxor.vv \d0, \d0, \a0 + vxor.vv \d1, \d1, \a1 + vxor.vv \d2, \d2, \a2 + vxor.vv \d3, \d3, \a3 + vror.vi \d0, \d0, 32 - 16 + vror.vi \d1, \d1, 32 - 16 + vror.vi \d2, \d2, 32 - 16 + vror.vi \d3, \d3, 32 - 16 + + // c += d; b ^= c; b = rol(b, 12); + vadd.vv \c0, \c0, \d0 + vadd.vv \c1, \c1, \d1 + vadd.vv \c2, \c2, \d2 + vadd.vv \c3, \c3, \d3 + vxor.vv \b0, \b0, \c0 + vxor.vv \b1, \b1, \c1 + vxor.vv \b2, \b2, \c2 + vxor.vv \b3, \b3, \c3 + vror.vi \b0, \b0, 32 - 12 + vror.vi \b1, \b1, 32 - 12 + vror.vi \b2, \b2, 32 - 12 + vror.vi \b3, \b3, 32 - 12 + + // a += b; d ^= a; d = rol(d, 8); + vadd.vv \a0, \a0, \b0 + vadd.vv \a1, \a1, \b1 + vadd.vv \a2, \a2, \b2 + vadd.vv \a3, \a3, \b3 + vxor.vv \d0, \d0, \a0 + vxor.vv \d1, \d1, \a1 + vxor.vv \d2, \d2, \a2 + vxor.vv \d3, \d3, \a3 + vror.vi \d0, \d0, 32 - 8 + vror.vi \d1, \d1, 32 - 8 + vror.vi \d2, \d2, 32 - 8 + vror.vi \d3, \d3, 32 - 8 + + // c += d; b ^= c; b = rol(b, 7); + vadd.vv \c0, \c0, \d0 + vadd.vv \c1, \c1, \d1 + vadd.vv \c2, \c2, \d2 + vadd.vv \c3, \c3, \d3 + vxor.vv \b0, \b0, \c0 + vxor.vv \b1, \b1, \c1 + vxor.vv \b2, \b2, \c2 + vxor.vv \b3, \b3, \c3 + vror.vi \b0, \b0, 32 - 7 + vror.vi \b1, \b1, 32 - 7 + vror.vi \b2, \b2, 32 - 7 + vror.vi \b3, \b3, 32 - 7 +.endm + +// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len, +// const u32 iv[4]); +// +// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE). +// The counter is treated as 32-bit, following the RFC7539 convention. +SYM_FUNC_START(chacha20_zvkb) + srli LEN, LEN, 6 // Bytes to blocks + + addi sp, sp, -96 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + sd s9, 72(sp) + sd s10, 80(sp) + sd s11, 88(sp) + + li STRIDE, 64 + + // Set up the initial state matrix in scalar registers. + li CONSTS0, 0x61707865 // "expa" little endian + li CONSTS1, 0x3320646e // "nd 3" little endian + li CONSTS2, 0x79622d32 // "2-by" little endian + li CONSTS3, 0x6b206574 // "te k" little endian + lw KEY0, 0(KEYP) + lw KEY1, 4(KEYP) + lw KEY2, 8(KEYP) + lw KEY3, 12(KEYP) + lw KEY4, 16(KEYP) + lw KEY5, 20(KEYP) + lw KEY6, 24(KEYP) + lw KEY7, 28(KEYP) + lw COUNTER, 0(IVP) + lw NONCE0, 4(IVP) + lw NONCE1, 8(IVP) + lw NONCE2, 12(IVP) + +.Lblock_loop: + // Set vl to the number of blocks to process in this iteration. + vsetvli VL, LEN, e32, m1, ta, ma + + // Set up the initial state matrix for the next VL blocks in v0-v15. + // v{i} holds the i'th 32-bit word of the state matrix for all blocks. + // Note that only the counter word, at index 12, differs across blocks. + vmv.v.x v0, CONSTS0 + vmv.v.x v1, CONSTS1 + vmv.v.x v2, CONSTS2 + vmv.v.x v3, CONSTS3 + vmv.v.x v4, KEY0 + vmv.v.x v5, KEY1 + vmv.v.x v6, KEY2 + vmv.v.x v7, KEY3 + vmv.v.x v8, KEY4 + vmv.v.x v9, KEY5 + vmv.v.x v10, KEY6 + vmv.v.x v11, KEY7 + vid.v v12 + vadd.vx v12, v12, COUNTER + vmv.v.x v13, NONCE0 + vmv.v.x v14, NONCE1 + vmv.v.x v15, NONCE2 + + // Load the first half of the input data for each block into v16-v23. + // v{16+i} holds the i'th 32-bit word for all blocks. + vlsseg8e32.v v16, (INP), STRIDE + + li NROUNDS, 20 +.Lnext_doubleround: + addi NROUNDS, NROUNDS, -2 + // column round + chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \ + v2, v6, v10, v14, v3, v7, v11, v15 + // diagonal round + chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \ + v2, v7, v8, v13, v3, v4, v9, v14 + bnez NROUNDS, .Lnext_doubleround + + // Load the second half of the input data for each block into v24-v31. + // v{24+i} holds the {8+i}'th 32-bit word for all blocks. + addi TMP, INP, 32 + vlsseg8e32.v v24, (TMP), STRIDE + + // Finalize the first half of the keystream for each block. + vadd.vx v0, v0, CONSTS0 + vadd.vx v1, v1, CONSTS1 + vadd.vx v2, v2, CONSTS2 + vadd.vx v3, v3, CONSTS3 + vadd.vx v4, v4, KEY0 + vadd.vx v5, v5, KEY1 + vadd.vx v6, v6, KEY2 + vadd.vx v7, v7, KEY3 + + // Encrypt/decrypt the first half of the data for each block. + vxor.vv v16, v16, v0 + vxor.vv v17, v17, v1 + vxor.vv v18, v18, v2 + vxor.vv v19, v19, v3 + vxor.vv v20, v20, v4 + vxor.vv v21, v21, v5 + vxor.vv v22, v22, v6 + vxor.vv v23, v23, v7 + + // Store the first half of the output data for each block. + vssseg8e32.v v16, (OUTP), STRIDE + + // Finalize the second half of the keystream for each block. + vadd.vx v8, v8, KEY4 + vadd.vx v9, v9, KEY5 + vadd.vx v10, v10, KEY6 + vadd.vx v11, v11, KEY7 + vid.v v0 + vadd.vx v12, v12, COUNTER + vadd.vx v13, v13, NONCE0 + vadd.vx v14, v14, NONCE1 + vadd.vx v15, v15, NONCE2 + vadd.vv v12, v12, v0 + + // Encrypt/decrypt the second half of the data for each block. + vxor.vv v24, v24, v8 + vxor.vv v25, v25, v9 + vxor.vv v26, v26, v10 + vxor.vv v27, v27, v11 + vxor.vv v29, v29, v13 + vxor.vv v28, v28, v12 + vxor.vv v30, v30, v14 + vxor.vv v31, v31, v15 + + // Store the second half of the output data for each block. + addi TMP, OUTP, 32 + vssseg8e32.v v24, (TMP), STRIDE + + // Update the counter, the remaining number of blocks, and the input and + // output pointers according to the number of blocks processed (VL). + add COUNTER, COUNTER, VL + sub LEN, LEN, VL + slli TMP, VL, 6 + add OUTP, OUTP, TMP + add INP, INP, TMP + bnez LEN, .Lblock_loop + + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + ld s9, 72(sp) + ld s10, 80(sp) + ld s11, 88(sp) + addi sp, sp, 96 + ret +SYM_FUNC_END(chacha20_zvkb) diff --git a/arch/riscv/crypto/ghash-riscv64-glue.c b/arch/riscv/crypto/ghash-riscv64-glue.c new file mode 100644 index 000000000000..312e7891fd0a --- /dev/null +++ b/arch/riscv/crypto/ghash-riscv64-glue.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GHASH using the RISC-V vector crypto extensions + * + * Copyright (C) 2023 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/ghash.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> +#include <linux/linkage.h> +#include <linux/module.h> + +asmlinkage void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data, + size_t len); + +struct riscv64_ghash_tfm_ctx { + be128 key; +}; + +struct riscv64_ghash_desc_ctx { + be128 accumulator; + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int riscv64_ghash_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) + return -EINVAL; + + memcpy(&tctx->key, key, GHASH_BLOCK_SIZE); + + return 0; +} + +static int riscv64_ghash_init(struct shash_desc *desc) +{ + struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + *dctx = (struct riscv64_ghash_desc_ctx){}; + + return 0; +} + +static inline void +riscv64_ghash_blocks(const struct riscv64_ghash_tfm_ctx *tctx, + struct riscv64_ghash_desc_ctx *dctx, + const u8 *src, size_t srclen) +{ + /* The srclen is nonzero and a multiple of 16. */ + if (crypto_simd_usable()) { + kernel_vector_begin(); + ghash_zvkg(&dctx->accumulator, &tctx->key, src, srclen); + kernel_vector_end(); + } else { + do { + crypto_xor((u8 *)&dctx->accumulator, src, + GHASH_BLOCK_SIZE); + gf128mul_lle(&dctx->accumulator, &tctx->key); + src += GHASH_BLOCK_SIZE; + srclen -= GHASH_BLOCK_SIZE; + } while (srclen); + } +} + +static int riscv64_ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int srclen) +{ + const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + unsigned int len; + + if (dctx->bytes) { + if (dctx->bytes + srclen < GHASH_BLOCK_SIZE) { + memcpy(dctx->buffer + dctx->bytes, src, srclen); + dctx->bytes += srclen; + return 0; + } + memcpy(dctx->buffer + dctx->bytes, src, + GHASH_BLOCK_SIZE - dctx->bytes); + riscv64_ghash_blocks(tctx, dctx, dctx->buffer, + GHASH_BLOCK_SIZE); + src += GHASH_BLOCK_SIZE - dctx->bytes; + srclen -= GHASH_BLOCK_SIZE - dctx->bytes; + dctx->bytes = 0; + } + + len = round_down(srclen, GHASH_BLOCK_SIZE); + if (len) { + riscv64_ghash_blocks(tctx, dctx, src, len); + src += len; + srclen -= len; + } + + if (srclen) { + memcpy(dctx->buffer, src, srclen); + dctx->bytes = srclen; + } + + return 0; +} + +static int riscv64_ghash_final(struct shash_desc *desc, u8 *out) +{ + const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + int i; + + if (dctx->bytes) { + for (i = dctx->bytes; i < GHASH_BLOCK_SIZE; i++) + dctx->buffer[i] = 0; + + riscv64_ghash_blocks(tctx, dctx, dctx->buffer, + GHASH_BLOCK_SIZE); + } + + memcpy(out, &dctx->accumulator, GHASH_DIGEST_SIZE); + return 0; +} + +static struct shash_alg riscv64_ghash_alg = { + .init = riscv64_ghash_init, + .update = riscv64_ghash_update, + .final = riscv64_ghash_final, + .setkey = riscv64_ghash_setkey, + .descsize = sizeof(struct riscv64_ghash_desc_ctx), + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct riscv64_ghash_tfm_ctx), + .cra_priority = 300, + .cra_name = "ghash", + .cra_driver_name = "ghash-riscv64-zvkg", + .cra_module = THIS_MODULE, + }, +}; + +static int __init riscv64_ghash_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZVKG) && + riscv_vector_vlen() >= 128) + return crypto_register_shash(&riscv64_ghash_alg); + + return -ENODEV; +} + +static void __exit riscv64_ghash_mod_exit(void) +{ + crypto_unregister_shash(&riscv64_ghash_alg); +} + +module_init(riscv64_ghash_mod_init); +module_exit(riscv64_ghash_mod_exit); + +MODULE_DESCRIPTION("GHASH (RISC-V accelerated)"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/riscv/crypto/ghash-riscv64-zvkg.S b/arch/riscv/crypto/ghash-riscv64-zvkg.S new file mode 100644 index 000000000000..f2b43fb4d434 --- /dev/null +++ b/arch/riscv/crypto/ghash-riscv64-zvkg.S @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector GCM/GMAC extension ('Zvkg') + +#include <linux/linkage.h> + +.text +.option arch, +zvkg + +#define ACCUMULATOR a0 +#define KEY a1 +#define DATA a2 +#define LEN a3 + +// void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data, +// size_t len); +// +// |len| must be nonzero and a multiple of 16 (GHASH_BLOCK_SIZE). +SYM_FUNC_START(ghash_zvkg) + vsetivli zero, 4, e32, m1, ta, ma + vle32.v v1, (ACCUMULATOR) + vle32.v v2, (KEY) +.Lnext_block: + vle32.v v3, (DATA) + vghsh.vv v1, v2, v3 + addi DATA, DATA, 16 + addi LEN, LEN, -16 + bnez LEN, .Lnext_block + + vse32.v v1, (ACCUMULATOR) + ret +SYM_FUNC_END(ghash_zvkg) diff --git a/arch/riscv/crypto/sha256-riscv64-glue.c b/arch/riscv/crypto/sha256-riscv64-glue.c new file mode 100644 index 000000000000..71e051e40a64 --- /dev/null +++ b/arch/riscv/crypto/sha256-riscv64-glue.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 and SHA-224 using the RISC-V vector crypto extensions + * + * Copyright (C) 2022 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> +#include <crypto/sha256_base.h> +#include <linux/linkage.h> +#include <linux/module.h> + +/* + * Note: the asm function only uses the 'state' field of struct sha256_state. + * It is assumed to be the first field. + */ +asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb( + struct sha256_state *state, const u8 *data, int num_blocks); + +static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + /* + * Ensure struct sha256_state begins directly with the SHA-256 + * 256-bit internal state, as this is what the asm function expects. + */ + BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + sha256_base_do_update(desc, data, len, + sha256_transform_zvknha_or_zvknhb_zvkb); + kernel_vector_end(); + } else { + crypto_sha256_update(desc, data, len); + } + return 0; +} + +static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (crypto_simd_usable()) { + kernel_vector_begin(); + if (len) + sha256_base_do_update( + desc, data, len, + sha256_transform_zvknha_or_zvknhb_zvkb); + sha256_base_do_finalize( + desc, sha256_transform_zvknha_or_zvknhb_zvkb); + kernel_vector_end(); + + return sha256_base_finish(desc, out); + } + + return crypto_sha256_finup(desc, data, len, out); +} + +static int riscv64_sha256_final(struct shash_desc *desc, u8 *out) +{ + return riscv64_sha256_finup(desc, NULL, 0, out); +} + +static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_base_init(desc) ?: + riscv64_sha256_finup(desc, data, len, out); +} + +static struct shash_alg riscv64_sha256_algs[] = { + { + .init = sha256_base_init, + .update = riscv64_sha256_update, + .final = riscv64_sha256_final, + .finup = riscv64_sha256_finup, + .digest = riscv64_sha256_digest, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .base = { + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_priority = 300, + .cra_name = "sha256", + .cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb", + .cra_module = THIS_MODULE, + }, + }, { + .init = sha224_base_init, + .update = riscv64_sha256_update, + .final = riscv64_sha256_final, + .finup = riscv64_sha256_finup, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .base = { + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_priority = 300, + .cra_name = "sha224", + .cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb", + .cra_module = THIS_MODULE, + }, + }, +}; + +static int __init riscv64_sha256_mod_init(void) +{ + /* Both zvknha and zvknhb provide the SHA-256 instructions. */ + if ((riscv_isa_extension_available(NULL, ZVKNHA) || + riscv_isa_extension_available(NULL, ZVKNHB)) && + riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + return crypto_register_shashes(riscv64_sha256_algs, + ARRAY_SIZE(riscv64_sha256_algs)); + + return -ENODEV; +} + +static void __exit riscv64_sha256_mod_exit(void) +{ + crypto_unregister_shashes(riscv64_sha256_algs, + ARRAY_SIZE(riscv64_sha256_algs)); +} + +module_init(riscv64_sha256_mod_init); +module_exit(riscv64_sha256_mod_exit); + +MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); diff --git a/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S new file mode 100644 index 000000000000..8ebcc17de4dc --- /dev/null +++ b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb') +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/cfi_types.h> + +.text +.option arch, +zvknha, +zvkb + +#define STATEP a0 +#define DATA a1 +#define NUM_BLOCKS a2 + +#define STATEP_C a3 + +#define MASK v0 +#define INDICES v1 +#define W0 v2 +#define W1 v3 +#define W2 v4 +#define W3 v5 +#define VTMP v6 +#define FEBA v7 +#define HGDC v8 +#define K0 v10 +#define K1 v11 +#define K2 v12 +#define K3 v13 +#define K4 v14 +#define K5 v15 +#define K6 v16 +#define K7 v17 +#define K8 v18 +#define K9 v19 +#define K10 v20 +#define K11 v21 +#define K12 v22 +#define K13 v23 +#define K14 v24 +#define K15 v25 +#define PREV_FEBA v26 +#define PREV_HGDC v27 + +// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words. +// +// If not all the message schedule words have been computed yet, then this also +// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4 +// message schedule words; this macro computes the group after w3 and writes it +// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3, +// w0), so the caller must cycle through the registers accordingly. +.macro sha256_4rounds last, k, w0, w1, w2, w3 + vadd.vv VTMP, \k, \w0 + vsha2cl.vv HGDC, FEBA, VTMP + vsha2ch.vv FEBA, HGDC, VTMP +.if !\last + vmerge.vvm VTMP, \w2, \w1, MASK + vsha2ms.vv \w0, VTMP, \w3 +.endif +.endm + +.macro sha256_16rounds last, k0, k1, k2, k3 + sha256_4rounds \last, \k0, W0, W1, W2, W3 + sha256_4rounds \last, \k1, W1, W2, W3, W0 + sha256_4rounds \last, \k2, W2, W3, W0, W1 + sha256_4rounds \last, \k3, W3, W0, W1, W2 +.endm + +// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data, +// int num_blocks); +SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb) + + // Load the round constants into K0-K15. + vsetivli zero, 4, e32, m1, ta, ma + la t0, K256 + vle32.v K0, (t0) + addi t0, t0, 16 + vle32.v K1, (t0) + addi t0, t0, 16 + vle32.v K2, (t0) + addi t0, t0, 16 + vle32.v K3, (t0) + addi t0, t0, 16 + vle32.v K4, (t0) + addi t0, t0, 16 + vle32.v K5, (t0) + addi t0, t0, 16 + vle32.v K6, (t0) + addi t0, t0, 16 + vle32.v K7, (t0) + addi t0, t0, 16 + vle32.v K8, (t0) + addi t0, t0, 16 + vle32.v K9, (t0) + addi t0, t0, 16 + vle32.v K10, (t0) + addi t0, t0, 16 + vle32.v K11, (t0) + addi t0, t0, 16 + vle32.v K12, (t0) + addi t0, t0, 16 + vle32.v K13, (t0) + addi t0, t0, 16 + vle32.v K14, (t0) + addi t0, t0, 16 + vle32.v K15, (t0) + + // Setup mask for the vmerge to replace the first word (idx==0) in + // message scheduling. There are 4 words, so an 8-bit mask suffices. + vsetivli zero, 1, e8, m1, ta, ma + vmv.v.i MASK, 0x01 + + // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we + // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype + // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0}, + // loaded using the 32-bit little endian value 0x00041014. + li t0, 0x00041014 + vsetivli zero, 1, e32, m1, ta, ma + vmv.v.x INDICES, t0 + addi STATEP_C, STATEP, 8 + vsetivli zero, 4, e32, m1, ta, ma + vluxei8.v FEBA, (STATEP), INDICES + vluxei8.v HGDC, (STATEP_C), INDICES + +.Lnext_block: + addi NUM_BLOCKS, NUM_BLOCKS, -1 + + // Save the previous state, as it's needed later. + vmv.v.v PREV_FEBA, FEBA + vmv.v.v PREV_HGDC, HGDC + + // Load the next 512-bit message block and endian-swap each 32-bit word. + vle32.v W0, (DATA) + vrev8.v W0, W0 + addi DATA, DATA, 16 + vle32.v W1, (DATA) + vrev8.v W1, W1 + addi DATA, DATA, 16 + vle32.v W2, (DATA) + vrev8.v W2, W2 + addi DATA, DATA, 16 + vle32.v W3, (DATA) + vrev8.v W3, W3 + addi DATA, DATA, 16 + + // Do the 64 rounds of SHA-256. + sha256_16rounds 0, K0, K1, K2, K3 + sha256_16rounds 0, K4, K5, K6, K7 + sha256_16rounds 0, K8, K9, K10, K11 + sha256_16rounds 1, K12, K13, K14, K15 + + // Add the previous state. + vadd.vv FEBA, FEBA, PREV_FEBA + vadd.vv HGDC, HGDC, PREV_HGDC + + // Repeat if more blocks remain. + bnez NUM_BLOCKS, .Lnext_block + + // Store the new state and return. + vsuxei8.v FEBA, (STATEP), INDICES + vsuxei8.v HGDC, (STATEP_C), INDICES + ret +SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb) + +.section ".rodata" +.p2align 2 +.type K256, @object +K256: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +.size K256, . - K256 diff --git a/arch/riscv/crypto/sha512-riscv64-glue.c b/arch/riscv/crypto/sha512-riscv64-glue.c new file mode 100644 index 000000000000..43b56a08aeb5 --- /dev/null +++ b/arch/riscv/crypto/sha512-riscv64-glue.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-512 and SHA-384 using the RISC-V vector crypto extensions + * + * Copyright (C) 2023 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> +#include <crypto/sha512_base.h> +#include <linux/linkage.h> +#include <linux/module.h> + +/* + * Note: the asm function only uses the 'state' field of struct sha512_state. + * It is assumed to be the first field. + */ +asmlinkage void sha512_transform_zvknhb_zvkb( + struct sha512_state *state, const u8 *data, int num_blocks); + +static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + /* + * Ensure struct sha512_state begins directly with the SHA-512 + * 512-bit internal state, as this is what the asm function expects. + */ + BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + sha512_base_do_update(desc, data, len, + sha512_transform_zvknhb_zvkb); + kernel_vector_end(); + } else { + crypto_sha512_update(desc, data, len); + } + return 0; +} + +static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (crypto_simd_usable()) { + kernel_vector_begin(); + if (len) + sha512_base_do_update(desc, data, len, + sha512_transform_zvknhb_zvkb); + sha512_base_do_finalize(desc, sha512_transform_zvknhb_zvkb); + kernel_vector_end(); + + return sha512_base_finish(desc, out); + } + + return crypto_sha512_finup(desc, data, len, out); +} + +static int riscv64_sha512_final(struct shash_desc *desc, u8 *out) +{ + return riscv64_sha512_finup(desc, NULL, 0, out); +} + +static int riscv64_sha512_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_base_init(desc) ?: + riscv64_sha512_finup(desc, data, len, out); +} + +static struct shash_alg riscv64_sha512_algs[] = { + { + .init = sha512_base_init, + .update = riscv64_sha512_update, + .final = riscv64_sha512_final, + .finup = riscv64_sha512_finup, + .digest = riscv64_sha512_digest, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA512_DIGEST_SIZE, + .base = { + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_priority = 300, + .cra_name = "sha512", + .cra_driver_name = "sha512-riscv64-zvknhb-zvkb", + .cra_module = THIS_MODULE, + }, + }, { + .init = sha384_base_init, + .update = riscv64_sha512_update, + .final = riscv64_sha512_final, + .finup = riscv64_sha512_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA384_DIGEST_SIZE, + .base = { + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_priority = 300, + .cra_name = "sha384", + .cra_driver_name = "sha384-riscv64-zvknhb-zvkb", + .cra_module = THIS_MODULE, + }, + }, +}; + +static int __init riscv64_sha512_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZVKNHB) && + riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + return crypto_register_shashes(riscv64_sha512_algs, + ARRAY_SIZE(riscv64_sha512_algs)); + + return -ENODEV; +} + +static void __exit riscv64_sha512_mod_exit(void) +{ + crypto_unregister_shashes(riscv64_sha512_algs, + ARRAY_SIZE(riscv64_sha512_algs)); +} + +module_init(riscv64_sha512_mod_init); +module_exit(riscv64_sha512_mod_exit); + +MODULE_DESCRIPTION("SHA-512 (RISC-V accelerated)"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S new file mode 100644 index 000000000000..3a9ae210f915 --- /dev/null +++ b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknhb') +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/cfi_types.h> + +.text +.option arch, +zvknhb, +zvkb + +#define STATEP a0 +#define DATA a1 +#define NUM_BLOCKS a2 + +#define STATEP_C a3 +#define K a4 + +#define MASK v0 +#define INDICES v1 +#define W0 v10 // LMUL=2 +#define W1 v12 // LMUL=2 +#define W2 v14 // LMUL=2 +#define W3 v16 // LMUL=2 +#define VTMP v20 // LMUL=2 +#define FEBA v22 // LMUL=2 +#define HGDC v24 // LMUL=2 +#define PREV_FEBA v26 // LMUL=2 +#define PREV_HGDC v28 // LMUL=2 + +// Do 4 rounds of SHA-512. w0 contains the current 4 message schedule words. +// +// If not all the message schedule words have been computed yet, then this also +// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4 +// message schedule words; this macro computes the group after w3 and writes it +// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3, +// w0), so the caller must cycle through the registers accordingly. +.macro sha512_4rounds last, w0, w1, w2, w3 + vle64.v VTMP, (K) + addi K, K, 32 + vadd.vv VTMP, VTMP, \w0 + vsha2cl.vv HGDC, FEBA, VTMP + vsha2ch.vv FEBA, HGDC, VTMP +.if !\last + vmerge.vvm VTMP, \w2, \w1, MASK + vsha2ms.vv \w0, VTMP, \w3 +.endif +.endm + +.macro sha512_16rounds last + sha512_4rounds \last, W0, W1, W2, W3 + sha512_4rounds \last, W1, W2, W3, W0 + sha512_4rounds \last, W2, W3, W0, W1 + sha512_4rounds \last, W3, W0, W1, W2 +.endm + +// void sha512_transform_zvknhb_zvkb(u64 state[8], const u8 *data, +// int num_blocks); +SYM_TYPED_FUNC_START(sha512_transform_zvknhb_zvkb) + + // Setup mask for the vmerge to replace the first word (idx==0) in + // message scheduling. There are 4 words, so an 8-bit mask suffices. + vsetivli zero, 1, e8, m1, ta, ma + vmv.v.i MASK, 0x01 + + // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we + // need {f,e,b,a},{h,g,d,c}. The dst vtype is e64m2 and the index vtype + // is e8mf4. We use index-load with the i8 indices {40, 32, 8, 0}, + // loaded using the 32-bit little endian value 0x00082028. + li t0, 0x00082028 + vsetivli zero, 1, e32, m1, ta, ma + vmv.v.x INDICES, t0 + addi STATEP_C, STATEP, 16 + vsetivli zero, 4, e64, m2, ta, ma + vluxei8.v FEBA, (STATEP), INDICES + vluxei8.v HGDC, (STATEP_C), INDICES + +.Lnext_block: + la K, K512 + addi NUM_BLOCKS, NUM_BLOCKS, -1 + + // Save the previous state, as it's needed later. + vmv.v.v PREV_FEBA, FEBA + vmv.v.v PREV_HGDC, HGDC + + // Load the next 1024-bit message block and endian-swap each 64-bit word + vle64.v W0, (DATA) + vrev8.v W0, W0 + addi DATA, DATA, 32 + vle64.v W1, (DATA) + vrev8.v W1, W1 + addi DATA, DATA, 32 + vle64.v W2, (DATA) + vrev8.v W2, W2 + addi DATA, DATA, 32 + vle64.v W3, (DATA) + vrev8.v W3, W3 + addi DATA, DATA, 32 + + // Do the 80 rounds of SHA-512. + sha512_16rounds 0 + sha512_16rounds 0 + sha512_16rounds 0 + sha512_16rounds 0 + sha512_16rounds 1 + + // Add the previous state. + vadd.vv FEBA, FEBA, PREV_FEBA + vadd.vv HGDC, HGDC, PREV_HGDC + + // Repeat if more blocks remain. + bnez NUM_BLOCKS, .Lnext_block + + // Store the new state and return. + vsuxei8.v FEBA, (STATEP), INDICES + vsuxei8.v HGDC, (STATEP_C), INDICES + ret +SYM_FUNC_END(sha512_transform_zvknhb_zvkb) + +.section ".rodata" +.p2align 3 +.type K512, @object +K512: + .dword 0x428a2f98d728ae22, 0x7137449123ef65cd + .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc + .dword 0x3956c25bf348b538, 0x59f111f1b605d019 + .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 + .dword 0xd807aa98a3030242, 0x12835b0145706fbe + .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 + .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 + .dword 0x9bdc06a725c71235, 0xc19bf174cf692694 + .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 + .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 + .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 + .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 + .dword 0x983e5152ee66dfab, 0xa831c66d2db43210 + .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4 + .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725 + .dword 0x06ca6351e003826f, 0x142929670a0e6e70 + .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926 + .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df + .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8 + .dword 0x81c2c92e47edaee6, 0x92722c851482353b + .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001 + .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30 + .dword 0xd192e819d6ef5218, 0xd69906245565a910 + .dword 0xf40e35855771202a, 0x106aa07032bbd1b8 + .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 + .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 + .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb + .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 + .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60 + .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec + .dword 0x90befffa23631e28, 0xa4506cebde82bde9 + .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b + .dword 0xca273eceea26619c, 0xd186b8c721c0c207 + .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 + .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6 + .dword 0x113f9804bef90dae, 0x1b710b35131c471b + .dword 0x28db77f523047d84, 0x32caab7b40c72493 + .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c + .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a + .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 +.size K512, . - K512 diff --git a/arch/riscv/crypto/sm3-riscv64-glue.c b/arch/riscv/crypto/sm3-riscv64-glue.c new file mode 100644 index 000000000000..e1737a970c7c --- /dev/null +++ b/arch/riscv/crypto/sm3-riscv64-glue.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM3 using the RISC-V vector crypto extensions + * + * Copyright (C) 2023 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> +#include <crypto/sm3_base.h> +#include <linux/linkage.h> +#include <linux/module.h> + +/* + * Note: the asm function only uses the 'state' field of struct sm3_state. + * It is assumed to be the first field. + */ +asmlinkage void sm3_transform_zvksh_zvkb( + struct sm3_state *state, const u8 *data, int num_blocks); + +static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + /* + * Ensure struct sm3_state begins directly with the SM3 + * 256-bit internal state, as this is what the asm function expects. + */ + BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb); + kernel_vector_end(); + } else { + sm3_update(shash_desc_ctx(desc), data, len); + } + return 0; +} + +static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sm3_state *ctx; + + if (crypto_simd_usable()) { + kernel_vector_begin(); + if (len) + sm3_base_do_update(desc, data, len, + sm3_transform_zvksh_zvkb); + sm3_base_do_finalize(desc, sm3_transform_zvksh_zvkb); + kernel_vector_end(); + + return sm3_base_finish(desc, out); + } + + ctx = shash_desc_ctx(desc); + if (len) + sm3_update(ctx, data, len); + sm3_final(ctx, out); + + return 0; +} + +static int riscv64_sm3_final(struct shash_desc *desc, u8 *out) +{ + return riscv64_sm3_finup(desc, NULL, 0, out); +} + +static struct shash_alg riscv64_sm3_alg = { + .init = sm3_base_init, + .update = riscv64_sm3_update, + .final = riscv64_sm3_final, + .finup = riscv64_sm3_finup, + .descsize = sizeof(struct sm3_state), + .digestsize = SM3_DIGEST_SIZE, + .base = { + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_priority = 300, + .cra_name = "sm3", + .cra_driver_name = "sm3-riscv64-zvksh-zvkb", + .cra_module = THIS_MODULE, + }, +}; + +static int __init riscv64_sm3_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZVKSH) && + riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + return crypto_register_shash(&riscv64_sm3_alg); + + return -ENODEV; +} + +static void __exit riscv64_sm3_mod_exit(void) +{ + crypto_unregister_shash(&riscv64_sm3_alg); +} + +module_init(riscv64_sm3_mod_init); +module_exit(riscv64_sm3_mod_exit); + +MODULE_DESCRIPTION("SM3 (RISC-V accelerated)"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("sm3"); diff --git a/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S new file mode 100644 index 000000000000..a2b65d961c04 --- /dev/null +++ b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector SM3 Secure Hash extension ('Zvksh') +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/cfi_types.h> + +.text +.option arch, +zvksh, +zvkb + +#define STATEP a0 +#define DATA a1 +#define NUM_BLOCKS a2 + +#define STATE v0 // LMUL=2 +#define PREV_STATE v2 // LMUL=2 +#define W0 v4 // LMUL=2 +#define W1 v6 // LMUL=2 +#define VTMP v8 // LMUL=2 + +.macro sm3_8rounds i, w0, w1 + // Do 4 rounds using W_{0+i}..W_{7+i}. + vsm3c.vi STATE, \w0, \i + 0 + vslidedown.vi VTMP, \w0, 2 + vsm3c.vi STATE, VTMP, \i + 1 + + // Compute W_{4+i}..W_{11+i}. + vslidedown.vi VTMP, \w0, 4 + vslideup.vi VTMP, \w1, 4 + + // Do 4 rounds using W_{4+i}..W_{11+i}. + vsm3c.vi STATE, VTMP, \i + 2 + vslidedown.vi VTMP, VTMP, 2 + vsm3c.vi STATE, VTMP, \i + 3 + +.if \i < 28 + // Compute W_{16+i}..W_{23+i}. + vsm3me.vv \w0, \w1, \w0 +.endif + // For the next 8 rounds, w0 and w1 are swapped. +.endm + +// void sm3_transform_zvksh_zvkb(u32 state[8], const u8 *data, int num_blocks); +SYM_TYPED_FUNC_START(sm3_transform_zvksh_zvkb) + + // Load the state and endian-swap each 32-bit word. + vsetivli zero, 8, e32, m2, ta, ma + vle32.v STATE, (STATEP) + vrev8.v STATE, STATE + +.Lnext_block: + addi NUM_BLOCKS, NUM_BLOCKS, -1 + + // Save the previous state, as it's needed later. + vmv.v.v PREV_STATE, STATE + + // Load the next 512-bit message block into W0-W1. + vle32.v W0, (DATA) + addi DATA, DATA, 32 + vle32.v W1, (DATA) + addi DATA, DATA, 32 + + // Do the 64 rounds of SM3. + sm3_8rounds 0, W0, W1 + sm3_8rounds 4, W1, W0 + sm3_8rounds 8, W0, W1 + sm3_8rounds 12, W1, W0 + sm3_8rounds 16, W0, W1 + sm3_8rounds 20, W1, W0 + sm3_8rounds 24, W0, W1 + sm3_8rounds 28, W1, W0 + + // XOR in the previous state. + vxor.vv STATE, STATE, PREV_STATE + + // Repeat if more blocks remain. + bnez NUM_BLOCKS, .Lnext_block + + // Store the new state and return. + vrev8.v STATE, STATE + vse32.v STATE, (STATEP) + ret +SYM_FUNC_END(sm3_transform_zvksh_zvkb) diff --git a/arch/riscv/crypto/sm4-riscv64-glue.c b/arch/riscv/crypto/sm4-riscv64-glue.c new file mode 100644 index 000000000000..47fb84ebe577 --- /dev/null +++ b/arch/riscv/crypto/sm4-riscv64-glue.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SM4 using the RISC-V vector crypto extensions + * + * Copyright (C) 2023 VRULL GmbH + * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih <jerry.shih@sifive.com> + */ + +#include <asm/simd.h> +#include <asm/vector.h> +#include <crypto/internal/cipher.h> +#include <crypto/internal/simd.h> +#include <crypto/sm4.h> +#include <linux/linkage.h> +#include <linux/module.h> + +asmlinkage void sm4_expandkey_zvksed_zvkb(const u8 user_key[SM4_KEY_SIZE], + u32 rkey_enc[SM4_RKEY_WORDS], + u32 rkey_dec[SM4_RKEY_WORDS]); +asmlinkage void sm4_crypt_zvksed_zvkb(const u32 rkey[SM4_RKEY_WORDS], + const u8 in[SM4_BLOCK_SIZE], + u8 out[SM4_BLOCK_SIZE]); + +static int riscv64_sm4_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (crypto_simd_usable()) { + if (keylen != SM4_KEY_SIZE) + return -EINVAL; + kernel_vector_begin(); + sm4_expandkey_zvksed_zvkb(key, ctx->rkey_enc, ctx->rkey_dec); + kernel_vector_end(); + return 0; + } + return sm4_expandkey(ctx, key, keylen); +} + +static void riscv64_sm4_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + sm4_crypt_zvksed_zvkb(ctx->rkey_enc, src, dst); + kernel_vector_end(); + } else { + sm4_crypt_block(ctx->rkey_enc, dst, src); + } +} + +static void riscv64_sm4_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (crypto_simd_usable()) { + kernel_vector_begin(); + sm4_crypt_zvksed_zvkb(ctx->rkey_dec, src, dst); + kernel_vector_end(); + } else { + sm4_crypt_block(ctx->rkey_dec, dst, src); + } +} + +static struct crypto_alg riscv64_sm4_alg = { + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_priority = 300, + .cra_name = "sm4", + .cra_driver_name = "sm4-riscv64-zvksed-zvkb", + .cra_cipher = { + .cia_min_keysize = SM4_KEY_SIZE, + .cia_max_keysize = SM4_KEY_SIZE, + .cia_setkey = riscv64_sm4_setkey, + .cia_encrypt = riscv64_sm4_encrypt, + .cia_decrypt = riscv64_sm4_decrypt, + }, + .cra_module = THIS_MODULE, +}; + +static int __init riscv64_sm4_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZVKSED) && + riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + return crypto_register_alg(&riscv64_sm4_alg); + + return -ENODEV; +} + +static void __exit riscv64_sm4_mod_exit(void) +{ + crypto_unregister_alg(&riscv64_sm4_alg); +} + +module_init(riscv64_sm4_mod_init); +module_exit(riscv64_sm4_mod_exit); + +MODULE_DESCRIPTION("SM4 (RISC-V accelerated)"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("sm4"); diff --git a/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S new file mode 100644 index 000000000000..fae62179a4a3 --- /dev/null +++ b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// This file is dual-licensed, meaning that you can use it under your +// choice of either of the following two licenses: +// +// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You can obtain +// a copy in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// or +// +// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu> +// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com> +// Copyright 2024 Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The generated code of this file depends on the following RISC-V extensions: +// - RV64I +// - RISC-V Vector ('V') with VLEN >= 128 +// - RISC-V Vector SM4 Block Cipher extension ('Zvksed') +// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb') + +#include <linux/linkage.h> + +.text +.option arch, +zvksed, +zvkb + +// void sm4_expandkey_zksed_zvkb(const u8 user_key[16], u32 rkey_enc[32], +// u32 rkey_dec[32]); +SYM_FUNC_START(sm4_expandkey_zvksed_zvkb) + vsetivli zero, 4, e32, m1, ta, ma + + // Load the user key. + vle32.v v1, (a0) + vrev8.v v1, v1 + + // XOR the user key with the family key. + la t0, FAMILY_KEY + vle32.v v2, (t0) + vxor.vv v1, v1, v2 + + // Compute the round keys. Store them in forwards order in rkey_enc + // and in reverse order in rkey_dec. + addi a2, a2, 31*4 + li t0, -4 + .set i, 0 +.rept 8 + vsm4k.vi v1, v1, i + vse32.v v1, (a1) // Store to rkey_enc. + vsse32.v v1, (a2), t0 // Store to rkey_dec. +.if i < 7 + addi a1, a1, 16 + addi a2, a2, -16 +.endif + .set i, i + 1 +.endr + + ret +SYM_FUNC_END(sm4_expandkey_zvksed_zvkb) + +// void sm4_crypt_zvksed_zvkb(const u32 rkey[32], const u8 in[16], u8 out[16]); +SYM_FUNC_START(sm4_crypt_zvksed_zvkb) + vsetivli zero, 4, e32, m1, ta, ma + + // Load the input data. + vle32.v v1, (a1) + vrev8.v v1, v1 + + // Do the 32 rounds of SM4, 4 at a time. + .set i, 0 +.rept 8 + vle32.v v2, (a0) + vsm4r.vs v1, v2 +.if i < 7 + addi a0, a0, 16 +.endif + .set i, i + 1 +.endr + + // Store the output data (in reverse element order). + vrev8.v v1, v1 + li t0, -4 + addi a2, a2, 12 + vsse32.v v1, (a2), t0 + + ret +SYM_FUNC_END(sm4_crypt_zvksed_zvkb) + +.section ".rodata" +.p2align 2 +.type FAMILY_KEY, @object +FAMILY_KEY: + .word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC +.size FAMILY_KEY, . - FAMILY_KEY diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 17a904869724..f2708a9494a1 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -18,9 +18,9 @@ #include <asm/sbi.h> #include <asm/vendorid_list.h> -#define ANDESTECH_AX45MP_MARCHID 0x8000000000008a45UL -#define ANDESTECH_AX45MP_MIMPID 0x500UL -#define ANDESTECH_SBI_EXT_ANDES 0x0900031E +#define ANDES_AX45MP_MARCHID 0x8000000000008a45UL +#define ANDES_AX45MP_MIMPID 0x500UL +#define ANDES_SBI_EXT_ANDES 0x0900031E #define ANDES_SBI_EXT_IOCP_SW_WORKAROUND 1 @@ -32,7 +32,7 @@ static long ax45mp_iocp_sw_workaround(void) * ANDES_SBI_EXT_IOCP_SW_WORKAROUND SBI EXT checks if the IOCP is missing and * cache is controllable only then CMO will be applied to the platform. */ - ret = sbi_ecall(ANDESTECH_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND, + ret = sbi_ecall(ANDES_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND, 0, 0, 0, 0, 0, 0); return ret.error ? 0 : ret.value; @@ -50,7 +50,7 @@ static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigne done = true; - if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) + if (arch_id != ANDES_AX45MP_MARCHID || impid != ANDES_AX45MP_MIMPID) return; if (!ax45mp_iocp_sw_workaround()) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index b0487b39e674..776354895b81 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -183,6 +183,16 @@ REG_L x31, PT_T6(sp) .endm +/* Annotate a function as being unsuitable for kprobes. */ +#ifdef CONFIG_KPROBES +#define ASM_NOKPROBE(name) \ + .pushsection "_kprobe_blacklist", "aw"; \ + RISCV_PTR name; \ + .popsection +#else +#define ASM_NOKPROBE(name) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_ASM_H */ diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index f5dfef6c2153..0e0522e588ca 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -17,7 +17,6 @@ #endif #include <asm/cmpxchg.h> -#include <asm/barrier.h> #define __atomic_acquire_fence() \ __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory") @@ -207,7 +206,7 @@ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int " add %[rc], %[p], %[a]\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) @@ -228,7 +227,7 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, " add %[rc], %[p], %[a]\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) @@ -248,7 +247,7 @@ static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) " addi %[rc], %[p], 1\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -268,7 +267,7 @@ static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) " addi %[rc], %[p], -1\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -288,7 +287,7 @@ static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) " bltz %[rc], 1f\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -310,7 +309,7 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) " addi %[rc], %[p], 1\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -331,7 +330,7 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) " addi %[rc], %[p], -1\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -352,7 +351,7 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) " bltz %[rc], 1f\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 110752594228..880b56d8480d 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -11,28 +11,27 @@ #define _ASM_RISCV_BARRIER_H #ifndef __ASSEMBLY__ +#include <asm/fence.h> #define nop() __asm__ __volatile__ ("nop") #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) __asm__ __volatile__ (__nops(n)) -#define RISCV_FENCE(p, s) \ - __asm__ __volatile__ ("fence " #p "," #s : : : "memory") /* These barriers need to enforce ordering on both devices or memory. */ -#define mb() RISCV_FENCE(iorw,iorw) -#define rmb() RISCV_FENCE(ir,ir) -#define wmb() RISCV_FENCE(ow,ow) +#define __mb() RISCV_FENCE(iorw, iorw) +#define __rmb() RISCV_FENCE(ir, ir) +#define __wmb() RISCV_FENCE(ow, ow) /* These barriers do not need to enforce ordering on devices, just memory. */ -#define __smp_mb() RISCV_FENCE(rw,rw) -#define __smp_rmb() RISCV_FENCE(r,r) -#define __smp_wmb() RISCV_FENCE(w,w) +#define __smp_mb() RISCV_FENCE(rw, rw) +#define __smp_rmb() RISCV_FENCE(r, r) +#define __smp_wmb() RISCV_FENCE(w, w) #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(rw,w); \ + RISCV_FENCE(rw, w); \ WRITE_ONCE(*p, v); \ } while (0) @@ -40,7 +39,7 @@ do { \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(r,rw); \ + RISCV_FENCE(r, rw); \ ___p1; \ }) @@ -69,7 +68,7 @@ do { \ * instances the scheduler pairs this with an mb(), so nothing is necessary on * the new hart. */ -#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw) +#define smp_mb__after_spinlock() RISCV_FENCE(iorw, iorw) #include <asm-generic/barrier.h> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 329d8244a9b3..880606b0469a 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -22,6 +22,16 @@ #include <asm-generic/bitops/fls.h> #else +#define __HAVE_ARCH___FFS +#define __HAVE_ARCH___FLS +#define __HAVE_ARCH_FFS +#define __HAVE_ARCH_FLS + +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/fls.h> + #include <asm/alternative-macros.h> #include <asm/hwcap.h> @@ -37,8 +47,6 @@ static __always_inline unsigned long variable__ffs(unsigned long word) { - int num; - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -52,32 +60,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) return word; legacy: - num = 0; -#if BITS_PER_LONG == 64 - if ((word & 0xffffffff) == 0) { - num += 32; - word >>= 32; - } -#endif - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; + return generic___ffs(word); } /** @@ -93,8 +76,6 @@ legacy: static __always_inline unsigned long variable__fls(unsigned long word) { - int num; - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -108,32 +89,7 @@ static __always_inline unsigned long variable__fls(unsigned long word) return BITS_PER_LONG - 1 - word; legacy: - num = BITS_PER_LONG - 1; -#if BITS_PER_LONG == 64 - if (!(word & (~0ul << 32))) { - num -= 32; - word <<= 32; - } -#endif - if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { - num -= 16; - word <<= 16; - } - if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { - num -= 8; - word <<= 8; - } - if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { - num -= 4; - word <<= 4; - } - if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { - num -= 2; - word <<= 2; - } - if (!(word & (~0ul << (BITS_PER_LONG - 1)))) - num -= 1; - return num; + return generic___fls(word); } /** @@ -149,46 +105,23 @@ legacy: static __always_inline int variable_ffs(int x) { - int r; - - if (!x) - return 0; - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); + if (!x) + return 0; + asm volatile (".option push\n" ".option arch,+zbb\n" CTZW "%0, %1\n" ".option pop\n" - : "=r" (r) : "r" (x) :); + : "=r" (x) : "r" (x) :); - return r + 1; + return x + 1; legacy: - r = 1; - if (!(x & 0xffff)) { - x >>= 16; - r += 16; - } - if (!(x & 0xff)) { - x >>= 8; - r += 8; - } - if (!(x & 0xf)) { - x >>= 4; - r += 4; - } - if (!(x & 3)) { - x >>= 2; - r += 2; - } - if (!(x & 1)) { - x >>= 1; - r += 1; - } - return r; + return generic_ffs(x); } /** @@ -204,46 +137,23 @@ legacy: static __always_inline int variable_fls(unsigned int x) { - int r; - - if (!x) - return 0; - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); + if (!x) + return 0; + asm volatile (".option push\n" ".option arch,+zbb\n" CLZW "%0, %1\n" ".option pop\n" - : "=r" (r) : "r" (x) :); + : "=r" (x) : "r" (x) :); - return 32 - r; + return 32 - x; legacy: - r = 32; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; + return generic_fls(x); } /** diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2f4726d3cfcc..2fee65cc8443 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -8,7 +8,6 @@ #include <linux/bug.h> -#include <asm/barrier.h> #include <asm/fence.h> #define __xchg_relaxed(ptr, new, size) \ @@ -313,7 +312,7 @@ " bne %0, %z3, 1f\n" \ " sc.w.rl %1, %z4, %2\n" \ " bnez %1, 0b\n" \ - " fence rw, rw\n" \ + RISCV_FULL_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" ((long)__old), "rJ" (__new) \ @@ -325,7 +324,7 @@ " bne %0, %z3, 1f\n" \ " sc.d.rl %1, %z4, %2\n" \ " bnez %1, 0b\n" \ - " fence rw, rw\n" \ + RISCV_FULL_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index 2ac955b51148..aa103530a5c8 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -14,9 +14,28 @@ static inline int is_compat_task(void) { + if (!IS_ENABLED(CONFIG_COMPAT)) + return 0; + return test_thread_flag(TIF_32BIT); } +static inline int is_compat_thread(struct thread_info *thread) +{ + if (!IS_ENABLED(CONFIG_COMPAT)) + return 0; + + return test_ti_thread_flag(thread, TIF_32BIT); +} + +static inline void set_compat_task(bool is_compat) +{ + if (is_compat) + set_thread_flag(TIF_32BIT); + else + clear_thread_flag(TIF_32BIT); +} + struct compat_user_regs_struct { compat_ulong_t pc; compat_ulong_t ra; diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 0bd11862b760..347805446151 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright 2022-2023 Rivos, Inc + * Copyright 2022-2024 Rivos, Inc */ #ifndef _ASM_CPUFEATURE_H @@ -28,29 +28,38 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); -DECLARE_PER_CPU(long, misaligned_access_speed); - /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; void riscv_user_isa_enable(void); -#ifdef CONFIG_RISCV_MISALIGNED -bool unaligned_ctl_available(void); -bool check_unaligned_access_emulated(int cpu); +#if defined(CONFIG_RISCV_MISALIGNED) +bool check_unaligned_access_emulated_all_cpus(void); void unaligned_emulation_finish(void); +bool unaligned_ctl_available(void); +DECLARE_PER_CPU(long, misaligned_access_speed); #else static inline bool unaligned_ctl_available(void) { return false; } +#endif + +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) +DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); -static inline bool check_unaligned_access_emulated(int cpu) +static __always_inline bool has_fast_unaligned_accesses(void) { - return false; + return static_branch_likely(&fast_unaligned_access_speed_key); +} +#else +static __always_inline bool has_fast_unaligned_accesses(void) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + return true; + else + return false; } - -static inline void unaligned_emulation_finish(void) {} #endif unsigned long riscv_get_elf_hwcap(void); @@ -135,6 +144,4 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } -DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); - #endif diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 06c236bfab53..c7aea7886d22 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -53,13 +53,9 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); #define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) #ifdef CONFIG_64BIT -#ifdef CONFIG_COMPAT -#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ +#define STACK_RND_MASK (is_compat_task() ? \ 0x7ff >> (PAGE_SHIFT - 12) : \ 0x3ffff >> (PAGE_SHIFT - 12)) -#else -#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) -#endif #endif /* @@ -139,10 +135,7 @@ do { \ #ifdef CONFIG_COMPAT #define SET_PERSONALITY(ex) \ -do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ - set_thread_flag(TIF_32BIT); \ - else \ - clear_thread_flag(TIF_32BIT); \ +do { set_compat_task((ex).e_ident[EI_CLASS] == ELFCLASS32); \ if (personality(current->personality) != PER_LINUX32) \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index ea33288f8a25..1f2dbfb8a8bf 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -12,8 +12,8 @@ #include <asm/vendorid_list.h> #ifdef CONFIG_ERRATA_ANDES -#define ERRATA_ANDESTECH_NO_IOCP 0 -#define ERRATA_ANDESTECH_NUMBER 1 +#define ERRATA_ANDES_NO_IOCP 0 +#define ERRATA_ANDES_NUMBER 1 #endif #ifdef CONFIG_ERRATA_SIFIVE @@ -112,15 +112,6 @@ asm volatile(ALTERNATIVE( \ #define THEAD_C9XX_RV_IRQ_PMU 17 #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 -#define ALT_SBI_PMU_OVERFLOW(__ovl) \ -asm volatile(ALTERNATIVE( \ - "csrr %0, " __stringify(CSR_SSCOUNTOVF), \ - "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \ - THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ - CONFIG_ERRATA_THEAD_PMU) \ - : "=r" (__ovl) : \ - : "memory") - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h index 2b443a3a487f..6bcd80325dfc 100644 --- a/arch/riscv/include/asm/fence.h +++ b/arch/riscv/include/asm/fence.h @@ -1,12 +1,18 @@ #ifndef _ASM_RISCV_FENCE_H #define _ASM_RISCV_FENCE_H +#define RISCV_FENCE_ASM(p, s) "\tfence " #p "," #s "\n" +#define RISCV_FENCE(p, s) \ + ({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); }) + #ifdef CONFIG_SMP -#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n" -#define RISCV_RELEASE_BARRIER "\tfence rw, w\n" +#define RISCV_ACQUIRE_BARRIER RISCV_FENCE_ASM(r, rw) +#define RISCV_RELEASE_BARRIER RISCV_FENCE_ASM(rw, w) +#define RISCV_FULL_BARRIER RISCV_FENCE_ASM(rw, rw) #else #define RISCV_ACQUIRE_BARRIER #define RISCV_RELEASE_BARRIER +#define RISCV_FULL_BARRIER #endif #endif /* _ASM_RISCV_FENCE_H */ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 1f2d2599c655..e17d0078a651 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -80,6 +80,7 @@ #define RISCV_ISA_EXT_ZFA 71 #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 +#define RISCV_ISA_EXT_XANDESPMU 74 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 42497d487a17..1c5c641075d2 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -47,10 +47,10 @@ * sufficient to ensure this works sanely on controllers that support I/O * writes. */ -#define __io_pbr() __asm__ __volatile__ ("fence io,i" : : : "memory"); -#define __io_par(v) __asm__ __volatile__ ("fence i,ior" : : : "memory"); -#define __io_pbw() __asm__ __volatile__ ("fence iow,o" : : : "memory"); -#define __io_paw() __asm__ __volatile__ ("fence o,io" : : : "memory"); +#define __io_pbr() RISCV_FENCE(io, i) +#define __io_par(v) RISCV_FENCE(i, ior) +#define __io_pbw() RISCV_FENCE(iow, o) +#define __io_paw() RISCV_FENCE(o, io) /* * Accesses from a single hart to a single I/O address must be ordered. This diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h new file mode 100644 index 000000000000..47b240d0d596 --- /dev/null +++ b/arch/riscv/include/asm/membarrier.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_RISCV_MEMBARRIER_H +#define _ASM_RISCV_MEMBARRIER_H + +static inline void membarrier_arch_switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + /* + * Only need the full barrier when switching between processes. + * Barrier when switching from kernel to userspace is not + * required here, given that it is implied by mmdrop(). Barrier + * when switching from userspace to kernel is not needed after + * store to rq->curr. + */ + if (IS_ENABLED(CONFIG_SMP) && + likely(!(atomic_read(&next->membarrier_state) & + (MEMBARRIER_STATE_PRIVATE_EXPEDITED | + MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) + return; + + /* + * The membarrier system call requires a full memory barrier + * after storing to rq->curr, before going back to user-space. + * + * This barrier is also needed for the SYNC_CORE command when + * switching between processes; in particular, on a transition + * from a thread belonging to another mm to a thread belonging + * to the mm for which a membarrier SYNC_CORE is done on CPU0: + * + * - [CPU0] sets all bits in the mm icache_stale_mask (in + * prepare_sync_core_cmd()); + * + * - [CPU1] stores to rq->curr (by the scheduler); + * + * - [CPU0] loads rq->curr within membarrier and observes + * cpu_rq(1)->curr->mm != mm, so the IPI is skipped on + * CPU1; this means membarrier relies on switch_mm() to + * issue the sync-core; + * + * - [CPU1] switch_mm() loads icache_stale_mask; if the bit + * is zero, switch_mm() may incorrectly skip the sync-core. + * + * Matches a full barrier in the proximity of the membarrier + * system call entry. + */ + smp_mb(); +} + +#endif /* _ASM_RISCV_MEMBARRIER_H */ diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index 4c58ee7f95ec..06cadfd7a237 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -12,6 +12,7 @@ #define _ASM_RISCV_MMIO_H #include <linux/types.h> +#include <asm/fence.h> #include <asm/mmiowb.h> /* Generic IO read/write. These perform native-endian accesses. */ @@ -131,8 +132,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * doesn't define any ordering between the memory space and the I/O space. */ #define __io_br() do {} while (0) -#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); }) -#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); }) +#define __io_ar(v) RISCV_FENCE(i, ir) +#define __io_bw() RISCV_FENCE(w, o) #define __io_aw() mmiowb_set_pending() #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h index 0b2333e71fdc..52ce4a399d9b 100644 --- a/arch/riscv/include/asm/mmiowb.h +++ b/arch/riscv/include/asm/mmiowb.h @@ -7,7 +7,7 @@ * "o,w" is sufficient to ensure that all writes to the device have completed * before the write to the spinlock is allowed to commit. */ -#define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory"); +#define mmiowb() RISCV_FENCE(o, w) #include <linux/smp.h> #include <asm-generic/mmiowb.h> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index c80bb9990d32..deaf971253a2 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -95,13 +95,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) __pud_free(mm, pud); } -#define __pud_free_tlb(tlb, pud, addr) \ -do { \ - if (pgtable_l4_enabled) { \ - pagetable_pud_dtor(virt_to_ptdesc(pud)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ - } \ -} while (0) +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long addr) +{ + if (pgtable_l4_enabled) { + struct ptdesc *ptdesc = virt_to_ptdesc(pud); + + pagetable_pud_dtor(ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); + } +} #define p4d_alloc_one p4d_alloc_one static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -130,11 +136,16 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) __p4d_free(mm, p4d); } -#define __p4d_free_tlb(tlb, p4d, addr) \ -do { \ - if (pgtable_l5_enabled) \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d)); \ -} while (0) +static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, + unsigned long addr) +{ + if (pgtable_l5_enabled) { + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d)); + else + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); + } +} #endif /* __PAGETABLE_PMD_FOLDED */ static inline void sync_kernel_mappings(pgd_t *pgd) @@ -159,19 +170,31 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #ifndef __PAGETABLE_PMD_FOLDED -#define __pmd_free_tlb(tlb, pmd, addr) \ -do { \ - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ -} while (0) +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr) +{ + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); + + pagetable_pmd_dtor(ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); +} #endif /* __PAGETABLE_PMD_FOLDED */ -#define __pte_free_tlb(tlb, pte, buf) \ -do { \ - pagetable_pte_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ -} while (0) +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long addr) +{ + struct ptdesc *ptdesc = page_ptdesc(pte); + + pagetable_pte_dtor(ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); +} #endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_PGALLOC_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 20242402fc11..9f8ea0e33eb1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -127,17 +127,11 @@ #define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) #define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) -#ifdef CONFIG_COMPAT #define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) #define MMAP_MIN_VA_BITS_64 (VA_BITS_SV39) #define MMAP_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_VA_BITS_64) #define MMAP_MIN_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_MIN_VA_BITS_64) #else -#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) -#define MMAP_MIN_VA_BITS (VA_BITS_SV39) -#endif /* CONFIG_COMPAT */ - -#else #include <asm/pgtable-32.h> #endif /* CONFIG_64BIT */ @@ -439,9 +433,11 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +#ifdef CONFIG_RISCV_ISA_SVNAPOT #define pte_leaf_size(pte) (pte_napot(pte) ? \ napot_cont_size(napot_cont_order(pte)) :\ PAGE_SIZE) +#endif #ifdef CONFIG_NUMA_BALANCING /* @@ -517,12 +513,12 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) WRITE_ONCE(*ptep, pteval); } -void flush_icache_pte(pte_t pte); +void flush_icache_pte(struct mm_struct *mm, pte_t pte); -static inline void __set_pte_at(pte_t *ptep, pte_t pteval) +static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval) { if (pte_present(pteval) && pte_exec(pteval)) - flush_icache_pte(pteval); + flush_icache_pte(mm, pteval); set_pte(ptep, pteval); } @@ -535,7 +531,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, page_table_check_ptes_set(mm, ptep, pteval, nr); for (;;) { - __set_pte_at(ptep, pteval); + __set_pte_at(mm, ptep, pteval); if (--nr == 0) break; ptep++; @@ -547,7 +543,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - __set_pte_at(ptep, __pte(0)); + __set_pte_at(mm, ptep, __pte(0)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */ @@ -597,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } +#define pgprot_nx pgprot_nx +static inline pgprot_t pgprot_nx(pgprot_t _prot) +{ + return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC); +} + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) { @@ -662,6 +664,12 @@ static inline int pmd_write(pmd_t pmd) return pte_write(pmd_pte(pmd)); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pte_write(pud_pte(pud)); +} + #define pmd_dirty pmd_dirty static inline int pmd_dirty(pmd_t pmd) { @@ -713,14 +721,14 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(mm, pmdp, pmd); - return __set_pte_at((pte_t *)pmdp, pmd_pte(pmd)); + return __set_pte_at(mm, (pte_t *)pmdp, pmd_pte(pmd)); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { page_table_check_pud_set(mm, pudp, pud); - return __set_pte_at((pte_t *)pudp, pud_pte(pud)); + return __set_pte_at(mm, (pte_t *)pudp, pud_pte(pud)); } #ifdef CONFIG_PAGE_TABLE_CHECK @@ -871,8 +879,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #ifdef CONFIG_COMPAT -#define TASK_SIZE_32 (_AC(0x80000000, UL)) -#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ +#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) +#define TASK_SIZE (is_compat_task() ? \ TASK_SIZE_32 : TASK_SIZE_64) #else #define TASK_SIZE TASK_SIZE_64 diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index a8509cc31ab2..0faf5f161f1e 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -14,22 +14,21 @@ #include <asm/ptrace.h> -#ifdef CONFIG_64BIT -#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) -#define STACK_TOP_MAX TASK_SIZE - +/* + * addr is a hint to the maximum userspace address that mmap should provide, so + * this macro needs to return the largest address space available so that + * mmap_end < addr, being mmap_end the top of that address space. + * See Documentation/arch/riscv/vm-layout.rst for more details. + */ #define arch_get_mmap_end(addr, len, flags) \ ({ \ unsigned long mmap_end; \ typeof(addr) _addr = (addr); \ - if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \ - mmap_end = STACK_TOP_MAX; \ - else if ((_addr) >= VA_USER_SV57) \ + if ((_addr) == 0 || is_compat_task() || \ + ((_addr + len) > BIT(VA_BITS - 1))) \ mmap_end = STACK_TOP_MAX; \ - else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ - mmap_end = VA_USER_SV48; \ else \ - mmap_end = VA_USER_SV39; \ + mmap_end = (_addr + len); \ mmap_end; \ }) @@ -39,17 +38,17 @@ typeof(addr) _addr = (addr); \ typeof(base) _base = (base); \ unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base); \ - if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \ + if ((_addr) == 0 || is_compat_task() || \ + ((_addr + len) > BIT(VA_BITS - 1))) \ mmap_base = (_base); \ - else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \ - mmap_base = VA_USER_SV57 - rnd_gap; \ - else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ - mmap_base = VA_USER_SV48 - rnd_gap; \ else \ - mmap_base = VA_USER_SV39 - rnd_gap; \ + mmap_base = (_addr + len) - rnd_gap; \ mmap_base; \ }) +#ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) +#define STACK_TOP_MAX TASK_SIZE_64 #else #define DEFAULT_MAP_WINDOW TASK_SIZE #define STACK_TOP_MAX TASK_SIZE diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h index 54efbf523d49..adb50f3ec205 100644 --- a/arch/riscv/include/asm/simd.h +++ b/arch/riscv/include/asm/simd.h @@ -34,9 +34,9 @@ static __must_check inline bool may_use_simd(void) return false; /* - * Nesting is acheived in preempt_v by spreading the control for + * Nesting is achieved in preempt_v by spreading the control for * preemptible and non-preemptible kernel-mode Vector into two fields. - * Always try to match with prempt_v if kernel V-context exists. Then, + * Always try to match with preempt_v if kernel V-context exists. Then, * fallback to check non preempt_v if nesting happens, or if the config * is not set. */ diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 491296a335d0..4718096fa5e3 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -56,4 +56,7 @@ int hibernate_resume_nonboot_cpu_disable(void); asmlinkage void hibernate_restore_image(unsigned long resume_satp, unsigned long satp_temp, unsigned long cpu_resume); asmlinkage int hibernate_core_restore_code(void); +bool riscv_sbi_hsm_is_supported(void); +bool riscv_sbi_suspend_state_is_valid(u32 state); +int riscv_sbi_hart_suspend(u32 state); #endif diff --git a/arch/riscv/include/asm/sync_core.h b/arch/riscv/include/asm/sync_core.h new file mode 100644 index 000000000000..9153016da8f1 --- /dev/null +++ b/arch/riscv/include/asm/sync_core.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_SYNC_CORE_H +#define _ASM_RISCV_SYNC_CORE_H + +/* + * RISC-V implements return to user-space through an xRET instruction, + * which is not core serializing. + */ +static inline void sync_core_before_usermode(void) +{ + asm volatile ("fence.i" ::: "memory"); +} + +#ifdef CONFIG_SMP +/* + * Ensure the next switch_mm() on every CPU issues a core serializing + * instruction for the given @mm. + */ +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ + cpumask_setall(&mm->context.icache_stale_mask); +} +#else +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ +} +#endif /* CONFIG_SMP */ + +#endif /* _ASM_RISCV_SYNC_CORE_H */ diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index eeec04b7dae6..ac80216549ff 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -12,25 +12,52 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); -#define SC_RISCV_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,regs->orig_a0,,regs->a1,,regs->a2 \ +#ifdef CONFIG_64BIT + +#define __SYSCALL_SE_DEFINEx(x, prefix, name, ...) \ + static long __se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static long __se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) + +#define SC_RISCV_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->orig_a0,,regs->a1,,regs->a2 \ ,,regs->a3,,regs->a4,,regs->a5,,regs->a6) +#else +/* + * Use type aliasing to ensure registers a0-a6 are correctly passed to the syscall + * implementation when >word-size arguments are used. + */ +#define __SYSCALL_SE_DEFINEx(x, prefix, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + static long __se_##prefix##name(ulong, ulong, ulong, ulong, ulong, ulong, \ + ulong) \ + __attribute__((alias(__stringify(___se_##prefix##name)))); \ + __diag_pop(); \ + static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + __used; \ + static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) + +#define SC_RISCV_REGS_TO_ARGS(x, ...) \ + regs->orig_a0,regs->a1,regs->a2,regs->a3,regs->a4,regs->a5,regs->a6 + +#endif /* CONFIG_64BIT */ + #ifdef CONFIG_COMPAT #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__riscv_compat_sys##name, ERRNO); \ - static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs) \ + __SYSCALL_SE_DEFINEx(x, compat_sys, name, __VA_ARGS__) \ { \ - return __se_compat_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ } \ - static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs) \ { \ - return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + return __se_compat_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \ } \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) @@ -51,19 +78,18 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long __riscv_sys##name(const struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__riscv_sys##name, ERRNO); \ - static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __riscv_sys##name(const struct pt_regs *regs) \ - { \ - return __se_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \ - } \ - static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + __SYSCALL_SE_DEFINEx(x, sys, name, __VA_ARGS__) \ { \ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ + asmlinkage long __riscv_sys##name(const struct pt_regs *regs) \ + { \ + return __se_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #define SYSCALL_DEFINE0(sname) \ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 50b63b5c15bd..1f6c38420d8e 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -10,6 +10,24 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); +#ifdef CONFIG_MMU +#include <linux/swap.h> + +/* + * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to + * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use + * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this + * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the + * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. + */ +static inline void __tlb_remove_table(void *table) +{ + free_page_and_swap_cache(table); +} + +#endif /* CONFIG_MMU */ + #define tlb_flush tlb_flush #include <asm-generic/tlb.h> diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd..72ec1d9bd3f3 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 0cd6f0a027d1..731dcd0ed4de 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -284,4 +284,15 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #endif /* CONFIG_RISCV_ISA_V */ +/* + * Return the implementation's vlen value. + * + * riscv_v_vsize contains the value of "32 vector registers with vlenb length" + * so rebuild the vlen value in bits from it. + */ +static inline int riscv_vector_vlen(void) +{ + return riscv_v_vsize / 32 * 8; +} + #endif /* ! __ASM_RISCV_VECTOR_H */ diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index e55407ace0c3..2f2bb0c84f9a 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -5,7 +5,7 @@ #ifndef ASM_VENDOR_LIST_H #define ASM_VENDOR_LIST_H -#define ANDESTECH_VENDOR_ID 0x31e +#define ANDES_VENDOR_ID 0x31e #define SIFIVE_VENDOR_ID 0x489 #define THEAD_VENDOR_ID 0x5b7 diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 10aaa83db89e..95050ebe9ad0 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -34,7 +34,7 @@ #define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 9 +#define AT_VECTOR_SIZE_ARCH 10 #define AT_MINSIGSTKSZ 51 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 5e591f831638..81d94a8ee10f 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -39,7 +39,6 @@ extra-y += vmlinux.lds obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o -obj-y += copy-unaligned.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -64,6 +63,9 @@ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o + obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 319a1da0358b..0128b161bfda 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_ANDES - case ANDESTECH_VENDOR_ID: + case ANDES_VENDOR_ID: cpu_mfr_info->patch_func = andes_errata_patch_func; break; #endif diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..3df4cb788c1f 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 79a5a35fab96..3ed2359eae35 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -11,7 +11,6 @@ #include <linux/cpu.h> #include <linux/cpuhotplug.h> #include <linux/ctype.h> -#include <linux/jump_label.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> @@ -21,21 +20,13 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> -#include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vector.h> -#include "copy-unaligned.h" - #define NUM_ALPHA_EXTS ('z' - 'a' + 1) -#define MISALIGNED_ACCESS_JIFFIES_LG2 1 -#define MISALIGNED_BUFFER_SIZE 0x4000 -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) - unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -44,11 +35,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; -/* Performance information */ -DEFINE_PER_CPU(long, misaligned_access_speed); - -static cpumask_t fast_misaligned_access; - /** * riscv_isa_extension_base() - Get base extension word * @@ -318,6 +304,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); @@ -731,247 +718,6 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -static int check_unaligned_access(void *param) -{ - int cpu = smp_processor_id(); - u64 start_cycles, end_cycles; - u64 word_cycles; - u64 byte_cycles; - int ratio; - unsigned long start_jiffies, now; - struct page *page = param; - void *dst; - void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - - if (check_unaligned_access_emulated(cpu)) - return 0; - - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); - /* Unalign src as well, but differently (off by 1 + 2 = 3). */ - src = dst + (MISALIGNED_BUFFER_SIZE / 2); - src += 2; - word_cycles = -1ULL; - /* Do a warmup. */ - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - preempt_disable(); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - /* - * For a fixed amount of time, repeatedly try the function, and take - * the best time in cycles as the measurement. - */ - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - /* Ensure the CSR read can't reorder WRT to the copy. */ - mb(); - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - /* Ensure the copy ends before the end time is snapped. */ - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < word_cycles) - word_cycles = end_cycles - start_cycles; - } - - byte_cycles = -1ULL; - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - mb(); - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < byte_cycles) - byte_cycles = end_cycles - start_cycles; - } - - preempt_enable(); - - /* Don't divide by zero. */ - if (!word_cycles || !byte_cycles) { - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); - - return 0; - } - - if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; - - ratio = div_u64((byte_cycles * 100), word_cycles); - pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", - cpu, - ratio / 100, - ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - - per_cpu(misaligned_access_speed, cpu) = speed; - - /* - * Set the value of fast_misaligned_access of a CPU. These operations - * are atomic to avoid race conditions. - */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) - cpumask_set_cpu(cpu, &fast_misaligned_access); - else - cpumask_clear_cpu(cpu, &fast_misaligned_access); - - return 0; -} - -static void check_unaligned_access_nonboot_cpu(void *param) -{ - unsigned int cpu = smp_processor_id(); - struct page **pages = param; - - if (smp_processor_id() != 0) - check_unaligned_access(pages[cpu]); -} - -DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); - -static void modify_unaligned_access_branches(cpumask_t *mask, int weight) -{ - if (cpumask_weight(mask) == weight) - static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key); - else - static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key); -} - -static void set_unaligned_access_static_branches_except_cpu(int cpu) -{ - /* - * Same as set_unaligned_access_static_branches, except excludes the - * given CPU from the result. When a CPU is hotplugged into an offline - * state, this function is called before the CPU is set to offline in - * the cpumask, and thus the CPU needs to be explicitly excluded. - */ - - cpumask_t fast_except_me; - - cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); - cpumask_clear_cpu(cpu, &fast_except_me); - - modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); -} - -static void set_unaligned_access_static_branches(void) -{ - /* - * This will be called after check_unaligned_access_all_cpus so the - * result of unaligned access speed for all CPUs will be available. - * - * To avoid the number of online cpus changing between reading - * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be - * held before calling this function. - */ - - cpumask_t fast_and_online; - - cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); - - modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); -} - -static int lock_and_set_unaligned_access_static_branch(void) -{ - cpus_read_lock(); - set_unaligned_access_static_branches(); - cpus_read_unlock(); - - return 0; -} - -arch_initcall_sync(lock_and_set_unaligned_access_static_branch); - -static int riscv_online_cpu(unsigned int cpu) -{ - static struct page *buf; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - goto exit; - - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; - } - - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); - -exit: - set_unaligned_access_static_branches(); - - return 0; -} - -static int riscv_offline_cpu(unsigned int cpu) -{ - set_unaligned_access_static_branches_except_cpu(cpu); - - return 0; -} - -/* Measure unaligned access on all CPUs present at boot in parallel. */ -static int check_unaligned_access_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), - GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - -out: - unaligned_emulation_finish(); - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} - -arch_initcall(check_unaligned_access_all_cpus); - void riscv_user_isa_enable(void) { if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 9d1a305d5508..68a24cf9481a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -111,6 +111,7 @@ SYM_CODE_START(handle_exception) 1: tail do_trap_unknown SYM_CODE_END(handle_exception) +ASM_NOKPROBE(handle_exception) /* * The ret_from_exception must be called with interrupt disabled. Here is the @@ -184,6 +185,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) sret #endif SYM_CODE_END(ret_from_exception) +ASM_NOKPROBE(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) @@ -219,6 +221,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) move a0, sp tail handle_bad_stack SYM_CODE_END(handle_kernel_stack_overflow) +ASM_NOKPROBE(handle_kernel_stack_overflow) #endif SYM_CODE_START(ret_from_fork) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..30e12b310cab 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); @@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index 07915dc9279e..b75f150b923d 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -9,6 +9,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ -fno-asynchronous-unwind-tables -fno-unwind-tables \ $(call cc-option,-fno-addrsig) +# Disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..e4bc61c4e58a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -27,8 +27,6 @@ #include <asm/vector.h> #include <asm/cpufeature.h> -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> unsigned long __stack_chk_guard __read_mostly; @@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index e8515aa9d80b..92731ff8c79a 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -377,14 +377,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, return ret; } +#else +static const struct user_regset_view compat_riscv_user_native_view = {}; #endif /* CONFIG_COMPAT */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_32BIT)) + if (is_compat_thread(&task->thread_info)) return &compat_riscv_user_native_view; else -#endif return &riscv_user_native_view; } diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..5a2edd7f027e 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index c4ed7d977f57..d41090fc3203 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -28,7 +28,6 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> -#include <asm/cpufeature.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 299795341e8a..8a327b485b90 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -132,4 +132,53 @@ static int __init sbi_system_suspend_init(void) } arch_initcall(sbi_system_suspend_init); + +static int sbi_suspend_finisher(unsigned long suspend_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND, + suspend_type, resume_addr, opaque, 0, 0, 0); + + return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0; +} + +int riscv_sbi_hart_suspend(u32 state) +{ + if (state & SBI_HSM_SUSP_NON_RET_BIT) + return cpu_suspend(state, sbi_suspend_finisher); + else + return sbi_suspend_finisher(state, 0, 0); +} + +bool riscv_sbi_suspend_state_is_valid(u32 state) +{ + if (state > SBI_HSM_SUSPEND_RET_DEFAULT && + state < SBI_HSM_SUSPEND_RET_PLATFORM) + return false; + + if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT && + state < SBI_HSM_SUSPEND_NON_RET_PLATFORM) + return false; + + return true; +} + +bool riscv_sbi_hsm_is_supported(void) +{ + /* + * The SBI HSM suspend function is only available when: + * 1) SBI version is 0.3 or higher + * 2) SBI HSM extension is available + */ + if (sbi_spec_version < sbi_mk_version(0, 3) || + !sbi_probe_extension(SBI_EXT_HSM)) { + pr_info("HSM suspend not available\n"); + return false; + } + + return true; +} #endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index a7c56b41efd2..8cae41a502dd 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -147,6 +147,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) return (pair.value & ext); } +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -169,6 +170,18 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) return perf; } +#else +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_FAST; + + if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) + return RISCV_HWPROBE_MISALIGNED_EMULATED; + + return RISCV_HWPROBE_MISALIGNED_SLOW; +} +#endif static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5dba64e8e977..78cea5d2c270 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -6,7 +6,7 @@ config AS_HAS_ULEB128 menuconfig RUNTIME_KERNEL_TESTING_MENU bool "arch/riscv/kernel runtime Testing" - def_bool y + default y help Enable riscv kernel runtime testing. diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index a1b9be3c4332..05a16b1f0aee 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> @@ -121,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -310,7 +311,8 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) } } -asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +asmlinkage __visible __trap_section __no_stack_protector +void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { long syscall = regs->a7; @@ -322,10 +324,23 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) syscall = syscall_enter_from_user_mode(regs, syscall); + add_random_kstack_offset(); + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); else if (syscall != -1) regs->a0 = -ENOSYS; + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned + * for RV32I or RV64I. + * + * The resulting 6 bits of entropy is seen in SP[9:4]. + */ + choose_random_kstack_offset(get_random_u16()); syscall_exit_to_user_mode(regs); } else { diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 8ded225e8c5b..2adb7c3e4dd5 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; +#endif if (!unaligned_enabled) return -1; @@ -596,7 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -bool check_unaligned_access_emulated(int cpu) +static bool check_unaligned_access_emulated(int cpu) { long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; @@ -623,7 +625,7 @@ bool check_unaligned_access_emulated(int cpu) return misaligned_emu_detected; } -void unaligned_emulation_finish(void) +bool check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -632,13 +634,12 @@ void unaligned_emulation_finish(void) * accesses emulated since tasks requesting such control can run on any * CPU. */ - for_each_present_cpu(cpu) { - if (per_cpu(misaligned_access_speed, cpu) != - RISCV_HWPROBE_MISALIGNED_EMULATED) { - return; - } - } + for_each_online_cpu(cpu) + if (!check_unaligned_access_emulated(cpu)) + return false; + unaligned_ctl = true; + return true; } bool unaligned_ctl_available(void) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c new file mode 100644 index 000000000000..a9a6bcb02acf --- /dev/null +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/jump_label.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> + +#include "copy-unaligned.h" + +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + +DEFINE_PER_CPU(long, misaligned_access_speed); + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static cpumask_t fast_misaligned_access; +static int check_unaligned_access(void *param) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. + */ + if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + +static int riscv_online_cpu(unsigned int cpu) +{ + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + goto exit; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_all_cpus(void) +{ + check_unaligned_access_emulated_all_cpus(); + + return 0; +} +#endif + +arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..272c431ac5b9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,6 +37,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index 39e72aa016a4..b467ba5ed910 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) raw_spin_lock_irqsave(&irqd->lock, flags); sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (!pending && - ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || - (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) + if (sm == APLIC_SOURCECFG_SM_INACTIVE) goto skip_write_pending; + if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { + if (!pending) + goto skip_write_pending; + if ((irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) + goto skip_write_pending; + if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) + goto skip_write_pending; + } + if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else @@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) static bool aplic_read_input(struct aplic *aplic, u32 irq) { - bool ret; - unsigned long flags; + u32 sourcecfg, sm, raw_input, irq_inverted; struct aplic_irq *irqd; + unsigned long flags; + bool ret = false; if (!irq || aplic->nr_irqs <= irq) return false; irqd = &aplic->irqs[irq]; raw_spin_lock_irqsave(&irqd->lock, flags); - ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; + + sourcecfg = irqd->sourcecfg; + if (sourcecfg & APLIC_SOURCECFG_D) + goto skip; + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip; + + raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0; + ret = !!(raw_input ^ irq_inverted); + +skip: raw_spin_unlock_irqrestore(&irqd->lock, flags); return ret; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f4a6124d25c9..994adc26db4b 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) { - return copy_isa_ext_reg_indices(vcpu, NULL);; + return copy_isa_ext_reg_indices(vcpu, NULL); } static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index 74af3ab520b6..7fb12c59e571 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -3,7 +3,7 @@ * Checksum library * * Influenced by arch/arm64/lib/csum.c - * Copyright (C) 2023 Rivos Inc. + * Copyright (C) 2023-2024 Rivos Inc. */ #include <linux/bitops.h> #include <linux/compiler.h> @@ -318,10 +318,7 @@ unsigned int do_csum(const unsigned char *buff, int len) * branches. The largest chunk of overlap was delegated into the * do_csum_common function. */ - if (static_branch_likely(&fast_misaligned_access_speed_key)) - return do_csum_no_alignment(buff, len); - - if (((unsigned long)buff & OFFSET_MASK) == 0) + if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0)) return do_csum_no_alignment(buff, len); return do_csum_with_alignment(buff, len); diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S index 51ab5588e9ff..7c45f26de4f7 100644 --- a/arch/riscv/lib/uaccess_vector.S +++ b/arch/riscv/lib/uaccess_vector.S @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> -#include <asm-generic/export.h> #include <asm/asm.h> #include <asm/asm-extable.h> #include <asm/csr.h> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 55a34f2020a8..bc61ee5975e4 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -82,12 +82,12 @@ void flush_icache_mm(struct mm_struct *mm, bool local) #endif /* CONFIG_SMP */ #ifdef CONFIG_MMU -void flush_icache_pte(pte_t pte) +void flush_icache_pte(struct mm_struct *mm, pte_t pte) { struct folio *folio = page_folio(pte_page(pte)); if (!test_bit(PG_dcache_clean, &folio->flags)) { - flush_icache_all(); + flush_icache_mm(mm, false); set_bit(PG_dcache_clean, &folio->flags); } } diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 217fd4de6134..ba8eb3944687 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (unlikely(prev == next)) return; + membarrier_arch_switch_mm(prev, next, task); + /* * Mark the current MM context as inactive, and the next as * active. This is at least used by the icache flushing diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b5ffb2ef54ad..fe8e159394d8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -764,6 +764,11 @@ static int __init print_no5lvl(char *p) } early_param("no5lvl", print_no5lvl); +static void __init set_mmap_rnd_bits_max(void) +{ + mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; +} + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode @@ -1078,6 +1083,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) set_satp_mode(dtb_pa); + set_mmap_rnd_bits_max(); #endif /* diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index ef887efcb679..533ec9055fa0 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -10,7 +10,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, pte_t entry, int dirty) { if (!pte_same(ptep_get(ptep), entry)) - __set_pte_at(ptep, entry); + __set_pte_at(vma->vm_mm, ptep, entry); /* * update_mmu_cache will unconditionally execute, handling both * the case that the PTE changed and the spurious fault case. diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..07d743f87b3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, +static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { @@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, start, end - start, PAGE_SIZE); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index aac190085472..1adf2f39ce59 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + const struct btf_func_model *fm; + int idx; + + fm = bpf_jit_find_kfunc_model(ctx->prog, insn); + if (!fm) + return -EINVAL; + + for (idx = 0; idx < fm->nr_args; idx++) { + u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); + + if (fm->arg_size[idx] == sizeof(int)) + emit_sextw(reg, reg, ctx); + } + } + ret = emit_call(addr, fixed_addr, ctx); if (ret) return ret; diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7138d189cc42..0c4cad7d5a5b 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,31 +15,31 @@ #include <asm/barrier.h> #include <asm/cmpxchg.h> -static inline int arch_atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return __atomic_read(v); } #define arch_atomic_read arch_atomic_read -static inline void arch_atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { __atomic_set(v, i); } #define arch_atomic_set arch_atomic_set -static inline int arch_atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter) + i; } #define arch_atomic_add_return arch_atomic_add_return -static inline int arch_atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter); } #define arch_atomic_fetch_add arch_atomic_fetch_add -static inline void arch_atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { __atomic_add(i, &v->counter); } @@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) #define ATOMIC_OPS(op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ +static __always_inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __atomic_##op(i, &v->counter); \ } \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ return __atomic_##op##_barrier(i, &v->counter); \ } @@ -74,7 +74,7 @@ ATOMIC_OPS(xor) #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return __atomic_cmpxchg(&v->counter, old, new); } @@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __atomic64_read(v); } #define arch_atomic64_read arch_atomic64_read -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __atomic64_set(v, i); } #define arch_atomic64_set arch_atomic64_set -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter) + i; } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline void arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __atomic64_add(i, (long *)&v->counter); } @@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v) #define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define ATOMIC64_OPS(op) \ -static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ -{ \ - __atomic64_##op(i, (long *)&v->counter); \ -} \ -static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ -{ \ - return __atomic64_##op##_barrier(i, (long *)&v->counter); \ +#define ATOMIC64_OPS(op) \ +static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + __atomic64_##op(i, (long *)&v->counter); \ +} \ +static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 50510e08b893..7fa5f96a553a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,7 +8,7 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ -static inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v) return c; } -static inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( " st %1,%0\n" : "=R" (v->counter) : "d" (i)); } -static inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline s64 __atomic64_read(const atomic64_t *v) { s64 c; @@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v) return c; } -static inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" @@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \ -static inline op_type op_name(op_type val, op_type *ptr) \ +static __always_inline op_type op_name(op_type val, op_type *ptr) \ { \ op_type old; \ \ @@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #define __ATOMIC_OP(op_name, op_string) \ -static inline int op_name(int val, int *ptr) \ +static __always_inline int op_name(int val, int *ptr) \ { \ int old, new; \ \ @@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr") #undef __ATOMIC_OPS #define __ATOMIC64_OP(op_name, op_string) \ -static inline long op_name(long val, long *ptr) \ +static __always_inline long op_name(long val, long *ptr) \ { \ long old, new; \ \ @@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -static inline int __atomic_cmpxchg(int *ptr, int old, int new) +static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) { asm volatile( " cs %[old],%[new],%[ptr]" @@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } -static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static inline long __atomic64_cmpxchg(long *ptr, long old, long new) +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) { asm volatile( " csg %[old],%[new],%[ptr]" @@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index bf15da0fedbc..0e3da500e98c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,12 +12,12 @@ #define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { int old, new; @@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc) old, new) != old); } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { /* * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES @@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val) __atomic_add(val, &S390_lowcore.preempt_count); } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { __preempt_count_add(-val); } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(S390_lowcore.preempt_count) == preempt_offset); @@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset) #define PREEMPT_ENABLED (0) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count); } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { S390_lowcore.preempt_count = pc; } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return false; } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { S390_lowcore.preempt_count += val; } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { S390_lowcore.preempt_count -= val; } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return !--S390_lowcore.preempt_count && tif_need_resched(); } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(preempt_count() == preempt_offset && tif_need_resched()); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 787394978bc0..6a1e0fbbaa15 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler) mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - lg %r12,__LC_GMAP + ltg %r12,__LC_GMAP + jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST @@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw) SYM_DATA_END(daton_psw) .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 823d652e3917..4ad472d130a3 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event) event->cpu); struct paicrypt_map *cpump = mp->mapptr; - cpump->event = NULL; static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" @@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags) static void paicrypt_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paicrypt_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 616a25606cd6..a6da7e0cc7a6 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event) free_page(PAI_SAVE_AREA(event)); mutex_lock(&paiext_reserve_mutex); - cpump->event = NULL; if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags) static void paiext_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paiext_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index c421dd44ffbe..0c66b32e0f9f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; gmap = (struct gmap *)S390_lowcore.gmap; - if (regs->cr1 == gmap->asce) + if (gmap && gmap->asce == regs->cr1) return GMAP_FAULT; return KERNEL_FAULT; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b418333bb086..5af0402e94b8 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) * PLT for hotpatchable calls. The calling convention is the same as for the * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. */ -extern const char bpf_plt[]; -extern const char bpf_plt_ret[]; -extern const char bpf_plt_target[]; -extern const char bpf_plt_end[]; -#define BPF_PLT_SIZE 32 +struct bpf_plt { + char code[16]; + void *ret; + void *target; +} __packed; +extern const struct bpf_plt bpf_plt; asm( ".pushsection .rodata\n" " .balign 8\n" @@ -531,15 +532,14 @@ asm( " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" - "bpf_plt_end:\n" " .popsection\n" ); -static void bpf_jit_plt(void *plt, void *ret, void *target) +static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { - memcpy(plt, bpf_plt, BPF_PLT_SIZE); - *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; - *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; + memcpy(plt, &bpf_plt, sizeof(*plt)); + plt->ret = ret; + plt->target = target; } /* @@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; if (jit->prg_buf) - bpf_jit_plt(jit->prg_buf + jit->prg, + bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), jit->prg_buf + jit->prologue_plt_ret, NULL); - jit->prg += BPF_PLT_SIZE; + jit->prg += sizeof(struct bpf_plt); } static int get_probe_mem_regno(const u8 *insn) @@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) - return orig_fp; - if (!fp->jit_requested) return orig_fp; @@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void) int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { + struct bpf_plt expected_plt, current_plt, new_plt, *plt; struct { u16 opc; s32 disp; } __packed insn; - char expected_plt[BPF_PLT_SIZE]; - char current_plt[BPF_PLT_SIZE]; - char new_plt[BPF_PLT_SIZE]; - char *plt; char *ret; int err; @@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, */ } else { /* Verify the PLT. */ - plt = (char *)ip + (insn.disp << 1); - err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); + plt = ip + (insn.disp << 1); + err = copy_from_kernel_nofault(¤t_plt, plt, + sizeof(current_plt)); if (err < 0) return err; ret = (char *)ip + 6; - bpf_jit_plt(expected_plt, ret, old_addr); - if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) + bpf_jit_plt(&expected_plt, ret, old_addr); + if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) return -EINVAL; /* Adjust the call address. */ - bpf_jit_plt(new_plt, ret, new_addr); - s390_kernel_write(plt + (bpf_plt_target - bpf_plt), - new_plt + (bpf_plt_target - bpf_plt), + bpf_jit_plt(&new_plt, ret, new_addr); + s390_kernel_write(&plt->target, &new_plt.target, sizeof(void *)); } diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c index f3fba967445a..81764882d87d 100644 --- a/arch/sh/cchips/hd6446x/hd64461.c +++ b/arch/sh/cchips/hd6446x/hd64461.c @@ -72,7 +72,7 @@ static void hd64461_irq_demux(struct irq_desc *desc) } } -int __init setup_hd64461(void) +static int __init setup_hd64461(void) { int irq_base, i; diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c index 431bc18f0a41..9f666280d80c 100644 --- a/arch/sh/drivers/dma/dma-sysfs.c +++ b/arch/sh/drivers/dma/dma-sysfs.c @@ -15,7 +15,7 @@ #include <linux/string.h> #include <asm/dma.h> -static struct bus_type dma_subsys = { +static const struct bus_type dma_subsys = { .name = "dma", .dev_name = "dma", }; diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 6a1f36df6a18..cf0ad89f5639 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -28,7 +28,7 @@ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ -obj-y += virt/svm/ +obj-y += virt/ # for cleaning subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7aed87cbf386..4474bf32d0a4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -472,10 +472,6 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -config GOLDFISH - def_bool y - depends on X86_GOLDFISH - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2443,6 +2439,8 @@ config USE_X86_SEG_SUPPORT # with named address spaces - see GCC PR sanitizer/111736. # depends on !KASAN + # -fsanitize=thread (KCSAN) is also incompatible. + depends on !KCSAN config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) @@ -2635,6 +2633,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" + depends on CPU_SUP_INTEL + default y + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See <file:Documentation/admin-guide/hw-vuln/spectre.rst> + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 662d9d4033e6..5ab93fcdd691 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -251,8 +251,6 @@ archheaders: libs-y += arch/x86/lib/ -core-y += arch/x86/virt/ - # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index f4e22ef774ab..876fc6d46a13 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/setup.h> .code64 .text @@ -49,6 +51,11 @@ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi + + /* Switch to the firmware's stack */ + movl efi32_boot_sp(%rip), %esp + andl $~7, %esp + #ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx @@ -144,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -158,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -234,8 +243,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -254,13 +261,25 @@ SYM_FUNC_START_LOCAL(efi32_entry) /* Store firmware IDT descriptor */ sidtl (efi32_boot_idt - 1b)(%ebx) + /* Store firmware stack pointer */ + movl %esp, (efi32_boot_sp - 1b)(%ebx) + /* Store boot arguments */ leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -286,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers @@ -318,5 +336,6 @@ SYM_DATA_END(efi32_boot_idt) SYM_DATA_LOCAL(efi32_boot_cs, .word 0) SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_sp, .long 0) SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d07be9d05cd0..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,19 +3,28 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ #include <linux/export.h> #include <linux/cc_platform.h> +#include <linux/string.h> +#include <linux/random.h> +#include <asm/archrandom.h> #include <asm/coco.h> #include <asm/processor.h> enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 66c9e2aab16c..be3ee4294903 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config @@ -1,5 +1,6 @@ CONFIG_NOHIGHMEM=y # CONFIG_HIGHMEM4G is not set # CONFIG_HIGHMEM64G is not set +# CONFIG_UNWINDER_ORC is not set CONFIG_UNWINDER_GUESS=y # CONFIG_UNWINDER_FRAME_POINTER is not set diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6356060caaf3..51cc9c7cb9bd 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } @@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; @@ -255,6 +255,71 @@ DEFINE_IDTENTRY_RAW(int80_emulation) instrumentation_end(); syscall_exit_to_user_mode(regs); } + +#ifdef CONFIG_X86_FRED +/* + * A FRED-specific INT80 handler is warranted for the follwing reasons: + * + * 1) As INT instructions and hardware interrupts are separate event + * types, FRED does not preclude the use of vector 0x80 for external + * interrupts. As a result, the FRED setup code does not reserve + * vector 0x80 and calling int80_is_external() is not merely + * suboptimal but actively incorrect: it could cause a system call + * to be incorrectly ignored. + * + * 2) It is called only for handling vector 0x80 of event type + * EVENT_TYPE_SWINT and will never be called to handle any external + * interrupt (event type EVENT_TYPE_EXTINT). + * + * 3) FRED has separate entry flows depending on if the event came from + * user space or kernel space, and because the kernel does not use + * INT insns, the FRED kernel entry handler fred_entry_from_kernel() + * falls through to fred_bad_type() if the event type is + * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling + * an INT insn, it can only be from a user level. + * + * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will + * likely take a different approach if it is ever needed: it + * probably belongs in either fred_intx()/ fred_other() or + * asm_fred_entrypoint_user(), depending on if this ought to be done + * for all entries from userspace or only system + * calls. + * + * 5) INT $0x80 is the fast path for 32-bit system calls under FRED. + */ +DEFINE_FREDENTRY_RAW(int80_emulation) +{ + int nr; + + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * FRED pushed 0 into regs::orig_ax and regs::ax contains the + * syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif #else /* CONFIG_IA32_EMULATION */ /* Handles int $0x80 on a 32bit kernel */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8af2a26b24f6..1b5be07f8669 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index eabf48c4d4b4..c779046cc3fe 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index ac120cbdaaf2..89c1476fcdd9 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -28,9 +28,9 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code if (regs->fred_cs.sl > 0) { pr_emerg("PANIC: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - die("invalid or fatal FRED event", regs, regs->orig_ax); + die("invalid or fatal FRED event", regs, error_code); panic("invalid or fatal FRED event"); } else { unsigned long flags = oops_begin(); @@ -38,10 +38,10 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code pr_alert("BUG: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - if (__die("Invalid or fatal FRED event", regs, regs->orig_ax)) + if (__die("Invalid or fatal FRED event", regs, error_code)) sig = 0; oops_end(flags, regs, sig); @@ -66,7 +66,7 @@ static noinstr void fred_intx(struct pt_regs *regs) /* INT80 */ case IA32_SYSCALL_VECTOR: if (ia32_enabled()) - return int80_emulation(regs); + return fred_int80_emulation(regs); fallthrough; #endif diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f..c2235bae17ef 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include <asm/syscalls_32.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_32.h> }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_32.h> + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9..33b3f09e6f15 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include <asm/syscalls_64.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_64.h> }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_64.h> + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265..03de4a932131 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include <asm/syscalls_x32.h> #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include <asm/syscalls_x32.h> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_x32.h> + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 620f6257bbe9..3d64bcc403cf 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -9,7 +9,9 @@ include $(srctree)/lib/vdso/Makefile # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KMSAN_SANITIZE_vclock_gettime.o := n +KMSAN_SANITIZE_vdso32/vclock_gettime.o := n KMSAN_SANITIZE_vgetcpu.o := n +KMSAN_SANITIZE_vdso32/vgetcpu.o := n UBSAN_SANITIZE := n KCSAN_SANITIZE := n @@ -39,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index aec16e581f5b..985ef3b47919 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = /* * AMD Performance Monitor Family 17h and later: */ -static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, @@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; +static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, +}; + +static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120, +}; + static u64 amd_pmu_event_map(int hw_event) { - if (boot_cpu_data.x86 >= 0x17) - return amd_f17h_perfmon_event_map[hw_event]; + if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a) + return amd_zen4_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) + return amd_zen2_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN1)) + return amd_zen1_perfmon_event_map[hw_event]; return amd_perfmon_event_map[hw_event]; } @@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 4a1e600314d5..5149830c7c4f 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 09050641ce5d..5b0dd07b1ef1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 78cd5084104e..4367aa77cb8d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; lbr->info = x86_pmu.lbr_info; + lbr->has_callstack = x86_pmu_has_lbr_callstack(); } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5fc45543e955..0569f579338b 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu) * IPI implementation on Hyper-V. */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { struct hv_send_ipi_ex *ipi_arg; unsigned long flags; @@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, - exclude_self ? cpu_is_self : NULL); + nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be @@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, } status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, - ipi_arg, NULL); + ipi_arg, NULL); ipi_mask_ex_done: local_irq_restore(flags); @@ -155,7 +155,7 @@ ipi_mask_ex_done: } static bool __send_ipi_mask(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; @@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; /* @@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, } status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, - ipi_arg.cpu_mask); + ipi_arg.cpu_mask); return hv_result_success(status); do_ex_hypercall: @@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; if (vp >= 64) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 8f3a4d16bb79..17a71e92a343 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -667,14 +667,14 @@ void hyperv_cleanup(void) hv_hypercall_pg = NULL; /* Reset the hypercall page */ - hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); + hypercall_msr.as_uint64 = hv_get_msr(HV_X64_MSR_HYPERCALL); hypercall_msr.enable = 0; - hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hv_set_msr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* Reset the TSC page */ - tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); + tsc_msr.as_uint64 = hv_get_msr(HV_X64_MSR_REFERENCE_TSC); tsc_msr.enable = 0; - hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + hv_set_msr(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 68a0843d4750..3fa1f2ee7b0d 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -3,7 +3,6 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/clockchips.h> -#include <linux/acpi.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> @@ -116,12 +115,11 @@ free_buf: int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) { - struct hv_add_logical_processor_in *input; - struct hv_add_logical_processor_out *output; + struct hv_input_add_logical_processor *input; + struct hv_output_add_logical_processor *output; u64 status; unsigned long flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* * When adding a logical processor, the hypervisor may return @@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) input->lp_index = lp_index; input->apic_id = apic_id; - input->flags = 0; - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, input, output); local_irq_restore(flags); @@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) u64 status; unsigned long irq_flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* Root VPs don't seem to need pages deposited */ if (partition_id != hv_current_partition_id) { @@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) input->vp_index = vp_index; input->flags = flags; input->subnode_type = HvSubnodeAny; - if (node != NUMA_NO_NODE) { - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; - } else { - input->proximity_domain_info.as_uint64 = 0; - } + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); local_irq_restore(irq_flags); diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c index 737d6f7a6155..151e851bef09 100644 --- a/arch/x86/hyperv/hv_spinlock.c +++ b/arch/x86/hyperv/hv_spinlock.c @@ -16,7 +16,7 @@ #include <asm/paravirt.h> #include <asm/apic.h> -static bool __initdata hv_pvspin = true; +static bool hv_pvspin __initdata = true; static void hv_qlock_kick(int cpu) { @@ -64,6 +64,7 @@ __visible bool hv_vcpu_is_preempted(int vcpu) { return false; } + PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted); void __init hv_init_spinlocks(void) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index edd2f35b2a5e..5c7de79423b8 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -12,6 +12,7 @@ #include <asm/i8259.h> #include <asm/mshyperv.h> #include <asm/realmode.h> +#include <../kernel/smpboot.h> extern struct boot_params boot_params; static struct real_mode_header hv_vtl_real_mode_header; @@ -65,7 +66,7 @@ static void hv_vtl_ap_entry(void) ((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params); } -static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored) +static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) { u64 status; int ret = 0; @@ -79,7 +80,9 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored) struct ldttss_desc *ldt; struct desc_struct *gdt; - u64 rsp = current->thread.sp; + struct task_struct *idle = idle_thread_get(cpu); + u64 rsp = (unsigned long)idle->thread.sp; + u64 rip = (u64)&hv_vtl_ap_entry; native_store_gdt(&gdt_ptr); @@ -206,7 +209,15 @@ static int hv_vtl_apicid_to_vp_id(u32 apic_id) static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { - int vp_id; + int vp_id, cpu; + + /* Find the logical CPU for the APIC ID */ + for_each_present_cpu(cpu) { + if (arch_match_cpu_phys_id(cpu, apicid)) + break; + } + if (cpu >= nr_cpu_ids) + return -EINVAL; pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); vp_id = hv_vtl_apicid_to_vp_id(apicid); @@ -220,7 +231,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) return -EINVAL; } - return hv_vtl_bringup_vcpu(vp_id, start_eip); + return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip); } int __init hv_vtl_early_init(void) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..67b68d0d17d1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) return dest; } static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, - void *func) + void *func, void *ip) { return 0; } diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 94ce0f7c9d3a..e6ab0cf15ed5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -13,6 +13,7 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/hardirq.h> +#include <asm/io.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } static inline void native_apic_mem_eoi(void) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 076bf8dee702..25466c4d2134 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -14,6 +14,7 @@ #include <asm/asm.h> #include <asm/fred.h> #include <asm/gsseg.h> +#include <asm/nospec-branch.h> #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index fe1e7e3cc844..63bdc6b85219 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -79,6 +79,9 @@ do { \ #define __smp_mb__before_atomic() do { } while (0) #define __smp_mb__after_atomic() do { } while (0) +/* Writing to CR3 provides a full memory barrier in switch_mm(). */ +#define smp_mb__after_switch_mm() do { } while (0) + #include <asm-generic/barrier.h> #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index fb7388bbc212..c086699b0d0c 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) @@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" @@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f0337f7bcf16..3c7434329661 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -460,6 +460,18 @@ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ /* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ + +/* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) @@ -507,4 +519,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h index 152239f95541..7835b2cdff04 100644 --- a/arch/x86/include/asm/crash_reserve.h +++ b/arch/x86/include/asm/crash_reserve.h @@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void) #endif } +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + #endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index da4054fbf533..c492bdc97b05 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -155,6 +155,7 @@ #define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 (DISABLE_SEV_SNP) #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 2ff26f53cd62..3787d26810c1 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -182,7 +182,7 @@ enum hv_isolation_type { #define HV_X64_MSR_HYPERCALL 0x40000001 /* MSR used to provide vcpu index */ -#define HV_REGISTER_VP_INDEX 0x40000002 +#define HV_X64_MSR_VP_INDEX 0x40000002 /* MSR used to reset the guest OS. */ #define HV_X64_MSR_RESET 0x40000003 @@ -191,10 +191,10 @@ enum hv_isolation_type { #define HV_X64_MSR_VP_RUNTIME 0x40000010 /* MSR used to read the per-partition time reference counter */ -#define HV_REGISTER_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 /* A partition's reference time stamp counter (TSC) page */ -#define HV_REGISTER_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 /* MSR used to retrieve the TSC frequency */ #define HV_X64_MSR_TSC_FREQUENCY 0x40000022 @@ -209,61 +209,61 @@ enum hv_isolation_type { #define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 /* Define synthetic interrupt controller model specific registers. */ -#define HV_REGISTER_SCONTROL 0x40000080 -#define HV_REGISTER_SVERSION 0x40000081 -#define HV_REGISTER_SIEFP 0x40000082 -#define HV_REGISTER_SIMP 0x40000083 -#define HV_REGISTER_EOM 0x40000084 -#define HV_REGISTER_SINT0 0x40000090 -#define HV_REGISTER_SINT1 0x40000091 -#define HV_REGISTER_SINT2 0x40000092 -#define HV_REGISTER_SINT3 0x40000093 -#define HV_REGISTER_SINT4 0x40000094 -#define HV_REGISTER_SINT5 0x40000095 -#define HV_REGISTER_SINT6 0x40000096 -#define HV_REGISTER_SINT7 0x40000097 -#define HV_REGISTER_SINT8 0x40000098 -#define HV_REGISTER_SINT9 0x40000099 -#define HV_REGISTER_SINT10 0x4000009A -#define HV_REGISTER_SINT11 0x4000009B -#define HV_REGISTER_SINT12 0x4000009C -#define HV_REGISTER_SINT13 0x4000009D -#define HV_REGISTER_SINT14 0x4000009E -#define HV_REGISTER_SINT15 0x4000009F +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F /* * Define synthetic interrupt controller model specific registers for * nested hypervisor. */ -#define HV_REGISTER_NESTED_SCONTROL 0x40001080 -#define HV_REGISTER_NESTED_SVERSION 0x40001081 -#define HV_REGISTER_NESTED_SIEFP 0x40001082 -#define HV_REGISTER_NESTED_SIMP 0x40001083 -#define HV_REGISTER_NESTED_EOM 0x40001084 -#define HV_REGISTER_NESTED_SINT0 0x40001090 +#define HV_X64_MSR_NESTED_SCONTROL 0x40001080 +#define HV_X64_MSR_NESTED_SVERSION 0x40001081 +#define HV_X64_MSR_NESTED_SIEFP 0x40001082 +#define HV_X64_MSR_NESTED_SIMP 0x40001083 +#define HV_X64_MSR_NESTED_EOM 0x40001084 +#define HV_X64_MSR_NESTED_SINT0 0x40001090 /* * Synthetic Timer MSRs. Four timers per vcpu. */ -#define HV_REGISTER_STIMER0_CONFIG 0x400000B0 -#define HV_REGISTER_STIMER0_COUNT 0x400000B1 -#define HV_REGISTER_STIMER1_CONFIG 0x400000B2 -#define HV_REGISTER_STIMER1_COUNT 0x400000B3 -#define HV_REGISTER_STIMER2_CONFIG 0x400000B4 -#define HV_REGISTER_STIMER2_COUNT 0x400000B5 -#define HV_REGISTER_STIMER3_CONFIG 0x400000B6 -#define HV_REGISTER_STIMER3_COUNT 0x400000B7 +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 /* Hyper-V guest idle MSR */ #define HV_X64_MSR_GUEST_IDLE 0x400000F0 /* Hyper-V guest crash notification MSR's */ -#define HV_REGISTER_CRASH_P0 0x40000100 -#define HV_REGISTER_CRASH_P1 0x40000101 -#define HV_REGISTER_CRASH_P2 0x40000102 -#define HV_REGISTER_CRASH_P3 0x40000103 -#define HV_REGISTER_CRASH_P4 0x40000104 -#define HV_REGISTER_CRASH_CTL 0x40000105 +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 /* TSC emulation after migration */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 @@ -276,31 +276,38 @@ enum hv_isolation_type { /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0) -/* Register name aliases for temporary compatibility */ -#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT -#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG -#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT -#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG -#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT -#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG -#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT -#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG -#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL -#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION -#define HV_X64_MSR_SIMP HV_REGISTER_SIMP -#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP -#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX -#define HV_X64_MSR_EOM HV_REGISTER_EOM -#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0 -#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15 -#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0 -#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1 -#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2 -#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3 -#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4 -#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL -#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT -#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC +/* + * To support arch-generic code calling hv_set/get_register: + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl + * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall + */ +#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) +#define HV_MSR_CRASH_P1 (HV_X64_MSR_CRASH_P1) +#define HV_MSR_CRASH_P2 (HV_X64_MSR_CRASH_P2) +#define HV_MSR_CRASH_P3 (HV_X64_MSR_CRASH_P3) +#define HV_MSR_CRASH_P4 (HV_X64_MSR_CRASH_P4) +#define HV_MSR_CRASH_CTL (HV_X64_MSR_CRASH_CTL) + +#define HV_MSR_VP_INDEX (HV_X64_MSR_VP_INDEX) +#define HV_MSR_TIME_REF_COUNT (HV_X64_MSR_TIME_REF_COUNT) +#define HV_MSR_REFERENCE_TSC (HV_X64_MSR_REFERENCE_TSC) + +#define HV_MSR_SINT0 (HV_X64_MSR_SINT0) +#define HV_MSR_SVERSION (HV_X64_MSR_SVERSION) +#define HV_MSR_SCONTROL (HV_X64_MSR_SCONTROL) +#define HV_MSR_SIEFP (HV_X64_MSR_SIEFP) +#define HV_MSR_SIMP (HV_X64_MSR_SIMP) +#define HV_MSR_EOM (HV_X64_MSR_EOM) + +#define HV_MSR_NESTED_SCONTROL (HV_X64_MSR_NESTED_SCONTROL) +#define HV_MSR_NESTED_SVERSION (HV_X64_MSR_NESTED_SVERSION) +#define HV_MSR_NESTED_SIEFP (HV_X64_MSR_NESTED_SIEFP) +#define HV_MSR_NESTED_SIMP (HV_X64_MSR_NESTED_SIMP) +#define HV_MSR_NESTED_EOM (HV_X64_MSR_NESTED_EOM) +#define HV_MSR_NESTED_SINT0 (HV_X64_MSR_NESTED_SINT0) + +#define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) +#define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) /* * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index b65e9c46b922..d0941f4c2724 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -127,6 +127,7 @@ #define INTEL_FAM6_ARROWLAKE_H 0xC5 #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_FAM6_ARROWLAKE_U 0xB5 #define INTEL_FAM6_LUNARLAKE_M 0xBD diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16e07a2eee19..6efd1497b026 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -855,6 +855,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; struct kvm_hypervisor_cpuid kvm_cpuid; + bool is_amd_compatible; /* * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ce4ce8720d55..390c4d13956d 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -293,24 +293,24 @@ static inline void hv_ivm_msr_write(u64 msr, u64 value) {} static inline void hv_ivm_msr_read(u64 msr, u64 *value) {} #endif -static inline bool hv_is_synic_reg(unsigned int reg) +static inline bool hv_is_synic_msr(unsigned int reg) { - return (reg >= HV_REGISTER_SCONTROL) && - (reg <= HV_REGISTER_SINT15); + return (reg >= HV_X64_MSR_SCONTROL) && + (reg <= HV_X64_MSR_SINT15); } -static inline bool hv_is_sint_reg(unsigned int reg) +static inline bool hv_is_sint_msr(unsigned int reg) { - return (reg >= HV_REGISTER_SINT0) && - (reg <= HV_REGISTER_SINT15); + return (reg >= HV_X64_MSR_SINT0) && + (reg <= HV_X64_MSR_SINT15); } -u64 hv_get_register(unsigned int reg); -void hv_set_register(unsigned int reg, u64 value); -u64 hv_get_non_nested_register(unsigned int reg); -void hv_set_non_nested_register(unsigned int reg, u64 value); +u64 hv_get_msr(unsigned int reg); +void hv_set_msr(unsigned int reg, u64 value); +u64 hv_get_non_nested_msr(unsigned int reg); +void hv_set_non_nested_msr(unsigned int reg, u64 value); -static __always_inline u64 hv_raw_get_register(unsigned int reg) +static __always_inline u64 hv_raw_get_msr(unsigned int reg) { return __rdmsr(reg); } @@ -331,10 +331,10 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, { return -1; } -static inline void hv_set_register(unsigned int reg, u64 value) { } -static inline u64 hv_get_register(unsigned int reg) { return 0; } -static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } -static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } +static inline void hv_set_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_msr(unsigned int reg) { return 0; } +static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 05956bd8bacf..e72c2b872957 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,10 +61,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -163,6 +166,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index fc3a8a3c7ffe..ff5f1ecc7d1e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -262,11 +262,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -282,8 +291,8 @@ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ "call entry_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif @@ -317,6 +326,19 @@ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -342,6 +364,8 @@ extern void retbleed_return_thunk(void); static inline void retbleed_return_thunk(void) {} #endif +extern void srso_alias_untrain_ret(void); + #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); @@ -357,6 +381,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 3736b8a46c04..7f1e17250546 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -555,6 +555,7 @@ struct x86_pmu_lbr { unsigned int from; unsigned int to; unsigned int info; + bool has_callstack; }; extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9477b4053bce..7f57382afee4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages); void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); -void kdump_sev_callback(void); void sev_show_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } @@ -244,12 +243,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; @@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } -static inline void kdump_sev_callback(void) { } static inline void sev_show_status(void) { } #endif @@ -270,6 +268,7 @@ int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); +void kdump_sev_callback(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } #endif #endif diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a800abb1a992..d8416b3bf832 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,11 +12,6 @@ /* image of the saved processor state */ struct saved_context { - /* - * On x86_32, all segment registers except gs are saved at kernel - * entry in pt_regs. - */ - u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; struct saved_msrs saved_msrs; @@ -27,6 +22,11 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; bool misc_enable_saved; } __attribute__((packed)); diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f44e2f9ab65d..2fc7bc3863ff 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. @@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) } bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b89b40f250e6..6149eabe200f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ad29984d5e39..ef11aa4cab42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,7 @@ enum sev_cmd_id { struct kvm_sev_cmd { __u32 id; + __u32 pad0; __u64 data; __u32 error; __u32 sev_fd; @@ -704,28 +705,35 @@ struct kvm_sev_launch_start { __u32 policy; __u64 dh_uaddr; __u32 dh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_launch_update_data { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_launch_secret { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_launch_measure { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_guest_status { @@ -738,33 +746,43 @@ struct kvm_sev_dbg { __u64 src_uaddr; __u64 dst_uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_attestation_report { __u8 mnonce[16]; __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_send_start { __u32 policy; + __u32 pad0; __u64 pdh_cert_uaddr; __u32 pdh_cert_len; + __u32 pad1; __u64 plat_certs_uaddr; __u32 plat_certs_len; + __u32 pad2; __u64 amd_certs_uaddr; __u32 amd_certs_len; + __u32 pad3; __u64 session_uaddr; __u32 session_len; + __u32 pad4; }; struct kvm_sev_send_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_receive_start { @@ -772,17 +790,22 @@ struct kvm_sev_receive_start { __u32 policy; __u64 pdh_uaddr; __u32 pdh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_receive_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6bc3456a8ebf..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { __u32 token; __u8 pad[56]; - __u32 enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a42d8a6f7149..c342c4aa9c68 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1687,11 +1687,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 30335182b6b0..e92ff0c11db8 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr) return !bcmp(pad, insn_buff, tmpl_size); } -int x86_call_depth_emit_accounting(u8 **pprog, void *func) +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) { unsigned int tmpl_size = SKL_TMPL_SIZE; u8 insn_buff[MAX_PATCH_LEN]; @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, *pprog, + apply_relocation(insn_buff, tmpl_size, ip, skl_call_thunk_template, tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..cb9eece55904 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: @@ -527,7 +535,6 @@ clear_sev: static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; if (c->x86 >= 0xf) @@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b..ab18185894df 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; @@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -301,8 +305,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -401,8 +402,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -659,8 +656,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2695,15 +2763,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2780,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char *spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; + + return "; BHI: Vulnerable"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba8cf5e9ce56..605c26c009c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Don't use AutoIBRS when SNP is enabled because it degrades host * userspace indirect branch performance. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || (cpu_has(c, X86_FEATURE_AUTOIBRS) && !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); @@ -2307,6 +2315,8 @@ void arch_smt_update(void) void __init arch_cpu_finalize_init(void) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + identify_boot_cpu(); select_idle_routine(); @@ -2345,6 +2355,13 @@ void __init arch_cpu_finalize_init(void) fpu__init_system(); fpu__init_cpu(); + /* + * Ensure that access to the per CPU representation has the initial + * boot CPU configuration. + */ + *c = boot_cpu_data; + c->initialized = true; + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b7174209d855..946813d816bf 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 303fef824167..e0fd57a8ba84 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -45,70 +45,70 @@ bool hyperv_paravisor_present __ro_after_init; EXPORT_SYMBOL_GPL(hyperv_paravisor_present); #if IS_ENABLED(CONFIG_HYPERV) -static inline unsigned int hv_get_nested_reg(unsigned int reg) +static inline unsigned int hv_get_nested_msr(unsigned int reg) { - if (hv_is_sint_reg(reg)) - return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0; + if (hv_is_sint_msr(reg)) + return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; switch (reg) { - case HV_REGISTER_SIMP: - return HV_REGISTER_NESTED_SIMP; - case HV_REGISTER_SIEFP: - return HV_REGISTER_NESTED_SIEFP; - case HV_REGISTER_SVERSION: - return HV_REGISTER_NESTED_SVERSION; - case HV_REGISTER_SCONTROL: - return HV_REGISTER_NESTED_SCONTROL; - case HV_REGISTER_EOM: - return HV_REGISTER_NESTED_EOM; + case HV_X64_MSR_SIMP: + return HV_X64_MSR_NESTED_SIMP; + case HV_X64_MSR_SIEFP: + return HV_X64_MSR_NESTED_SIEFP; + case HV_X64_MSR_SVERSION: + return HV_X64_MSR_NESTED_SVERSION; + case HV_X64_MSR_SCONTROL: + return HV_X64_MSR_NESTED_SCONTROL; + case HV_X64_MSR_EOM: + return HV_X64_MSR_NESTED_EOM; default: return reg; } } -u64 hv_get_non_nested_register(unsigned int reg) +u64 hv_get_non_nested_msr(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; } -EXPORT_SYMBOL_GPL(hv_get_non_nested_register); +EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); -void hv_set_non_nested_register(unsigned int reg, u64 value) +void hv_set_non_nested_msr(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ - if (hv_is_sint_reg(reg)) + if (hv_is_sint_msr(reg)) wrmsrl(reg, value | 1 << 20); } else { wrmsrl(reg, value); } } -EXPORT_SYMBOL_GPL(hv_set_non_nested_register); +EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); -u64 hv_get_register(unsigned int reg) +u64 hv_get_msr(unsigned int reg) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - return hv_get_non_nested_register(reg); + return hv_get_non_nested_msr(reg); } -EXPORT_SYMBOL_GPL(hv_get_register); +EXPORT_SYMBOL_GPL(hv_get_msr); -void hv_set_register(unsigned int reg, u64 value) +void hv_set_msr(unsigned int reg, u64 value) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - hv_set_non_nested_register(reg, value); + hv_set_non_nested_msr(reg, value); } -EXPORT_SYMBOL_GPL(hv_set_register); +EXPORT_SYMBOL_GPL(hv_set_msr); static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); @@ -352,13 +352,24 @@ static void __init reduced_hw_init(void) x86_init.irqs.pre_vector_init = x86_init_noop; } +int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) +{ + unsigned int hv_max_functions; + + hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); + if (hv_max_functions < HYPERV_CPUID_VERSION) { + pr_err("%s: Could not detect Hyper-V version\n", __func__); + return -ENODEV; + } + + cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx); + + return 0; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; - int hv_host_info_eax; - int hv_host_info_ebx; - int hv_host_info_ecx; - int hv_host_info_edx; #ifdef CONFIG_PARAVIRT pv_info.name = "Hyper-V"; @@ -409,21 +420,6 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running on a nested hypervisor\n"); } - /* - * Extract host information. - */ - if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); - hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); - - pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", - hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF, - hv_host_info_eax, hv_host_info_edx & 0xFFFFFF, - hv_host_info_ecx, hv_host_info_edx >> 24); - } - if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { x86_platform.calibrate_tsc = hv_get_tsc_khz; @@ -456,7 +452,7 @@ static void __init ms_hyperv_init_platform(void) /* To be supported: more work is required. */ ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; - /* HV_REGISTER_CRASH_CTL is unsupported. */ + /* HV_MSR_CRASH_CTL is unsupported. */ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; /* Don't trust Hyper-V's TLB-flushing hypercalls. */ @@ -648,6 +644,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, + .init.guest_late_init = ms_hyperv_late_init, #ifdef CONFIG_AMD_MEM_ENCRYPT .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..af5aa2c754c2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, @@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3259b1d4fefe..d17c9b71eb4a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } @@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } @@ -415,6 +418,17 @@ void __init topology_init_possible_cpus(void) unsigned int total = assigned + disabled; u32 apicid, firstid; + /* + * If there was no APIC registered, then fake one so that the + * topology bitmap is populated. That ensures that the code below + * is valid and the various query interfaces can be used + * unconditionally. This does not affect the actual APIC code in + * any way because either the local APIC address has not been + * registered or the local APIC was disabled on the command line. + */ + if (topo_info.boot_cpu_apic_id == BAD_APICID) + topology_register_boot_apic(0); + if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { disabled += assigned - nr_cpu_ids; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493af..a7aa6eff4ae5 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. */ if (!has_0xb) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); @@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) static bool parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) @@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan) tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } +} + static void parse_topology_amd(struct topo_scan *tscan) { bool has_0xb = false; @@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan) void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); if (tscan->amd_nodes_per_pkg > 1) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a50ae8d63d1c..9a6069e7133c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } } -static void topo_set_ids(struct topo_scan *tscan) +static void topo_set_ids(struct topo_scan *tscan, bool early) { struct cpuinfo_x86 *c = tscan->c; u32 apicid = c->topo.apicid; @@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); - c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + if (!early) { + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + } /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> @@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - topo_set_ids(&tscan); + topo_set_ids(&tscan, false); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) x86_topo_system.dom_size[dom] = 1U << sft; } - topo_set_ids(&tscan); + topo_set_ids(&tscan, true); /* * AMD systems have Nodes per package which cannot be mapped to diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b66f540de054..6f1b379e3b38 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - /* - * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by - * kexec and do not need to be reserved. - */ - if (data->type != SETUP_EFI && - data->type != SETUP_IMA && - data->type != SETUP_RNG_SEED) - e820__range_update_kexec(pa_data, - sizeof(*data) + data->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT) { len += data->len; early_memunmap(data, sizeof(*data)); @@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void) indirect = (struct setup_indirect *)data->data; - if (indirect->type != SETUP_INDIRECT) { + if (indirect->type != SETUP_INDIRECT) e820__range_update(indirect->addr, indirect->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(indirect->addr, indirect->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - } } pa_data = pa_next; @@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void) } e820__update_table(e820_table); - e820__update_table(e820_table_kexec); pr_info("extended physical RAM map:\n"); e820__print_table("reserve setup_data"); diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include <linux/cc_platform.h> #include <linux/ioport.h> #include <linux/eisa.h> #include <linux/io.h> @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 117e74c44e75..33a214b1a4ce 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b..19ca623ffa2a 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 212e8e06aeba..a817ed0724d1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -81,6 +81,13 @@ static inline bool check_la57_support(void) if (!(native_read_cr4() & X86_CR4_LA57)) return false; + RIP_REL_REF(__pgtable_l5_enabled) = 1; + RIP_REL_REF(pgdir_shift) = 48; + RIP_REL_REF(ptrs_per_p4d) = 512; + RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5; + RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5; + RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5; + return true; } @@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr, p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt); p4d[MAX_PTRS_PER_P4D - 1] += load_delta; - pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC; + pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE; } RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta; @@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); - if (check_la57_support()) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - page_offset_base = __PAGE_OFFSET_BASE_L5; - vmalloc_base = __VMALLOC_BASE_L5; - vmemmap_base = __VMEMMAP_BASE_L5; - } - cr4_init_shadow(); /* Kill off the identity-map trampoline */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 091b3ab76a18..d0e49bd7c6f3 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -373,7 +373,16 @@ out: kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4cadfd606e8e..7f0732bc0ccd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) inc_irq_stat(irq_hv_callback_count); - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); - __this_cpu_write(apf_reason.enabled, 1); + __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } @@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__this_cpu_read(apf_reason.enabled)) + if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __this_cpu_write(apf_reason.enabled, 0); + __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 1ccd30c8246f..e89171b0347a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if (!smp_check_mpc(mpc, oem, str)) return 0; - /* Initialize the lapic mapping */ - if (!acpi_lapic) - register_lapic_address(mpc->lapic); - - if (early) + if (early) { + /* Initialize the lapic mapping */ + if (!acpi_lapic) + register_lapic_address(mpc->lapic); return 1; + } /* Now process the configuration blocks. */ while (count < mpc->length) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 9a5b372c706f..ed163c8c8604 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); static char *nmi_check_stall_msg[] = { /* */ -/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */ /* | +------ cpu_is_offline(cpu) */ /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ /* | | | NMI handler has been invoked. */ @@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp) nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { msgp = "CPU entered NMI handler function, but has not exited"; - } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { - msgp = "CPU is handling NMIs"; - } else { - idx = ((nsp->idt_seq_snap & 0x1) << 2) | + } else if (nsp->idt_nmi_seq_snap == nmi_seq || + nsp->idt_nmi_seq_snap + 1 == nmi_seq) { + idx = ((nmi_seq & 0x1) << 2) | (cpu_is_offline(cpu) << 1) | (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) modp = ", but OK because ignore_nmis was set"; - if (nmi_seq & 0x1) - msghp = " (CPU currently in NMI handler function)"; - else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) msghp = " (CPU exited one NMI handler function)"; + else if (nmi_seq & 0x1) + msghp = " (CPU currently in NMI handler function)"; + else + msghp = " (CPU was never in an NMI handler function)"; + } else { + msgp = "CPU is handling NMIs"; } - pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", - __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp); + pr_alert("%s: last activity: %lu jiffies ago.\n", + __func__, j - READ_ONCE(nsp->recv_jiffies)); } } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3e1e96efadfe..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include <linux/console.h> #include <linux/crash_dump.h> #include <linux/dma-map-ops.h> -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/ima.h> #include <linux/init_ohci1394_dma.h> @@ -36,6 +35,7 @@ #include <asm/bios_ebda.h> #include <asm/bugs.h> #include <asm/cacheinfo.h> +#include <asm/coco.h> #include <asm/cpu.h> #include <asm/efi.h> #include <asm/gart.h> @@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this @@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); @@ -1206,16 +1207,6 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ -#ifndef CONFIG_SMP -void __init smp_prepare_boot_cpu(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} -#endif - static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b59b09c2f284..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/psp-sev.h> +#include <linux/dmi.h> #include <uapi/linux/sev-guest.h> #include <asm/init.h> @@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, unsigned long vaddr_end, int op) { @@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. - */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe355c89f6c1..76bb65045c64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -static void __init smp_store_boot_cpu_info(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -1039,29 +1031,15 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_die_cpumask(0)); } -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_possible_cpu(i) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = nr_cpu_ids; - } -} - void __init smp_prepare_cpus_common(void) { unsigned int i; - smp_cpu_index_default(); - - /* - * Setup boot CPU information - */ - smp_store_boot_cpu_info(); /* Final full version of the data */ - mb(); + /* Mark all except the boot CPU as hotpluggable */ + for_each_possible_cpu(i) { + if (i) + per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids; + } for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a42830dc151b..d5dc5a92635a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/export.h> @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8c3032a96caf..0ebdd088f28b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -118,10 +118,11 @@ config KVM_AMD will be called kvm-amd. config KVM_AMD_SEV - def_bool y bool "AMD Secure Encrypted Virtualization (SEV) support" + default y depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 475b5fa917a6..addc44fc7187 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,10 +3,6 @@ ccflags-y += -I $(srctree)/arch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror -ifeq ($(CONFIG_FRAME_POINTER),y) -OBJECT_FILES_NON_STANDARD_vmenter.o := y -endif - include $(srctree)/virt/kvm/Makefile.kvm kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index adba49afb5fe..77352a4abd87 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, - const char *sig) +static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries, + int nent, const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_hypervisor_cpuid_base(base) { - entry = kvm_find_cpuid_entry(vcpu, base); + entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (entry) { u32 signature[3]; @@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp return cpuid; } -static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, - struct kvm_cpuid_entry2 *entries, int nent) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) { - u32 base = vcpu->arch.kvm_cpuid.base; - - if (!base) - return NULL; + return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, sig); +} - return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, +static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries, + int nent, u32 kvm_cpuid_base) +{ + return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES, KVM_CPUID_INDEX_NOT_SIGNIFICANT); } static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) { - return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent); + u32 base = vcpu->arch.kvm_cpuid.base; + + if (!base) + return NULL; + + return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, base); } void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; + struct kvm_hypervisor_cpuid kvm_cpuid; best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); - if (kvm_hlt_in_guest(vcpu->kvm) && best && - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); + if (kvm_cpuid.base) { + best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); + if (kvm_hlt_in_guest(vcpu->kvm) && best) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + } if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); @@ -366,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 856e3037e74f..23dbb9eb277c 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cf37586f0466..ebf41023be38 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 992e651540e8..db007a4dffa2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, @@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ - vcpu->arch.root_mmu.root_role.word = 0; - vcpu->arch.guest_mmu.root_role.word = 0; - vcpu->arch.nested_mmu.root_role.word = 0; + vcpu->arch.root_mmu.root_role.invalid = 1; + vcpu->arch.guest_mmu.root_role.invalid = 1; + vcpu->arch.nested_mmu.root_role.invalid = 1; vcpu->arch.root_mmu.cpu_role.ext.valid = 0; vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; @@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, * by the memslot, KVM can't use a hugepage due to the * misaligned address regardless of memory attributes. */ - if (gfn >= slot->base_gfn) { + if (gfn >= slot->base_gfn && + gfn + nr_pages <= slot->base_gfn + slot->npages) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d078157e62aa..04c1f0957fea 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm, } } -/* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. - */ +static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp) +{ + /* + * All TDP MMU shadow pages share the same role as their root, aside + * from level, so it is valid to key off any shadow page to determine if + * write protection is needed for an entire tree. + */ + return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled(); +} + static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end) { - u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK; + const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; bool spte_set = false; @@ -1573,7 +1577,7 @@ retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (!(iter.old_spte & dbit)) @@ -1590,11 +1594,9 @@ retry: } /* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. + * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the + * memslot. Returns true if an SPTE has been changed and the TLBs need to be + * flushed. */ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, const struct kvm_memory_slot *slot) @@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, return spte_set; } -/* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. - */ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, unsigned long mask, bool wrprot) { - u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK : - shadow_dirty_mask; + const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; lockdep_assert_held_write(&kvm->mmu_lock); @@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, if (!mask) break; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (iter.level > PG_LEVEL_4K || @@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, } /* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. + * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for + * which a bit is set in mask, starting at gfn. The given memslot is expected to + * contain all the GFNs represented by set bits in the mask. */ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c397b28e3d1b..a593b03c9aed 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_data_cfg_mask = ~0ull; bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); - if (vcpu->kvm->arch.enable_pmu) - static_call(kvm_x86_pmu_refresh)(vcpu); + if (!vcpu->kvm->arch.enable_pmu) + return; + + static_call(kvm_x86_pmu_refresh)(vcpu); + + /* + * At RESET, both Intel and AMD CPUs set all enable bits for general + * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that + * was written for v1 PMUs don't unknowingly leave GP counters disabled + * in the global controls). Emulate that behavior when refreshing the + * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL. + */ + if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) + pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0); } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561..2f4e155080ba 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) /* CPUID level 0x80000007 (EDX). */ @@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } @@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ae0ac12382b9..759581bb2128 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -84,9 +84,10 @@ struct enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; + int ret; + + if (min_asid > max_asid) + return -ENOTTY; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -179,7 +186,8 @@ again: mutex_unlock(&sev_bitmap_lock); - return asid; + sev->asid = asid; + return 0; e_uncharge: sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); @@ -187,7 +195,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; - int asid, ret; + int ret; if (kvm->created_vcpus) return -EINVAL; - ret = -EBUSY; if (unlikely(sev->active)) - return ret; + return -EINVAL; sev->active = true; sev->es_active = argp->id == KVM_SEV_ES_INIT; - asid = sev_asid_new(sev); - if (asid < 0) + ret = sev_asid_new(sev); + if (ret) goto e_no_asid; - sev->asid = asid; init_args.probe = false; ret = sev_platform_init(&init_args); @@ -287,8 +293,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -428,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT); else pages = kmalloc(size, GFP_KERNEL_ACCOUNT); @@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", @@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) unsigned long pfn; struct page *p; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d1a9f9951635..9aaf83c8d57d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd) +{ + return page_address(sd->save_area) + 0x400; +} + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * or subsequent vmload of host save area. */ vmsave(sd->save_area_pa); - if (sev_es_guest(vcpu->kvm)) { - struct sev_es_save_area *hostsa; - hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); - - sev_es_prepare_switch_to_guest(hostsa); - } + if (sev_es_guest(vcpu->kvm)) + sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd)); if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); @@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); guest_state_enter_irqoff(); @@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) - __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted, + sev_es_host_save_area(sd)); else __svm_vcpu_run(svm, spec_ctrl_intercepted); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7f1fbd874c45..33878efdebc8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); /* vmenter.S */ -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted, + struct sev_es_save_area *hostsa); void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #define DEFINE_KVM_GHCB_ACCESSORS(field) \ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 187018c424bf..a0c8eb37d3e1 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -3,6 +3,7 @@ #include <asm/asm.h> #include <asm/asm-offsets.h> #include <asm/bitsperlong.h> +#include <asm/frame.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include "kvm-asm-offsets.h" @@ -67,7 +68,7 @@ "", X86_FEATURE_V_SPEC_CTRL 901: .endm -.macro RESTORE_HOST_SPEC_CTRL_BODY +.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req 900: /* Same for after vmexit. */ mov $MSR_IA32_SPEC_CTRL, %ecx @@ -76,7 +77,7 @@ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, * if it was not intercepted during guest execution. */ - cmpb $0, (%_ASM_SP) + cmpb $0, \spec_ctrl_intercepted jnz 998f rdmsr movl %eax, SVM_spec_ctrl(%_ASM_DI) @@ -99,6 +100,7 @@ */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP + mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 push %r15 push %r14 @@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run) RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP) 10: cmpb $0, _ASM_RIP(kvm_rebooting) jne 2b @@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run) SYM_FUNC_END(__svm_vcpu_run) +#ifdef CONFIG_KVM_AMD_SEV + + +#ifdef CONFIG_X86_64 +#define SEV_ES_GPRS_BASE 0x300 +#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE) +#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE) +#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE) +#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE) +#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE) +#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE) +#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE) +#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE) +#endif + /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) - push %_ASM_BP -#ifdef CONFIG_X86_64 - push %r15 - push %r14 - push %r13 - push %r12 -#else - push %edi - push %esi -#endif - push %_ASM_BX + FRAME_BEGIN /* - * Save variables needed after vmexit on the stack, in inverse - * order compared to when they are needed. + * Save non-volatile (callee-saved) registers to the host save area. + * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not + * saved on VMRUN. */ + mov %rbp, SEV_ES_RBP (%rdx) + mov %r15, SEV_ES_R15 (%rdx) + mov %r14, SEV_ES_R14 (%rdx) + mov %r13, SEV_ES_R13 (%rdx) + mov %r12, SEV_ES_R12 (%rdx) + mov %rbx, SEV_ES_RBX (%rdx) - /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ - push %_ASM_ARG2 - - /* Save @svm. */ - push %_ASM_ARG1 - -.ifnc _ASM_ARG1, _ASM_DI /* - * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX - * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + * Save volatile registers that hold arguments that are needed after + * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted). */ - mov %_ASM_ARG1, %_ASM_DI -.endif + mov %rdi, SEV_ES_RDI (%rdx) + mov %rsi, SEV_ES_RSI (%rdx) - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX (@hostsa). */ RESTORE_GUEST_SPEC_CTRL /* Get svm->current_vmcb->pa into RAX. */ - mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX - mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX + mov SVM_current_vmcb(%rdi), %rax + mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ sti -1: vmrun %_ASM_AX +1: vmrun %rax 2: cli - /* Pop @svm to RDI, guest registers have been saved already. */ - pop %_ASM_DI - #ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL /* @@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET_VM - /* "Pop" @spec_ctrl_intercepted. */ - pop %_ASM_BX - - pop %_ASM_BX - -#ifdef CONFIG_X86_64 - pop %r12 - pop %r13 - pop %r14 - pop %r15 -#else - pop %esi - pop %edi -#endif - pop %_ASM_BP + FRAME_END RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY %sil -3: cmpb $0, _ASM_RIP(kvm_rebooting) +3: cmpb $0, kvm_rebooting(%rip) jne 2b ud2 _ASM_EXTABLE(1b, 3b) SYM_FUNC_END(__svm_sev_es_vcpu_run) +#endif /* CONFIG_KVM_AMD_SEV */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 88659de4d2a7..c6b4b1728006 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 12ade343a17e..be40474de6e4 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) perf_capabilities = vcpu_get_perf_capabilities(vcpu); if (cpuid_model_is_consistent(vcpu) && (perf_capabilities & PMU_CAP_LBR_FMT)) - x86_perf_get_lbr(&lbr_desc->records); + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); else lbr_desc->records.nr = 0; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2bfbf758d061..f6986dee6f8c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY_VMEXIT + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c37a89eda90f..22411f4aff53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444); int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); +struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); @@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_update_exception_bitmap(vcpu); } -static u64 vmx_get_perf_capabilities(void) +static __init u64 vmx_get_perf_capabilities(void) { u64 perf_cap = PMU_CAP_FW_WRITES; - struct x86_pmu_lbr lbr; u64 host_perf_cap = 0; if (!enable_pmu) @@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) { - x86_perf_get_lbr(&lbr); - if (lbr.nr) + x86_perf_get_lbr(&vmx_lbr_caps); + + /* + * KVM requires LBR callstack support, as the overhead due to + * context switching LBRs without said support is too high. + * See intel_pmu_create_guest_lbr_event() for more info. + */ + if (!vmx_lbr_caps.has_callstack) + memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps)); + else if (vmx_lbr_caps.nr) perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; } if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 65786dbe7d60..90f9e4434646 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -15,6 +15,7 @@ #include "vmx_ops.h" #include "../cpuid.h" #include "run_flags.h" +#include "../mmu.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -109,6 +110,8 @@ struct lbr_desc { bool msr_passthrough; }; +extern struct x86_pmu_lbr vmx_lbr_caps; + /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) if (!enable_ept) return true; - return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + return allow_smaller_maxphyaddr && + cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits(); } static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47d9f03b7778..91478b769af0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { @@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 721b528da9ac..391059b2c6fb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) .popsection .pushsection .text..__x86.rethunk_safe @@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" #else /* !CONFIG_MITIGATION_SRSO */ +/* Dummy for the alternative in CALL_UNTRAIN_RET. */ +SYM_CODE_START(srso_alias_untrain_ret) + ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #define JMP_SRSO_UNTRAIN_RET "ud2" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" #endif /* CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_UNRET_ENTRY @@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ - JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ - JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS + ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) @@ -377,8 +382,15 @@ SYM_FUNC_END(call_depth_return_thunk) SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \ + defined(CONFIG_MITIGATION_SRSO) || \ + defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS +#else + ANNOTATE_UNRET_SAFE + ret +#endif int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a204a332c71f..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_leaf(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 70b91de2e053..422602f6039b 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -492,6 +492,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 104544359d69..025fd7ea5d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include <linux/memblock.h> #include <linux/init.h> +#include <asm/pgtable_areas.h> #include "numa_internal.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a7ba8e178645..df5fac428408 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) static int emit_rsb_call(u8 **pprog, void *func, void *ip) { OPTIMIZER_HIDE_VAR(func); - x86_call_depth_emit_accounting(pprog, func); + ip += x86_call_depth_emit_accounting(pprog, func, ip); return emit_patch(pprog, func, ip, 0xE8); } @@ -1972,20 +1972,17 @@ populate_extable: /* call */ case BPF_JMP | BPF_CALL: { - int offs; + u8 *ip = image + addrs[i - 1]; func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); - if (!imm32) - return -EINVAL; - offs = 7 + x86_call_depth_emit_accounting(&prog, func); - } else { - if (!imm32) - return -EINVAL; - offs = x86_call_depth_emit_accounting(&prog, func); + ip += 7; } - if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + if (!imm32) + return -EINVAL; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + if (emit_call(&prog, func, ip)) return -EINVAL; break; } @@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Direct-call fentry stub, as such it needs accounting for the * __fentry__ call. */ - x86_call_depth_emit_accounting(&prog, NULL); + x86_call_depth_emit_accounting(&prog, NULL, image); } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile index 1e36502cd738..ea343fc392dc 100644 --- a/arch/x86/virt/Makefile +++ b/arch/x86/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += vmx/ +obj-y += svm/ vmx/ diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..ab0e8448bb6e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void) u64 rmptable_size; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; if (!amd_iommu_snp_en) - return 0; + goto nosnp; if (!probed_rmp_size) goto nosnp; @@ -225,7 +225,7 @@ skip_enable: return 0; nosnp: - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return -ENOSYS; } @@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) { struct rmpentry *large_entry, *entry; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return ERR_PTR(-ENODEV); entry = get_rmpentry(pfn); @@ -363,7 +363,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index a65fc2ae15b4..77e788e928cd 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -81,7 +81,6 @@ config XEN_PVH bool "Xen PVH guest support" depends on XEN && XEN_PVHVM && ACPI select PVH - def_bool n help Support for running as a Xen PVH guest. |