diff options
Diffstat (limited to 'arch')
49 files changed, 343 insertions, 196 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b10515c0200b..78f20e632712 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1037,6 +1037,19 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_ERRATUM_2966298 + bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + default y + help + This option adds the workaround for ARM Cortex-A520 erratum 2966298. + + On an affected Cortex-A520 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 6f85a05ee7e1..dcf6e4846ac9 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -185,7 +185,7 @@ #size-cells = <1>; ranges; - anomix_ns_gpr: syscon@44210000 { + aonmix_ns_gpr: syscon@44210000 { compatible = "fsl,imx93-aonmix-ns-syscfg", "syscon"; reg = <0x44210000 0x1000>; }; @@ -319,6 +319,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&aonmix_ns_gpr 0x14 0>; status = "disabled"; }; @@ -591,6 +592,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&wakeupmix_gpr 0x0c 2>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 36ef2dbe8add..3ee9266fa8e9 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -905,7 +905,7 @@ status = "disabled"; }; - sata_phy: t-phy@1a243000 { + sata_phy: t-phy { compatible = "mediatek,mt7622-tphy", "mediatek,generic-tphy-v1"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 68539ea788df..24eda00e320d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -434,7 +434,7 @@ }; }; - pcie_phy: t-phy@11c00000 { + pcie_phy: t-phy { compatible = "mediatek,mt7986-tphy", "mediatek,generic-tphy-v2"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index b2485ddfd33b..5d635085fe3f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -48,7 +48,7 @@ memory@40000000 { device_type = "memory"; - reg = <0 0x40000000 0 0x80000000>; + reg = <0 0x40000000 0x2 0x00000000>; }; reserved-memory { @@ -56,13 +56,8 @@ #size-cells = <2>; ranges; - /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ - bl31_secmon_reserved: secmon@54600000 { - no-map; - reg = <0 0x54600000 0x0 0x200000>; - }; - - /* 12 MiB reserved for OP-TEE (BL32) + /* + * 12 MiB reserved for OP-TEE (BL32) * +-----------------------+ 0x43e0_0000 * | SHMEM 2MiB | * +-----------------------+ 0x43c0_0000 @@ -75,6 +70,34 @@ no-map; reg = <0 0x43200000 0 0x00c00000>; }; + + scp_mem: memory@50000000 { + compatible = "shared-dma-pool"; + reg = <0 0x50000000 0 0x2900000>; + no-map; + }; + + vpu_mem: memory@53000000 { + compatible = "shared-dma-pool"; + reg = <0 0x53000000 0 0x1400000>; /* 20 MB */ + }; + + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ + bl31_secmon_mem: memory@54600000 { + no-map; + reg = <0 0x54600000 0x0 0x200000>; + }; + + snd_dma_mem: memory@60000000 { + compatible = "shared-dma-pool"; + reg = <0 0x60000000 0 0x1100000>; + no-map; + }; + + apu_mem: memory@62000000 { + compatible = "shared-dma-pool"; + reg = <0 0x62000000 0 0x1400000>; /* 20 MB */ + }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index a9e52b50c8c4..54c674c45b49 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -313,6 +313,7 @@ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH 0>; cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>; + status = "fail"; }; dmic_codec: dmic-codec { diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index a7c3020a5de4..06c53000bb74 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3958,7 +3958,7 @@ pdc: interrupt-controller@b220000 { compatible = "qcom,sm8150-pdc", "qcom,pdc"; - reg = <0 0x0b220000 0 0x400>; + reg = <0 0x0b220000 0 0x30000>; qcom,pdc-ranges = <0 480 94>, <94 609 31>, <125 63 1>; #interrupt-cells = <2>; diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 4d537d56eb84..6792a1f83f2a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include <linux/cpuidle.h> #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> @@ -44,6 +45,24 @@ #define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ trbe_interrupt) + sizeof(u16)) +/* + * ArmĀ® Functional Fixed Hardware Specification Version 1.2. + * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f6f84837a49..74d00feb62f0 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 @@ -148,6 +149,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index be66e94a21bd..5706e74c5578 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -730,6 +730,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + { + .desc = "ARM erratum 2966298", + .capability = ARM64_WORKAROUND_2966298, + /* Cortex-A520 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + }, +#endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 { .desc = "AmpereOne erratum AC03_CPU_38", diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad61de03d0a..a6030913cd58 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,6 +428,10 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 +alternative_if ARM64_WORKAROUND_2966298 + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c3f06fdef609..dea3dc89234b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,6 +84,7 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 +WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 6d28b5514699..ee9e071859b2 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -2,39 +2,42 @@ #ifndef __PARISC_LDCW_H #define __PARISC_LDCW_H -#ifndef CONFIG_PA20 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, and GCC only guarantees 8-byte alignment for stack locals, we can't be assured of 16-byte alignment for atomic lock data even if we specify "__attribute ((aligned(16)))" in the type declaration. So, we use a struct containing an array of four ints for the atomic lock type and dynamically select the 16-byte aligned int from the array - for the semaphore. */ + for the semaphore. */ + +/* From: "Jim Hull" <jim.hull of hp.com> + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is implemented, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). + + Although the cache control hint is accepted by all PA 2.0 processors, + it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still + require 16-byte alignment. If the address is unaligned, the operation + of the instruction is undefined. The ldcw instruction does not generate + unaligned data reference traps so misaligned accesses are not detected. + This hid the problem for years. So, restore the 16-byte alignment dropped + by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ #define __PA_LDCW_ALIGNMENT 16 -#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ & ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) -#define __LDCW "ldcw" -#else /*CONFIG_PA20*/ -/* From: "Jim Hull" <jim.hull of hp.com> - I've attached a summary of the change, but basically, for PA 2.0, as - long as the ",CO" (coherent operation) completer is specified, then the - 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead - they only require "natural" alignment (4-byte for ldcw, 8-byte for - ldcd). */ - -#define __PA_LDCW_ALIGNMENT 4 -#define __PA_LDCW_ALIGN_ORDER 2 -#define __ldcw_align(a) (&(a)->slock) +#ifdef CONFIG_PA20 #define __LDCW "ldcw,co" - -#endif /*!CONFIG_PA20*/ +#else +#define __LDCW "ldcw" +#endif /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. We don't explicitly expose that "*a" may be written as reload diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index efd06a897c6a..7b986b09dba8 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -9,15 +9,10 @@ #ifndef __ASSEMBLY__ typedef struct { -#ifdef CONFIG_PA20 - volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL } -#else volatile unsigned int lock[4]; # define __ARCH_SPIN_LOCK_UNLOCKED \ { { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \ __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } } -#endif } arch_spinlock_t; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 4098f9a0964b..2019c1f04bd0 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -440,7 +440,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (cpu_online(cpu)) return 0; - if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle)) + if (num_online_cpus() < nr_cpu_ids && + num_online_cpus() < setup_max_cpus && + smp_boot_one_cpu(cpu, tidle)) return -EIO; return cpu_online(cpu) ? 0 : -EIO; diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 21f681ee535a..e6fe1d5731f2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte) #define pte_wrprotect pte_wrprotect +static inline int pte_read(pte_t pte) +{ + return (pte_val(pte) & _PAGE_RO) != _PAGE_NA; +} + +#define pte_read pte_read + static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RO); diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 5cd9acf58a7d..eb6891e34cbd 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if (pte_young(*ptep)) + if (!pte_young(*ptep)) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 56ea48276356..c721478c5934 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_RW; } #endif +#ifndef pte_read static inline int pte_read(pte_t pte) { return 1; } +#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9692acb0361f..7eda33a24bb4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -137,8 +137,9 @@ ret_from_syscall: lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -172,10 +173,11 @@ syscall_exit_finish: b 1b #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 97e9ea0c7297..0f1641a31250 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -395,7 +395,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index bae45b358a09..2b0cac6fb61f 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,9 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 @@ -196,7 +193,7 @@ plpar_hcall_trace: HVSC - ld r12,STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) @@ -296,9 +293,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1329e060c548..b43a6bb7e4dc 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -6,7 +6,6 @@ # for more details. # -OBJCOPYFLAGS := -O binary LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile index 2d644e19caef..6278c389b801 100644 --- a/arch/riscv/errata/andes/Makefile +++ b/arch/riscv/errata/andes/Makefile @@ -1 +1,5 @@ +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_errata.o := -mcmodel=medany +endif + obj-y += errata.o diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 740a979171e5..2b2f5df7ef2c 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -31,6 +31,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* + * Let's do like x86/arm64 and ignore the compat syscalls. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __riscv_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __riscv_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} + struct dyn_arch_ftrace { }; #endif diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index e7882ccb0fd4..78ea44f76718 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,6 +40,15 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool kprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h index f2183e00fdd2..3fc7deda9190 100644 --- a/arch/riscv/include/asm/uprobes.h +++ b/arch/riscv/include/asm/uprobes.h @@ -34,7 +34,18 @@ struct arch_uprobe { bool simulate; }; +#ifdef CONFIG_UPROBES bool uprobe_breakpoint_handler(struct pt_regs *regs); bool uprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool uprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ #endif /* _ASM_RISCV_UPROBES_H */ diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index a8efa053c4a5..9cc0a7669271 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -60,7 +60,7 @@ static void init_irq_stacks(void) } #endif /* CONFIG_VMAP_STACK */ -#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK void do_softirq_own_stack(void) { #ifdef CONFIG_IRQ_STACKS @@ -92,7 +92,7 @@ void do_softirq_own_stack(void) #endif __do_softirq(); } -#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */ +#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else static void init_irq_stacks(void) {} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e600aab116a4..aac853ae4eb7 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -173,19 +173,6 @@ static void __init init_resources(void) if (ret < 0) goto error; -#ifdef CONFIG_KEXEC_CORE - if (crashk_res.start != crashk_res.end) { - ret = add_resource(&iomem_resource, &crashk_res); - if (ret < 0) - goto error; - } - if (crashk_low_res.start != crashk_low_res.end) { - ret = add_resource(&iomem_resource, &crashk_low_res); - if (ret < 0) - goto error; - } -#endif - #ifdef CONFIG_CRASH_DUMP if (elfcorehdr_size > 0) { elfcorehdr_res.start = elfcorehdr_addr; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 180d951d3624..21a4d0e111bc 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -311,13 +311,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig, /* Align the stack frame. */ sp &= ~0xfUL; - /* - * Fail if the size of the altstack is not large enough for the - * sigframe construction. - */ - if (current->sas_ss_size && sp < current->sas_ss_sp) - return (void __user __force *)-1UL; - return (void __user *)sp; } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 19807c4d3805..fae8f610d867 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -13,6 +13,8 @@ #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/kprobes.h> +#include <linux/uprobes.h> +#include <asm/uprobes.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/irq.h> @@ -247,22 +249,28 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } +static bool probe_single_step_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_single_step_handler(regs) : kprobe_single_step_handler(regs); +} + +static bool probe_breakpoint_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_breakpoint_handler(regs) : kprobe_breakpoint_handler(regs); +} + void handle_break(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs)) + if (probe_single_step_handler(regs)) return; - if (kprobe_breakpoint_handler(regs)) - return; -#endif -#ifdef CONFIG_UPROBES - if (uprobe_single_step_handler(regs)) + if (probe_breakpoint_handler(regs)) return; - if (uprobe_breakpoint_handler(regs)) - return; -#endif current->thread.bad_cause = regs->cause; if (user_mode(regs)) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index ecd3ae6f4116..8581693e62d3 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -245,7 +245,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) - emit_mv(RV_REG_A0, RV_REG_A5, ctx); + emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); @@ -759,8 +759,10 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; - if (save_ret) - emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx); + if (save_ret) { + emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); + } /* update branch with beqz */ if (ctx->insns) { @@ -853,7 +855,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { - stack_size += 8; + stack_size += 16; /* Save both A5 (BPF R0) and A0 */ retval_off = stack_size; } @@ -957,6 +959,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (ret) goto out; emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); im->ip_after_call = ctx->insns + ctx->ninsns; /* 2 nops reserved for auipc+jalr pair */ emit(rv_nop(), ctx); @@ -988,8 +991,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_args(nregs, args_off, ctx); - if (save_ret) + if (save_ret) { emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); + emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx); + } emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); @@ -1515,7 +1520,8 @@ out_be: if (ret) return ret; - emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); + if (insn->src_reg != BPF_PSEUDO_CALL) + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); break; } /* tail call */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index de2fb12120d2..e507692e51e7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2066,6 +2066,7 @@ struct bpf_tramp_jit { * func_addr's original caller */ int stack_size; /* Trampoline stack size */ + int backchain_off; /* Offset of backchain */ int stack_args_off; /* Offset of stack arguments for calling * func_addr, has to be at the top */ @@ -2086,9 +2087,10 @@ struct bpf_tramp_jit { * for __bpf_prog_enter() return value and * func_addr respectively */ - int r14_off; /* Offset of saved %r14 */ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */ int tccnt_off; /* Offset of saved tailcall counter */ + int r14_off; /* Offset of saved %r14, has to be at the + * bottom */ int do_fexit; /* do_fexit: label */ }; @@ -2247,8 +2249,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, * Calculate the stack layout. */ - /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */ + /* + * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x + * ABI requires, put our backchain at the end of the allocated memory. + */ tjit->stack_size = STACK_FRAME_OVERHEAD; + tjit->backchain_off = tjit->stack_size - sizeof(u64); tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64)); tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64)); tjit->ip_off = alloc_stack(tjit, sizeof(u64)); @@ -2256,16 +2262,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64)); tjit->retval_off = alloc_stack(tjit, sizeof(u64)); tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64)); - tjit->r14_off = alloc_stack(tjit, sizeof(u64)); tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); - /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */ - tjit->stack_size -= STACK_FRAME_OVERHEAD; + tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2); + /* + * In accordance with the s390x ABI, the caller has allocated + * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's + * backchain, and the rest we can use. + */ + tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64); tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; + /* lgr %r1,%r15 */ + EMIT4(0xb9040000, REG_1, REG_15); /* aghi %r15,-stack_size */ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); + /* stg %r1,backchain_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, + tjit->backchain_off); /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ _EMIT6(0xd203f000 | tjit->tccnt_off, 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); @@ -2513,7 +2528,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, return -E2BIG; } - return ret; + return tjit.common.prg; } bool bpf_jit_supports_subprog_tailcalls(void) diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index 76b1f8bb0fd5..dab4ed199227 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/insn.h> +#include <linux/mm.h> #include "perf_event.h" @@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort, * The LBR logs any address in the IP, even if the IP just * faulted. This means userspace can control the from address. * Ensure we don't blindly read any address by validating it is - * a known text address. + * a known text address and not a vsyscall address. */ - if (kernel_text_address(from)) { + if (kernel_text_address(from) && !in_gate_area_no_mm(from)) { addr = (void *)from; /* * Assume we can get the maximum possible size diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 783ed339f341..21556ad87f4b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,6 +7,8 @@ * Author : K. Y. Srinivasan <kys@microsoft.com> */ +#define pr_fmt(fmt) "Hyper-V: " fmt + #include <linux/efi.h> #include <linux/types.h> #include <linux/bitfield.h> @@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; if (!hv_reenlightenment_available()) { - pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + pr_warn("reenlightenment support is unavailable\n"); return; } @@ -394,6 +396,7 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) static u8 __init get_vtl(void) { u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; @@ -416,13 +419,16 @@ static u8 __init get_vtl(void) if (hv_result_success(ret)) { ret = output->as64.low & HV_X64_VTL_MASK; } else { - pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); - ret = 0; + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); } local_irq_restore(flags); return ret; } +#else +static inline u8 get_vtl(void) { return 0; } +#endif /* * This function is to be invoked early in the boot sequence after the @@ -564,7 +570,7 @@ skip_hypercall_pg_init: if (cpu_feature_enabled(X86_FEATURE_IBT) && *(u32 *)hv_hypercall_pg != gen_endbr()) { setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + pr_warn("Disabling IBT because of Hyper-V bug\n"); } #endif @@ -604,8 +610,10 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) - ms_hyperv.vtl = get_vtl(); + ms_hyperv.vtl = get_vtl(); + + if (ms_hyperv.vtl > 0) /* non default VTL */ + hv_vtl_early_init(); return; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 36a562218010..999f5ac82fe9 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) return hv_vtl_bringup_vcpu(vp_id, start_eip); } -static int __init hv_vtl_early_init(void) +int __init hv_vtl_early_init(void) { /* * `boot_cpu_has` returns the runtime feature support, @@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void) return 0; } -early_initcall(hv_vtl_early_init); diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 033b53f993c6..896445edc6a8 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } #ifdef CONFIG_HYPERV_VTL_MODE void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); #else static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } #endif #include <asm-generic/mshyperv.h> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1d111350197f..b37abb55e948 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -637,12 +637,17 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e -/* Fam 17h MSRs */ -#define MSR_F17H_IRPERF 0xc00000e9 +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +/* Zen 2 */ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ad98dd1d9cfb..c31c633419fe 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -bool smp_park_other_cpus_in_init(void); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 517ee01503be..73be3931e4f0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 insn_buff[MAX_PATCH_LEN]; DPRINTK(ALT, "alt table %px, -> %px", start, end); + + /* + * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using + * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. + * During the process, KASAN becomes confused seeing partial LA57 + * conversion and triggers a false-positive out-of-bound report. + * + * Disable KASAN until the patching is complete. + */ + kasan_disable_current(); + /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, text_poke_early(instr, insn_buff, insn_buff_sz); } + + kasan_enable_current(); } static inline bool is_jcc32(struct insn *insn) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 03ef962a6992..ece2b5b7b0fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -80,6 +80,10 @@ static const int amd_div0[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1149,6 +1153,10 @@ static void init_amd(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); } + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ded1fc7cb7cb..f136ac046851 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -30,15 +30,15 @@ struct rmid_entry { struct list_head list; }; -/** - * @rmid_free_lru A least recently used list of free RMIDs +/* + * @rmid_free_lru - A least recently used list of free RMIDs * These RMIDs are guaranteed to have an occupancy less than the * threshold occupancy */ static LIST_HEAD(rmid_free_lru); -/** - * @rmid_limbo_count count of currently unused but (potentially) +/* + * @rmid_limbo_count - count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that * may have a occupancy value > resctrl_rmid_realloc_threshold. User can @@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru); */ static unsigned int rmid_limbo_count; -/** +/* * @rmid_entry - The entry in the limbo and free lists. */ static struct rmid_entry *rmid_ptrs; diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb..dcf325b7b022 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -256,7 +256,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) return 0; } -static int sev_cpuid_hv(struct cpuid_leaf *leaf) +static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf) { int ret; @@ -279,6 +279,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf) return ret; } +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + u32 cr4 = native_read_cr4(); + int ret; + + ghcb_set_rax(ghcb, leaf->fn); + ghcb_set_rcx(ghcb, leaf->subfn); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #UD - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + leaf->eax = ghcb->save.rax; + leaf->ebx = ghcb->save.rbx; + leaf->ecx = ghcb->save.rcx; + leaf->edx = ghcb->save.rdx; + + return ES_OK; +} + +static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf) + : __sev_cpuid_hv_msr(leaf); +} + /* * This may be called early while still running on the initial identity * mapping. Use RIP-relative addressing to obtain the correct address @@ -388,19 +427,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) return false; } -static void snp_cpuid_hv(struct cpuid_leaf *leaf) +static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { - if (sev_cpuid_hv(leaf)) + if (sev_cpuid_hv(ghcb, ctxt, leaf)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); } -static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) +static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + struct cpuid_leaf *leaf) { struct cpuid_leaf leaf_hv = *leaf; switch (leaf->fn) { case 0x1: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* initial APIC ID */ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); @@ -419,7 +459,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) break; case 0xB: leaf_hv.subfn = 0; - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->edx = leaf_hv.edx; @@ -467,7 +507,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) } break; case 0x8000001E: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->eax = leaf_hv.eax; @@ -488,7 +528,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct cpuid_leaf *leaf) +static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -522,7 +562,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf) return 0; } - return snp_cpuid_postprocess(leaf); + return snp_cpuid_postprocess(ghcb, ctxt, leaf); } /* @@ -544,14 +584,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(NULL, NULL, &leaf); if (!ret) goto cpuid_done; if (ret != -EOPNOTSUPP) goto fail; - if (sev_cpuid_hv(&leaf)) + if (__sev_cpuid_hv_msr(&leaf)) goto fail; cpuid_done: @@ -848,14 +888,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } -static int vc_handle_cpuid_snp(struct pt_regs *regs) +static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { + struct pt_regs *regs = ctxt->regs; struct cpuid_leaf leaf; int ret; leaf.fn = regs->ax; leaf.subfn = regs->cx; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(ghcb, ctxt, &leaf); if (!ret) { regs->ax = leaf.eax; regs->bx = leaf.ebx; @@ -874,7 +915,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, enum es_result ret; int snp_cpuid_ret; - snp_cpuid_ret = vc_handle_cpuid_snp(regs); + snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt); if (!snp_cpuid_ret) return ES_OK; if (snp_cpuid_ret != -EOPNOTSUPP) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f60..d8c1e3be74c0 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -868,8 +868,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned long npages) void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - unsigned long vaddr; - unsigned int npages; + unsigned long vaddr, npages; if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 6eb06d001bcc..96a771f9f930 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * Disable virtualization, APIC etc. and park the CPU in a HLT loop + * this function calls the 'stop' function on all other CPUs in the system. */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait) * 2) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * 3) If the system uses INIT/STARTUP for CPU bringup, then - * send all present CPUs an INIT vector, which brings them - * completely out of the way. + * 3) If #2 timed out send an NMI to the CPUs which did not + * yet report * - * 4) If #3 is not possible and #2 timed out send an NMI to the - * CPUs which did not yet report - * - * 5) Wait for all other CPUs to report that they reached the + * 4) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * #4 can obviously race against a CPU reaching the HLT loop late. + * #3 can obviously race against a CPU reaching the HLT loop late. * That CPU will have reported already and the "have all CPUs * reached HLT" condition will be true despite the fact that the * other CPU is still handling the NMI. Again, there is no @@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait) /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI/INIT shutdown in case that not all + * prevent an NMI shutdown attempt in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; @@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait) udelay(1); } - /* - * Park all other CPUs in INIT including "offline" CPUs, if - * possible. That's a safe place where they can't resume execution - * of HLT and then execute the HLT loop from overwritten text or - * page tables. - * - * The only downside is a broadcast MCE, but up to the point where - * the kexec() kernel brought all APs online again an MCE will just - * make HLT resume and handle the MCE. The machine crashes and burns - * due to overwritten text, page tables and data. So there is a - * choice between fire and frying pan. The result is pretty much - * the same. Chose frying pan until x86 provides a sane mechanism - * to park a CPU. - */ - if (smp_park_other_cpus_in_init()) - goto done; - - /* - * If park with INIT was not possible and the REBOOT_VECTOR didn't - * take all secondary CPUs offline, try with the NMI. - */ + /* if the REBOOT_VECTOR didn't work, try with the NMI */ if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler @@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait) udelay(1); } -done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 48e040618731..2a187c0cbd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } -bool smp_park_other_cpus_in_init(void) -{ - unsigned int cpu, this_cpu = smp_processor_id(); - unsigned int apicid; - - if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) - return false; - - /* - * If this is a crash stop which does not execute on the boot CPU, - * then this cannot use the INIT mechanism because INIT to the boot - * CPU will reset the machine. - */ - if (this_cpu) - return false; - - for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { - if (cpu == this_cpu) - continue; - apicid = apic->cpu_present_to_apicid(cpu); - if (apicid == BAD_APICID) - continue; - send_init_sequence(apicid); - } - return true; -} - /* * Early setup to make printk work. */ diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } |