From 989b5cfaa7b6054f4e1bde914470ee091c23e6a5 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 9 Jul 2024 08:40:46 -0700 Subject: x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param() Depending on whether FRED is enabled, sysvec_install() installs a system interrupt handler either into FRED's system vector dispatch table or into the IDT. However FRED can be disabled later in trap_init(), after sysvec_install() has been invoked already; e.g., the HYPERVISOR_CALLBACK_VECTOR handler is registered with sysvec_install() in kvm_guest_init(), which is called in setup_arch() but way before trap_init(). IOW, there is a gap between FRED is available and available but disabled. As a result, when FRED is available but disabled, early sysvec_install() invocations fail to install the IDT handler resulting in spurious interrupts. Fix it by parsing cmdline param "fred=" in cpu_parse_early_param() to ensure that FRED is disabled before the first sysvec_install() invocation. Fixes: 3810da12710a ("x86/fred: Add a fred= cmdline param") Reported-by: Hou Wenlong Suggested-by: Thomas Gleixner Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240709154048.3543361-2-xin@zytor.com --- arch/x86/kernel/cpu/common.c | 5 +++++ arch/x86/kernel/traps.c | 26 -------------------------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d4e539d4e158..10a5402d8297 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void) if (cmdline_find_option_bool(boot_command_line, "nousershstk")) setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK); + /* Minimize the gap between FRED is available and available but disabled. 
*/ + arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg)); + if (arglen != 2 || strncmp(arg, "on", 2)) + setup_clear_cpu_cap(X86_FEATURE_FRED); + arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); if (arglen <= 0) return; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4fa0b17e5043..6afb41e6cbbb 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1402,34 +1402,8 @@ DEFINE_IDTENTRY_SW(iret_error) } #endif -/* Do not enable FRED by default yet. */ -static bool enable_fred __ro_after_init = false; - -#ifdef CONFIG_X86_FRED -static int __init fred_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!cpu_feature_enabled(X86_FEATURE_FRED)) - return 0; - - if (!strcmp(str, "on")) - enable_fred = true; - else if (!strcmp(str, "off")) - enable_fred = false; - else - pr_warn("invalid FRED option: 'fred=%s'\n", str); - return 0; -} -early_param("fred", fred_setup); -#endif - void __init trap_init(void) { - if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred) - setup_clear_cpu_cap(X86_FEATURE_FRED); - /* Init cpu_entry_area before IST entries are set up */ setup_cpu_entry_areas(); -- cgit From 73270c1f2369fb37683121ebe097cd37172602b6 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 9 Jul 2024 08:40:47 -0700 Subject: x86/fred: Move FRED RSP initialization into a separate function To enable FRED earlier, move the RSP initialization out of cpu_init_fred_exceptions() into cpu_init_fred_rsps(). This is required as the FRED RSP initialization depends on the availability of the CPU entry areas which are set up late in trap_init(). No functional change intended. Marked with Fixes as it's a dependency for the real fix. 
Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240709154048.3543361-3-xin@zytor.com --- arch/x86/include/asm/fred.h | 2 ++ arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/fred.c | 28 +++++++++++++++++++--------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index e86c7ba32435..66d7dbe2d314 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -84,11 +84,13 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int } void cpu_init_fred_exceptions(void); +void cpu_init_fred_rsps(void); void fred_complete_exception_setup(void); #else /* CONFIG_X86_FRED */ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; } static inline void cpu_init_fred_exceptions(void) { } +static inline void cpu_init_fred_rsps(void) { } static inline void fred_complete_exception_setup(void) { } static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } #endif /* CONFIG_X86_FRED */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10a5402d8297..6de12b3c1b04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2195,10 +2195,12 @@ void cpu_init_exception_handling(void) /* GHCB needs to be setup to handle #VC. 
*/ setup_ghcb(); - if (cpu_feature_enabled(X86_FEATURE_FRED)) + if (cpu_feature_enabled(X86_FEATURE_FRED)) { cpu_init_fred_exceptions(); - else + cpu_init_fred_rsps(); + } else { load_current_idt(); + } } /* diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 4bcd8791ad96..99a134fcd5bf 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -32,6 +32,25 @@ void cpu_init_fred_exceptions(void) FRED_CONFIG_INT_STKLVL(0) | FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user)); + wrmsrl(MSR_IA32_FRED_STKLVLS, 0); + wrmsrl(MSR_IA32_FRED_RSP0, 0); + wrmsrl(MSR_IA32_FRED_RSP1, 0); + wrmsrl(MSR_IA32_FRED_RSP2, 0); + wrmsrl(MSR_IA32_FRED_RSP3, 0); + + /* Enable FRED */ + cr4_set_bits(X86_CR4_FRED); + /* Any further IDT use is a bug */ + idt_invalidate(); + + /* Use int $0x80 for 32-bit system calls in FRED mode */ + setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); +} + +/* Must be called after setup_cpu_entry_areas() */ +void cpu_init_fred_rsps(void) +{ /* * The purpose of separate stacks for NMI, #DB and #MC *in the kernel* * (remember that user space faults are always taken on stack level 0) @@ -47,13 +66,4 @@ void cpu_init_fred_exceptions(void) wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB)); wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI)); wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF)); - - /* Enable FRED */ - cr4_set_bits(X86_CR4_FRED); - /* Any further IDT use is a bug */ - idt_invalidate(); - - /* Use int $0x80 for 32-bit system calls in FRED mode */ - setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); - setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } -- cgit From a97756cbec448032f84b5bbfe4e101478d1e01e0 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 9 Jul 2024 08:40:48 -0700 Subject: x86/fred: Enable FRED right after init_mem_mapping() On 64-bit init_mem_mapping() relies on the minimal page fault handler provided by the early IDT mechanism. 
The real page fault handler is installed right afterwards into the IDT. This is problematic on CPUs which have X86_FEATURE_FRED set because the real page fault handler retrieves the faulting address from the FRED exception stack frame and not from CR2, but that does obviously not work when FRED is not yet enabled in the CPU. To prevent this enable FRED right after init_mem_mapping() without interrupt stacks. Those are enabled later in trap_init() after the CPU entry area is set up. [ tglx: Encapsulate the FRED details ] Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Reported-by: Hou Wenlong Suggested-by: Thomas Gleixner Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240709154048.3543361-4-xin@zytor.com --- arch/x86/include/asm/processor.h | 3 ++- arch/x86/kernel/cpu/common.c | 15 +++++++++++++-- arch/x86/kernel/setup.c | 7 ++++++- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/traps.c | 2 +- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a75a07f4931f..399f7d1c4c61 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); -extern void cpu_init_exception_handling(void); +extern void cpu_init_exception_handling(bool boot_cpu); +extern void cpu_init_replace_early_idt(void); extern void cr4_init(void); extern void set_task_blockstep(struct task_struct *task, bool on); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6de12b3c1b04..a4735d9b5a1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2176,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss) * Setup everything needed to handle exceptions from the IDT, including the IST * exceptions 
which use paranoid_entry(). */ -void cpu_init_exception_handling(void) +void cpu_init_exception_handling(bool boot_cpu) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); int cpu = raw_smp_processor_id(); @@ -2196,13 +2196,24 @@ void cpu_init_exception_handling(void) setup_ghcb(); if (cpu_feature_enabled(X86_FEATURE_FRED)) { - cpu_init_fred_exceptions(); + /* The boot CPU has enabled FRED during early boot */ + if (!boot_cpu) + cpu_init_fred_exceptions(); + cpu_init_fred_rsps(); } else { load_current_idt(); } } +void __init cpu_init_replace_early_idt(void) +{ + if (cpu_feature_enabled(X86_FEATURE_FRED)) + cpu_init_fred_exceptions(); + else + idt_setup_early_pf(); +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT. We diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6129dc2ba784..f1fea506e20f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p) init_mem_mapping(); - idt_setup_early_pf(); + /* + * init_mem_mapping() relies on the early IDT page fault handling. + * Now either enable FRED or install the real page fault handler + * for 64-bit in the IDT. 
+ */ + cpu_init_replace_early_idt(); /* * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0c35207320cb..dc4fff8fccce 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused) __flush_tlb_all(); } - cpu_init_exception_handling(); + cpu_init_exception_handling(false); /* * Load the microcode before reaching the AP alive synchronization diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6afb41e6cbbb..197d5888b0e2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1411,7 +1411,7 @@ void __init trap_init(void) sev_es_init_vc_handling(); /* Initialize TSS before setting up traps so ISTs work */ - cpu_init_exception_handling(); + cpu_init_exception_handling(true); /* Setup traps as cpu_init() might #GP */ if (!cpu_feature_enabled(X86_FEATURE_FRED)) -- cgit From 723edbd2ca5fb4c78ac4a5644511c63895fd1c57 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Fri, 16 Aug 2024 03:43:16 -0700 Subject: x86/fred: Set SS to __KERNEL_DS when enabling FRED SS is initialized to NULL during boot time and not explicitly set to __KERNEL_DS. With FRED enabled, if a kernel event is delivered before a CPU goes to user level for the first time, its SS is NULL thus NULL is pushed into the SS field of the FRED stack frame. But before ERETS is executed, the CPU may context switch to another task and go to user level. Then when the CPU comes back to kernel mode, SS is changed to __KERNEL_DS. Later when ERETS is executed to return from the kernel event handler, a #GP fault is generated because SS doesn't match the SS saved in the FRED stack frame. Initialize SS to __KERNEL_DS when enabling FRED to prevent that. Note, IRET doesn't check if SS matches the SS saved in its stack frame, thus IDT doesn't have this problem. 
For IDT it doesn't matter whether SS is set to __KERNEL_DS or not, because it's set to NULL upon interrupt or exception delivery and __KERNEL_DS upon SYSCALL. Thus it's pointless to initialize SS for IDT. Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240816104316.2276968-1-xin@zytor.com --- arch/x86/kernel/fred.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 99a134fcd5bf..266c69e332a4 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -26,6 +26,20 @@ void cpu_init_fred_exceptions(void) /* When FRED is enabled by default, remove this log message */ pr_info("Initialize FRED on CPU%d\n", smp_processor_id()); + /* + * If a kernel event is delivered before a CPU goes to user level for + * the first time, its SS is NULL thus NULL is pushed into the SS field + * of the FRED stack frame. But before ERETS is executed, the CPU may + * context switch to another task and go to user level. Then when the + * CPU comes back to kernel mode, SS is changed to __KERNEL_DS. Later + * when ERETS is executed to return from the kernel event handler, a #GP + * fault is generated because SS doesn't match the SS saved in the FRED + * stack frame. + * + * Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs. + */ + loadsegment(ss, __KERNEL_DS); + wrmsrl(MSR_IA32_FRED_CONFIG, /* Reserve for CALL emulation */ FRED_CONFIG_REDZONE | -- cgit From 0dfac6f267fa091aa348c6a6742b463c9e7c98e3 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Thu, 22 Aug 2024 00:39:04 -0700 Subject: x86/entry: Test ti_work for zero before processing individual bits In most cases, ti_work values passed to arch_exit_to_user_mode_prepare() are zeros, e.g., 99% in kernel build tests. So an obvious optimization is to test ti_work for zero before processing individual bits in it. 
Omit the optimization when FPU debugging is enabled, otherwise the FPU consistency check is never executed. Intel 0day tests did not find a performance regression with this change. Suggested-by: H. Peter Anvin (Intel) Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240822073906.2176342-2-xin@zytor.com --- arch/x86/include/asm/entry-common.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index fb2809b20b0a..db970828f385 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -44,8 +44,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) } #define arch_enter_from_user_mode arch_enter_from_user_mode -static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, - unsigned long ti_work) +static inline void arch_exit_work(unsigned long ti_work) { if (ti_work & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); @@ -56,6 +55,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, fpregs_assert_state_consistent(); if (unlikely(ti_work & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); +} + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work)) + arch_exit_work(ti_work); #ifdef CONFIG_COMPAT /* -- cgit From efe508816d2caf83536ff2f308e09043380fb2b7 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Thu, 22 Aug 2024 00:39:05 -0700 Subject: x86/msr: Switch between WRMSRNS and WRMSR with the alternatives mechanism Per the discussion about FRED MSR writes with WRMSRNS instruction [1], use the alternatives mechanism to choose WRMSRNS when it's available, otherwise fall back to WRMSR. Remove the dependency on X86_FEATURE_WRMSRNS as WRMSRNS is no longer dependent on FRED. 
[1] https://lore.kernel.org/lkml/15f56e6a-6edd-43d0-8e83-bb6430096514@citrix.com/ Use DS prefix to pad WRMSR instead of a NOP. The prefix is ignored. At least that's the current information from the hardware folks. Signed-off-by: Andrew Cooper Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240822073906.2176342-3-xin@zytor.com --- arch/x86/include/asm/msr.h | 25 +++++++++++-------------- arch/x86/include/asm/switch_to.h | 1 - arch/x86/kernel/cpu/cpuid-deps.c | 1 - 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index d642037f9ed5..001853541f1e 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) : : "c" (msr), "a"(low), "d" (high) : "memory"); } -/* - * WRMSRNS behaves exactly like WRMSR with the only difference being - * that it is not a serializing instruction by default. - */ -static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high) -{ - /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */ - asm volatile("1: .byte 0x0f,0x01,0xc6\n" - "2:\n" - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) - : : "c" (msr), "a"(low), "d" (high)); -} - #define native_rdmsr(msr, val1, val2) \ do { \ u64 __val = __rdmsr((msr)); \ @@ -312,9 +299,19 @@ do { \ #endif /* !CONFIG_PARAVIRT_XXL */ +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ +#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) + +/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */ static __always_inline void wrmsrns(u32 msr, u64 val) { - __wrmsrns(msr, val, val >> 32); + /* + * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant + * DS prefix to avoid a trailing NOP. 
+ */ + asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS) + "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32))); } /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index c3bd0c0758c9..e9ded149a9e3 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -71,7 +71,6 @@ static inline void update_task_stack(struct task_struct *task) this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else if (cpu_feature_enabled(X86_FEATURE_FRED)) { - /* WRMSRNS is a baseline feature for FRED. */ wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE); } else if (cpu_feature_enabled(X86_FEATURE_XENPV)) { /* Xen PV enters the kernel on the thread stack. */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b7d9f530ae16..8bd84114c2d9 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, { X86_FEATURE_FRED, X86_FEATURE_LKGS }, - { X86_FEATURE_FRED, X86_FEATURE_WRMSRNS }, {} }; -- cgit From fe85ee391966c4cf3bfe1c405314e894c951f521 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Thu, 22 Aug 2024 00:39:06 -0700 Subject: x86/entry: Set FRED RSP0 on return to userspace instead of context switch The FRED RSP0 MSR points to the top of the kernel stack for user level event delivery. As this is the task stack it needs to be updated when a task is scheduled in. The update is done at context switch. That means it's also done when switching to kernel threads, which is pointless as those never go out to user space. For KVM threads this means there are two writes to FRED_RSP0 as KVM has to switch to the guest value before VMENTER. 
Defer the update to the exit to user space path and cache the per CPU FRED_RSP0 value, so redundant writes can be avoided. Provide fred_sync_rsp0() for KVM to keep the cache in sync with the actual MSR value after returning from guest to host mode. [ tglx: Massage change log ] Suggested-by: Sean Christopherson Suggested-by: Thomas Gleixner Signed-off-by: Xin Li (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240822073906.2176342-4-xin@zytor.com --- arch/x86/include/asm/entry-common.h | 3 +++ arch/x86/include/asm/fred.h | 21 ++++++++++++++++++++- arch/x86/include/asm/switch_to.h | 5 +---- arch/x86/kernel/fred.c | 3 +++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index db970828f385..77d20555e04d 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -8,6 +8,7 @@ #include #include #include +#include /* Check that the stack and regs on entry from user mode are sane. */ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) @@ -63,6 +64,8 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work)) arch_exit_work(ti_work); + fred_update_rsp0(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. 
Make sure we clear it before diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 66d7dbe2d314..25ca00bd70e8 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -36,6 +36,7 @@ #ifdef CONFIG_X86_FRED #include +#include #include @@ -87,12 +88,30 @@ void cpu_init_fred_exceptions(void); void cpu_init_fred_rsps(void); void fred_complete_exception_setup(void); +DECLARE_PER_CPU(unsigned long, fred_rsp0); + +static __always_inline void fred_sync_rsp0(unsigned long rsp0) +{ + __this_cpu_write(fred_rsp0, rsp0); +} + +static __always_inline void fred_update_rsp0(void) +{ + unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE; + + if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) { + wrmsrns(MSR_IA32_FRED_RSP0, rsp0); + __this_cpu_write(fred_rsp0, rsp0); + } +} #else /* CONFIG_X86_FRED */ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; } static inline void cpu_init_fred_exceptions(void) { } static inline void cpu_init_fred_rsps(void) { } static inline void fred_complete_exception_setup(void) { } -static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } +static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } +static inline void fred_sync_rsp0(unsigned long rsp0) { } +static inline void fred_update_rsp0(void) { } #endif /* CONFIG_X86_FRED */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index e9ded149a9e3..75248546403d 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -70,12 +70,9 @@ static inline void update_task_stack(struct task_struct *task) #ifdef CONFIG_X86_32 this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else - if (cpu_feature_enabled(X86_FEATURE_FRED)) { - wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE); - } else if 
(cpu_feature_enabled(X86_FEATURE_XENPV)) { + if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV)) /* Xen PV enters the kernel on the thread stack. */ load_sp0(task_top_of_stack(task)); - } #endif } diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 266c69e332a4..8d32c3f48abc 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -21,6 +21,9 @@ #define FRED_STKLVL(vector, lvl) ((lvl) << (2 * (vector))) +DEFINE_PER_CPU(unsigned long, fred_rsp0); +EXPORT_PER_CPU_SYMBOL(fred_rsp0); + void cpu_init_fred_exceptions(void) { /* When FRED is enabled by default, remove this log message */ -- cgit