diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/entry/calling.h | 40 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 132 | ||||
-rw-r--r-- | arch/x86/include/asm/fsgsbase.h | 45 | ||||
-rw-r--r-- | arch/x86/include/asm/inst.h | 15 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/hwcap2.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 119 |
7 files changed, 50 insertions, 326 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index d3fbe2dc03ea..efb0d1b1f15f 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -6,7 +6,6 @@ #include <asm/percpu.h> #include <asm/asm-offsets.h> #include <asm/processor-flags.h> -#include <asm/inst.h> /* @@ -338,12 +337,6 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm -.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req - rdgsbase \save_reg - GET_PERCPU_BASE \scratch_reg - wrgsbase \scratch_reg -.endm - #endif /* CONFIG_X86_64 */ .macro STACKLEAK_ERASE @@ -352,39 +345,6 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm -#ifdef CONFIG_SMP - -/* - * CPU/node NR is loaded from the limit (size) field of a special segment - * descriptor entry in GDT. - */ -.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req - movq $__CPUNODE_SEG, \reg - lsl \reg, \reg -.endm - -/* - * Fetch the per-CPU GSBASE value for this processor and put it in @reg. - * We normally use %gs for accessing per-CPU data, but we are setting up - * %gs here and obviously can not use %gs itself to access per-CPU data. - */ -.macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID - andq $VDSO_CPUNODE_MASK, \reg - movq __per_cpu_offset(, \reg, 8), \reg -.endm - -#else - -.macro GET_PERCPU_BASE reg:req - movq pcpu_unit_offsets(%rip), \reg -.endm - -#endif /* CONFIG_SMP */ - /* * This does 'call enter_from_user_mode' unless we can avoid it based on * kernel config or using the static jump infrastructure. diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 670306f588bf..3b7a0e8d3bc0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -38,7 +38,6 @@ #include <asm/export.h> #include <asm/frame.h> #include <asm/nospec-branch.h> -#include <asm/fsgsbase.h> #include <linux/err.h> #include "calling.h" @@ -948,6 +947,7 @@ ENTRY(\sym) addq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif + /* these procedures expect "no swapgs" flag in ebx */ .if \paranoid jmp paranoid_exit .else @@ -1164,21 +1164,24 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* - * Save all registers in pt_regs. Return GSBASE related information - * in EBX depending on the availability of the FSGSBASE instructions: - * - * FSGSBASE R/EBX - * N 0 -> SWAPGS on exit - * 1 -> no SWAPGS on exit - * - * Y GSBASE value at entry, must be restored in paranoid_exit + * Save all registers in pt_regs, and switch gs if needed. + * Use slow, but surefire "are we in kernel?" check. + * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 + movl $1, %ebx + movl $MSR_GS_BASE, %ecx + rdmsr + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS + xorl %ebx, %ebx +1: /* * Always stash CR3 in %r14. This value will be restored, * verbatim, at exit. Needed if paranoid_entry interrupted @@ -1188,49 +1191,9 @@ ENTRY(paranoid_entry) * This is also why CS (stashed in the "iret frame" by the * hardware at entry) can not be used: this may be a return * to kernel code, but with a user CR3 value. - * - * Switching CR3 does not depend on kernel GSBASE so it can - * be done before switching to the kernel GSBASE. This is - * required for FSGSBASE because the kernel GSBASE has to - * be retrieved from a kernel internal table. */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 - /* - * Handling GSBASE depends on the availability of FSGSBASE. - * - * Without FSGSBASE the kernel enforces that negative GSBASE - * values indicate kernel GSBASE. With FSGSBASE no assumptions - * can be made about the GSBASE value when entering from user - * space. - */ - ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE - - /* - * Read the current GSBASE and store it in in %rbx unconditionally, - * retrieve and set the current CPUs kernel GSBASE. The stored value - * has to be restored in paranoid_exit unconditionally. - */ - SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - ret - -.Lparanoid_entry_checkgs: - /* EBX = 1 -> kernel GSBASE active, no restore required */ - movl $1, %ebx - /* - * The kernel-enforced convention is a negative GSBASE indicates - * a kernel value. No SWAPGS needed on entry and exit. - */ - movl $MSR_GS_BASE, %ecx - rdmsr - testl %edx, %edx - jns .Lparanoid_entry_swapgs - ret - -.Lparanoid_entry_swapgs: - SWAPGS - /* EBX = 0 -> SWAPGS required on exit */ - xorl %ebx, %ebx ret END(paranoid_entry) @@ -1241,48 +1204,28 @@ END(paranoid_entry) * * We may be returning to very strange contexts (e.g. very early * in syscall entry), so checking for preemption here would - * be complicated. Fortunately, there's no good reason to try - * to handle preemption here. - * - * R/EBX contains the GSBASE related information depending on the - * availability of the FSGSBASE instructions: + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. * - * FSGSBASE R/EBX - * N 0 -> SWAPGS on exit - * 1 -> no SWAPGS on exit - * - * Y User space GSBASE, must be restored unconditionally + * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) - - /* - * The order of operations is important. IRQ tracing requires - * kernel GSBASE and CR3. RESTORE_CR3 requires kernel GS base. - * - * NB to anyone to tries to optimize this code: this code does - * not execute at all for exceptions coming from user mode. Those - * exceptions go through error_exit instead. - */ - TRACE_IRQS_IRETQ_DEBUG - RESTORE_CR3 scratch_reg=%rax save_reg=%r14 - - /* Handle the three GSBASE cases. */ - ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE - - /* With FSGSBASE enabled, unconditionally restore GSBASE */ - wrgsbase %rbx - jmp restore_regs_and_return_to_kernel - -.Lparanoid_exit_checkgs: - /* On non-FSGSBASE systems, conditionally do SWAPGS */ - testl %ebx, %ebx - jnz restore_regs_and_return_to_kernel - - /* We are returning to a context with user GSBASE. */ + TRACE_IRQS_OFF_DEBUG + testl %ebx, %ebx /* swapgs needed? */ + jnz .Lparanoid_exit_no_swapgs + TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK - jmp restore_regs_and_return_to_kernel + jmp .Lparanoid_exit_restore +.Lparanoid_exit_no_swapgs: + TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 +.Lparanoid_exit_restore: + jmp restore_regs_and_return_to_kernel END(paranoid_exit) /* @@ -1693,27 +1636,10 @@ end_repeat_nmi: /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 - /* - * The above invocation of paranoid_entry stored the GSBASE - * related information in R/EBX depending on the availability - * of FSGSBASE. - * - * If FSGSBASE is enabled, restore the saved GSBASE value - * unconditionally, otherwise take the conditional SWAPGS path. - */ - ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE - - wrgsbase %rbx - jmp nmi_restore - -nmi_no_fsgsbase: - /* EBX == 0 -> invoke SWAPGS */ - testl %ebx, %ebx + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore - nmi_swapgs: SWAPGS_UNSAFE_STACK - nmi_restore: POP_REGS diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index aefd53767a5d..bca4c743de77 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -19,63 +19,36 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task); extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); -/* Must be protected by X86_FEATURE_FSGSBASE check. */ +/* Helper functions for reading/writing FS/GS base */ -static __always_inline unsigned long rdfsbase(void) +static inline unsigned long x86_fsbase_read_cpu(void) { unsigned long fsbase; - asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); + rdmsrl(MSR_FS_BASE, fsbase); return fsbase; } -static __always_inline unsigned long rdgsbase(void) +static inline unsigned long x86_gsbase_read_cpu_inactive(void) { unsigned long gsbase; - asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); return gsbase; } -static __always_inline void wrfsbase(unsigned long fsbase) -{ - asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); -} - -static __always_inline void wrgsbase(unsigned long gsbase) -{ - asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); -} - -#include <asm/cpufeature.h> - -/* Helper functions for reading/writing FS/GS base */ - -static inline unsigned long x86_fsbase_read_cpu(void) +static inline void x86_fsbase_write_cpu(unsigned long fsbase) { - unsigned long fsbase; - - if (static_cpu_has(X86_FEATURE_FSGSBASE)) - fsbase = rdfsbase(); - else - rdmsrl(MSR_FS_BASE, fsbase); - - return fsbase; + wrmsrl(MSR_FS_BASE, fsbase); } -static inline void x86_fsbase_write_cpu(unsigned long fsbase) +static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase) { - if (static_cpu_has(X86_FEATURE_FSGSBASE)) - wrfsbase(fsbase); - else - wrmsrl(MSR_FS_BASE, fsbase); + wrmsrl(MSR_KERNEL_GS_BASE, gsbase); } -extern unsigned long x86_gsbase_read_cpu_inactive(void); -extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); - #endif /* CONFIG_X86_64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index d063841a17e3..f5a796da07f8 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -306,21 +306,6 @@ .endif MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 .endm - -.macro RDPID opd - REG_TYPE rdpid_opd_type \opd - .if rdpid_opd_type == REG_TYPE_R64 - R64_NUM rdpid_opd \opd - .else - R32_NUM rdpid_opd \opd - .endif - .byte 0xf3 - .if rdpid_opd > 7 - PFX_REX rdpid_opd 0 - .endif - .byte 0x0f, 0xc7 - MODRM 0xc0 rdpid_opd 0x7 -.endm #endif #endif diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h index c5ce54e749f6..6ebaae90e207 100644 --- a/arch/x86/include/uapi/asm/hwcap2.h +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -5,7 +5,4 @@ /* MONITOR/MWAIT enabled in Ring 3 */ #define HWCAP2_RING3MWAIT (1 << 0) -/* Kernel allows FSGSBASE instructions available in Ring 3 */ -#define HWCAP2_FSGSBASE BIT(1) - #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 637c9117d5ae..dad20bc891d5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,22 +366,6 @@ out: cr4_clear_bits(X86_CR4_UMIP); } -static __init int x86_nofsgsbase_setup(char *arg) -{ - /* Require an exact match without trailing characters. */ - if (strlen(arg)) - return 0; - - /* Do not emit a message if the feature is not present. */ - if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) - return 1; - - setup_clear_cpu_cap(X86_FEATURE_FSGSBASE); - pr_info("FSGSBASE disabled via kernel command line\n"); - return 1; -} -__setup("nofsgsbase", x86_nofsgsbase_setup); - /* * Protection Keys are not available in 32-bit mode. */ @@ -1386,12 +1370,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_smap(c); setup_umip(c); - /* Enable FSGSBASE instructions if available. */ - if (cpu_has(c, X86_FEATURE_FSGSBASE)) { - cr4_set_bits(X86_CR4_FSGSBASE); - elf_hwcap2 |= HWCAP2_FSGSBASE; - } - /* * The vendor-specific functions might have changed features. * Now we do "generic changes." diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8f239091c15d..250e4c4ac6d9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -162,40 +162,6 @@ enum which_selector { }; /* - * Out of line to be protected from kprobes. It is not used on Xen - * paravirt. When paravirt support is needed, it needs to be renamed - * with native_ prefix. - */ -static noinline unsigned long __rdgsbase_inactive(void) -{ - unsigned long gsbase; - - lockdep_assert_irqs_disabled(); - - native_swapgs(); - gsbase = rdgsbase(); - native_swapgs(); - - return gsbase; -} -NOKPROBE_SYMBOL(__rdgsbase_inactive); - -/* - * Out of line to be protected from kprobes. It is not used on Xen - * paravirt. When paravirt support is needed, it needs to be renamed - * with native_ prefix. - */ -static noinline void __wrgsbase_inactive(unsigned long gsbase) -{ - lockdep_assert_irqs_disabled(); - - native_swapgs(); - wrgsbase(gsbase); - native_swapgs(); -} -NOKPROBE_SYMBOL(__wrgsbase_inactive); - -/* * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are * not available. The goal is to be reasonably fast on non-FSGSBASE systems. * It's forcibly inlined because it'll generate better code and this function @@ -244,22 +210,8 @@ static __always_inline void save_fsgs(struct task_struct *task) { savesegment(fs, task->thread.fsindex); savesegment(gs, task->thread.gsindex); - if (static_cpu_has(X86_FEATURE_FSGSBASE)) { - unsigned long flags; - - /* - * If FSGSBASE is enabled, we can't make any useful guesses - * about the base, and user code expects us to save the current - * value. Fortunately, reading the base directly is efficient. - */ - task->thread.fsbase = rdfsbase(); - local_irq_save(flags); - task->thread.gsbase = __rdgsbase_inactive(); - local_irq_restore(flags); - } else { - save_base_legacy(task, task->thread.fsindex, FS); - save_base_legacy(task, task->thread.gsindex, GS); - } + save_base_legacy(task, task->thread.fsindex, FS); + save_base_legacy(task, task->thread.gsindex, GS); } #if IS_ENABLED(CONFIG_KVM) @@ -338,22 +290,10 @@ static __always_inline void load_seg_legacy(unsigned short prev_index, static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, struct thread_struct *next) { - if (static_cpu_has(X86_FEATURE_FSGSBASE)) { - /* Update the FS and GS selectors if they could have changed. */ - if (unlikely(prev->fsindex || next->fsindex)) - loadseg(FS, next->fsindex); - if (unlikely(prev->gsindex || next->gsindex)) - loadseg(GS, next->gsindex); - - /* Update the bases. */ - wrfsbase(next->fsbase); - __wrgsbase_inactive(next->gsbase); - } else { - load_seg_legacy(prev->fsindex, prev->fsbase, - next->fsindex, next->fsbase, FS); - load_seg_legacy(prev->gsindex, prev->gsbase, - next->gsindex, next->gsbase, GS); - } + load_seg_legacy(prev->fsindex, prev->fsbase, + next->fsindex, next->fsbase, FS); + load_seg_legacy(prev->gsindex, prev->gsbase, + next->gsindex, next->gsbase, GS); } static unsigned long x86_fsgsbase_read_task(struct task_struct *task, @@ -399,46 +339,13 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task, return base; } -unsigned long x86_gsbase_read_cpu_inactive(void) -{ - unsigned long gsbase; - - if (static_cpu_has(X86_FEATURE_FSGSBASE)) { - unsigned long flags; - - /* Interrupts are disabled here. */ - local_irq_save(flags); - gsbase = __rdgsbase_inactive(); - local_irq_restore(flags); - } else { - rdmsrl(MSR_KERNEL_GS_BASE, gsbase); - } - - return gsbase; -} - -void x86_gsbase_write_cpu_inactive(unsigned long gsbase) -{ - if (static_cpu_has(X86_FEATURE_FSGSBASE)) { - unsigned long flags; - - /* Interrupts are disabled here. */ - local_irq_save(flags); - __wrgsbase_inactive(gsbase); - local_irq_restore(flags); - } else { - wrmsrl(MSR_KERNEL_GS_BASE, gsbase); - } -} - unsigned long x86_fsbase_read_task(struct task_struct *task) { unsigned long fsbase; if (task == current) fsbase = x86_fsbase_read_cpu(); - else if (static_cpu_has(X86_FEATURE_FSGSBASE) || - (task->thread.fsindex == 0)) + else if (task->thread.fsindex == 0) fsbase = task->thread.fsbase; else fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); @@ -452,8 +359,7 @@ unsigned long x86_gsbase_read_task(struct task_struct *task) if (task == current) gsbase = x86_gsbase_read_cpu_inactive(); - else if (static_cpu_has(X86_FEATURE_FSGSBASE) || - (task->thread.gsindex == 0)) + else if (task->thread.gsindex == 0) gsbase = task->thread.gsbase; else gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); @@ -493,11 +399,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap_ptr = NULL; - save_fsgs(me); - p->thread.fsindex = me->thread.fsindex; - p->thread.fsbase = me->thread.fsbase; - p->thread.gsindex = me->thread.gsindex; - p->thread.gsbase = me->thread.gsbase; + savesegment(gs, p->thread.gsindex); + p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; + savesegment(fs, p->thread.fsindex); + p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |