Diffstat (limited to 'arch/x86/kernel/process_64.c')
-rw-r--r--  arch/x86/kernel/process_64.c | 118
1 file changed, 94 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b268747bb7..226472332a70 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -56,6 +56,7 @@
 #include <asm/resctrl.h>
 #include <asm/unistd.h>
 #include <asm/fsgsbase.h>
+#include <asm/fred.h>
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
 #include <asm/unistd_32_ia32.h>
@@ -117,7 +118,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,

         printk("%sFS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                log_lvl, fs, fsindex, gs, gsindex, shadowgs);
-        printk("%sCS:  %04lx DS: %04x ES: %04x CR0: %016lx\n",
+        printk("%sCS:  %04x DS: %04x ES: %04x CR0: %016lx\n",
                log_lvl, regs->cs, ds, es, cr0);
         printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
                log_lvl, cr2, cr3, cr4);
@@ -138,7 +139,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
                        log_lvl, d3, d6, d7);
         }

-        if (cpu_feature_enabled(X86_FEATURE_OSPKE))
+        if (cr4 & X86_CR4_PKE)
                 printk("%sPKRU: %08x\n", log_lvl, read_pkru());
 }
@@ -166,7 +167,29 @@ static noinstr unsigned long __rdgsbase_inactive(void)

         lockdep_assert_irqs_disabled();

-        if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
+        /*
+         * SWAPGS is no longer needed thus NOT allowed with FRED because
+         * FRED transitions ensure that an operating system can _always_
+         * operate with its own GS base address:
+         * - For events that occur in ring 3, FRED event delivery swaps
+         *   the GS base address with the IA32_KERNEL_GS_BASE MSR.
+         * - ERETU (the FRED transition that returns to ring 3) also swaps
+         *   the GS base address with the IA32_KERNEL_GS_BASE MSR.
+         *
+         * And the operating system can still setup the GS segment for a
+         * user thread without the need of loading a user thread GS with:
+         * - Using LKGS, available with FRED, to modify other attributes
+         *   of the GS segment without compromising its ability always to
+         *   operate with its own GS base address.
+         * - Accessing the GS segment base address for a user thread as
+         *   before using RDMSR or WRMSR on the IA32_KERNEL_GS_BASE MSR.
+         *
+         * Note, LKGS loads the GS base address into the IA32_KERNEL_GS_BASE
+         * MSR instead of the GS segment's descriptor cache. As such, the
+         * operating system never changes its runtime GS base address.
+         */
+        if (!cpu_feature_enabled(X86_FEATURE_FRED) &&
+            !cpu_feature_enabled(X86_FEATURE_XENPV)) {
                 native_swapgs();
                 gsbase = rdgsbase();
                 native_swapgs();
@@ -191,7 +214,8 @@ static noinstr void __wrgsbase_inactive(unsigned long gsbase)
 {
         lockdep_assert_irqs_disabled();

-        if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
+        if (!cpu_feature_enabled(X86_FEATURE_FRED) &&
+            !cpu_feature_enabled(X86_FEATURE_XENPV)) {
                 native_swapgs();
                 wrgsbase(gsbase);
                 native_swapgs();
@@ -505,7 +529,7 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)

 static void
 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
                     unsigned long new_sp,
-                    unsigned int _cs, unsigned int _ss, unsigned int _ds)
+                    u16 _cs, u16 _ss, u16 _ds)
 {
         WARN_ON_ONCE(regs != current_pt_regs());
@@ -522,11 +546,36 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
                 loadsegment(ds, _ds);
         load_gs_index(0);

-        regs->ip        = new_ip;
-        regs->sp        = new_sp;
-        regs->cs        = _cs;
-        regs->ss        = _ss;
-        regs->flags     = X86_EFLAGS_IF;
+        regs->ip        = new_ip;
+        regs->sp        = new_sp;
+        regs->csx       = _cs;
+        regs->ssx       = _ss;
+        /*
+         * Allow single-step trap and NMI when starting a new task, thus
+         * once the new task enters user space, single-step trap and NMI
+         * are both enabled immediately.
+         *
+         * Entering a new task is logically speaking a return from a
+         * system call (exec, fork, clone, etc.). As such, if ptrace
+         * enables single stepping a single step exception should be
+         * allowed to trigger immediately upon entering user space.
+         * This is not optional.
+         *
+         * NMI should *never* be disabled in user space. As such, this
+         * is an optional, opportunistic way to catch errors.
+         *
+         * Paranoia: High-order 48 bits above the lowest 16 bit SS are
+         * discarded by the legacy IRET instruction on all Intel, AMD,
+         * and Cyrix/Centaur/VIA CPUs, thus can be set unconditionally,
+         * even when FRED is not enabled. But we choose the safer side
+         * to use these bits only when FRED is enabled.
+         */
+        if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+                regs->fred_ss.swevent   = true;
+                regs->fred_ss.nmi       = true;
+        }
+
+        regs->flags     = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 }

 void
@@ -562,14 +611,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
         struct thread_struct *prev = &prev_p->thread;
         struct thread_struct *next = &next_p->thread;
-        struct fpu *prev_fpu = &prev->fpu;
         int cpu = smp_processor_id();

         WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
                      this_cpu_read(pcpu_hot.hardirq_stack_inuse));

-        if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-                switch_fpu_prepare(prev_fpu, cpu);
+        if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+                switch_fpu_prepare(prev_p, cpu);

         /* We must save %fs and %gs before load_TLS() because
          * %fs and %gs may be cleared by load_TLS().
@@ -623,7 +671,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         raw_cpu_write(pcpu_hot.current_task, next_p);
         raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));

-        switch_fpu_finish();
+        switch_fpu_finish(next_p);

         /* Reload sp0. */
         update_task_stack(next_p);
@@ -750,6 +798,32 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)

 #define LAM_U57_BITS 6

+static void enable_lam_func(void *__mm)
+{
+        struct mm_struct *mm = __mm;
+        unsigned long lam;
+
+        if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) {
+                lam = mm_lam_cr3_mask(mm);
+                write_cr3(__read_cr3() | lam);
+                cpu_tlbstate_update_lam(lam, mm_untag_mask(mm));
+        }
+}
+
+static void mm_enable_lam(struct mm_struct *mm)
+{
+        mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+        mm->context.untag_mask = ~GENMASK(62, 57);
+
+        /*
+         * Even though the process must still be single-threaded at this
+         * point, kernel threads may be using the mm. IPI those kernel
+         * threads if they exist.
+         */
+        on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
+        set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+}
+
 static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
 {
         if (!cpu_feature_enabled(X86_FEATURE_LAM))
@@ -766,25 +840,21 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
         if (mmap_write_lock_killable(mm))
                 return -EINTR;

+        /*
+         * MM_CONTEXT_LOCK_LAM is set on clone.  Prevent LAM from
+         * being enabled unless the process is single threaded:
+         */
         if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
                 mmap_write_unlock(mm);
                 return -EBUSY;
         }

-        if (!nr_bits) {
-                mmap_write_unlock(mm);
-                return -EINVAL;
-        } else if (nr_bits <= LAM_U57_BITS) {
-                mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
-                mm->context.untag_mask = ~GENMASK(62, 57);
-        } else {
+        if (!nr_bits || nr_bits > LAM_U57_BITS) {
                 mmap_write_unlock(mm);
                 return -EINVAL;
         }

-        write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
-        set_tlbstate_lam_mode(mm);
-        set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+        mm_enable_lam(mm);

         mmap_write_unlock(mm);
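
A note on the start_thread_common() hunk above: with the FRED series, the cs and ss slots of struct pt_regs become 16-bit selectors aliased into full 64-bit words (csx/ssx) whose upper bits hold FRED event state such as fred_ss.swevent and fred_ss.nmi. That is why the new code assigns through the full-width csx/ssx aliases (a u64 store that clears any stale upper bits) and only then opts in to the two FRED bits. The snippet below is a simplified, self-contained sketch of that union-aliasing pattern; struct fake_ss_slot is a made-up stand-in, not the kernel's actual fred_ss layout (the real definition lives in arch/x86/include/asm/ptrace.h and carries more fields).

/* Illustrative only: a cut-down stand-in for the kernel's SS slot. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct fake_ss_slot {
        union {
                uint64_t ssx;                   /* full 64-bit view, as start_thread_common() writes it */
                struct {                        /* uint64_t bitfields: a GCC/Clang extension */
                        uint64_t ss      : 16;  /* legacy SS selector */
                        uint64_t sti     : 1;   /* FRED: blocked by STI */
                        uint64_t swevent : 1;   /* FRED: allow single-step trap on entering user space */
                        uint64_t nmi     : 1;   /* FRED: NMIs enabled */
                        /* the real struct carries more event-state bits here */
                };
        };
};

int main(void)
{
        struct fake_ss_slot r = { .ssx = ~0ULL };       /* pretend the frame holds stale bits */

        r.ssx = 0x2b;           /* full-width write: selector set, every FRED bit cleared */
        r.swevent = true;       /* then set exactly the bits a new task wants */
        r.nmi = true;

        assert(r.ss == 0x2b && r.swevent && r.nmi && !r.sti);  /* 0x2b is __USER_DS on x86-64 */
        return 0;
}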
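On the LAM hunks: the reworked prctl_enable_tagged_addr() is reached from userspace via arch_prctl(ARCH_ENABLE_TAGGED_ADDR, nr_bits), where ARCH_ENABLE_TAGGED_ADDR is 0x4002 in <asm/prctl.h>. Below is a minimal userspace sketch of the ABI as this diff leaves it: any nr_bits from 1 to LAM_U57_BITS (6) enables LAM_U57, after which bits 62:57 of user pointers are ignored on dereference. It assumes an x86-64 glibc, where arch_prctl(2) has no wrapper and is invoked through syscall(2).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_ENABLE_TAGGED_ADDR
#define ARCH_ENABLE_TAGGED_ADDR 0x4002  /* from <asm/prctl.h> */
#endif

int main(void)
{
        /* Ask for 6 tag bits; the kernel enables LAM_U57 or fails with -EINVAL/-EBUSY. */
        if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6)) {
                perror("arch_prctl(ARCH_ENABLE_TAGGED_ADDR)");
                return 1;
        }

        int *p = malloc(sizeof(*p));
        if (!p)
                return 1;
        *p = 42;

        /* Plant a 6-bit tag in bits 62:57; the CPU masks it out on access. */
        int *tagged = (int *)((uintptr_t)p | (0x2aULL << 57));
        printf("via tagged pointer: %d\n", *tagged);

        free(p);
        return 0;
}

Note the -EBUSY path in the hunk: MM_CONTEXT_LOCK_LAM is set on clone and now also by mm_enable_lam() itself, so the call succeeds at most once, and only while the process is still single-threaded.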