diff options
Diffstat (limited to 'arch/x86/kernel/traps.c')
-rw-r--r-- | arch/x86/kernel/traps.c | 130 |
1 files changed, 108 insertions, 22 deletions
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c876f1d36a81..2dbadf347b5f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -37,10 +37,12 @@ #include <linux/nmi.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/io.h> #include <linux/hardirq.h> #include <linux/atomic.h> #include <linux/iommu.h> +#include <linux/ubsan.h> #include <asm/stacktrace.h> #include <asm/processor.h> @@ -50,6 +52,7 @@ #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> +#include <asm/fred.h> #include <asm/fpu/api.h> #include <asm/cpu.h> #include <asm/cpu_entry_area.h> @@ -89,6 +92,47 @@ __always_inline int is_valid_bugaddr(unsigned long addr) return *(unsigned short *)addr == INSN_UD2; } +/* + * Check for UD1 or UD2, accounting for Address Size Override Prefixes. + * If it's a UD1, get the ModRM byte to pass along to UBSan. + */ +__always_inline int decode_bug(unsigned long addr, u32 *imm) +{ + u8 v; + + if (addr < TASK_SIZE_MAX) + return BUG_NONE; + + v = *(u8 *)(addr++); + if (v == INSN_ASOP) + v = *(u8 *)(addr++); + if (v != OPCODE_ESCAPE) + return BUG_NONE; + + v = *(u8 *)(addr++); + if (v == SECOND_BYTE_OPCODE_UD2) + return BUG_UD2; + + if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1) + return BUG_NONE; + + /* Retrieve the immediate (type value) for the UBSAN UD1 */ + v = *(u8 *)(addr++); + if (X86_MODRM_RM(v) == 4) + addr++; + + *imm = 0; + if (X86_MODRM_MOD(v) == 1) + *imm = *(u8 *)addr; + else if (X86_MODRM_MOD(v) == 2) + *imm = *(u32 *)addr; + else + WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v)); + + return BUG_UD1; +} + + static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) @@ -214,14 +258,11 @@ static inline void handle_invalid_op(struct pt_regs *regs) static noinstr bool handle_bug(struct pt_regs *regs) { bool handled = false; + int ud_type; + u32 imm; - /* - * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() - * is a rare case that uses @regs without passing them to - * irqentry_enter(). - */ - kmsan_unpoison_entry_regs(regs); - if (!is_valid_bugaddr(regs->ip)) + ud_type = decode_bug(regs->ip, &imm); + if (ud_type == BUG_NONE) return handled; /* @@ -229,15 +270,25 @@ static noinstr bool handle_bug(struct pt_regs *regs) */ instrumentation_begin(); /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() + * is a rare case that uses @regs without passing them to + * irqentry_enter(). + */ + kmsan_unpoison_entry_regs(regs); + /* * Since we're emulating a CALL with exceptions, restore the interrupt * state to what it was at the exception site. */ if (regs->flags & X86_EFLAGS_IF) raw_local_irq_enable(); - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || - handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { - regs->ip += LEN_UD2; - handled = true; + if (ud_type == BUG_UD2) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || + handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { + regs->ip += LEN_UD2; + handled = true; + } + } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { + pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip); } if (regs->flags & X86_EFLAGS_IF) raw_local_irq_disable(); @@ -565,7 +616,7 @@ static bool fixup_iopl_exception(struct pt_regs *regs) */ static bool try_fixup_enqcmd_gp(void) { -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID u32 pasid; /* @@ -591,7 +642,7 @@ static bool try_fixup_enqcmd_gp(void) if (!mm_valid_pasid(current->mm)) return false; - pasid = current->mm->pasid; + pasid = mm_get_enqcmd_pasid(current->mm); /* * Did this thread already have its PASID activated? @@ -772,7 +823,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) */ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1; + struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1; if (regs != eregs) *regs = *eregs; return regs; @@ -790,7 +841,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * trust it and switch to the current kernel stack */ if (ip_within_syscall_gap(regs)) { - sp = this_cpu_read(pcpu_hot.top_of_stack); + sp = current_top_of_stack(); goto sync; } @@ -934,8 +985,7 @@ static bool notify_debug(struct pt_regs *regs, unsigned long *dr6) return false; } -static __always_inline void exc_debug_kernel(struct pt_regs *regs, - unsigned long dr6) +static noinstr void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { /* * Disable breakpoints during exception handling; recursive exceptions @@ -947,6 +997,11 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, * * Entry text is excluded for HW_BP_X and cpu_entry_area, which * includes the entry stack is excluded for everything. + * + * For FRED, nested #DB should just work fine. But when a watchpoint or + * breakpoint is set in the code path which is executed by #DB handler, + * it results in an endless recursion and stack overflow. Thus we stay + * with the IDT approach, i.e., save DR7 and disable #DB. */ unsigned long dr7 = local_db_save(); irqentry_state_t irq_state = irqentry_nmi_enter(regs); @@ -976,7 +1031,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. */ - if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) + if (!cpu_feature_enabled(X86_FEATURE_FRED) && + (dr6 & DR_STEP) && is_sysenter_singlestep(regs)) dr6 &= ~DR_STEP; /* @@ -1008,8 +1064,7 @@ out: local_db_restore(dr7); } -static __always_inline void exc_debug_user(struct pt_regs *regs, - unsigned long dr6) +static noinstr void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { bool icebp; @@ -1093,6 +1148,34 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { exc_debug_user(regs, debug_read_clear_dr6()); } + +#ifdef CONFIG_X86_FRED +/* + * When occurred on different ring level, i.e., from user or kernel + * context, #DB needs to be handled on different stack: User #DB on + * current task stack, while kernel #DB on a dedicated stack. + * + * This is exactly how FRED event delivery invokes an exception + * handler: ring 3 event on level 0 stack, i.e., current task stack; + * ring 0 event on the #DB dedicated stack specified in the + * IA32_FRED_STKLVLS MSR. So unlike IDT, the FRED debug exception + * entry stub doesn't do stack switch. + */ +DEFINE_FREDENTRY_DEBUG(exc_debug) +{ + /* + * FRED #DB stores DR6 on the stack in the format which + * debug_read_clear_dr6() returns for the IDT entry points. + */ + unsigned long dr6 = fred_event_data(regs); + + if (user_mode(regs)) + exc_debug_user(regs, dr6); + else + exc_debug_kernel(regs, dr6); +} +#endif /* CONFIG_X86_FRED */ + #else /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) @@ -1377,8 +1460,11 @@ void __init trap_init(void) sev_es_init_vc_handling(); /* Initialize TSS before setting up traps so ISTs work */ - cpu_init_exception_handling(); + cpu_init_exception_handling(true); + /* Setup traps as cpu_init() might #GP */ - idt_setup_traps(); + if (!cpu_feature_enabled(X86_FEATURE_FRED)) + idt_setup_traps(); + cpu_init(); } |