diff options
Diffstat (limited to 'arch/x86/entry')
-rw-r--r-- | arch/x86/entry/calling.h | 10 | ||||
-rw-r--r-- | arch/x86/entry/common.c | 31 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 2 | ||||
-rw-r--r-- | arch/x86/entry/thunk_32.S | 5 |
4 files changed, 29 insertions, 19 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98e4d8886f11..ae9b0d4615b3 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with * Fetch the per-CPU GSBASE value for this processor and put it in @reg. * We normally use %gs for accessing per-CPU data, but we are setting up * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. */ .macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID + LOAD_CPU_AND_NODE_SEG_LIMIT \reg andq $VDSO_CPUNODE_MASK, \reg movq __per_cpu_offset(, \reg, 8), \reg .endm diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..870efeec8bda 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); @@ -288,7 +299,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) old_regs = set_irq_regs(regs); instrumentation_begin(); - run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs); + run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); instrumentation_begin(); set_irq_regs(old_regs); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 70dea9337816..d977079a7d02 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -682,6 +682,8 @@ SYM_CODE_END(.Lbad_gs) * rdx: Function argument (can be NULL if none) */ SYM_FUNC_START(asm_call_on_stack) +SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL) +SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL) /* * Save the frame pointer unconditionally. This allows the ORC * unwinder to handle the stack switch. diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 3a07ce3ec70b..f1f96d4d8cd6 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -29,11 +29,6 @@ SYM_CODE_START_NOALIGN(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_TRACE_IRQFLAGS - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 -#endif - #ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace |