From 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:48 +0800 Subject: x86/traps: Use pt_regs directly in fixup_bad_iret() Always stash the address error_entry() is going to return to, in %r12 and get rid of the void *error_entry_ret; slot in struct bad_iret_stack which was supposed to account for it and pt_regs pushed on the stack. After this, both fixup_bad_iret() and sync_regs() can work on a struct pt_regs pointer directly. [ bp: Rewrite commit message, touch ups. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73d958522b6a..ecbfca3cc18c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1061,9 +1061,12 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - mov %rsp, %rdi + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rsp + ENCODE_FRAME_POINTER + pushq %r12 jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) -- cgit From 520a7e80c96d655fbe4650d9cc985bd9d0443389 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:49 +0800 Subject: x86/entry: Switch the stack after error_entry() returns error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault from a bad IRET, to copy pt_regs to the kernel stack. It switches to the kernel stack directly after sync_regs(). But error_entry() itself is also a function call, so it has to stash the address it is going to return to, in %r12 which is unnecessarily complicated. Move the stack switching after error_entry() and get rid of the need to handle the return address. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ecbfca3cc18c..ca3e99e08a44 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -326,6 +326,8 @@ SYM_CODE_END(ret_from_fork) .macro idtentry_body cfunc has_error_code:req call error_entry + movq %rax, %rsp /* switch to the task stack if from userspace */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -1002,14 +1004,10 @@ SYM_CODE_START_LOCAL(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ - ENCODE_FRAME_POINTER - pushq %r12 RET /* @@ -1041,6 +1039,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ RET .Lbstep_iret: @@ -1061,12 +1060,9 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret - mov %rax, %rsp - ENCODE_FRAME_POINTER - pushq %r12 + mov %rax, %rdi jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) -- cgit From ee774dac0da1543376a69fd90840af6aa86879b3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:50 +0800 Subject: x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() The macro idtentry() (through idtentry_body()) calls error_entry() unconditionally even on XENPV. But XENPV needs to only push and clear regs. PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its original place when the function returns, which means it is not possible to convert it to a C function. Carve out PUSH_AND_CLEAR_REGS out of error_entry() and into a separate function and call it before error_entry() in order to avoid calling error_entry() on XENPV. It will also allow for error_entry() to be converted to C code that can use inlined sync_regs() and save a function call. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ca3e99e08a44..b1cef3b0a7ab 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -318,6 +318,14 @@ SYM_CODE_END(ret_from_fork) #endif .endm +/* Save all registers in pt_regs */ +SYM_CODE_START_LOCAL(push_and_clear_regs) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + RET +SYM_CODE_END(push_and_clear_regs) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -325,6 +333,9 @@ SYM_CODE_END(ret_from_fork) */ .macro idtentry_body cfunc has_error_code:req + call push_and_clear_regs + UNWIND_HINT_REGS + call error_entry movq %rax, %rsp /* switch to the task stack if from userspace */ ENCODE_FRAME_POINTER @@ -985,13 +996,11 @@ SYM_CODE_START_LOCAL(paranoid_exit) SYM_CODE_END(paranoid_exit) /* - * Save all registers in pt_regs, and switch GS if needed. + * Switch GS and CR3 if needed. */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace -- cgit From c64cc2802a784ecfd25d39945e57e7a147854a5b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:51 +0800 Subject: x86/entry: Move CLD to the start of the idtentry macro Move it after CLAC. Suggested-by: Peter Zijlstra Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-5-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b1cef3b0a7ab..ab6ab6d3dab5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -371,6 +371,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 ENDBR ASM_CLAC + cld .if \has_error_code == 0 pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -439,6 +440,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS ENDBR ASM_CLAC + cld pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -495,6 +497,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS ENDBR ASM_CLAC + cld /* * If the entry is from userspace, switch stacks and treat it as @@ -557,6 +560,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 ENDBR ASM_CLAC + cld /* paranoid_entry returns GS information for paranoid_exit in EBX. */ call paranoid_entry @@ -882,7 +886,6 @@ SYM_CODE_END(xen_failsafe_callback) */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC - cld PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -1000,7 +1003,6 @@ SYM_CODE_END(paranoid_exit) */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC - cld testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1134,6 +1136,7 @@ SYM_CODE_START(asm_exc_nmi) */ ASM_CLAC + cld /* Use %rdx as our temp variable throughout */ pushq %rdx @@ -1153,7 +1156,6 @@ SYM_CODE_START(asm_exc_nmi) */ swapgs - cld FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx -- cgit From 64cbd0acb58203fb769ed2f4eab526d43e243847 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:06 +0800 Subject: x86/entry: Don't call error_entry() for XENPV XENPV guests enter already on the task stack and they can't fault for native_iret() nor native_load_gs_index() since they use their own pvop for IRET and load_gs_index(). A CR3 switch is not needed either. So there is no reason to call error_entry() in XENPV. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ab6ab6d3dab5..062aa9d95961 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -336,8 +336,17 @@ SYM_CODE_END(push_and_clear_regs) call push_and_clear_regs UNWIND_HINT_REGS - call error_entry - movq %rax, %rsp /* switch to the task stack if from userspace */ + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "", X86_FEATURE_XENPV + ENCODE_FRAME_POINTER UNWIND_HINT_REGS -- cgit From c89191ce67efa4e5353db6a67f7287c28e673740 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:07 +0800 Subject: x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS XENPV doesn't use swapgs_restore_regs_and_return_to_usermode(), error_entry() and the code between entry_SYSENTER_compat() and entry_SYSENTER_compat_after_hwframe. Change the PV-compatible SWAPGS to the ASM instruction swapgs in these places. Also remove the definition of SWAPGS since no more users. Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-7-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 062aa9d95961..312186612f4e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1019,7 +1019,7 @@ SYM_CODE_START_LOCAL(error_entry) * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax @@ -1051,7 +1051,7 @@ SYM_CODE_START_LOCAL(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - SWAPGS + swapgs /* * Issue an LFENCE to prevent GS speculation, regardless of whether it is a @@ -1072,7 +1072,7 @@ SYM_CODE_START_LOCAL(error_entry) * We came from an IRET to user mode, so we have user * gsbase and CR3. Switch to kernel gsbase and CR3: */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax -- cgit From 1b331eeea7b8676fc5dbdf80d0a07e41be226177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 May 2022 14:14:35 +0200 Subject: x86/entry: Remove skip_r11rcx Yes, r11 and rcx have been restored previously, but since they're being popped anyway (into rsi) might as well pop them into their own regs -- setting them to the value they already are. Less magical code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org --- arch/x86/entry/entry_64.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 312186612f4e..3a1e3f215617 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. -- cgit