diff options
Diffstat (limited to 'arch/x86/entry/calling.h')
| -rw-r--r-- | arch/x86/entry/calling.h | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h new file mode 100644 index 000000000000..77e2d920a640 --- /dev/null +++ b/arch/x86/entry/calling.h @@ -0,0 +1,489 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/jump_label.h> +#include <asm/unwind_hints.h> +#include <asm/cpufeatures.h> +#include <asm/page_types.h> +#include <asm/percpu.h> +#include <asm/asm-offsets.h> +#include <asm/processor-flags.h> +#include <asm/ptrace-abi.h> +#include <asm/msr.h> +#include <asm/nospec-branch.h> + +/* + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + +#ifdef CONFIG_X86_64 + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: + */ + +.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 unwind_hint=1 + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + /* We just clobbered the return address - use the IRET frame for unwinding: */ + UNWIND_HINT_IRET_REGS offset=3*8 + .else + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + .endif + pushq \rdx /* pt_regs->dx */ + pushq \rcx /* pt_regs->cx */ + pushq \rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + .if \unwind_hint + UNWIND_HINT_REGS + .endif + + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif +.endm + +.macro CLEAR_REGS clear_callee=1 + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %esi, %esi /* nospec si */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + .if \clear_callee + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + .endif +.endm + +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1 + PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint + CLEAR_REGS clear_callee=\clear_callee +.endm + +.macro POP_REGS pop_rdi=1 + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + .if \pop_rdi + popq %rdi + .endif +.endm + +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION + +/* + * MITIGATION_PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * halves: + */ +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) + +.macro SET_NOFLUSH_BIT reg:req + bts $X86_CR3_PCID_NOFLUSH_BIT, \reg +.endm + +.macro ADJUST_KERNEL_CR3 reg:req + ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID + /* Clear PCID and "MITIGATION_PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg +.endm + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + ADJUST_KERNEL_CR3 \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +#define THIS_CPU_user_pcid_flush_mask \ + PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask) + +.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req + mov %cr3, \scratch_reg + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Test if the ASID needs a flush. + */ + movq \scratch_reg, \scratch_reg2 + andq $(0x7FF), \scratch_reg /* mask ASID */ + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + /* Flush needed, clear the bit */ + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + movq \scratch_reg2, \scratch_reg + jmp .Lwrcr3_pcid_\@ + +.Lnoflush_\@: + movq \scratch_reg2, \scratch_reg + SET_NOFLUSH_BIT \scratch_reg + +.Lwrcr3_pcid_\@: + /* Flip the ASID to the user version */ + orq $(PTI_USER_PCID_MASK), \scratch_reg + +.Lwrcr3_\@: + /* Flip the PGD to the user version */ + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg + mov \scratch_reg, %cr3 +.endm + +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2 +.Lend_\@: +.endm + +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + pushq %rax + SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax + popq %rax +.Lend_\@: +.endm + +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI + movq %cr3, \scratch_reg + movq \scratch_reg, \save_reg + /* + * Test the user pagetable bit. If set, then the user page tables + * are active. If clear CR3 already has the kernel page table + * active. + */ + bt $PTI_USER_PGTABLE_BIT, \scratch_reg + jnc .Ldone_\@ + + ADJUST_KERNEL_CR3 \scratch_reg + movq \scratch_reg, %cr3 + +.Ldone_\@: +.endm + +/* Restore CR3 from a kernel context. May restore a user CR3 value. */ +.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + /* + * If CR3 contained the kernel page tables at the paranoid exception + * entry, then there is nothing to restore as CR3 is not modified while + * handling the exception. + */ + bt $PTI_USER_PGTABLE_BIT, \save_reg + jnc .Lend_\@ + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jc .Lwrcr3_\@ + + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: + movq \save_reg, %cr3 +.Lend_\@: +.endm + +#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=n: */ + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req +.endm +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req +.endm +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req +.endm +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req +.endm +.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req +.endm + +#endif + +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_MITIGATION_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_MITIGATION_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to + * prevent a speculative swapgs when coming from kernel space. + * + * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, + * to prevent the swapgs from getting speculatively skipped when coming from + * user space. + */ +.macro FENCE_SWAPGS_USER_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER +.endm +.macro FENCE_SWAPGS_KERNEL_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL +.endm + +.macro STACKLEAK_ERASE_NOCLOBBER +#ifdef CONFIG_KSTACK_ERASE + PUSH_AND_CLEAR_REGS + call stackleak_erase + POP_REGS +#endif +.endm + +.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req + rdgsbase \save_reg + GET_PERCPU_BASE \scratch_reg + wrgsbase \scratch_reg +.endm + +#else /* CONFIG_X86_64 */ +# undef UNWIND_HINT_IRET_REGS +# define UNWIND_HINT_IRET_REGS +#endif /* !CONFIG_X86_64 */ + +.macro STACKLEAK_ERASE +#ifdef CONFIG_KSTACK_ERASE + call stackleak_erase +#endif +.endm + +#ifdef CONFIG_SMP + +/* + * CPU/node NR is loaded from the limit (size) field of a special segment + * descriptor entry in GDT. + */ +.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req + movq $__CPUNODE_SEG, \reg + lsl \reg, \reg +.endm + +/* + * Fetch the per-CPU GSBASE value for this processor and put it in @reg. + * We normally use %gs for accessing per-CPU data, but we are setting up + * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. + */ +.macro GET_PERCPU_BASE reg:req + LOAD_CPU_AND_NODE_SEG_LIMIT \reg + andq $VDSO_CPUNODE_MASK, \reg + movq __per_cpu_offset(, \reg, 8), \reg +.endm + +#else + +.macro GET_PERCPU_BASE reg:req + movq pcpu_unit_offsets(%rip), \reg +.endm + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_X86_64 + +/* rdi: arg1 ... normal C conventions. rax is saved/restored. */ +.macro THUNK name, func +SYM_FUNC_START(\name) + ANNOTATE_NOENDBR + pushq %rbp + movq %rsp, %rbp + + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + + call \func + + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rbp + RET +SYM_FUNC_END(\name) + _ASM_NOKPROBE(\name) +.endm + +#else /* CONFIG_X86_32 */ + +/* put return address in eax (arg1) */ +.macro THUNK name, func, put_ret_addr_in_eax=0 +SYM_CODE_START_NOALIGN(\name) + pushl %eax + pushl %ecx + pushl %edx + + .if \put_ret_addr_in_eax + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + .endif + + call \func + popl %edx + popl %ecx + popl %eax + RET + _ASM_NOKPROBE(\name) +SYM_CODE_END(\name) + .endm + +#endif |
