diff options
Diffstat (limited to 'arch/x86/lib/retpoline.S')
| -rw-r--r-- | arch/x86/lib/retpoline.S | 374 |
1 files changed, 343 insertions, 31 deletions
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 073289a55f84..8f1fed0c3b83 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -1,28 +1,31 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/stringify.h> #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> +#include <asm/asm-offsets.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> +#include <asm/percpu.h> #include <asm/frame.h> +#include <asm/nops.h> - .section .text.__x86.indirect_thunk + .section .text..__x86.indirect_thunk -.macro RETPOLINE reg +.macro POLINE reg ANNOTATE_INTRA_FUNCTION_CALL call .Ldo_rop_\@ -.Lspec_trap_\@: - UNWIND_HINT_EMPTY - pause - lfence - jmp .Lspec_trap_\@ + int3 .Ldo_rop_\@: mov %\reg, (%_ASM_SP) UNWIND_HINT_FUNC +.endm + +.macro RETPOLINE reg + POLINE \reg RET .endm @@ -30,12 +33,13 @@ .align RETPOLINE_THUNK_SIZE SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) +SYM_PIC_ALIAS(__x86_indirect_thunk_\reg) .endm @@ -52,7 +56,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) */ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) @@ -64,46 +67,264 @@ SYM_CODE_START(__x86_indirect_thunk_array) .align RETPOLINE_THUNK_SIZE SYM_CODE_END(__x86_indirect_thunk_array) -#define GEN(reg) EXPORT_THUNK(reg) +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +#include <asm/GEN-for-each-reg.h> +#undef GEN + +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING + +.macro CALL_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + + CALL_DEPTH_ACCOUNT + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_call_thunk_array) + +#define GEN(reg) CALL_THUNK reg +#include <asm/GEN-for-each-reg.h> +#undef GEN + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_call_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg) +#include <asm/GEN-for-each-reg.h> +#undef GEN + +.macro JUMP_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_jump_thunk_array) + +#define GEN(reg) JUMP_THUNK reg #include <asm/GEN-for-each-reg.h> #undef GEN + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_jump_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg) +#include <asm/GEN-for-each-reg.h> +#undef GEN + +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +#ifdef CONFIG_MITIGATION_ITS + +.macro ITS_THUNK reg + /* - * This function name is magical and is used by -mfunction-return=thunk-extern - * for the compiler to generate JMPs to it. + * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b) + * that complete the fineibt_paranoid caller sequence. + */ +1: ASM_UDB +SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + jne 1b +SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + jmp *%\reg + int3 + .align 32, 0xcc /* fill to the end of the line */ + .skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */ +.endm + +/* ITS mitigation requires thunks be aligned to upper half of cacheline */ +.align 64, 0xcc +.skip 29, 0xcc + +#define GEN(reg) ITS_THUNK reg +#include <asm/GEN-for-each-reg.h> +#undef GEN + + .align 64, 0xcc +SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax) +SYM_CODE_END(__x86_indirect_its_thunk_array) + +#endif /* CONFIG_MITIGATION_ITS */ + +#ifdef CONFIG_MITIGATION_RETHUNK + +/* + * Be careful here: that label cannot really be removed because in + * some configurations and toolchains, the JMP __x86_return_thunk the + * compiler issues is either a short one or the compiler doesn't use + * relocations for same-section JMPs and that breaks the returns + * detection logic in apply_returns() and in objtool. + */ + .section .text..__x86.return_thunk + +#ifdef CONFIG_MITIGATION_SRSO + +/* + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at + * special addresses: + * + * - srso_alias_untrain_ret() is 2M aligned + * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 + * and 20 in its virtual address are set (while those bits in the + * srso_alias_untrain_ret() function are cleared). + * + * This guarantees that those two addresses will alias in the branch + * target buffer of Zen3/4 generations, leading to any potential + * poisoned entries at that BTB slot to get evicted. + * + * As a result, srso_alias_safe_ret() becomes a safe return. + */ + .pushsection .text..__x86.rethunk_untrain +SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ASM_NOP2 + lfence + jmp srso_alias_return_thunk +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) + .popsection + + .pushsection .text..__x86.rethunk_safe +SYM_CODE_START_NOALIGN(srso_alias_safe_ret) + lea 8(%_ASM_SP), %_ASM_SP + UNWIND_HINT_FUNC + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_safe_ret) + +SYM_CODE_START_NOALIGN(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 +SYM_CODE_END(srso_alias_return_thunk) + .popsection + +/* + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccc30824648d48,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. */ -#ifdef CONFIG_RETHUNK + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret) + ANNOTATE_NOENDBR + .byte 0x48, 0xb8 - .section .text.__x86.return_thunk +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + lea 8(%_ASM_SP), %_ASM_SP + ret + int3 + int3 + /* end of movabs */ + lfence + call srso_safe_ret + ud2 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) + +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" +#else /* !CONFIG_MITIGATION_SRSO */ +/* Dummy for the alternative in CALL_UNTRAIN_RET. */ +SYM_CODE_START(srso_alias_untrain_ret) + ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) +#define JMP_SRSO_UNTRAIN_RET "ud2" +#endif /* CONFIG_MITIGATION_SRSO */ + +#ifdef CONFIG_MITIGATION_UNRET_ENTRY + +/* + * Some generic notes on the untraining sequences: + * + * They are interchangeable when it comes to flushing potentially wrong + * RET predictions from the BTB. + * + * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the + * Retbleed sequence because the return sequence done there + * (srso_safe_ret()) is longer and the return sequence must fully nest + * (end before) the untraining sequence. Therefore, the untraining + * sequence must fully overlap the return sequence. + * + * Regarding alignment - the instructions which need to be untrained, + * must all start at a cacheline boundary for Zen1/2 generations. That + * is, instruction sequences starting at srso_safe_ret() and + * the respective instruction sequences at retbleed_return_thunk() + * must start at a cacheline boundary. + */ /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. - * 2) The instruction at zen_untrain_ret must contain, and not + * 2) The instruction at retbleed_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc -SYM_FUNC_START_NOALIGN(zen_untrain_ret); - + .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc +SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret) + ANNOTATE_NOENDBR /* - * As executed from zen_untrain_ret, this is: + * As executed from retbleed_untrain_ret, this is: * * TEST $0xcc, %bl * LFENCE - * JMP __x86_return_thunk + * JMP retbleed_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from __x86_return_thunk, this is a plain RET. + * As executed from retbleed_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. * @@ -115,13 +336,13 @@ SYM_FUNC_START_NOALIGN(zen_untrain_ret); * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, __x86_return_thunk will suffer Straight Line Speculation + * evicted, retbleed_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) +SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__x86_return_thunk) +SYM_CODE_END(retbleed_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -132,11 +353,102 @@ SYM_CODE_END(__x86_return_thunk) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __x86_return_thunk + jmp retbleed_return_thunk int3 -SYM_FUNC_END(zen_untrain_ret) -__EXPORT_THUNK(zen_untrain_ret) +SYM_FUNC_END(retbleed_untrain_ret) + +#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" +#else /* !CONFIG_MITIGATION_UNRET_ENTRY */ +#define JMP_RETBLEED_UNTRAIN_RET "ud2" +#endif /* CONFIG_MITIGATION_UNRET_ENTRY */ + +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) +SYM_FUNC_START(entry_untrain_ret) + ANNOTATE_NOENDBR + ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO +SYM_FUNC_END(entry_untrain_ret) +__EXPORT_THUNK(entry_untrain_ret) + +#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */ + +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING + + .align 64 +SYM_FUNC_START(call_depth_return_thunk) + ANNOTATE_NOENDBR + /* + * Keep the hotpath in a 16byte I-fetch for the non-debug + * case. + */ + CALL_THUNKS_DEBUG_INC_RETS + shlq $5, PER_CPU_VAR(__x86_call_depth) + jz 1f + ANNOTATE_UNRET_SAFE + ret + int3 +1: + CALL_THUNKS_DEBUG_INC_STUFFS + .rept 16 + ANNOTATE_INTRA_FUNCTION_CALL + call 2f + int3 +2: + .endr + add $(8*16), %rsp + + CREDIT_CALL_DEPTH + + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(call_depth_return_thunk) + +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +#ifdef CONFIG_MITIGATION_ITS + +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(its_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(its_return_thunk) +EXPORT_SYMBOL(its_return_thunk) + +#endif /* CONFIG_MITIGATION_ITS */ + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + * + * This code is only used during kernel boot or module init. All + * 'JMP __x86_return_thunk' sites are changed to something else by + * apply_returns(). + * + * The ALTERNATIVE below adds a really loud warning to catch the case + * where the insufficient default return thunk ends up getting used for + * whatever reason like miscompilation or failure of + * objtool/alternatives/etc to patch all the return sites. + */ +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \ + defined(CONFIG_MITIGATION_SRSO) || \ + defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) + ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ + "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS +#else + ANNOTATE_UNRET_SAFE + ret +#endif + int3 +SYM_CODE_END(__x86_return_thunk) +SYM_PIC_ALIAS(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_MITIGATION_RETHUNK */ |
