Diffstat (limited to 'arch/x86/lib/retpoline.S')
-rw-r--r--  arch/x86/lib/retpoline.S  |  137
1 file changed, 95 insertions(+), 42 deletions(-)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 2cff585f22f2..cd86aeb5fdd3 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -13,7 +13,7 @@
#include <asm/frame.h>
#include <asm/nops.h>
- .section .text.__x86.indirect_thunk
+ .section .text..__x86.indirect_thunk
.macro POLINE reg
@@ -133,75 +133,106 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
#ifdef CONFIG_RETHUNK
/*
- * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at
+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
* special addresses:
*
- * - srso_untrain_ret_alias() is 2M aligned
- * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14
+ * - srso_alias_untrain_ret() is 2M aligned
+ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
* and 20 in its virtual address are set (while those bits in the
- * srso_untrain_ret_alias() function are cleared).
+ * srso_alias_untrain_ret() function are cleared).
*
* This guarantees that those two addresses will alias in the branch
* target buffer of Zen3/4 generations, causing any potentially
* poisoned entries at that BTB slot to be evicted.
*
- * As a result, srso_safe_ret_alias() becomes a safe return.
+ * As a result, srso_alias_safe_ret() becomes a safe return.
*/
#ifdef CONFIG_CPU_SRSO
- .section .text.__x86.rethunk_untrain
+ .section .text..__x86.rethunk_untrain
-SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ASM_NOP2
lfence
- jmp __x86_return_thunk
-SYM_FUNC_END(srso_untrain_ret_alias)
-__EXPORT_THUNK(srso_untrain_ret_alias)
-
- .section .text.__x86.rethunk_safe
+ jmp srso_alias_return_thunk
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
+
+ .section .text..__x86.rethunk_safe
+#else
+/* dummy definition for alternatives */
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
#endif
-/* Needs a definition for the __x86_return_thunk alternative below. */
-SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
-#ifdef CONFIG_CPU_SRSO
- add $8, %_ASM_SP
+SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ lea 8(%_ASM_SP), %_ASM_SP
UNWIND_HINT_FUNC
-#endif
ANNOTATE_UNRET_SAFE
ret
int3
-SYM_FUNC_END(srso_safe_ret_alias)
+SYM_FUNC_END(srso_alias_safe_ret)
- .section .text.__x86.return_thunk
+ .section .text..__x86.return_thunk
+
+SYM_CODE_START(srso_alias_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_alias_safe_ret
+ ud2
+SYM_CODE_END(srso_alias_return_thunk)
+
+/*
+ * Some generic notes on the untraining sequences:
+ *
+ * They are interchangeable when it comes to flushing potentially wrong
+ * RET predictions from the BTB.
+ *
+ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
+ * Retbleed sequence because the return sequence done there
+ * (srso_safe_ret()) is longer and the return sequence must fully nest
+ * (end before) the untraining sequence. Therefore, the untraining
+ * sequence must fully overlap the return sequence.
+ *
+ * Regarding alignment - the instructions which need to be untrained
+ * must all start at a cacheline boundary for Zen1/2 generations. That
+ * is, instruction sequences starting at srso_safe_ret() and
+ * the respective instruction sequences at retbleed_return_thunk()
+ * must start at a cacheline boundary.
+ */
/*
* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
* alignment within the BTB.
- * 2) The instruction at zen_untrain_ret must contain, and not
+ * 2) The instruction at retbleed_untrain_ret must contain, and not
* end with, the 0xc3 byte of the RET.
* 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
* from re-poisoning the BTB prediction.
*/
.align 64
- .skip 64 - (__ret - zen_untrain_ret), 0xcc
-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
+SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
- * As executed from zen_untrain_ret, this is:
+ * As executed from retbleed_untrain_ret, this is:
*
* TEST $0xcc, %bl
* LFENCE
- * JMP __x86_return_thunk
+ * JMP retbleed_return_thunk
*
* Executing the TEST instruction has a side effect of evicting any BTB
* prediction (potentially attacker controlled) attached to the RET, as
- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
*/
.byte 0xf6
/*
- * As executed from __x86_return_thunk, this is a plain RET.
+ * As executed from retbleed_return_thunk, this is a plain RET.
*
* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
*
@@ -213,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
* With SMT enabled and STIBP active, a sibling thread cannot poison
* RET's prediction to a type of its choice, but can evict the
* prediction due to competitive sharing. If the prediction is
- * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * evicted, retbleed_return_thunk will suffer Straight Line Speculation
* which will be contained safely by the INT3.
*/
-SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
ret
int3
-SYM_CODE_END(__ret)
+SYM_CODE_END(retbleed_return_thunk)
/*
* Ensure the TEST decoding / BTB invalidation is complete.
@@ -230,16 +261,16 @@ SYM_CODE_END(__ret)
* Jump back and execute the RET in the middle of the TEST instruction.
* INT3 is for SLS protection.
*/
- jmp __ret
+ jmp retbleed_return_thunk
int3
-SYM_FUNC_END(zen_untrain_ret)
-__EXPORT_THUNK(zen_untrain_ret)
+SYM_FUNC_END(retbleed_untrain_ret)
+__EXPORT_THUNK(retbleed_untrain_ret)
/*
- * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
* above. On kernel entry, srso_untrain_ret() is executed which is a
*
- * movabs $0xccccccc308c48348,%rax
+ * movabs $0xccccc30824648d48,%rax
*
* and when the return thunk executes the inner label srso_safe_ret()
* later, it is a stack manipulation and a RET which is mispredicted and
@@ -251,22 +282,44 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
.byte 0x48, 0xb8
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
- add $8, %_ASM_SP
+ lea 8(%_ASM_SP), %_ASM_SP
ret
int3
int3
- int3
+ /* end of movabs */
lfence
call srso_safe_ret
- int3
+ ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)
-SYM_FUNC_START(__x86_return_thunk)
- ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
- "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
+SYM_CODE_START(srso_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_return_thunk)
+
+SYM_FUNC_START(entry_untrain_ret)
+ ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
+ "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+ "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+SYM_FUNC_END(entry_untrain_ret)
+__EXPORT_THUNK(entry_untrain_ret)
+
+SYM_CODE_START(__x86_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
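
A note on the srso_alias_* placement described in the comment above: the untrain and safe-return thunks must land in the same 2M page, differing only in bits 2, 8, 14 and 20 of their virtual addresses, so that they collide in the Zen3/4 branch target buffer. The placement itself is enforced by the kernel's linker script; the C sketch below (hypothetical addresses, local SRSO_BIT_MASK/PMD_SIZE names) only illustrates the address arithmetic being relied on.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only: the two thunks share a 2M page and differ only in
 * bits 2, 8, 14 and 20 of their virtual addresses.  The names and
 * addresses here are made up for the sketch; the real placement is done
 * by the kernel's linker script.
 */
#define SRSO_BIT_MASK	((1UL << 2) | (1UL << 8) | (1UL << 14) | (1UL << 20))
#define PMD_SIZE	(1UL << 21)		/* 2M page size */

static int srso_addrs_alias(uint64_t untrain, uint64_t safe)
{
	return (untrain & (PMD_SIZE - 1)) == 0 &&	/* untrain thunk is 2M aligned       */
	       (untrain ^ safe) == SRSO_BIT_MASK;	/* only bits 2, 8, 14 and 20 differ, */
							/* so both stay in the same 2M page  */
}

int main(void)
{
	uint64_t untrain = 0xffffffff82000000UL;	/* hypothetical srso_alias_untrain_ret */
	uint64_t safe    = untrain | SRSO_BIT_MASK;	/* hypothetical srso_alias_safe_ret    */

	printf("addresses alias in the BTB: %s\n",
	       srso_addrs_alias(untrain, safe) ? "yes" : "no");
	return 0;
}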
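The retbleed (Zen1/2) thunk relies on an overlapping decode plus alignment: the 0xf6 opcode swallows the following RET (0xc3) and INT3 (0xcc) as its ModRM and imm8 when entered one byte early, and the .align 64 / .skip pair puts the RET itself, not the start of the sequence, on the 64-byte boundary. A minimal sketch of those two invariants, using made-up addresses (only the arithmetic and byte values come from the diff):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Byte view of retbleed_untrain_ret / retbleed_return_thunk from the diff above. */
static const uint8_t untrain_bytes[] = {
	0xf6,	/* entered at retbleed_untrain_ret: TEST opcode                      */
	0xc3,	/* ModRM of that TEST -- or a plain RET at retbleed_return_thunk     */
	0xcc,	/* imm8 of that TEST  -- or the INT3 that contains SLS               */
};

int main(void)
{
	uint64_t align_base = 0xffffffff82000040UL;	/* hypothetical .align 64 result      */
	uint64_t delta      = 1;			/* retbleed_return_thunk -            */
							/* retbleed_untrain_ret: one 0xf6 byte */
	uint64_t untrain    = align_base + 64 - delta;	/* after .skip 64 - delta, 0xcc       */
	uint64_t ret_thunk  = untrain + delta;

	assert(ret_thunk % 64 == 0);		/* 1) the RET sits on a 64-byte boundary  */
	assert(untrain_bytes[1] == 0xc3);	/* 2) the TEST contains the RET byte ...  */
	assert(untrain_bytes[2] != 0xc3);	/*    ... and does not end with it        */

	printf("RET at %#llx, embedded one byte into the TEST at %#llx\n",
	       (unsigned long long)ret_thunk, (unsigned long long)untrain);
	return 0;
}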
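Similarly, the Zen1/2 SRSO MOVABS immediate 0xccccc30824648d48 is nothing but the little-endian view of the srso_safe_ret() body (LEA/RET/INT3/INT3) that it overlaps. A small sketch reconstructing that immediate from the instruction bytes; the LEA/RET/INT3 encodings are standard x86 and are assumptions of the sketch, only the 0x48, 0xb8 prefix and the immediate value appear literally in the diff:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Entered at srso_untrain_ret these ten bytes decode as a single MOVABS;
 * entered two bytes later, at srso_safe_ret, the same memory decodes as
 * LEA/RET/INT3/INT3.
 */
static const uint8_t srso_seq[] = {
	0x48, 0xb8,			/* movabs ..., %rax (REX.W + opcode)     */
	0x48, 0x8d, 0x64, 0x24, 0x08,	/* srso_safe_ret: lea 8(%rsp), %rsp      */
	0xc3,				/* ret                                   */
	0xcc, 0xcc,			/* int3; int3 -- end of the movabs imm64 */
};

int main(void)
{
	uint64_t imm;

	/* The movabs immediate is simply the last eight bytes, little endian (x86). */
	memcpy(&imm, &srso_seq[2], sizeof(imm));
	printf("movabs immediate: %#018llx\n", (unsigned long long)imm);
	/* Expected: 0xccccc30824648d48, matching the comment in the diff. */
	return 0;
}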