Diffstat (limited to 'arch/x86/include/asm/nospec-branch.h')
 arch/x86/include/asm/nospec-branch.h | 385 +++++++++++++++++++++++++++--------
 1 file changed, 300 insertions(+), 85 deletions(-)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..4f4b5e8a1574 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -13,7 +13,93 @@
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
-#define RETPOLINE_THUNK_SIZE 32
+/*
+ * Call depth tracking for Intel SKL CPUs to address the RSB underflow
+ * issue in software.
+ *
+ * The tracking does not use a counter. It uses arithmetic shift
+ * right on call entry and logical shift left on return.
+ *
+ * The depth tracking variable is initialized to 0x8000.... when the call
+ * depth is zero. The arithmetic shift right sign extends the MSB and
+ * saturates after the 12th call. The shift count is 5 for both directions
+ * so the tracking covers 12 nested calls.
+ *
+ * Call
+ * 0: 0x8000000000000000 0x0000000000000000
+ * 1: 0xfc00000000000000 0xf000000000000000
+ * ...
+ * 11: 0xfffffffffffffff8 0xfffffffffffffc00
+ * 12: 0xffffffffffffffff 0xffffffffffffffe0
+ *
+ * After a return buffer fill the depth is credited with 12 calls before
+ * the next stuffing has to take place.
+ *
+ * There is an inaccuracy for situations like this:
+ *
+ * 10 calls
+ * 5 returns
+ * 3 calls
+ * 4 returns
+ * 3 calls
+ * ....
+ *
+ * The shift count might cause this to be off by one in either direction,
+ * but there is still a cushion vs. the RSB depth. The algorithm does not
+ * claim to be perfect and it can be speculated around by the CPU, but it
+ * is considered to obfuscate the problem enough to make exploitation
+ * extremely difficult.
+ */
+#define RET_DEPTH_SHIFT 5
+#define RSB_RET_STUFF_LOOPS 16
+#define RET_DEPTH_INIT 0x8000000000000000ULL
+#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
+#define RET_DEPTH_CREDIT 0xffffffffffffffffULL
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+# define CALL_THUNKS_DEBUG_INC_CALLS \
+ incq PER_CPU_VAR(__x86_call_count);
+# define CALL_THUNKS_DEBUG_INC_RETS \
+ incq PER_CPU_VAR(__x86_ret_count);
+# define CALL_THUNKS_DEBUG_INC_STUFFS \
+ incq PER_CPU_VAR(__x86_stuffs_count);
+# define CALL_THUNKS_DEBUG_INC_CTXSW \
+ incq PER_CPU_VAR(__x86_ctxsw_count);
+#else
+# define CALL_THUNKS_DEBUG_INC_CALLS
+# define CALL_THUNKS_DEBUG_INC_RETS
+# define CALL_THUNKS_DEBUG_INC_STUFFS
+# define CALL_THUNKS_DEBUG_INC_CTXSW
+#endif
+
+#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+
+#include <asm/asm-offsets.h>
+
+#define CREDIT_CALL_DEPTH \
+ movq $-1, PER_CPU_VAR(__x86_call_depth);
+
+#define RESET_CALL_DEPTH \
+ xor %eax, %eax; \
+ bts $63, %rax; \
+ movq %rax, PER_CPU_VAR(__x86_call_depth);
+
+#define RESET_CALL_DEPTH_FROM_CALL \
+ movb $0xfc, %al; \
+ shl $56, %rax; \
+ movq %rax, PER_CPU_VAR(__x86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#define INCREMENT_CALL_DEPTH \
+ sarq $5, PER_CPU_VAR(__x86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#else
+#define CREDIT_CALL_DEPTH
+#define RESET_CALL_DEPTH
+#define RESET_CALL_DEPTH_FROM_CALL
+#define INCREMENT_CALL_DEPTH
+#endif
/*
* Fill the CPU return stack buffer.
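To make the shift-based accounting above concrete, here is a minimal user-space C sketch. It is not kernel code; it only mirrors the arithmetic of INCREMENT_CALL_DEPTH and CREDIT_CALL_DEPTH, and it assumes the compiler implements `>>` on signed integers as an arithmetic shift (as GCC and Clang do):

#include <stdio.h>
#include <stdint.h>

#define RET_DEPTH_SHIFT	5

int main(void)
{
	int64_t depth = INT64_MIN;	/* 0x8000000000000000: call depth zero */

	for (int i = 1; i <= 13; i++) {
		depth >>= RET_DEPTH_SHIFT;	/* call entry: sarq $5, __x86_call_depth */
		printf("call %2d: 0x%016llx\n", i, (unsigned long long)depth);
	}
	/* The sign bit keeps extending until the value sticks at all ones. */

	depth = -1;			/* RSB stuffing: CREDIT_CALL_DEPTH (movq $-1) */
	printf("credit : 0x%016llx\n", (unsigned long long)depth);

	/* The return path would apply a logical shift left by the same count. */
	return 0;
}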
@@ -32,6 +118,7 @@
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
*/
+#define RETPOLINE_THUNK_SIZE 32
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
@@ -60,7 +147,9 @@
dec reg; \
jnz 771b; \
/* barrier for jnz misprediction */ \
- lfence;
+ lfence; \
+ CREDIT_CALL_DEPTH \
+ CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
* i386 doesn't unconditionally have LFENCE, as such it can't
@@ -87,19 +176,7 @@
add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
-#ifdef __ASSEMBLY__
-
-/*
- * This should be used immediately before an indirect jump/call. It tells
- * objtool the subsequent indirect jump/call is vouched safe for retpoline
- * builds.
- */
-.macro ANNOTATE_RETPOLINE_SAFE
- .Lannotate_\@:
- .pushsection .discard.retpoline_safe
- _ASM_PTR .Lannotate_\@
- .popsection
-.endm
+#ifdef __ASSEMBLER__
/*
* (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
@@ -109,19 +186,19 @@
/*
* Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
- * eventually turn into it's own annotation.
+ * eventually turn into its own annotation.
*/
-.macro ANNOTATE_UNRET_END
-#ifdef CONFIG_DEBUG_ENTRY
+.macro VALIDATE_UNRET_END
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
ANNOTATE_RETPOLINE_SAFE
nop
#endif
.endm
/*
- * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
- * to the retpoline thunk with a CS prefix when the register requires
- * a RAX prefix byte to encode. Also see apply_retpolines().
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
.macro __CS_PREFIX reg:req
.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
@@ -135,9 +212,13 @@
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
+ *
+ * NOTE: these do not take kCFI into account and are thus not comparable to C
+ * indirect calls; take care when using them. The target of these should be an
+ * ENDBR instruction irrespective of kCFI.
*/
.macro JMP_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
jmp __x86_indirect_thunk_\reg
#else
@@ -147,7 +228,7 @@
.endm
.macro CALL_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
call __x86_indirect_thunk_\reg
#else
@@ -162,75 +243,218 @@
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
- __stringify(__FILL_ONE_RETURN), \ftr2
+ __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
.Lskip_rsb_\@:
.endm
-#ifdef CONFIG_CPU_UNRET_ENTRY
-#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
-#else
-#define CALL_ZEN_UNTRAIN_RET ""
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, until the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
* return thunk isn't mapped into the userspace tables (then again, AMD
* typically has NO_MELTDOWN).
*
- * While zen_untrain_ret() doesn't clobber anything but requires stack,
- * entry_ibpb() will clobber AX, CX, DX.
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
+ * write_ibpb() will clobber AX, CX, DX.
*
* As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
* where we have a stack but before any RET instruction.
*/
-.macro UNTRAIN_RET
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
- ANNOTATE_UNRET_END
+.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
+#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
+ VALIDATE_UNRET_END
+ CALL_UNTRAIN_RET
ALTERNATIVE_2 "", \
- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ "call write_ibpb", \ibpb_feature, \
+ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+#define UNTRAIN_RET \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_VM \
+ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_FROM_CALL \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)
+
+
+.macro CALL_DEPTH_ACCOUNT
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+ ALTERNATIVE "", \
+ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
-#else /* __ASSEMBLY__ */
+/*
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+#ifdef CONFIG_X86_64
+#define VERW verw x86_verw_sel(%rip)
+#else
+/*
+ * In 32bit mode, the memory operand must be a %cs reference. The data segments
+ * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32).
+ */
+#define VERW verw %cs:x86_verw_sel
+#endif
+
+/*
+ * Provide a stringified VERW macro for simple usage, and a non-stringified
+ * VERW macro for use in more elaborate sequences, e.g. to encode a conditional
+ * VERW within an ALTERNATIVE.
+ */
+#define __CLEAR_CPU_BUFFERS __stringify(VERW)
+
+/* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. */
+#define CLEAR_CPU_BUFFERS \
+ ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF
+
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
-#define ANNOTATE_RETPOLINE_SAFE \
- "999:\n\t" \
- ".pushsection .discard.retpoline_safe\n\t" \
- _ASM_PTR " 999b\n\t" \
- ".popsection\n\t"
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
+#else /* __ASSEMBLER__ */
+
+#define ITS_THUNK_SIZE 64
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+typedef u8 its_thunk_t[ITS_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+extern its_thunk_t __x86_indirect_its_thunk_array[];
+#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
-extern void zen_untrain_ret(void);
-extern void entry_ibpb(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
+extern void retbleed_return_thunk(void);
+#else
+static inline void retbleed_return_thunk(void) {}
+#endif
+
+extern void srso_alias_untrain_ret(void);
+
+#ifdef CONFIG_MITIGATION_SRSO
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+#else
+static inline void srso_return_thunk(void) {}
+static inline void srso_alias_return_thunk(void) {}
+#endif
+
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_return_thunk(void);
+#else
+static inline void its_return_thunk(void) {}
+#endif
+
+extern void entry_untrain_ret(void);
+extern void write_ibpb(void);
+
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
+extern void (*x86_return_thunk)(void);
+
+extern void __warn_thunk(void);
+
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+extern void call_depth_return_thunk(void);
+
+#define CALL_DEPTH_ACCOUNT \
+ ALTERNATIVE("", \
+ __stringify(INCREMENT_CALL_DEPTH), \
+ X86_FEATURE_CALL_DEPTH)
+
+DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DECLARE_PER_CPU(u64, __x86_call_count);
+DECLARE_PER_CPU(u64, __x86_ret_count);
+DECLARE_PER_CPU(u64, __x86_stuffs_count);
+DECLARE_PER_CPU(u64, __x86_ctxsw_count);
+#endif
+#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
+
+static inline void call_depth_return_thunk(void) {}
+#define CALL_DEPTH_ACCOUNT ""
+
+#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
- * which is ensured when CONFIG_RETPOLINE is defined.
+ * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_LFENCE)
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
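The 64-bit CALL_NOSPEC above is meant to be dropped into inline asm together with THUNK_TARGET(). Below is a hedged sketch of a caller, assuming a no-argument callee and the usual kernel includes; the helper name and the exact clobber list are illustrative, not part of this patch:

/* Hypothetical helper, for illustration only; assumes <asm/nospec-branch.h>. */
static inline unsigned long call_nospec(unsigned long (*fn)(void))
{
	unsigned long ret;

	/*
	 * CALL_NOSPEC emits "call __x86_indirect_thunk_<reg>" (with an
	 * optional CS prefix); THUNK_TARGET() binds fn to %V[thunk_target].
	 * The asm hides the call from the compiler, so the caller-saved
	 * registers the callee may use are listed as clobbers.
	 */
	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT
		     : THUNK_TARGET(fn)
		     : "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
		       "memory", "cc");
	return ret;
}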
@@ -242,7 +466,7 @@ extern void entry_ibpb(void);
*/
# define CALL_NOSPEC \
ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
@@ -258,7 +482,7 @@ extern void entry_ibpb(void);
"904: call 901b;\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_LFENCE)
@@ -292,14 +516,12 @@ enum spectre_v2_user_mitigation {
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_AUTO,
SPEC_STORE_BYPASS_DISABLE,
SPEC_STORE_BYPASS_PRCTL,
SPEC_STORE_BYPASS_SECCOMP,
};
-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
@@ -311,17 +533,19 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
+DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+
static inline void indirect_branch_prediction_barrier(void)
{
- u64 val = PRED_CMD_IBPB;
-
- alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+ asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
+ : ASM_CALL_CONSTRAINT
+ :: "rax", "rcx", "rdx", "memory");
}
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
-extern void write_spec_ctrl_current(u64 val, bool force);
+extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);
/*
@@ -352,23 +576,24 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
+
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
-DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -385,27 +610,17 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
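Since the diff context elides the body of x86_clear_cpu_buffers(), here is a minimal sketch of the memory-operand VERW idiom the comments above describe, assuming the usual kernel includes; it illustrates the mechanism and is not necessarily the literal in-tree body:

static __always_inline void clear_cpu_buffers_sketch(void)
{
	/* Any valid selector works; a writable data segment is the fastest. */
	static const u16 ds = __KERNEL_DS;

	/*
	 * Only the memory-operand form of VERW triggers the microcode-assisted
	 * buffer clear; the register form does not.  VERW clobbers CFLAGS.ZF,
	 * hence the "cc" clobber.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}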