summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/alternative.h8
-rw-r--r--arch/x86/kernel/alternative.c145
-rw-r--r--arch/x86/lib/retpoline.S15
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--tools/objtool/arch/x86/decode.c9
5 files changed, 159 insertions, 20 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 47948ebbb409..f2294784babc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -6,6 +6,7 @@
#include <linux/stringify.h>
#include <linux/objtool.h>
#include <asm/asm.h>
+#include <asm/bug.h>
#define ALT_FLAGS_SHIFT 16
@@ -128,10 +129,17 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
extern void its_init_mod(struct module *mod);
extern void its_fini_mod(struct module *mod);
extern void its_free_mod(struct module *mod);
+extern u8 *its_static_thunk(int reg);
#else /* CONFIG_MITIGATION_ITS */
static inline void its_init_mod(struct module *mod) { }
static inline void its_fini_mod(struct module *mod) { }
static inline void its_free_mod(struct module *mod) { }
+static inline u8 *its_static_thunk(int reg)
+{
+ WARN_ONCE(1, "ITS not compiled in");
+
+ return NULL;
+}
#endif
#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7a10e3ed5d0b..48fd04e90114 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -127,6 +127,10 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
#endif
};
+#ifdef CONFIG_FINEIBT
+static bool cfi_paranoid __ro_after_init;
+#endif
+
#ifdef CONFIG_MITIGATION_ITS
static struct module *its_mod;
@@ -137,8 +141,25 @@ static unsigned int its_offset;
static void *its_init_thunk(void *thunk, int reg)
{
u8 *bytes = thunk;
+ int offset = 0;
int i = 0;
+#ifdef CONFIG_FINEIBT
+ if (cfi_paranoid) {
+ /*
+ * When ITS uses indirect branch thunk the fineibt_paranoid
+ * caller sequence doesn't fit in the caller site. So put the
+ * remaining part of the sequence (<ea> + JNE) into the ITS
+ * thunk.
+ */
+ bytes[i++] = 0xea; /* invalid instruction */
+ bytes[i++] = 0x75; /* JNE */
+ bytes[i++] = 0xfd;
+
+ offset = 1;
+ }
+#endif
+
if (reg >= 8) {
bytes[i++] = 0x41; /* REX.B prefix */
reg -= 8;
@@ -147,7 +168,7 @@ static void *its_init_thunk(void *thunk, int reg)
bytes[i++] = 0xe0 + reg; /* jmp *reg */
bytes[i++] = 0xcc;
- return thunk;
+ return thunk + offset;
}
void its_init_mod(struct module *mod)
@@ -217,6 +238,17 @@ static void *its_allocate_thunk(int reg)
int size = 3 + (reg / 8);
void *thunk;
+#ifdef CONFIG_FINEIBT
+ /*
+ * The ITS thunk contains an indirect jump and an int3 instruction so
+ * its size is 3 or 4 bytes depending on the register used. If CFI
+ * paranoid is used then 3 extra bytes are added in the ITS thunk to
+ * complete the fineibt_paranoid caller sequence.
+ */
+ if (cfi_paranoid)
+ size += 3;
+#endif
+
if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
its_page = its_alloc();
if (!its_page) {
@@ -240,6 +272,18 @@ static void *its_allocate_thunk(int reg)
return its_init_thunk(thunk, reg);
}
+u8 *its_static_thunk(int reg)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+
+#ifdef CONFIG_FINEIBT
+ /* Paranoid thunk starts 2 bytes before */
+ if (cfi_paranoid)
+ return thunk - 2;
+#endif
+ return thunk;
+}
+
#endif
/*
@@ -775,8 +819,17 @@ static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
/* Lower-half of the cacheline? */
return !(addr & 0x20);
}
+#else /* CONFIG_MITIGATION_ITS */
+
+#ifdef CONFIG_FINEIBT
+static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
+{
+ return false;
+}
#endif
+#endif /* CONFIG_MITIGATION_ITS */
+
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@@ -893,6 +946,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
int len, ret;
u8 bytes[16];
u8 op1, op2;
+ u8 *dest;
ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
@@ -909,6 +963,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
+ /* Check for cfi_paranoid + ITS */
+ dest = addr + insn.length + insn.immediate.value;
+ if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
+ WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
+ continue;
+ }
break;
case 0x0f: /* escape */
@@ -1198,8 +1258,6 @@ int cfi_get_func_arity(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
-static bool cfi_paranoid __ro_after_init = false;
-
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
@@ -1612,6 +1670,19 @@ static int cfi_rand_callers(s32 *start, s32 *end)
return 0;
}
+static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 *thunk = (void *)__x86_indirect_its_thunk_array[reg] - 2;
+
+#ifdef CONFIG_MITIGATION_ITS
+ u8 *tmp = its_allocate_thunk(reg);
+ if (tmp)
+ thunk = tmp;
+#endif
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
+}
+
static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
@@ -1653,9 +1724,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
memcpy(bytes + fineibt_caller_hash, &hash, 4);
- ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
- if (WARN_ON_ONCE(ret != 3))
- continue;
+ if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) {
+ emit_paranoid_trampoline(addr + fineibt_caller_size,
+ &insn, 11, bytes + fineibt_caller_size);
+ } else {
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
+ if (WARN_ON_ONCE(ret != 3))
+ continue;
+ }
text_poke_early(addr, bytes, fineibt_paranoid_size);
}
@@ -1882,29 +1958,66 @@ Efault:
return false;
}
+static bool is_paranoid_thunk(unsigned long addr)
+{
+ u32 thunk;
+
+ __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
+ return (thunk & 0x00FFFFFF) == 0xfd75ea;
+
+Efault:
+ return false;
+}
+
/*
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
- * sequence.
+ * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
+ * thunk.
*/
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
{
unsigned long addr = regs->ip - fineibt_paranoid_ud;
- u32 hash;
- if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
+ if (!cfi_paranoid)
return false;
- __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ /*
+ * Since the trapping instruction is the exact, but LOCK prefixed,
+ * Jcc.d8 that got us here, the normal fixup will work.
+ */
+ return true;
+ }
/*
- * Since the trapping instruction is the exact, but LOCK prefixed,
- * Jcc.d8 that got us here, the normal fixup will work.
+ * The cfi_paranoid + ITS thunk combination results in:
+ *
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
+ * a: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
+ *
+ * Where the paranoid_thunk looks like:
+ *
+ * 1d: <ea> (bad)
+ * __x86_indirect_paranoid_thunk_r11:
+ * 1e: 75 fd jne 1d
+ * __x86_indirect_its_thunk_r11:
+ * 20: 41 ff eb jmp *%r11
+ * 23: cc int3
+ *
*/
- return true;
+ if (is_paranoid_thunk(regs->ip)) {
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ regs->ip = *target;
+ return true;
+ }
-Efault:
return false;
}
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index ebca28fe7e31..39374949daa2 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -371,6 +371,15 @@ SYM_FUNC_END(call_depth_return_thunk)
.macro ITS_THUNK reg
+/*
+ * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b)
+ * that complete the fineibt_paranoid caller sequence.
+ */
+1: .byte 0xea
+SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ jne 1b
SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
@@ -378,19 +387,19 @@ SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
jmp *%\reg
int3
.align 32, 0xcc /* fill to the end of the line */
- .skip 32, 0xcc /* skip to the next upper half */
+ .skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */
.endm
/* ITS mitigation requires thunks be aligned to upper half of cacheline */
.align 64, 0xcc
-.skip 32, 0xcc
-SYM_CODE_START(__x86_indirect_its_thunk_array)
+.skip 29, 0xcc
#define GEN(reg) ITS_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
.align 64, 0xcc
+SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax)
SYM_CODE_END(__x86_indirect_its_thunk_array)
.align 64, 0xcc
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a5b65c09910b..a31e58c6d89e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -663,7 +663,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
OPTIMIZER_HIDE_VAR(reg);
- emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip);
+ emit_jump(&prog, its_static_thunk(reg), ip);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 3ce7b54003c2..687c5eafb49a 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
+ /*
+ * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
+ */
+ if (op1 == 0xea) {
+ insn->len = 1;
+ insn->type = INSN_BUG;
+ return 0;
+ }
+
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;