summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/alternative.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/alternative.c')
-rw-r--r--arch/x86/kernel/alternative.c651
1 files changed, 510 insertions, 141 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 45a280f2161c..bf82c6f7d690 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -125,6 +125,20 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
};
/*
+ * Nomenclature for variable names to simplify and clarify this code and ease
+ * any potential staring at it:
+ *
+ * @instr: source address of the original instructions in the kernel text as
+ * generated by the compiler.
+ *
+ * @buf: temporary buffer on which the patching operates. This buffer is
+ * eventually text-poked into the kernel image.
+ *
+ * @replacement/@repl: pointer to the opcodes which are replacing @instr, located
+ * in the .altinstr_replacement section.
+ */
+
+/*
* Fill the buffer with a single effective instruction of size @len.
*
* In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
@@ -133,28 +147,28 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
* each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
* *jump* over instead of executing long and daft NOPs.
*/
-static void add_nop(u8 *instr, unsigned int len)
+static void add_nop(u8 *buf, unsigned int len)
{
- u8 *target = instr + len;
+ u8 *target = buf + len;
if (!len)
return;
if (len <= ASM_NOP_MAX) {
- memcpy(instr, x86_nops[len], len);
+ memcpy(buf, x86_nops[len], len);
return;
}
if (len < 128) {
- __text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
- instr += JMP8_INSN_SIZE;
+ __text_gen_insn(buf, JMP8_INSN_OPCODE, buf, target, JMP8_INSN_SIZE);
+ buf += JMP8_INSN_SIZE;
} else {
- __text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
- instr += JMP32_INSN_SIZE;
+ __text_gen_insn(buf, JMP32_INSN_OPCODE, buf, target, JMP32_INSN_SIZE);
+ buf += JMP32_INSN_SIZE;
}
- for (;instr < target; instr++)
- *instr = INT3_INSN_OPCODE;
+ for (;buf < target; buf++)
+ *buf = INT3_INSN_OPCODE;
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
@@ -187,12 +201,12 @@ static bool insn_is_nop(struct insn *insn)
* Find the offset of the first non-NOP instruction starting at @offset
* but no further than @len.
*/
-static int skip_nops(u8 *instr, int offset, int len)
+static int skip_nops(u8 *buf, int offset, int len)
{
struct insn insn;
for (; offset < len; offset += insn.length) {
- if (insn_decode_kernel(&insn, &instr[offset]))
+ if (insn_decode_kernel(&insn, &buf[offset]))
break;
if (!insn_is_nop(&insn))
@@ -203,66 +217,32 @@ static int skip_nops(u8 *instr, int offset, int len)
}
/*
- * Optimize a sequence of NOPs, possibly preceded by an unconditional jump
- * to the end of the NOP sequence into a single NOP.
- */
-static bool
-__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
-{
- int i = *next - insn->length;
-
- switch (insn->opcode.bytes[0]) {
- case JMP8_INSN_OPCODE:
- case JMP32_INSN_OPCODE:
- *prev = i;
- *target = *next + insn->immediate.value;
- return false;
- }
-
- if (insn_is_nop(insn)) {
- int nop = i;
-
- *next = skip_nops(instr, *next, len);
- if (*target && *next == *target)
- nop = *prev;
-
- add_nop(instr + nop, *next - nop);
- DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
- return true;
- }
-
- *target = 0;
- return false;
-}
-
-/*
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
-static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+static void noinline optimize_nops(const u8 * const instr, u8 *buf, size_t len)
{
- int prev, target = 0;
-
for (int next, i = 0; i < len; i = next) {
struct insn insn;
- if (insn_decode_kernel(&insn, &instr[i]))
+ if (insn_decode_kernel(&insn, &buf[i]))
return;
next = i + insn.length;
- __optimize_nops(instr, len, &insn, &next, &prev, &target);
- }
-}
+ if (insn_is_nop(&insn)) {
+ int nop = i;
-static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
-{
- unsigned long flags;
+ /* Has the NOP already been optimized? */
+ if (i + insn.length == len)
+ return;
- local_irq_save(flags);
- optimize_nops(instr, len);
- sync_core();
- local_irq_restore(flags);
+ next = skip_nops(buf, next, len);
+
+ add_nop(buf + nop, next - nop);
+ DUMP_BYTES(ALT, buf, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, next);
+ }
+ }
}
/*
@@ -335,11 +315,9 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
return (target < src || target > src + src_len);
}
-void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+static void __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
{
- int prev, target = 0;
-
- for (int next, i = 0; i < len; i = next) {
+ for (int next, i = 0; i < instrlen; i = next) {
struct insn insn;
if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
@@ -347,9 +325,6 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
next = i + insn.length;
- if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
- continue;
-
switch (insn.opcode.bytes[0]) {
case 0x0f:
if (insn.opcode.bytes[1] < 0x80 ||
@@ -361,10 +336,10 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
case JMP8_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case CALL_INSN_OPCODE:
- if (need_reloc(next + insn.immediate.value, src, src_len)) {
+ if (need_reloc(next + insn.immediate.value, repl, repl_len)) {
apply_reloc(insn.immediate.nbytes,
buf + i + insn_offset_immediate(&insn),
- src - dest);
+ repl - instr);
}
/*
@@ -372,7 +347,7 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
*/
if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) {
s32 imm = insn.immediate.value;
- imm += src - dest;
+ imm += repl - instr;
imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE;
if ((imm >> 31) == (imm >> 7)) {
buf[i+0] = JMP8_INSN_OPCODE;
@@ -385,15 +360,21 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
}
if (insn_rip_relative(&insn)) {
- if (need_reloc(next + insn.displacement.value, src, src_len)) {
+ if (need_reloc(next + insn.displacement.value, repl, repl_len)) {
apply_reloc(insn.displacement.nbytes,
buf + i + insn_offset_displacement(&insn),
- src - dest);
+ repl - instr);
}
}
}
}
+void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
+{
+ __apply_relocation(buf, instr, instrlen, repl, repl_len);
+ optimize_nops(instr, buf, instrlen);
+}
+
/* Low-level backend functions usable from alternative code replacements. */
DEFINE_ASM_FUNC(nop_func, "", .entry.text);
EXPORT_SYMBOL_GPL(nop_func);
@@ -451,6 +432,11 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
return 5;
}
+static inline u8 * instr_va(struct alt_instr *i)
+{
+ return (u8 *)&i->instr_offset + i->instr_offset;
+}
+
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
@@ -464,9 +450,9 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
- struct alt_instr *a;
- u8 *instr, *replacement;
u8 insn_buff[MAX_PATCH_LEN];
+ u8 *instr, *replacement;
+ struct alt_instr *a, *b;
DPRINTK(ALT, "alt table %px, -> %px", start, end);
@@ -492,7 +478,18 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
- instr = (u8 *)&a->instr_offset + a->instr_offset;
+ /*
+ * In case of nested ALTERNATIVE()s the outer alternative might
+ * add more padding. To ensure consistent patching find the max
+ * padding for all alt_instr entries for this site (nested
+ * alternatives result in consecutive entries).
+ */
+ for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
+ u8 len = max(a->instrlen, b->instrlen);
+ a->instrlen = b->instrlen = len;
+ }
+
+ instr = instr_va(a);
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
@@ -504,7 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
- optimize_nops_inplace(instr, a->instrlen);
+ memcpy(insn_buff, instr, a->instrlen);
+ optimize_nops(instr, insn_buff, a->instrlen);
+ text_poke_early(instr, insn_buff, a->instrlen);
continue;
}
@@ -526,7 +525,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;
- apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
+ apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -742,6 +741,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
op2 = insn.opcode.bytes[1];
switch (op1) {
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ /* See cfi_paranoid. */
+ WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
+ continue;
+
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
break;
@@ -761,7 +765,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
- optimize_nops(bytes, len);
+ optimize_nops(addr, bytes, len);
DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(addr, bytes, len);
@@ -840,32 +844,53 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
}
}
}
-#else
+#else /* !CONFIG_MITIGATION_RETHUNK: */
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
-#endif /* CONFIG_MITIGATION_RETHUNK */
+#endif /* !CONFIG_MITIGATION_RETHUNK */
#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
-#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */
+#endif /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr);
+__noendbr bool is_endbr(u32 *val)
+{
+ u32 endbr;
+
+ __get_kernel_nofault(&endbr, val, u32, Efault);
+ return __is_endbr(endbr);
+
+Efault:
+ return false;
+}
-static void __init_or_module poison_endbr(void *addr, bool warn)
+#ifdef CONFIG_FINEIBT
+
+static __noendbr bool exact_endbr(u32 *val)
{
- u32 endbr, poison = gen_endbr_poison();
+ u32 endbr;
- if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
- return;
+ __get_kernel_nofault(&endbr, val, u32, Efault);
+ return endbr == gen_endbr();
+
+Efault:
+ return false;
+}
+
+#endif
- if (!is_endbr(endbr)) {
- WARN_ON_ONCE(warn);
+static void poison_cfi(void *addr);
+
+static void __init_or_module poison_endbr(void *addr)
+{
+ u32 poison = gen_endbr_poison();
+
+ if (WARN_ON_ONCE(!is_endbr(addr)))
return;
- }
DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr);
@@ -890,28 +915,32 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- poison_endbr(addr, true);
+ poison_endbr(addr);
if (IS_ENABLED(CONFIG_FINEIBT))
poison_cfi(addr - 16);
}
}
-#else
+#else /* !CONFIG_X86_KERNEL_IBT: */
void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
-#endif /* CONFIG_X86_KERNEL_IBT */
+#endif /* !CONFIG_X86_KERNEL_IBT */
-#ifdef CONFIG_FINEIBT
-#define __CFI_DEFAULT CFI_DEFAULT
+#ifdef CONFIG_CFI_AUTO_DEFAULT
+# define __CFI_DEFAULT CFI_AUTO
#elif defined(CONFIG_CFI_CLANG)
-#define __CFI_DEFAULT CFI_KCFI
+# define __CFI_DEFAULT CFI_KCFI
#else
-#define __CFI_DEFAULT CFI_OFF
+# define __CFI_DEFAULT CFI_OFF
#endif
enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+#ifdef CONFIG_FINEIBT_BHI
+bool cfi_bhi __ro_after_init = false;
+#endif
+
#ifdef CONFIG_CFI_CLANG
struct bpf_insn;
@@ -919,11 +948,7 @@ struct bpf_insn;
extern unsigned int __bpf_prog_runX(const void *ctx,
const struct bpf_insn *insn);
-/*
- * Force a reference to the external symbol so the compiler generates
- * __kcfi_typid.
- */
-__ADDRESSABLE(__bpf_prog_runX);
+KCFI_REFERENCE(__bpf_prog_runX);
/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
asm (
@@ -940,7 +965,7 @@ asm (
/* Must match bpf_callback_t */
extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
-__ADDRESSABLE(__bpf_callback_fn);
+KCFI_REFERENCE(__bpf_callback_fn);
/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
asm (
@@ -975,6 +1000,21 @@ u32 cfi_get_func_hash(void *func)
return hash;
}
+
+int cfi_get_func_arity(void *func)
+{
+ bhi_thunk *target;
+ s32 disp;
+
+ if (cfi_mode != CFI_FINEIBT && !cfi_bhi)
+ return 0;
+
+ if (get_kernel_nofault(disp, func - 4))
+ return 0;
+
+ target = func + disp;
+ return target - __bhi_args;
+}
#endif
#ifdef CONFIG_FINEIBT
@@ -982,6 +1022,8 @@ u32 cfi_get_func_hash(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
+static bool cfi_paranoid __ro_after_init = false;
+
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
@@ -989,7 +1031,7 @@ static u32 cfi_seed __ro_after_init;
static u32 cfi_rehash(u32 hash)
{
hash ^= cfi_seed;
- while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+ while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
bool lsb = hash & 1;
hash >>= 1;
if (lsb)
@@ -1011,7 +1053,7 @@ static __init int cfi_parse_cmdline(char *str)
}
if (!strcmp(str, "auto")) {
- cfi_mode = CFI_DEFAULT;
+ cfi_mode = CFI_AUTO;
} else if (!strcmp(str, "off")) {
cfi_mode = CFI_OFF;
cfi_rand = false;
@@ -1021,6 +1063,25 @@ static __init int cfi_parse_cmdline(char *str)
cfi_mode = CFI_FINEIBT;
} else if (!strcmp(str, "norand")) {
cfi_rand = false;
+ } else if (!strcmp(str, "warn")) {
+ pr_alert("CFI mismatch non-fatal!\n");
+ cfi_warn = true;
+ } else if (!strcmp(str, "paranoid")) {
+ if (cfi_mode == CFI_FINEIBT) {
+ cfi_paranoid = true;
+ } else {
+ pr_err("Ignoring paranoid; depends on fineibt.\n");
+ }
+ } else if (!strcmp(str, "bhi")) {
+#ifdef CONFIG_FINEIBT_BHI
+ if (cfi_mode == CFI_FINEIBT) {
+ cfi_bhi = true;
+ } else {
+ pr_err("Ignoring bhi; depends on fineibt.\n");
+ }
+#else
+ pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n");
+#endif
} else {
pr_err("Ignoring unknown cfi option (%s).", str);
}
@@ -1038,9 +1099,9 @@ early_param("cfi", cfi_parse_cmdline);
* __cfi_\func: __cfi_\func:
* movl $0x12345678,%eax // 5 endbr64 // 4
* nop subl $0x12345678,%r10d // 7
- * nop jz 1f // 2
- * nop ud2 // 2
- * nop 1: nop // 1
+ * nop jne __cfi_\func+6 // 2
+ * nop nop3 // 3
+ * nop
* nop
* nop
* nop
@@ -1052,34 +1113,53 @@ early_param("cfi", cfi_parse_cmdline);
*
* caller: caller:
* movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6
- * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4
+ * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4
* je 1f // 2 nop4 // 4
* ud2 // 2
- * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5
+ * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6
*
*/
-asm( ".pushsection .rodata \n"
- "fineibt_preamble_start: \n"
- " endbr64 \n"
- " subl $0x12345678, %r10d \n"
- " je fineibt_preamble_end \n"
- " ud2 \n"
- " nop \n"
- "fineibt_preamble_end: \n"
+/*
+ * <fineibt_preamble_start>:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d
+ * b: 75 f9 jne 6 <fineibt_preamble_start+0x6>
+ * d: 0f 1f 00 nopl (%rax)
+ *
+ * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as
+ * (bad) on x86_64 and raises #UD.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_preamble_start: \n"
+ " endbr64 \n"
+ " subl $0x12345678, %r10d \n"
+ "fineibt_preamble_bhi: \n"
+ " jne fineibt_preamble_start+6 \n"
+ ASM_NOP3
+ "fineibt_preamble_end: \n"
".popsection\n"
);
extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_bhi[];
extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start)
+#define fineibt_preamble_ud 6
#define fineibt_preamble_hash 7
+/*
+ * <fineibt_caller_start>:
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * a: 0f 1f 40 00 nopl 0x0(%rax)
+ */
asm( ".pushsection .rodata \n"
"fineibt_caller_start: \n"
" movl $0x12345678, %r10d \n"
- " sub $16, %r11 \n"
+ " lea -0x10(%r11), %r11 \n"
ASM_NOP4
"fineibt_caller_end: \n"
".popsection \n"
@@ -1093,13 +1173,62 @@ extern u8 fineibt_caller_end[];
#define fineibt_caller_jmp (fineibt_caller_size - 2)
-static u32 decode_preamble_hash(void *addr)
+/*
+ * Since FineIBT does hash validation on the callee side it is prone to
+ * circumvention attacks where a 'naked' ENDBR instruction exists that
+ * is not part of the fineibt_preamble sequence.
+ *
+ * Notably the x86 entry points must be ENDBR and equally cannot be
+ * fineibt_preamble.
+ *
+ * The fineibt_paranoid caller sequence adds additional caller side
+ * hash validation. This stops such circumvention attacks dead, but at the cost
+ * of adding a load.
+ *
+ * <fineibt_paranoid_start>:
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
+ * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11
+ * e: 75 fd jne d <fineibt_paranoid_start+0xd>
+ * 10: 41 ff d3 call *%r11
+ * 13: 90 nop
+ *
+ * Notably LEA does not modify flags and can be reordered with the CMP,
+ * avoiding a dependency. Again, using a non-taken (backwards) branch
+ * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the
+ * Jcc.d8, causing #UD.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_paranoid_start: \n"
+ " movl $0x12345678, %r10d \n"
+ " cmpl -9(%r11), %r10d \n"
+ " lea -0x10(%r11), %r11 \n"
+ " jne fineibt_paranoid_start+0xd \n"
+ "fineibt_paranoid_ind: \n"
+ " call *%r11 \n"
+ " nop \n"
+ "fineibt_paranoid_end: \n"
+ ".popsection \n"
+);
+
+extern u8 fineibt_paranoid_start[];
+extern u8 fineibt_paranoid_ind[];
+extern u8 fineibt_paranoid_end[];
+
+#define fineibt_paranoid_size (fineibt_paranoid_end - fineibt_paranoid_start)
+#define fineibt_paranoid_ind (fineibt_paranoid_ind - fineibt_paranoid_start)
+#define fineibt_paranoid_ud 0xd
+
+static u32 decode_preamble_hash(void *addr, int *reg)
{
u8 *p = addr;
- /* b8 78 56 34 12 mov $0x12345678,%eax */
- if (p[0] == 0xb8)
+ /* b8+reg 78 56 34 12 movl $0x12345678,\reg */
+ if (p[0] >= 0xb8 && p[0] < 0xc0) {
+ if (reg)
+ *reg = p[0] - 0xb8;
return *(u32 *)(addr + 1);
+ }
return 0; /* invalid hash value */
}
@@ -1108,11 +1237,11 @@ static u32 decode_caller_hash(void *addr)
{
u8 *p = addr;
- /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */
+ /* 41 ba 88 a9 cb ed mov $(-0x12345678),%r10d */
if (p[0] == 0x41 && p[1] == 0xba)
return -*(u32 *)(addr + 2);
- /* e8 0c 78 56 34 12 jmp.d8 +12 */
+ /* e8 0c 88 a9 cb ed jmp.d8 +12 */
if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
return -*(u32 *)(addr + 2);
@@ -1177,7 +1306,7 @@ static int cfi_rand_preamble(s32 *start, s32 *end)
void *addr = (void *)s + *s;
u32 hash;
- hash = decode_preamble_hash(addr);
+ hash = decode_preamble_hash(addr, NULL);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
@@ -1189,15 +1318,71 @@ static int cfi_rand_preamble(s32 *start, s32 *end)
return 0;
}
+static void cfi_fineibt_bhi_preamble(void *addr, int arity)
+{
+ if (!arity)
+ return;
+
+ if (!cfi_warn && arity == 1) {
+ /*
+ * Crazy scheme to allow arity-1 inline:
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d
+ * b: 49 0f 45 fa cmovne %r10, %rdi
+ * f: 75 f5 jne __cfi_foo+6
+ * 11: 0f 1f 00 nopl (%rax)
+ *
+ * Code that direct calls to foo()+0, decodes the tail end as:
+ *
+ * foo:
+ * 0: f5 cmc
+ * 1: 0f 1f 00 nopl (%rax)
+ *
+ * which clobbers CF, but does not affect anything ABI
+ * wise.
+ *
+ * Notably, this scheme is incompatible with permissive CFI
+ * because the CMOVcc is unconditional and RDI will have been
+ * clobbered.
+ */
+ const u8 magic[9] = {
+ 0x49, 0x0f, 0x45, 0xfa,
+ 0x75, 0xf5,
+ BYTES_NOP3,
+ };
+
+ text_poke_early(addr + fineibt_preamble_bhi, magic, 9);
+
+ return;
+ }
+
+ text_poke_early(addr + fineibt_preamble_bhi,
+ text_gen_insn(CALL_INSN_OPCODE,
+ addr + fineibt_preamble_bhi,
+ __bhi_args[arity]),
+ CALL_INSN_SIZE);
+}
+
static int cfi_rewrite_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ int arity;
u32 hash;
- hash = decode_preamble_hash(addr);
+ /*
+ * When the function doesn't start with ENDBR the compiler will
+ * have determined there are no indirect calls to it and we
+ * don't need no CFI either.
+ */
+ if (!is_endbr(addr + 16))
+ continue;
+
+ hash = decode_preamble_hash(addr, &arity);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
@@ -1205,6 +1390,13 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
+
+ WARN_ONCE(!IS_ENABLED(CONFIG_FINEIBT_BHI) && arity,
+ "kCFI preamble has wrong register at: %pS %*ph\n",
+ addr, 5, addr);
+
+ if (cfi_bhi)
+ cfi_fineibt_bhi_preamble(addr, arity);
}
return 0;
@@ -1217,7 +1409,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- poison_endbr(addr+16, false);
+ if (!exact_endbr(addr + 16))
+ continue;
+
+ poison_endbr(addr + 16);
}
}
@@ -1245,18 +1440,48 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
+ BUG_ON(fineibt_paranoid_size != 20);
+
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ struct insn insn;
+ u8 bytes[20];
u32 hash;
+ int ret;
+ u8 op;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
- if (hash) {
+ if (!hash)
+ continue;
+
+ if (!cfi_paranoid) {
text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+ /* rely on apply_retpolines() */
+ continue;
}
- /* rely on apply_retpolines() */
+
+ /* cfi_paranoid */
+ ret = insn_decode_kernel(&insn, addr + fineibt_caller_size);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op != CALL_INSN_OPCODE && op != JMP32_INSN_OPCODE) {
+ WARN_ON_ONCE(1);
+ continue;
+ }
+
+ memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
+ memcpy(bytes + fineibt_caller_hash, &hash, 4);
+
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
+ if (WARN_ON_ONCE(ret != 3))
+ continue;
+
+ text_poke_early(addr, bytes, fineibt_paranoid_size);
}
return 0;
@@ -1271,10 +1496,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
"FineIBT preamble wrong size: %ld", fineibt_preamble_size))
return;
- if (cfi_mode == CFI_DEFAULT) {
+ if (cfi_mode == CFI_AUTO) {
cfi_mode = CFI_KCFI;
- if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
+ if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) {
+ /*
+ * FRED has much saner context on exception entry and
+ * is less easy to take advantage of.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ cfi_paranoid = true;
cfi_mode = CFI_FINEIBT;
+ }
}
/*
@@ -1331,8 +1563,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
/* now that nobody targets func()+0, remove ENDBR there */
cfi_rewrite_endbr(start_cfi, end_cfi);
- if (builtin)
- pr_info("Using FineIBT CFI\n");
+ if (builtin) {
+ pr_info("Using %sFineIBT%s CFI\n",
+ cfi_paranoid ? "paranoid " : "",
+ cfi_bhi ? "+BHI" : "");
+ }
return;
default:
@@ -1350,9 +1585,23 @@ static inline void poison_hash(void *addr)
static void poison_cfi(void *addr)
{
+ /*
+ * Compilers manage to be inconsistent with ENDBR vs __cfi prefixes,
+ * some (static) functions for which they can determine the address
+ * is never taken do not get a __cfi prefix, but *DO* get an ENDBR.
+ *
+ * As such, these functions will get sealed, but we need to be careful
+ * to not unconditionally scribble the previous function.
+ */
switch (cfi_mode) {
case CFI_FINEIBT:
/*
+ * FineIBT prefix should start with an ENDBR.
+ */
+ if (!is_endbr(addr))
+ break;
+
+ /*
* __cfi_\func:
* osp nopl (%rax)
* subl $0, %r10d
@@ -1360,12 +1609,18 @@ static void poison_cfi(void *addr)
* ud2
* 1: nop
*/
- poison_endbr(addr, false);
+ poison_endbr(addr);
poison_hash(addr + fineibt_preamble_hash);
break;
case CFI_KCFI:
/*
+ * kCFI prefix should start with a valid hash.
+ */
+ if (!decode_preamble_hash(addr, NULL))
+ break;
+
+ /*
* __cfi_\func:
* movl $0, %eax
* .skip 11, 0x90
@@ -1378,7 +1633,117 @@ static void poison_cfi(void *addr)
}
}
-#else
+/*
+ * When regs->ip points to a 0xEA byte in the FineIBT preamble,
+ * return true and fill out target and type.
+ *
+ * We check the preamble by checking for the ENDBR instruction relative to the
+ * 0xEA instruction.
+ */
+static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_preamble_ud;
+ u32 hash;
+
+ if (!exact_endbr((void *)addr))
+ return false;
+
+ *target = addr + fineibt_preamble_size;
+
+ __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
+ *type = (u32)regs->r10 + hash;
+
+ /*
+ * Since regs->ip points to the middle of an instruction; it cannot
+ * continue with the normal fixup.
+ */
+ regs->ip = *target;
+
+ return true;
+
+Efault:
+ return false;
+}
+
+/*
+ * regs->ip points to one of the UD2 in __bhi_args[].
+ */
+static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr;
+ u32 hash;
+
+ if (!cfi_bhi)
+ return false;
+
+ if (regs->ip < (unsigned long)__bhi_args ||
+ regs->ip >= (unsigned long)__bhi_args_end)
+ return false;
+
+ /*
+ * Fetch the return address from the stack, this points to the
+ * FineIBT preamble. Since the CALL instruction is in the 5 last
+ * bytes of the preamble, the return address is in fact the target
+ * address.
+ */
+ __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault);
+ *target = addr;
+
+ addr -= fineibt_preamble_size;
+ if (!exact_endbr((void *)addr))
+ return false;
+
+ __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
+ *type = (u32)regs->r10 + hash;
+
+ /*
+ * The UD2 sites are constructed with a RET immediately following,
+ * as such the non-fatal case can use the regular fixup.
+ */
+ return true;
+
+Efault:
+ return false;
+}
+
+/*
+ * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
+ * sequence.
+ */
+static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_paranoid_ud;
+ u32 hash;
+
+ if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
+ return false;
+
+ __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ /*
+ * Since the trapping instruction is the exact, but LOCK prefixed,
+ * Jcc.d8 that got us here, the normal fixup will work.
+ */
+ return true;
+
+Efault:
+ return false;
+}
+
+bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ if (decode_fineibt_paranoid(regs, target, type))
+ return true;
+
+ if (decode_fineibt_bhi(regs, target, type))
+ return true;
+
+ return decode_fineibt_preamble(regs, target, type);
+}
+
+#else /* !CONFIG_FINEIBT: */
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
@@ -1389,7 +1754,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
static void poison_cfi(void *addr) { }
#endif
-#endif
+#endif /* !CONFIG_FINEIBT */
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi)
@@ -1658,7 +2023,7 @@ static noinline void __init alt_reloc_selftest(void)
*/
asm_inline volatile (
ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
- : /* output */
+ : ASM_CALL_CONSTRAINT
: [mem] "m" (__alt_reloc_selftest_addr)
: _ASM_ARG1
);
@@ -1702,14 +2067,14 @@ void __init alternative_instructions(void)
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-
/*
- * Now all calls are established. Apply the call thunks if
- * required.
+ * Adjust all CALL instructions to point to func()-10, including
+ * those in .altinstr_replacement.
*/
callthunks_patch_builtin_calls();
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
/*
* Seal all functions that do not have their address taken.
*/
@@ -1826,11 +2191,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
return temp_state;
}
+__ro_after_init struct mm_struct *poking_mm;
+__ro_after_init unsigned long poking_addr;
+
static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
lockdep_assert_irqs_disabled();
+
switch_mm_irqs_off(NULL, prev_state.mm, current);
+ /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
+ cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm));
+
/*
* Restore the breakpoints if they were disabled before the temporary mm
* was loaded.
@@ -1839,9 +2211,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
hw_breakpoint_restore();
}
-__ro_after_init struct mm_struct *poking_mm;
-__ro_after_init unsigned long poking_addr;
-
static void text_poke_memcpy(void *dst, const void *src, size_t len)
{
memcpy(dst, src, len);