Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--   arch/x86/kernel/alternative.c        292
-rw-r--r--   arch/x86/kernel/asm-offsets.c          1
-rw-r--r--   arch/x86/kernel/cfi.c                  2
-rw-r--r--   arch/x86/kernel/cpu/mshyperv.c        19
-rw-r--r--   arch/x86/kernel/cpu/mtrr/cleanup.c    15
-rw-r--r--   arch/x86/kernel/cpu/mtrr/mtrr.c       15
-rw-r--r--   arch/x86/kernel/cpu/sgx/encls.h        6
-rw-r--r--   arch/x86/kernel/irqinit.c              6
-rw-r--r--   arch/x86/kernel/machine_kexec_64.c     4
-rw-r--r--   arch/x86/kernel/traps.c                8
10 files changed, 208 insertions, 160 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 79ae9cb50019..8ee5ff547357 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -147,10 +147,10 @@ static void *its_init_thunk(void *thunk, int reg)
/*
* When ITS uses indirect branch thunk the fineibt_paranoid
* caller sequence doesn't fit in the caller site. So put the
- * remaining part of the sequence (<ea> + JNE) into the ITS
+ * remaining part of the sequence (UDB + JNE) into the ITS
* thunk.
*/
- bytes[i++] = 0xea; /* invalid instruction */
+ bytes[i++] = 0xd6; /* UDB */
bytes[i++] = 0x75; /* JNE */
bytes[i++] = 0xfd;
@@ -163,7 +163,7 @@ static void *its_init_thunk(void *thunk, int reg)
reg -= 8;
}
bytes[i++] = 0xff;
- bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xe0 + reg; /* JMP *reg */
bytes[i++] = 0xcc;
return thunk + offset;
@@ -713,20 +713,33 @@ static inline bool is_jcc32(struct insn *insn)
#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL)
/*
- * CALL/JMP *%\reg
+ * [CS]{,3} CALL/JMP *%\reg [INT3]*
*/
-static int emit_indirect(int op, int reg, u8 *bytes)
+static int emit_indirect(int op, int reg, u8 *bytes, int len)
{
+ int cs = 0, bp = 0;
int i = 0;
u8 modrm;
+ /*
+ * Set @len to the excess bytes after writing the instruction.
+ */
+ len -= 2 + (reg >= 8);
+ WARN_ON_ONCE(len < 0);
+
switch (op) {
case CALL_INSN_OPCODE:
modrm = 0x10; /* Reg = 2; CALL r/m */
+ /*
+ * Additional NOP is better than prefix decode penalty.
+ */
+ if (len <= 3)
+ cs = len;
break;
case JMP32_INSN_OPCODE:
modrm = 0x20; /* Reg = 4; JMP r/m */
+ bp = len;
break;
default:
@@ -734,6 +747,9 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return -1;
}
+ while (cs--)
+ bytes[i++] = 0x2e; /* CS-prefix */
+
if (reg >= 8) {
bytes[i++] = 0x41; /* REX.B prefix */
reg -= 8;
@@ -745,6 +761,9 @@ static int emit_indirect(int op, int reg, u8 *bytes)
bytes[i++] = 0xff; /* opcode */
bytes[i++] = modrm;
+ while (bp--)
+ bytes[i++] = 0xcc; /* INT3 */
+
return i;
}
@@ -918,20 +937,11 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
return emit_its_trampoline(addr, insn, reg, bytes);
#endif
- ret = emit_indirect(op, reg, bytes + i);
+ ret = emit_indirect(op, reg, bytes + i, insn->length - i);
if (ret < 0)
return ret;
i += ret;
- /*
- * The compiler is supposed to EMIT an INT3 after every unconditional
- * JMP instruction due to AMD BTC. However, if the compiler is too old
- * or MITIGATION_SLS isn't enabled, we still need an INT3 after
- * indirect JMPs even on Intel.
- */
- if (op == JMP32_INSN_OPCODE && i < insn->length)
- bytes[i++] = INT3_INSN_OPCODE;
-
for (; i < insn->length;)
bytes[i++] = BYTES_NOP1;
@@ -970,7 +980,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
case JMP32_INSN_OPCODE:
/* Check for cfi_paranoid + ITS */
dest = addr + insn.length + insn.immediate.value;
- if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
+ if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) {
WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
continue;
}
@@ -1177,6 +1187,7 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
#endif
enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+static bool cfi_debug __ro_after_init;
#ifdef CONFIG_FINEIBT_BHI
bool cfi_bhi __ro_after_init = false;
@@ -1259,6 +1270,8 @@ static __init int cfi_parse_cmdline(char *str)
} else if (!strcmp(str, "off")) {
cfi_mode = CFI_OFF;
cfi_rand = false;
+ } else if (!strcmp(str, "debug")) {
+ cfi_debug = true;
} else if (!strcmp(str, "kcfi")) {
cfi_mode = CFI_KCFI;
} else if (!strcmp(str, "fineibt")) {
@@ -1266,26 +1279,26 @@ static __init int cfi_parse_cmdline(char *str)
} else if (!strcmp(str, "norand")) {
cfi_rand = false;
} else if (!strcmp(str, "warn")) {
- pr_alert("CFI mismatch non-fatal!\n");
+ pr_alert("CFI: mismatch non-fatal!\n");
cfi_warn = true;
} else if (!strcmp(str, "paranoid")) {
if (cfi_mode == CFI_FINEIBT) {
cfi_paranoid = true;
} else {
- pr_err("Ignoring paranoid; depends on fineibt.\n");
+ pr_err("CFI: ignoring paranoid; depends on fineibt.\n");
}
} else if (!strcmp(str, "bhi")) {
#ifdef CONFIG_FINEIBT_BHI
if (cfi_mode == CFI_FINEIBT) {
cfi_bhi = true;
} else {
- pr_err("Ignoring bhi; depends on fineibt.\n");
+ pr_err("CFI: ignoring bhi; depends on fineibt.\n");
}
#else
- pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n");
+ pr_err("CFI: ignoring bhi; depends on FINEIBT_BHI=y.\n");
#endif
} else {
- pr_err("Ignoring unknown cfi option (%s).", str);
+ pr_err("CFI: Ignoring unknown option (%s).", str);
}
str = next;
@@ -1300,9 +1313,9 @@ early_param("cfi", cfi_parse_cmdline);
*
* __cfi_\func: __cfi_\func:
* movl $0x12345678,%eax // 5 endbr64 // 4
- * nop subl $0x12345678,%r10d // 7
- * nop jne __cfi_\func+6 // 2
- * nop nop3 // 3
+ * nop subl $0x12345678,%eax // 5
+ * nop jne.d32,pn \func+3 // 7
+ * nop
* nop
* nop
* nop
@@ -1311,34 +1324,44 @@ early_param("cfi", cfi_parse_cmdline);
* nop
* nop
* nop
+ * \func: \func:
+ * endbr64 nopl -42(%rax)
*
*
* caller: caller:
- * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6
+ * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5
* addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4
- * je 1f // 2 nop4 // 4
+ * je 1f // 2 nop5 // 5
* ud2 // 2
* 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6
*
+ *
+ * Notably, the FineIBT sequences are crafted such that branches are presumed
+ * non-taken. This is based on Agner Fog's optimization manual, which states:
+ *
+ * "Make conditional jumps most often not taken: The efficiency and throughput
+ * for not-taken branches is better than for taken branches on most
+ * processors. Therefore, it is good to place the most frequent branch first"
*/
/*
* <fineibt_preamble_start>:
* 0: f3 0f 1e fa endbr64
- * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d
- * b: 75 f9 jne 6 <fineibt_preamble_start+0x6>
- * d: 0f 1f 00 nopl (%rax)
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13>
+ * 10: 0f 1f 40 d6 nopl -0x2a(%rax)
*
- * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as
- * (bad) on x86_64 and raises #UD.
+ * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as
+ * UDB on x86_64 and raises #UD.
*/
asm( ".pushsection .rodata \n"
"fineibt_preamble_start: \n"
" endbr64 \n"
- " subl $0x12345678, %r10d \n"
+ " subl $0x12345678, %eax \n"
"fineibt_preamble_bhi: \n"
- " jne fineibt_preamble_start+6 \n"
- ASM_NOP3
+ " cs jne.d32 fineibt_preamble_start+0x13 \n"
+ "#fineibt_func: \n"
+ " nopl -42(%rax) \n"
"fineibt_preamble_end: \n"
".popsection\n"
);
@@ -1349,20 +1372,20 @@ extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
#define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start)
-#define fineibt_preamble_ud 6
-#define fineibt_preamble_hash 7
+#define fineibt_preamble_ud 0x13
+#define fineibt_preamble_hash 5
/*
* <fineibt_caller_start>:
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11
- * a: 0f 1f 40 00 nopl 0x0(%rax)
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
+ * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
*/
asm( ".pushsection .rodata \n"
"fineibt_caller_start: \n"
- " movl $0x12345678, %r10d \n"
+ " movl $0x12345678, %eax \n"
" lea -0x10(%r11), %r11 \n"
- ASM_NOP4
+ ASM_NOP5
"fineibt_caller_end: \n"
".popsection \n"
);
@@ -1371,7 +1394,7 @@ extern u8 fineibt_caller_start[];
extern u8 fineibt_caller_end[];
#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
-#define fineibt_caller_hash 2
+#define fineibt_caller_hash 1
#define fineibt_caller_jmp (fineibt_caller_size - 2)
@@ -1388,9 +1411,9 @@ extern u8 fineibt_caller_end[];
* of adding a load.
*
* <fineibt_paranoid_start>:
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
- * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
+ * 5: 41 3b 43 f5 cmp -0xb(%r11), %eax
+ * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11
* e: 75 fd jne d <fineibt_paranoid_start+0xd>
* 10: 41 ff d3 call *%r11
* 13: 90 nop
@@ -1402,13 +1425,13 @@ extern u8 fineibt_caller_end[];
*/
asm( ".pushsection .rodata \n"
"fineibt_paranoid_start: \n"
- " movl $0x12345678, %r10d \n"
- " cmpl -9(%r11), %r10d \n"
- " lea -0x10(%r11), %r11 \n"
+ " mov $0x12345678, %eax \n"
+ " cmpl -11(%r11), %eax \n"
+ " cs lea -0x10(%r11), %r11 \n"
+ "#fineibt_caller_size: \n"
" jne fineibt_paranoid_start+0xd \n"
"fineibt_paranoid_ind: \n"
- " call *%r11 \n"
- " nop \n"
+ " cs call *%r11 \n"
"fineibt_paranoid_end: \n"
".popsection \n"
);
@@ -1520,51 +1543,67 @@ static int cfi_rand_preamble(s32 *start, s32 *end)
return 0;
}
+/*
+ * Inline the bhi-arity 1 case:
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 49 0f 45 fa cmovne %rax, %rdi
+ * d: 2e 75 03 jne,pn foo+0x3
+ *
+ * foo:
+ * 10: 0f 1f 40 <d6> nopl -42(%rax)
+ *
+ * Notably, this scheme is incompatible with permissive CFI
+ * because the CMOVcc is unconditional and RDI will have been
+ * clobbered.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_bhi1_start: \n"
+ " cmovne %rax, %rdi \n"
+ " cs jne fineibt_bhi1_func + 0x3 \n"
+ "fineibt_bhi1_func: \n"
+ " nopl -42(%rax) \n"
+ "fineibt_bhi1_end: \n"
+ ".popsection \n"
+);
+
+extern u8 fineibt_bhi1_start[];
+extern u8 fineibt_bhi1_end[];
+
+#define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start)
+
static void cfi_fineibt_bhi_preamble(void *addr, int arity)
{
+ u8 bytes[MAX_INSN_SIZE];
+
if (!arity)
return;
if (!cfi_warn && arity == 1) {
- /*
- * Crazy scheme to allow arity-1 inline:
- *
- * __cfi_foo:
- * 0: f3 0f 1e fa endbr64
- * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d
- * b: 49 0f 45 fa cmovne %r10, %rdi
- * f: 75 f5 jne __cfi_foo+6
- * 11: 0f 1f 00 nopl (%rax)
- *
- * Code that direct calls to foo()+0, decodes the tail end as:
- *
- * foo:
- * 0: f5 cmc
- * 1: 0f 1f 00 nopl (%rax)
- *
- * which clobbers CF, but does not affect anything ABI
- * wise.
- *
- * Notably, this scheme is incompatible with permissive CFI
- * because the CMOVcc is unconditional and RDI will have been
- * clobbered.
- */
- const u8 magic[9] = {
- 0x49, 0x0f, 0x45, 0xfa,
- 0x75, 0xf5,
- BYTES_NOP3,
- };
-
- text_poke_early(addr + fineibt_preamble_bhi, magic, 9);
-
+ text_poke_early(addr + fineibt_preamble_bhi,
+ fineibt_bhi1_start, fineibt_bhi1_size);
return;
}
- text_poke_early(addr + fineibt_preamble_bhi,
- text_gen_insn(CALL_INSN_OPCODE,
- addr + fineibt_preamble_bhi,
- __bhi_args[arity]),
- CALL_INSN_SIZE);
+ /*
+ * Replace the bytes at fineibt_preamble_bhi with a CALL instruction
+ * that lines up exactly with the end of the preamble, such that the
+ * return address will be foo+0.
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity]
+ */
+ bytes[0] = 0x2e;
+ bytes[1] = 0x2e;
+ __text_gen_insn(bytes + 2, CALL_INSN_OPCODE,
+ addr + fineibt_preamble_bhi + 2,
+ __bhi_args[arity], CALL_INSN_SIZE);
+
+ text_poke_early(addr + fineibt_preamble_bhi, bytes, 7);
}
static int cfi_rewrite_preamble(s32 *start, s32 *end)
@@ -1655,8 +1694,6 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
- BUG_ON(fineibt_paranoid_size != 20);
-
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
struct insn insn;
@@ -1696,8 +1733,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
emit_paranoid_trampoline(addr + fineibt_caller_size,
&insn, 11, bytes + fineibt_caller_size);
} else {
- ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
- if (WARN_ON_ONCE(ret != 3))
+ int len = fineibt_paranoid_size - fineibt_paranoid_ind;
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len);
+ if (WARN_ON_ONCE(ret != len))
continue;
}
@@ -1707,13 +1745,20 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
return 0;
}
+#define pr_cfi_debug(X...) if (cfi_debug) pr_info(X)
+
+#define FINEIBT_WARN(_f, _v) \
+ WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v)
+
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
int ret;
- if (WARN_ONCE(fineibt_preamble_size != 16,
- "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
+ if (FINEIBT_WARN(fineibt_preamble_size, 20) ||
+ FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) ||
+ FINEIBT_WARN(fineibt_caller_size, 14) ||
+ FINEIBT_WARN(fineibt_paranoid_size, 20))
return;
if (cfi_mode == CFI_AUTO) {
@@ -1734,6 +1779,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
+ pr_cfi_debug("CFI: disabling all indirect call checking\n");
ret = cfi_disable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@@ -1744,43 +1790,53 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
+ pr_cfi_debug("CFI: cfi_seed: 0x%08x\n", cfi_seed);
+ pr_cfi_debug("CFI: rehashing all preambles\n");
ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
goto err;
+ pr_cfi_debug("CFI: rehashing all indirect calls\n");
ret = cfi_rand_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
+ } else {
+ pr_cfi_debug("CFI: rehashing disabled\n");
}
switch (cfi_mode) {
case CFI_OFF:
if (builtin)
- pr_info("Disabling CFI\n");
+ pr_info("CFI: disabled\n");
return;
case CFI_KCFI:
+ pr_cfi_debug("CFI: re-enabling all indirect call checking\n");
ret = cfi_enable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
if (builtin)
- pr_info("Using kCFI\n");
+ pr_info("CFI: Using %sretpoline kCFI\n",
+ cfi_rand ? "rehashed " : "");
return;
case CFI_FINEIBT:
+ pr_cfi_debug("CFI: adding FineIBT to all preambles\n");
/* place the FineIBT preamble at func()-16 */
ret = cfi_rewrite_preamble(start_cfi, end_cfi);
if (ret)
goto err;
/* rewrite the callers to target func()-16 */
+ pr_cfi_debug("CFI: rewriting indirect call sites to use FineIBT\n");
ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
/* now that nobody targets func()+0, remove ENDBR there */
+ pr_cfi_debug("CFI: removing old endbr insns\n");
cfi_rewrite_endbr(start_cfi, end_cfi);
if (builtin) {
@@ -1823,11 +1879,11 @@ static void poison_cfi(void *addr)
/*
* __cfi_\func:
- * osp nopl (%rax)
- * subl $0, %r10d
- * jz 1f
- * ud2
- * 1: nop
+ * nopl -42(%rax)
+ * sub $0, %eax
+ * jne \func+3
+ * \func:
+ * nopl -42(%rax)
*/
poison_endbr(addr);
poison_hash(addr + fineibt_preamble_hash);
@@ -1853,12 +1909,14 @@ static void poison_cfi(void *addr)
}
}
+#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
+
/*
- * When regs->ip points to a 0xEA byte in the FineIBT preamble,
+ * When regs->ip points to a 0xD6 byte in the FineIBT preamble,
* return true and fill out target and type.
*
* We check the preamble by checking for the ENDBR instruction relative to the
- * 0xEA instruction.
+ * UDB instruction.
*/
static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type)
{
@@ -1868,10 +1926,10 @@ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target,
if (!exact_endbr((void *)addr))
return false;
- *target = addr + fineibt_preamble_size;
+ *target = addr + fineibt_prefix_size;
__get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
- *type = (u32)regs->r10 + hash;
+ *type = (u32)regs->ax + hash;
/*
* Since regs->ip points to the middle of an instruction; it cannot
@@ -1909,12 +1967,12 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32
__get_kernel_nofault(&addr, regs->sp, unsigned long, Efault);
*target = addr;
- addr -= fineibt_preamble_size;
+ addr -= fineibt_prefix_size;
if (!exact_endbr((void *)addr))
return false;
__get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
- *type = (u32)regs->r10 + hash;
+ *type = (u32)regs->ax + hash;
/*
* The UD2 sites are constructed with a RET immediately following,
@@ -1931,7 +1989,7 @@ static bool is_paranoid_thunk(unsigned long addr)
u32 thunk;
__get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
- return (thunk & 0x00FFFFFF) == 0xfd75ea;
+ return (thunk & 0x00FFFFFF) == 0xfd75d6;
Efault:
return false;
@@ -1939,8 +1997,7 @@ Efault:
/*
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
- * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
- * thunk.
+ * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk.
*/
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
{
@@ -1950,8 +2007,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
return false;
if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ *target = regs->r11 + fineibt_prefix_size;
+ *type = regs->ax;
/*
* Since the trapping instruction is the exact, but LOCK prefixed,
@@ -1963,14 +2020,14 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
/*
* The cfi_paranoid + ITS thunk combination results in:
*
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
- * a: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
 * 5: 41 3b 43 f5 cmp -0xb(%r11), %eax
 * 9: 2e 4d 8d 5b f0 cs lea -0x10(%r11), %r11
* e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
*
* Where the paranoid_thunk looks like:
*
- * 1d: <ea> (bad)
+ * 1d: <d6> udb
* __x86_indirect_paranoid_thunk_r11:
* 1e: 75 fd jne 1d
* __x86_indirect_its_thunk_r11:
@@ -1979,8 +2036,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
*
*/
if (is_paranoid_thunk(regs->ip)) {
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ *target = regs->r11 + fineibt_prefix_size;
+ *type = regs->ax;
regs->ip = *target;
return true;
@@ -2005,6 +2062,8 @@ bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
+ if (IS_ENABLED(CONFIG_CFI) && builtin)
+ pr_info("CFI: Using standard kCFI\n");
}
#ifdef CONFIG_X86_KERNEL_IBT
@@ -2321,6 +2380,7 @@ void __init alternative_instructions(void)
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
+ cfi_debug = false;
/*
* Rewrite the retpolines, must be done before alternatives since
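For reference, a minimal user-space sketch of the slot-filling scheme the reworked emit_indirect() above implements: CALLs absorb small amounts of slack with CS prefixes (cheaper than a trailing NOP on most parts), while JMPs pad the tail with INT3 so nothing can speculate past them. The opcode selectors, helper name and the 5-byte demo slot are illustrative; only the padding logic mirrors the hunk.

#include <stdint.h>
#include <stdio.h>

#define SK_CALL 0xe8	/* selector only; the kernel uses CALL_INSN_OPCODE */
#define SK_JMP  0xe9	/* selector only; the kernel uses JMP32_INSN_OPCODE */

static int emit_indirect_sketch(int op, int reg, uint8_t *bytes, int len)
{
	int cs = 0, bp = 0, i = 0;
	uint8_t modrm;

	len -= 2 + (reg >= 8);		/* slack left after ff /r [+ REX.B] */
	if (len < 0)
		return -1;

	if (op == SK_CALL) {
		modrm = 0x10;		/* ModRM.reg = 2: CALL r/m */
		if (len <= 3)
			cs = len;	/* prefix-pad short slack */
	} else {
		modrm = 0x20;		/* ModRM.reg = 4: JMP r/m */
		bp = len;		/* INT3-pad everything after the JMP */
	}

	while (cs--)
		bytes[i++] = 0x2e;	/* CS segment-override prefix */
	if (reg >= 8) {
		bytes[i++] = 0x41;	/* REX.B */
		reg -= 8;
	}
	bytes[i++] = 0xff;		/* opcode */
	bytes[i++] = 0xc0 | modrm | reg;	/* Mod = 3, register direct */
	while (bp--)
		bytes[i++] = 0xcc;	/* INT3 */

	return i;
}

int main(void)
{
	uint8_t buf[16];
	int i, n = emit_indirect_sketch(SK_CALL, 11, buf, 5);

	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);	/* 2e 2e 41 ff d3: cs cs call *%r11 */
	printf("\n");
	return 0;
}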
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6259b474073b..32ba599a51f8 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -102,6 +102,7 @@ static void __used common(void)
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax);
/* TLB state for the entry code */
OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
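The new C_PTREGS_SIZE constant is simply offsetof(struct pt_regs, orig_ax), i.e. the size of the C-saved register area without the hardware exception frame. A toy sketch of how asm-offsets constants are derived, with a stand-in struct whose field order follows the x86-64 pt_regs layout (sizes here are illustrative):

#include <stddef.h>
#include <stdio.h>

struct toy_pt_regs {
	unsigned long r15, r14, r13, r12, bp, bx;
	unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
	unsigned long orig_ax;			/* C_PTREGS_SIZE points here */
	unsigned long ip, cs, flags, sp, ss;	/* hardware frame */
};

int main(void)
{
	/* asm-offsets.c emits these as assembler constants via DEFINE()/OFFSET(). */
	printf("PTREGS_SIZE   %zu\n", sizeof(struct toy_pt_regs));
	printf("C_PTREGS_SIZE %zu\n", offsetof(struct toy_pt_regs, orig_ax));
	return 0;
}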
diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c
index 77086cf565ec..638eb5c933e0 100644
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -27,7 +27,7 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target,
* for indirect call checks:
*
*   movl -<id>, %r10d ; 6 bytes
- * addl -4(%reg), %r10d ; 4 bytes
+ * addl -<pos>(%reg), %r10d; 4 bytes
* je .Ltmp1 ; 2 bytes
* ud2 ; <- regs->ip
* .Ltmp1:
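The check sequence in that comment reduces to wrap-around arithmetic: the caller materialises the negated type id, adds the id stored in front of the callee, and traps unless the sum is zero. A small sketch of just that arithmetic (hash values made up):

#include <stdint.h>
#include <stdio.h>

static int kcfi_matches(uint32_t caller_id, uint32_t callee_id)
{
	uint32_t r10d = -caller_id;	/* movl $-<id>, %r10d       */
	r10d += callee_id;		/* addl -<pos>(%reg), %r10d */
	return r10d == 0;		/* je .Ltmp1  /  ud2        */
}

int main(void)
{
	printf("%d %d\n", kcfi_matches(0x12345678, 0x12345678),	/* 1 */
			  kcfi_matches(0x12345678, 0xdeadbeef));	/* 0 */
	return 0;
}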
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 25773af116bc..c4febdbcfe4d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -38,10 +38,6 @@
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
-/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
-bool hyperv_paravisor_present __ro_after_init;
-EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
-
#if IS_ENABLED(CONFIG_HYPERV)
static inline unsigned int hv_get_nested_msr(unsigned int reg)
{
@@ -288,8 +284,18 @@ static void __init x86_setup_ops_for_tsc_pg_clock(void)
old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
}
+
+#ifdef CONFIG_X86_64
+DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
+EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
+#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
+#endif
#endif /* CONFIG_HYPERV */
+#ifndef hypercall_update
+#define hypercall_update(hc) (void)hc
+#endif
+
static uint32_t __init ms_hyperv_platform(void)
{
u32 eax;
@@ -484,14 +490,14 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.shared_gpa_boundary =
BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
- hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
-
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
static_branch_enable(&isolation_type_snp);
+ if (!ms_hyperv.paravisor_present)
+ hypercall_update(hv_snp_hypercall);
} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
static_branch_enable(&isolation_type_tdx);
@@ -499,6 +505,7 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
if (!ms_hyperv.paravisor_present) {
+ hypercall_update(hv_tdx_hypercall);
/*
* Mark the Hyper-V TSC page feature as disabled
* in a TDX VM without paravisor so that the
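The mshyperv.c change drops the exported hyperv_paravisor_present flag and instead retargets the hypercall entry point once at boot. A plain-C analogue of that selection, using a function pointer where the kernel uses DEFINE_STATIC_CALL()/static_call_update() so the hot path keeps a direct call; the SNP/TDX helper names are taken from the hunk, their bodies here are stubs:

#include <stdio.h>

typedef unsigned long long u64;

static u64 hv_std_hypercall(u64 control) { return control; }
static u64 hv_snp_hypercall(u64 control) { return control | 1; }
static u64 hv_tdx_hypercall(u64 control) { return control | 2; }

static u64 (*hv_hypercall)(u64 control) = hv_std_hypercall;
#define hypercall_update(hc)	(hv_hypercall = (hc))

int main(void)
{
	int snp_isolation = 1, tdx_isolation = 0, paravisor_present = 0;

	/* Mirrors the selection done in ms_hyperv_init_platform() above. */
	if (snp_isolation && !paravisor_present)
		hypercall_update(hv_snp_hypercall);
	else if (tdx_isolation && !paravisor_present)
		hypercall_update(hv_tdx_hypercall);

	printf("%llx\n", hv_hypercall(0x10));	/* 11: SNP path selected */
	return 0;
}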
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 18cf79d6e2c5..763534d77f59 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.0+
/*
* MTRR (Memory Type Range Register) cleanup
*
* Copyright (C) 2009 Yinghai Lu
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/init.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index ecbda0341a8a..4b3d492afe17 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: LGPL-2.0+
/* Generic MTRR (Memory Type Range Register) driver.
Copyright (C) 1997-2000 Richard Gooch
Copyright (c) 2002 Patrick Mochel
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
The postal address is:
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 99004b02e2ed..42a088a337c5 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -68,7 +68,7 @@ static inline bool encls_failed(int ret)
({ \
int ret; \
asm volatile( \
- "1: .byte 0x0f, 0x01, 0xcf;\n\t" \
+ "1: encls\n" \
"2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret) \
@@ -111,8 +111,8 @@ static inline bool encls_failed(int ret)
({ \
int ret; \
asm volatile( \
- "1: .byte 0x0f, 0x01, 0xcf;\n\t" \
- " xor %%eax,%%eax;\n" \
+ "1: encls\n\t" \
+ "xor %%eax,%%eax\n" \
"2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret), "=b"(rbx_out) \
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f79c5edc0b89..6ab9eac64670 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -97,9 +97,11 @@ void __init native_init_IRQ(void)
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
- if (cpu_feature_enabled(X86_FEATURE_FRED))
+ /* FRED's IRQ path may be used even if FRED isn't fully enabled. */
+ if (IS_ENABLED(CONFIG_X86_FRED))
fred_complete_exception_setup();
- else
+
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_setup_apic_and_irq_gates();
lapic_assign_system_vectors();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 15088d14904f..201137b98fb8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -479,6 +479,10 @@ void __nocfi machine_kexec(struct kimage *image)
__ftrace_enabled_restore(save_ftrace_enabled);
}
+/*
+ * Handover to the next kernel, no CFI concern.
+ */
+ANNOTATE_NOCFI_SYM(machine_kexec);
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 36354b470590..6b22611e69cc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -97,7 +97,7 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
* If it's a UD1, further decode to determine its use:
*
- * FineIBT: ea (bad)
+ * FineIBT: d6 udb
* FineIBT: f0 75 f9 lock jne . - 6
* UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
* UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
@@ -130,9 +130,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
WARN_ON_ONCE(!lock);
return BUG_LOCK;
- case 0xea:
+ case 0xd6:
*len = addr - start;
- return BUG_EA;
+ return BUG_UDB;
case OPCODE_ESCAPE:
break;
@@ -341,7 +341,7 @@ static noinstr bool handle_bug(struct pt_regs *regs)
}
fallthrough;
- case BUG_EA:
+ case BUG_UDB:
case BUG_LOCK:
if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
handled = true;
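To make the new BUG_UDB case concrete, a stripped-down sketch of the opcode dispatch decode_bug() performs for the trap bytes listed in the comment above; the enum values and prefix handling are simplified, not the kernel's:

#include <stdint.h>
#include <stdio.h>

enum bug_kind { BUG_NONE, BUG_UD2, BUG_UD1, BUG_UDB, BUG_LOCK };

static enum bug_kind decode_bug_sketch(const uint8_t *p)
{
	switch (p[0]) {
	case 0xd6:			/* UDB: FineIBT preamble trap          */
		return BUG_UDB;
	case 0xf0:			/* LOCK-prefixed Jcc, FineIBT paranoid */
		return BUG_LOCK;
	case 0x0f:			/* two-byte opcode escape              */
		if (p[1] == 0x0b)	/* 0f 0b: UD2                          */
			return BUG_UD2;
		if (p[1] == 0xb9)	/* 0f b9: UD1, used by UBSan           */
			return BUG_UD1;
		break;
	}
	return BUG_NONE;
}

int main(void)
{
	static const uint8_t udb[] = { 0xd6 };
	static const uint8_t ud2[] = { 0x0f, 0x0b };

	printf("%d %d\n", decode_bug_sketch(udb), decode_bug_sketch(ud2)); /* 3 1 */
	return 0;
}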