diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/apic/vector.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 58 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 581 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 24 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/intel.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/amd_shas.c | 112 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/core.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 53 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/init.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 40 | ||||
-rw-r--r-- | arch/x86/kernel/itmt.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/ksysfs.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 36 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 51 |
20 files changed, 838 insertions, 247 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 93069b13d3af..a947b46a8b64 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -183,6 +183,7 @@ setnew: apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + apic_update_irq_cfg(irqd, newvec, newcpu); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -261,7 +262,6 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) if (vector < 0) return vector; apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); return 0; } @@ -338,7 +338,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector < 0) return vector; apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); + return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b2ad8d13211a..a5ece6ebe8a7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + tsa_init(c); + + if (cpu_has(c, X86_FEATURE_GP_ON_USER_CPUID)) + setup_force_cpu_cap(X86_FEATURE_CPUID_FAULT); + return; warn: @@ -930,6 +976,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..b74bf937cd9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -113,10 +115,9 @@ void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; static void __init set_return_thunk(void *thunk) { - if (x86_return_thunk != __x86_return_thunk) - pr_warn("x86/bugs: return thunk changed\n"); - x86_return_thunk = thunk; + + pr_info("active return thunk: %ps\n", thunk); } /* Update SPEC_CTRL MSR and its cached copy unconditionally */ @@ -169,9 +170,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -188,6 +189,39 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DEFINE_STATIC_KEY_FALSE(cpu_buf_vm_clear); EXPORT_SYMBOL_GPL(cpu_buf_vm_clear); +#undef pr_fmt +#define pr_fmt(fmt) "mitigations: " fmt + +static void __init cpu_print_attack_vectors(void) +{ + pr_info("Enabled attack vectors: "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL)) + pr_cont("user_kernel, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER)) + pr_cont("user_user, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST)) + pr_cont("guest_host, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) + pr_cont("guest_guest, "); + + pr_cont("SMT mitigations: "); + + switch (smt_mitigations) { + case SMT_MITIGATIONS_OFF: + pr_cont("off\n"); + break; + case SMT_MITIGATIONS_AUTO: + pr_cont("auto\n"); + break; + case SMT_MITIGATIONS_ON: + pr_cont("on\n"); + } +} + void __init cpu_select_mitigations(void) { /* @@ -208,6 +242,8 @@ void __init cpu_select_mitigations(void) x86_arch_cap_msr = x86_read_arch_cap_msr(); + cpu_print_attack_vectors(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -225,6 +261,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +309,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -329,6 +367,62 @@ static void x86_amd_ssb_disable(void) #undef pr_fmt #define pr_fmt(fmt) "MDS: " fmt +/* + * Returns true if vulnerability should be mitigated based on the + * selected attack vector controls. + * + * See Documentation/admin-guide/hw-vuln/attack_vector_controls.rst + */ +static bool __init should_mitigate_vuln(unsigned int bug) +{ + switch (bug) { + /* + * The only runtime-selected spectre_v1 mitigations in the kernel are + * related to SWAPGS protection on kernel entry. Therefore, protection + * is only required for the user->kernel attack vector. + */ + case X86_BUG_SPECTRE_V1: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL); + + case X86_BUG_SPECTRE_V2: + case X86_BUG_RETBLEED: + case X86_BUG_SRSO: + case X86_BUG_L1TF: + case X86_BUG_ITS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST); + + case X86_BUG_SPECTRE_V2_USER: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST); + + /* + * All the vulnerabilities below allow potentially leaking data + * across address spaces. Therefore, mitigation is required for + * any of these 4 attack vectors. + */ + case X86_BUG_MDS: + case X86_BUG_TAA: + case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_RFDS: + case X86_BUG_SRBDS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST); + + case X86_BUG_GDS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST) || + (smt_mitigations != SMT_MITIGATIONS_OFF); + default: + WARN(1, "Unknown bug %x\n", bug); + return false; + } +} + /* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = IS_ENABLED(CONFIG_MITIGATION_MDS) ? MDS_MITIGATION_AUTO : MDS_MITIGATION_OFF; @@ -382,13 +476,17 @@ static bool verw_clear_cpu_buf_mitigation_selected __ro_after_init; static void __init mds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_MDS)) { mds_mitigation = MDS_MITIGATION_OFF; return; } - if (mds_mitigation == MDS_MITIGATION_AUTO) - mds_mitigation = MDS_MITIGATION_FULL; + if (mds_mitigation == MDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_MDS)) + mds_mitigation = MDS_MITIGATION_FULL; + else + mds_mitigation = MDS_MITIGATION_OFF; + } if (mds_mitigation == MDS_MITIGATION_OFF) return; @@ -398,7 +496,7 @@ static void __init mds_select_mitigation(void) static void __init mds_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_MDS)) return; /* If TAA, MMIO, or RFDS are being mitigated, MDS gets mitigated too. */ @@ -419,7 +517,7 @@ static void __init mds_apply_mitigation(void) mds_mitigation == MDS_MITIGATION_VMWERV) { setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && - (mds_nosmt || cpu_mitigations_auto_nosmt())) + (mds_nosmt || smt_mitigations == SMT_MITIGATIONS_ON)) cpu_smt_disable(false); } } @@ -475,12 +573,13 @@ static void __init taa_select_mitigation(void) return; } - if (cpu_mitigations_off()) - taa_mitigation = TAA_MITIGATION_OFF; - /* Microcode will be checked in taa_update_mitigation(). */ - if (taa_mitigation == TAA_MITIGATION_AUTO) - taa_mitigation = TAA_MITIGATION_VERW; + if (taa_mitigation == TAA_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_TAA)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_OFF; + } if (taa_mitigation != TAA_MITIGATION_OFF) verw_clear_cpu_buf_mitigation_selected = true; @@ -488,7 +587,7 @@ static void __init taa_select_mitigation(void) static void __init taa_update_mitigation(void) { - if (!taa_vulnerable() || cpu_mitigations_off()) + if (!taa_vulnerable()) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -529,7 +628,7 @@ static void __init taa_apply_mitigation(void) */ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); - if (taa_nosmt || cpu_mitigations_auto_nosmt()) + if (taa_nosmt || smt_mitigations == SMT_MITIGATIONS_ON) cpu_smt_disable(false); } } @@ -575,8 +674,12 @@ static void __init mmio_select_mitigation(void) } /* Microcode will be checked in mmio_update_mitigation(). */ - if (mmio_mitigation == MMIO_MITIGATION_AUTO) - mmio_mitigation = MMIO_MITIGATION_VERW; + if (mmio_mitigation == MMIO_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_MMIO_STALE_DATA)) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_OFF; + } if (mmio_mitigation == MMIO_MITIGATION_OFF) return; @@ -591,7 +694,7 @@ static void __init mmio_select_mitigation(void) static void __init mmio_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -637,9 +740,9 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. */ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); - if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + if (mmio_nosmt || smt_mitigations == SMT_MITIGATIONS_ON) cpu_smt_disable(false); } @@ -680,13 +783,17 @@ static inline bool __init verw_clears_cpu_reg_file(void) static void __init rfds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_RFDS)) { rfds_mitigation = RFDS_MITIGATION_OFF; return; } - if (rfds_mitigation == RFDS_MITIGATION_AUTO) - rfds_mitigation = RFDS_MITIGATION_VERW; + if (rfds_mitigation == RFDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_RFDS)) + rfds_mitigation = RFDS_MITIGATION_VERW; + else + rfds_mitigation = RFDS_MITIGATION_OFF; + } if (rfds_mitigation == RFDS_MITIGATION_OFF) return; @@ -697,7 +804,7 @@ static void __init rfds_select_mitigation(void) static void __init rfds_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_RFDS)) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -798,13 +905,19 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SRBDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) { srbds_mitigation = SRBDS_MITIGATION_OFF; return; } - if (srbds_mitigation == SRBDS_MITIGATION_AUTO) - srbds_mitigation = SRBDS_MITIGATION_FULL; + if (srbds_mitigation == SRBDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_SRBDS)) + srbds_mitigation = SRBDS_MITIGATION_FULL; + else { + srbds_mitigation = SRBDS_MITIGATION_OFF; + return; + } + } /* * Check to see if this is one of the MDS_NO systems supporting TSX that @@ -952,12 +1065,15 @@ static void __init gds_select_mitigation(void) return; } - if (cpu_mitigations_off()) - gds_mitigation = GDS_MITIGATION_OFF; /* Will verify below that mitigation _can_ be disabled */ - - if (gds_mitigation == GDS_MITIGATION_AUTO) - gds_mitigation = GDS_MITIGATION_FULL; + if (gds_mitigation == GDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_GDS)) + gds_mitigation = GDS_MITIGATION_FULL; + else { + gds_mitigation = GDS_MITIGATION_OFF; + return; + } + } /* No microcode */ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { @@ -1063,13 +1179,16 @@ static bool smap_works_speculatively(void) static void __init spectre_v1_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V1)) spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; } static void __init spectre_v1_apply_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return; if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) { @@ -1120,6 +1239,20 @@ early_param("nospectre_v1", nospectre_v1_cmdline); enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +/* Depends on spectre_v2 mitigation selected already */ +static inline bool cdt_possible(enum spectre_v2_mitigation mode) +{ + if (!IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) || + !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) + return false; + + if (mode == SPECTRE_V2_RETPOLINE || + mode == SPECTRE_V2_EIBRS_RETPOLINE) + return true; + + return false; +} + #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt @@ -1158,6 +1291,21 @@ static enum retbleed_mitigation retbleed_mitigation __ro_after_init = static int __ro_after_init retbleed_nosmt = false; +enum srso_mitigation { + SRSO_MITIGATION_NONE, + SRSO_MITIGATION_AUTO, + SRSO_MITIGATION_UCODE_NEEDED, + SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED, + SRSO_MITIGATION_MICROCODE, + SRSO_MITIGATION_NOSMT, + SRSO_MITIGATION_SAFE_RET, + SRSO_MITIGATION_IBPB, + SRSO_MITIGATION_IBPB_ON_VMEXIT, + SRSO_MITIGATION_BP_SPEC_REDUCE, +}; + +static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_AUTO; + static int __init retbleed_parse_cmdline(char *str) { if (!str) @@ -1200,7 +1348,7 @@ early_param("retbleed", retbleed_parse_cmdline); static void __init retbleed_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) { retbleed_mitigation = RETBLEED_MITIGATION_NONE; return; } @@ -1237,6 +1385,11 @@ static void __init retbleed_select_mitigation(void) if (retbleed_mitigation != RETBLEED_MITIGATION_AUTO) return; + if (!should_mitigate_vuln(X86_BUG_RETBLEED)) { + retbleed_mitigation = RETBLEED_MITIGATION_NONE; + return; + } + /* Intel mitigation selected in retbleed_update_mitigation() */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { @@ -1247,35 +1400,36 @@ static void __init retbleed_select_mitigation(void) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; else retbleed_mitigation = RETBLEED_MITIGATION_NONE; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + /* Final mitigation depends on spectre-v2 selection */ + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + else if (boot_cpu_has(X86_FEATURE_IBRS)) + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + else + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } static void __init retbleed_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) return; - if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) - goto out; + /* ITS can also enable stuffing */ + if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF) + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; - /* - * retbleed=stuff is only allowed on Intel. If stuffing can't be used - * then a different mitigation will be selected below. - * - * its=stuff will also attempt to enable stuffing. - */ - if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF || - its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF) { - if (spectre_v2_enabled != SPECTRE_V2_RETPOLINE) { - pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); - retbleed_mitigation = RETBLEED_MITIGATION_AUTO; - } else { - if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) - pr_info("Retbleed mitigation updated to stuffing\n"); + /* If SRSO is using IBPB, that works for retbleed too */ + if (srso_mitigation == SRSO_MITIGATION_IBPB) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - retbleed_mitigation = RETBLEED_MITIGATION_STUFF; - } + if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF && + !cdt_possible(spectre_v2_enabled)) { + pr_err("WARNING: retbleed=stuff depends on retpoline\n"); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } + /* * Let IBRS trump all on Intel without affecting the effects of the * retbleed= cmdline option except for call depth based stuffing @@ -1294,15 +1448,11 @@ static void __init retbleed_update_mitigation(void) if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) pr_err(RETBLEED_INTEL_MSG); } - /* If nothing has set the mitigation yet, default to NONE. */ - if (retbleed_mitigation == RETBLEED_MITIGATION_AUTO) - retbleed_mitigation = RETBLEED_MITIGATION_NONE; } -out: + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } - static void __init retbleed_apply_mitigation(void) { bool mitigate_smt = false; @@ -1358,7 +1508,7 @@ static void __init retbleed_apply_mitigation(void) } if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && - (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + (retbleed_nosmt || smt_mitigations == SMT_MITIGATIONS_ON)) cpu_smt_disable(false); } @@ -1403,13 +1553,17 @@ early_param("indirect_target_selection", its_parse_cmdline); static void __init its_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_ITS)) { its_mitigation = ITS_MITIGATION_OFF; return; } - if (its_mitigation == ITS_MITIGATION_AUTO) - its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (its_mitigation == ITS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_ITS)) + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + else + its_mitigation = ITS_MITIGATION_OFF; + } if (its_mitigation == ITS_MITIGATION_OFF) return; @@ -1440,15 +1594,17 @@ static void __init its_select_mitigation(void) static void __init its_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_ITS)) return; switch (spectre_v2_enabled) { case SPECTRE_V2_NONE: - pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + if (its_mitigation != ITS_MITIGATION_OFF) + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); its_mitigation = ITS_MITIGATION_OFF; break; case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: /* Retpoline+CDT mitigates ITS */ if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF) its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; @@ -1462,13 +1618,8 @@ static void __init its_update_mitigation(void) break; } - /* - * retbleed_update_mitigation() will try to do stuffing if its=stuff. - * If it can't, such as if spectre_v2!=retpoline, then fall back to - * aligned thunks. - */ if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF && - retbleed_mitigation != RETBLEED_MITIGATION_STUFF) + !cdt_possible(spectre_v2_enabled)) its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; pr_info("%s\n", its_strings[its_mitigation]); @@ -1476,15 +1627,127 @@ static void __init its_update_mitigation(void) static void __init its_apply_mitigation(void) { - /* its=stuff forces retbleed stuffing and is enabled there. */ - if (its_mitigation != ITS_MITIGATION_ALIGNED_THUNKS) + switch (its_mitigation) { + case ITS_MITIGATION_OFF: + case ITS_MITIGATION_AUTO: + case ITS_MITIGATION_VMEXIT_ONLY: + break; + case ITS_MITIGATION_ALIGNED_THUNKS: + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + case ITS_MITIGATION_RETPOLINE_STUFF: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + set_return_thunk(call_depth_return_thunk); + break; + } +} + +#undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; return; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) { + bool vm = false, uk = false; + + tsa_mitigation = TSA_MITIGATION_NONE; - if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) - setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER)) { + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + uk = true; + } + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + tsa_mitigation = TSA_MITIGATION_VM; + vm = true; + } - setup_force_cpu_cap(X86_FEATURE_RETHUNK); - set_return_thunk(its_return_thunk); + if (uk && vm) + tsa_mitigation = TSA_MITIGATION_FULL; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } } #undef pr_fmt @@ -1609,7 +1872,7 @@ static enum spectre_v2_user_cmd __init spectre_v2_parse_user_cmdline(void) char arg[20]; int ret, i; - if (cpu_mitigations_off() || !IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2)) + if (!IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2)) return SPECTRE_V2_USER_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2_user", @@ -1647,6 +1910,13 @@ static void __init spectre_v2_user_select_mitigation(void) spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_AUTO: + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V2_USER)) + break; + spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL; + if (smt_mitigations == SMT_MITIGATIONS_OFF) + break; + spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL; + break; case SPECTRE_V2_USER_CMD_PRCTL: spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL; spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL; @@ -1798,8 +2068,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) int ret, i; cmd = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_CMD_AUTO : SPECTRE_V2_CMD_NONE; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || - cpu_mitigations_off()) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -2002,11 +2271,20 @@ early_param("spectre_bhi", spectre_bhi_parse_cmdline); static void __init bhi_select_mitigation(void) { - if (!boot_cpu_has(X86_BUG_BHI) || cpu_mitigations_off()) + if (!boot_cpu_has(X86_BUG_BHI)) bhi_mitigation = BHI_MITIGATION_OFF; - if (bhi_mitigation == BHI_MITIGATION_AUTO) - bhi_mitigation = BHI_MITIGATION_ON; + if (bhi_mitigation != BHI_MITIGATION_AUTO) + return; + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST)) { + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL)) + bhi_mitigation = BHI_MITIGATION_ON; + else + bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY; + } else { + bhi_mitigation = BHI_MITIGATION_OFF; + } } static void __init bhi_update_mitigation(void) @@ -2062,8 +2340,11 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_NONE: return; - case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V2)) + break; + fallthrough; + case SPECTRE_V2_CMD_FORCE: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { spectre_v2_enabled = SPECTRE_V2_EIBRS; break; @@ -2117,7 +2398,7 @@ static void __init spectre_v2_update_mitigation(void) } } - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && !cpu_mitigations_off()) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) pr_info("%s\n", spectre_v2_strings[spectre_v2_enabled]); } @@ -2249,10 +2530,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2597,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -2750,17 +3050,23 @@ static void override_cache_bits(struct cpuinfo_x86 *c) static void __init l1tf_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_L1TF) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { l1tf_mitigation = L1TF_MITIGATION_OFF; return; } - if (l1tf_mitigation == L1TF_MITIGATION_AUTO) { - if (cpu_mitigations_auto_nosmt()) - l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; - else - l1tf_mitigation = L1TF_MITIGATION_FLUSH; + if (l1tf_mitigation != L1TF_MITIGATION_AUTO) + return; + + if (!should_mitigate_vuln(X86_BUG_L1TF)) { + l1tf_mitigation = L1TF_MITIGATION_OFF; + return; } + + if (smt_mitigations == SMT_MITIGATIONS_ON) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + else + l1tf_mitigation = L1TF_MITIGATION_FLUSH; } static void __init l1tf_apply_mitigation(void) @@ -2834,31 +3140,18 @@ early_param("l1tf", l1tf_cmdline); #undef pr_fmt #define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt -enum srso_mitigation { - SRSO_MITIGATION_NONE, - SRSO_MITIGATION_AUTO, - SRSO_MITIGATION_UCODE_NEEDED, - SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED, - SRSO_MITIGATION_MICROCODE, - SRSO_MITIGATION_SAFE_RET, - SRSO_MITIGATION_IBPB, - SRSO_MITIGATION_IBPB_ON_VMEXIT, - SRSO_MITIGATION_BP_SPEC_REDUCE, -}; - static const char * const srso_strings[] = { [SRSO_MITIGATION_NONE] = "Vulnerable", [SRSO_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", [SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED] = "Vulnerable: Safe RET, no microcode", [SRSO_MITIGATION_MICROCODE] = "Vulnerable: Microcode, no safe RET", + [SRSO_MITIGATION_NOSMT] = "Mitigation: SMT disabled", [SRSO_MITIGATION_SAFE_RET] = "Mitigation: Safe RET", [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only", [SRSO_MITIGATION_BP_SPEC_REDUCE] = "Mitigation: Reduced Speculation" }; -static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_AUTO; - static int __init srso_parse_cmdline(char *str) { if (!str) @@ -2885,35 +3178,44 @@ early_param("spec_rstack_overflow", srso_parse_cmdline); static void __init srso_select_mitigation(void) { - bool has_microcode; - - if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SRSO)) { srso_mitigation = SRSO_MITIGATION_NONE; - - if (srso_mitigation == SRSO_MITIGATION_NONE) return; + } - if (srso_mitigation == SRSO_MITIGATION_AUTO) - srso_mitigation = SRSO_MITIGATION_SAFE_RET; - - has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE); - if (has_microcode) { - /* - * Zen1/2 with SMT off aren't vulnerable after the right - * IBPB microcode has been applied. - */ - if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { - setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + if (srso_mitigation == SRSO_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_SRSO)) { + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else { srso_mitigation = SRSO_MITIGATION_NONE; return; } - } else { + } + + /* Zen1/2 with SMT off aren't vulnerable to SRSO. */ + if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { + srso_mitigation = SRSO_MITIGATION_NOSMT; + return; + } + + if (!boot_cpu_has(X86_FEATURE_IBPB_BRTYPE)) { pr_warn("IBPB-extending microcode not applied!\n"); pr_warn(SRSO_NOTICE); + + /* + * Safe-RET provides partial mitigation without microcode, but + * other mitigations require microcode to provide any + * mitigations. + */ + if (srso_mitigation == SRSO_MITIGATION_SAFE_RET) + srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; + else + srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED; } switch (srso_mitigation) { case SRSO_MITIGATION_SAFE_RET: + case SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED: if (boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO)) { srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; goto ibpb_on_vmexit; @@ -2923,9 +3225,6 @@ static void __init srso_select_mitigation(void) pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); srso_mitigation = SRSO_MITIGATION_NONE; } - - if (!has_microcode) - srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; break; ibpb_on_vmexit: case SRSO_MITIGATION_IBPB_ON_VMEXIT: @@ -2940,9 +3239,6 @@ ibpb_on_vmexit: pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); srso_mitigation = SRSO_MITIGATION_NONE; } - - if (!has_microcode) - srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED; break; default: break; @@ -2957,8 +3253,7 @@ static void __init srso_update_mitigation(void) srso_mitigation = SRSO_MITIGATION_IBPB; if (boot_cpu_has_bug(X86_BUG_SRSO) && - !cpu_mitigations_off() && - !boot_cpu_has(X86_FEATURE_SRSO_NO)) + !cpu_mitigations_off()) pr_info("%s\n", srso_strings[srso_mitigation]); } @@ -3254,9 +3549,6 @@ static ssize_t retbleed_show_state(char *buf) static ssize_t srso_show_state(char *buf) { - if (boot_cpu_has(X86_FEATURE_SRSO_NO)) - return sysfs_emit(buf, "Mitigation: SMT disabled\n"); - return sysfs_emit(buf, "%s\n", srso_strings[srso_mitigation]); } @@ -3265,6 +3557,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3625,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3714,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 27125e009847..34a054181c4d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,6 +26,7 @@ #include <linux/pgtable.h> #include <linux/stackprotector.h> #include <linux/utsname.h> +#include <linux/efi.h> #include <asm/alternative.h> #include <asm/cmdline.h> @@ -1233,6 +1234,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1283,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1533,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2526,6 +2539,12 @@ void __init arch_cpu_finalize_init(void) fpu__init_cpu(); /* + * This needs to follow the FPU initializtion, since EFI depends on it. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efi_enter_virtual_mode(); + + /* * Ensure that access to the per CPU representation has the initial * boot CPU configuration. */ diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index fe50eb5b7c4a..b92e09a87c69 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -17,8 +17,8 @@ #define pr_fmt(fmt) "microcode: " fmt -#include <linux/platform_device.h> #include <linux/stop_machine.h> +#include <linux/device/faux.h> #include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> @@ -249,7 +249,7 @@ static void reload_early_microcode(unsigned int cpu) } /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +static struct faux_device *microcode_fdev; #ifdef CONFIG_MICROCODE_LATE_LOADING /* @@ -690,7 +690,7 @@ static int load_late_locked(void) if (!setup_cpus()) return -EBUSY; - switch (microcode_ops->request_microcode_fw(0, µcode_pdev->dev)) { + switch (microcode_ops->request_microcode_fw(0, µcode_fdev->dev)) { case UCODE_NEW: return load_late_stop_cpus(false); case UCODE_NEW_SAFE: @@ -841,9 +841,9 @@ static int __init microcode_init(void) if (early_data.new_rev) pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); - microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); - if (IS_ERR(microcode_pdev)) - return PTR_ERR(microcode_pdev); + microcode_fdev = faux_device_create("microcode", NULL, NULL); + if (!microcode_fdev) + return -ENODEV; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -862,7 +862,7 @@ static int __init microcode_init(void) return 0; out_pdev: - platform_device_unregister(microcode_pdev); + faux_device_destroy(microcode_fdev); return error; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ea138583dd92..aefd412a23dc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -37,6 +37,7 @@ DEFINE_PER_CPU(u64, xfd_state); /* The FPU state configuration data for kernel and user space */ struct fpu_state_config fpu_kernel_cfg __ro_after_init; struct fpu_state_config fpu_user_cfg __ro_after_init; +struct vcpu_fpu_config guest_default_cfg __ro_after_init; /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, @@ -217,7 +218,7 @@ void fpu_reset_from_exception_fixup(void) } #if IS_ENABLED(CONFIG_KVM) -static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); +static void __fpstate_reset(struct fpstate *fpstate); static void fpu_lock_guest_permissions(void) { @@ -242,19 +243,21 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) struct fpstate *fpstate; unsigned int size; - size = fpu_kernel_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); + size = guest_default_cfg.size + ALIGN(offsetof(struct fpstate, regs), 64); + fpstate = vzalloc(size); if (!fpstate) return false; - /* Leave xfd to 0 (the reset value defined by spec) */ - __fpstate_reset(fpstate, 0); - fpstate_init_user(fpstate); + /* Initialize indicators to reflect properties of the fpstate */ fpstate->is_valloc = true; fpstate->is_guest = true; + __fpstate_reset(fpstate); + fpstate_init_user(fpstate); + gfpu->fpstate = fpstate; - gfpu->xfeatures = fpu_kernel_cfg.default_features; + gfpu->xfeatures = guest_default_cfg.features; /* * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state @@ -541,28 +544,50 @@ void fpstate_init_user(struct fpstate *fpstate) fpstate_init_fstate(fpstate); } -static void __fpstate_reset(struct fpstate *fpstate, u64 xfd) +static void __fpstate_reset(struct fpstate *fpstate) { - /* Initialize sizes and feature masks */ - fpstate->size = fpu_kernel_cfg.default_size; + /* + * Supervisor features (and thus sizes) may diverge between guest + * FPUs and host FPUs, as some supervisor features are supported + * for guests despite not being utilized by the host. User + * features and sizes are always identical, which allows for + * common guest and userspace ABI. + * + * For the host, set XFD to the kernel's desired initialization + * value. For guests, set XFD to its architectural RESET value. + */ + if (fpstate->is_guest) { + fpstate->size = guest_default_cfg.size; + fpstate->xfeatures = guest_default_cfg.features; + fpstate->xfd = 0; + } else { + fpstate->size = fpu_kernel_cfg.default_size; + fpstate->xfeatures = fpu_kernel_cfg.default_features; + fpstate->xfd = init_fpstate.xfd; + } + fpstate->user_size = fpu_user_cfg.default_size; - fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; - fpstate->xfd = xfd; } void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; - __fpstate_reset(fpu->fpstate, init_fpstate.xfd); + __fpstate_reset(fpu->fpstate); /* Initialize the permission related info in fpu */ fpu->perm.__state_perm = fpu_kernel_cfg.default_features; fpu->perm.__state_size = fpu_kernel_cfg.default_size; fpu->perm.__user_state_size = fpu_user_cfg.default_size; - /* Same defaults for guests */ - fpu->guest_perm = fpu->perm; + + fpu->guest_perm.__state_perm = guest_default_cfg.features; + fpu->guest_perm.__state_size = guest_default_cfg.size; + /* + * User features and sizes are always identical between host and + * guest FPUs, which allows for common guest and userspace ABI. + */ + fpu->guest_perm.__user_state_size = fpu_user_cfg.default_size; } static inline void fpu_inherit_perms(struct fpu *dst_fpu) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 99db41bf9fa6..ff988b9ea39f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -205,6 +205,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpu_kernel_cfg.default_size = size; fpu_user_cfg.max_size = size; fpu_user_cfg.default_size = size; + guest_default_cfg.size = size; } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9aa9ac8399ae..12ed75c1b567 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -57,7 +57,7 @@ static const char *xfeature_names[] = "Protection Keys User registers", "PASID state", "Control-flow User registers", - "Control-flow Kernel registers (unused)", + "Control-flow Kernel registers (KVM only)", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", @@ -81,6 +81,7 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, + [XFEATURE_CET_KERNEL] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, [XFEATURE_APX] = X86_FEATURE_APX, @@ -372,6 +373,7 @@ static __init void os_xrstor_booting(struct xregs_state *xstate) XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ + XFEATURE_MASK_CET_KERNEL | \ XFEATURE_MASK_XTILE | \ XFEATURE_MASK_APX) @@ -573,6 +575,7 @@ static bool __init check_xstate_against_struct(int nr) case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); + case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state); case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: @@ -743,6 +746,9 @@ static int __init init_xstate_size(void) fpu_user_cfg.default_size = xstate_calculate_size(fpu_user_cfg.default_features, false); + guest_default_cfg.size = + xstate_calculate_size(guest_default_cfg.features, compacted); + return 0; } @@ -763,6 +769,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) fpu_kernel_cfg.default_size = legacy_size; fpu_user_cfg.max_size = legacy_size; fpu_user_cfg.default_size = legacy_size; + guest_default_cfg.size = legacy_size; /* * Prevent enabling the static branch which enables writes to the @@ -773,6 +780,24 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) fpstate_reset(x86_task_fpu(current)); } +static u64 __init host_default_mask(void) +{ + /* + * Exclude dynamic features (require userspace opt-in) and features + * that are supported only for KVM guests. + */ + return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR); +} + +static u64 __init guest_default_mask(void) +{ + /* + * Exclude dynamic features, which require userspace opt-in even + * for KVM guests. + */ + return ~(u64)XFEATURE_MASK_USER_DYNAMIC; +} + /* * Enable and initialize the xsave feature. * Called once per system bootup. @@ -855,12 +880,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; - /* Clean out dynamic features from default */ - fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; - fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; - - fpu_user_cfg.default_features = fpu_user_cfg.max_features; - fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; + /* + * Now, given maximum feature set, determine default values by + * applying default masks. + */ + fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask(); + fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask(); + guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask(); /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9cea1fc36c18..243a769fdd97 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -59,6 +59,18 @@ static ssize_t sched_itmt_enabled_write(struct file *filp, return result; } +static int sched_core_priority_show(struct seq_file *s, void *unused) +{ + int cpu; + + seq_puts(s, "CPU #\tPriority\n"); + for_each_possible_cpu(cpu) + seq_printf(s, "%d\t%d\n", cpu, arch_asym_cpu_priority(cpu)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(sched_core_priority); + static const struct file_operations dfs_sched_itmt_fops = { .read = debugfs_read_file_bool, .write = sched_itmt_enabled_write, @@ -67,6 +79,7 @@ static const struct file_operations dfs_sched_itmt_fops = { }; static struct dentry *dfs_sched_itmt; +static struct dentry *dfs_sched_core_prio; /** * sched_set_itmt_support() - Indicate platform supports ITMT @@ -102,6 +115,14 @@ int sched_set_itmt_support(void) return -ENOMEM; } + dfs_sched_core_prio = debugfs_create_file("sched_core_priority", 0644, + arch_debugfs_dir, NULL, + &sched_core_priority_fops); + if (IS_ERR_OR_NULL(dfs_sched_core_prio)) { + dfs_sched_core_prio = NULL; + return -ENOMEM; + } + sched_itmt_capable = true; sysctl_sched_itmt_enabled = 1; @@ -133,6 +154,8 @@ void sched_clear_itmt_support(void) debugfs_remove(dfs_sched_itmt); dfs_sched_itmt = NULL; + debugfs_remove(dfs_sched_core_prio); + dfs_sched_core_prio = NULL; if (sysctl_sched_itmt_enabled) { /* disable sched_itmt if we are no longer ITMT capable */ diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index b68d4be9464e..d547de9b3ed8 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -40,7 +40,7 @@ static const struct bin_attribute boot_params_data_attr = { .name = "data", .mode = S_IRUGO, }, - .read_new = boot_params_data_read, + .read = boot_params_data_read, .size = sizeof(boot_params), }; @@ -56,7 +56,7 @@ static const struct bin_attribute *const boot_params_data_attrs[] = { static const struct attribute_group boot_params_attr_group = { .attrs = boot_params_version_attrs, - .bin_attrs_new = boot_params_data_attrs, + .bin_attrs = boot_params_data_attrs, }; static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) @@ -250,7 +250,7 @@ static struct bin_attribute data_attr __ro_after_init = { .name = "data", .mode = S_IRUGO, }, - .read_new = setup_data_data_read, + .read = setup_data_data_read, }; static struct attribute *setup_data_type_attrs[] = { @@ -265,7 +265,7 @@ static const struct bin_attribute *const setup_data_data_attrs[] = { static const struct attribute_group setup_data_attr_group = { .attrs = setup_data_type_attrs, - .bin_attrs_new = setup_data_data_attrs, + .bin_attrs = setup_data_data_attrs, }; static int __init create_setup_data_node(struct kobject *parent, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 921c1c783bc1..8ae750cde0c6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -420,7 +420,7 @@ static u64 kvm_steal_clock(int cpu) return steal; } -static inline void __set_percpu_decrypted(void *ptr, unsigned long size) +static inline __init void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..1b7960cf6eb0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -334,13 +334,21 @@ DEFINE_PER_CPU(u64, msr_misc_features_shadow); static void set_cpuid_faulting(bool on) { - u64 msrval; - msrval = this_cpu_read(msr_misc_features_shadow); - msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; - msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); - this_cpu_write(msr_misc_features_shadow, msrval); - wrmsrq(MSR_MISC_FEATURES_ENABLES, msrval); + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + u64 msrval; + + msrval = this_cpu_read(msr_misc_features_shadow); + msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; + msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); + this_cpu_write(msr_misc_features_shadow, msrval); + wrmsrq(MSR_MISC_FEATURES_ENABLES, msrval); + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (on) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_CPUID_USER_DIS_BIT); + else + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_CPUID_USER_DIS_BIT); + } } static void disable_cpuid(void) @@ -907,16 +915,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b972bf72fb8b..52a5c03c353c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -707,6 +707,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Load the Intel cache allocation PQR MSR. */ resctrl_arch_sched_in(next_p); + /* Reset hw history on AMD CPUs */ + if (cpu_feature_enabled(X86_FEATURE_AMD_WORKLOAD_CLASS)) + wrmsrl(MSR_AMD_WORKLOAD_HRST, 0x1); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 095f04bdabdc..3dcadc13f09a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1236,7 +1236,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, static struct user_regset x86_64_regsets[] __ro_after_init = { [REGSET64_GENERAL] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1244,7 +1244,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = genregs_set }, [REGSET64_FP] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1253,7 +1253,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = xfpregs_set }, [REGSET64_XSTATE] = { - .core_note_type = NT_X86_XSTATE, + USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, @@ -1261,7 +1261,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = xstateregs_set }, [REGSET64_IOPERM] = { - .core_note_type = NT_386_IOPERM, + USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_LONGS, .size = sizeof(long), .align = sizeof(long), @@ -1270,7 +1270,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, #ifdef CONFIG_X86_USER_SHADOW_STACK [REGSET64_SSP] = { - .core_note_type = NT_X86_SHSTK, + USER_REGSET_NOTE_TYPE(X86_SHSTK), .n = 1, .size = sizeof(u64), .align = sizeof(u64), @@ -1297,7 +1297,7 @@ static const struct user_regset_view user_x86_64_view = { #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static struct user_regset x86_32_regsets[] __ro_after_init = { [REGSET32_GENERAL] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct32) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1305,7 +1305,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = genregs32_set }, [REGSET32_FP] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1314,7 +1314,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = fpregs_set }, [REGSET32_XFP] = { - .core_note_type = NT_PRXFPREG, + USER_REGSET_NOTE_TYPE(PRXFPREG), .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1323,7 +1323,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = xfpregs_set }, [REGSET32_XSTATE] = { - .core_note_type = NT_X86_XSTATE, + USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, @@ -1331,7 +1331,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = xstateregs_set }, [REGSET32_TLS] = { - .core_note_type = NT_386_TLS, + USER_REGSET_NOTE_TYPE(386_TLS), .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, .size = sizeof(struct user_desc), @@ -1341,7 +1341,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = regset_tls_set }, [REGSET32_IOPERM] = { - .core_note_type = NT_386_IOPERM, + USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_BYTES / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58ede3fa6a75..33e166f6ab12 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -478,44 +478,41 @@ static int x86_cluster_flags(void) */ static bool x86_has_numa_in_package; -static struct sched_domain_topology_level x86_topology[6]; - -static void __init build_sched_topology(void) -{ - int i = 0; - -#ifdef CONFIG_SCHED_SMT - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) - }; -#endif +static struct sched_domain_topology_level x86_topology[] = { + SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT), #ifdef CONFIG_SCHED_CLUSTER - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) - }; + SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS), #endif #ifdef CONFIG_SCHED_MC - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) - }; + SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC), #endif + SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG), + { NULL }, +}; + +static void __init build_sched_topology(void) +{ + struct sched_domain_topology_level *topology = x86_topology; + /* - * When there is NUMA topology inside the package skip the PKG domain - * since the NUMA domains will auto-magically create the right spanning - * domains based on the SLIT. + * When there is NUMA topology inside the package invalidate the + * PKG domain since the NUMA domains will auto-magically create the + * right spanning domains based on the SLIT. */ - if (!x86_has_numa_in_package) { - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(PKG) - }; + if (x86_has_numa_in_package) { + unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2; + + memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom])); } /* - * There must be one trailing NULL entry left. + * Drop the SMT domains if there is only one thread per-core + * since it'll get degenerated by the scheduler anyways. */ - BUG_ON(i >= ARRAY_SIZE(x86_topology)-1); + if (cpu_smt_num_threads <= 1) + ++topology; - set_sched_topology(x86_topology); + set_sched_topology(topology); } void set_cpu_sibling_map(int cpu) |