diff options
Diffstat (limited to 'arch/x86/kernel')
30 files changed, 988 insertions, 357 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 99a783fd4691..0d2a6d953be9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel. # -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecfe7b497cad..ea1d984166cd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,24 @@ static struct module *its_mod; #endif static void *its_page; static unsigned int its_offset; +struct its_array its_pages; + +static void *__its_alloc(struct its_array *pages) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + if (!page) + return NULL; + + void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + pages->pages = tmp; + pages->pages[pages->num++] = page; + + return no_free_ptr(page); +} /* Initialize a thunk with the "jmp *reg; int3" instructions. */ static void *its_init_thunk(void *thunk, int reg) @@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +static void its_pages_protect(struct its_array *pages) +{ + for (int i = 0; i < pages->num; i++) { + void *page = pages->pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +static void its_fini_core(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + its_pages_protect(&its_pages); + kfree(its_pages.pages); +} + #ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { @@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; - execmem_restore_rox(page, PAGE_SIZE); - } + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + its_pages_protect(&mod->arch.its_pages); } void its_free_mod(struct module *mod) @@ -184,37 +215,33 @@ void its_free_mod(struct module *mod) if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) return; - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_free(page); } - kfree(mod->its_page_array); + kfree(mod->arch.its_pages.pages); } #endif /* CONFIG_MODULES */ static void *its_alloc(void) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); - - if (!page) - return NULL; + struct its_array *pages = &its_pages; + void *page; #ifdef CONFIG_MODULES - if (its_mod) { - void *tmp = krealloc(its_mod->its_page_array, - (its_mod->its_num_pages+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; + if (its_mod) + pages = &its_mod->arch.its_pages; +#endif - its_mod->its_page_array = tmp; - its_mod->its_page_array[its_mod->its_num_pages++] = page; + page = __its_alloc(pages); + if (!page) + return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); - } -#endif /* CONFIG_MODULES */ + execmem_make_temp_rw(page, PAGE_SIZE); + if (pages == &its_pages) + set_memory_x((unsigned long)page, 1); - return no_free_ptr(page); + return page; } static void *its_allocate_thunk(int reg) @@ -268,7 +295,9 @@ u8 *its_static_thunk(int reg) return thunk; } -#endif +#else +static inline void its_fini_core(void) {} +#endif /* CONFIG_MITIGATION_ITS */ /* * Nomenclature for variable names to simplify and clarify this code and ease @@ -2338,6 +2367,8 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); + its_fini_core(); + /* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement. @@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c */ void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate) { - __smp_text_poke_batch_add(addr, opcode, len, emulate); + smp_text_poke_batch_add(addr, opcode, len, emulate); smp_text_poke_batch_finish(); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 93069b13d3af..a947b46a8b64 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -183,6 +183,7 @@ setnew: apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + apic_update_irq_cfg(irqd, newvec, newcpu); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -261,7 +262,6 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) if (vector < 0) return vector; apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); return 0; } @@ -338,7 +338,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector < 0) return vector; apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); + return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..a5ece6ebe8a7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -31,7 +31,7 @@ #include "cpu.h" -u16 invlpgb_count_max __ro_after_init; +u16 invlpgb_count_max __ro_after_init = 1; static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + tsa_init(c); + + if (cpu_has(c, X86_FEATURE_GP_ON_USER_CPUID)) + setup_force_cpu_cap(X86_FEATURE_CPUID_FAULT); + return; warn: @@ -930,6 +976,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..b74bf937cd9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -113,10 +115,9 @@ void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; static void __init set_return_thunk(void *thunk) { - if (x86_return_thunk != __x86_return_thunk) - pr_warn("x86/bugs: return thunk changed\n"); - x86_return_thunk = thunk; + + pr_info("active return thunk: %ps\n", thunk); } /* Update SPEC_CTRL MSR and its cached copy unconditionally */ @@ -169,9 +170,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -188,6 +189,39 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DEFINE_STATIC_KEY_FALSE(cpu_buf_vm_clear); EXPORT_SYMBOL_GPL(cpu_buf_vm_clear); +#undef pr_fmt +#define pr_fmt(fmt) "mitigations: " fmt + +static void __init cpu_print_attack_vectors(void) +{ + pr_info("Enabled attack vectors: "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL)) + pr_cont("user_kernel, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER)) + pr_cont("user_user, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST)) + pr_cont("guest_host, "); + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) + pr_cont("guest_guest, "); + + pr_cont("SMT mitigations: "); + + switch (smt_mitigations) { + case SMT_MITIGATIONS_OFF: + pr_cont("off\n"); + break; + case SMT_MITIGATIONS_AUTO: + pr_cont("auto\n"); + break; + case SMT_MITIGATIONS_ON: + pr_cont("on\n"); + } +} + void __init cpu_select_mitigations(void) { /* @@ -208,6 +242,8 @@ void __init cpu_select_mitigations(void) x86_arch_cap_msr = x86_read_arch_cap_msr(); + cpu_print_attack_vectors(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -225,6 +261,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +309,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -329,6 +367,62 @@ static void x86_amd_ssb_disable(void) #undef pr_fmt #define pr_fmt(fmt) "MDS: " fmt +/* + * Returns true if vulnerability should be mitigated based on the + * selected attack vector controls. + * + * See Documentation/admin-guide/hw-vuln/attack_vector_controls.rst + */ +static bool __init should_mitigate_vuln(unsigned int bug) +{ + switch (bug) { + /* + * The only runtime-selected spectre_v1 mitigations in the kernel are + * related to SWAPGS protection on kernel entry. Therefore, protection + * is only required for the user->kernel attack vector. + */ + case X86_BUG_SPECTRE_V1: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL); + + case X86_BUG_SPECTRE_V2: + case X86_BUG_RETBLEED: + case X86_BUG_SRSO: + case X86_BUG_L1TF: + case X86_BUG_ITS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST); + + case X86_BUG_SPECTRE_V2_USER: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST); + + /* + * All the vulnerabilities below allow potentially leaking data + * across address spaces. Therefore, mitigation is required for + * any of these 4 attack vectors. + */ + case X86_BUG_MDS: + case X86_BUG_TAA: + case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_RFDS: + case X86_BUG_SRBDS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST); + + case X86_BUG_GDS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST) || + (smt_mitigations != SMT_MITIGATIONS_OFF); + default: + WARN(1, "Unknown bug %x\n", bug); + return false; + } +} + /* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = IS_ENABLED(CONFIG_MITIGATION_MDS) ? MDS_MITIGATION_AUTO : MDS_MITIGATION_OFF; @@ -382,13 +476,17 @@ static bool verw_clear_cpu_buf_mitigation_selected __ro_after_init; static void __init mds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_MDS)) { mds_mitigation = MDS_MITIGATION_OFF; return; } - if (mds_mitigation == MDS_MITIGATION_AUTO) - mds_mitigation = MDS_MITIGATION_FULL; + if (mds_mitigation == MDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_MDS)) + mds_mitigation = MDS_MITIGATION_FULL; + else + mds_mitigation = MDS_MITIGATION_OFF; + } if (mds_mitigation == MDS_MITIGATION_OFF) return; @@ -398,7 +496,7 @@ static void __init mds_select_mitigation(void) static void __init mds_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_MDS)) return; /* If TAA, MMIO, or RFDS are being mitigated, MDS gets mitigated too. */ @@ -419,7 +517,7 @@ static void __init mds_apply_mitigation(void) mds_mitigation == MDS_MITIGATION_VMWERV) { setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && - (mds_nosmt || cpu_mitigations_auto_nosmt())) + (mds_nosmt || smt_mitigations == SMT_MITIGATIONS_ON)) cpu_smt_disable(false); } } @@ -475,12 +573,13 @@ static void __init taa_select_mitigation(void) return; } - if (cpu_mitigations_off()) - taa_mitigation = TAA_MITIGATION_OFF; - /* Microcode will be checked in taa_update_mitigation(). */ - if (taa_mitigation == TAA_MITIGATION_AUTO) - taa_mitigation = TAA_MITIGATION_VERW; + if (taa_mitigation == TAA_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_TAA)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_OFF; + } if (taa_mitigation != TAA_MITIGATION_OFF) verw_clear_cpu_buf_mitigation_selected = true; @@ -488,7 +587,7 @@ static void __init taa_select_mitigation(void) static void __init taa_update_mitigation(void) { - if (!taa_vulnerable() || cpu_mitigations_off()) + if (!taa_vulnerable()) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -529,7 +628,7 @@ static void __init taa_apply_mitigation(void) */ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); - if (taa_nosmt || cpu_mitigations_auto_nosmt()) + if (taa_nosmt || smt_mitigations == SMT_MITIGATIONS_ON) cpu_smt_disable(false); } } @@ -575,8 +674,12 @@ static void __init mmio_select_mitigation(void) } /* Microcode will be checked in mmio_update_mitigation(). */ - if (mmio_mitigation == MMIO_MITIGATION_AUTO) - mmio_mitigation = MMIO_MITIGATION_VERW; + if (mmio_mitigation == MMIO_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_MMIO_STALE_DATA)) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_OFF; + } if (mmio_mitigation == MMIO_MITIGATION_OFF) return; @@ -591,7 +694,7 @@ static void __init mmio_select_mitigation(void) static void __init mmio_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -637,9 +740,9 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. */ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); - if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + if (mmio_nosmt || smt_mitigations == SMT_MITIGATIONS_ON) cpu_smt_disable(false); } @@ -680,13 +783,17 @@ static inline bool __init verw_clears_cpu_reg_file(void) static void __init rfds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_RFDS)) { rfds_mitigation = RFDS_MITIGATION_OFF; return; } - if (rfds_mitigation == RFDS_MITIGATION_AUTO) - rfds_mitigation = RFDS_MITIGATION_VERW; + if (rfds_mitigation == RFDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_RFDS)) + rfds_mitigation = RFDS_MITIGATION_VERW; + else + rfds_mitigation = RFDS_MITIGATION_OFF; + } if (rfds_mitigation == RFDS_MITIGATION_OFF) return; @@ -697,7 +804,7 @@ static void __init rfds_select_mitigation(void) static void __init rfds_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_RFDS)) return; if (verw_clear_cpu_buf_mitigation_selected) @@ -798,13 +905,19 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SRBDS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) { srbds_mitigation = SRBDS_MITIGATION_OFF; return; } - if (srbds_mitigation == SRBDS_MITIGATION_AUTO) - srbds_mitigation = SRBDS_MITIGATION_FULL; + if (srbds_mitigation == SRBDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_SRBDS)) + srbds_mitigation = SRBDS_MITIGATION_FULL; + else { + srbds_mitigation = SRBDS_MITIGATION_OFF; + return; + } + } /* * Check to see if this is one of the MDS_NO systems supporting TSX that @@ -952,12 +1065,15 @@ static void __init gds_select_mitigation(void) return; } - if (cpu_mitigations_off()) - gds_mitigation = GDS_MITIGATION_OFF; /* Will verify below that mitigation _can_ be disabled */ - - if (gds_mitigation == GDS_MITIGATION_AUTO) - gds_mitigation = GDS_MITIGATION_FULL; + if (gds_mitigation == GDS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_GDS)) + gds_mitigation = GDS_MITIGATION_FULL; + else { + gds_mitigation = GDS_MITIGATION_OFF; + return; + } + } /* No microcode */ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { @@ -1063,13 +1179,16 @@ static bool smap_works_speculatively(void) static void __init spectre_v1_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V1)) spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; } static void __init spectre_v1_apply_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return; if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) { @@ -1120,6 +1239,20 @@ early_param("nospectre_v1", nospectre_v1_cmdline); enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +/* Depends on spectre_v2 mitigation selected already */ +static inline bool cdt_possible(enum spectre_v2_mitigation mode) +{ + if (!IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) || + !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) + return false; + + if (mode == SPECTRE_V2_RETPOLINE || + mode == SPECTRE_V2_EIBRS_RETPOLINE) + return true; + + return false; +} + #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt @@ -1158,6 +1291,21 @@ static enum retbleed_mitigation retbleed_mitigation __ro_after_init = static int __ro_after_init retbleed_nosmt = false; +enum srso_mitigation { + SRSO_MITIGATION_NONE, + SRSO_MITIGATION_AUTO, + SRSO_MITIGATION_UCODE_NEEDED, + SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED, + SRSO_MITIGATION_MICROCODE, + SRSO_MITIGATION_NOSMT, + SRSO_MITIGATION_SAFE_RET, + SRSO_MITIGATION_IBPB, + SRSO_MITIGATION_IBPB_ON_VMEXIT, + SRSO_MITIGATION_BP_SPEC_REDUCE, +}; + +static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_AUTO; + static int __init retbleed_parse_cmdline(char *str) { if (!str) @@ -1200,7 +1348,7 @@ early_param("retbleed", retbleed_parse_cmdline); static void __init retbleed_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) { retbleed_mitigation = RETBLEED_MITIGATION_NONE; return; } @@ -1237,6 +1385,11 @@ static void __init retbleed_select_mitigation(void) if (retbleed_mitigation != RETBLEED_MITIGATION_AUTO) return; + if (!should_mitigate_vuln(X86_BUG_RETBLEED)) { + retbleed_mitigation = RETBLEED_MITIGATION_NONE; + return; + } + /* Intel mitigation selected in retbleed_update_mitigation() */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { @@ -1247,35 +1400,36 @@ static void __init retbleed_select_mitigation(void) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; else retbleed_mitigation = RETBLEED_MITIGATION_NONE; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + /* Final mitigation depends on spectre-v2 selection */ + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + else if (boot_cpu_has(X86_FEATURE_IBRS)) + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + else + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } static void __init retbleed_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) return; - if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) - goto out; + /* ITS can also enable stuffing */ + if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF) + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; - /* - * retbleed=stuff is only allowed on Intel. If stuffing can't be used - * then a different mitigation will be selected below. - * - * its=stuff will also attempt to enable stuffing. - */ - if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF || - its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF) { - if (spectre_v2_enabled != SPECTRE_V2_RETPOLINE) { - pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); - retbleed_mitigation = RETBLEED_MITIGATION_AUTO; - } else { - if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) - pr_info("Retbleed mitigation updated to stuffing\n"); + /* If SRSO is using IBPB, that works for retbleed too */ + if (srso_mitigation == SRSO_MITIGATION_IBPB) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - retbleed_mitigation = RETBLEED_MITIGATION_STUFF; - } + if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF && + !cdt_possible(spectre_v2_enabled)) { + pr_err("WARNING: retbleed=stuff depends on retpoline\n"); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } + /* * Let IBRS trump all on Intel without affecting the effects of the * retbleed= cmdline option except for call depth based stuffing @@ -1294,15 +1448,11 @@ static void __init retbleed_update_mitigation(void) if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) pr_err(RETBLEED_INTEL_MSG); } - /* If nothing has set the mitigation yet, default to NONE. */ - if (retbleed_mitigation == RETBLEED_MITIGATION_AUTO) - retbleed_mitigation = RETBLEED_MITIGATION_NONE; } -out: + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } - static void __init retbleed_apply_mitigation(void) { bool mitigate_smt = false; @@ -1358,7 +1508,7 @@ static void __init retbleed_apply_mitigation(void) } if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && - (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + (retbleed_nosmt || smt_mitigations == SMT_MITIGATIONS_ON)) cpu_smt_disable(false); } @@ -1403,13 +1553,17 @@ early_param("indirect_target_selection", its_parse_cmdline); static void __init its_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_ITS)) { its_mitigation = ITS_MITIGATION_OFF; return; } - if (its_mitigation == ITS_MITIGATION_AUTO) - its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (its_mitigation == ITS_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_ITS)) + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + else + its_mitigation = ITS_MITIGATION_OFF; + } if (its_mitigation == ITS_MITIGATION_OFF) return; @@ -1440,15 +1594,17 @@ static void __init its_select_mitigation(void) static void __init its_update_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_ITS)) return; switch (spectre_v2_enabled) { case SPECTRE_V2_NONE: - pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + if (its_mitigation != ITS_MITIGATION_OFF) + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); its_mitigation = ITS_MITIGATION_OFF; break; case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: /* Retpoline+CDT mitigates ITS */ if (retbleed_mitigation == RETBLEED_MITIGATION_STUFF) its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; @@ -1462,13 +1618,8 @@ static void __init its_update_mitigation(void) break; } - /* - * retbleed_update_mitigation() will try to do stuffing if its=stuff. - * If it can't, such as if spectre_v2!=retpoline, then fall back to - * aligned thunks. - */ if (its_mitigation == ITS_MITIGATION_RETPOLINE_STUFF && - retbleed_mitigation != RETBLEED_MITIGATION_STUFF) + !cdt_possible(spectre_v2_enabled)) its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; pr_info("%s\n", its_strings[its_mitigation]); @@ -1476,15 +1627,127 @@ static void __init its_update_mitigation(void) static void __init its_apply_mitigation(void) { - /* its=stuff forces retbleed stuffing and is enabled there. */ - if (its_mitigation != ITS_MITIGATION_ALIGNED_THUNKS) + switch (its_mitigation) { + case ITS_MITIGATION_OFF: + case ITS_MITIGATION_AUTO: + case ITS_MITIGATION_VMEXIT_ONLY: + break; + case ITS_MITIGATION_ALIGNED_THUNKS: + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + case ITS_MITIGATION_RETPOLINE_STUFF: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + set_return_thunk(call_depth_return_thunk); + break; + } +} + +#undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; return; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) { + bool vm = false, uk = false; + + tsa_mitigation = TSA_MITIGATION_NONE; - if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) - setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || + cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER)) { + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + uk = true; + } + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + tsa_mitigation = TSA_MITIGATION_VM; + vm = true; + } - setup_force_cpu_cap(X86_FEATURE_RETHUNK); - set_return_thunk(its_return_thunk); + if (uk && vm) + tsa_mitigation = TSA_MITIGATION_FULL; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } } #undef pr_fmt @@ -1609,7 +1872,7 @@ static enum spectre_v2_user_cmd __init spectre_v2_parse_user_cmdline(void) char arg[20]; int ret, i; - if (cpu_mitigations_off() || !IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2)) + if (!IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2)) return SPECTRE_V2_USER_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2_user", @@ -1647,6 +1910,13 @@ static void __init spectre_v2_user_select_mitigation(void) spectre_v2_user_stibp = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_AUTO: + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V2_USER)) + break; + spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL; + if (smt_mitigations == SMT_MITIGATIONS_OFF) + break; + spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL; + break; case SPECTRE_V2_USER_CMD_PRCTL: spectre_v2_user_ibpb = SPECTRE_V2_USER_PRCTL; spectre_v2_user_stibp = SPECTRE_V2_USER_PRCTL; @@ -1798,8 +2068,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) int ret, i; cmd = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_CMD_AUTO : SPECTRE_V2_CMD_NONE; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || - cpu_mitigations_off()) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -2002,11 +2271,20 @@ early_param("spectre_bhi", spectre_bhi_parse_cmdline); static void __init bhi_select_mitigation(void) { - if (!boot_cpu_has(X86_BUG_BHI) || cpu_mitigations_off()) + if (!boot_cpu_has(X86_BUG_BHI)) bhi_mitigation = BHI_MITIGATION_OFF; - if (bhi_mitigation == BHI_MITIGATION_AUTO) - bhi_mitigation = BHI_MITIGATION_ON; + if (bhi_mitigation != BHI_MITIGATION_AUTO) + return; + + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST)) { + if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL)) + bhi_mitigation = BHI_MITIGATION_ON; + else + bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY; + } else { + bhi_mitigation = BHI_MITIGATION_OFF; + } } static void __init bhi_update_mitigation(void) @@ -2062,8 +2340,11 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_NONE: return; - case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: + if (!should_mitigate_vuln(X86_BUG_SPECTRE_V2)) + break; + fallthrough; + case SPECTRE_V2_CMD_FORCE: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { spectre_v2_enabled = SPECTRE_V2_EIBRS; break; @@ -2117,7 +2398,7 @@ static void __init spectre_v2_update_mitigation(void) } } - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && !cpu_mitigations_off()) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) pr_info("%s\n", spectre_v2_strings[spectre_v2_enabled]); } @@ -2249,10 +2530,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2597,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -2750,17 +3050,23 @@ static void override_cache_bits(struct cpuinfo_x86 *c) static void __init l1tf_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_L1TF) || cpu_mitigations_off()) { + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { l1tf_mitigation = L1TF_MITIGATION_OFF; return; } - if (l1tf_mitigation == L1TF_MITIGATION_AUTO) { - if (cpu_mitigations_auto_nosmt()) - l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; - else - l1tf_mitigation = L1TF_MITIGATION_FLUSH; + if (l1tf_mitigation != L1TF_MITIGATION_AUTO) + return; + + if (!should_mitigate_vuln(X86_BUG_L1TF)) { + l1tf_mitigation = L1TF_MITIGATION_OFF; + return; } + + if (smt_mitigations == SMT_MITIGATIONS_ON) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + else + l1tf_mitigation = L1TF_MITIGATION_FLUSH; } static void __init l1tf_apply_mitigation(void) @@ -2834,31 +3140,18 @@ early_param("l1tf", l1tf_cmdline); #undef pr_fmt #define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt -enum srso_mitigation { - SRSO_MITIGATION_NONE, - SRSO_MITIGATION_AUTO, - SRSO_MITIGATION_UCODE_NEEDED, - SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED, - SRSO_MITIGATION_MICROCODE, - SRSO_MITIGATION_SAFE_RET, - SRSO_MITIGATION_IBPB, - SRSO_MITIGATION_IBPB_ON_VMEXIT, - SRSO_MITIGATION_BP_SPEC_REDUCE, -}; - static const char * const srso_strings[] = { [SRSO_MITIGATION_NONE] = "Vulnerable", [SRSO_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", [SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED] = "Vulnerable: Safe RET, no microcode", [SRSO_MITIGATION_MICROCODE] = "Vulnerable: Microcode, no safe RET", + [SRSO_MITIGATION_NOSMT] = "Mitigation: SMT disabled", [SRSO_MITIGATION_SAFE_RET] = "Mitigation: Safe RET", [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only", [SRSO_MITIGATION_BP_SPEC_REDUCE] = "Mitigation: Reduced Speculation" }; -static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_AUTO; - static int __init srso_parse_cmdline(char *str) { if (!str) @@ -2885,35 +3178,44 @@ early_param("spec_rstack_overflow", srso_parse_cmdline); static void __init srso_select_mitigation(void) { - bool has_microcode; - - if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) + if (!boot_cpu_has_bug(X86_BUG_SRSO)) { srso_mitigation = SRSO_MITIGATION_NONE; - - if (srso_mitigation == SRSO_MITIGATION_NONE) return; + } - if (srso_mitigation == SRSO_MITIGATION_AUTO) - srso_mitigation = SRSO_MITIGATION_SAFE_RET; - - has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE); - if (has_microcode) { - /* - * Zen1/2 with SMT off aren't vulnerable after the right - * IBPB microcode has been applied. - */ - if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { - setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + if (srso_mitigation == SRSO_MITIGATION_AUTO) { + if (should_mitigate_vuln(X86_BUG_SRSO)) { + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else { srso_mitigation = SRSO_MITIGATION_NONE; return; } - } else { + } + + /* Zen1/2 with SMT off aren't vulnerable to SRSO. */ + if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { + srso_mitigation = SRSO_MITIGATION_NOSMT; + return; + } + + if (!boot_cpu_has(X86_FEATURE_IBPB_BRTYPE)) { pr_warn("IBPB-extending microcode not applied!\n"); pr_warn(SRSO_NOTICE); + + /* + * Safe-RET provides partial mitigation without microcode, but + * other mitigations require microcode to provide any + * mitigations. + */ + if (srso_mitigation == SRSO_MITIGATION_SAFE_RET) + srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; + else + srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED; } switch (srso_mitigation) { case SRSO_MITIGATION_SAFE_RET: + case SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED: if (boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO)) { srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; goto ibpb_on_vmexit; @@ -2923,9 +3225,6 @@ static void __init srso_select_mitigation(void) pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); srso_mitigation = SRSO_MITIGATION_NONE; } - - if (!has_microcode) - srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; break; ibpb_on_vmexit: case SRSO_MITIGATION_IBPB_ON_VMEXIT: @@ -2940,9 +3239,6 @@ ibpb_on_vmexit: pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); srso_mitigation = SRSO_MITIGATION_NONE; } - - if (!has_microcode) - srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED; break; default: break; @@ -2957,8 +3253,7 @@ static void __init srso_update_mitigation(void) srso_mitigation = SRSO_MITIGATION_IBPB; if (boot_cpu_has_bug(X86_BUG_SRSO) && - !cpu_mitigations_off() && - !boot_cpu_has(X86_FEATURE_SRSO_NO)) + !cpu_mitigations_off()) pr_info("%s\n", srso_strings[srso_mitigation]); } @@ -3254,9 +3549,6 @@ static ssize_t retbleed_show_state(char *buf) static ssize_t srso_show_state(char *buf) { - if (boot_cpu_has(X86_FEATURE_SRSO_NO)) - return sysfs_emit(buf, "Mitigation: SMT disabled\n"); - return sysfs_emit(buf, "%s\n", srso_strings[srso_mitigation]); } @@ -3265,6 +3557,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3625,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3714,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..34a054181c4d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,6 +26,7 @@ #include <linux/pgtable.h> #include <linux/stackprotector.h> #include <linux/utsname.h> +#include <linux/efi.h> #include <asm/alternative.h> #include <asm/cmdline.h> @@ -1233,6 +1234,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1283,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1533,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2243,20 +2256,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) +static void initialize_debug_regs(void) { - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } + /* Control register first -- to make sure everything is disabled. */ + set_debugreg(DR7_FIXED_1, 7); + set_debugreg(DR6_RESERVED, 6); + /* dr5 and dr4 don't exist */ + set_debugreg(0, 3); + set_debugreg(0, 2); + set_debugreg(0, 1); + set_debugreg(0, 0); } #ifdef CONFIG_KGDB @@ -2417,7 +2426,7 @@ void cpu_init(void) load_mm_ldt(&init_mm); - clear_all_debug_regs(); + initialize_debug_regs(); dbg_restore_debug_regs(); doublefault_init_cpu_tss(); @@ -2530,6 +2539,12 @@ void __init arch_cpu_finalize_init(void) fpu__init_cpu(); /* + * This needs to follow the FPU initializtion, since EFI depends on it. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efi_enter_virtual_mode(); + + /* * Ensure that access to the per CPU representation has the initial * boot CPU configuration. */ diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index fe50eb5b7c4a..b92e09a87c69 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -17,8 +17,8 @@ #define pr_fmt(fmt) "microcode: " fmt -#include <linux/platform_device.h> #include <linux/stop_machine.h> +#include <linux/device/faux.h> #include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> @@ -249,7 +249,7 @@ static void reload_early_microcode(unsigned int cpu) } /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +static struct faux_device *microcode_fdev; #ifdef CONFIG_MICROCODE_LATE_LOADING /* @@ -690,7 +690,7 @@ static int load_late_locked(void) if (!setup_cpus()) return -EBUSY; - switch (microcode_ops->request_microcode_fw(0, µcode_pdev->dev)) { + switch (microcode_ops->request_microcode_fw(0, µcode_fdev->dev)) { case UCODE_NEW: return load_late_stop_cpus(false); case UCODE_NEW_SAFE: @@ -841,9 +841,9 @@ static int __init microcode_init(void) if (early_data.new_rev) pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); - microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); - if (IS_ERR(microcode_pdev)) - return PTR_ERR(microcode_pdev); + microcode_fdev = faux_device_create("microcode", NULL, NULL); + if (!microcode_fdev) + return -ENODEV; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -862,7 +862,7 @@ static int __init microcode_init(void) return 0; out_pdev: - platform_device_unregister(microcode_pdev); + faux_device_destroy(microcode_fdev); return error; } diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7109cbfcad4f..187d527ef73b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ea138583dd92..aefd412a23dc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -37,6 +37,7 @@ DEFINE_PER_CPU(u64, xfd_state); /* The FPU state configuration data for kernel and user space */ struct fpu_state_config fpu_kernel_cfg __ro_after_init; struct fpu_state_config fpu_user_cfg __ro_after_init; +struct vcpu_fpu_config guest_default_cfg __ro_after_init; /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, @@ -217,7 +218,7 @@ void fpu_reset_from_exception_fixup(void) } #if IS_ENABLED(CONFIG_KVM) -static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); +static void __fpstate_reset(struct fpstate *fpstate); static void fpu_lock_guest_permissions(void) { @@ -242,19 +243,21 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) struct fpstate *fpstate; unsigned int size; - size = fpu_kernel_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); + size = guest_default_cfg.size + ALIGN(offsetof(struct fpstate, regs), 64); + fpstate = vzalloc(size); if (!fpstate) return false; - /* Leave xfd to 0 (the reset value defined by spec) */ - __fpstate_reset(fpstate, 0); - fpstate_init_user(fpstate); + /* Initialize indicators to reflect properties of the fpstate */ fpstate->is_valloc = true; fpstate->is_guest = true; + __fpstate_reset(fpstate); + fpstate_init_user(fpstate); + gfpu->fpstate = fpstate; - gfpu->xfeatures = fpu_kernel_cfg.default_features; + gfpu->xfeatures = guest_default_cfg.features; /* * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state @@ -541,28 +544,50 @@ void fpstate_init_user(struct fpstate *fpstate) fpstate_init_fstate(fpstate); } -static void __fpstate_reset(struct fpstate *fpstate, u64 xfd) +static void __fpstate_reset(struct fpstate *fpstate) { - /* Initialize sizes and feature masks */ - fpstate->size = fpu_kernel_cfg.default_size; + /* + * Supervisor features (and thus sizes) may diverge between guest + * FPUs and host FPUs, as some supervisor features are supported + * for guests despite not being utilized by the host. User + * features and sizes are always identical, which allows for + * common guest and userspace ABI. + * + * For the host, set XFD to the kernel's desired initialization + * value. For guests, set XFD to its architectural RESET value. + */ + if (fpstate->is_guest) { + fpstate->size = guest_default_cfg.size; + fpstate->xfeatures = guest_default_cfg.features; + fpstate->xfd = 0; + } else { + fpstate->size = fpu_kernel_cfg.default_size; + fpstate->xfeatures = fpu_kernel_cfg.default_features; + fpstate->xfd = init_fpstate.xfd; + } + fpstate->user_size = fpu_user_cfg.default_size; - fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; - fpstate->xfd = xfd; } void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; - __fpstate_reset(fpu->fpstate, init_fpstate.xfd); + __fpstate_reset(fpu->fpstate); /* Initialize the permission related info in fpu */ fpu->perm.__state_perm = fpu_kernel_cfg.default_features; fpu->perm.__state_size = fpu_kernel_cfg.default_size; fpu->perm.__user_state_size = fpu_user_cfg.default_size; - /* Same defaults for guests */ - fpu->guest_perm = fpu->perm; + + fpu->guest_perm.__state_perm = guest_default_cfg.features; + fpu->guest_perm.__state_size = guest_default_cfg.size; + /* + * User features and sizes are always identical between host and + * guest FPUs, which allows for common guest and userspace ABI. + */ + fpu->guest_perm.__user_state_size = fpu_user_cfg.default_size; } static inline void fpu_inherit_perms(struct fpu *dst_fpu) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 99db41bf9fa6..ff988b9ea39f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -205,6 +205,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpu_kernel_cfg.default_size = size; fpu_user_cfg.max_size = size; fpu_user_cfg.default_size = size; + guest_default_cfg.size = size; } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9aa9ac8399ae..12ed75c1b567 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -57,7 +57,7 @@ static const char *xfeature_names[] = "Protection Keys User registers", "PASID state", "Control-flow User registers", - "Control-flow Kernel registers (unused)", + "Control-flow Kernel registers (KVM only)", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", @@ -81,6 +81,7 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, + [XFEATURE_CET_KERNEL] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, [XFEATURE_APX] = X86_FEATURE_APX, @@ -372,6 +373,7 @@ static __init void os_xrstor_booting(struct xregs_state *xstate) XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ + XFEATURE_MASK_CET_KERNEL | \ XFEATURE_MASK_XTILE | \ XFEATURE_MASK_APX) @@ -573,6 +575,7 @@ static bool __init check_xstate_against_struct(int nr) case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); + case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state); case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: @@ -743,6 +746,9 @@ static int __init init_xstate_size(void) fpu_user_cfg.default_size = xstate_calculate_size(fpu_user_cfg.default_features, false); + guest_default_cfg.size = + xstate_calculate_size(guest_default_cfg.features, compacted); + return 0; } @@ -763,6 +769,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) fpu_kernel_cfg.default_size = legacy_size; fpu_user_cfg.max_size = legacy_size; fpu_user_cfg.default_size = legacy_size; + guest_default_cfg.size = legacy_size; /* * Prevent enabling the static branch which enables writes to the @@ -773,6 +780,24 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) fpstate_reset(x86_task_fpu(current)); } +static u64 __init host_default_mask(void) +{ + /* + * Exclude dynamic features (require userspace opt-in) and features + * that are supported only for KVM guests. + */ + return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR); +} + +static u64 __init guest_default_mask(void) +{ + /* + * Exclude dynamic features, which require userspace opt-in even + * for KVM guests. + */ + return ~(u64)XFEATURE_MASK_USER_DYNAMIC; +} + /* * Enable and initialize the xsave feature. * Called once per system bootup. @@ -855,12 +880,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; - /* Clean out dynamic features from default */ - fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; - fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; - - fpu_user_cfg.default_features = fpu_user_cfg.max_features; - fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; + /* + * Now, given maximum feature set, determine default values by + * applying default masks. + */ + fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask(); + fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask(); + guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask(); /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 6290dd120f5e..ff40f09ad911 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(struct task_struct *tsk) +static void task_update_io_bitmap(void) { + struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { @@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk) struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; - task_update_io_bitmap(tsk); + /* + * Don't touch the TSS when invoked on a failed fork(). TSS + * reflects the state of @current and not the state of @tsk. + */ + if (tsk == current) + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(current); - + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9cea1fc36c18..243a769fdd97 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -59,6 +59,18 @@ static ssize_t sched_itmt_enabled_write(struct file *filp, return result; } +static int sched_core_priority_show(struct seq_file *s, void *unused) +{ + int cpu; + + seq_puts(s, "CPU #\tPriority\n"); + for_each_possible_cpu(cpu) + seq_printf(s, "%d\t%d\n", cpu, arch_asym_cpu_priority(cpu)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(sched_core_priority); + static const struct file_operations dfs_sched_itmt_fops = { .read = debugfs_read_file_bool, .write = sched_itmt_enabled_write, @@ -67,6 +79,7 @@ static const struct file_operations dfs_sched_itmt_fops = { }; static struct dentry *dfs_sched_itmt; +static struct dentry *dfs_sched_core_prio; /** * sched_set_itmt_support() - Indicate platform supports ITMT @@ -102,6 +115,14 @@ int sched_set_itmt_support(void) return -ENOMEM; } + dfs_sched_core_prio = debugfs_create_file("sched_core_priority", 0644, + arch_debugfs_dir, NULL, + &sched_core_priority_fops); + if (IS_ERR_OR_NULL(dfs_sched_core_prio)) { + dfs_sched_core_prio = NULL; + return -ENOMEM; + } + sched_itmt_capable = true; sysctl_sched_itmt_enabled = 1; @@ -133,6 +154,8 @@ void sched_clear_itmt_support(void) debugfs_remove(dfs_sched_itmt); dfs_sched_itmt = NULL; + debugfs_remove(dfs_sched_core_prio); + dfs_sched_core_prio = NULL; if (sysctl_sched_itmt_enabled) { /* disable sched_itmt if we are no longer ITMT capable */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 102641fd2172..8b1a9733d13e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) struct perf_event *bp; /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); + set_debugreg(DR7_FIXED_1, 7); for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index b68d4be9464e..d547de9b3ed8 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -40,7 +40,7 @@ static const struct bin_attribute boot_params_data_attr = { .name = "data", .mode = S_IRUGO, }, - .read_new = boot_params_data_read, + .read = boot_params_data_read, .size = sizeof(boot_params), }; @@ -56,7 +56,7 @@ static const struct bin_attribute *const boot_params_data_attrs[] = { static const struct attribute_group boot_params_attr_group = { .attrs = boot_params_version_attrs, - .bin_attrs_new = boot_params_data_attrs, + .bin_attrs = boot_params_data_attrs, }; static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) @@ -250,7 +250,7 @@ static struct bin_attribute data_attr __ro_after_init = { .name = "data", .mode = S_IRUGO, }, - .read_new = setup_data_data_read, + .read = setup_data_data_read, }; static struct attribute *setup_data_type_attrs[] = { @@ -265,7 +265,7 @@ static const struct bin_attribute *const setup_data_data_attrs[] = { static const struct attribute_group setup_data_attr_group = { .attrs = setup_data_type_attrs, - .bin_attrs_new = setup_data_data_attrs, + .bin_attrs = setup_data_data_attrs, }; static int __init create_setup_data_node(struct kobject *parent, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 921c1c783bc1..8ae750cde0c6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -420,7 +420,7 @@ static u64 kvm_steal_clock(int cpu) return steal; } -static inline void __set_percpu_decrypted(void *ptr, unsigned long size) +static inline __init void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1d2dac72b9c..1b7960cf6eb0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -176,6 +176,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + clear_tsk_thread_flag(p, TIF_IO_BITMAP); p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -333,13 +334,21 @@ DEFINE_PER_CPU(u64, msr_misc_features_shadow); static void set_cpuid_faulting(bool on) { - u64 msrval; - msrval = this_cpu_read(msr_misc_features_shadow); - msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; - msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); - this_cpu_write(msr_misc_features_shadow, msrval); - wrmsrq(MSR_MISC_FEATURES_ENABLES, msrval); + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + u64 msrval; + + msrval = this_cpu_read(msr_misc_features_shadow); + msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; + msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); + this_cpu_write(msr_misc_features_shadow, msrval); + wrmsrq(MSR_MISC_FEATURES_ENABLES, msrval); + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (on) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_CPUID_USER_DIS_BIT); + else + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_CPUID_USER_DIS_BIT); + } } static void disable_cpuid(void) @@ -464,6 +473,11 @@ void native_tss_update_io_bitmap(void) } else { struct io_bitmap *iobm = t->io_bitmap; + if (WARN_ON_ONCE(!iobm)) { + clear_thread_flag(TIF_IO_BITMAP); + native_tss_invalidate_io_bitmap(); + } + /* * Only copy bitmap data when the sequence number differs. The * update time is accounted to the incoming task. @@ -901,16 +915,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a10e180cbf23..3ef15c2f152f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400)) + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1)) return; printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8d6cf25127aa..52a5c03c353c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400))) { + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", @@ -707,6 +707,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Load the Intel cache allocation PQR MSR. */ resctrl_arch_sched_in(next_p); + /* Reset hw history on AMD CPUs */ + if (cpu_feature_enabled(X86_FEATURE_AMD_WORKLOAD_CLASS)) + wrmsrl(MSR_AMD_WORKLOAD_HRST, 0x1); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 095f04bdabdc..3dcadc13f09a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1236,7 +1236,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, static struct user_regset x86_64_regsets[] __ro_after_init = { [REGSET64_GENERAL] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1244,7 +1244,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = genregs_set }, [REGSET64_FP] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1253,7 +1253,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = xfpregs_set }, [REGSET64_XSTATE] = { - .core_note_type = NT_X86_XSTATE, + USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, @@ -1261,7 +1261,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .set = xstateregs_set }, [REGSET64_IOPERM] = { - .core_note_type = NT_386_IOPERM, + USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_LONGS, .size = sizeof(long), .align = sizeof(long), @@ -1270,7 +1270,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, #ifdef CONFIG_X86_USER_SHADOW_STACK [REGSET64_SSP] = { - .core_note_type = NT_X86_SHSTK, + USER_REGSET_NOTE_TYPE(X86_SHSTK), .n = 1, .size = sizeof(u64), .align = sizeof(u64), @@ -1297,7 +1297,7 @@ static const struct user_regset_view user_x86_64_view = { #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static struct user_regset x86_32_regsets[] __ro_after_init = { [REGSET32_GENERAL] = { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct32) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1305,7 +1305,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = genregs32_set }, [REGSET32_FP] = { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1314,7 +1314,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = fpregs_set }, [REGSET32_XFP] = { - .core_note_type = NT_PRXFPREG, + USER_REGSET_NOTE_TYPE(PRXFPREG), .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), @@ -1323,7 +1323,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = xfpregs_set }, [REGSET32_XSTATE] = { - .core_note_type = NT_X86_XSTATE, + USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, @@ -1331,7 +1331,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = xstateregs_set }, [REGSET32_TLS] = { - .core_note_type = NT_386_TLS, + USER_REGSET_NOTE_TYPE(386_TLS), .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, .size = sizeof(struct user_desc), @@ -1341,7 +1341,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .set = regset_tls_set }, [REGSET32_IOPERM] = { - .core_note_type = NT_386_IOPERM, + USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_BYTES / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506..42bbc42bd350 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901..d483b585c6c6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18266cc3d98c..b014e6d229f9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -299,3 +299,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc78c2325fd2..33e166f6ab12 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -478,44 +478,41 @@ static int x86_cluster_flags(void) */ static bool x86_has_numa_in_package; -static struct sched_domain_topology_level x86_topology[6]; - -static void __init build_sched_topology(void) -{ - int i = 0; - -#ifdef CONFIG_SCHED_SMT - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) - }; -#endif +static struct sched_domain_topology_level x86_topology[] = { + SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT), #ifdef CONFIG_SCHED_CLUSTER - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) - }; + SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS), #endif #ifdef CONFIG_SCHED_MC - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) - }; + SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC), #endif + SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG), + { NULL }, +}; + +static void __init build_sched_topology(void) +{ + struct sched_domain_topology_level *topology = x86_topology; + /* - * When there is NUMA topology inside the package skip the PKG domain - * since the NUMA domains will auto-magically create the right spanning - * domains based on the SLIT. + * When there is NUMA topology inside the package invalidate the + * PKG domain since the NUMA domains will auto-magically create the + * right spanning domains based on the SLIT. */ - if (!x86_has_numa_in_package) { - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(PKG) - }; + if (x86_has_numa_in_package) { + unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2; + + memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom])); } /* - * There must be one trailing NULL entry left. + * Drop the SMT domains if there is only one thread per-core + * since it'll get degenerated by the scheduler anyways. */ - BUG_ON(i >= ARRAY_SIZE(x86_topology)-1); + if (cpu_smt_num_threads <= 1) + ++topology; - set_sched_topology(x86_topology); + set_sched_topology(topology); } void set_cpu_sibling_map(int cpu) @@ -1244,6 +1241,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1290,50 +1291,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } /* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - -/* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). */ @@ -1383,9 +1340,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5c897a86418..36354b470590 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline unsigned long debug_read_clear_dr6(void) +static __always_inline unsigned long debug_read_reset_dr6(void) { unsigned long dr6; + get_debugreg(dr6, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* * The Intel SDM says: * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. + * Certain debug exceptions may clear bits 0-3 of DR6. + * + * BLD induced #DB clears DR6.BLD and any other debug + * exception doesn't modify DR6.BLD. * - * Keep it simple: clear DR6 immediately. + * RTM induced #DB clears DR6.RTM and any other debug + * exception sets DR6.RTM. + * + * To avoid confusion in identifying debug exceptions, + * debug handlers should set DR6.BLD and DR6.RTM, and + * clear other DR6 bits before returning. + * + * Keep it simple: write DR6 with its architectural reset + * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. */ - get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); - dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } @@ -1239,13 +1247,13 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - exc_debug_kernel(regs, debug_read_clear_dr6()); + exc_debug_kernel(regs, debug_read_reset_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - exc_debug_user(regs, debug_read_clear_dr6()); + exc_debug_user(regs, debug_read_reset_dr6()); } #ifdef CONFIG_X86_FRED @@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which - * debug_read_clear_dr6() returns for the IDT entry points. + * debug_read_reset_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); @@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6 = debug_read_clear_dr6(); + unsigned long dr6 = debug_read_reset_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); |