diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
31 files changed, 655 insertions, 424 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index eb4dbcdf41f1..a02bba0ed6b9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_ACRN_GUEST) += acrn.o obj-$(CONFIG_DEBUG_FS) += debugfs.o quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ + cmd_mkcapflags = $(CONFIG_SHELL) $(src)/mkcapflags.sh $@ $^ cpufeature = $(src)/../../include/asm/cpufeatures.h vmxfeature = $(src)/../../include/asm/vmxfeatures.h diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..44df3f11e731 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -13,6 +13,7 @@ #include <asm/apic.h> #include <asm/cacheinfo.h> #include <asm/cpu.h> +#include <asm/cpu_device_id.h> #include <asm/spec-ctrl.h> #include <asm/smp.h> #include <asm/numa.h> @@ -345,6 +346,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -437,8 +460,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x1a: switch (c->x86_model) { - case 0x00 ... 0x0f: - case 0x20 ... 0x2f: + case 0x00 ... 0x2f: case 0x40 ... 0x4f: case 0x70 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: @@ -527,7 +535,6 @@ clear_sev: static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; if (c->x86 >= 0xf) @@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); @@ -802,6 +795,11 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +static const struct x86_cpu_desc erratum_1386_microcode[] = { + AMD_CPU_DESC(0x17, 0x1, 0x2, 0x0800126e), + AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052), +}; + static void fix_erratum_1386(struct cpuinfo_x86 *c) { /* @@ -811,7 +809,13 @@ static void fix_erratum_1386(struct cpuinfo_x86 *c) * * Affected parts all have no supervisor XSAVE states, meaning that * the XSAVEC instruction (which works fine) is equivalent. + * + * Clear the feature flag only on microcode revisions which + * don't have the fix. */ + if (x86_cpu_has_min_microcode_rev(erratum_1386_microcode)) + return; + clear_cpu_cap(c, X86_FEATURE_XSAVES); } diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index fdbb5f07448f..b3fa61d45352 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -124,25 +124,24 @@ static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) return true; } -#define X86_MATCH(model) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ - INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) +#define X86_MATCH(vfm) \ + X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, NULL) static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = { - X86_MATCH(XEON_PHI_KNL), - X86_MATCH(XEON_PHI_KNM), + X86_MATCH(INTEL_XEON_PHI_KNL), + X86_MATCH(INTEL_XEON_PHI_KNM), {} }; static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = { - X86_MATCH(SKYLAKE_X), + X86_MATCH(INTEL_SKYLAKE_X), {} }; static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = { - X86_MATCH(ATOM_GOLDMONT), - X86_MATCH(ATOM_GOLDMONT_D), - X86_MATCH(ATOM_GOLDMONT_PLUS), + X86_MATCH(INTEL_ATOM_GOLDMONT), + X86_MATCH(INTEL_ATOM_GOLDMONT_D), + X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS), {} }; @@ -346,6 +345,7 @@ static DECLARE_WORK(disable_freq_invariance_work, disable_freq_invariance_workfn); DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; +EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale); static void scale_freq_tick(u64 acnt, u64 mcnt) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b..b6f927f6c567 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -26,7 +26,7 @@ #include <asm/msr.h> #include <asm/vmx.h> #include <asm/paravirt.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/e820/api.h> #include <asm/hypervisor.h> #include <asm/tlbflush.h> @@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; @@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -301,8 +305,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -401,8 +402,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -659,8 +656,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2323,20 +2391,20 @@ static void override_cache_bits(struct cpuinfo_x86 *c) if (c->x86 != 6) return; - switch (c->x86_model) { - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_HASWELL: - case INTEL_FAM6_HASWELL_L: - case INTEL_FAM6_HASWELL_G: - case INTEL_FAM6_BROADWELL: - case INTEL_FAM6_BROADWELL_G: - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: + switch (c->x86_vfm) { + case INTEL_NEHALEM: + case INTEL_WESTMERE: + case INTEL_SANDYBRIDGE: + case INTEL_IVYBRIDGE: + case INTEL_HASWELL: + case INTEL_HASWELL_L: + case INTEL_HASWELL_G: + case INTEL_BROADWELL: + case INTEL_BROADWELL_G: + case INTEL_SKYLAKE_L: + case INTEL_SKYLAKE: + case INTEL_KABYLAKE_L: + case INTEL_KABYLAKE: if (c->x86_cache_bits < 44) c->x86_cache_bits = 44; break; @@ -2695,15 +2763,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2780,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char *spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; + + return "; BHI: Vulnerable"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5c1e6d6be267..d4e539d4e158 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -68,6 +68,7 @@ #include <asm/traps.h> #include <asm/sev.h> #include <asm/tdx.h> +#include <asm/posted_intr.h> #include "cpu.h" @@ -114,17 +115,17 @@ static const struct x86_cpu_id ppin_cpuids[] = { X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]), /* Legacy models without CPUID enumeration */ - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), {} }; @@ -1053,18 +1054,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) void get_cpu_address_sizes(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - bool vp_bits_from_cpuid = true; if (!cpu_has(c, X86_FEATURE_CPUID) || - (c->extended_cpuid_level < 0x80000008)) - vp_bits_from_cpuid = false; - - if (vp_bits_from_cpuid) { - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } else { + (c->extended_cpuid_level < 0x80000008)) { if (IS_ENABLED(CONFIG_X86_64)) { c->x86_clflush_size = 64; c->x86_phys_bits = 36; @@ -1078,7 +1070,17 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c) cpu_has(c, X86_FEATURE_PSE36)) c->x86_phys_bits = 36; } + } else { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + + /* Provide a sane default if not enumerated: */ + if (!c->x86_clflush_size) + c->x86_clflush_size = 32; } + c->x86_cache_bits = c->x86_phys_bits; c->x86_cache_alignment = c->x86_clflush_size; } @@ -1120,12 +1122,13 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) -#define VULNWL_INTEL(model, whitelist) \ - VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) +#define VULNWL_INTEL(vfm, whitelist) \ + X86_MATCH_VFM(vfm, whitelist) #define VULNWL_AMD(family, whitelist) \ VULNWL(AMD, family, X86_MODEL_ANY, whitelist) @@ -1142,32 +1145,32 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ - VULNWL_INTEL(TIGERLAKE, NO_MMIO), - VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), - VULNWL_INTEL(ALDERLAKE, NO_MMIO), - VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(INTEL_TIGERLAKE, NO_MMIO), + VULNWL_INTEL(INTEL_TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(INTEL_ALDERLAKE, NO_MMIO), + VULNWL_INTEL(INTEL_ALDERLAKE_L, NO_MMIO), - VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(CORE_YONAH, NO_SSB), + VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(INTEL_ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(INTEL_ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1177,33 +1180,31 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), - VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), - VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + VULNWL_INTEL(INTEL_ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(INTEL_ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(INTEL_ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; #define VULNBL(vendor, family, model, blacklist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) -#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ - X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ - INTEL_FAM6_##model, steppings, \ - X86_FEATURE_ANY, issues) +#define VULNBL_INTEL_STEPPINGS(vfm, steppings, issues) \ + X86_MATCH_VFM_STEPPINGS(vfm, steppings, issues) #define VULNBL_AMD(family, blacklist) \ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) @@ -1228,43 +1229,43 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(INTEL_CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE_L, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(INTEL_LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(INTEL_ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Don't use AutoIBRS when SNP is enabled because it degrades host * userspace indirect branch performance. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || (cpu_has(c, X86_FEATURE_AUTOIBRS) && !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); @@ -1581,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); get_cpu_address_sizes(c); @@ -1740,7 +1749,7 @@ static void generic_identify(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); - + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); get_cpu_address_sizes(c); @@ -2219,6 +2228,8 @@ void cpu_init(void) barrier(); x2apic_setup(); + + intel_posted_msi_init(); } mmgrab(&init_mm); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index ea9e07d57c8d..1beccefbaff9 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); void tsx_ap_init(void); +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c); #else static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } +static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { } #endif /* CONFIG_CPU_SUP_INTEL */ extern void init_spectral_chicken(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b7174209d855..b7d9f530ae16 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, @@ -114,6 +114,9 @@ static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) if (WARN_ON(feature >= MAX_FEATURE_BITS)) return; + if (boot_cpu_has(feature)) + WARN_ON(alternatives_patched); + clear_feature(c, feature); /* Collect all features to disable, handling dependencies */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index be30d7fa2e66..fdf3489d92a4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -228,6 +228,7 @@ static void detect_tme_early(struct cpuinfo_x86 *c) if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { pr_info_once("x86/tme: not enabled by BIOS\n"); mktme_status = MKTME_DISABLED; + clear_cpu_cap(c, X86_FEATURE_TME); return; } @@ -268,19 +269,26 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) + return; + + /* + * The BIOS can have limited CPUID to leaf 2, which breaks feature + * enumeration. Unlock it and update the maximum leaf info. + */ + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) + c->cpuid_level = cpuid_eax(0); +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; - /* Unmask CPUID levels if masked: */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { - c->cpuid_level = cpuid_eax(0); - get_cpu_cap(c); - } - } - if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index f18d35fe27a9..30b1d63b97f3 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -204,12 +204,12 @@ static int intel_epb_offline(unsigned int cpu) } static const struct x86_cpu_id intel_epb_normal[] = { - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, - ENERGY_PERF_BIAS_NORMAL_POWERSAVE), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, - ENERGY_PERF_BIAS_NORMAL_POWERSAVE), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, - ENERGY_PERF_BIAS_NORMAL_POWERSAVE), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, + ENERGY_PERF_BIAS_NORMAL_POWERSAVE), + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, + ENERGY_PERF_BIAS_NORMAL_POWERSAVE), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, + ENERGY_PERF_BIAS_NORMAL_POWERSAVE), {} }; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index ad6776081e60..8e7de733320a 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -17,8 +17,7 @@ * * A typical table entry would be to match a specific CPU * - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, - * X86_FEATURE_ANY, NULL); + * X86_MATCH_VFM_FEATURE(INTEL_BROADWELL, X86_FEATURE_ANY, NULL); * * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) @@ -26,7 +25,7 @@ * asm/cpu_device_id.h contains a set of useful macros which are shortcuts * for various common selections. The above can be shortened to: * - * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL); + * X86_MATCH_VFM(INTEL_BROADWELL, NULL); * * Arrays used to match for this should also be declared using * MODULE_DEVICE_TABLE(x86cpu, ...) @@ -39,9 +38,7 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; - m->vendor | m->family | m->model | m->steppings | m->feature; - m++) { + for (m = match; m->flags & X86_CPU_ID_FLAG_ENTRY_VALID; m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..ad0623b659ed 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -47,7 +47,7 @@ #include <linux/kexec.h> #include <asm/fred.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/traps.h> #include <asm/tlbflush.h> @@ -1593,6 +1593,24 @@ noinstr void do_machine_check(struct pt_regs *regs) else queue_task_work(&m, msg, kill_me_maybe); + } else if (m.mcgstatus & MCG_STATUS_SEAM_NR) { + /* + * Saved RIP on stack makes it look like the machine check + * was taken in the kernel on the instruction following + * the entry to SEAM mode. But MCG_STATUS_SEAM_NR indicates + * that the machine check was taken inside SEAM non-root + * mode. CPU core has already marked that guest as dead. + * It is OK for the kernel to resume execution at the + * apparent point of the machine check as the fault did + * not occur there. Mark the page as poisoned so it won't + * be added to free list when the guest is terminated. + */ + if (mce_usable_address(&m)) { + struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT); + + if (p) + SetPageHWPoison(p); + } } else { /* * Handle an MCE which has happened in kernel space but from @@ -1930,14 +1948,14 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) cfg->bootlog = 0; - if (c->x86 == 6 && c->x86_model == 45) + if (c->x86_vfm == INTEL_SANDYBRIDGE_X) mce_flags.snb_ifu_quirk = 1; /* * Skylake, Cascacde Lake and Cooper Lake require a quirk on * rep movs. */ - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_SKYLAKE_X) + if (c->x86_vfm == INTEL_SKYLAKE_X) mce_flags.skx_repmov_quirk = 1; } @@ -2500,12 +2518,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index fbe8b61c3413..4284749ec803 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -16,14 +16,14 @@ * used to save error information organized in a lock-less list. * * This memory pool is only to be used to save MCE records in MCE context. - * MCE events are rare, so a fixed size memory pool should be enough. Use - * 2 pages to save MCE events for now (~80 MCE records at most). + * MCE events are rare, so a fixed size memory pool should be enough. + * Allocate on a sliding scale based on number of CPUs. */ -#define MCE_POOLSZ (2 * PAGE_SIZE) +#define MCE_MIN_ENTRIES 80 +#define MCE_PER_CPU 2 static struct gen_pool *mce_evt_pool; static LLIST_HEAD(mce_event_llist); -static char gen_pool_buf[MCE_POOLSZ]; /* * Compare the record "t" with each of the records on list "l" to see if @@ -118,22 +118,32 @@ int mce_gen_pool_add(struct mce *mce) static int mce_gen_pool_create(void) { - struct gen_pool *tmpp; + int mce_numrecords, mce_poolsz, order; + struct gen_pool *gpool; int ret = -ENOMEM; - - tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1); - if (!tmpp) - goto out; - - ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1); + void *mce_pool; + + order = order_base_2(sizeof(struct mce_evt_llist)); + gpool = gen_pool_create(order, -1); + if (!gpool) + return ret; + + mce_numrecords = max(MCE_MIN_ENTRIES, num_possible_cpus() * MCE_PER_CPU); + mce_poolsz = mce_numrecords * (1 << order); + mce_pool = kmalloc(mce_poolsz, GFP_KERNEL); + if (!mce_pool) { + gen_pool_destroy(gpool); + return ret; + } + ret = gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1); if (ret) { - gen_pool_destroy(tmpp); - goto out; + gen_pool_destroy(gpool); + kfree(mce_pool); + return ret; } - mce_evt_pool = tmpp; + mce_evt_pool = gpool; -out: return ret; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 399b62e223d2..f6103e6bf69a 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -13,7 +13,7 @@ #include <linux/cpumask.h> #include <asm/apic.h> #include <asm/cpufeature.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/msr.h> #include <asm/mce.h> @@ -455,10 +455,10 @@ static void intel_imc_init(struct cpuinfo_x86 *c) { u64 error_control; - switch (c->x86_model) { - case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE_X: - case INTEL_FAM6_HASWELL_X: + switch (c->x86_vfm) { + case INTEL_SANDYBRIDGE_X: + case INTEL_IVYBRIDGE_X: + case INTEL_HASWELL_X: if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control)) return; error_control |= 2; @@ -484,12 +484,11 @@ bool intel_filter_mce(struct mce *m) struct cpuinfo_x86 *c = &boot_cpu_data; /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */ - if ((c->x86 == 6) && - ((c->x86_model == INTEL_FAM6_HASWELL) || - (c->x86_model == INTEL_FAM6_HASWELL_L) || - (c->x86_model == INTEL_FAM6_BROADWELL) || - (c->x86_model == INTEL_FAM6_HASWELL_G) || - (c->x86_model == INTEL_FAM6_SKYLAKE_X)) && + if ((c->x86_vfm == INTEL_HASWELL || + c->x86_vfm == INTEL_HASWELL_L || + c->x86_vfm == INTEL_BROADWELL || + c->x86_vfm == INTEL_HASWELL_G || + c->x86_vfm == INTEL_SKYLAKE_X) && (m->bank == 0) && ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) return true; diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index c4477162c07d..dac4d64dfb2a 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -12,7 +12,7 @@ #include <linux/uaccess.h> #include <asm/mce.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/traps.h> #include <asm/insn.h> #include <asm/insn-eval.h> @@ -39,20 +39,20 @@ static struct severity { u64 mask; u64 result; unsigned char sev; - unsigned char mcgmask; - unsigned char mcgres; + unsigned short mcgmask; + unsigned short mcgres; unsigned char ser; unsigned char context; unsigned char excp; unsigned char covered; - unsigned char cpu_model; + unsigned int cpu_vfm; unsigned char cpu_minstepping; unsigned char bank_lo, bank_hi; char *msg; } severities[] = { #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } #define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h -#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s +#define VFM_STEPPING(m, s) .cpu_vfm = m, .cpu_minstepping = s #define KERNEL .context = IN_KERNEL #define USER .context = IN_USER #define KERNEL_RECOV .context = IN_KERNEL_RECOV @@ -128,7 +128,7 @@ static struct severity { MCESEV( AO, "Uncorrected Patrol Scrub Error", SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0), - MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18) + VFM_STEPPING(INTEL_SKYLAKE_X, 4), BANK_RANGE(13, 18) ), /* ignore OVER for UCNA */ @@ -174,6 +174,18 @@ static struct severity { USER ), MCESEV( + AR, "Data load error in SEAM non-root mode", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR), + KERNEL + ), + MCESEV( + AR, "Instruction fetch error in SEAM non-root mode", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), + MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR), + KERNEL + ), + MCESEV( PANIC, "Data load in unrecoverable area of kernel", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), KERNEL @@ -290,7 +302,6 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) switch (fixup_type) { case EX_TYPE_UACCESS: - case EX_TYPE_COPY: if (!copy_user) return IN_KERNEL; m->kflags |= MCE_IN_KERNEL_COPYIN; @@ -386,7 +397,7 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char continue; if (s->excp && excp != s->excp) continue; - if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model) + if (s->cpu_vfm && boot_cpu_data.x86_vfm != s->cpu_vfm) continue; if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping) continue; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 13b45b9c806d..c0d56c02b8da 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -84,8 +84,6 @@ struct microcode_amd { unsigned int mpb[]; }; -#define PATCH_MAX_SIZE (3 * PAGE_SIZE) - static struct equiv_cpu_table { unsigned int num_entries; struct equiv_cpu_entry *entry; @@ -465,7 +463,7 @@ static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, siz return !__apply_microcode_amd(mc); } -static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) +static bool get_builtin_microcode(struct cpio_data *cp, u8 family) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct firmware fw; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 232026a239a6..b3658d11e7b6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -60,11 +60,6 @@ module_param(force_minrev, bool, S_IRUSR | S_IWUSR); */ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -struct cpu_info_ctx { - struct cpu_signature *cpu_sig; - int err; -}; - /* * Those patch levels cannot be updated to newer ones and thus should be final. */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 5f0414452b67..815fa67356a2 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -21,7 +21,7 @@ #include <linux/uio.h> #include <linux/mm.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/setup.h> @@ -577,8 +577,7 @@ static bool is_blacklisted(unsigned int cpu) * This behavior is documented in item BDF90, #334165 (Intel Xeon * Processor E7-8800/4800 v4 Product Family). */ - if (c->x86 == 6 && - c->x86_model == INTEL_FAM6_BROADWELL_X && + if (c->x86_vfm == INTEL_BROADWELL_X && c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 83e40341583e..a113d9aba553 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -22,7 +22,7 @@ #include <linux/cacheinfo.h> #include <linux/cpuhotplug.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/resctrl.h> #include "internal.h" @@ -56,14 +56,9 @@ int max_name_width, max_data_width; */ bool rdt_alloc_capable; -static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); -static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); -static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); +static void mba_wrmsr_intel(struct msr_param *m); +static void cat_wrmsr(struct msr_param *m); +static void mba_wrmsr_amd(struct msr_param *m); #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) @@ -309,12 +304,11 @@ static void rdt_get_cdp_l2_config(void) rdt_get_cdp_config(RDT_RESOURCE_L2); } -static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +static void mba_wrmsr_amd(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); @@ -334,25 +328,22 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) return r->default_ctrl; } -static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r) +static void mba_wrmsr_intel(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. */ for (i = m->low; i < m->high; i++) - wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r)); + wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res)); } -static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +static void cat_wrmsr(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); @@ -362,6 +353,8 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) { struct rdt_domain *d; + lockdep_assert_cpus_held(); + list_for_each_entry(d, &r->domains, list) { /* Find the domain that contains this CPU */ if (cpumask_test_cpu(cpu, &d->cpu_mask)) @@ -378,19 +371,11 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r) void rdt_ctrl_update(void *arg) { + struct rdt_hw_resource *hw_res; struct msr_param *m = arg; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); - struct rdt_resource *r = m->res; - int cpu = smp_processor_id(); - struct rdt_domain *d; - d = get_domain_from_cpu(cpu, r); - if (d) { - hw_res->msr_update(d, m, r); - return; - } - pr_warn_once("cpu %d not found in any domain for resource %s\n", - cpu, r->name); + hw_res = resctrl_to_arch_res(m->res); + hw_res->msr_update(m); } /* @@ -463,9 +448,11 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) hw_dom->ctrl_val = dc; setup_default_ctrlval(r, dc); + m.res = r; + m.dom = d; m.low = 0; m.high = hw_res->num_closid; - hw_res->msr_update(d, &m, r); + hw_res->msr_update(&m); return 0; } @@ -821,18 +808,18 @@ static __init bool get_rdt_mon_resources(void) static __init void __check_quirks_intel(void) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_HASWELL_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_HASWELL_X: if (!rdt_options[RDT_FLAG_L3_CAT].force_off) cache_alloc_hsw_probe(); break; - case INTEL_FAM6_SKYLAKE_X: + case INTEL_SKYLAKE_X: if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); else set_rdt_options("!l3cat"); fallthrough; - case INTEL_FAM6_BROADWELL_X: + case INTEL_BROADWELL_X: intel_rdt_mbm_apply_quirk(); break; } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 7997b47743a2..b7291f60399c 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -272,22 +272,6 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type) } } -static bool apply_config(struct rdt_hw_domain *hw_dom, - struct resctrl_staged_config *cfg, u32 idx, - cpumask_var_t cpu_mask) -{ - struct rdt_domain *dom = &hw_dom->d_resctrl; - - if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { - cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); - hw_dom->ctrl_val[idx] = cfg->new_ctrl; - - return true; - } - - return false; -} - int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { @@ -302,9 +286,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, hw_dom->ctrl_val[idx] = cfg_val; msr_param.res = r; + msr_param.dom = d; msr_param.low = idx; msr_param.high = idx + 1; - hw_res->msr_update(d, &msr_param, r); + hw_res->msr_update(&msr_param); return 0; } @@ -315,48 +300,39 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) struct rdt_hw_domain *hw_dom; struct msr_param msr_param; enum resctrl_conf_type t; - cpumask_var_t cpu_mask; struct rdt_domain *d; u32 idx; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - msr_param.res = NULL; list_for_each_entry(d, &r->domains, list) { hw_dom = resctrl_to_arch_dom(d); + msr_param.res = NULL; for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; if (!cfg->have_new_ctrl) continue; idx = get_config_index(closid, t); - if (!apply_config(hw_dom, cfg, idx, cpu_mask)) + if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) continue; + hw_dom->ctrl_val[idx] = cfg->new_ctrl; if (!msr_param.res) { msr_param.low = idx; msr_param.high = msr_param.low + 1; msr_param.res = r; + msr_param.dom = d; } else { msr_param.low = min(msr_param.low, idx); msr_param.high = max(msr_param.high, idx + 1); } } + if (msr_param.res) + smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); } - if (cpumask_empty(cpu_mask)) - goto done; - - /* Update resource control msr on all the CPUs. */ - on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); - -done: - free_cpumask_var(cpu_mask); - return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..f1d926832ec8 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ @@ -378,11 +379,13 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use + * @dom: The domain to update * @low: Beginning index from base MSR * @high: End index */ struct msr_param { struct rdt_resource *res; + struct rdt_domain *dom; u32 low; u32 high; }; @@ -442,8 +445,7 @@ struct rdt_hw_resource { struct rdt_resource r_resctrl; u32 num_closid; unsigned int msr_base; - void (*msr_update) (struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); + void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; unsigned int mbm_cfg_mask; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c34a35ec0f03..2345e6836593 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -24,6 +24,7 @@ #include <asm/resctrl.h> #include "internal.h" +#include "trace.h" /** * struct rmid_entry - dirty tracking for all RMID. @@ -354,6 +355,16 @@ void __check_limbo(struct rdt_domain *d, bool force_free) rmid_dirty = true; } else { rmid_dirty = (val >= resctrl_rmid_realloc_threshold); + + /* + * x86's CLOSID and RMID are independent numbers, so the entry's + * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the + * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't + * used to select the configuration. It is thus necessary to track both + * CLOSID and RMID because there may be dependencies between them + * on some architectures. + */ + trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val); } if (force_free || !rmid_dirty) { diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 884b88e25141..aacf236dfe3b 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -23,7 +23,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/resctrl.h> #include <asm/perf_event.h> @@ -31,7 +31,7 @@ #include "internal.h" #define CREATE_TRACE_POINTS -#include "pseudo_lock_event.h" +#include "trace.h" /* * The bits needed to disable hardware prefetching varies based on the @@ -88,8 +88,8 @@ static u64 get_prefetch_disable_bits(void) boot_cpu_data.x86 != 6) return 0; - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_BROADWELL_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_BROADWELL_X: /* * SDM defines bits of MSR_MISC_FEATURE_CONTROL register * as: @@ -100,8 +100,8 @@ static u64 get_prefetch_disable_bits(void) * 63:4 Reserved */ return 0xF; - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: /* * SDM defines bits of MSR_MISC_FEATURE_CONTROL register * as: @@ -1084,9 +1084,9 @@ static int measure_l2_residency(void *_plr) * L2_HIT 02H * L2_MISS 10H */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + switch (boot_cpu_data.x86_vfm) { + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: perf_miss_attr.config = X86_CONFIG(.event = 0xd1, .umask = 0x10); perf_hit_attr.config = X86_CONFIG(.event = 0xd1, @@ -1123,8 +1123,8 @@ static int measure_l3_residency(void *_plr) * MISS 41H */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_BROADWELL_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_BROADWELL_X: /* On BDW the hit event counts references, not hits */ perf_hit_attr.config = X86_CONFIG(.event = 0x2e, .umask = 0x4f); @@ -1142,7 +1142,7 @@ static int measure_l3_residency(void *_plr) */ counts.miss_after -= counts.miss_before; - if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) { + if (boot_cpu_data.x86_vfm == INTEL_BROADWELL_X) { /* * On BDW references and misses are counted, need to adjust. * Sometimes the "hits" counter is a bit more than the diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 011e17efb1a6..02f213f1c51c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2813,16 +2813,12 @@ static int reset_all_ctrls(struct rdt_resource *r) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom; struct msr_param msr_param; - cpumask_var_t cpu_mask; struct rdt_domain *d; int i; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - msr_param.res = r; msr_param.low = 0; msr_param.high = hw_res->num_closid; @@ -2834,17 +2830,13 @@ static int reset_all_ctrls(struct rdt_resource *r) */ list_for_each_entry(d, &r->domains, list) { hw_dom = resctrl_to_arch_dom(d); - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; + msr_param.dom = d; + smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); } - /* Update CBM on all the CPUs in cpu_mask */ - on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); - - free_cpumask_var(cpu_mask); - return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h b/arch/x86/kernel/cpu/resctrl/trace.h index 428ebbd4270b..2a506316b303 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h +++ b/arch/x86/kernel/cpu/resctrl/trace.h @@ -2,8 +2,8 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM resctrl -#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PSEUDO_LOCK_H +#if !defined(_TRACE_RESCTRL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RESCTRL_H #include <linux/tracepoint.h> @@ -35,9 +35,25 @@ TRACE_EVENT(pseudo_lock_l3, TP_printk("hits=%llu miss=%llu", __entry->l3_hits, __entry->l3_miss)); -#endif /* _TRACE_PSEUDO_LOCK_H */ +TRACE_EVENT(mon_llc_occupancy_limbo, + TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes), + TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes), + TP_STRUCT__entry(__field(u32, ctrl_hw_id) + __field(u32, mon_hw_id) + __field(int, domain_id) + __field(u64, llc_occupancy_bytes)), + TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id; + __entry->mon_hw_id = mon_hw_id; + __entry->domain_id = domain_id; + __entry->llc_occupancy_bytes = llc_occupancy_bytes;), + TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu", + __entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id, + __entry->llc_occupancy_bytes) + ); + +#endif /* _TRACE_RESCTRL_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE pseudo_lock_event +#define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..af5aa2c754c2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, @@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 262f5fb18d74..22b65a5f5ec6 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -113,7 +113,7 @@ static unsigned long sgx_get_unmapped_area(struct file *file, if (flags & MAP_FIXED) return addr; - return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); + return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 166692f2d501..27892e57c4ef 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -13,6 +13,7 @@ #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/sysfs.h> +#include <linux/vmalloc.h> #include <asm/sgx.h> #include "driver.h" #include "encl.h" diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index aaca8d235dc2..621a151ccf7d 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,12 +123,14 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } static __init bool check_for_real_bsp(u32 apic_id) { + bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6; + u64 msr; + /* * There is no real good way to detect whether this a kdump() * kernel, but except on the Voyager SMP monstrosity which is not @@ -145,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id) if (topo_info.real_bsp_apic_id != BAD_APICID) return false; + /* + * Check whether the enumeration order is broken by evaluating the + * BSP bit in the APICBASE MSR. If the CPU does not have the + * APICBASE MSR then the BSP detection is not possible and the + * kernel must rely on the firmware enumeration order. + */ + if (has_apic_base) { + rdmsrl(MSR_IA32_APICBASE, msr); + is_bsp = !!(msr & MSR_IA32_APICBASE_BSP); + } + if (apic_id == topo_info.boot_cpu_apic_id) { - topo_info.real_bsp_apic_id = apic_id; - return false; + /* + * If the boot CPU has the APIC BSP bit set then the + * firmware enumeration is agreeing. If the CPU does not + * have the APICBASE MSR then the only choice is to trust + * the enumeration order. + */ + if (is_bsp || !has_apic_base) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + /* + * If the boot APIC is enumerated first, but the APICBASE + * MSR does not have the BSP bit set, then there is no way + * to discover the real BSP here. Assume a crash kernel and + * limit the number of CPUs to 1 as an INIT to the real BSP + * would reset the machine. + */ + pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id); + pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n"); + set_nr_cpu_ids(1); + goto fwbug; } - pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n", topo_info.boot_cpu_apic_id, apic_id); + + if (is_bsp) { + /* + * The boot CPU has the APIC BSP bit set. Use it and complain + * about the broken firmware enumeration. + */ + topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id; + goto fwbug; + } + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); topo_info.real_bsp_apic_id = apic_id; return true; + +fwbug: + pr_warn(FW_BUG "APIC enumeration order not specification compliant\n"); + return false; } static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, @@ -210,7 +256,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493af..7d476fa697ca 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -48,7 +58,7 @@ static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_ tscan->amd_node_id = node_id; } -static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) +static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext) { struct { // eax @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. Only valid for family >= 0x17. */ - if (!has_0xb) { + if (!has_topoext && tscan->c->x86 >= 0x17) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); @@ -107,64 +119,86 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) return true; } -static bool parse_fam10h_node_id(struct topo_scan *tscan) +static void parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) - return false; + return; rdmsrl(MSR_FAM10H_NODE_ID, nid.msr); store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id); tscan->c->topo.llc_id = nid.node_id; - return true; } static void legacy_set_llc(struct topo_scan *tscan) { unsigned int apicid = tscan->c->topo.initial_apicid; - /* parse_8000_0008() set everything up except llc_id */ - tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; + /* If none of the parsers set LLC ID then use the die ID for it. */ + if (tscan->c->topo.llc_id == BAD_APICID) + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; +} + +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } } static void parse_topology_amd(struct topo_scan *tscan) { - bool has_0xb = false; + bool has_topoext = false; /* * If the extended topology leaf 0x8000_001e is available - * try to get SMT and CORE shift from leaf 0xb first, then - * try to get the CORE shift from leaf 0x8000_0008. + * try to get SMT, CORE, TILE, and DIE shifts from extended + * CPUID leaf 0x8000_0026 on supported processors first. If + * extended CPUID leaf 0x8000_0026 is not supported, try to + * get SMT and CORE shift from leaf 0xb first, then try to + * get the CORE shift from leaf 0x8000_0008. */ if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) - has_0xb = cpu_parse_topology_ext(tscan); + has_topoext = cpu_parse_topology_ext(tscan); - if (!has_0xb && !parse_8000_0008(tscan)) + if (!has_topoext && !parse_8000_0008(tscan)) return; /* Prefer leaf 0x8000001e if available */ - if (parse_8000_001e(tscan, has_0xb)) + if (parse_8000_001e(tscan, has_topoext)) return; /* Try the NODEID MSR */ - if (parse_fam10h_node_id(tscan)) - return; - - legacy_set_llc(tscan); + parse_fam10h_node_id(tscan); } void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); + legacy_set_llc(tscan); if (tscan->amd_nodes_per_pkg > 1) set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM); diff --git a/arch/x86/kernel/cpu/topology_ext.c b/arch/x86/kernel/cpu/topology_ext.c index e477228cd5b2..467b0326bf1a 100644 --- a/arch/x86/kernel/cpu/topology_ext.c +++ b/arch/x86/kernel/cpu/topology_ext.c @@ -13,7 +13,10 @@ enum topo_types { CORE_TYPE = 2, MAX_TYPE_0B = 3, MODULE_TYPE = 3, + AMD_CCD_TYPE = 3, TILE_TYPE = 4, + AMD_SOCKET_TYPE = 4, + MAX_TYPE_80000026 = 5, DIE_TYPE = 5, DIEGRP_TYPE = 6, MAX_TYPE_1F = 7, @@ -32,6 +35,13 @@ static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = { [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN, }; +static const unsigned int topo_domain_map_80000026[MAX_TYPE_80000026] = { + [SMT_TYPE] = TOPO_SMT_DOMAIN, + [CORE_TYPE] = TOPO_CORE_DOMAIN, + [AMD_CCD_TYPE] = TOPO_TILE_DOMAIN, + [AMD_SOCKET_TYPE] = TOPO_DIE_DOMAIN, +}; + static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf, unsigned int *last_dom) { @@ -56,6 +66,7 @@ static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf, switch (leaf) { case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break; case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break; + case 0x80000026: maxtype = MAX_TYPE_80000026; map = topo_domain_map_80000026; break; default: return false; } @@ -125,6 +136,10 @@ bool cpu_parse_topology_ext(struct topo_scan *tscan) if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f)) return true; + /* AMD: Try leaf 0x80000026 first. */ + if (tscan->c->extended_cpuid_level >= 0x80000026 && parse_topology_leaf(tscan, 0x80000026)) + return true; + /* Intel/AMD: Fall back to leaf 0xB if available */ return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b); } |