diff options
Diffstat (limited to 'arch/x86/kernel/cpu/amd.c')
| -rw-r--r-- | arch/x86/kernel/cpu/amd.c | 1376 |
1 files changed, 930 insertions, 446 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c587a8757227..bc94ff1e250a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,24 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> -#include <linux/init.h> #include <linux/bitops.h> #include <linux/elf.h> #include <linux/mm.h> - +#include <linux/kvm_types.h> #include <linux/io.h> #include <linux/sched.h> +#include <linux/sched/clock.h> +#include <linux/random.h> +#include <linux/topology.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/cacheinfo.h> #include <asm/cpu.h> +#include <asm/cpu_device_id.h> +#include <asm/spec-ctrl.h> +#include <asm/smp.h> +#include <asm/numa.h> #include <asm/pci-direct.h> +#include <asm/delay.h> +#include <asm/debugreg.h> +#include <asm/resctrl.h> +#include <asm/msr.h> +#include <asm/sev.h> #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> -# include <asm/cacheflush.h> #endif #include "cpu.h" -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +u16 invlpgb_count_max __ro_after_init = 1; + +static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { u32 gprs[8] = { 0 }; int err; @@ -36,7 +51,7 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) return err; } -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +static inline int wrmsrq_amd_safe(unsigned msr, u64 val) { u32 gprs[8] = { 0 }; @@ -51,7 +66,6 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) return wrmsr_safe_regs(gprs); } -#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -66,14 +80,21 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) * performance at the same time.. */ -extern void vide(void); -__asm__(".align 4\nvide: ret"); +#ifdef CONFIG_X86_32 +extern __visible void vide(void); +__asm__(".text\n" + ".globl vide\n" + ".type vide, @function\n" + ".align 4\n" + "vide: ret\n"); +#endif -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +static void init_amd_k5(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 /* * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * of the Elan at 0x000df000. Unfortunately, one of the Linux * drivers subsequently pokes it, and changes the CPU speed. * Workaround : Remove the unneeded alias. */ @@ -84,11 +105,12 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) if (inl(CBAR) & CBAR_ENB) outl(0 | CBAR_KEY, CBAR); } +#endif } - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +static void init_amd_k6(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 u32 l, h; int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); @@ -101,13 +123,13 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); - unsigned long d, d2; + u64 d, d2; - printk(KERN_INFO "AMD K6 stepping B detected - "); + pr_info("AMD K6 stepping B detected - "); /* * It looks like AMD fixed the 2.6.2 bug and improved indirect @@ -116,22 +138,22 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - rdtscl(d); + OPTIMIZER_HIDE_VAR(f_vide); + d = rdtsc(); while (n--) f_vide(); - rdtscl(d2); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) - printk(KERN_CONT - "system stability may be impaired when more than 32 MB are used.\n"); + pr_cont("system stability may be impaired when more than 32 MB are used.\n"); else - printk(KERN_CONT "probably OK (after B9730xxxx).\n"); + pr_cont("probably OK (after B9730xxxx).\n"); } /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -144,13 +166,13 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + pr_info("Enabling old style K6 write allocation for %d Mb\n", mbytes); } return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -165,7 +187,7 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + pr_info("Enabling new style K6 write allocation for %d Mb\n", mbytes); } @@ -177,10 +199,41 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) /* placeholder for any needed mods */ return; } +#endif } -static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) +static void init_amd_k7(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + pr_info("Enabling disabled K7/SSE Support.\n"); + msr_clear_bit(MSR_K7_HWCR, 15); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -190,12 +243,12 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -205,10 +258,10 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) - if (cpu_has_mp) + if (cpu_has(c, X86_FEATURE_MP)) return; /* If we get here, not a certified SMP capable AMD system. */ @@ -219,55 +272,16 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) */ WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); - add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); - - amd_k7_smp_check(c); -} + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); #endif +} #ifdef CONFIG_NUMA /* * To workaround broken NUMA config. Read the comment in * srat_detect_node(). */ -static int __cpuinit nearby_node(int apicid) +static int nearby_node(int apicid) { int i, node; @@ -285,100 +299,16 @@ static int __cpuinit nearby_node(int apicid) } #endif -/* - * Fixup core topology information for - * (1) AMD multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) AMD processors supporting compute units - */ -#ifdef CONFIG_X86_HT -static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) -{ - u32 nodes, cores_per_cu = 1; - u8 node_id; - int cpu = smp_processor_id(); - - /* get information required for multi-node processors */ - if (cpu_has_topoext) { - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - nodes = ((ecx >> 8) & 7) + 1; - node_id = ecx & 7; - - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes = ((value >> 3) & 7) + 1; - node_id = value & 7; - } else - return; - - /* fixup multi-node processor information */ - if (nodes > 1) { - u32 cores_per_node; - u32 cus_per_node; - - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes; - cus_per_node = cores_per_node / cores_per_cu; - - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = node_id; - - /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; - } -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - int cpu = smp_processor_id(); - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; - amd_get_topology(c); -#endif -} - -u16 amd_get_nb_id(int cpu) -{ - u16 id = 0; -#ifdef CONFIG_SMP - id = per_cpu(cpu_llc_id, cpu); -#endif - return id; -} -EXPORT_SYMBOL_GPL(amd_get_nb_id); - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA int cpu = smp_processor_id(); int node; - unsigned apicid = c->apicid; + unsigned apicid = c->topo.apicid; node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) - node = per_cpu(cpu_llc_id, cpu); + node = per_cpu_llc_id(cpu); /* * On multi-fabric platform (e.g. Numascale NumaChip) a @@ -408,10 +338,9 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) * through CPU mapping may alter the outcome, directly * access __apicid_to_node[]. */ - int ht_nodeid = c->initial_apicid; + int ht_nodeid = c->topo.initial_apicid; - if (ht_nodeid >= 0 && - __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = __apicid_to_node[ht_nodeid]; /* Pick a nearby node */ if (!node_online(node)) @@ -421,33 +350,75 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +static void bsp_determine_snp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; - ecx = cpuid_ecx(0x80000008); + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined or for + * processors which support the architecturally defined RMPREAD + * instruction. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + (cpu_feature_enabled(X86_FEATURE_ZEN3) || + cpu_feature_enabled(X86_FEATURE_ZEN4) || + cpu_feature_enabled(X86_FEATURE_RMPREAD)) && + snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} - c->x86_max_cores = (ecx & 0xff) + 1; +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); } - - c->x86_coreid_bits = bits; -#endif } -static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) +static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -455,10 +426,9 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) (c->x86 == 0x10 && c->x86_model >= 0x2)) { u64 val; - rdmsrl(MSR_K7_HWCR, val); + rdmsrq(MSR_K7_HWCR, val); if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); + pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n"); } } @@ -472,12 +442,182 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) va_align.mask = (upperbit - 1) & PAGE_MASK; va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; + + /* A random value per boot for bit slice [12:upper_bit) */ + va_align.bits = get_random_u32() & va_align.mask; + } + + if (cpu_has(c, X86_FEATURE_MWAITX)) + use_mwaitx_delay(); + + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrq_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); + setup_force_cpu_cap(X86_FEATURE_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } + + resctrl_cpu_detect(c); + + /* Figure out Zen generations: */ + switch (c->x86) { + case 0x17: + switch (c->x86_model) { + case 0x00 ... 0x2f: + case 0x50 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN1); + break; + case 0x30 ... 0x4f: + case 0x60 ... 0x7f: + case 0x90 ... 0x91: + case 0xa0 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN2); + break; + default: + goto warn; + } + break; + + case 0x19: + switch (c->x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN3); + break; + case 0x10 ... 0x1f: + case 0x60 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN4); + break; + default: + goto warn; + } + break; + + case 0x1a: + switch (c->x86_model) { + case 0x00 ... 0x2f: + case 0x40 ... 0x4f: + case 0x60 ... 0x7f: + setup_force_cpu_cap(X86_FEATURE_ZEN5); + break; + case 0x50 ... 0x5f: + case 0x80 ... 0xaf: + case 0xc0 ... 0xcf: + setup_force_cpu_cap(X86_FEATURE_ZEN6); + break; + default: + goto warn; + } + break; + + default: + break; + } + + bsp_determine_snp(c); + tsa_init(c); + + if (cpu_has(c, X86_FEATURE_GP_ON_USER_CPUID)) + setup_force_cpu_cap(X86_FEATURE_CPUID_FAULT); + + return; + +warn: + WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model); +} + +static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) +{ + u64 msr; + + /* + * Mark using WBINVD is needed during kexec on processors that + * support SME. This provides support for performing a successful + * kexec when going from SME inactive to SME active (or vice-versa). + * + * The cache must be cleared so that if there are entries with the + * same physical address, both with and without the encryption bit, + * they don't race each other when flushed and potentially end up + * with the wrong entry being committed to memory. + * + * Test the CPUID bit directly because with mem_encrypt=off the + * BSP will clear the X86_FEATURE_SME bit and the APs will not + * see it set after that. + */ + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + __this_cpu_write(cache_state_incoherent, true); + + /* + * BIOS support is required for SME and SEV. + * For SME: If BIOS has enabled SME then adjust x86_phys_bits by + * the SME physical address space reduction value. + * If BIOS has not enabled SME then don't advertise the + * SME feature (set in scattered.c). + * If the kernel has not enabled SME via any means then + * don't advertise the SME feature. + * For SEV: If BIOS has not enabled SEV then don't advertise SEV and + * any additional functionality based on it. + * + * In all cases, since support for SME and SEV requires long mode, + * don't advertise the feature under CONFIG_X86_32. + */ + if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { + /* Check if memory encryption is enabled */ + rdmsrq(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) + goto clear_all; + + /* + * Always adjust physical address bits. Even though this + * will be a value above 32-bits this is still done for + * CONFIG_X86_32 so that accurate values are reported. + */ + c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; + + if (IS_ENABLED(CONFIG_X86_32)) + goto clear_all; + + if (!sme_me_mask) + setup_clear_cpu_cap(X86_FEATURE_SME); + + rdmsrq(MSR_K7_HWCR, msr); + if (!(msr & MSR_K7_HWCR_SMMLOCK)) + goto clear_sev; + + return; + +clear_all: + setup_clear_cpu_cap(X86_FEATURE_SME); +clear_sev: + setup_clear_cpu_cap(X86_FEATURE_SEV); + setup_clear_cpu_cap(X86_FEATURE_SEV_ES); + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); } } -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +static void early_init_amd(struct cpuinfo_x86 *c) { - early_init_amd_mc(c); + u32 dummy; + + if (c->x86 >= 0xf) + set_cpu_cap(c, X86_FEATURE_K8); + + rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate @@ -486,38 +626,94 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - sched_clock_stable = 1; } + /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ + if (c->x86_power & BIT(12)) + set_cpu_cap(c, X86_FEATURE_ACC_POWER); + + /* Bit 14 indicates the Runtime Average Power Limit interface. */ + if (c->x86_power & BIT(14)) + set_cpu_cap(c, X86_FEATURE_RAPL); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #else /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) - /* check CPU config space for extended APIC ID */ - if (cpu_has_apic && c->x86 >= 0xf) { - unsigned int val; - val = read_pci_config(0, 24, 0, 0x68); - if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) + /* + * ApicID can always be treated as an 8-bit value for AMD APIC versions + * >= 0x10, but even old K8s came out of reset with version 0x10. So, we + * can safely set X86_FEATURE_EXTD_APICID unconditionally for families + * after 16h. + */ + if (boot_cpu_has(X86_FEATURE_APIC)) { + if (c->x86 > 0x16) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + else if (c->x86 >= 0xf) { + /* check CPU config space for extended APIC ID */ + unsigned int val; + + val = read_pci_config(0, 24, 0, 0x68); + if ((val >> 17 & 0x3) == 0x3) + set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + } } #endif -} -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static bool cpu_has_amd_erratum(const int *erratum); + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + + /* F16h erratum 793, CVE-2013-6885 */ + if (c->x86 == 0x16 && c->x86_model <= 0xf) + msr_set_bit(MSR_AMD64_LS_CFG, 15); + + early_detect_mem_encrypt(c); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { + if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + else if (c->x86 >= 0x19 && !wrmsrq_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + setup_force_cpu_cap(X86_FEATURE_SBPB); + } + } +} -static void __cpuinit init_amd(struct cpuinfo_x86 *c) +static void init_amd_k8(struct cpuinfo_x86 *c) { - u32 dummy; - unsigned long long value; + u32 level; + u64 value; + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* + * Some BIOSes incorrectly force this feature, but only K8 revision D + * (model = 0x14) and later actually support it. + * (AMD Erratum #110, docId: 25759). + */ + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) { + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); + if (!rdmsrq_amd_safe(0xc001100d, &value)) { + value &= ~BIT_64(32); + wrmsrq_amd_safe(0xc001100d, value); + } + } + + if (!c->x86_model_id[0]) + strscpy(c->x86_model_id, "Hammer"); #ifdef CONFIG_SMP /* @@ -527,166 +723,426 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } + msr_set_bit(MSR_K7_HWCR, 6); #endif - - early_init_amd(c); + set_cpu_bug(c, X86_BUG_SWAPGS_FENCE); /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + * Check models and steppings affected by erratum 400. This is + * used to select the proper idle routine and to enable the + * check whether the machine is affected in arch_post_acpi_subsys_init() + * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. */ - clear_cpu_cap(c, 0*32+31); + if (c->x86_model > 0x41 || + (c->x86_model == 0x41 && c->x86_stepping >= 0x2)) + setup_force_cpu_bug(X86_BUG_AMD_E400); +} -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; +static void init_amd_gh(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_MMCONF_FAM10H + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); - level = cpuid_eax(1); - if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); + fam10h_check_enable_mmcfg(); +#endif - /* - * Some BIOSes incorrectly force this feature, but only K8 - * revision D (model = 0x14) and later actually support it. - * (AMD Erratum #110, docId: 25759). - */ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { - clear_cpu_cap(c, X86_FEATURE_LAHF_LM); - if (!rdmsrl_amd_safe(0xc001100d, &value)) { - value &= ~(1ULL << 32); - wrmsrl_amd_safe(0xc001100d, value); - } - } + /* + * Disable GART TLB Walk Errors on Fam10h. We do this here because this + * is always needed when GART is enabled, even in a kernel which has no + * MCE support built in. BIOS should disable GartTlbWlk Errors already. + * If it doesn't, we do it here as suggested by the BKDG. + * + * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 + */ + msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); - } - if (c->x86 >= 0x10) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); + /* + * On family 10h BIOS may not have properly enabled WC+ support, causing + * it to be converted to CD memtype. This may result in performance + * degradation for certain nested-paging guests. Prevent this conversion + * by clearing bit 24 in MSR_AMD64_BU_CFG2. + * + * NOTE: we want to use the _safe accessors so as not to #GP kvm + * guests on older kvm hosts. + */ + msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - /* get apicid instead of initial apic id from cpuid */ - c->apicid = hard_smp_processor_id(); -#else + set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) + * Check models and steppings affected by erratum 400. This is + * used to select the proper idle routine and to enable the + * check whether the machine is affected in arch_post_acpi_subsys_init() + * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. */ + if (c->x86_model > 0x2 || + (c->x86_model == 0x2 && c->x86_stepping >= 0x1)) + setup_force_cpu_bug(X86_BUG_AMD_E400); +} - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } +static void init_amd_ln(struct cpuinfo_x86 *c) +{ + /* + * Apply erratum 665 fix unconditionally so machines without a BIOS + * fix work. + */ + msr_set_bit(MSR_AMD64_DE_CFG, 31); +} - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif +static bool rdrand_force; - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); +static int __init rdrand_cmdline(char *str) +{ + if (!str) + return -EINVAL; - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } + if (!strcmp(str, "force")) + rdrand_force = true; + else + return -EINVAL; - /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (!rdmsrl_safe(0xc0011005, &value)) { - value |= 1ULL << 54; - wrmsrl_safe(0xc0011005, value); - rdmsrl(0xc0011005, value); - if (value & (1ULL << 54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - printk(KERN_INFO FW_INFO "CPU: Re-enabling " - "disabled Topology Extensions Support\n"); - } - } + return 0; +} +early_param("rdrand", rdrand_cmdline); + +static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c) +{ + /* + * Saving of the MSR used to hide the RDRAND support during + * suspend/resume is done by arch/x86/power/cpu.c, which is + * dependent on CONFIG_PM_SLEEP. + */ + if (!IS_ENABLED(CONFIG_PM_SLEEP)) + return; + + /* + * The self-test can clear X86_FEATURE_RDRAND, so check for + * RDRAND support using the CPUID function directly. + */ + if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force) + return; + + msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62); + + /* + * Verify that the CPUID change has occurred in case the kernel is + * running virtualized and the hypervisor doesn't support the MSR. + */ + if (cpuid_ecx(1) & BIT(30)) { + pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n"); + return; } + clear_cpu_cap(c, X86_FEATURE_RDRAND); + pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n"); +} + +static void init_amd_jg(struct cpuinfo_x86 *c) +{ + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} + +static void init_amd_bd(struct cpuinfo_x86 *c) +{ + u64 value; + /* * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - - if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { + if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) { + if (!rdmsrq_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) { value |= 0x1E; - wrmsrl_safe(0xc0011021, value); + wrmsrq_safe(MSR_F15H_IC_CFG, value); } } - cpu_detect_cache_sizes(c); + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); +static const struct x86_cpu_id erratum_1386_microcode[] = { + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x01), 0x2, 0x2, 0x0800126e), + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x31), 0x0, 0x0, 0x08301052), + {} +}; + +static void fix_erratum_1386(struct cpuinfo_x86 *c) +{ + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + * + * Clear the feature flag only on microcode revisions which + * don't have the fix. + */ + if (x86_match_min_microcode_rev(erratum_1386_microcode)) + return; + + clear_cpu_cap(c, X86_FEATURE_XSAVES); +} + +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_MITIGATION_UNRET_ENTRY + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + if (!rdmsrq_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrq_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } } +#endif +} -#ifdef CONFIG_X86_32 - detect_ht(c); +static void init_amd_zen_common(void) +{ + setup_force_cpu_cap(X86_FEATURE_ZEN); +#ifdef CONFIG_NUMA + node_reclaim_distance = 32; #endif +} - init_amd_cacheinfo(c); +static void init_amd_zen1(struct cpuinfo_x86 *c) +{ + fix_erratum_1386(c); - if (c->x86 >= 0xf) - set_cpu_cap(c, X86_FEATURE_K8); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); } -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); - fam10h_check_enable_mmcfg(); + /* + * Turn off the Instructions Retired free counter on machines that are + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (c->x86_model < 0x30) { + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + clear_cpu_cap(c, X86_FEATURE_IRPERF); } +} - if (c == &boot_cpu_data && c->x86 >= 0xf) { - unsigned long long tseg; +static bool cpu_has_zenbleed_microcode(void) +{ + u32 good_rev = 0; + + switch (boot_cpu_data.x86_model) { + case 0x30 ... 0x3f: good_rev = 0x0830107b; break; + case 0x60 ... 0x67: good_rev = 0x0860010c; break; + case 0x68 ... 0x6f: good_rev = 0x08608107; break; + case 0x70 ... 0x7f: good_rev = 0x08701033; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00009; break; + + default: + return false; + } + + if (boot_cpu_data.microcode < good_rev) + return false; + + return true; +} + +static void zen2_zenbleed_check(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (!cpu_has(c, X86_FEATURE_AVX)) + return; + if (!cpu_has_zenbleed_microcode()) { + pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n"); + msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT); + } else { + msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT); + } +} + +static void init_amd_zen2(struct cpuinfo_x86 *c) +{ + init_spectral_chicken(c); + fix_erratum_1386(c); + zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); +} + +static void init_amd_zen3(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - unsigned long pfn = tseg >> PAGE_SHIFT; + if (!cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } +} - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if (pfn_range_is_mapped(pfn, pfn + 1)) - set_memory_4k((unsigned long)__va(tseg), 1); +static void init_amd_zen4(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); + + /* + * These Zen4 SoCs advertise support for virtualized VMLOAD/VMSAVE + * in some BIOS versions but they can lead to random host reboots. + */ + switch (c->x86_model) { + case 0x18 ... 0x1f: + case 0x60 ... 0x7f: + clear_cpu_cap(c, X86_FEATURE_V_VMSAVE_VMLOAD); + break; + } +} + +static const struct x86_cpu_id zen5_rdseed_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), + ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), + {}, +}; + +static void init_amd_zen5(struct cpuinfo_x86 *c) +{ + if (!x86_match_min_microcode_rev(zen5_rdseed_microcode)) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n"); + } +} + +static void init_amd(struct cpuinfo_x86 *c) +{ + u64 vm_cr; + + early_init_amd(c); + + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ + clear_cpu_cap(c, 0*32+31); + + if (c->x86 >= 0x10) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* AMD FSRM also implies FSRS */ + if (cpu_has(c, X86_FEATURE_FSRM)) + set_cpu_cap(c, X86_FEATURE_FSRS); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + + switch (c->x86) { + case 4: init_amd_k5(c); break; + case 5: init_amd_k6(c); break; + case 6: init_amd_k7(c); break; + case 0xf: init_amd_k8(c); break; + case 0x10: init_amd_gh(c); break; + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; + } + + /* + * Save up on some future enablement work and do common Zen + * settings. + */ + if (c->x86 >= 0x17) + init_amd_zen_common(); + + if (boot_cpu_has(X86_FEATURE_ZEN1)) + init_amd_zen1(c); + else if (boot_cpu_has(X86_FEATURE_ZEN2)) + init_amd_zen2(c); + else if (boot_cpu_has(X86_FEATURE_ZEN3)) + init_amd_zen3(c); + else if (boot_cpu_has(X86_FEATURE_ZEN4)) + init_amd_zen4(c); + else if (boot_cpu_has(X86_FEATURE_ZEN5)) + init_amd_zen5(c); + + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature + */ + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) + set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); + + cpu_detect_cache_sizes(c); + + srat_detect_node(c); + + init_amd_cacheinfo(c); + + if (cpu_has(c, X86_FEATURE_SVM)) { + rdmsrq(MSR_VM_CR, vm_cr); + if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) { + pr_notice_once("SVM disabled (by BIOS) in MSR_VM_CR\n"); + clear_cpu_cap(c, X86_FEATURE_SVM); } } -#endif + + if (!cpu_has(c, X86_FEATURE_LFENCE_RDTSC) && cpu_has(c, X86_FEATURE_XMM2)) { + /* + * Use LFENCE for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); + + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } /* * Family 0x12 and above processors have APIC timer @@ -695,77 +1151,58 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (c->x86 == 0x10) { - /* - * Disable GART TLB Walk Errors on Fam10h. We do this here - * because this is always needed when GART is enabled, even in a - * kernel which has no MCE support built in. - * BIOS should disable GartTlbWlk Errors themself. If - * it doesn't do it here as suggested by the BKDG. - * - * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 - */ - u64 mask; - int err; + /* 3DNow or LM implies PREFETCHW */ + if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); - if (err == 0) { - mask |= (1 << 10); - wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); - } + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ + if (!cpu_feature_enabled(X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - /* - * On family 10h BIOS may not have properly enabled WC+ support, - * causing it to be converted to CD memtype. This may result in - * performance degradation for certain nested-paging guests. - * Prevent this conversion by clearing bit 24 in - * MSR_AMD64_BU_CFG2. - * - * NOTE: we want to use the _safe accessors so as not to #GP kvm - * guests on older kvm hosts. - */ + /* Enable the Instructions Retired free counter */ + if (cpu_has(c, X86_FEATURE_IRPERF)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); - rdmsrl_safe(MSR_AMD64_BU_CFG2, &value); - value &= ~(1ULL << 24); - wrmsrl_safe(MSR_AMD64_BU_CFG2, value); + check_null_seg_clears_base(c); - if (cpu_has_amd_erratum(amd_erratum_383)) - set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); - } + /* + * Make sure EFER[AIBRSE - Automatic IBRS Enable] is set. The APs are brought up + * using the trampoline code and as part of it, MSR_EFER gets prepared there in + * order to be replicated onto them. Regardless, set it here again, if not set, + * to protect against any future refactoring/code reorganization which might + * miss setting this important bit. + */ + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0); - if (cpu_has_amd_erratum(amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); + /* Enable Translation Cache Extension */ + if (cpu_has(c, X86_FEATURE_TCE)) + msr_set_bit(MSR_EFER, _EFER_TCE); } #ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, - unsigned int size) +static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; } #endif -static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) -{ - tlb_flushall_shift = 5; - - if (c->x86 <= 0x11) - tlb_flushall_shift = 4; -} - -static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) { u32 ebx, eax, ecx, edx; u16 mask = 0xfff; @@ -778,8 +1215,8 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); - tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; - tlb_lli_4k[ENTRIES] = ebx & mask; + tlb_lld_4k = (ebx >> 16) & mask; + tlb_lli_4k = ebx & mask; /* * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB @@ -791,41 +1228,39 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) } /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ - if (!((eax >> 16) & mask)) { - u32 a, b, c, d; - - cpuid(0x80000005, &a, &b, &c, &d); - tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; - } else { - tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; - } + if (!((eax >> 16) & mask)) + tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff; + else + tlb_lld_2m = (eax >> 16) & mask; /* a 4M entry uses two 2M entries */ - tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + tlb_lld_4m = tlb_lld_2m >> 1; /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!(eax & mask)) { /* Erratum 658 */ if (c->x86 == 0x15 && c->x86_model <= 0x1f) { - tlb_lli_2m[ENTRIES] = 1024; + tlb_lli_2m = 1024; } else { cpuid(0x80000005, &eax, &ebx, &ecx, &edx); - tlb_lli_2m[ENTRIES] = eax & 0xff; + tlb_lli_2m = eax & 0xff; } } else - tlb_lli_2m[ENTRIES] = eax & mask; + tlb_lli_2m = eax & mask; - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + tlb_lli_4m = tlb_lli_2m >> 1; - cpu_set_tlb_flushall_shift(c); + /* Max number of pages INVLPGB can invalidate in one shot */ + if (cpu_has(c, X86_FEATURE_INVLPGB)) + invlpgb_count_max = (cpuid_edx(0x80000008) & 0xffff) + 1; } -static const struct cpu_dev __cpuinitconst amd_cpu_dev = { +static const struct cpu_dev amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, #ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + .legacy_models = { + { .family = 4, .model_names = { [3] = "486 DX/2", [7] = "486 DX/2-WB", @@ -836,7 +1271,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { } }, }, - .c_size_cache = amd_size_cache, + .legacy_cache_size = amd_size_cache, #endif .c_early_init = early_init_amd, .c_detect_tlb = cpu_detect_tlb_amd, @@ -847,76 +1282,125 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { cpu_dev_register(amd_cpu_dev); -/* - * AMD errata checking - * - * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or - * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that - * have an OSVW id assigned, which it takes as first argument. Both take a - * variable number of family-specific model-stepping ranges created by - * AMD_MODEL_RANGE(). - * - * Example: - * - * const int amd_erratum_319[] = - * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), - * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), - * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); - */ +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask); + +static unsigned int amd_msr_dr_addr_masks[] = { + MSR_F16H_DR0_ADDR_MASK, + MSR_F16H_DR1_ADDR_MASK, + MSR_F16H_DR1_ADDR_MASK + 1, + MSR_F16H_DR1_ADDR_MASK + 2 +}; -#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } -#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } -#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ - ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) -#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) -#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) -#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) +void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) +{ + int cpu = smp_processor_id(); -static const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), - AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); + if (!cpu_feature_enabled(X86_FEATURE_BPEXT)) + return; -static const int amd_erratum_383[] = - AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); + if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks))) + return; -static bool cpu_has_amd_erratum(const int *erratum) + if (per_cpu(amd_dr_addr_mask, cpu)[dr] == mask) + return; + + wrmsrq(amd_msr_dr_addr_masks[dr], mask); + per_cpu(amd_dr_addr_mask, cpu)[dr] = mask; +} + +unsigned long amd_get_dr_addr_mask(unsigned int dr) { - struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info); - int osvw_id = *erratum++; - u32 range; - u32 ms; + if (!cpu_feature_enabled(X86_FEATURE_BPEXT)) + return 0; - /* - * If called early enough that current_cpu_data hasn't been initialized - * yet, fall back to boot_cpu_data. - */ - if (cpu->x86 == 0) - cpu = &boot_cpu_data; + if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks))) + return 0; - if (cpu->x86_vendor != X86_VENDOR_AMD) - return false; + return per_cpu(amd_dr_addr_mask[dr], smp_processor_id()); +} +EXPORT_SYMBOL_FOR_KVM(amd_get_dr_addr_mask); + +static void zenbleed_check_cpu(void *unused) +{ + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); - if (osvw_id >= 0 && osvw_id < 65536 && - cpu_has(cpu, X86_FEATURE_OSVW)) { - u64 osvw_len; + zen2_zenbleed_check(c); +} - rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); - if (osvw_id < osvw_len) { - u64 osvw_bits; +void amd_check_microcode(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; - rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), - osvw_bits); - return osvw_bits & (1ULL << (osvw_id & 0x3f)); - } + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) + on_each_cpu(zenbleed_check_cpu, NULL, 1); +} + +static const char * const s5_reset_reason_txt[] = { + [0] = "thermal pin BP_THERMTRIP_L was tripped", + [1] = "power button was pressed for 4 seconds", + [2] = "shutdown pin was tripped", + [4] = "remote ASF power off command was received", + [9] = "internal CPU thermal limit was tripped", + [16] = "system reset pin BP_SYS_RST_L was tripped", + [17] = "software issued PCI reset", + [18] = "software wrote 0x4 to reset control register 0xCF9", + [19] = "software wrote 0x6 to reset control register 0xCF9", + [20] = "software wrote 0xE to reset control register 0xCF9", + [21] = "ACPI power state transition occurred", + [22] = "keyboard reset pin KB_RST_L was tripped", + [23] = "internal CPU shutdown event occurred", + [24] = "system failed to boot before failed boot timer expired", + [25] = "hardware watchdog timer expired", + [26] = "remote ASF reset command was received", + [27] = "an uncorrected error caused a data fabric sync flood event", + [29] = "FCH and MP1 failed warm reset handshake", + [30] = "a parity error occurred", + [31] = "a software sync flood event occurred", +}; + +static __init int print_s5_reset_status_mmio(void) +{ + void __iomem *addr; + u32 value; + int i; + + if (!cpu_feature_enabled(X86_FEATURE_ZEN)) + return 0; + + addr = ioremap(FCH_PM_BASE + FCH_PM_S5_RESET_STATUS, sizeof(value)); + if (!addr) + return 0; + + value = ioread32(addr); + + /* Value with "all bits set" is an error response and should be ignored. */ + if (value == U32_MAX) { + iounmap(addr); + return 0; } - /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; - while ((range = *erratum++)) - if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && - (ms >= AMD_MODEL_RANGE_START(range)) && - (ms <= AMD_MODEL_RANGE_END(range))) - return true; + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); + + for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { + if (!(value & BIT(i))) + continue; + + if (s5_reset_reason_txt[i]) { + pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n", + value, s5_reset_reason_txt[i]); + } + } - return false; + return 0; } +late_initcall(print_s5_reset_status_mmio); |
