summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/bugs.c4
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c31
-rw-r--r--arch/x86/kernel/cpu/common.c57
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/hygon.c408
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c17
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h6
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c12
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c405
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c210
-rw-r--r--arch/x86/kernel/cpu/mcheck/dev-mcelog.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c22
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c2
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
19 files changed, 961 insertions, 234 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 347137e80bf5..1f5d2291c31e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
+obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 40bdaea97fe7..b810cc239375 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -312,6 +312,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
return SPECTRE_V2_CMD_AUTO;
@@ -371,7 +372,8 @@ static void __init spectre_v2_select_mitigation(void)
return;
retpoline_auto:
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
retpoline_amd:
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 0c5fcbd998cf..dc1b9342e9c4 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -602,6 +602,10 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
else
amd_cpuid4(index, &eax, &ebx, &ecx);
amd_init_l3_cache(this_leaf, index);
+ } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ cpuid_count(0x8000001d, index, &eax.full,
+ &ebx.full, &ecx.full, &edx);
+ amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
}
@@ -625,7 +629,8 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
union _cpuid4_leaf_eax cache_eax;
int i = -1;
- if (c->x86_vendor == X86_VENDOR_AMD)
+ if (c->x86_vendor == X86_VENDOR_AMD ||
+ c->x86_vendor == X86_VENDOR_HYGON)
op = 0x8000001d;
else
op = 4;
@@ -678,6 +683,22 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
}
}
+void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (!cpuid_edx(0x80000006))
+ return;
+
+ /*
+ * LLC is at the core complex level.
+ * Core complex ID is ApicId[3] for these processors.
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+}
+
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -691,6 +712,11 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
+void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
+{
+ num_cache_leaves = find_num_cache_leaves(c);
+}
+
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
@@ -913,7 +939,8 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
int index_msb, i;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (c->x86_vendor == X86_VENDOR_AMD ||
+ c->x86_vendor == X86_VENDOR_HYGON) {
if (__cache_amd_cpumap_setup(cpu, index, base))
return;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 44c4ef3d989b..c519a079b3d5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -949,11 +949,11 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
@@ -963,15 +963,16 @@ static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{ X86_VENDOR_AMD },
+ { X86_VENDOR_HYGON },
{}
};
/* Only list CPUs which speculate but are non susceptible to SSB */
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
@@ -984,14 +985,14 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
/* in addition to cpu_no_speculation */
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{}
@@ -1076,6 +1077,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
memset(&c->x86_capability, 0, sizeof c->x86_capability);
c->extended_cpuid_level = 0;
+ if (!have_cpuid_p())
+ identify_cpu_without_cpuid(c);
+
/* cyrix could have cpuid enabled via c_identify()*/
if (have_cpuid_p()) {
cpu_detect(c);
@@ -1093,7 +1097,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
} else {
- identify_cpu_without_cpuid(c);
setup_clear_cpu_cap(X86_FEATURE_CPUID);
}
@@ -1669,6 +1672,29 @@ static void wait_for_master_cpu(int cpu)
#endif
}
+#ifdef CONFIG_X86_64
+static void setup_getcpu(int cpu)
+{
+ unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
+ struct desc_struct d = { };
+
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
+ write_rdtscp_aux(cpudata);
+
+ /* Store CPU and node number in limit. */
+ d.limit0 = cpudata;
+ d.limit1 = cpudata >> 16;
+
+ d.type = 5; /* RO data, expand down, accessed */
+ d.dpl = 3; /* Visible to user code */
+ d.s = 1; /* Not a system segment */
+ d.p = 1; /* Present */
+ d.d = 1; /* 32-bit */
+
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
+}
+#endif
+
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
@@ -1706,6 +1732,7 @@ void cpu_init(void)
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
+ setup_getcpu(cpu);
me = current;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 7b229afa0a37..da5446acc241 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -54,6 +54,7 @@ extern u32 get_scattered_cpuid_leaf(unsigned int level,
enum cpuid_regs_idx reg);
extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 8949b7ae6d92..d12226f60168 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -437,7 +437,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
/* enable MAPEN */
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
/* enable cpuid */
- setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
+ setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80);
/* disable MAPEN */
setCx86(CX86_CCR3, ccr3);
local_irq_restore(flags);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
new file mode 100644
index 000000000000..cf25405444ab
--- /dev/null
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Hygon Processor Support for Linux
+ *
+ * Copyright (C) 2018 Chengdu Haiguang IC Design Co., Ltd.
+ *
+ * Author: Pu Wen <puwen@hygon.cn>
+ */
+#include <linux/io.h>
+
+#include <asm/cpu.h>
+#include <asm/smp.h>
+#include <asm/cacheinfo.h>
+#include <asm/spec-ctrl.h>
+#include <asm/delay.h>
+#ifdef CONFIG_X86_64
+# include <asm/set_memory.h>
+#endif
+
+#include "cpu.h"
+
+/*
+ * nodes_per_socket: Stores the number of nodes per socket.
+ * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8]
+ */
+static u32 nodes_per_socket = 1;
+
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
+static int nearby_node(int apicid)
+{
+ int i, node;
+
+ for (i = apicid - 1; i >= 0; i--) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
+static void hygon_get_topology_early(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_TOPOEXT))
+ smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
+}
+
+/*
+ * Fixup core topology information for
+ * (1) Hygon multi-node processors
+ * Assumption: Number of cores in each internal node is the same.
+ * (2) Hygon processors supporting compute units
+ */
+static void hygon_get_topology(struct cpuinfo_x86 *c)
+{
+ u8 node_id;
+ int cpu = smp_processor_id();
+
+ /* get information required for multi-node processors */
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+ node_id = ecx & 0xff;
+
+ c->cpu_core_id = ebx & 0xff;
+
+ if (smp_num_siblings > 1)
+ c->x86_max_cores /= smp_num_siblings;
+
+ /*
+ * In case leaf B is available, use it to derive
+ * topology information.
+ */
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ cacheinfo_hygon_init_llc_id(c, cpu, node_id);
+ } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ node_id = value & 7;
+
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else
+ return;
+
+ if (nodes_per_socket > 1)
+ set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+}
+
+/*
+ * On Hygon setup the lower bits of the APIC id distinguish the cores.
+ * Assumes number of cores is a power of two.
+ */
+static void hygon_detect_cmp(struct cpuinfo_x86 *c)
+{
+ unsigned int bits;
+ int cpu = smp_processor_id();
+
+ bits = c->x86_coreid_bits;
+ /* Low order bits define the core id (index of core in socket) */
+ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+ /* Convert the initial APIC ID into the socket ID */
+ c->phys_proc_id = c->initial_apicid >> bits;
+ /* use socket ID also for last level cache */
+ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+}
+
+static void srat_detect_node(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_NUMA
+ int cpu = smp_processor_id();
+ int node;
+ unsigned int apicid = c->apicid;
+
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
+
+ /*
+ * On multi-fabric platform (e.g. Numascale NumaChip) a
+ * platform-specific handler needs to be called to fixup some
+ * IDs of the CPU.
+ */
+ if (x86_cpuinit.fixup_cpu_id)
+ x86_cpuinit.fixup_cpu_id(c, node);
+
+ if (!node_online(node)) {
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs.
+ * Assume they are all increased by a constant offset, but
+ * in the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
+ int ht_nodeid = c->initial_apicid;
+
+ if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ numa_set_node(cpu, node);
+#endif
+}
+
+static void early_init_hygon_mc(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int bits, ecx;
+
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level < 0x80000008)
+ return;
+
+ ecx = cpuid_ecx(0x80000008);
+
+ c->x86_max_cores = (ecx & 0xff) + 1;
+
+ /* CPU telling us the core id bits shift? */
+ bits = (ecx >> 12) & 0xF;
+
+ /* Otherwise recompute */
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+
+ c->x86_coreid_bits = bits;
+#endif
+}
+
+static void bsp_init_hygon(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ unsigned long long tseg;
+
+ /*
+ * Split up direct mapping around the TSEG SMM area.
+ * Don't do it for gbpages because there seems very little
+ * benefit in doing so.
+ */
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ unsigned long pfn = tseg >> PAGE_SHIFT;
+
+ pr_debug("tseg: %010llx\n", tseg);
+ if (pfn_range_is_mapped(pfn, pfn + 1))
+ set_memory_4k((unsigned long)__va(tseg), 1);
+ }
+#endif
+
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+ u64 val;
+
+ rdmsrl(MSR_K7_HWCR, val);
+ if (!(val & BIT(24)))
+ pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
+ }
+
+ if (cpu_has(c, X86_FEATURE_MWAITX))
+ use_mwaitx_delay();
+
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ u32 ecx;
+
+ ecx = cpuid_ecx(0x8000001e);
+ nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ nodes_per_socket = ((value >> 3) & 7) + 1;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << 10;
+ }
+ }
+}
+
+static void early_init_hygon(struct cpuinfo_x86 *c)
+{
+ u32 dummy;
+
+ early_init_hygon_mc(c);
+
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+ /*
+ * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+ * with P/T states and does not stop in deep C-states
+ */
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
+
+ /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+ if (c->x86_power & BIT(12))
+ set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+#endif
+
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+ /*
+ * ApicID can always be treated as an 8-bit value for Hygon APIC So, we
+ * can safely set X86_FEATURE_EXTD_APICID unconditionally.
+ */
+ if (boot_cpu_has(X86_FEATURE_APIC))
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+#endif
+
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
+ hygon_get_topology_early(c);
+}
+
+static void init_hygon(struct cpuinfo_x86 *c)
+{
+ early_init_hygon(c);
+
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
+
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ /* get apicid instead of initial apic id from cpuid */
+ c->apicid = hard_smp_processor_id();
+
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ cpu_detect_cache_sizes(c);
+
+ hygon_detect_cmp(c);
+ hygon_get_topology(c);
+ srat_detect_node(c);
+
+ init_hygon_cacheinfo(c);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
+ }
+
+ /*
+ * Hygon processors have APIC timer running in deep C states.
+ */
+ set_cpu_cap(c, X86_FEATURE_ARAT);
+
+ /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
+ if (!cpu_has(c, X86_FEATURE_XENPV))
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
+{
+ u32 ebx, eax, ecx, edx;
+ u16 mask = 0xfff;
+
+ if (c->extended_cpuid_level < 0x80000006)
+ return;
+
+ cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+ tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+ tlb_lli_4k[ENTRIES] = ebx & mask;
+
+ /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!((eax >> 16) & mask))
+ tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
+ else
+ tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+
+ /* a 4M entry uses two 2M entries */
+ tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+ /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!(eax & mask)) {
+ cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ tlb_lli_2m[ENTRIES] = eax & 0xff;
+ } else
+ tlb_lli_2m[ENTRIES] = eax & mask;
+
+ tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+}
+
+static const struct cpu_dev hygon_cpu_dev = {
+ .c_vendor = "Hygon",
+ .c_ident = { "HygonGenuine" },
+ .c_early_init = early_init_hygon,
+ .c_detect_tlb = cpu_detect_tlb_hygon,
+ .c_bsp_init = bsp_init_hygon,
+ .c_init = init_hygon,
+ .c_x86_vendor = X86_VENDOR_HYGON,
+};
+
+cpu_dev_register(hygon_cpu_dev);
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index abb71ac70443..44272b7107ad 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -485,9 +485,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
- sizeof(unsigned long),
- GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
@@ -496,7 +494,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_total) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
}
@@ -504,7 +502,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_local) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
return -ENOMEM;
}
@@ -610,9 +608,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ /*
+ * rdt_domain "d" is going to be freed below, so clear
+ * its pointer from pseudo_lock_region struct.
+ */
+ if (d->plr)
+ d->plr->d = NULL;
+
kfree(d->ctrl_val);
kfree(d->mbps_val);
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 285eb3ec4200..3736f6dc9545 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive);
+ unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
- u32 cbm);
+ unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 0f53049719cd..27937458c231 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -404,8 +404,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
- rdtgrp->plr->d->id, rdtgrp->plr->cbm);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%s:%d=%x\n",
+ rdtgrp->plr->r->name,
+ rdtgrp->plr->d->id,
+ rdtgrp->plr->cbm);
+ }
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 40f3903ae5d9..815b4e92522c 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
+#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -26,6 +27,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/perf_event.h>
+#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "intel_rdt.h"
#define CREATE_TRACE_POINTS
@@ -91,7 +93,7 @@ static u64 get_prefetch_disable_bits(void)
*/
return 0xF;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
/*
* SDM defines bits of MSR_MISC_FEATURE_CONTROL register
* as:
@@ -106,16 +108,6 @@ static u64 get_prefetch_disable_bits(void)
return 0;
}
-/*
- * Helper to write 64bit value to MSR without tracing. Used when
- * use of the cache should be restricted and use of registers used
- * for local variables avoided.
- */
-static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
-{
- __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
-}
-
/**
* pseudo_lock_minor_get - Obtain available minor number
* @minor: Pointer to where new minor number will be stored
@@ -797,25 +789,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
/**
* rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
* @d: RDT domain
- * @_cbm: CBM to test
+ * @cbm: CBM to test
*
- * @d represents a cache instance and @_cbm a capacity bitmask that is
- * considered for it. Determine if @_cbm overlaps with any existing
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
* pseudo-locked region on @d.
*
- * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *cbm_b;
unsigned int cbm_len;
+ unsigned long cbm_b;
if (d->plr) {
cbm_len = d->plr->r->cache.cbm_len;
- cbm_b = (unsigned long *)&d->plr->cbm;
- if (bitmap_intersects(cbm, cbm_b, cbm_len))
+ cbm_b = d->plr->cbm;
+ if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
}
return false;
@@ -886,31 +880,14 @@ static int measure_cycles_lat_fn(void *_plr)
struct pseudo_lock_region *plr = _plr;
unsigned long i;
u64 start, end;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use a regular
- * variable to ensure we always use a valid pointer to access memory.
- * The cost is that accessing this pointer, which could be in
- * cache, will be included in the measurement of memory read latency.
- */
void *mem_r;
-#else
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
local_irq_disable();
/*
- * The wrmsr call may be reordered with the assignment below it.
- * Call wrmsr as directly as possible to avoid tracing clobbering
- * local register variable used for memory pointer.
+ * Disable hardware prefetchers.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- mem_r = plr->kmem;
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ mem_r = READ_ONCE(plr->kmem);
/*
* Dummy execute of the time measurement to load the needed
* instructions into the L1 instruction cache.
@@ -932,157 +909,240 @@ static int measure_cycles_lat_fn(void *_plr)
return 0;
}
-static int measure_cycles_perf_fn(void *_plr)
+/*
+ * Create a perf_event_attr for the hit and miss perf events that will
+ * be used during the performance measurement. A perf_event maintains
+ * a pointer to its perf_event_attr so a unique attribute structure is
+ * created for each perf_event.
+ *
+ * The actual configuration of the event is set right before use in order
+ * to use the X86_CONFIG macro.
+ */
+static struct perf_event_attr perf_miss_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+static struct perf_event_attr perf_hit_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+struct residency_counts {
+ u64 miss_before, hits_before;
+ u64 miss_after, hits_after;
+};
+
+static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ struct perf_event_attr *hit_attr,
+ struct pseudo_lock_region *plr,
+ struct residency_counts *counts)
{
- unsigned long long l3_hits = 0, l3_miss = 0;
- u64 l3_hit_bits = 0, l3_miss_bits = 0;
- struct pseudo_lock_region *plr = _plr;
- unsigned long long l2_hits, l2_miss;
- u64 l2_hit_bits, l2_miss_bits;
- unsigned long i;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use regular variables
- * at the cost of including cache access latency to these variables
- * in the measurements.
- */
+ u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+ struct perf_event *miss_event, *hit_event;
+ int hit_pmcnum, miss_pmcnum;
unsigned int line_size;
unsigned int size;
+ unsigned long i;
void *mem_r;
-#else
- register unsigned int line_size asm("esi");
- register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
+ u64 tmp;
+
+ miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(miss_event))
+ goto out;
+
+ hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(hit_event))
+ goto out_miss;
+
+ local_irq_disable();
+ /*
+ * Check any possible error state of events used by performing
+ * one local read.
+ */
+ if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+ if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+
+ /*
+ * Disable hardware prefetchers.
+ */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+ /* Initialize rest of local variables */
+ /*
+ * Performance event has been validated right before this with
+ * interrupts disabled - it is thus safe to read the counter index.
+ */
+ miss_pmcnum = x86_perf_rdpmc_index(miss_event);
+ hit_pmcnum = x86_perf_rdpmc_index(hit_event);
+ line_size = READ_ONCE(plr->line_size);
+ mem_r = READ_ONCE(plr->kmem);
+ size = READ_ONCE(plr->size);
+
+ /*
+ * Read counter variables twice - first to load the instructions
+ * used in L1 cache, second to capture accurate value that does not
+ * include cache misses incurred because of instruction loads.
+ */
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * From SDM: Performing back-to-back fast reads are not guaranteed
+ * to be monotonic.
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ for (i = 0; i < size; i += line_size) {
+ /*
+ * Add a barrier to prevent speculative execution of this
+ * loop reading beyond the end of the buffer.
+ */
+ rmb();
+ asm volatile("mov (%0,%1,1), %%eax\n\t"
+ :
+ : "r" (mem_r), "r" (i)
+ : "%eax", "memory");
+ }
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_after);
+ rdpmcl(miss_pmcnum, miss_after);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ /* Re-enable hardware prefetchers */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+ local_irq_enable();
+out_hit:
+ perf_event_release_kernel(hit_event);
+out_miss:
+ perf_event_release_kernel(miss_event);
+out:
+ /*
+ * All counts will be zero on failure.
+ */
+ counts->miss_before = miss_before;
+ counts->hits_before = hits_before;
+ counts->miss_after = miss_after;
+ counts->hits_after = hits_after;
+ return 0;
+}
+
+static int measure_l2_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
/*
* Non-architectural event for the Goldmont Microarchitecture
* from Intel x86 Architecture Software Developer Manual (SDM):
* MEM_LOAD_UOPS_RETIRED D1H (event number)
* Umask values:
- * L1_HIT 01H
* L2_HIT 02H
- * L1_MISS 08H
* L2_MISS 10H
- *
- * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
- * has two "no fix" errata associated with it: BDM35 and BDM100. On
- * this platform we use the following events instead:
- * L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
- * REFERENCES FFH
- * MISS 3FH
- * LONGEST_LAT_CACHE 2EH (Documented in SDM)
- * REFERENCE 4FH
- * MISS 41H
*/
-
- /*
- * Start by setting flags for IA32_PERFEVTSELx:
- * OS (Operating system mode) 0x2
- * INT (APIC interrupt enable) 0x10
- * EN (Enable counter) 0x40
- *
- * Then add the Umask value and event number to select performance
- * event.
- */
-
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
- l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
- l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
- break;
- case INTEL_FAM6_BROADWELL_X:
- /* On BDW the l2_hit_bits count references, not hits */
- l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
- l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
- /* On BDW the l3_hit_bits count references, not hits */
- l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
- l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x10);
+ perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x2);
break;
default:
goto out;
}
- local_irq_disable();
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * Call wrmsr direcly to avoid the local register variables from
- * being overwritten due to reordering of their assignment with
- * the wrmsr calls.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- /* Disable events and reset counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
- }
- /* Set and enable the L2 counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits);
- }
- mem_r = plr->kmem;
- size = plr->size;
- line_size = plr->line_size;
- for (i = 0; i < size; i += line_size) {
- asm volatile("mov (%0,%1,1), %%eax\n\t"
- :
- : "r" (mem_r), "r" (i)
- : "%eax", "memory");
- }
+ trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
+ counts.miss_after - counts.miss_before);
+out:
+ plr->thread_done = 1;
+ wake_up_interruptible(&plr->lock_thread_wq);
+ return 0;
+}
+
+static int measure_l3_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
+
/*
- * Call wrmsr directly (no tracing) to not influence
- * the cache access counters as they are disabled.
+ * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
+ * has two "no fix" errata associated with it: BDM35 and BDM100. On
+ * this platform the following events are used instead:
+ * LONGEST_LAT_CACHE 2EH (Documented in SDM)
+ * REFERENCE 4FH
+ * MISS 41H
*/
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
- l2_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
- l2_miss_bits & ~(0x40ULL << 16));
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits & ~(0x40ULL << 16));
- }
- l2_hits = native_read_pmc(0);
- l2_miss = native_read_pmc(1);
- if (l3_hit_bits > 0) {
- l3_hits = native_read_pmc(2);
- l3_miss = native_read_pmc(3);
+
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_BROADWELL_X:
+ /* On BDW the hit event counts references, not hits */
+ perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x4f);
+ perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x41);
+ break;
+ default:
+ goto out;
}
- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
- local_irq_enable();
+
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * On BDW we count references and misses, need to adjust. Sometimes
- * the "hits" counter is a bit more than the references, for
- * example, x references but x + 1 hits. To not report invalid
- * hit values in this case we treat that as misses eaqual to
- * references.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
- trace_pseudo_lock_l2(l2_hits, l2_miss);
- if (l3_hit_bits > 0) {
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
- trace_pseudo_lock_l3(l3_hits, l3_miss);
+
+ counts.miss_after -= counts.miss_before;
+ if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
+ /*
+ * On BDW references and misses are counted, need to adjust.
+ * Sometimes the "hits" counter is a bit more than the
+ * references, for example, x references but x + 1 hits.
+ * To not report invalid hit values in this case we treat
+ * that as misses equal to references.
+ */
+ /* First compute the number of cache references measured */
+ counts.hits_after -= counts.hits_before;
+ /* Next convert references to cache hits */
+ counts.hits_after -= min(counts.miss_after, counts.hits_after);
+ } else {
+ counts.hits_after -= counts.hits_before;
}
+ trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
plr->thread_done = 1;
wake_up_interruptible(&plr->lock_thread_wq);
@@ -1114,6 +1174,11 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ if (!plr->d) {
+ ret = -ENODEV;
+ goto out;
+ }
+
plr->thread_done = 0;
cpu = cpumask_first(&plr->d->cpu_mask);
if (!cpu_online(cpu)) {
@@ -1121,13 +1186,20 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ plr->cpu = cpu;
+
if (sel == 1)
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
else if (sel == 2)
- thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
+ thread = kthread_create_on_node(measure_l2_residency, plr,
+ cpu_to_node(cpu),
+ "pseudo_lock_measure/%u",
+ cpu);
+ else if (sel == 3)
+ thread = kthread_create_on_node(measure_l3_residency, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
@@ -1171,7 +1243,7 @@ static ssize_t pseudo_lock_measure_trigger(struct file *file,
buf[buf_size] = '\0';
ret = kstrtoint(buf, 10, &sel);
if (ret == 0) {
- if (sel != 1)
+ if (sel != 1 && sel != 2 && sel != 3)
return -EINVAL;
ret = debugfs_file_get(file->f_path.dentry);
if (ret)
@@ -1427,6 +1499,11 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
plr = rdtgrp->plr;
+ if (!plr->d) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
/*
* Task is required to run with affinity to the cpus associated
* with the pseudo-locked region. If this is not the case the task
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 1b8e86a5d5e1..f27b8115ffa2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -268,17 +268,27 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
+ struct cpumask *mask;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
- else
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ mask = &rdtgrp->plr->d->cpu_mask;
+ seq_printf(s, is_cpu_list(of) ?
+ "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(mask));
+ }
+ } else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
+ }
} else {
ret = -ENOENT;
}
@@ -961,7 +971,78 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
}
/**
- * rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
+ * rdt_cdp_peer_get - Retrieve CDP peer if it exists
+ * @r: RDT resource to which RDT domain @d belongs
+ * @d: Cache instance for which a CDP peer is requested
+ * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
+ * Used to return the result.
+ * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
+ * Used to return the result.
+ *
+ * RDT resources are managed independently and by extension the RDT domains
+ * (RDT resource instances) are managed independently also. The Code and
+ * Data Prioritization (CDP) RDT resources, while managed independently,
+ * could refer to the same underlying hardware. For example,
+ * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
+ *
+ * When provided with an RDT resource @r and an instance of that RDT
+ * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
+ * resource and the exact instance that shares the same hardware.
+ *
+ * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
+ * If a CDP peer was found, @r_cdp will point to the peer RDT resource
+ * and @d_cdp will point to the peer RDT domain.
+ */
+static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
+ struct rdt_resource **r_cdp,
+ struct rdt_domain **d_cdp)
+{
+ struct rdt_resource *_r_cdp = NULL;
+ struct rdt_domain *_d_cdp = NULL;
+ int ret = 0;
+
+ switch (r->rid) {
+ case RDT_RESOURCE_L3DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
+ break;
+ case RDT_RESOURCE_L3CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
+ break;
+ case RDT_RESOURCE_L2DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
+ break;
+ case RDT_RESOURCE_L2CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
+ break;
+ default:
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * When a new CPU comes online and CDP is enabled then the new
+ * RDT domains (if any) associated with both CDP RDT resources
+ * are added in the same CPU online routine while the
+ * rdtgroup_mutex is held. It should thus not happen for one
+ * RDT domain to exist and be associated with its RDT CDP
+ * resource but there is no RDT domain associated with the
+ * peer RDT CDP resource. Hence the WARN.
+ */
+ _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
+ if (WARN_ON(!_d_cdp)) {
+ _r_cdp = NULL;
+ ret = -EINVAL;
+ }
+
+out:
+ *r_cdp = _r_cdp;
+ *d_cdp = _d_cdp;
+
+ return ret;
+}
+
+/**
+ * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
@@ -975,33 +1056,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
* is false then overlaps with any resource group or hardware entities
* will be considered.
*
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
* Return: false if CBM does not overlap, true if it does.
*/
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive)
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *ctrl_b;
enum rdtgrp_mode mode;
+ unsigned long ctrl_b;
u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
if (!exclusive) {
- if (bitmap_intersects(cbm,
- (unsigned long *)&r->cache.shareable_bits,
- r->cache.cbm_len))
+ ctrl_b = r->cache.shareable_bits;
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
return true;
}
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
for (i = 0; i < closids_supported(); i++, ctrl++) {
- ctrl_b = (unsigned long *)ctrl;
+ ctrl_b = *ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
- if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
if (exclusive) {
if (mode == RDT_MODE_EXCLUSIVE)
return true;
@@ -1016,6 +1098,41 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
}
/**
+ * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
+ * @r: Resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Resources that can be allocated using a CBM can use the CBM to control
+ * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
+ * for overlap. Overlap test is not limited to the specific resource for
+ * which the CBM is intended though - when dealing with CDP resources that
+ * share the underlying hardware the overlap check should be performed on
+ * the CDP resource sharing the hardware also.
+ *
+ * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
+ * overlap test.
+ *
+ * Return: true if CBM overlap detected, false if there is no overlap
+ */
+bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
+{
+ struct rdt_resource *r_cdp;
+ struct rdt_domain *d_cdp;
+
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
+ return true;
+
+ if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
+ return false;
+
+ return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
+}
+
+/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
*
* An exclusive resource group implies that there should be no sharing of
@@ -1138,15 +1255,18 @@ out:
* computed by first dividing the total cache size by the CBM length to
* determine how many bytes each bit in the bitmask represents. The result
* is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used to make the
+ * bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, u32 cbm)
+ struct rdt_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
int num_b, i;
- num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+ num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->cache_level) {
@@ -1172,6 +1292,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
+ int ret = 0;
bool sep;
u32 ctrl;
@@ -1182,11 +1303,18 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
- rdtgrp->plr->d,
- rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%*s:", max_name_width,
+ rdtgrp->plr->r->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+ rdtgrp->plr->d,
+ rdtgrp->plr->cbm);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ }
goto out;
}
@@ -1216,7 +1344,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
- return 0;
+ return ret;
}
/* rdtgroup information files for one cache resource. */
@@ -2350,13 +2478,16 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
*/
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
+ struct rdt_resource *r_cdp = NULL;
+ struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
+ unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
+ u32 peer_ctl, *ctrl;
int i, ret;
- u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
/*
@@ -2366,6 +2497,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA)
continue;
list_for_each_entry(d, &r->domains, list) {
+ rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
@@ -2375,9 +2507,19 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
break;
- used_b |= *ctrl;
+ /*
+ * If CDP is active include peer
+ * domain's usage to ensure there
+ * is no overlap with an exclusive
+ * group.
+ */
+ if (d_cdp)
+ peer_ctl = d_cdp->ctrl_val[i];
+ else
+ peer_ctl = 0;
+ used_b |= *ctrl | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl;
+ d->new_ctrl |= *ctrl | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
@@ -2390,9 +2532,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
* modify the CBM based on system availability.
*/
cbm_ensure_valid(&d->new_ctrl, r);
- if (bitmap_weight((unsigned long *) &d->new_ctrl,
- r->cache.cbm_len) <
- r->cache.min_cbm_bits) {
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure
+ * that bitmap_weight() does not access out-of-bound
+ * memory.
+ */
+ tmp_cbm = d->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+ r->cache.min_cbm_bits) {
rdt_last_cmd_printf("no space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
@@ -2795,6 +2942,13 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
seq_puts(seq, ",cdp");
+
+ if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ seq_puts(seq, ",cdpl2");
+
+ if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ seq_puts(seq, ",mba_MBps");
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 97685a0c3175..27f394ac983f 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -38,9 +38,6 @@ static struct mce_log_buffer mcelog = {
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
-/* User mode helper program triggered by machine check event */
-extern char mce_helper[128];
-
static int dev_mce_log(struct notifier_block *nb, unsigned long val,
void *data)
{
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index c805a06e14c3..1fc424c40a31 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -108,6 +108,9 @@ static void setup_inj_struct(struct mce *m)
memset(m, 0, sizeof(struct mce));
m->cpuvendor = boot_cpu_data.x86_vendor;
+ m->time = ktime_get_real_seconds();
+ m->cpuid = cpuid_eax(1);
+ m->microcode = boot_cpu_data.microcode;
}
/* Update fake mce registers on current CPU. */
@@ -576,6 +579,9 @@ static int inj_bank_set(void *data, u64 val)
m->bank = val;
do_inject();
+ /* Reset injection struct */
+ setup_inj_struct(&i_mce);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index f34d89c01edc..44396d521987 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -336,7 +336,8 @@ int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
void __init mcheck_vendor_init_severity(void)
{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
mce_severity = mce_severity_amd;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 953b3ce92dcc..8cb3c02980cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -270,7 +270,7 @@ static void print_mce(struct mce *m)
{
__print_mce(m);
- if (m->cpuvendor != X86_VENDOR_AMD)
+ if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
@@ -508,9 +508,9 @@ static int mce_usable_address(struct mce *m)
bool mce_is_memory_error(struct mce *m)
{
- if (m->cpuvendor == X86_VENDOR_AMD) {
+ if (m->cpuvendor == X86_VENDOR_AMD ||
+ m->cpuvendor == X86_VENDOR_HYGON) {
return amd_mce_is_memory_error(m);
-
} else if (m->cpuvendor == X86_VENDOR_INTEL) {
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -539,6 +539,9 @@ static bool mce_is_correctable(struct mce *m)
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
return false;
+ if (m->cpuvendor == X86_VENDOR_HYGON && m->status & MCI_STATUS_DEFERRED)
+ return false;
+
if (m->status & MCI_STATUS_UC)
return false;
@@ -1315,7 +1318,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_disable();
ist_end_non_atomic();
} else {
- if (!fixup_exception(regs, X86_TRAP_MC))
+ if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
mce_panic("Failed kernel mode recovery", &m, NULL);
}
@@ -1705,7 +1708,7 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
*/
static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
{
- if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
@@ -1746,6 +1749,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
break;
}
+
+ case X86_VENDOR_HYGON:
+ mce_hygon_feature_init(c);
+ break;
+
case X86_VENDOR_CENTAUR:
mce_centaur_feature_init(c);
break;
@@ -1971,12 +1979,14 @@ static void mce_disable_error_reporting(void)
static void vendor_disable_error_reporting(void)
{
/*
- * Don't clear on Intel or AMD CPUs. Some of these MSRs are socket-wide.
+ * Don't clear on Intel or AMD or Hygon CPUs. Some of these MSRs
+ * are socket-wide.
* Disabling them for just a single offlined CPU is bad, since it will
* inhibit reporting for all shared resources on the socket like the
* last level cache (LLC), the integrated memory controller (iMC), etc.
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return;
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 765afd599039..3668c5df90c6 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -831,7 +831,8 @@ int __init amd_special_default_mtrr(void)
{
u32 l, h;
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return 0;
if (boot_cpu_data.x86 < 0xf)
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 9a19c800fe40..507039c20128 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -127,7 +127,7 @@ static void __init set_num_var_ranges(void)
if (use_intel())
rdmsr(MSR_MTRRcap, config, dummy);
- else if (is_cpu(AMD))
+ else if (is_cpu(AMD) || is_cpu(HYGON))
config = 2;
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
config = 8;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d389083330c5..9556930cd8c1 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
if (msr >= MSR_F15H_PERF_CTR)
return (msr - MSR_F15H_PERF_CTR) >> 1;
@@ -74,6 +75,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
if (msr >= MSR_F15H_PERF_CTL)
return (msr - MSR_F15H_PERF_CTL) >> 1;