Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/amd.c                  |  14
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                 |  32
-rw-r--r--  arch/x86/kernel/cpu/common.c               |  17
-rw-r--r--  arch/x86/kernel/cpu/debugfs.c              |   1
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c              |  30
-rw-r--r--  arch/x86/kernel/cpu/mce/apei.c             | 107
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c             | 216
-rw-r--r--  arch/x86/kernel/cpu/mce/dev-mcelog.c       |  11
-rw-r--r--  arch/x86/kernel/cpu/mce/genpool.c          |  18
-rw-r--r--  arch/x86/kernel/cpu/mce/inject.c           |   6
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c            |   2
-rw-r--r--  arch/x86/kernel/cpu/mce/internal.h         |   4
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c        |  51
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c      |  10
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c         |   4
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c  |  23
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c      |   3
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c     |   2
-rw-r--r--  arch/x86/kernel/cpu/scattered.c            |  56
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c             |  10
-rw-r--r--  arch/x86/kernel/cpu/topology_amd.c         |   3
-rw-r--r--  arch/x86/kernel/cpu/topology_common.c      |  34
22 files changed, 422 insertions(+), 232 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 015971adadfc..823f44f7bc94 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -924,6 +924,17 @@ static void init_amd_zen4(struct cpuinfo_x86 *c)
{
if (!cpu_has(c, X86_FEATURE_HYPERVISOR))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+
+ /*
+ * These Zen4 SoCs advertise support for virtualized VMLOAD/VMSAVE
+ * in some BIOS versions but they can lead to random host reboots.
+ */
+ switch (c->x86_model) {
+ case 0x18 ... 0x1f:
+ case 0x60 ... 0x7f:
+ clear_cpu_cap(c, X86_FEATURE_V_VMSAVE_VMLOAD);
+ break;
+ }
}
static void init_amd_zen5(struct cpuinfo_x86 *c)
@@ -1202,5 +1213,6 @@ void amd_check_microcode(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return;
- on_each_cpu(zenbleed_check_cpu, NULL, 1);
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2))
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
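
A stand-alone sketch of the model-range gating used in init_amd_zen4() above (assumptions: userspace CPUID via <cpuid.h> rather than the kernel's cpu_has()/clear_cpu_cap() helpers, and GCC's case-range extension, which the kernel also uses):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, family, model;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	family = (eax >> 8) & 0xf;
	model  = (eax >> 4) & 0xf;
	if (family == 0xf) {			/* AMD extended encoding */
		family += (eax >> 20) & 0xff;
		model  |= ((eax >> 16) & 0xf) << 4;
	}

	switch (model) {			/* GCC case ranges, as in the quirk above */
	case 0x18 ... 0x1f:
	case 0x60 ... 0x7f:
		printf("family 0x%x model 0x%x: in the quirk range\n", family, model);
		break;
	default:
		printf("family 0x%x model 0x%x: not affected\n", family, model);
	}
	return 0;
}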
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d1915427b4ff..47a01d4028f6 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,8 +1115,25 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+
+ /*
+ * IBPB on entry already obviates the need for
+ * software-based untraining so clear those in case some
+ * other mitigation like SRSO has selected them.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_UNRET);
+ setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
mitigate_smt = true;
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
break;
case RETBLEED_MITIGATION_STUFF:
@@ -2627,6 +2644,14 @@ static void __init srso_select_mitigation(void)
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
srso_mitigation = SRSO_MITIGATION_IBPB;
+
+ /*
+ * IBPB on entry already obviates the need for
+ * software-based untraining so clear those in case some
+ * other mitigation like Retbleed has selected them.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_UNRET);
+ setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2638,6 +2663,13 @@ static void __init srso_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07a34d723505..f7e5155f85dd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -69,6 +69,7 @@
#include <asm/sev.h>
#include <asm/tdx.h>
#include <asm/posted_intr.h>
+#include <asm/runtime-const.h>
#include "cpu.h"
@@ -1443,6 +1444,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
+ if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
+ setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1902,9 +1906,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
-#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
-#endif
}
/*
@@ -2085,8 +2087,10 @@ void syscall_init(void)
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
+#endif
#endif /* CONFIG_X86_64 */
@@ -2386,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) {
+ unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1;
+
+ /*
+ * Enable this when LAM is gated on LASS support
+ if (cpu_feature_enabled(X86_FEATURE_LAM))
+ USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1;
+ */
+ runtime_const_init(ptr, USER_PTR_MAX);
+
/*
* Make sure the first 2MB area is not mapped by huge pages
* There are typically fixed size MTRRs in there and overlapping
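
A quick stand-alone check of the two USER_PTR_MAX values above (assumptions: 4 KiB pages and 4-level paging, i.e. a 47-bit user address space behind TASK_SIZE_MAX):

#include <stdio.h>

int main(void)
{
	unsigned long page_size     = 4096;
	unsigned long task_size_max = (1UL << 47) - page_size;	/* 4-level paging */
	unsigned long user_ptr_max  = task_size_max - 1;
	unsigned long lam_ptr_max   = (1UL << 63) - page_size - 1;

	printf("USER_PTR_MAX      = %#lx\n", user_ptr_max);	/* 0x7fffffffefff */
	printf("LAM-era candidate = %#lx\n", lam_ptr_max);	/* 0x7fffffffffffefff */
	return 0;
}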
diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
index 3baf3e435834..10719aba6276 100644
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
seq_printf(m, "die_id: %u\n", c->topo.die_id);
seq_printf(m, "cu_id: %u\n", c->topo.cu_id);
seq_printf(m, "core_id: %u\n", c->topo.core_id);
+ seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c));
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 14bf8c232e45..6ca80fff1fea 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m = &err.m;
- mce_prep_record(&m);
+ mce_prep_record(&err);
- m.status = status;
- m.misc = misc;
- m.bank = bank;
- m.tsc = rdtsc();
+ m->status = status;
+ m->misc = misc;
+ m->bank = bank;
+ m->tsc = rdtsc();
- if (m.status & MCI_STATUS_ADDRV) {
- m.addr = addr;
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = addr;
- smca_extract_err_addr(&m);
+ smca_extract_err_addr(m);
}
if (mce_flags.smca) {
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
- if (m.status & MCI_STATUS_SYNDV)
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ if (m->status & MCI_STATUS_SYNDV) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
+ }
}
- mce_log(&m);
+ mce_log(&err);
}
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 3885fe05f01e..0a89947e47bc 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -28,7 +28,8 @@
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
int lsb;
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,31 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
else
lsb = PAGE_SHIFT;
- mce_prep_record(&m);
- m.bank = -1;
+ mce_prep_record(&err);
+ m = &err.m;
+ m->bank = -1;
/* Fake a memory read error with unknown channel */
- m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
- m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
+ m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+ m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
if (severity >= GHES_SEV_RECOVERABLE)
- m.status |= MCI_STATUS_UC;
+ m->status |= MCI_STATUS_UC;
if (severity >= GHES_SEV_PANIC) {
- m.status |= MCI_STATUS_PCC;
- m.tsc = rdtsc();
+ m->status |= MCI_STATUS_PCC;
+ m->tsc = rdtsc();
}
- m.addr = mem_err->physical_addr;
- mce_log(&m);
+ m->addr = mem_err->physical_addr;
+ mce_log(&err);
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+ unsigned int cpu, num_regs;
bool apicid_found = false;
- unsigned int cpu;
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
if (!boot_cpu_has(X86_FEATURE_SMCA))
return -EINVAL;
@@ -86,16 +89,12 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
return -EINVAL;
/*
- * The register array size must be large enough to include all the
- * SMCA registers which need to be extracted.
- *
* The number of registers in the register array is determined by
* Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
- * The register layout is fixed and currently the raw data in the
- * register array includes 6 SMCA registers which the kernel can
- * extract.
+ * Sanity-check registers array size.
*/
- if (ctx_info->reg_arr_size < 48)
+ num_regs = ctx_info->reg_arr_size >> 3;
+ if (!num_regs)
return -EINVAL;
for_each_possible_cpu(cpu) {
@@ -108,18 +107,68 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (!apicid_found)
return -EINVAL;
- mce_prep_record_common(&m);
- mce_prep_record_per_cpu(cpu, &m);
+ m = &err.m;
+ memset(&err, 0, sizeof(struct mce_hw_err));
+ mce_prep_record_common(m);
+ mce_prep_record_per_cpu(cpu, m);
+
+ m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
- m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
- m.status = *i_mce;
- m.addr = *(i_mce + 1);
- m.misc = *(i_mce + 2);
- /* Skipping MCA_CONFIG */
- m.ipid = *(i_mce + 4);
- m.synd = *(i_mce + 5);
+ /*
+ * The SMCA register layout is fixed and includes 16 registers.
+ * The end of the array may be variable, but the beginning is known.
+ * Cap the number of registers to expected max (15).
+ */
+ if (num_regs > 15)
+ num_regs = 15;
+
+ switch (num_regs) {
+ /* MCA_SYND2 */
+ case 15:
+ err.vendor.amd.synd2 = *(i_mce + 14);
+ fallthrough;
+ /* MCA_SYND1 */
+ case 14:
+ err.vendor.amd.synd1 = *(i_mce + 13);
+ fallthrough;
+ /* MCA_MISC4 */
+ case 13:
+ /* MCA_MISC3 */
+ case 12:
+ /* MCA_MISC2 */
+ case 11:
+ /* MCA_MISC1 */
+ case 10:
+ /* MCA_DEADDR */
+ case 9:
+ /* MCA_DESTAT */
+ case 8:
+ /* reserved */
+ case 7:
+ /* MCA_SYND */
+ case 6:
+ m->synd = *(i_mce + 5);
+ fallthrough;
+ /* MCA_IPID */
+ case 5:
+ m->ipid = *(i_mce + 4);
+ fallthrough;
+ /* MCA_CONFIG */
+ case 4:
+ /* MCA_MISC0 */
+ case 3:
+ m->misc = *(i_mce + 2);
+ fallthrough;
+ /* MCA_ADDR */
+ case 2:
+ m->addr = *(i_mce + 1);
+ fallthrough;
+ /* MCA_STATUS */
+ case 1:
+ m->status = *i_mce;
+ }
- mce_log(&m);
+ mce_log(&err);
return 0;
}
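
A stand-alone sketch of the fall-through cascade above, using a simplified record with only the fields the kernel actually extracts. With the UEFI rule num_regs = reg_arr_size / 8, the old 48-byte minimum corresponds to 6 registers, which fills MCA_STATUS, MCA_ADDR, MCA_MISC0, MCA_IPID and MCA_SYND (MCA_CONFIG is skipped):

#include <stdint.h>
#include <stdio.h>

struct rec { uint64_t status, addr, misc, ipid, synd, synd1, synd2; };

static void extract(struct rec *r, const uint64_t *regs, unsigned int reg_arr_size)
{
	unsigned int num_regs = reg_arr_size >> 3;

	if (num_regs > 15)
		num_regs = 15;

	switch (num_regs) {
	case 15: r->synd2 = regs[14];	/* MCA_SYND2 */
		/* fall through */
	case 14: r->synd1 = regs[13];	/* MCA_SYND1 */
		/* fall through */
	case 13: case 12: case 11: case 10: case 9: case 8: case 7:
	case 6:  r->synd  = regs[5];	/* MCA_SYND */
		/* fall through */
	case 5:  r->ipid  = regs[4];	/* MCA_IPID */
		/* fall through */
	case 4:				/* MCA_CONFIG: skipped */
	case 3:  r->misc  = regs[2];	/* MCA_MISC0 */
		/* fall through */
	case 2:  r->addr  = regs[1];	/* MCA_ADDR */
		/* fall through */
	case 1:  r->status = regs[0];	/* MCA_STATUS */
	}
}

int main(void)
{
	uint64_t regs[16] = { 0x9400000000000000ULL, 0x1234000, 0x8c, 0, 0xb000000000000ULL, 0x5d };
	struct rec r = { 0 };

	extract(&r, regs, 48);	/* 48 bytes -> 6 registers */
	printf("status=%#llx addr=%#llx synd=%#llx synd2=%#llx\n",
	       (unsigned long long)r.status, (unsigned long long)r.addr,
	       (unsigned long long)r.synd, (unsigned long long)r.synd2);
	return 0;
}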
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2a938f429c4d..7fb5556a0b53 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -88,7 +88,7 @@ struct mca_config mca_cfg __read_mostly = {
.monarch_timeout = -1
};
-static DEFINE_PER_CPU(struct mce, mces_seen);
+static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
static unsigned long mce_need_notify;
/*
@@ -119,8 +119,6 @@ BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
void mce_prep_record_common(struct mce *m)
{
- memset(m, 0, sizeof(struct mce));
-
m->cpuid = cpuid_eax(1);
m->cpuvendor = boot_cpu_data.x86_vendor;
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
@@ -138,9 +136,12 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
m->socketid = topology_physical_package_id(cpu);
}
-/* Do initial initialization of a struct mce */
-void mce_prep_record(struct mce *m)
+/* Do initial initialization of struct mce_hw_err */
+void mce_prep_record(struct mce_hw_err *err)
{
+ struct mce *m = &err->m;
+
+ memset(err, 0, sizeof(struct mce_hw_err));
mce_prep_record_common(m);
mce_prep_record_per_cpu(smp_processor_id(), m);
}
@@ -148,9 +149,9 @@ void mce_prep_record(struct mce *m)
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
-void mce_log(struct mce *m)
+void mce_log(struct mce_hw_err *err)
{
- if (!mce_gen_pool_add(m))
+ if (!mce_gen_pool_add(err))
irq_work_queue(&mce_irq_work);
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -171,8 +172,10 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
-static void __print_mce(struct mce *m)
+static void __print_mce(struct mce_hw_err *err)
{
+ struct mce *m = &err->m;
+
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
(m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -199,6 +202,10 @@ static void __print_mce(struct mce *m)
if (mce_flags.smca) {
if (m->synd)
pr_cont("SYND %llx ", m->synd);
+ if (err->vendor.amd.synd1)
+ pr_cont("SYND1 %llx ", err->vendor.amd.synd1);
+ if (err->vendor.amd.synd2)
+ pr_cont("SYND2 %llx ", err->vendor.amd.synd2);
if (m->ipid)
pr_cont("IPID %llx ", m->ipid);
}
@@ -214,9 +221,11 @@ static void __print_mce(struct mce *m)
m->microcode);
}
-static void print_mce(struct mce *m)
+static void print_mce(struct mce_hw_err *err)
{
- __print_mce(m);
+ struct mce *m = &err->m;
+
+ __print_mce(err);
if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
@@ -251,7 +260,7 @@ static const char *mce_dump_aux_info(struct mce *m)
return NULL;
}
-static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
+static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp)
{
struct llist_node *pending;
struct mce_evt_llist *l;
@@ -282,20 +291,22 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC)) {
- print_mce(m);
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
}
/* Now print uncorrected but with the final one last */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || mce_cmp(m, final)) {
- print_mce(m);
+ if (!final || mce_cmp(m, &final->m)) {
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
@@ -303,12 +314,12 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
if (final) {
print_mce(final);
if (!apei_err)
- apei_err = apei_write_mce(final);
+ apei_err = apei_write_mce(&final->m);
}
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
- memmsg = mce_dump_aux_info(final);
+ memmsg = mce_dump_aux_info(&final->m);
if (memmsg)
pr_emerg(HW_ERR "Machine check: %s\n", memmsg);
@@ -323,9 +334,9 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
* panic.
*/
if (kexec_crash_loaded()) {
- if (final && (final->status & MCI_STATUS_ADDRV)) {
+ if (final && (final->m.status & MCI_STATUS_ADDRV)) {
struct page *p;
- p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+ p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
}
@@ -445,16 +456,18 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
* check into our "mce" struct so that we can use it later to assess
* the severity of the problem as we read per-bank specific details.
*/
-static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
+static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs)
{
+ struct mce *m;
/*
* Enable instrumentation around mce_prep_record() which calls external
* facilities.
*/
instrumentation_begin();
- mce_prep_record(m);
+ mce_prep_record(err);
instrumentation_end();
+ m = &err->m;
m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
if (regs) {
/*
@@ -574,13 +587,13 @@ EXPORT_SYMBOL_GPL(mce_is_correctable);
static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = to_mce_hw_err(data);
- if (!m)
+ if (!err)
return NOTIFY_DONE;
/* Emit the trace record: */
- trace_mce_record(m);
+ trace_mce_record(err);
set_bit(0, &mce_need_notify);
@@ -624,13 +637,13 @@ static struct notifier_block mce_uc_nb = {
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = to_mce_hw_err(data);
- if (!m)
+ if (!err)
return NOTIFY_DONE;
- if (mca_cfg.print_all || !m->kflags)
- __print_mce(m);
+ if (mca_cfg.print_all || !(err->m.kflags))
+ __print_mce(err);
return NOTIFY_DONE;
}
@@ -644,8 +657,10 @@ static struct notifier_block mce_default_nb = {
/*
* Read ADDR and MISC registers.
*/
-static noinstr void mce_read_aux(struct mce *m, int i)
+static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
{
+ struct mce *m = &err->m;
+
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
@@ -667,8 +682,11 @@ static noinstr void mce_read_aux(struct mce *m, int i)
if (mce_flags.smca) {
m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
- if (m->status & MCI_STATUS_SYNDV)
+ if (m->status & MCI_STATUS_SYNDV) {
m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
+ err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+ }
}
}
@@ -692,26 +710,28 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
int i;
this_cpu_inc(mce_poll_count);
- mce_gather_info(&m, NULL);
+ mce_gather_info(&err, NULL);
+ m = &err.m;
if (flags & MCP_TIMESTAMP)
- m.tsc = rdtsc();
+ m->tsc = rdtsc();
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
barrier();
- m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
/*
* Update storm tracking here, before checking for the
@@ -721,17 +741,17 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* storm status.
*/
if (!mca_cfg.cmci_disabled)
- mce_track_storm(&m);
+ mce_track_storm(m);
/* If this entry is not valid, ignore it */
- if (!(m.status & MCI_STATUS_VAL))
+ if (!(m->status & MCI_STATUS_VAL))
continue;
/*
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
*/
- if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
goto log_it;
/*
@@ -741,20 +761,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* everything else.
*/
if (!mca_cfg.ser) {
- if (m.status & MCI_STATUS_UC)
+ if (m->status & MCI_STATUS_UC)
continue;
goto log_it;
}
/* Log "not enabled" (speculative) errors */
- if (!(m.status & MCI_STATUS_EN))
+ if (!(m->status & MCI_STATUS_EN))
goto log_it;
/*
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
* UC == 1 && PCC == 0 && S == 0
*/
- if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
goto log_it;
/*
@@ -768,20 +788,20 @@ log_it:
if (flags & MCP_DONTLOG)
goto clear_it;
- mce_read_aux(&m, i);
- m.severity = mce_severity(&m, NULL, NULL, false);
+ mce_read_aux(&err, i);
+ m->severity = mce_severity(m, NULL, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+ if (mca_cfg.dont_log_ce && !mce_usable_address(m))
goto clear_it;
if (flags & MCP_QUEUE_LOG)
- mce_gen_pool_add(&m);
+ mce_gen_pool_add(&err);
else
- mce_log(&m);
+ mce_log(&err);
clear_it:
/*
@@ -905,9 +925,10 @@ static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_reg
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
+ struct mce *m = &err->m;
char *tmp = *msg;
int i;
@@ -925,7 +946,7 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
m->bank = i;
if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
- mce_read_aux(m, i);
+ mce_read_aux(err, i);
*msg = tmp;
return 1;
}
@@ -1016,10 +1037,11 @@ out:
*/
static void mce_reign(void)
{
- int cpu;
+ struct mce_hw_err *err = NULL;
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
+ int cpu;
/*
* This CPU is the Monarch and the other CPUs have run
@@ -1027,11 +1049,13 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- struct mce *mtmp = &per_cpu(mces_seen, cpu);
+ struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu);
+ struct mce *mtmp = &etmp->m;
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
- m = &per_cpu(mces_seen, cpu);
+ err = &per_cpu(hw_errs_seen, cpu);
+ m = &err->m;
}
}
@@ -1043,7 +1067,7 @@ static void mce_reign(void)
if (m && global_worst >= MCE_PANIC_SEVERITY) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, &msg, true);
- mce_panic("Fatal machine check", m, msg);
+ mce_panic("Fatal machine check", err, msg);
}
/*
@@ -1060,11 +1084,11 @@ static void mce_reign(void)
mce_panic("Fatal machine check from unknown source", NULL, NULL);
/*
- * Now clear all the mces_seen so that they don't reappear on
+ * Now clear all the hw_errs_seen so that they don't reappear on
* the next mce.
*/
for_each_possible_cpu(cpu)
- memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+ memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err));
}
static atomic_t global_nwo;
@@ -1268,13 +1292,14 @@ static noinstr bool mce_check_crashing_cpu(void)
}
static __always_inline int
-__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
- unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
- int *worst)
+__mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
+ struct mce_hw_err *final, unsigned long *toclear,
+ unsigned long *valid_banks, int no_way_out, int *worst)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i, taint = 0;
+ struct mce *m = &err->m;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
arch___clear_bit(i, toclear);
@@ -1319,7 +1344,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
if (severity == MCE_NO_SEVERITY)
continue;
- mce_read_aux(m, i);
+ mce_read_aux(err, i);
/* assuming valid severity level != 0 */
m->severity = severity;
@@ -1329,17 +1354,17 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
* done in #MC context, where instrumentation is disabled.
*/
instrumentation_begin();
- mce_log(m);
+ mce_log(err);
instrumentation_end();
if (severity > *worst) {
- *final = *m;
+ *final = *err;
*worst = severity;
}
}
/* mce_clear_state will clear *final, save locally for use later */
- *m = *final;
+ *err = *final;
return taint;
}
@@ -1399,9 +1424,10 @@ static void kill_me_never(struct callback_head *cb)
set_mce_nospec(pfn);
}
-static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
+static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *))
{
int count = ++current->mce_count;
+ struct mce *m = &err->m;
/* First call, save all the details */
if (count == 1) {
@@ -1414,11 +1440,12 @@ static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callba
/* Ten is likely overkill. Don't expect more than two faults before task_work() */
if (count > 10)
- mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+ mce_panic("Too many consecutive machine checks while accessing user data",
+ err, msg);
/* Second or later call, make sure page address matches the one from first call */
if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
- mce_panic("Consecutive machine checks to different user pages", m, msg);
+ mce_panic("Consecutive machine checks to different user pages", err, msg);
/* Do not call task_work_add() more than once */
if (count > 1)
@@ -1467,8 +1494,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
- struct mce m, *final;
+ struct mce_hw_err *final;
+ struct mce_hw_err err;
char *msg = NULL;
+ struct mce *m;
if (unlikely(mce_flags.p5))
return pentium_machine_check(regs);
@@ -1506,13 +1535,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
this_cpu_inc(mce_exception_count);
- mce_gather_info(&m, regs);
- m.tsc = rdtsc();
+ mce_gather_info(&err, regs);
+ m = &err.m;
+ m->tsc = rdtsc();
- final = this_cpu_ptr(&mces_seen);
- *final = m;
+ final = this_cpu_ptr(&hw_errs_seen);
+ *final = err;
- no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
+ no_way_out = mce_no_way_out(&err, &msg, valid_banks, regs);
barrier();
@@ -1521,15 +1551,15 @@ noinstr void do_machine_check(struct pt_regs *regs)
* Assume the worst for now, but if we find the
* severity is MCE_AR_SEVERITY we have other options.
*/
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
kill_current_task = 1;
/*
* Check if this MCE is signaled to only this logical processor,
* on Intel, Zhaoxin only.
*/
- if (m.cpuvendor == X86_VENDOR_INTEL ||
- m.cpuvendor == X86_VENDOR_ZHAOXIN)
- lmce = m.mcgstatus & MCG_STATUS_LMCES;
+ if (m->cpuvendor == X86_VENDOR_INTEL ||
+ m->cpuvendor == X86_VENDOR_ZHAOXIN)
+ lmce = m->mcgstatus & MCG_STATUS_LMCES;
/*
* Local machine check may already know that we have to panic.
@@ -1540,12 +1570,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
*/
if (lmce) {
if (no_way_out)
- mce_panic("Fatal local machine check", &m, msg);
+ mce_panic("Fatal local machine check", &err, msg);
} else {
order = mce_start(&no_way_out);
}
- taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
+ taint = __mc_scan_banks(&err, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1560,7 +1590,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
no_way_out = worst >= MCE_PANIC_SEVERITY;
if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
+ mce_panic("Fatal machine check on current CPU", &err, msg);
}
} else {
/*
@@ -1572,8 +1602,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY) {
- mce_severity(&m, regs, &msg, true);
- mce_panic("Local fatal machine check!", &m, msg);
+ mce_severity(m, regs, &msg, true);
+ mce_panic("Local fatal machine check!", &err, msg);
}
}
@@ -1591,16 +1621,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
goto out;
/* Fault was in user mode and we need to take some action */
- if ((m.cs & 3) == 3) {
+ if ((m->cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- if (!mce_usable_address(&m))
- queue_task_work(&m, msg, kill_me_now);
+ if (!mce_usable_address(m))
+ queue_task_work(&err, msg, kill_me_now);
else
- queue_task_work(&m, msg, kill_me_maybe);
+ queue_task_work(&err, msg, kill_me_maybe);
- } else if (m.mcgstatus & MCG_STATUS_SEAM_NR) {
+ } else if (m->mcgstatus & MCG_STATUS_SEAM_NR) {
/*
* Saved RIP on stack makes it look like the machine check
* was taken in the kernel on the instruction following
@@ -1612,8 +1642,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* not occur there. Mark the page as poisoned so it won't
* be added to free list when the guest is terminated.
*/
- if (mce_usable_address(&m)) {
- struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT);
+ if (mce_usable_address(m)) {
+ struct page *p = pfn_to_online_page(m->addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
@@ -1628,13 +1658,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
* corresponding exception handler which would do that is the
* proper one.
*/
- if (m.kflags & MCE_IN_KERNEL_RECOV) {
+ if (m->kflags & MCE_IN_KERNEL_RECOV) {
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
- mce_panic("Failed kernel mode recovery", &m, msg);
+ mce_panic("Failed kernel mode recovery", &err, msg);
}
- if (m.kflags & MCE_IN_KERNEL_COPYIN)
- queue_task_work(&m, msg, kill_me_never);
+ if (m->kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&err, msg, kill_me_never);
}
out:
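
The bulk of this file's churn is the switch from passing struct mce around to passing struct mce_hw_err, which keeps struct mce as its first member and carries vendor-specific data (the AMD SYND1/SYND2 registers) in a union next to it. A stand-alone sketch of that wrapper pattern and of a to_mce_hw_err()-style conversion (layout and the container_of-style macro are assumed for illustration, not the kernel's definitions):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct mce { uint64_t status, addr, synd; };

struct mce_hw_err {
	struct mce m;
	union {
		struct { uint64_t synd1, synd2; } amd;
	} vendor;
};

/* recover the wrapper from a pointer to its embedded struct mce */
#define to_mce_hw_err(mptr) \
	((struct mce_hw_err *)((char *)(mptr) - offsetof(struct mce_hw_err, m)))

int main(void)
{
	struct mce_hw_err err = { .m = { .status = 1 }, .vendor.amd.synd2 = 0xabc };
	struct mce *m = &err.m;		/* what a notifier would receive */

	printf("synd2 via wrapper: %#llx\n",
	       (unsigned long long)to_mce_hw_err(m)->vendor.amd.synd2);
	return 0;
}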
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index af44fd5dbd7c..8d023239ce18 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -264,15 +264,8 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
return put_user(mcelog->len, p);
- case MCE_GETCLEAR_FLAGS: {
- unsigned flags;
-
- do {
- flags = mcelog->flags;
- } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
-
- return put_user(flags, p);
- }
+ case MCE_GETCLEAR_FLAGS:
+ return put_user(xchg(&mcelog->flags, 0), p);
default:
return -ENOTTY;
}
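
A stand-alone illustration of why the cmpxchg() retry loop could become a single operation: an atomic exchange with 0 returns the old flags and clears them in one step (C11 atomics used here instead of the kernel's xchg()):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int mcelog_flags = 0x3;

static unsigned int getclear_flags(void)
{
	/* read the current flags and reset them atomically */
	return atomic_exchange(&mcelog_flags, 0);
}

int main(void)
{
	printf("first read:  %#x\n", getclear_flags());	/* 0x3 */
	printf("second read: %#x\n", getclear_flags());	/* 0x0 */
	return 0;
}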
diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c
index 4284749ec803..d0be6dda0c14 100644
--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -31,15 +31,15 @@ static LLIST_HEAD(mce_event_llist);
*/
static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
{
+ struct mce_hw_err *err1, *err2;
struct mce_evt_llist *node;
- struct mce *m1, *m2;
- m1 = &t->mce;
+ err1 = &t->err;
llist_for_each_entry(node, &l->llnode, llnode) {
- m2 = &node->mce;
+ err2 = &node->err;
- if (!mce_cmp(m1, m2))
+ if (!mce_cmp(&err1->m, &err2->m))
return true;
}
return false;
@@ -73,8 +73,8 @@ struct llist_node *mce_gen_pool_prepare_records(void)
void mce_gen_pool_process(struct work_struct *__unused)
{
- struct llist_node *head;
struct mce_evt_llist *node, *tmp;
+ struct llist_node *head;
struct mce *mce;
head = llist_del_all(&mce_event_llist);
@@ -83,7 +83,7 @@ void mce_gen_pool_process(struct work_struct *__unused)
head = llist_reverse_order(head);
llist_for_each_entry_safe(node, tmp, head, llnode) {
- mce = &node->mce;
+ mce = &node->err.m;
blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
}
@@ -94,11 +94,11 @@ bool mce_gen_pool_empty(void)
return llist_empty(&mce_event_llist);
}
-int mce_gen_pool_add(struct mce *mce)
+int mce_gen_pool_add(struct mce_hw_err *err)
{
struct mce_evt_llist *node;
- if (filter_mce(mce))
+ if (filter_mce(&err->m))
return -EINVAL;
if (!mce_evt_pool)
@@ -110,7 +110,7 @@ int mce_gen_pool_add(struct mce *mce)
return -ENOMEM;
}
- memcpy(&node->mce, mce, sizeof(*mce));
+ memcpy(&node->err, err, sizeof(*err));
llist_add(&node->llnode, &mce_event_llist);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 49ed3428785d..313fe682db33 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -502,8 +502,9 @@ static void prepare_msrs(void *info)
static void do_inject(void)
{
- u64 mcg_status = 0;
unsigned int cpu = i_mce.extcpu;
+ struct mce_hw_err err;
+ u64 mcg_status = 0;
u8 b = i_mce.bank;
i_mce.tsc = rdtsc_ordered();
@@ -517,7 +518,8 @@ static void do_inject(void)
i_mce.status |= MCI_STATUS_SYNDV;
if (inj_type == SW_INJ) {
- mce_log(&i_mce);
+ err.m = i_mce;
+ mce_log(&err);
return;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f6103e6bf69a..b3cd2c61b11d 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -94,7 +94,7 @@ static int cmci_supported(int *banks)
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
- *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+ *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
return !!(cap & MCG_CMCI_P);
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 43c7f3b71df5..84f810598231 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -26,12 +26,12 @@ extern struct blocking_notifier_head x86_mce_decoder_chain;
struct mce_evt_llist {
struct llist_node llnode;
- struct mce mce;
+ struct mce_hw_err err;
};
void mce_gen_pool_process(struct work_struct *__unused);
bool mce_gen_pool_empty(void);
-int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_add(struct mce_hw_err *err);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index f63b051f25a0..31a73715d755 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -584,7 +584,7 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size);
static int __init save_microcode_in_initrd(void)
{
@@ -605,7 +605,7 @@ static int __init save_microcode_in_initrd(void)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+ ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret > UCODE_UPDATED)
return -EINVAL;
@@ -613,16 +613,19 @@ static int __init save_microcode_in_initrd(void)
}
early_initcall(save_microcode_in_initrd);
-static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n)
+static inline bool patch_cpus_equivalent(struct ucode_patch *p,
+ struct ucode_patch *n,
+ bool ignore_stepping)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id);
union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id);
- /* Zap stepping */
- p_cid.stepping = 0;
- n_cid.stepping = 0;
+ if (ignore_stepping) {
+ p_cid.stepping = 0;
+ n_cid.stepping = 0;
+ }
return p_cid.full == n_cid.full;
} else {
@@ -644,13 +647,13 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi
WARN_ON_ONCE(!n.patch_id);
list_for_each_entry(p, &microcode_cache, plist)
- if (patch_cpus_equivalent(p, &n))
+ if (patch_cpus_equivalent(p, &n, false))
return p;
return NULL;
}
-static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
+static inline int patch_newer(struct ucode_patch *p, struct ucode_patch *n)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
@@ -659,6 +662,9 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
zp.ucode_rev = p->patch_id;
zn.ucode_rev = n->patch_id;
+ if (zn.stepping != zp.stepping)
+ return -1;
+
return zn.rev > zp.rev;
} else {
return n->patch_id > p->patch_id;
@@ -668,10 +674,14 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
static void update_cache(struct ucode_patch *new_patch)
{
struct ucode_patch *p;
+ int ret;
list_for_each_entry(p, &microcode_cache, plist) {
- if (patch_cpus_equivalent(p, new_patch)) {
- if (!patch_newer(p, new_patch)) {
+ if (patch_cpus_equivalent(p, new_patch, true)) {
+ ret = patch_newer(p, new_patch);
+ if (ret < 0)
+ continue;
+ else if (!ret) {
/* we already have the latest patch */
kfree(new_patch->data);
kfree(new_patch);
@@ -944,21 +954,30 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size)
{
- struct cpuinfo_x86 *c;
- unsigned int nid, cpu;
- struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
- if (ret != UCODE_OK) {
+ if (ret != UCODE_OK)
cleanup();
+
+ return ret;
+}
+
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+ struct cpuinfo_x86 *c;
+ unsigned int nid, cpu;
+ struct ucode_patch *p;
+ enum ucode_state ret;
+
+ ret = _load_microcode_amd(family, data, size);
+ if (ret != UCODE_OK)
return ret;
- }
for_each_node(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 815fa67356a2..f3d534807d91 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -319,12 +319,6 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
return UCODE_OK;
}
- /*
- * Writeback and invalidate caches before updating microcode to avoid
- * internal issues depending on what the microcode is updating.
- */
- native_wbinvd();
-
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -574,14 +568,14 @@ static bool is_blacklisted(unsigned int cpu)
/*
* Late loading on model 79 with microcode revision less than 0x0b000021
* and LLC size per core bigger than 2.5MB may result in a system hang.
- * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * This behavior is documented in item BDX90, #334165 (Intel Xeon
* Processor E7-8800/4800 v4 Product Family).
*/
if (c->x86_vfm == INTEL_BROADWELL_X &&
c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
- pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Erratum BDX90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
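
The magic constant in the erratum check above is simply 2.5 MiB expressed in bytes: 2.5 * 1024 * 1024 = 2621440. A stand-alone sketch of the blacklist predicate (simplified; field names assumed):

#include <stdbool.h>
#include <stdio.h>

struct cpu {
	unsigned int stepping, microcode;
	unsigned long llc_size_per_core;
	bool is_broadwell_x;
};

static bool late_load_blacklisted(const struct cpu *c)
{
	return c->is_broadwell_x && c->stepping == 0x01 &&
	       c->llc_size_per_core > 2621440 &&	/* 2.5 MiB per core */
	       c->microcode < 0x0b000021;
}

int main(void)
{
	struct cpu c = { .stepping = 1, .microcode = 0x0b00001f,
			 .llc_size_per_core = 2867200, .is_broadwell_x = true };

	printf("late loading blacklisted: %d\n", late_load_blacklisted(&c));	/* 1 */
	return 0;
}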
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 8591d53c144b..b681c2e07dbf 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -207,7 +207,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
return false;
}
-static bool __get_mem_config_intel(struct rdt_resource *r)
+static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
@@ -241,7 +241,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
return true;
}
-static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
+static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
u32 eax, ebx, ecx, edx, subleaf;
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 50fa1fe9a073..200d89a64027 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -29,10 +29,10 @@
* hardware. The allocated bandwidth percentage is rounded to the next
* control step available on the hardware.
*/
-static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
{
- unsigned long bw;
int ret;
+ u32 bw;
/*
* Only linear delay values is supported for current Intel SKUs.
@@ -42,16 +42,21 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false;
}
- ret = kstrtoul(buf, 10, &bw);
+ ret = kstrtou32(buf, 10, &bw);
if (ret) {
- rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
+ rdt_last_cmd_printf("Invalid MB value %s\n", buf);
return false;
}
- if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
- !is_mba_sc(r)) {
- rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
- r->membw.min_bw, r->default_ctrl);
+ /* Nothing else to do if software controller is enabled. */
+ if (is_mba_sc(r)) {
+ *data = bw;
+ return true;
+ }
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
+ bw, r->membw.min_bw, r->default_ctrl);
return false;
}
@@ -65,7 +70,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
struct rdt_resource *r = s->res;
- unsigned long bw_val;
+ u32 bw_val;
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 851b561850e0..5fcb3d635d91 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -1158,11 +1158,12 @@ static __init int snc_get_config(void)
ret = cpus_per_l3 / cpus_per_node;
- /* sanity check: Only valid results are 1, 2, 3, 4 */
+ /* sanity check: Only valid results are 1, 2, 3, 4, 6 */
switch (ret) {
case 1:
break;
case 2 ... 4:
+ case 6:
pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
break;
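
A worked example of the ratio snc_get_config() computes above, with hypothetical CPU counts: 96 CPUs sharing an L3 cache and 16 CPUs per NUMA node give 6 SNC nodes per L3, which this patch adds to the set of accepted results:

#include <stdio.h>

int main(void)
{
	int cpus_per_l3 = 96, cpus_per_node = 16;	/* hypothetical counts */
	int snc_nodes = cpus_per_l3 / cpus_per_node;

	/* valid results after this patch: 1, 2, 3, 4 or 6 */
	printf("SNC nodes per L3 cache: %d\n", snc_nodes);	/* 6 */
	return 0;
}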
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d7163b764c62..d906a1cd8491 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1596,7 +1596,7 @@ static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
- struct mon_config_info mon_info = {0};
+ struct mon_config_info mon_info;
struct rdt_mon_domain *dom;
bool sep = false;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c84c30188fdf..16f3ca30626a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,34 +24,36 @@ struct cpuid_bit {
* levels are different and there is a separate entry for each.
*/
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
- { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
- { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
- { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
- { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
- { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
- { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
- { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
- { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
- { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
- { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
- { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
- { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
- { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
- { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
- { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
- { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
- { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
- { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
- { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
- { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
- { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
- { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
- { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
- { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
- { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
+ { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
+ { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
+ { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
+ { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
+ { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
+ { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
+ { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
+ { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
+ { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
+ { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
+ { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
+ { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
+ { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
+ { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
+ { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 },
{ 0, 0, 0, 0, 0 }
};
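
Each table entry above names a CPUID leaf, sub-leaf, register and bit for one scattered feature. A stand-alone sketch of how one of the new entries would be probed, X86_FEATURE_AMD_HETEROGENEOUS_CORES in leaf 0x80000026, sub-leaf 0, EAX bit 30 (userspace <cpuid.h> used here instead of the kernel's cpuid helpers):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	if (__get_cpuid_count(0x80000026, 0, &eax, &ebx, &ecx, &edx) &&
	    (eax & (1u << 30)))
		printf("heterogeneous cores enumerated\n");
	else
		printf("bit not set (or leaf not supported)\n");
	return 0;
}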
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 9ace84486499..eb5848d1851a 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -901,19 +901,15 @@ static struct miscdevice sgx_dev_provision = {
int sgx_set_attribute(unsigned long *allowed_attributes,
unsigned int attribute_fd)
{
- struct fd f = fdget(attribute_fd);
+ CLASS(fd, f)(attribute_fd);
- if (!fd_file(f))
+ if (fd_empty(f))
return -EINVAL;
- if (fd_file(f)->f_op != &sgx_provision_fops) {
- fdput(f);
+ if (fd_file(f)->f_op != &sgx_provision_fops)
return -EINVAL;
- }
*allowed_attributes |= SGX_ATTR_PROVISIONKEY;
-
- fdput(f);
return 0;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);
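
The fdget()/fdput() pair above is replaced by the scope-based CLASS(fd) guard, so every return path drops the file reference automatically. A stand-alone sketch of the same idea using a plain __attribute__((cleanup)) handler (not the kernel's CLASS()/DEFINE_CLASS machinery):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void close_fd(int *fd)
{
	if (*fd >= 0)
		close(*fd);
}

static int probe(const char *path)
{
	int fd __attribute__((cleanup(close_fd))) = open(path, O_RDONLY);

	if (fd < 0)
		return -1;	/* nothing to clean up */
	/* ... use fd ... */
	return 0;		/* close_fd() runs automatically on return */
}

int main(void)
{
	printf("probe(\"/etc/hostname\") = %d\n", probe("/etc/hostname"));
	return 0;
}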
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 7d476fa697ca..03b3c9c3a45e 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan)
if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
has_topoext = cpu_parse_topology_ext(tscan);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
+ tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
+
if (!has_topoext && !parse_8000_0008(tscan))
return;
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index 9a6069e7133c..8277c64f88db 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -3,6 +3,7 @@
#include <xen/xen.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/smp.h>
@@ -27,6 +28,36 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
}
}
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
+ switch (c->topo.intel_type) {
+ case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY;
+ case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE;
+ }
+ }
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ switch (c->topo.amd_type) {
+ case 0: return TOPO_CPU_TYPE_PERFORMANCE;
+ case 1: return TOPO_CPU_TYPE_EFFICIENCY;
+ }
+ }
+
+ return TOPO_CPU_TYPE_UNKNOWN;
+}
+
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c)
+{
+ switch (get_topology_cpu_type(c)) {
+ case TOPO_CPU_TYPE_PERFORMANCE:
+ return "performance";
+ case TOPO_CPU_TYPE_EFFICIENCY:
+ return "efficiency";
+ default:
+ return "unknown";
+ }
+}
+
static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c)
{
struct {
@@ -87,6 +118,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
.cu_id = 0xff,
.llc_id = BAD_APICID,
.l2c_id = BAD_APICID,
+ .cpu_type = TOPO_CPU_TYPE_UNKNOWN,
};
struct cpuinfo_x86 *c = tscan->c;
struct {
@@ -132,6 +164,8 @@ static void parse_topology(struct topo_scan *tscan, bool early)
case X86_VENDOR_INTEL:
if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan))
parse_legacy(tscan);
+ if (c->cpuid_level >= 0x1a)
+ c->topo.cpu_type = cpuid_eax(0x1a);
break;
case X86_VENDOR_HYGON:
if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))