Diffstat (limited to 'arch/x86/kernel/cpu/mce')
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c      | 163
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c     | 315
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c    |  18
-rw-r--r--  arch/x86/kernel/cpu/mce/internal.h |   9
4 files changed, 232 insertions(+), 273 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5c4eb28c3ac9..d6906442f49b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -241,7 +241,8 @@ struct threshold_block {
struct threshold_bank {
struct kobject *kobj;
- struct threshold_block *blocks;
+ /* List of threshold blocks within this MCA bank. */
+ struct list_head miscj;
};
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
@@ -252,9 +253,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
*/
static DEFINE_PER_CPU(u64, bank_map);
-/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u64, smca_misc_banks_map);
-
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -264,28 +262,6 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
-static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
-{
- u32 low, high;
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return;
-
- if (!(low & MCI_CONFIG_MCAX))
- return;
-
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
- return;
-
- if (low & MASK_BLKPTR_LO)
- per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
-
-}
-
static void smca_configure(unsigned int bank, unsigned int cpu)
{
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
@@ -326,8 +302,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
- smca_set_misc_banks_map(bank, cpu);
-
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
@@ -419,8 +393,8 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return true;
};
-/* Reprogram MCx_MISC MSR behind this threshold bank. */
-static void threshold_restart_bank(void *_tr)
+/* Reprogram MCx_MISC MSR behind this threshold block. */
+static void threshold_restart_block(void *_tr)
{
struct thresh_restart *tr = _tr;
u32 hi, lo;
@@ -478,7 +452,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
};
b->threshold_limit = THRESHOLD_MAX;
- threshold_restart_bank(&tr);
+ threshold_restart_block(&tr);
};
static int setup_APIC_mce_threshold(int reserved, int new)
@@ -525,18 +499,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int bank, unsigned int block,
- unsigned int cpu)
-{
- if (!block)
- return MSR_AMD64_SMCA_MCx_MISC(bank);
-
- if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
- return 0;
-
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-}
-
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block,
unsigned int cpu)
@@ -546,8 +508,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
- if (mce_flags.smca)
- return smca_get_block_address(bank, block, cpu);
+ if (mce_flags.smca) {
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ if (!(low & MASK_BLKPTR_LO))
+ return 0;
+
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
/* Fall back to method we used for older processors: */
switch (block) {
@@ -677,6 +646,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
wrmsrq(MSR_K7_HWCR, hwcr);
}
+static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+ }
+
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+ mce_banks[0].ctl = 0;
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -684,6 +675,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
int offset = -1;
+ amd_apply_cpu_quirks(c);
+
+ mce_flags.amd_threshold = 1;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
@@ -714,6 +708,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
+void smca_bsp_init(void)
+{
+ mce_threshold_vector = amd_threshold_interrupt;
+ deferred_error_int_vector = amd_deferred_error_interrupt;
+}
+
/*
* DRAM ECC errors are reported in the Northbridge (bank 4) with
* Extended Error Code 8.
@@ -921,7 +921,7 @@ static void log_and_reset_block(struct threshold_block *block)
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
tr.b = block;
- threshold_restart_bank(&tr);
+ threshold_restart_block(&tr);
}
/*
@@ -930,9 +930,9 @@ static void log_and_reset_block(struct threshold_block *block)
*/
static void amd_threshold_interrupt(void)
{
- struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
- struct threshold_bank **bp = this_cpu_read(threshold_banks);
+ struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank;
unsigned int bank, cpu = smp_processor_id();
+ struct threshold_block *block, *tmp;
/*
* Validate that the threshold bank has been initialized already. The
@@ -946,20 +946,20 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
continue;
- first_block = bp[bank]->blocks;
- if (!first_block)
+ thr_bank = bp[bank];
+ if (!thr_bank)
continue;
- /*
- * The first block is also the head of the list. Check it first
- * before iterating over the rest.
- */
- log_and_reset_block(first_block);
- list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+ list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj)
log_and_reset_block(block);
}
}
+void amd_clear_bank(struct mce *m)
+{
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
+}
+
/*
* Sysfs Interface
*/
@@ -995,7 +995,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1020,7 +1020,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1181,13 +1181,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
default_attrs[2] = NULL;
}
- INIT_LIST_HEAD(&b->miscj);
-
- /* This is safe as @tb is not visible yet */
- if (tb->blocks)
- list_add(&b->miscj, &tb->blocks->miscj);
- else
- tb->blocks = b;
+ list_add(&b->miscj, &tb->miscj);
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
@@ -1238,6 +1232,8 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
goto out_free;
}
+ INIT_LIST_HEAD(&b->miscj);
+
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
goto out_kobj;
@@ -1258,26 +1254,15 @@ static void threshold_block_release(struct kobject *kobj)
kfree(to_block(kobj));
}
-static void deallocate_threshold_blocks(struct threshold_bank *bank)
+static void threshold_remove_bank(struct threshold_bank *bank)
{
struct threshold_block *pos, *tmp;
- list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
+ list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
list_del(&pos->miscj);
kobject_put(&pos->kobj);
}
- kobject_put(&bank->blocks->kobj);
-}
-
-static void threshold_remove_bank(struct threshold_bank *bank)
-{
- if (!bank->blocks)
- goto out_free;
-
- deallocate_threshold_blocks(bank);
-
-out_free:
kobject_put(bank->kobj);
kfree(bank);
}
@@ -1296,12 +1281,12 @@ static void __threshold_remove_device(struct threshold_bank **bp)
kfree(bp);
}
-int mce_threshold_remove_device(unsigned int cpu)
+void mce_threshold_remove_device(unsigned int cpu)
{
struct threshold_bank **bp = this_cpu_read(threshold_banks);
if (!bp)
- return 0;
+ return;
/*
* Clear the pointer before cleaning up, so that the interrupt won't
@@ -1310,7 +1295,7 @@ int mce_threshold_remove_device(unsigned int cpu)
this_cpu_write(threshold_banks, NULL);
__threshold_remove_device(bp);
- return 0;
+ return;
}
/**
@@ -1324,36 +1309,34 @@ int mce_threshold_remove_device(unsigned int cpu)
* thread running on @cpu. The callback is invoked on all CPUs which are
* online when the callback is installed or during a real hotplug event.
*/
-int mce_threshold_create_device(unsigned int cpu)
+void mce_threshold_create_device(unsigned int cpu)
{
unsigned int numbanks, bank;
struct threshold_bank **bp;
- int err;
if (!mce_flags.amd_threshold)
- return 0;
+ return;
bp = this_cpu_read(threshold_banks);
if (bp)
- return 0;
+ return;
numbanks = this_cpu_read(mce_num_banks);
bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
if (!bp)
- return -ENOMEM;
+ return;
for (bank = 0; bank < numbanks; ++bank) {
if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
continue;
- err = threshold_create_bank(bp, cpu, bank);
- if (err) {
+ if (threshold_create_bank(bp, cpu, bank)) {
__threshold_remove_device(bp);
- return err;
+ return;
}
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
- return 0;
+ return;
}
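The amd.c changes above convert struct threshold_bank from a "first block plus tail list" layout to a plain list head, so creation and teardown walk one flat list with no special case for the head block. A minimal sketch of the resulting pattern, using standard <linux/list.h> semantics; the variable names are illustrative, not taken from the patch:

	/* The bank owns an empty list head; each new block links itself in. */
	INIT_LIST_HEAD(&tb->miscj);
	list_add(&b->miscj, &tb->miscj);

	/* Teardown iterates safely while unlinking; no head-block special case. */
	struct threshold_block *pos, *tmp;

	list_for_each_entry_safe(pos, tmp, &tb->miscj, miscj) {
		list_del(&pos->miscj);
		kobject_put(&pos->kobj);
	}
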
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4da4eab56c81..460e90a1a0b1 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -423,7 +423,7 @@ noinstr u64 mce_rdmsrq(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
-static noinstr void mce_wrmsrq(u32 msr, u64 v)
+noinstr void mce_wrmsrq(u32 msr, u64 v)
{
u32 low, high;
@@ -715,6 +715,60 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
+ * Newer Intel systems that support software error
+ * recovery need to make additional checks. Other
+ * CPUs should skip over uncorrected errors, but log
+ * everything else.
+ */
+static bool ser_should_log_poll_error(struct mce *m)
+{
+ /* Log "not enabled" (speculative) errors */
+ if (!(m->status & MCI_STATUS_EN))
+ return true;
+
+ /*
+ * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
+ * UC == 1 && PCC == 0 && S == 0
+ */
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
+ return true;
+
+ return false;
+}
+
+static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+{
+ struct mce *m = &err->m;
+
+ /* If this entry is not valid, ignore it. */
+ if (!(m->status & MCI_STATUS_VAL))
+ return false;
+
+ /*
+ * If we are logging everything (at CPU online) or this
+ * is a corrected error, then we must log it.
+ */
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
+ return true;
+
+ if (mca_cfg.ser)
+ return ser_should_log_poll_error(m);
+
+ if (m->status & MCI_STATUS_UC)
+ return false;
+
+ return true;
+}
+
+static void clear_bank(struct mce *m)
+{
+ if (m->cpuvendor == X86_VENDOR_AMD)
+ return amd_clear_bank(m);
+
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
+}
+
+/*
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
*
@@ -765,51 +819,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!mca_cfg.cmci_disabled)
mce_track_storm(m);
- /* If this entry is not valid, ignore it */
- if (!(m->status & MCI_STATUS_VAL))
+ /* Verify that the error should be logged based on hardware conditions. */
+ if (!should_log_poll_error(flags, &err))
continue;
- /*
- * If we are logging everything (at CPU online) or this
- * is a corrected error, then we must log it.
- */
- if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
- goto log_it;
-
- /*
- * Newer Intel systems that support software error
- * recovery need to make additional checks. Other
- * CPUs should skip over uncorrected errors, but log
- * everything else.
- */
- if (!mca_cfg.ser) {
- if (m->status & MCI_STATUS_UC)
- continue;
- goto log_it;
- }
-
- /* Log "not enabled" (speculative) errors */
- if (!(m->status & MCI_STATUS_EN))
- goto log_it;
-
- /*
- * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
- * UC == 1 && PCC == 0 && S == 0
- */
- if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
- goto log_it;
-
- /*
- * Skip anything else. Presumption is that our read of this
- * bank is racing with a machine check. Leave the log alone
- * for do_machine_check() to deal with it.
- */
- continue;
-
-log_it:
- if (flags & MCP_DONTLOG)
- goto clear_it;
-
mce_read_aux(&err, i);
m->severity = mce_severity(m, NULL, NULL, false);
/*
@@ -826,10 +839,7 @@ log_it:
mce_log(&err);
clear_it:
- /*
- * Clear state for this bank.
- */
- mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+ clear_bank(m);
}
/*
@@ -1810,9 +1820,10 @@ static void __mcheck_cpu_mce_banks_init(void)
struct mce_bank *b = &mce_banks[i];
/*
- * Init them all, __mcheck_cpu_apply_quirks() is going to apply
- * the required vendor quirks before
- * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ * Init them all by default.
+ *
+ * The required vendor quirks will be applied before
+ * __mcheck_cpu_init_prepare_banks() does the final bank setup.
*/
b->ctl = -1ULL;
b->init = true;
@@ -1840,69 +1851,34 @@ static void __mcheck_cpu_cap_init(void)
this_cpu_write(mce_num_banks, b);
__mcheck_cpu_mce_banks_init();
-
- /* Use accurate RIP reporting if available. */
- if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
- mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
-
- if (cap & MCG_SER_P)
- mca_cfg.ser = 1;
}
static void __mcheck_cpu_init_generic(void)
{
- enum mcp_flags m_fl = 0;
- mce_banks_t all_banks;
u64 cap;
- if (!mca_cfg.bootlog)
- m_fl = MCP_DONTLOG;
-
- /*
- * Log the machine checks left over from the previous reset. Log them
- * only, do not start processing them. That will happen in mcheck_late_init()
- * when all consumers have been registered on the notifier chain.
- */
- bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
-
- cr4_set_bits(X86_CR4_MCE);
-
rdmsrq(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
-static void __mcheck_cpu_init_clear_banks(void)
+static void __mcheck_cpu_init_prepare_banks(void)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
int i;
- for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- struct mce_bank *b = &mce_banks[i];
+ /*
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
+ */
+ if (mca_cfg.bootlog) {
+ mce_banks_t all_banks;
- if (!b->init)
- continue;
- wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
- wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+ bitmap_fill(all_banks, MAX_NR_BANKS);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
}
-}
-
-/*
- * Do a final check to see if there are any unused/RAZ banks.
- *
- * This must be done after the banks have been initialized and any quirks have
- * been applied.
- *
- * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
- * Otherwise, a user who disables a bank will not be able to re-enable it
- * without a system reboot.
- */
-static void __mcheck_cpu_check_banks(void)
-{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- u64 msrval;
- int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
@@ -1910,25 +1886,16 @@ static void __mcheck_cpu_check_banks(void)
if (!b->init)
continue;
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+
rdmsrq(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
-static void apply_quirks_amd(struct cpuinfo_x86 *c)
+static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
- /*
- * disable GART TBL walk error reporting, which
- * trips off incorrectly with the IOMMU & 3ware
- * & Cerberus:
- */
- clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
- }
-
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
@@ -1938,13 +1905,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
}
/*
- * Various K7s with broken bank 0 around. Always disable
- * by default.
- */
- if (c->x86 == 6 && this_cpu_read(mce_num_banks))
- mce_banks[0].ctl = 0;
-
- /*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
*/
@@ -1955,26 +1915,13 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mce_flags.zen_ifu_quirk = 1;
}
-static void apply_quirks_intel(struct cpuinfo_x86 *c)
+static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
/* Older CPUs (prior to family 6) don't need quirks. */
if (c->x86_vfm < INTEL_PENTIUM_PRO)
return;
/*
- * SDM documents that on family 6 bank 0 should not be written
- * because it aliases to another special BIOS controlled
- * register.
- * But it's not aliased anymore on model 0x1a+
- * Don't ignore bank 0 completely because there could be a
- * valid event later, merely don't write CTL0.
- */
- if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
- mce_banks[0].init = false;
-
- /*
* All newer Intel systems support MCE broadcasting. Enable
* synchronization with a one second timeout.
*/
@@ -1999,7 +1946,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
mce_flags.skx_repmov_quirk = 1;
}
-static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
+static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
{
/*
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
@@ -2011,34 +1958,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
}
}
-/* Add per CPU specific workarounds here */
-static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
-{
- struct mca_config *cfg = &mca_cfg;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_UNKNOWN:
- pr_info("unknown CPU type - not enabling MCE support\n");
- return false;
- case X86_VENDOR_AMD:
- apply_quirks_amd(c);
- break;
- case X86_VENDOR_INTEL:
- apply_quirks_intel(c);
- break;
- case X86_VENDOR_ZHAOXIN:
- apply_quirks_zhaoxin(c);
- break;
- }
-
- if (cfg->monarch_timeout < 0)
- cfg->monarch_timeout = 0;
- if (cfg->bootlog != 0)
- cfg->panic_timeout = 30;
-
- return true;
-}
-
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
@@ -2060,19 +1979,6 @@ static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
return false;
}
-/*
- * Init basic CPU features needed for early decoding of MCEs.
- */
-static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
-{
- if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
- mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
- mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
- mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.amd_threshold = 1;
- }
-}
-
static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
@@ -2281,6 +2187,53 @@ DEFINE_IDTENTRY_RAW(exc_machine_check)
}
#endif
+void mca_bsp_init(struct cpuinfo_x86 *c)
+{
+ u64 cap;
+
+ if (!mce_available(c))
+ return;
+
+ if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
+ mca_cfg.disabled = 1;
+ pr_info("unknown CPU type - not enabling MCE support\n");
+ return;
+ }
+
+ mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR);
+ mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA);
+
+ if (mce_flags.smca)
+ smca_bsp_init();
+
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
+
+ /* Use accurate RIP reporting if available. */
+ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
+ mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
+
+ if (cap & MCG_SER_P)
+ mca_cfg.ser = 1;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ amd_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_INTEL:
+ intel_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_ZHAOXIN:
+ zhaoxin_apply_global_quirks(c);
+ break;
+ }
+
+ if (mca_cfg.monarch_timeout < 0)
+ mca_cfg.monarch_timeout = 0;
+ if (mca_cfg.bootlog != 0)
+ mca_cfg.panic_timeout = 30;
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@@ -2298,11 +2251,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_cap_init();
- if (!__mcheck_cpu_apply_quirks(c)) {
- mca_cfg.disabled = 1;
- return;
- }
-
if (!mce_gen_pool_init()) {
mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
@@ -2311,12 +2259,11 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
mca_cfg.initialized = 1;
- __mcheck_cpu_init_early(c);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
- __mcheck_cpu_init_clear_banks();
- __mcheck_cpu_check_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_setup_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/*
@@ -2483,7 +2430,8 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
+ cr4_set_bits(X86_CR4_MCE);
}
static struct syscore_ops mce_syscore_ops = {
@@ -2501,8 +2449,9 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_init_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/* Reinit MCEs after user configuration changes */
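The core.c changes fold the open-coded goto chain in machine_check_poll() into should_log_poll_error() and move the leftover-error scan plus the CR4.MCE write to the end of per-CPU init. The logging decision, restated as a condensed predicate for illustration; log_it() and its bool parameters are hypothetical, not part of the patch:

	/* Condensed restatement of should_log_poll_error()'s decision order. */
	static bool log_it(u64 status, bool force_uc, bool ser)
	{
		if (!(status & MCI_STATUS_VAL))
			return false;	/* nothing latched in this bank */

		if (force_uc || !(status & MCI_STATUS_UC))
			return true;	/* MCP_UC polling, or a corrected error */

		if (!ser)
			return false;	/* non-SER parts skip uncorrected errors */

		/* SER parts also log speculative (!EN) and UCNA (UC=1, PCC=0, S=0). */
		return !(status & MCI_STATUS_EN) ||
		       (!(status & MCI_STATUS_PCC) && !(status & MCI_STATUS_S));
	}
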
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 9b149b9c4109..4655223ba560 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
}
}
+static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ *
+ * Older CPUs (prior to family 6) can't reach this point and already
+ * return early due to the check of __mcheck_cpu_ancient_init().
+ */
+ if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+ this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
+ intel_apply_cpu_quirks(c);
intel_init_cmci();
intel_init_lmce();
intel_imc_init(c);
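The intel.c change moves the pre-Nehalem bank 0 quirk into vendor init, where it only clears the bank's init flag; the MSR writes stay in the common path. The effect is visible in __mcheck_cpu_init_prepare_banks() from the core.c hunks above: banks with .init == false are skipped, so CTL0 keeps its BIOS value while the bank can still report a valid event:

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		struct mce_bank *b = &mce_banks[i];

		if (!b->init)
			continue;	/* e.g. bank 0 on pre-Nehalem Intel: never write CTL0 */

		wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
		wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
	}
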
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b5ba598e54cb..b0e00ec5cc8c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -265,8 +265,11 @@ void mce_prep_record_common(struct mce *m);
void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
+void mce_threshold_create_device(unsigned int cpu);
+void mce_threshold_remove_device(unsigned int cpu);
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
+void amd_clear_bank(struct mce *m);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -292,10 +295,15 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
m->addr &= GENMASK_ULL(55, lsb);
}
+void smca_bsp_init(void);
#else
+static inline void mce_threshold_create_device(unsigned int cpu) { }
+static inline void mce_threshold_remove_device(unsigned int cpu) { }
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
+static inline void amd_clear_bank(struct mce *m) { }
static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_bsp_init(void) { }
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@@ -313,6 +321,7 @@ static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
noinstr u64 mce_rdmsrq(u32 msr);
+noinstr void mce_wrmsrq(u32 msr, u64 v);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{