summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mce/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r--arch/x86/kernel/cpu/mce/core.c49
1 files changed, 43 insertions, 6 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 460e90a1a0b1..34440021e8cf 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -45,6 +45,7 @@
#include <linux/task_work.h>
#include <linux/hardirq.h>
#include <linux/kexec.h>
+#include <linux/vmcore_info.h>
#include <asm/fred.h>
#include <asm/cpu_device_id.h>
@@ -687,7 +688,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
+ else
+ m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -715,6 +719,29 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
+ */
+static bool smca_should_log_poll_error(struct mce *m)
+{
+ if (m->status & MCI_STATUS_VAL)
+ return true;
+
+ m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
+ m->kflags |= MCE_CHECK_DFR_REGS;
+ return true;
+ }
+
+ return false;
+}
+
+/*
* Newer Intel systems that support software error
* recovery need to make additional checks. Other
* CPUs should skip over uncorrected errors, but log
@@ -740,6 +767,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
{
struct mce *m = &err->m;
+ if (mce_flags.smca)
+ return smca_should_log_poll_error(m);
+
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
return false;
@@ -1700,6 +1730,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
}
out:
+ /* Given it didn't panic, mark it as recoverable */
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+
instrumentation_end();
clear:
@@ -2410,13 +2443,13 @@ static void vendor_disable_error_reporting(void)
mce_disable_error_reporting();
}
-static int mce_syscore_suspend(void)
+static int mce_syscore_suspend(void *data)
{
vendor_disable_error_reporting();
return 0;
}
-static void mce_syscore_shutdown(void)
+static void mce_syscore_shutdown(void *data)
{
vendor_disable_error_reporting();
}
@@ -2426,7 +2459,7 @@ static void mce_syscore_shutdown(void)
* Only one CPU is active at this time, the others get re-added later using
* CPU hotplug:
*/
-static void mce_syscore_resume(void)
+static void mce_syscore_resume(void *data)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
@@ -2434,12 +2467,16 @@ static void mce_syscore_resume(void)
cr4_set_bits(X86_CR4_MCE);
}
-static struct syscore_ops mce_syscore_ops = {
+static const struct syscore_ops mce_syscore_ops = {
.suspend = mce_syscore_suspend,
.shutdown = mce_syscore_shutdown,
.resume = mce_syscore_resume,
};
+static struct syscore mce_syscore = {
+ .ops = &mce_syscore_ops,
+};
+
/*
* mce_device: Sysfs support
*/
@@ -2840,7 +2877,7 @@ static __init int mcheck_init_device(void)
if (err < 0)
goto err_out_online;
- register_syscore_ops(&mce_syscore_ops);
+ register_syscore(&mce_syscore);
return 0;