From 68299a42f84288537ee3420c431ac0115ccb90b1 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 30 Oct 2020 12:04:00 -0700 Subject: x86/mce: Enable additional error logging on certain Intel CPUs The Xeon versions of Sandy Bridge, Ivy Bridge and Haswell support an optional additional error logging mode which is enabled by an MSR. Previously, this mode was enabled from the mcelog(8) tool via /dev/cpu, but userspace should not be poking at MSRs. So move the enabling into the kernel. [ bp: Correct the explanation why this is done. ] Suggested-by: Boris Petkov Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201030190807.GA13884@agluck-desk2.amr.corp.intel.com --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..b2dd2648c0e2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -139,6 +139,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 -- cgit From 15af36596ae305aefc8c502c2d3e8c58221709eb Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 6 Nov 2020 22:12:16 +0800 Subject: x86/mce: Correct the detection of invalid notifier priorities Commit c9c6d216ed28 ("x86/mce: Rename "first" function as "early"") changed the enumeration of MCE notifier priorities. Correct the check for notifier priorities to cover the new range. [ bp: Rewrite commit message, remove superfluous brackets in conditional. ] Fixes: c9c6d216ed28 ("x86/mce: Rename "first" function as "early"") Signed-off-by: Zhen Lei Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201106141216.2062-2-thunder.leizhen@huawei.com --- arch/x86/include/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a0f147893a04..fc25c88c7ff2 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -177,7 +177,8 @@ enum mce_notifier_prios { MCE_PRIO_EXTLOG, MCE_PRIO_UC, MCE_PRIO_EARLY, - MCE_PRIO_CEC + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC }; struct notifier_block; -- cgit From 4a24d80b8c3e9f89d6a6a7b89bd057c463b638d3 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 19 Nov 2020 12:29:38 -0600 Subject: x86/mce, cper: Pass x86 CPER through the MCA handling chain The kernel uses ACPI Boot Error Record Table (BERT) to report fatal errors that occurred in a previous boot. The MCA errors in the BERT are reported using the x86 Processor Error Common Platform Error Record (CPER) format. Currently, the record prints out the raw MSR values and AMD relies on the raw record to provide MCA information. Extract the raw MSR values of MCA registers from the BERT and feed them into mce_log() to decode them properly. The implementation is SMCA-specific as the raw MCA register values are given in the register offset order of the SMCA address space. [ bp: Massage. ] [ Fix a build breakage in patch v1. ] Reported-by: kernel test robot Signed-off-by: Smita Koralahalli Signed-off-by: Borislav Petkov Reviewed-by: Punit Agrawal Acked-by: Ard Biesheuvel Link: https://lkml.kernel.org/r/20201119182938.151155-1-Smita.KoralahalliChannabasappa@amd.com --- arch/x86/include/asm/acpi.h | 11 +++++++++++ arch/x86/include/asm/mce.h | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 6d2df1ee427b..65064d9f7fa6 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -159,6 +159,8 @@ static inline u64 x86_default_get_root_pointer(void) extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ +struct cper_ia_proc_ctx; + #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { @@ -177,6 +179,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) */ return PAGE_KERNEL_NOENC; } + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) +{ + return -EINVAL; +} #endif #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index fc25c88c7ff2..56cdeaac76a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -199,16 +199,22 @@ static inline void enable_copy_mc_fragile(void) } #endif +struct cper_ia_proc_ctx; + #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); void mcheck_vendor_init_severity(void); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline void mcheck_vendor_init_severity(void) {} +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) { return -EINVAL; } #endif #ifdef CONFIG_X86_ANCIENT_MCE -- cgit