summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mce/core.c
diff options
context:
space:
mode:
authorBorislav Petkov (AMD) <bp@alien8.de>2023-07-19 14:19:50 +0200
committerBorislav Petkov (AMD) <bp@alien8.de>2023-07-21 18:55:46 +0200
commitc3629dd7e67d6ec5705d33b0de0d142c972fe573 (patch)
tree2e344dd47063840314a002e9ddb08d5cd316e283 /arch/x86/kernel/cpu/mce/core.c
parentfdf0eaf11452d72945af31804e2a1048ee1b574c (diff)
x86/mce: Prevent duplicate error records
A legitimate use case of the MCA infrastructure is to have the firmware log all uncorrectable errors and also, have the OS see all correctable errors. The uncorrectable, UCNA errors are usually configured to be reported through an SMI. CMCI, which is the correctable error reporting interrupt, uses SMI too and having both enabled, leads to unnecessary overhead. So what ends up happening is, people disable CMCI in the wild and leave on only the UCNA SMI. When CMCI is disabled, the MCA infrastructure resorts to polling the MCA banks. If a MCA MSR is shared between the logical threads, one error ends up getting logged multiple times as the polling runs on every logical thread. Therefore, introduce locking on the Intel side of the polling routine to prevent such duplicate error records from appearing. Based on a patch by Aristeu Rozanski <aris@ruivo.org>. Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Tested-by: Tony Luck <tony.luck@intel.com> Acked-by: Aristeu Rozanski <aris@ruivo.org> Link: https://lore.kernel.org/r/20230515143225.GC4090740@cathedrallabs.org
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r--arch/x86/kernel/cpu/mce/core.c9
1 files changed, 8 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 89e2aab5d34d..b8ad5a5b4026 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1608,6 +1608,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
local_irq_restore(flags);
}
+static void mc_poll_banks_default(void)
+{
+ machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+}
+
+void (*mc_poll_banks)(void) = mc_poll_banks_default;
+
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1618,7 +1625,7 @@ static void mce_timer_fn(struct timer_list *t)
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
- machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+ mc_poll_banks();
if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv);