summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mcheck/mce-internal.h
diff options
context:
space:
mode:
authorBorislav Petkov <bp@suse.de>2015-01-13 15:08:51 +0100
committerBorislav Petkov <bp@suse.de>2015-02-19 13:24:25 +0100
commit3f2f0680d1161df96a0e8fea16930f1bd487a9cf (patch)
tree29009c1b6dcc24a7dc93ba485983c6ea5f31e0f0 /arch/x86/kernel/cpu/mcheck/mce-internal.h
parent0eac092d8307db61d320f77f9fce40e60b4ffa89 (diff)
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race condition in the CMCI storm adaptive period length thing. Yes, we have to admit, it is fragile and error prone. So let's simplify it. The simpler logic is: now, after we enter storm mode, we go straight to polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm mode as long as we see errors being logged while polling. Theoretically, if we see an uninterrupted error stream, we will remain in storm mode indefinitely and keep polling the MSRs. However, when the storm is actually a burst of errors, once we have logged them all, we back out of it after ~5 mins of polling and no more errors logged. If we encounter an error during those 5 minutes, we reset the polling interval to 5 mins. Making machine_check_poll() return a bool and denoting whether it has seen an error or not lets us simplify a bunch of code and move the storm handling private to mce_intel.c. Some minor cleanups while at it. Reported-by: Calvin Owens <calvinowens@fb.com> Tested-by: Tony Luck <tony.luck@intel.com> Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com Signed-off-by: Borislav Petkov <bp@suse.de>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce-internal.h')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h9
1 files changed, 5 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 10b46906767f..e12f0bfb45c1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -14,6 +14,7 @@ enum severity_level {
};
#define ATTR_LEN 16
+#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
/* One object for each MCE bank, shared by all CPUs */
struct mce_bank {
@@ -30,13 +31,13 @@ extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif