summaryrefslogtreecommitdiff
path: root/drivers/edac/skx_common.h
diff options
context:
space:
mode:
authorQiuxu Zhuo <qiuxu.zhuo@intel.com>2023-01-13 11:27:58 +0800
committerTony Luck <tony.luck@intel.com>2023-01-25 08:16:44 -0800
commit6e8746cb735166eaf3ceb086b31bb0431f5e3532 (patch)
tree034555c29a20c55d63ffe8585d153eb849dd4ef3 /drivers/edac/skx_common.h
parent8d8fcc391f50e2d9686d42b85e9b1db89b1bbb35 (diff)
EDAC/skx_common: Enable EDAC support for the "near" memory
The current {skx,i10nm}_edac drivers lack EDAC support for decoding errors from the 1st level memory (the fast "near" memory used as cache) of the 2-level memory system. Introduce a helper function, skx_error_in_mem(), and call it at the beginning of skx_mce_check_error() to check whether an error comes from memory. As long as the error comes from memory (in either the 1-level memory system or the 2-level memory system), decode it. Reported-and-tested-by: Youquan Song <youquan.song@intel.com> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> Link: https://lore.kernel.org/all/20230113032802.41752-1-qiuxu.zhuo@intel.com
Diffstat (limited to 'drivers/edac/skx_common.h')
-rw-r--r--drivers/edac/skx_common.h24
1 files changed, 24 insertions, 0 deletions
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 0cbadd3d2cd3..312032657264 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -57,6 +57,30 @@
#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */
/*
+ * According to Intel Architecture spec vol 3B,
+ * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
+ * memory errors should fit one of these bit patterns:
+ * 000f 0000 1mmm cccc (binary)
+ * 000f 0010 1mmm cccc (binary) [RAM used as cache]
+ * where:
+ * f = Correction Report Filtering Bit (if 1, subsequent errors won't be shown)
+ * mmm = error type
+ * cccc = channel
+ * The mask below clears the f, mmm and cccc bits and keeps all remaining bits.
+ */
+#define MCACOD_MEM_ERR_MASK 0xef80
+/*
+ * Pattern 0000 0000 1000 0000: errors from the memory of the 1-level memory
+ * system or the 2nd level (slow "far") memory of the 2-level memory system.
+ */
+#define MCACOD_MEM_CTL_ERR 0x80
+/*
+ * Pattern 0000 0010 1000 0000: errors from the 1st level memory (the fast
+ * "near" memory used as cache) of the 2-level memory system.
+ */
+#define MCACOD_EXT_MEM_ERR 0x280
+
+/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
* memory controllers on the die.