summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig142
-rw-r--r--drivers/edac/Makefile25
-rw-r--r--drivers/edac/a72_edac.c225
-rw-r--r--drivers/edac/altera_edac.c77
-rw-r--r--drivers/edac/altera_edac.h2
-rw-r--r--drivers/edac/amd64_edac.c1943
-rw-r--r--drivers/edac/amd64_edac.h102
-rw-r--r--drivers/edac/amd8111_edac.c597
-rw-r--r--drivers/edac/amd8111_edac.h118
-rw-r--r--drivers/edac/amd8131_edac.c358
-rw-r--r--drivers/edac/amd8131_edac.h107
-rw-r--r--drivers/edac/armada_xp_edac.c14
-rw-r--r--drivers/edac/aspeed_edac.c4
-rw-r--r--drivers/edac/bluefield_edac.c184
-rw-r--r--drivers/edac/cell_edac.c282
-rw-r--r--drivers/edac/cpc925_edac.c6
-rw-r--r--drivers/edac/debugfs.c5
-rw-r--r--drivers/edac/dmc520_edac.c8
-rw-r--r--drivers/edac/e752x_edac.c2
-rw-r--r--drivers/edac/e7xxx_edac.c3
-rw-r--r--drivers/edac/ecs.c207
-rw-r--r--drivers/edac/edac_device.c238
-rw-r--r--drivers/edac/edac_device.h24
-rw-r--r--drivers/edac/edac_device_sysfs.c40
-rw-r--r--drivers/edac/edac_mc.c3
-rw-r--r--drivers/edac/edac_mc_sysfs.c415
-rw-r--r--drivers/edac/edac_module.c4
-rw-r--r--drivers/edac/edac_pci.h5
-rw-r--r--drivers/edac/edac_pci_sysfs.c20
-rw-r--r--drivers/edac/fsl_ddr_edac.c147
-rw-r--r--drivers/edac/fsl_ddr_edac.h15
-rw-r--r--drivers/edac/ghes_edac.c9
-rw-r--r--drivers/edac/highbank_l2_edac.c8
-rw-r--r--drivers/edac/highbank_mc_edac.c6
-rw-r--r--drivers/edac/i10nm_base.c1047
-rw-r--r--drivers/edac/i5000_edac.c15
-rw-r--r--drivers/edac/i5100_edac.c5
-rw-r--r--drivers/edac/i5400_edac.c3
-rw-r--r--drivers/edac/i7300_edac.c7
-rw-r--r--drivers/edac/i7core_edac.c4
-rw-r--r--drivers/edac/i82860_edac.c3
-rw-r--r--drivers/edac/ie31200_edac.c688
-rw-r--r--drivers/edac/igen6_edac.c395
-rw-r--r--drivers/edac/imh_base.c602
-rw-r--r--drivers/edac/layerscape_edac.c5
-rw-r--r--drivers/edac/loongson_edac.c157
-rw-r--r--drivers/edac/mce_amd.c556
-rw-r--r--drivers/edac/mem_repair.c357
-rw-r--r--drivers/edac/mpc85xx_edac.c16
-rw-r--r--drivers/edac/npcm_edac.c542
-rw-r--r--drivers/edac/octeon_edac-l2c.c7
-rw-r--r--drivers/edac/octeon_edac-lmc.c4
-rw-r--r--drivers/edac/octeon_edac-pc.c6
-rw-r--r--drivers/edac/octeon_edac-pci.c5
-rw-r--r--drivers/edac/pnd2_edac.c63
-rw-r--r--drivers/edac/ppc4xx_edac.c1428
-rw-r--r--drivers/edac/ppc4xx_edac.h167
-rw-r--r--drivers/edac/qcom_edac.c192
-rw-r--r--drivers/edac/r82600_edac.c3
-rw-r--r--drivers/edac/sb_edac.c63
-rw-r--r--drivers/edac/scrub.c210
-rw-r--r--drivers/edac/sifive_edac.c3
-rw-r--r--drivers/edac/skx_base.c102
-rw-r--r--drivers/edac/skx_common.c333
-rw-r--r--drivers/edac/skx_common.h235
-rw-r--r--drivers/edac/synopsys_edac.c191
-rw-r--r--drivers/edac/thunderx_edac.c24
-rw-r--r--drivers/edac/ti_edac.c4
-rw-r--r--drivers/edac/versal_edac.c1196
-rw-r--r--drivers/edac/versalnet_edac.c962
-rw-r--r--drivers/edac/xgene_edac.c31
-rw-r--r--drivers/edac/zynqmp_edac.c465
72 files changed, 8886 insertions, 6555 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 4cfdefbd744d..81e40543ffd8 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -23,14 +23,6 @@ menuconfig EDAC
if EDAC
-config EDAC_LEGACY_SYSFS
- bool "EDAC legacy sysfs"
- default y
- help
- Enable the compatibility sysfs nodes.
- Use 'Y' if your edac utilities aren't ported to work with the newer
- structures.
-
config EDAC_DEBUG
bool "Debugging"
select DEBUG_FS
@@ -75,9 +67,39 @@ config EDAC_GHES
In doubt, say 'Y'.
+config EDAC_SCRUB
+ bool "EDAC scrub feature"
+ help
+ The EDAC scrub feature is optional and is designed to control the
+ memory scrubbers in the system. The common sysfs scrub interface
+ abstracts the control of various arbitrary scrubbing functionalities
+ into a unified set of functions.
+ Say 'y/n' to enable/disable EDAC scrub feature.
+
+config EDAC_ECS
+ bool "EDAC ECS (Error Check Scrub) feature"
+ help
+ The EDAC ECS feature is optional and is designed to control on-die
+ error check scrub (e.g., DDR5 ECS) in the system. The common sysfs
+ ECS interface abstracts the control of various ECS functionalities
+ into a unified set of functions.
+ Say 'y/n' to enable/disable EDAC ECS feature.
+
+config EDAC_MEM_REPAIR
+ bool "EDAC memory repair feature"
+ help
+ The EDAC memory repair feature is optional and is designed to control
+ the memory devices with repair features, such as Post Package Repair
+ (PPR), memory sparing etc. The common sysfs memory repair interface
+ abstracts the control of various memory repair functionalities into
+ a unified set of functions.
+ Say 'y/n' to enable/disable EDAC memory repair feature.
+
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64)"
depends on AMD_NB && EDAC_DECODE_MCE
+ depends on AMD_NODE
+ imply AMD_ATL
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families (>= K8) of memory controllers.
@@ -166,7 +188,7 @@ config EDAC_I3200
config EDAC_IE31200
tristate "Intel e312xx"
- depends on PCI && X86
+ depends on PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction on the Intel
E3-1200 based DRAM controllers.
@@ -261,6 +283,18 @@ config EDAC_I10NM
system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT.
+config EDAC_IMH
+ tristate "Intel Integrated Memory/IO Hub MC"
+ depends on X86_64 && X86_MCE_INTEL && ACPI
+ depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y
+ select DMI
+ select ACPI_ADXL
+ help
+ Support for error detection and correction on the Intel
+ Integrated Memory/IO Hub Memory Controller. This MC IP is
+ first used on the Diamond Rapids servers but may appear on
+ others in the future.
+
config EDAC_PND2
tristate "Intel Pondicherry2"
depends on PCI && X86_64 && X86_MCE_INTEL
@@ -302,41 +336,6 @@ config EDAC_PASEMI
Support for error detection and correction on PA Semi
PWRficient.
-config EDAC_CELL
- tristate "Cell Broadband Engine memory controller"
- depends on PPC_CELL_COMMON
- help
- Support for error detection and correction on the
- Cell Broadband Engine internal memory controller
- on platform without a hypervisor
-
-config EDAC_PPC4XX
- tristate "PPC4xx IBM DDR2 Memory Controller"
- depends on 4xx
- help
- This enables support for EDAC on the ECC memory used
- with the IBM DDR2 memory controller found in various
- PowerPC 4xx embedded processors such as the 405EX[r],
- 440SP, 440SPe, 460EX, 460GT and 460SX.
-
-config EDAC_AMD8131
- tristate "AMD8131 HyperTransport PCI-X Tunnel"
- depends on PCI && PPC_MAPLE
- help
- Support for error detection and correction on the
- AMD8131 HyperTransport PCI-X Tunnel chip.
- Note, add more Kconfig dependency if it's adopted
- on some machine other than Maple.
-
-config EDAC_AMD8111
- tristate "AMD8111 HyperTransport I/O Hub"
- depends on PCI && PPC_MAPLE
- help
- Support for error detection and correction on the
- AMD8111 HyperTransport I/O Hub chip.
- Note, add more Kconfig dependency if it's adopted
- on some machine other than Maple.
-
config EDAC_CPC925
tristate "IBM CPC925 Memory Controller (PPC970FX)"
depends on PPC64
@@ -542,4 +541,59 @@ config EDAC_DMC520
Support for error detection and correction on the
SoCs with ARM DMC-520 DRAM controller.
+config EDAC_ZYNQMP
+ tristate "Xilinx ZynqMP OCM Controller"
+ depends on ARCH_ZYNQMP || COMPILE_TEST
+ help
+ This driver supports error detection and correction for the
+ Xilinx ZynqMP OCM (On Chip Memory) controller. It can also be
+ built as a module. In that case it will be called zynqmp_edac.
+
+config EDAC_NPCM
+ tristate "Nuvoton NPCM DDR Memory Controller"
+ depends on (ARCH_NPCM || COMPILE_TEST)
+ help
+ Support for error detection and correction on the Nuvoton NPCM DDR
+ memory controller.
+
+ The memory controller supports single bit error correction, double bit
+ error detection (in-line ECC in which a section 1/8th of the memory
+ device used to store data is used for ECC storage).
+
+config EDAC_VERSAL
+ tristate "Xilinx Versal DDR Memory Controller"
+ depends on ARCH_ZYNQMP || COMPILE_TEST
+ help
+ Support for error detection and correction on the Xilinx Versal DDR
+ memory controller.
+
+ Report both single bit errors (CE) and double bit errors (UE).
+ Support injecting both correctable and uncorrectable errors
+ for debugging purposes.
+
+config EDAC_LOONGSON
+ tristate "Loongson Memory Controller"
+ depends on LOONGARCH && ACPI
+ help
+ Support for error detection and correction on the Loongson
+ family memory controller. This driver reports single bit
+ errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
+ are compatible.
+
+config EDAC_CORTEX_A72
+ tristate "ARM Cortex A72"
+ depends on ARM64
+ help
+ Support for L1/L2 cache error detection for ARM Cortex A72 processor.
+ The detected and reported errors are from reading CPU/L2 memory error
+ syndrome registers.
+
+config EDAC_VERSALNET
+ tristate "AMD VersalNET DDR Controller"
+ depends on CDX_CONTROLLER && ARCH_ZYNQMP
+ help
+ Support for single bit error correction, double bit error detection
+ and other system errors from various IP subsystems like RPU, NOCs,
+ HNICX, PL on the AMD Versal NET DDR memory controller.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 2d1641a27a28..8429b1e856bc 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -12,6 +12,9 @@ edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o wq.o
edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o
+edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o
+edac_core-$(CONFIG_EDAC_ECS) += ecs.o
+edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o
ifdef CONFIG_PCI
edac_core-y += edac_pci.o edac_pci_sysfs.o
@@ -54,16 +57,16 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o
layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o
obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o
-skx_edac-y := skx_common.o skx_base.o
-obj-$(CONFIG_EDAC_SKX) += skx_edac.o
+skx_edac_common-y := skx_common.o
-i10nm_edac-y := skx_common.o i10nm_base.o
-obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o
+skx_edac-y := skx_base.o
+obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o
-obj-$(CONFIG_EDAC_CELL) += cell_edac.o
-obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
-obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
-obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
+i10nm_edac-y := i10nm_base.o
+obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o
+
+imh_edac-y := imh_base.o
+obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o
obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
@@ -84,3 +87,9 @@ obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
+obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o
+obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o
+obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o
+obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o
+obj-$(CONFIG_EDAC_VERSALNET) += versalnet_edac.o
+obj-$(CONFIG_EDAC_CORTEX_A72) += a72_edac.o
diff --git a/drivers/edac/a72_edac.c b/drivers/edac/a72_edac.c
new file mode 100644
index 000000000000..9262d75c3855
--- /dev/null
+++ b/drivers/edac/a72_edac.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cortex A72 EDAC L1 and L2 cache error detection
+ *
+ * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com>
+ *
+ * Based on Code from:
+ * Copyright (c) 2018, NXP Semiconductor
+ * Author: York Sun <york.sun@nxp.com>
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/bitfield.h>
+#include <asm/smp_plat.h>
+
+#include "edac_module.h"
+
+/* Name shared by the platform device, the platform driver and ctl_name. */
+#define DRVNAME "a72-edac"
+
+/* System register encodings of the CPU and L2 memory error syndrome registers. */
+#define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2)
+#define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3)
+
+/* Fields extracted by report_errors(): RAM identifier and "CPUID/WAY". */
+#define CPUMERRSR_EL1_RAMID GENMASK(30, 24)
+#define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18)
+
+/*
+ * VALID gates both reporting and clearing of a syndrome register;
+ * FATAL selects uncorrectable (set) versus correctable (clear) reporting.
+ */
+#define CPUMERRSR_EL1_VALID BIT(31)
+#define CPUMERRSR_EL1_FATAL BIT(63)
+#define L2MERRSR_EL1_VALID BIT(31)
+#define L2MERRSR_EL1_FATAL BIT(63)
+
+/* RAMID values that report_errors() translates into a RAM name. */
+#define L1_I_TAG_RAM 0x00
+#define L1_I_DATA_RAM 0x01
+#define L1_D_TAG_RAM 0x08
+#define L1_D_DATA_RAM 0x09
+#define TLB_RAM 0x18
+
+/* Size in bytes of the on-stack message buffer used by report_errors(). */
+#define MESSAGE_SIZE 64
+
+/*
+ * Snapshot of both syndrome registers as read on one CPU by read_errors():
+ * cpu_mesr holds the SYS_CPUMERRSR_EL1 value, l2_mesr the SYS_L2MERRSR_EL1
+ * value.
+ */
+struct mem_err_synd_reg {
+ u64 cpu_mesr;
+ u64 l2_mesr;
+};
+
+/*
+ * CPUs whose device-tree node matched the compatible table and carries the
+ * "edac-enabled" property; filled at module init, iterated by a72_edac_check().
+ */
+static struct cpumask compat_mask;
+
+/* Map a CPUMERRSR_EL1 RAMID field value to the RAM name used in messages. */
+static const char *a72_l1_ram_name(unsigned long ramid)
+{
+	switch (ramid) {
+	case L1_I_TAG_RAM:
+		return "L1-I Tag RAM";
+	case L1_I_DATA_RAM:
+		return "L1-I Data RAM";
+	case L1_D_TAG_RAM:
+		return "L1-D Tag RAM";
+	case L1_D_DATA_RAM:
+		return "L1-D Data RAM";
+	case TLB_RAM:
+		return "TLB RAM";
+	default:
+		return "Unspecified";
+	}
+}
+
+/*
+ * Turn one CPU's syndrome register snapshot into EDAC events.
+ *
+ * @edac_ctl: EDAC device the events are reported against
+ * @cpu:      CPU the snapshot was taken on; also passed as the second
+ *            argument of the edac_device_handle_*() calls
+ * @mesr:     register values captured by read_errors()
+ *
+ * A register is only looked at when its VALID bit is set. The FATAL bit
+ * decides between edac_device_handle_ue() and edac_device_handle_ce().
+ * CPU (L1/TLB) errors are reported with index 0 and L2 errors with index 1
+ * as the third argument (presumably the cache-level block - confirm against
+ * the EDAC device API).
+ */
+static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu,
+			  struct mem_err_synd_reg *mesr)
+{
+	const u64 l1_synd = mesr->cpu_mesr;
+	const u64 l2_synd = mesr->l2_mesr;
+	char msg[MESSAGE_SIZE];
+
+	if (l1_synd & CPUMERRSR_EL1_VALID) {
+		bool is_fatal = !!(l1_synd & CPUMERRSR_EL1_FATAL);
+		const char *ram;
+
+		ram = a72_l1_ram_name(FIELD_GET(CPUMERRSR_EL1_RAMID, l1_synd));
+
+		snprintf(msg, sizeof(msg), "%s %s error(s) on CPU %d",
+			 ram, is_fatal ? "fatal" : "correctable", cpu);
+
+		if (!is_fatal)
+			edac_device_handle_ce(edac_ctl, cpu, 0, msg);
+		else
+			edac_device_handle_ue(edac_ctl, cpu, 0, msg);
+	}
+
+	if (l2_synd & L2MERRSR_EL1_VALID) {
+		bool is_fatal = !!(l2_synd & L2MERRSR_EL1_FATAL);
+		unsigned long cpuid_way;
+
+		cpuid_way = FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_synd);
+
+		snprintf(msg, sizeof(msg), "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx",
+			 is_fatal ? "fatal" : "correctable", cpu, cpuid_way);
+
+		if (!is_fatal)
+			edac_device_handle_ce(edac_ctl, cpu, 1, msg);
+		else
+			edac_device_handle_ue(edac_ctl, cpu, 1, msg);
+	}
+}
+
+/*
+ * Cross-call handler: capture both syndrome registers of the CPU this runs
+ * on into the struct mem_err_synd_reg passed through @data.
+ *
+ * A register whose VALID bit is set is written back as 0 after being
+ * captured (presumably to clear the recorded error - confirm against the
+ * core's reference manual), followed by an isb().
+ */
+static void read_errors(void *data)
+{
+	struct mem_err_synd_reg *snap = data;
+	u64 val;
+
+	/* CPU (L1/TLB) syndrome first, same order as the registers are reported. */
+	val = read_sysreg_s(SYS_CPUMERRSR_EL1);
+	snap->cpu_mesr = val;
+	if (val & CPUMERRSR_EL1_VALID) {
+		write_sysreg_s(0, SYS_CPUMERRSR_EL1);
+		isb();
+	}
+
+	val = read_sysreg_s(SYS_L2MERRSR_EL1);
+	snap->l2_mesr = val;
+	if (val & L2MERRSR_EL1_VALID) {
+		write_sysreg_s(0, SYS_L2MERRSR_EL1);
+		isb();
+	}
+}
+
+/*
+ * EDAC check callback: sample the syndrome registers of every online CPU in
+ * compat_mask and report whatever they recorded.
+ *
+ * The registers are read on the CPU itself through a synchronous cross-call.
+ * CPU hotplug is held off with cpus_read_lock() for the whole walk.
+ */
+static void a72_edac_check(struct edac_device_ctl_info *edac_ctl)
+{
+	struct mem_err_synd_reg mesr;
+	int cpu;
+
+	cpus_read_lock();
+	for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) {
+		/*
+		 * If the cross-call did not run, mesr was never filled in;
+		 * reporting from it would decode stale stack contents.
+		 */
+		if (smp_call_function_single(cpu, read_errors, &mesr, true))
+			continue;
+
+		report_errors(edac_ctl, cpu, &mesr);
+	}
+	cpus_read_unlock();
+}
+
+/*
+ * Allocate and register the EDAC device for the platform device.
+ *
+ * The control structure is allocated with num_possible_cpus() "cpu"
+ * instances, each with two "L" blocks, and a72_edac_check() as its check
+ * callback. It is stored as driver data so that remove can find it.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * value returned by edac_device_add_device().
+ */
+static int a72_edac_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct edac_device_ctl_info *ctl;
+	int err;
+
+	ctl = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
+					 "L", 2, 1, edac_device_alloc_index());
+	if (!ctl)
+		return -ENOMEM;
+
+	ctl->ctl_name = DRVNAME;
+	ctl->mod_name = dev_name(dev);
+	ctl->dev_name = dev_name(dev);
+	ctl->dev = dev;
+	ctl->edac_check = a72_edac_check;
+	dev_set_drvdata(dev, ctl);
+
+	err = edac_device_add_device(ctl);
+	if (err)
+		edac_device_free_ctl_info(ctl);
+
+	return err;
+}
+
+/* Undo a72_edac_probe(): unregister the EDAC device, then free its control info. */
+static void a72_edac_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *ctl = platform_get_drvdata(pdev);
+	struct device *edac_dev = ctl->dev;
+
+	edac_device_del_device(edac_dev);
+	edac_device_free_ctl_info(ctl);
+}
+
+/*
+ * Compatible strings checked against each CPU's device-tree node at module
+ * init. The table is used with of_match_node() only; it is not attached to
+ * the platform driver below.
+ */
+static const struct of_device_id cortex_arm64_edac_of_match[] = {
+ { .compatible = "arm,cortex-a72" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
+
+/*
+ * Platform driver carrying the probe/remove callbacks. Its name is DRVNAME,
+ * the same name the platform device is registered with at module init.
+ */
+static struct platform_driver a72_edac_driver = {
+ .probe = a72_edac_probe,
+ .remove = a72_edac_remove,
+ .driver = {
+ .name = DRVNAME,
+ },
+};
+
+/* Platform device created at module init; stays NULL when no CPU qualified. */
+static struct platform_device *a72_pdev;
+
+/*
+ * Module init: collect the CPUs to monitor and register the platform
+ * device/driver pair.
+ *
+ * A CPU is added to compat_mask when its device-tree node matches
+ * cortex_arm64_edac_of_match and has the "edac-enabled" property. If no CPU
+ * qualifies nothing is registered and 0 is returned.
+ *
+ * Returns 0 on success, or the error from the platform device or platform
+ * driver registration.
+ */
+static int __init a72_edac_driver_init(void)
+{
+	int cpu, ret;
+
+	for_each_possible_cpu(cpu) {
+		struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
+
+		if (!np) {
+			pr_warn("failed to find device node for CPU %d\n", cpu);
+			continue;
+		}
+
+		if (of_match_node(cortex_arm64_edac_of_match, np) &&
+		    of_property_read_bool(np, "edac-enabled"))
+			cpumask_set_cpu(cpu, &compat_mask);
+	}
+
+	if (cpumask_empty(&compat_mask))
+		return 0;
+
+	a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
+	if (IS_ERR(a72_pdev)) {
+		pr_err("failed to register A72 EDAC device\n");
+		return PTR_ERR(a72_pdev);
+	}
+
+	ret = platform_driver_register(&a72_edac_driver);
+	if (ret)
+		/* Do not leave the device behind when init fails. */
+		platform_device_unregister(a72_pdev);
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister the platform device and driver.
+ *
+ * Module init returns success without registering anything when no CPU
+ * qualified, leaving a72_pdev NULL. In that case there is nothing to undo,
+ * and unregistering the never-registered driver must be avoided.
+ */
+static void __exit a72_edac_driver_exit(void)
+{
+	if (!a72_pdev)
+		return;
+
+	platform_device_unregister(a72_pdev);
+	platform_driver_unregister(&a72_edac_driver);
+}
+
+module_init(a72_edac_driver_init);
+module_exit(a72_edac_driver_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index e7e8e624a436..0c5b94e64ea1 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -22,6 +22,7 @@
#include <linux/of_platform.h>
#include <linux/panic_notifier.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/types.h>
#include <linux/uaccess.h>
@@ -98,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
if (status & priv->ecc_stat_ce_mask) {
regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset,
&err_addr);
- if (priv->ecc_uecnt_offset)
+ if (priv->ecc_cecnt_offset)
regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset,
&err_count);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
@@ -127,7 +128,6 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL);
if (!ptemp) {
- dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
edac_printk(KERN_ERR, EDAC_MC,
"Inject: Buffer Allocation error\n");
return -ENOMEM;
@@ -279,7 +279,6 @@ release:
static int altr_sdram_probe(struct platform_device *pdev)
{
- const struct of_device_id *id;
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct altr_sdram_mc_data *drvdata;
@@ -290,10 +289,6 @@ static int altr_sdram_probe(struct platform_device *pdev)
int irq, irq2, res = 0;
unsigned long mem_size, irqflags = 0;
- id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
- if (!id)
- return -ENODEV;
-
/* Grab the register range from the sdr controller in device tree */
mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"altr,sdr-syscon");
@@ -304,8 +299,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
}
/* Check specific dependencies for the module */
- priv = of_match_node(altr_sdram_ctrl_of_match,
- pdev->dev.of_node)->data;
+ priv = device_get_match_data(&pdev->dev);
/* Validate the SDRAM controller has ECC enabled */
if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) ||
@@ -459,15 +453,13 @@ free:
return res;
}
-static int altr_sdram_remove(struct platform_device *pdev)
+static void altr_sdram_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
/*
@@ -744,8 +736,7 @@ static int altr_edac_device_probe(struct platform_device *pdev)
}
dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name,
- 1, ecc_name, 1, 0, NULL, 0,
- dev_instance++);
+ 1, ecc_name, 1, 0, dev_instance++);
if (!dci) {
edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -812,7 +803,7 @@ fail:
return res;
}
-static int altr_edac_device_remove(struct platform_device *pdev)
+static void altr_edac_device_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
struct altr_edac_device_dev *drvdata = dci->pvt_info;
@@ -820,8 +811,6 @@ static int altr_edac_device_remove(struct platform_device *pdev)
debugfs_remove_recursive(drvdata->debugfs_dir);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(dci);
-
- return 0;
}
static struct platform_driver altr_edac_device_driver = {
@@ -1015,9 +1004,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
}
}
- /* Interrupt mode set to every SBERR */
- regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST,
- ALTR_A10_ECC_INTMODE);
/* Enable ECC */
ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base +
ALTR_A10_ECC_CTRL_OFST));
@@ -1198,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device)
if (ret)
return ret;
- /* Verify OCRAM has been initialized */
+ /*
+ * Verify that OCRAM has been initialized.
+ * During a warm reset, OCRAM contents are retained, but the control
+ * and status registers are reset to their default values. Therefore,
+ * ECC must be explicitly re-enabled in the control register.
+ * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set.
+ */
if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA,
- (base + ALTR_A10_ECC_INITSTAT_OFST)))
- return -ENODEV;
+ (base + ALTR_A10_ECC_INITSTAT_OFST))) {
+ if (!ecc_test_bits(ALTR_A10_ECC_EN,
+ (base + ALTR_A10_ECC_CTRL_OFST)))
+ ecc_set_bits(ALTR_A10_ECC_EN,
+ (base + ALTR_A10_ECC_CTRL_OFST));
+ else
+ return -ENODEV;
+ }
/* Enable IRQ on Single Bit Error */
writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST));
@@ -1371,7 +1369,7 @@ static const struct edac_device_prv_data a10_enetecc_data = {
.ue_set_mask = ALTR_A10_ECC_TDERRA,
.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
.ecc_irq_handler = altr_edac_a10_ecc_irq,
- .inject_fops = &altr_edac_a10_device_inject2_fops,
+ .inject_fops = &altr_edac_a10_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_ETHERNET */
@@ -1461,7 +1459,7 @@ static const struct edac_device_prv_data a10_usbecc_data = {
.ue_set_mask = ALTR_A10_ECC_TDERRA,
.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
.ecc_irq_handler = altr_edac_a10_ecc_irq,
- .inject_fops = &altr_edac_a10_device_inject2_fops,
+ .inject_fops = &altr_edac_a10_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_USB */
@@ -1523,7 +1521,7 @@ static int altr_portb_setup(struct altr_edac_device_dev *device)
/* Create the PortB EDAC device */
edac_idx = edac_device_alloc_index();
dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, 1,
- ecc_name, 1, 0, NULL, 0, edac_idx);
+ ecc_name, 1, 0, edac_idx);
if (!dci) {
edac_printk(KERN_ERR, EDAC_DEVICE,
"%s: Unable to allocate PortB EDAC device\n",
@@ -1759,9 +1757,9 @@ altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
local_irq_save(flags);
if (trig_type == ALTR_UE_TRIGGER_CHAR)
- writel(priv->ue_set_mask, set_addr);
+ writew(priv->ue_set_mask, set_addr);
else
- writel(priv->ce_set_mask, set_addr);
+ writew(priv->ce_set_mask, set_addr);
/* Ensure the interrupt test bits are set */
wmb();
@@ -1791,7 +1789,7 @@ altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
local_irq_save(flags);
if (trig_type == ALTR_UE_TRIGGER_CHAR) {
- writel(priv->ue_set_mask, set_addr);
+ writew(priv->ue_set_mask, set_addr);
} else {
/* Setup read/write of 4 bytes */
writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST);
@@ -1930,8 +1928,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
edac_idx = edac_device_alloc_index();
dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
- 1, ecc_name, 1, 0, NULL, 0,
- edac_idx);
+ 1, ecc_name, 1, 0, edac_idx);
if (!dci) {
edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -2138,21 +2135,23 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
return PTR_ERR(edac->ecc_mgr_map);
}
+ /* Set irq mask for DDR SBE to avoid any pending irq before registration */
+ regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST,
+ (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0));
+
edac->irq_chip.name = pdev->dev.of_node->name;
edac->irq_chip.irq_mask = a10_eccmgr_irq_mask;
edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask;
- edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64,
- &a10_eccmgr_ic_ops, edac);
+ edac->domain = irq_domain_create_linear(dev_fwnode(&pdev->dev), 64, &a10_eccmgr_ic_ops,
+ edac);
if (!edac->domain) {
dev_err(&pdev->dev, "Error adding IRQ domain\n");
return -ENOMEM;
}
edac->sb_irq = platform_get_irq(pdev, 0);
- if (edac->sb_irq < 0) {
- dev_err(&pdev->dev, "No SBERR IRQ resource\n");
+ if (edac->sb_irq < 0)
return edac->sb_irq;
- }
irq_set_chained_handler_and_data(edac->sb_irq,
altr_edac_a10_irq_handler,
@@ -2184,10 +2183,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
}
#else
edac->db_irq = platform_get_irq(pdev, 1);
- if (edac->db_irq < 0) {
- dev_err(&pdev->dev, "No DBERR IRQ resource\n");
+ if (edac->db_irq < 0)
return edac->db_irq;
- }
+
irq_set_chained_handler_and_data(edac->db_irq,
altr_edac_a10_irq_handler, edac);
#endif
@@ -2226,6 +2224,5 @@ static struct platform_driver altr_edac_a10_driver = {
};
module_platform_driver(altr_edac_a10_driver);
-MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 3727e72c8c2e..7248d24c4908 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -249,6 +249,8 @@ struct altr_sdram_mc_data {
#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94
#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1)
+#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16)
+#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17)
#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C
#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e3318e5575a3..2391f3469961 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/ras.h>
+#include <linux/string_choices.h>
#include "amd64_edac.h"
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
+#include <asm/amd/node.h>
static struct edac_pci_ctl_info *pci_ctl;
@@ -13,15 +16,12 @@ module_param(ecc_enable_override, int, 0644);
static struct msr __percpu *msrs;
-static struct amd64_family_type *fam_type;
-
-static inline u32 get_umc_reg(u32 reg)
+static inline u32 get_umc_reg(struct amd64_pvt *pvt, u32 reg)
{
- if (!fam_type->flags.zn_regs_v2)
+ if (!pvt->flags.zn_regs_v2)
return reg;
switch (reg) {
- case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5;
case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5;
case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5;
}
@@ -82,7 +82,7 @@ int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
amd64_warn("%s: error reading F%dx%03x.\n",
func, PCI_FUNC(pdev->devfn), offset);
- return err;
+ return pcibios_err_to_errno(err);
}
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
@@ -95,7 +95,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
amd64_warn("%s: error writing to F%dx%03x.\n",
func, PCI_FUNC(pdev->devfn), offset);
- return err;
+ return pcibios_err_to_errno(err);
}
/*
@@ -182,21 +182,6 @@ static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
* other archs, we might not have access to the caches directly.
*/
-static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval)
-{
- /*
- * Fam17h supports scrub values between 0x5 and 0x14. Also, the values
- * are shifted down by 0x5, so scrubval 0x5 is written to the register
- * as 0x0, scrubval 0x6 as 0x1, etc.
- */
- if (scrubval >= 0x5 && scrubval <= 0x14) {
- scrubval -= 0x5;
- pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF);
- pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1);
- } else {
- pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1);
- }
-}
/*
* Scan the scrub rate mapping table for a close or matching bandwidth value to
* issue. If requested is too big, then use last maximum value found.
@@ -229,9 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
scrubval = scrubrates[i].scrubval;
- if (pvt->umc) {
- __f17h_set_scrubval(pvt, scrubval);
- } else if (pvt->fam == 0x15 && pvt->model == 0x60) {
+ if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0);
pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
f15h_select_dct(pvt, 1);
@@ -271,16 +254,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
int i, retval = -EINVAL;
u32 scrubval = 0;
- if (pvt->umc) {
- amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
- if (scrubval & BIT(0)) {
- amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
- scrubval &= 0xF;
- scrubval += 0x5;
- } else {
- scrubval = 0;
- }
- } else if (pvt->fam == 0x15) {
+ if (pvt->fam == 0x15) {
/* Erratum #505 */
if (pvt->model < 0x10)
f15h_select_dct(pvt, 0);
@@ -463,7 +437,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
for (i = 0; i < pvt->csels[dct].m_cnt; i++)
#define for_each_umc(i) \
- for (i = 0; i < fam_type->max_mcs; i++)
+ for (i = 0; i < pvt->max_mcs; i++)
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -1003,321 +977,186 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
return csrow;
}
-/* Protect the PCI config register pairs used for DF indirect access. */
-static DEFINE_MUTEX(df_indirect_mutex);
-
/*
- * Data Fabric Indirect Access uses FICAA/FICAD.
- *
- * Fabric Indirect Configuration Access Address (FICAA): Constructed based
- * on the device's Instance Id and the PCI function and register offset of
- * the desired register.
+ * See AMD PPR DF::LclNodeTypeMap
*
- * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
- * and FICAD HI registers but so far we only need the LO register.
+ * This register gives information for nodes of the same type within a system.
*
- * Use Instance Id 0xFF to indicate a broadcast read.
+ * Reading this register from a GPU node will tell how many GPU nodes are in the
+ * system and what the lowest AMD Node ID value is for the GPU nodes. Use this
+ * info to fix up the Linux logical "Node ID" value set in the AMD NB code and EDAC.
*/
-#define DF_BROADCAST 0xFF
-static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
-{
- struct pci_dev *F4;
- u32 ficaa;
- int err = -ENODEV;
-
- if (node >= amd_nb_num())
- goto out;
-
- F4 = node_to_amd_nb(node)->link;
- if (!F4)
- goto out;
-
- ficaa = (instance_id == DF_BROADCAST) ? 0 : 1;
- ficaa |= reg & 0x3FC;
- ficaa |= (func & 0x7) << 11;
- ficaa |= instance_id << 16;
-
- mutex_lock(&df_indirect_mutex);
-
- err = pci_write_config_dword(F4, 0x5C, ficaa);
- if (err) {
- pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
- goto out_unlock;
- }
+static struct local_node_map {
+ u16 node_count;
+ u16 base_node_id;
+} gpu_node_map;
- err = pci_read_config_dword(F4, 0x98, lo);
- if (err)
- pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
-
-out_unlock:
- mutex_unlock(&df_indirect_mutex);
-
-out:
- return err;
-}
+#define PCI_DEVICE_ID_AMD_MI200_DF_F1 0x14d1
+#define REG_LOCAL_NODE_TYPE_MAP 0x144
-static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
-{
- return __df_indirect_read(node, func, reg, instance_id, lo);
-}
+/* Local Node Type Map (LNTM) fields */
+#define LNTM_NODE_COUNT GENMASK(27, 16)
+#define LNTM_BASE_NODE_ID GENMASK(11, 0)
-static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
+static int gpu_get_node_map(struct amd64_pvt *pvt)
{
- return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
-}
-
-struct addr_ctx {
- u64 ret_addr;
+ struct pci_dev *pdev;
+ int ret;
u32 tmp;
- u16 nid;
- u8 inst_id;
-};
-
-static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
-{
- u64 dram_base_addr, dram_limit_addr, dram_hole_base;
-
- u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
- u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
- u8 intlv_addr_sel, intlv_addr_bit;
- u8 num_intlv_bits, hashed_bit;
- u8 lgcy_mmio_hole_en, base = 0;
- u8 cs_mask, cs_id = 0;
- bool hash_enabled = false;
-
- struct addr_ctx ctx;
-
- memset(&ctx, 0, sizeof(ctx));
-
- /* Start from the normalized address */
- ctx.ret_addr = norm_addr;
-
- ctx.nid = nid;
- ctx.inst_id = umc;
-
- /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
- if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp))
- goto out_err;
-
- /* Remove HiAddrOffset from normalized address, if enabled: */
- if (ctx.tmp & BIT(0)) {
- u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8;
-
- if (norm_addr >= hi_addr_offset) {
- ctx.ret_addr -= hi_addr_offset;
- base = 1;
- }
- }
- /* Read D18F0x110 (DramBaseAddress). */
- if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp))
- goto out_err;
-
- /* Check if address range is valid. */
- if (!(ctx.tmp & BIT(0))) {
- pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
- __func__, ctx.tmp);
- goto out_err;
- }
-
- lgcy_mmio_hole_en = ctx.tmp & BIT(1);
- intlv_num_chan = (ctx.tmp >> 4) & 0xF;
- intlv_addr_sel = (ctx.tmp >> 8) & 0x7;
- dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16;
-
- /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
- if (intlv_addr_sel > 3) {
- pr_err("%s: Invalid interleave address select %d.\n",
- __func__, intlv_addr_sel);
- goto out_err;
- }
-
- /* Read D18F0x114 (DramLimitAddress). */
- if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp))
- goto out_err;
-
- intlv_num_sockets = (ctx.tmp >> 8) & 0x1;
- intlv_num_dies = (ctx.tmp >> 10) & 0x3;
- dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
-
- intlv_addr_bit = intlv_addr_sel + 8;
-
- /* Re-use intlv_num_chan by setting it equal to log2(#channels) */
- switch (intlv_num_chan) {
- case 0: intlv_num_chan = 0; break;
- case 1: intlv_num_chan = 1; break;
- case 3: intlv_num_chan = 2; break;
- case 5: intlv_num_chan = 3; break;
- case 7: intlv_num_chan = 4; break;
-
- case 8: intlv_num_chan = 1;
- hash_enabled = true;
- break;
- default:
- pr_err("%s: Invalid number of interleaved channels %d.\n",
- __func__, intlv_num_chan);
- goto out_err;
- }
+ /*
+ * Mapping of nodes from hardware-provided AMD Node ID to a
+ * Linux logical one is applicable for MI200 models. Therefore,
+ * return early for other heterogeneous systems.
+ */
+ if (pvt->F3->device != PCI_DEVICE_ID_AMD_MI200_DF_F3)
+ return 0;
- num_intlv_bits = intlv_num_chan;
+ /*
+ * Node ID 0 is reserved for CPUs. Therefore, a non-zero Node ID
+ * means the values have already been cached.
+ */
+ if (gpu_node_map.base_node_id)
+ return 0;
- if (intlv_num_dies > 2) {
- pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
- __func__, intlv_num_dies);
- goto out_err;
+ pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL);
+ if (!pdev) {
+ ret = -ENODEV;
+ goto out;
}
- num_intlv_bits += intlv_num_dies;
-
- /* Add a bit if sockets are interleaved. */
- num_intlv_bits += intlv_num_sockets;
-
- /* Assert num_intlv_bits <= 4 */
- if (num_intlv_bits > 4) {
- pr_err("%s: Invalid interleave bits %d.\n",
- __func__, num_intlv_bits);
- goto out_err;
+ ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+ if (ret) {
+ ret = pcibios_err_to_errno(ret);
+ goto out;
}
- if (num_intlv_bits > 0) {
- u64 temp_addr_x, temp_addr_i, temp_addr_y;
- u8 die_id_bit, sock_id_bit, cs_fabric_id;
+ gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
+ gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
- /*
- * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
- * This is the fabric id for this coherent slave. Use
- * umc/channel# as instance id of the coherent slave
- * for FICAA.
- */
- if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp))
- goto out_err;
-
- cs_fabric_id = (ctx.tmp >> 8) & 0xFF;
- die_id_bit = 0;
-
- /* If interleaved over more than 1 channel: */
- if (intlv_num_chan) {
- die_id_bit = intlv_num_chan;
- cs_mask = (1 << die_id_bit) - 1;
- cs_id = cs_fabric_id & cs_mask;
- }
+out:
+ pci_dev_put(pdev);
+ return ret;
+}
- sock_id_bit = die_id_bit;
+static int fixup_node_id(int node_id, struct mce *m)
+{
+ /* MCA_IPID[InstanceIdHi] gives the AMD Node ID for the bank. */
+ u8 nid = (m->ipid >> 44) & 0xF;
- /* Read D18F1x208 (SystemFabricIdMask). */
- if (intlv_num_dies || intlv_num_sockets)
- if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp))
- goto out_err;
+ if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+ return node_id;
- /* If interleaved over more than 1 die. */
- if (intlv_num_dies) {
- sock_id_bit = die_id_bit + intlv_num_dies;
- die_id_shift = (ctx.tmp >> 24) & 0xF;
- die_id_mask = (ctx.tmp >> 8) & 0xFF;
+ /* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
+ if (nid < gpu_node_map.base_node_id)
+ return node_id;
- cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
- }
+ /* Convert the hardware-provided AMD Node ID to a Linux logical one. */
+ return nid - gpu_node_map.base_node_id + 1;
+}
- /* If interleaved over more than 1 socket. */
- if (intlv_num_sockets) {
- socket_id_shift = (ctx.tmp >> 28) & 0xF;
- socket_id_mask = (ctx.tmp >> 16) & 0xFF;
+static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
- cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
- }
+/*
+ * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
+ * are ECC capable.
+ */
+static unsigned long dct_determine_edac_cap(struct amd64_pvt *pvt)
+{
+ unsigned long edac_cap = EDAC_FLAG_NONE;
+ u8 bit;
- /*
- * The pre-interleaved address consists of XXXXXXIIIYYYYY
- * where III is the ID for this CS, and XXXXXXYYYYY are the
- * address bits from the post-interleaved address.
- * "num_intlv_bits" has been calculated to tell us how many "I"
- * bits there are. "intlv_addr_bit" tells us how many "Y" bits
- * there are (where "I" starts).
- */
- temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
- temp_addr_i = (cs_id << intlv_addr_bit);
- temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
- ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
- }
+ bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
+ ? 19
+ : 17;
- /* Add dram base address */
- ctx.ret_addr += dram_base_addr;
+ if (pvt->dclr0 & BIT(bit))
+ edac_cap = EDAC_FLAG_SECDED;
- /* If legacy MMIO hole enabled */
- if (lgcy_mmio_hole_en) {
- if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp))
- goto out_err;
+ return edac_cap;
+}
- dram_hole_base = ctx.tmp & GENMASK(31, 24);
- if (ctx.ret_addr >= dram_hole_base)
- ctx.ret_addr += (BIT_ULL(32) - dram_hole_base);
- }
+static unsigned long umc_determine_edac_cap(struct amd64_pvt *pvt)
+{
+ u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
+ unsigned long edac_cap = EDAC_FLAG_NONE;
- if (hash_enabled) {
- /* Save some parentheses and grab ls-bit at the end. */
- hashed_bit = (ctx.ret_addr >> 12) ^
- (ctx.ret_addr >> 18) ^
- (ctx.ret_addr >> 21) ^
- (ctx.ret_addr >> 30) ^
- cs_id;
+ for_each_umc(i) {
+ if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
+ continue;
- hashed_bit &= BIT(0);
+ umc_en_mask |= BIT(i);
- if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0)))
- ctx.ret_addr ^= BIT(intlv_addr_bit);
+ /* UMC Configuration bit 12 (DimmEccEn) */
+ if (pvt->umc[i].umc_cfg & BIT(12))
+ dimm_ecc_en_mask |= BIT(i);
}
- /* Is calculated system address is above DRAM limit address? */
- if (ctx.ret_addr > dram_limit_addr)
- goto out_err;
-
- *sys_addr = ctx.ret_addr;
- return 0;
+ if (umc_en_mask == dimm_ecc_en_mask)
+ edac_cap = EDAC_FLAG_SECDED;
-out_err:
- return -EINVAL;
+ return edac_cap;
}
-static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
-
/*
- * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
- * are ECC capable.
+ * Debug routine to display the memory sizes of all logical DIMMs and their
+ * CSROWs.
*/
-static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
+static void dct_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{
- unsigned long edac_cap = EDAC_FLAG_NONE;
- u8 bit;
+ u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
+ u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
+ int dimm, size0, size1;
- if (pvt->umc) {
- u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
+ if (pvt->fam == 0xf) {
+ /* K8 families < revF not supported yet */
+ if (pvt->ext_model < K8_REV_F)
+ return;
- for_each_umc(i) {
- if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
- continue;
+ WARN_ON(ctrl != 0);
+ }
- umc_en_mask |= BIT(i);
+ if (pvt->fam == 0x10) {
+ dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
+ : pvt->dbam0;
+ dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
+ pvt->csels[1].csbases :
+ pvt->csels[0].csbases;
+ } else if (ctrl) {
+ dbam = pvt->dbam0;
+ dcsb = pvt->csels[1].csbases;
+ }
+ edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
+ ctrl, dbam);
- /* UMC Configuration bit 12 (DimmEccEn) */
- if (pvt->umc[i].umc_cfg & BIT(12))
- dimm_ecc_en_mask |= BIT(i);
- }
+ edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
- if (umc_en_mask == dimm_ecc_en_mask)
- edac_cap = EDAC_FLAG_SECDED;
- } else {
- bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
- ? 19
- : 17;
+ /* Dump memory sizes for DIMM and its CSROWs */
+ for (dimm = 0; dimm < 4; dimm++) {
+ size0 = 0;
+ if (dcsb[dimm * 2] & DCSB_CS_ENABLE)
+ /*
+ * For F15m60h, we need multiplier for LRDIMM cs_size
+ * calculation. We pass dimm value to the dbam_to_cs
+ * mapper so we can find the multiplier from the
+ * corresponding DCSM.
+ */
+ size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
+ DBAM_DIMM(dimm, dbam),
+ dimm);
- if (pvt->dclr0 & BIT(bit))
- edac_cap = EDAC_FLAG_SECDED;
- }
+ size1 = 0;
+ if (dcsb[dimm * 2 + 1] & DCSB_CS_ENABLE)
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
+ DBAM_DIMM(dimm, dbam),
+ dimm);
- return edac_cap;
+ amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
+ dimm * 2, size0,
+ dimm * 2 + 1, size1);
+ }
}
-static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
{
@@ -1333,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
}
- edac_dbg(1, "All DIMMs support ECC:%s\n",
- (dclr & BIT(19)) ? "yes" : "no");
+ edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19)));
edac_dbg(1, " PAR/ERR parity: %s\n",
- (dclr & BIT(8)) ? "enabled" : "disabled");
+ str_enabled_disabled(dclr & BIT(8)));
if (pvt->fam == 0x10)
edac_dbg(1, " DCT 128bit mode width: %s\n",
(dclr & BIT(11)) ? "128b" : "64b");
edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
- (dclr & BIT(12)) ? "yes" : "no",
- (dclr & BIT(13)) ? "yes" : "no",
- (dclr & BIT(14)) ? "yes" : "no",
- (dclr & BIT(15)) ? "yes" : "no");
+ str_yes_no(dclr & BIT(12)),
+ str_yes_no(dclr & BIT(13)),
+ str_yes_no(dclr & BIT(14)),
+ str_yes_no(dclr & BIT(15)));
}
#define CS_EVEN_PRIMARY BIT(0)
@@ -1360,7 +1198,7 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY)
-static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
+static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
{
u8 base, count = 0;
int cs_mode = 0;
@@ -1371,7 +1209,9 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1392,7 +1232,106 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
+{
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
+static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask = 0, addr_mask_sec = 0;
+ int cs_mask_nr = csrow_nr;
+ int dimm, size = 0;
+
+ /* No Chip Selects are enabled. */
+ if (!cs_mode)
+ return size;
+
+ /* Requested size of an even CS but none are enabled. */
+ if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
+ return size;
+
+ /* Requested size of an odd CS but none are enabled. */
+ if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
+ return size;
+
+ /*
+ * Family 17h introduced systems with one mask per DIMM,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 and CS1 -> MASK0 / DIMM0
+ * CS2 and CS3 -> MASK1 / DIMM1
+ *
+ * Family 19h Model 10h introduced systems with one mask per Chip Select,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 -> MASK0 -> DIMM0
+ * CS1 -> MASK1 -> DIMM0
+ * CS2 -> MASK2 -> DIMM1
+ * CS3 -> MASK3 -> DIMM1
+ *
+ * Keep the mask number equal to the Chip Select number for newer systems,
+ * and shift the mask number for older systems.
+ */
+ dimm = csrow_nr >> 1;
+
+ if (!pvt->flags.zn_regs_v2)
+ cs_mask_nr >>= 1;
+
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
+
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
+
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
+}
+
+static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{
int dimm, size0, size1, cs0, cs1, cs_mode;
@@ -1402,10 +1341,10 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
cs0 = dimm * 2;
cs1 = dimm * 2 + 1;
- cs_mode = f17_get_cs_mode(dimm, ctrl, pvt);
+ cs_mode = umc_get_cs_mode(dimm, ctrl, pvt);
- size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0);
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1);
+ size0 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs0);
+ size1 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs1);
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
cs0, size0,
@@ -1413,63 +1352,44 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
}
}
-static void __dump_misc_regs_df(struct amd64_pvt *pvt)
+static void umc_dump_misc_regs(struct amd64_pvt *pvt)
{
struct amd64_umc *umc;
- u32 i, tmp, umc_base;
+ u32 i;
for_each_umc(i) {
- umc_base = get_umc_base(i);
umc = &pvt->umc[i];
edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg);
edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
-
- amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp);
- edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp);
-
- amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp);
- edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp);
edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
- i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
- (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
+ i, str_yes_no(umc->umc_cap_hi & BIT(30)),
+ str_yes_no(umc->umc_cap_hi & BIT(31)));
edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
- i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
+ i, str_yes_no(umc->umc_cfg & BIT(12)));
edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
- i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
+ i, str_yes_no(umc->dimm_cfg & BIT(6)));
edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
- i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
-
- if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
- amd_smn_read(pvt->mc_node_id,
- umc_base + get_umc_reg(UMCCH_ADDR_CFG),
- &tmp);
- edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
- i, 1 << ((tmp >> 4) & 0x3));
- }
+ i, str_yes_no(umc->dimm_cfg & BIT(7)));
- debug_display_dimm_sizes_df(pvt, i);
+ umc_debug_display_dimm_sizes(pvt, i);
}
-
- edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
- pvt->dhar, dhar_base(pvt));
}
-/* Display and decode various NB registers for debug purposes. */
-static void __dump_misc_regs(struct amd64_pvt *pvt)
+static void dct_dump_misc_regs(struct amd64_pvt *pvt)
{
edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
edac_dbg(1, " NB two channel DRAM capable: %s\n",
- (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
+ str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL));
edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n",
- (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
- (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
+ str_yes_no(pvt->nbcap & NBCAP_SECDED),
+ str_yes_no(pvt->nbcap & NBCAP_CHIPKILL));
debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
@@ -1480,28 +1400,19 @@ static void __dump_misc_regs(struct amd64_pvt *pvt)
(pvt->fam == 0xf) ? k8_dhar_offset(pvt)
: f10_dhar_offset(pvt));
- debug_display_dimm_sizes(pvt, 0);
+ dct_debug_display_dimm_sizes(pvt, 0);
/* everything below this point is Fam10h and above */
if (pvt->fam == 0xf)
return;
- debug_display_dimm_sizes(pvt, 1);
+ dct_debug_display_dimm_sizes(pvt, 1);
/* Only if NOT ganged does dclr1 have valid info */
if (!dct_ganging_enabled(pvt))
debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
-}
-
-/* Display and decode various NB registers for debug purposes. */
-static void dump_misc_regs(struct amd64_pvt *pvt)
-{
- if (pvt->umc)
- __dump_misc_regs_df(pvt);
- else
- __dump_misc_regs(pvt);
- edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
+ edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt)));
amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
@@ -1509,7 +1420,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
/*
* See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
*/
-static void prep_chip_selects(struct amd64_pvt *pvt)
+static void dct_prep_chip_selects(struct amd64_pvt *pvt)
{
if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
@@ -1517,21 +1428,23 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
- } else if (pvt->fam >= 0x17) {
- int umc;
-
- for_each_umc(umc) {
- pvt->csels[umc].b_cnt = 4;
- pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
- }
-
} else {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
}
}
-static void read_umc_base_mask(struct amd64_pvt *pvt)
+static void umc_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 4;
+ pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 4 : 2;
+ }
+}
+
+static void umc_read_base_mask(struct amd64_pvt *pvt)
{
u32 umc_base_reg, umc_base_reg_sec;
u32 umc_mask_reg, umc_mask_reg_sec;
@@ -1540,6 +1453,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
u32 *base, *base_sec;
u32 *mask, *mask_sec;
int cs, umc;
+ u32 tmp;
for_each_umc(umc) {
umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
@@ -1552,17 +1466,21 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
base_reg = umc_base_reg + (cs * 4);
base_reg_sec = umc_base_reg_sec + (cs * 4);
- if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, &tmp)) {
+ *base = tmp;
edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
umc, cs, *base, base_reg);
+ }
- if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec))
+ if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, &tmp)) {
+ *base_sec = tmp;
edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n",
umc, cs, *base_sec, base_reg_sec);
+ }
}
umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
- umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
+ umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(pvt, UMCCH_ADDR_MASK_SEC);
for_each_chip_select_mask(cs, umc, pvt) {
mask = &pvt->csels[umc].csmasks[cs];
@@ -1571,13 +1489,17 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
mask_reg = umc_mask_reg + (cs * 4);
mask_reg_sec = umc_mask_reg_sec + (cs * 4);
- if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, &tmp)) {
+ *mask = tmp;
edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
umc, cs, *mask, mask_reg);
+ }
- if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec))
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, &tmp)) {
+ *mask_sec = tmp;
edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n",
umc, cs, *mask_sec, mask_reg_sec);
+ }
}
}
}
@@ -1585,15 +1507,10 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
/*
* Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
*/
-static void read_dct_base_mask(struct amd64_pvt *pvt)
+static void dct_read_base_mask(struct amd64_pvt *pvt)
{
int cs;
- prep_chip_selects(pvt);
-
- if (pvt->umc)
- return read_umc_base_mask(pvt);
-
for_each_chip_select(cs, 0, pvt) {
int reg0 = DCSB0 + (cs * 4);
int reg1 = DCSB1 + (cs * 4);
@@ -1633,7 +1550,7 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
}
}
-static void determine_memory_type_df(struct amd64_pvt *pvt)
+static void umc_determine_memory_type(struct amd64_pvt *pvt)
{
struct amd64_umc *umc;
u32 i;
@@ -1650,7 +1567,7 @@ static void determine_memory_type_df(struct amd64_pvt *pvt)
* Check if the system supports the "DDR Type" field in UMC Config
* and has DDR5 DIMMs in use.
*/
- if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+ if (pvt->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
if (umc->dimm_cfg & BIT(5))
umc->dram_type = MEM_LRDDR5;
else if (umc->dimm_cfg & BIT(4))
@@ -1670,13 +1587,10 @@ static void determine_memory_type_df(struct amd64_pvt *pvt)
}
}
-static void determine_memory_type(struct amd64_pvt *pvt)
+static void dct_determine_memory_type(struct amd64_pvt *pvt)
{
u32 dram_ctrl, dcsm;
- if (pvt->umc)
- return determine_memory_type_df(pvt);
-
switch (pvt->fam) {
case 0xf:
if (pvt->ext_model >= K8_REV_F)
@@ -1726,34 +1640,18 @@ static void determine_memory_type(struct amd64_pvt *pvt)
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
pvt->dram_type = MEM_EMPTY;
}
+
+ edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
return;
ddr3:
pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
}
-/* Get the number of DCT channels the memory controller is using. */
-static int k8_early_channel_count(struct amd64_pvt *pvt)
-{
- int flag;
-
- if (pvt->ext_model >= K8_REV_F)
- /* RevF (NPT) and later */
- flag = pvt->dclr0 & WIDTH_128;
- else
- /* RevE and earlier */
- flag = pvt->dclr0 & REVE_WIDTH_128;
-
- /* not used */
- pvt->dclr1 = 0;
-
- return (flag) ? 2 : 1;
-}
-
/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
{
- u16 mce_nid = topology_die_id(m->extcpu);
+ u16 mce_nid = topology_amd_node_id(m->extcpu);
struct mem_ctl_info *mci;
u8 start_bit = 1;
u8 end_bit = 47;
@@ -2001,69 +1899,6 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
}
}
-/*
- * Get the number of DCT channels in use.
- *
- * Return:
- * number of Memory Channels in operation
- * Pass back:
- * contents of the DCL0_LOW register
- */
-static int f1x_early_channel_count(struct amd64_pvt *pvt)
-{
- int i, j, channels = 0;
-
- /* On F10h, if we are in 128 bit mode, then we are using 2 channels */
- if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
- return 2;
-
- /*
- * Need to check if in unganged mode: In such, there are 2 channels,
- * but they are not in 128 bit mode and thus the above 'dclr0' status
- * bit will be OFF.
- *
- * Need to check DCT0[0] and DCT1[0] to see if only one of them has
- * their CSEnable bit on. If so, then SINGLE DIMM case.
- */
- edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
-
- /*
- * Check DRAM Bank Address Mapping values for each DIMM to see if there
- * is more than just one DIMM present in unganged mode. Need to check
- * both controllers since DIMMs can be placed in either one.
- */
- for (i = 0; i < 2; i++) {
- u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
-
- for (j = 0; j < 4; j++) {
- if (DBAM_DIMM(j, dbam) > 0) {
- channels++;
- break;
- }
- }
- }
-
- if (channels > 2)
- channels = 2;
-
- amd64_info("MCT channel count: %d\n", channels);
-
- return channels;
-}
-
-static int f17_early_channel_count(struct amd64_pvt *pvt)
-{
- int i, channels = 0;
-
- /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
- for_each_umc(i)
- channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
-
- amd64_info("MCT channel count: %d\n", channels);
-
- return channels;
-}
-
static int ddr3_cs_size(unsigned i, bool dct_width)
{
unsigned shift = 0;
@@ -2191,84 +2026,6 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
return ddr3_cs_size(cs_mode, false);
}
-static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
- unsigned int cs_mode, int csrow_nr)
-{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
- int cs_mask_nr = csrow_nr;
- int dimm, size = 0;
-
- /* No Chip Selects are enabled. */
- if (!cs_mode)
- return size;
-
- /* Requested size of an even CS but none are enabled. */
- if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
- return size;
-
- /* Requested size of an odd CS but none are enabled. */
- if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
- return size;
-
- /*
- * Family 17h introduced systems with one mask per DIMM,
- * and two Chip Selects per DIMM.
- *
- * CS0 and CS1 -> MASK0 / DIMM0
- * CS2 and CS3 -> MASK1 / DIMM1
- *
- * Family 19h Model 10h introduced systems with one mask per Chip Select,
- * and two Chip Selects per DIMM.
- *
- * CS0 -> MASK0 -> DIMM0
- * CS1 -> MASK1 -> DIMM0
- * CS2 -> MASK2 -> DIMM1
- * CS3 -> MASK3 -> DIMM1
- *
- * Keep the mask number equal to the Chip Select number for newer systems,
- * and shift the mask number for older systems.
- */
- dimm = csrow_nr >> 1;
-
- if (!fam_type->flags.zn_regs_v2)
- cs_mask_nr >>= 1;
-
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
-
- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
-
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
-
- /* Return size in MBs. */
- return size >> 10;
-}
-
static void read_dram_ctl_register(struct amd64_pvt *pvt)
{
@@ -2284,15 +2041,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt)
if (!dct_ganging_enabled(pvt))
edac_dbg(0, " Address range split per DCT: %s\n",
- (dct_high_range_enabled(pvt) ? "yes" : "no"));
+ str_yes_no(dct_high_range_enabled(pvt)));
edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
- (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
- (dct_memory_cleared(pvt) ? "yes" : "no"));
+ str_enabled_disabled(dct_data_intlv_enabled(pvt)),
+ str_yes_no(dct_memory_cleared(pvt)));
edac_dbg(0, " channel interleave: %s, "
"interleave bits selector: 0x%x\n",
- (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
+ str_enabled_disabled(dct_interleave_enabled(pvt)),
dct_sel_interleave_addr(pvt));
}
@@ -2792,227 +2549,6 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
}
/*
- * debug routine to display the memory sizes of all logical DIMMs and its
- * CSROWs
- */
-static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
-{
- int dimm, size0, size1;
- u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
- u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
-
- if (pvt->fam == 0xf) {
- /* K8 families < revF not supported yet */
- if (pvt->ext_model < K8_REV_F)
- return;
- else
- WARN_ON(ctrl != 0);
- }
-
- if (pvt->fam == 0x10) {
- dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
- : pvt->dbam0;
- dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
- pvt->csels[1].csbases :
- pvt->csels[0].csbases;
- } else if (ctrl) {
- dbam = pvt->dbam0;
- dcsb = pvt->csels[1].csbases;
- }
- edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
- ctrl, dbam);
-
- edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
-
- /* Dump memory sizes for DIMM and its CSROWs */
- for (dimm = 0; dimm < 4; dimm++) {
-
- size0 = 0;
- if (dcsb[dimm*2] & DCSB_CS_ENABLE)
- /*
- * For F15m60h, we need multiplier for LRDIMM cs_size
- * calculation. We pass dimm value to the dbam_to_cs
- * mapper so we can find the multiplier from the
- * corresponding DCSM.
- */
- size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
- DBAM_DIMM(dimm, dbam),
- dimm);
-
- size1 = 0;
- if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
- DBAM_DIMM(dimm, dbam),
- dimm);
-
- amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
- dimm * 2, size0,
- dimm * 2 + 1, size1);
- }
-}
-
-static struct amd64_family_type family_types[] = {
- [K8_CPUS] = {
- .ctl_name = "K8",
- .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
- .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = k8_early_channel_count,
- .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
- .dbam_to_cs = k8_dbam_to_chip_select,
- }
- },
- [F10_CPUS] = {
- .ctl_name = "F10h",
- .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
- .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f10_dbam_to_chip_select,
- }
- },
- [F15_CPUS] = {
- .ctl_name = "F15h",
- .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
- .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_dbam_to_chip_select,
- }
- },
- [F15_M30H_CPUS] = {
- .ctl_name = "F15h_M30h",
- .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
- .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
- },
- [F15_M60H_CPUS] = {
- .ctl_name = "F15h_M60h",
- .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
- .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_m60h_dbam_to_chip_select,
- }
- },
- [F16_CPUS] = {
- .ctl_name = "F16h",
- .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
- .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
- },
- [F16_M30H_CPUS] = {
- .ctl_name = "F16h_M30h",
- .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
- .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
- },
- [F17_CPUS] = {
- .ctl_name = "F17h",
- .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F17_M10H_CPUS] = {
- .ctl_name = "F17h_M10h",
- .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F17_M30H_CPUS] = {
- .ctl_name = "F17h_M30h",
- .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
- .max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F17_M60H_CPUS] = {
- .ctl_name = "F17h_M60h",
- .f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F17_M70H_CPUS] = {
- .ctl_name = "F17h_M70h",
- .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F19_CPUS] = {
- .ctl_name = "F19h",
- .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
- .max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F19_M10H_CPUS] = {
- .ctl_name = "F19h_M10h",
- .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
- .max_mcs = 12,
- .flags.zn_regs_v2 = 1,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
- [F19_M50H_CPUS] = {
- .ctl_name = "F19h_M50h",
- .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0,
- .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6,
- .max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
- },
-};
-
-/*
* These are tables of eigenvectors (one per line) which can be used for the
* construction of the syndrome tables. The modified syndrome search algorithm
* uses those to find the symbol in error and thus the DIMM.
@@ -3259,19 +2795,26 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* Currently, we can derive the channel number by looking at the 6th nibble in
* the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
* number.
+ *
+ * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of
+ * the MCA_SYND[ErrorInformation] field.
*/
-static int find_umc_channel(struct mce *m)
+static void umc_get_err_info(struct mce *m, struct err_info *err)
{
- return (m->ipid & GENMASK(31, 0)) >> 20;
+ err->channel = (m->ipid & GENMASK(31, 0)) >> 20;
+ err->csrow = m->synd & 0x7;
}
static void decode_umc_error(int node_id, struct mce *m)
{
u8 ecc_type = (m->status >> 45) & 0x3;
struct mem_ctl_info *mci;
+ unsigned long sys_addr;
struct amd64_pvt *pvt;
+ struct atl_err a_err;
struct err_info err;
- u64 sys_addr;
+
+ node_id = fixup_node_id(node_id, m);
mci = edac_mc_find(node_id);
if (!mci)
@@ -3284,8 +2827,6 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;
- err.channel = find_umc_channel(m);
-
if (!(m->status & MCI_STATUS_SYNDV)) {
err.err_code = ERR_SYND;
goto log_error;
@@ -3300,9 +2841,14 @@ static void decode_umc_error(int node_id, struct mce *m)
err.err_code = ERR_CHANNEL;
}
- err.csrow = m->synd & 0x7;
+ pvt->ops->get_err_info(m, &err);
+
+ a_err.addr = m->addr;
+ a_err.ipid = m->ipid;
+ a_err.cpu = m->extcpu;
- if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+ if (IS_ERR_VALUE(sys_addr)) {
err.err_code = ERR_NORM_ADDR;
goto log_error;
}
@@ -3316,37 +2862,10 @@ log_error:
/*
* Use pvt->F3 which contains the F3 CPU PCI device to get the related
* F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
- * Reserve F0 and F6 on systems with a UMC.
*/
static int
reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
{
- if (pvt->umc) {
- pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
- if (!pvt->F0) {
- edac_dbg(1, "F0 not found, device 0x%x\n", pci_id1);
- return -ENODEV;
- }
-
- pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
- if (!pvt->F6) {
- pci_dev_put(pvt->F0);
- pvt->F0 = NULL;
-
- edac_dbg(1, "F6 not found: device 0x%x\n", pci_id2);
- return -ENODEV;
- }
-
- if (!pci_ctl_dev)
- pci_ctl_dev = &pvt->F0->dev;
-
- edac_dbg(1, "F0: %s\n", pci_name(pvt->F0));
- edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
- edac_dbg(1, "F6: %s\n", pci_name(pvt->F6));
-
- return 0;
- }
-
/* Reserve the ADDRESS MAP Device */
pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
if (!pvt->F1) {
@@ -3374,37 +2893,11 @@ reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
return 0;
}
-static void free_mc_sibling_devs(struct amd64_pvt *pvt)
-{
- if (pvt->umc) {
- pci_dev_put(pvt->F0);
- pci_dev_put(pvt->F6);
- } else {
- pci_dev_put(pvt->F1);
- pci_dev_put(pvt->F2);
- }
-}
-
static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
{
pvt->ecc_sym_sz = 4;
- if (pvt->umc) {
- u8 i;
-
- for_each_umc(i) {
- /* Check enabled channels only: */
- if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
- if (pvt->umc[i].ecc_ctrl & BIT(9)) {
- pvt->ecc_sym_sz = 16;
- return;
- } else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
- pvt->ecc_sym_sz = 8;
- return;
- }
- }
- }
- } else if (pvt->fam >= 0x10) {
+ if (pvt->fam >= 0x10) {
u32 tmp;
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
@@ -3421,11 +2914,11 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
/*
* Retrieve the hardware registers of the memory controller.
*/
-static void __read_mc_regs_df(struct amd64_pvt *pvt)
+static void umc_read_mc_regs(struct amd64_pvt *pvt)
{
u8 nid = pvt->mc_node_id;
struct amd64_umc *umc;
- u32 i, umc_base;
+ u32 i, tmp, umc_base;
/* Read registers from each UMC */
for_each_umc(i) {
@@ -3433,11 +2926,20 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
- amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
- amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
- amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
- amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
- amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
+ if (!amd_smn_read(nid, umc_base + get_umc_reg(pvt, UMCCH_DIMM_CFG), &tmp))
+ umc->dimm_cfg = tmp;
+
+ if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp))
+ umc->umc_cfg = tmp;
+
+ if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp))
+ umc->sdp_ctrl = tmp;
+
+ if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp))
+ umc->ecc_ctrl = tmp;
+
+ if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &tmp))
+ umc->umc_cap_hi = tmp;
}
}
@@ -3445,7 +2947,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
* Retrieve the hardware registers of the memory controller (this includes the
* 'Address Map' and 'Misc' device regs)
*/
-static void read_mc_regs(struct amd64_pvt *pvt)
+static void dct_read_mc_regs(struct amd64_pvt *pvt)
{
unsigned int range;
u64 msr_val;
@@ -3454,25 +2956,18 @@ static void read_mc_regs(struct amd64_pvt *pvt)
* Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
* those are Read-As-Zero.
*/
- rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
+ rdmsrq(MSR_K8_TOP_MEM1, pvt->top_mem);
edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
/* Check first whether TOP_MEM2 is enabled: */
- rdmsrl(MSR_AMD64_SYSCFG, msr_val);
+ rdmsrq(MSR_AMD64_SYSCFG, msr_val);
if (msr_val & BIT(21)) {
- rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
+ rdmsrq(MSR_K8_TOP_MEM2, pvt->top_mem2);
edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
} else {
edac_dbg(0, " TOP_MEM2 disabled\n");
}
- if (pvt->umc) {
- __read_mc_regs_df(pvt);
- amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
-
- goto skip;
- }
-
amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
read_dram_ctl_register(pvt);
@@ -3513,14 +3008,6 @@ static void read_mc_regs(struct amd64_pvt *pvt)
amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
}
-skip:
- read_dct_base_mask(pvt);
-
- determine_memory_type(pvt);
-
- if (!pvt->umc)
- edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
-
determine_ecc_sym_sz(pvt);
}
@@ -3558,36 +3045,47 @@ skip:
* encompasses
*
*/
-static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
+static u32 dct_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
{
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
- int csrow_nr = csrow_nr_orig;
u32 cs_mode, nr_pages;
- if (!pvt->umc) {
- csrow_nr >>= 1;
- cs_mode = DBAM_DIMM(csrow_nr, dbam);
- } else {
- cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
- }
+ csrow_nr >>= 1;
+ cs_mode = DBAM_DIMM(csrow_nr, dbam);
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
nr_pages <<= 20 - PAGE_SHIFT;
edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
- csrow_nr_orig, dct, cs_mode);
+ csrow_nr, dct, cs_mode);
edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
return nr_pages;
}
-static int init_csrows_df(struct mem_ctl_info *mci)
+/*
+ * Return the size, in pages, of chip select @csrow_nr_orig on UMC @dct.
+ *
+ * The chip select mode is looked up with the chip select number shifted
+ * right by one. The value from umc_addr_mask_to_cs_size() is scaled by
+ * (20 - PAGE_SHIFT) bits, i.e. it is treated as a size in MB.
+ */
+static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
+{
+	u32 mode, pages;
+
+	mode = umc_get_cs_mode(csrow_nr_orig >> 1, dct, pvt);
+
+	/* MB -> pages */
+	pages = umc_addr_mask_to_cs_size(pvt, dct, mode, csrow_nr_orig);
+	pages <<= 20 - PAGE_SHIFT;
+
+	edac_dbg(0, "csrow: %d, channel: %d, cs_mode %d\n",
+		 csrow_nr_orig, dct, mode);
+	edac_dbg(0, "nr_pages/channel: %u\n", pages);
+
+	return pages;
+}
+
+static void umc_init_csrows(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
enum edac_type edac_mode = EDAC_NONE;
enum dev_type dev_type = DEV_UNKNOWN;
struct dimm_info *dimm;
- int empty = 1;
u8 umc, cs;
if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) {
@@ -3608,40 +3106,34 @@ static int init_csrows_df(struct mem_ctl_info *mci)
if (!csrow_enabled(cs, umc, pvt))
continue;
- empty = 0;
dimm = mci->csrows[cs]->channels[umc]->dimm;
edac_dbg(1, "MC node: %d, csrow: %d\n",
pvt->mc_node_id, cs);
- dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+ dimm->nr_pages = umc_get_csrow_nr_pages(pvt, umc, cs);
dimm->mtype = pvt->umc[umc].dram_type;
dimm->edac_mode = edac_mode;
dimm->dtype = dev_type;
dimm->grain = 64;
}
}
-
- return empty;
}
/*
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
*/
-static int init_csrows(struct mem_ctl_info *mci)
+static void dct_init_csrows(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
enum edac_type edac_mode = EDAC_NONE;
struct csrow_info *csrow;
struct dimm_info *dimm;
- int i, j, empty = 1;
int nr_pages = 0;
+ int i, j;
u32 val;
- if (pvt->umc)
- return init_csrows_df(mci);
-
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
pvt->nbcfg = val;
@@ -3664,19 +3156,18 @@ static int init_csrows(struct mem_ctl_info *mci)
continue;
csrow = mci->csrows[i];
- empty = 0;
edac_dbg(1, "MC node: %d, csrow: %d\n",
pvt->mc_node_id, i);
if (row_dct0) {
- nr_pages = get_csrow_nr_pages(pvt, 0, i);
+ nr_pages = dct_get_csrow_nr_pages(pvt, 0, i);
csrow->channels[0]->dimm->nr_pages = nr_pages;
}
/* K8 has only one DCT */
if (pvt->fam != 0xf && row_dct1) {
- int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
+ int row_dct1_pages = dct_get_csrow_nr_pages(pvt, 1, i);
csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
nr_pages += row_dct1_pages;
@@ -3691,15 +3182,13 @@ static int init_csrows(struct mem_ctl_info *mci)
: EDAC_SECDED;
}
- for (j = 0; j < pvt->channel_count; j++) {
+ for (j = 0; j < pvt->max_mcs; j++) {
dimm = csrow->channels[j]->dimm;
dimm->mtype = pvt->dram_type;
dimm->edac_mode = edac_mode;
dimm->grain = 64;
}
}
-
- return empty;
}
/* get all cores on this DCT */
@@ -3708,7 +3197,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
int cpu;
for_each_online_cpu(cpu)
- if (topology_die_id(cpu) == nid)
+ if (topology_amd_node_id(cpu) == nid)
cpumask_set_cpu(cpu, mask);
}
@@ -3733,8 +3222,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid)
nbe = reg->l & MSR_MCGCTL_NBE;
edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
- cpu, reg->q,
- (nbe ? "enabled" : "disabled"));
+ cpu, reg->q, str_enabled_disabled(nbe));
if (!nbe)
goto out;
@@ -3862,59 +3350,51 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
amd64_warn("Error restoring NB MCGCTL settings!\n");
}
-static bool ecc_enabled(struct amd64_pvt *pvt)
+static bool dct_ecc_enabled(struct amd64_pvt *pvt)
{
u16 nid = pvt->mc_node_id;
bool nb_mce_en = false;
- u8 ecc_en = 0, i;
+ u8 ecc_en = 0;
u32 value;
- if (boot_cpu_data.x86 >= 0x17) {
- u8 umc_en_mask = 0, ecc_en_mask = 0;
- struct amd64_umc *umc;
-
- for_each_umc(i) {
- umc = &pvt->umc[i];
+ amd64_read_pci_cfg(pvt->F3, NBCFG, &value);
- /* Only check enabled UMCs. */
- if (!(umc->sdp_ctrl & UMC_SDP_INIT))
- continue;
+ ecc_en = !!(value & NBCFG_ECC_ENABLE);
- umc_en_mask |= BIT(i);
+ nb_mce_en = nb_mce_bank_enabled_on_node(nid);
+ if (!nb_mce_en)
+ edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
+ MSR_IA32_MCG_CTL, nid);
- if (umc->umc_cap_hi & UMC_ECC_ENABLED)
- ecc_en_mask |= BIT(i);
- }
+ edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en));
- /* Check whether at least one UMC is enabled: */
- if (umc_en_mask)
- ecc_en = umc_en_mask == ecc_en_mask;
- else
- edac_dbg(0, "Node %d: No enabled UMCs.\n", nid);
+ return ecc_en && nb_mce_en;
+}
- /* Assume UMC MCA banks are enabled. */
- nb_mce_en = true;
- } else {
- amd64_read_pci_cfg(pvt->F3, NBCFG, &value);
+static bool umc_ecc_enabled(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ bool ecc_en = false;
+ int i;
- ecc_en = !!(value & NBCFG_ECC_ENABLE);
+ /* Check whether at least one UMC is enabled: */
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
- nb_mce_en = nb_mce_bank_enabled_on_node(nid);
- if (!nb_mce_en)
- edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
- MSR_IA32_MCG_CTL, nid);
+ if (umc->sdp_ctrl & UMC_SDP_INIT &&
+ umc->umc_cap_hi & UMC_ECC_ENABLED) {
+ ecc_en = true;
+ break;
+ }
}
- edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled"));
+ edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en));
- if (!ecc_en || !nb_mce_en)
- return false;
- else
- return true;
+ return ecc_en;
}
static inline void
-f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
+umc_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
@@ -3944,142 +3424,503 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
}
}
-static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
+static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
- if (pvt->umc) {
- f17h_determine_edac_ctl_cap(mci, pvt);
- } else {
- if (pvt->nbcap & NBCAP_SECDED)
- mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+ if (pvt->nbcap & NBCAP_SECDED)
+ mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
- if (pvt->nbcap & NBCAP_CHIPKILL)
- mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
- }
+ if (pvt->nbcap & NBCAP_CHIPKILL)
+ mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
- mci->edac_cap = determine_edac_cap(pvt);
+ mci->edac_cap = dct_determine_edac_cap(pvt);
mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = fam_type->ctl_name;
+ mci->ctl_name = pvt->ctl_name;
mci->dev_name = pci_name(pvt->F3);
mci->ctl_page_to_phys = NULL;
/* memory scrubber interface */
mci->set_sdram_scrub_rate = set_scrub_rate;
mci->get_sdram_scrub_rate = get_scrub_rate;
+
+ dct_init_csrows(mci);
+}
+
+/*
+ * Fill in the generic mem_ctl_info attributes for a UMC-based node and then
+ * populate its csrows.
+ */
+static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	/* Identification strings and unused callbacks. */
+	mci->mod_name = EDAC_MOD_STR;
+	mci->ctl_name = pvt->ctl_name;
+	mci->dev_name = pci_name(pvt->F3);
+	mci->ctl_page_to_phys = NULL;
+
+	/* Capabilities: start from nothing and let the UMC helper add flags. */
+	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	umc_determine_edac_ctl_cap(mci, pvt);
+
+	mci->edac_cap = umc_determine_edac_cap(pvt);
+
+	/* Must run after edac_ctl_cap has been determined. */
+	umc_init_csrows(mci);
+}
+
+/*
+ * Gather hardware information for a DCT-based node: reserve the sibling PCI
+ * functions identified by pvt->f1_id / pvt->f2_id, then read chip select,
+ * memory controller and memory type information.
+ *
+ * Returns 0 on success or the error from reserve_mc_sibling_devs().
+ */
+static int dct_hw_info_get(struct amd64_pvt *pvt)
+{
+	int err;
+
+	err = reserve_mc_sibling_devs(pvt, pvt->f1_id, pvt->f2_id);
+	if (err)
+		return err;
+
+	dct_prep_chip_selects(pvt);
+	dct_read_base_mask(pvt);
+	dct_read_mc_regs(pvt);
+	dct_determine_memory_type(pvt);
+
+	return 0;
+}
+
+/*
+ * Gather hardware information for a UMC-based node: allocate one zeroed
+ * amd64_umc entry per memory controller (pvt->max_mcs), then read chip
+ * select, UMC register and memory type information.
+ *
+ * Returns 0 on success, -ENOMEM if the UMC array cannot be allocated.
+ */
+static int umc_hw_info_get(struct amd64_pvt *pvt)
+{
+	struct amd64_umc *umcs;
+
+	umcs = kcalloc(pvt->max_mcs, sizeof(*umcs), GFP_KERNEL);
+	pvt->umc = umcs;
+	if (!umcs)
+		return -ENOMEM;
+
+	umc_prep_chip_selects(pvt);
+	umc_read_base_mask(pvt);
+	umc_read_mc_regs(pvt);
+	umc_determine_memory_type(pvt);
+
+	return 0;
+}
/*
- * returns a pointer to the family descriptor on success, NULL otherwise.
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ *
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer to comment on gpu_get_umc_base().
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
*/
-static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
+static void gpu_get_err_info(struct mce *m, struct err_info *err)
+{
+	/* UMC instance value from the low 32 bits of MCA_IPID, bits [31:20]. */
+	u8 inst = (m->ipid & GENMASK(31, 0)) >> 20;
+	/* Channel within its group of four: MCA_IPID[15:12]. */
+	u8 phy = (m->ipid >> 12) & 0xf;
+
+	err->csrow = phy;
+
+	/* Odd instance values carry the upper four channels of a UMC. */
+	if (inst & 1)
+		err->channel = phy + 4;
+	else
+		err->channel = phy;
+}
+
+/*
+ * Look up the primary and secondary address masks of chip select @csrow_nr on
+ * UMC @umc and hand them to the common size calculation.
+ */
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+				    unsigned int cs_mode, int csrow_nr)
+{
+	u32 mask_pri = pvt->csels[umc].csmasks[csrow_nr];
+	u32 mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
+	int dimm = csrow_nr >> 1;
+
+	return __addr_mask_to_cs_size(mask_pri, mask_sec, cs_mode, csrow_nr, dimm);
+}
+
+/* Print the size of every chip select on UMC @ctrl. */
+static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
+{
+	/* The same fixed mode is used for every chip select. */
+	int mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+	int cs = 0;
+	int mb;
+
+	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+	for_each_chip_select(cs, ctrl, pvt) {
+		mb = gpu_addr_mask_to_cs_size(pvt, ctrl, mode, cs);
+		amd64_info(EDAC_MC ": %d: %5dMB\n", cs, mb);
+	}
+}
+
+/*
+ * Dump the cached UMC configuration registers and the chip select sizes of
+ * every UMC on this node.
+ */
+static void gpu_dump_misc_regs(struct amd64_pvt *pvt)
+{
+	u32 i;
+
+	for_each_umc(i) {
+		struct amd64_umc *cur = &pvt->umc[i];
+
+		edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, cur->umc_cfg);
+		edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, cur->sdp_ctrl);
+		edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, cur->ecc_ctrl);
+		edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+		gpu_debug_display_dimm_sizes(pvt, i);
+	}
+}
+
+/*
+ * Return the size, in pages, of chip select @csrow_nr on UMC @dct. The value
+ * from gpu_addr_mask_to_cs_size() is scaled by (20 - PAGE_SHIFT) bits.
+ */
+static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+{
+	int mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+	u32 pages = gpu_addr_mask_to_cs_size(pvt, dct, mode, csrow_nr);
+
+	pages <<= 20 - PAGE_SHIFT;
+
+	edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
+	edac_dbg(0, "nr_pages/channel: %u\n", pages);
+
+	return pages;
+}
+
+/*
+ * Populate the dimm_info of every enabled chip select on every UMC.
+ *
+ * Note the indexing: the UMC number selects the csrow and the chip select
+ * number selects the channel within it.
+ */
+static void gpu_init_csrows(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u8 umc, cs;
+
+	for_each_umc(umc) {
+		for_each_chip_select(cs, umc, pvt) {
+			struct dimm_info *d;
+
+			if (!csrow_enabled(cs, umc, pvt))
+				continue;
+
+			d = mci->csrows[umc]->channels[cs]->dimm;
+
+			edac_dbg(1, "MC node: %d, csrow: %d\n",
+				 pvt->mc_node_id, cs);
+
+			d->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
+			d->mtype = pvt->dram_type;
+			d->edac_mode = EDAC_SECDED;
+			d->dtype = DEV_X16;
+			d->grain = 64;
+		}
+	}
+}
+
+/*
+ * Fill in the generic mem_ctl_info attributes for a GPU node and then
+ * populate its csrows.
+ */
+static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	/* Identification strings and unused callbacks. */
+	mci->mod_name = EDAC_MOD_STR;
+	mci->ctl_name = pvt->ctl_name;
+	mci->dev_name = pci_name(pvt->F3);
+	mci->ctl_page_to_phys = NULL;
+
+	/* Fixed capabilities; nothing is probed from hardware here. */
+	mci->mtype_cap = MEM_FLAG_HBM2;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_EC;
+
+	gpu_init_csrows(mci);
+}
+
+/*
+ * ECC is enabled by default on GPU nodes, so no register is consulted and
+ * @pvt is unused.
+ */
+static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
+{
+	return true;
+}
+
+/*
+ * Compute the register base for @channel of GPU UMC @umc, relative to
+ * pvt->gpu_umc_base.
+ */
+static inline u32 gpu_get_umc_base(struct amd64_pvt *pvt, u8 umc, u8 channel)
+{
+	/*
+	 * On CPUs, there is one channel per UMC, so UMC numbering equals
+	 * channel numbering. On GPUs, there are eight channels per UMC,
+	 * so the channel numbering is different from UMC numbering.
+	 *
+	 * On CPU nodes channels are selected in 6th nibble
+	 * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+	 *
+	 * On GPU nodes channels are selected in 3rd nibble
+	 * HBM chX[3:0]= [Y ]5X[3:0]000;
+	 * HBM chX[7:4]= [Y+1]5X[3:0]000
+	 *
+	 * On MI300 APU nodes, same as GPU nodes but channels are selected
+	 * in the base address of 0x90000
+	 */
+
+	/* Each UMC owns two instance values; channels 4-7 use the odd one. */
+	u8 inst = 2 * umc + (channel >= 4 ? 1 : 0);
+	/* Position of the channel within its group of four. */
+	u8 slot = channel & 3;
+
+	return pvt->gpu_umc_base + (inst << 20) + (slot << 12);
+}
+
+/*
+ * Cache the UMC_CFG, SDP_CTRL and ECC_CTRL registers of every GPU UMC, read
+ * at the register base of channel 0. A cached value is only updated when
+ * amd_smn_read() returns 0.
+ */
+static void gpu_read_mc_regs(struct amd64_pvt *pvt)
+{
+	u8 nid = pvt->mc_node_id;
+	u32 i, val, base;
+
+	/* Read registers from each UMC */
+	for_each_umc(i) {
+		struct amd64_umc *cur = &pvt->umc[i];
+
+		base = gpu_get_umc_base(pvt, i, 0);
+
+		if (!amd_smn_read(nid, base + UMCCH_UMC_CFG, &val))
+			cur->umc_cfg = val;
+
+		if (!amd_smn_read(nid, base + UMCCH_SDP_CTRL, &val))
+			cur->sdp_ctrl = val;
+
+		if (!amd_smn_read(nid, base + UMCCH_ECC_CTRL, &val))
+			cur->ecc_ctrl = val;
+	}
+}
+
+/*
+ * Read the base address and address mask registers of every chip select on
+ * every GPU UMC into pvt->csels[]. Each value is logged only when its
+ * amd_smn_read() returns 0.
+ */
+static void gpu_read_base_mask(struct amd64_pvt *pvt)
+{
+	int umc, cs;
+
+	for_each_umc(umc) {
+		for_each_chip_select(cs, umc, pvt) {
+			/* Both registers live at the same per-channel base. */
+			u32 chan_base = gpu_get_umc_base(pvt, umc, cs);
+			u32 base_reg = chan_base + UMCCH_BASE_ADDR;
+			u32 mask_reg = chan_base + UMCCH_ADDR_MASK;
+			u32 *base = &pvt->csels[umc].csbases[cs];
+			u32 *mask = &pvt->csels[umc].csmasks[cs];
+
+			if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+				edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *base, base_reg);
+			}
+
+			if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+				edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *mask, mask_reg);
+			}
+		}
+	}
+}
+
+/* Every GPU UMC gets eight base and eight mask registers. */
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+	int i;
+
+	for_each_umc(i) {
+		pvt->csels[i].m_cnt = 8;
+		pvt->csels[i].b_cnt = 8;
+	}
+}
+
+/*
+ * Gather hardware information for a GPU node: fetch the node map, allocate
+ * one zeroed amd64_umc entry per memory controller (pvt->max_mcs), then read
+ * chip select and UMC register information.
+ *
+ * Returns 0 on success, the error from gpu_get_node_map(), or -ENOMEM if the
+ * UMC array cannot be allocated.
+ */
+static int gpu_hw_info_get(struct amd64_pvt *pvt)
+{
+	int err = gpu_get_node_map(pvt);
+
+	if (err)
+		return err;
+
+	pvt->umc = kcalloc(pvt->max_mcs, sizeof(*pvt->umc), GFP_KERNEL);
+	if (pvt->umc == NULL)
+		return -ENOMEM;
+
+	gpu_prep_chip_selects(pvt);
+	gpu_read_base_mask(pvt);
+	gpu_read_mc_regs(pvt);
+
+	return 0;
+}
+
+/*
+ * Release what the hw_info_get() variants and per_family_init() set up:
+ * the F1/F2 PCI device references and the umc and csels arrays.
+ */
+static void hw_info_put(struct amd64_pvt *pvt)
+{
+	/* PCI sibling devices */
+	pci_dev_put(pvt->F1);
+	pci_dev_put(pvt->F2);
+
+	/* Dynamically allocated arrays */
+	kfree(pvt->umc);
+	kfree(pvt->csels);
+}
+
+/* Low-level operations for UMC-based nodes. */
+static struct low_ops umc_ops = {
+	.hw_info_get = umc_hw_info_get,
+	.setup_mci_misc_attrs = umc_setup_mci_misc_attrs,
+	.ecc_enabled = umc_ecc_enabled,
+	.get_err_info = umc_get_err_info,
+	.dump_misc_regs = umc_dump_misc_regs,
+};
+
+/* Low-level operations for GPU nodes. */
+static struct low_ops gpu_ops = {
+	.hw_info_get = gpu_hw_info_get,
+	.setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
+	.ecc_enabled = gpu_ecc_enabled,
+	.get_err_info = gpu_get_err_info,
+	.dump_misc_regs = gpu_dump_misc_regs,
+};
+
+/*
+ * Low-level operations for DCT-based nodes.
+ *
+ * Use Family 16h versions for defaults and adjust as needed below.
+ */
+static struct low_ops dct_ops = {
+	.hw_info_get = dct_hw_info_get,
+	.setup_mci_misc_attrs = dct_setup_mci_misc_attrs,
+	.ecc_enabled = dct_ecc_enabled,
+	.dump_misc_regs = dct_dump_misc_regs,
+	.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+	.dbam_to_cs = f16_dbam_to_chip_select,
+};
+
+/*
+ * Per-family/model setup of @pvt.
+ *
+ * Records family, model, extended model and stepping from boot_cpu_data,
+ * selects the low_ops group (umc_ops for families >= 0x17, dct_ops otherwise,
+ * gpu_ops for the GPU/APU models handled below), applies per-model overrides
+ * (PCI function IDs, max_mcs, register layout flag, DRAM type), builds
+ * pvt->ctl_name and allocates pvt->csels with one entry per memory
+ * controller.
+ *
+ * Returns 0 on success, -ENODEV for an unsupported family/model and -ENOMEM
+ * if the csels array cannot be allocated.
+ */
+static int per_family_init(struct amd64_pvt *pvt)
 {
 	pvt->ext_model = boot_cpu_data.x86_model >> 4;
 	pvt->stepping = boot_cpu_data.x86_stepping;
 	pvt->model = boot_cpu_data.x86_model;
 	pvt->fam = boot_cpu_data.x86;
+	const char *tmp_name = NULL;
+	pvt->max_mcs = 2;
+
+	/*
+	 * Decide on which ops group to use here and do any family/model
+	 * overrides below.
+	 */
+	if (pvt->fam >= 0x17)
+		pvt->ops = &umc_ops;
+	else
+		pvt->ops = &dct_ops;
 	switch (pvt->fam) {
 	case 0xf:
-		fam_type = &family_types[K8_CPUS];
-		pvt->ops = &family_types[K8_CPUS].ops;
+		tmp_name = (pvt->ext_model >= K8_REV_F) ?
+			   "K8 revF or later" : "K8 revE or earlier";
+		pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP;
+		pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL;
+		pvt->ops->map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow;
+		pvt->ops->dbam_to_cs = k8_dbam_to_chip_select;
 		break;
 	case 0x10:
-		fam_type = &family_types[F10_CPUS];
-		pvt->ops = &family_types[F10_CPUS].ops;
+		pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP;
+		pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM;
+		pvt->ops->dbam_to_cs = f10_dbam_to_chip_select;
 		break;
 	case 0x15:
-		if (pvt->model == 0x30) {
-			fam_type = &family_types[F15_M30H_CPUS];
-			pvt->ops = &family_types[F15_M30H_CPUS].ops;
+		switch (pvt->model) {
+		case 0x30:
+			pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
+			pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2;
 			break;
-		} else if (pvt->model == 0x60) {
-			fam_type = &family_types[F15_M60H_CPUS];
-			pvt->ops = &family_types[F15_M60H_CPUS].ops;
+		case 0x60:
+			pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
+			pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2;
+			pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select;
+			break;
+		case 0x13:
+			/* Richland is only client */
+			return -ENODEV;
+		default:
+			pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1;
+			pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2;
+			pvt->ops->dbam_to_cs = f15_dbam_to_chip_select;
 			break;
-		/* Richland is only client */
-		} else if (pvt->model == 0x13) {
-			return NULL;
-		} else {
-			fam_type = &family_types[F15_CPUS];
-			pvt->ops = &family_types[F15_CPUS].ops;
 		}
 		break;
 	case 0x16:
-		if (pvt->model == 0x30) {
-			fam_type = &family_types[F16_M30H_CPUS];
-			pvt->ops = &family_types[F16_M30H_CPUS].ops;
+		switch (pvt->model) {
+		case 0x30:
+			pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1;
+			pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2;
+			break;
+		default:
+			pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1;
+			pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2;
 			break;
 		}
-		fam_type = &family_types[F16_CPUS];
-		pvt->ops = &family_types[F16_CPUS].ops;
 		break;
 	case 0x17:
-		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
-			fam_type = &family_types[F17_M10H_CPUS];
-			pvt->ops = &family_types[F17_M10H_CPUS].ops;
+		switch (pvt->model) {
+		case 0x30 ... 0x3f:
+			pvt->max_mcs = 8;
 			break;
-		} else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
-			fam_type = &family_types[F17_M30H_CPUS];
-			pvt->ops = &family_types[F17_M30H_CPUS].ops;
-			break;
-		} else if (pvt->model >= 0x60 && pvt->model <= 0x6f) {
-			fam_type = &family_types[F17_M60H_CPUS];
-			pvt->ops = &family_types[F17_M60H_CPUS].ops;
-			break;
-		} else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
-			fam_type = &family_types[F17_M70H_CPUS];
-			pvt->ops = &family_types[F17_M70H_CPUS].ops;
+		default:
 			break;
 		}
-		fallthrough;
-	case 0x18:
-		fam_type = &family_types[F17_CPUS];
-		pvt->ops = &family_types[F17_CPUS].ops;
+		break;
-		if (pvt->fam == 0x18)
-			family_types[F17_CPUS].ctl_name = "F18h";
+	case 0x18:
 		break;
 	case 0x19:
-		if (pvt->model >= 0x10 && pvt->model <= 0x1f) {
-			fam_type = &family_types[F19_M10H_CPUS];
-			pvt->ops = &family_types[F19_M10H_CPUS].ops;
+		switch (pvt->model) {
+		case 0x00 ... 0x0f:
+			pvt->max_mcs = 8;
+			break;
+		case 0x10 ... 0x1f:
+			pvt->max_mcs = 12;
+			pvt->flags.zn_regs_v2 = 1;
+			break;
+		case 0x30 ... 0x3f:
+			if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
+				tmp_name = "MI200";
+				pvt->max_mcs = 4;
+				pvt->dram_type = MEM_HBM2;
+				pvt->gpu_umc_base = 0x50000;
+				pvt->ops = &gpu_ops;
+			} else {
+				pvt->max_mcs = 8;
+			}
+			break;
+		case 0x60 ... 0x6f:
+			pvt->flags.zn_regs_v2 = 1;
+			break;
+		case 0x70 ... 0x7f:
+			pvt->max_mcs = 4;
+			pvt->flags.zn_regs_v2 = 1;
+			break;
+		case 0x90 ... 0x9f:
+			pvt->max_mcs = 4;
+			pvt->dram_type = MEM_HBM3;
+			pvt->gpu_umc_base = 0x90000;
+			pvt->ops = &gpu_ops;
+			break;
+		case 0xa0 ... 0xaf:
+			pvt->max_mcs = 12;
+			pvt->flags.zn_regs_v2 = 1;
+			break;
+		}
+		break;
+
+	case 0x1A:
+		switch (pvt->model) {
+		case 0x00 ... 0x1f:
+			pvt->max_mcs = 12;
+			pvt->flags.zn_regs_v2 = 1;
 			break;
-		} else if (pvt->model >= 0x20 && pvt->model <= 0x2f) {
-			fam_type = &family_types[F17_M70H_CPUS];
-			pvt->ops = &family_types[F17_M70H_CPUS].ops;
-			fam_type->ctl_name = "F19h_M20h";
+		case 0x40 ... 0x4f:
+			pvt->flags.zn_regs_v2 = 1;
 			break;
-		} else if (pvt->model >= 0x50 && pvt->model <= 0x5f) {
-			fam_type = &family_types[F19_M50H_CPUS];
-			pvt->ops = &family_types[F19_M50H_CPUS].ops;
-			fam_type->ctl_name = "F19h_M50h";
+		case 0x50 ... 0x57:
+		case 0xc0 ... 0xc7:
+			pvt->max_mcs = 16;
+			pvt->flags.zn_regs_v2 = 1;
 			break;
-		} else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) {
-			fam_type = &family_types[F19_M10H_CPUS];
-			pvt->ops = &family_types[F19_M10H_CPUS].ops;
-			fam_type->ctl_name = "F19h_MA0h";
+		case 0x90 ... 0x9f:
+		case 0xa0 ... 0xaf:
+			pvt->max_mcs = 8;
+			pvt->flags.zn_regs_v2 = 1;
 			break;
 		}
-		fam_type = &family_types[F19_CPUS];
-		pvt->ops = &family_types[F19_CPUS].ops;
-		family_types[F19_CPUS].ctl_name = "F19h";
 		break;
 	default:
 		amd64_err("Unsupported family!\n");
-		return NULL;
+		return -ENODEV;
 	}
-	return fam_type;
+	/*
+	 * Never pass the name as the format string itself: go through "%s" so
+	 * that a '%' in a name can not be interpreted as a conversion.
+	 */
+	if (tmp_name)
+		scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "%s", tmp_name);
+	else
+		scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh",
+			  pvt->fam, pvt->model);
+
+	pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL);
+	if (!pvt->csels)
+		return -ENOMEM;
+
+	return 0;
 }
static const struct attribute_group *amd64_edac_attr_groups[] = {
@@ -4090,67 +3931,33 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
NULL
};
-static int hw_info_get(struct amd64_pvt *pvt)
-{
- u16 pci_id1, pci_id2;
- int ret;
-
- if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
- if (!pvt->umc)
- return -ENOMEM;
-
- pci_id1 = fam_type->f0_id;
- pci_id2 = fam_type->f6_id;
- } else {
- pci_id1 = fam_type->f1_id;
- pci_id2 = fam_type->f2_id;
- }
-
- ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
- if (ret)
- return ret;
-
- read_mc_regs(pvt);
-
- return 0;
-}
-
-static void hw_info_put(struct amd64_pvt *pvt)
+/*
+ * For heterogeneous and APU models EDAC CHIP_SELECT and CHANNEL layers
+ * should be swapped to fit into the layers.
+ */
+static unsigned int get_layer_size(struct amd64_pvt *pvt, u8 layer)
{
- if (pvt->F0 || pvt->F1)
- free_mc_sibling_devs(pvt);
+ bool is_gpu = (pvt->ops == &gpu_ops);
- kfree(pvt->umc);
+ if (!layer)
+ return is_gpu ? pvt->max_mcs
+ : pvt->csels[0].b_cnt;
+ else
+ return is_gpu ? pvt->csels[0].b_cnt
+ : pvt->max_mcs;
}
static int init_one_instance(struct amd64_pvt *pvt)
{
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
- int ret = -EINVAL;
-
- /*
- * We need to determine how many memory channels there are. Then use
- * that information for calculating the size of the dynamic instance
- * tables in the 'mci' structure.
- */
- pvt->channel_count = pvt->ops->early_channel_count(pvt);
- if (pvt->channel_count < 0)
- return ret;
+ int ret = -ENOMEM;
- ret = -ENOMEM;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = get_layer_size(pvt, 0);
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
-
- /*
- * Always allocate two channels since we can have setups with DIMMs on
- * only one channel. Also, this simplifies handling later for the price
- * of a couple of KBs tops.
- */
- layers[1].size = fam_type->max_mcs;
+ layers[1].size = get_layer_size(pvt, 1);
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
@@ -4160,10 +3967,7 @@ static int init_one_instance(struct amd64_pvt *pvt)
mci->pvt_info = pvt;
mci->pdev = &pvt->F3->dev;
- setup_mci_misc_attrs(mci);
-
- if (init_csrows(mci))
- mci->edac_cap = EDAC_FLAG_NONE;
+ pvt->ops->setup_mci_misc_attrs(mci);
ret = -ENODEV;
if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
@@ -4180,7 +3984,7 @@ static bool instance_has_memory(struct amd64_pvt *pvt)
bool cs_enabled = false;
int cs = 0, dct = 0;
- for (dct = 0; dct < fam_type->max_mcs; dct++) {
+ for (dct = 0; dct < pvt->max_mcs; dct++) {
for_each_chip_select(cs, dct, pvt)
cs_enabled |= csrow_enabled(cs, dct, pvt);
}
@@ -4209,12 +4013,11 @@ static int probe_one_instance(unsigned int nid)
pvt->mc_node_id = nid;
pvt->F3 = F3;
- ret = -ENODEV;
- fam_type = per_family_init(pvt);
- if (!fam_type)
+ ret = per_family_init(pvt);
+ if (ret < 0)
goto err_enable;
- ret = hw_info_get(pvt);
+ ret = pvt->ops->hw_info_get(pvt);
if (ret < 0)
goto err_enable;
@@ -4224,7 +4027,7 @@ static int probe_one_instance(unsigned int nid)
goto err_enable;
}
- if (!ecc_enabled(pvt)) {
+ if (!pvt->ops->ecc_enabled(pvt)) {
ret = -ENODEV;
if (!ecc_enable_override)
@@ -4250,13 +4053,10 @@ static int probe_one_instance(unsigned int nid)
goto err_enable;
}
- amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
- (pvt->fam == 0xf ?
- (pvt->ext_model >= K8_REV_F ? "revF or later "
- : "revE or earlier ")
- : ""), pvt->mc_node_id);
+ amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id);
- dump_misc_regs(pvt);
+ /* Display and decode various registers for debug purposes. */
+ pvt->ops->dump_misc_regs(pvt);
return ret;
@@ -4319,6 +4119,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL),
X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x1A, NULL),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
@@ -4370,19 +4171,17 @@ static int __init amd64_edac_init(void)
}
/* register stuff with EDAC MCE */
- if (boot_cpu_data.x86 >= 0x17)
+ if (boot_cpu_data.x86 >= 0x17) {
amd_register_ecc_decoder(decode_umc_error);
- else
+ } else {
amd_register_ecc_decoder(decode_bus_error);
-
- setup_pci_device();
+ setup_pci_device();
+ }
#ifdef CONFIG_X86_32
amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif
- printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
-
return 0;
err_pci:
@@ -4427,10 +4226,8 @@ module_init(amd64_edac_init);
module_exit(amd64_edac_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
- "Dave Peterson, Thayne Harbaugh");
-MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
- EDAC_AMD64_VERSION);
+MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD");
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 38e5ad95d010..1757c1b99fc8 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/edac.h>
+#include <linux/bitfield.h>
#include <asm/cpu_device_id.h>
#include <asm/msr.h>
#include "edac_module.h"
@@ -85,7 +86,6 @@
* sections 3.5.4 and 3.5.5 for more information.
*/
-#define EDAC_AMD64_VERSION "3.5.0"
#define EDAC_MOD_STR "amd64_edac"
/* Extended Model from CPUID, for CPU Revision numbers */
@@ -96,11 +96,12 @@
/* Hardware limit on ChipSelect rows per MC and processors per system */
#define NUM_CHIPSELECTS 8
#define DRAM_RANGES 8
-#define NUM_CONTROLLERS 12
#define ON true
#define OFF false
+#define MAX_CTL_NAMELEN 19
+
/*
* PCI-defined configuration space registers
*/
@@ -114,22 +115,6 @@
#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
-#define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460
-#define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466
-#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8
-#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
-#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490
-#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
-#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F0 0x1448
-#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F6 0x144e
-#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
-#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
-#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
-#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
-#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad
-#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3
-#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a
-#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670
/*
* Function 1 - Address Map
@@ -215,8 +200,6 @@
#define DCT_SEL_HI 0x114
#define F15H_M60H_SCRCTRL 0x1C8
-#define F17H_SCR_BASE_ADDR 0x48
-#define F17H_SCR_LIMIT_ADDR 0x4C
/*
* Function 3 - Misc Control
@@ -274,15 +257,11 @@
#define UMCCH_ADDR_MASK 0x20
#define UMCCH_ADDR_MASK_SEC 0x28
#define UMCCH_ADDR_MASK_SEC_DDR5 0x30
-#define UMCCH_ADDR_CFG 0x30
-#define UMCCH_ADDR_CFG_DDR5 0x40
#define UMCCH_DIMM_CFG 0x80
#define UMCCH_DIMM_CFG_DDR5 0x90
#define UMCCH_UMC_CFG 0x100
#define UMCCH_SDP_CTRL 0x104
#define UMCCH_ECC_CTRL 0x14C
-#define UMCCH_ECC_BAD_SYMBOL 0xD90
-#define UMCCH_UMC_CAP 0xDF0
#define UMCCH_UMC_CAP_HI 0xDF4
/* UMC CH bitfields */
@@ -291,25 +270,6 @@
#define UMC_SDP_INIT BIT(31)
-enum amd_families {
- K8_CPUS = 0,
- F10_CPUS,
- F15_CPUS,
- F15_M30H_CPUS,
- F15_M60H_CPUS,
- F16_CPUS,
- F16_M30H_CPUS,
- F17_CPUS,
- F17_M10H_CPUS,
- F17_M30H_CPUS,
- F17_M60H_CPUS,
- F17_M70H_CPUS,
- F19_CPUS,
- F19_M10H_CPUS,
- F19_M50H_CPUS,
- NUM_FAMILIES,
-};
-
/* Error injection control structure */
struct error_injection {
u32 section;
@@ -352,11 +312,21 @@ struct amd64_umc {
enum mem_type dram_type;
};
+struct amd64_family_flags {
+ /*
+ * Indicates that the system supports the new register offsets, etc.
+ * first introduced with Family 19h Model 10h.
+ */
+ __u64 zn_regs_v2 : 1,
+
+ __reserved : 63;
+};
+
struct amd64_pvt {
struct low_ops *ops;
/* pci_device handles which we utilize */
- struct pci_dev *F0, *F1, *F2, *F3, *F6;
+ struct pci_dev *F1, *F2, *F3;
u16 mc_node_id; /* MC index of this MC node */
u8 fam; /* CPU family */
@@ -364,7 +334,6 @@ struct amd64_pvt {
u8 stepping; /* ... stepping */
int ext_model; /* extended model value of this node */
- int channel_count;
/* Raw registers */
u32 dclr0; /* DRAM Configuration Low DCT0 reg */
@@ -373,13 +342,12 @@ struct amd64_pvt {
u32 dchr1; /* DRAM Configuration High DCT1 reg */
u32 nbcap; /* North Bridge Capabilities */
u32 nbcfg; /* F10 North Bridge Configuration */
- u32 ext_nbcfg; /* Extended F10 North Bridge Configuration */
u32 dhar; /* DRAM Hoist reg */
u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
/* one for each DCT/UMC */
- struct chip_select csels[NUM_CONTROLLERS];
+ struct chip_select *csels;
/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
struct dram_range ranges[DRAM_RANGES];
@@ -390,10 +358,17 @@ struct amd64_pvt {
u32 dct_sel_lo; /* DRAM Controller Select Low */
u32 dct_sel_hi; /* DRAM Controller Select High */
u32 online_spare; /* On-Line spare Reg */
+ u32 gpu_umc_base; /* Base address used for channel selection on GPUs */
/* x4, x8, or x16 syndromes in use */
u8 ecc_sym_sz;
+ char ctl_name[MAX_CTL_NAMELEN];
+ u16 f1_id, f2_id;
+ /* Maximum number of memory controllers per die/node. */
+ u8 max_mcs;
+
+ struct amd64_family_flags flags;
/* place to store error injection parameters prior to issue */
struct error_injection injection;
@@ -484,30 +459,15 @@ struct ecc_settings {
* functions and per device encoding/decoding logic.
*/
struct low_ops {
- int (*early_channel_count) (struct amd64_pvt *pvt);
- void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
- struct err_info *);
- int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
- unsigned cs_mode, int cs_mask_nr);
-};
-
-struct amd64_family_flags {
- /*
- * Indicates that the system supports the new register offsets, etc.
- * first introduced with Family 19h Model 10h.
- */
- __u64 zn_regs_v2 : 1,
-
- __reserved : 63;
-};
-
-struct amd64_family_type {
- const char *ctl_name;
- u16 f0_id, f1_id, f2_id, f6_id;
- /* Maximum number of memory controllers per die/node. */
- u8 max_mcs;
- struct amd64_family_flags flags;
- struct low_ops ops;
+ void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci, u64 sys_addr,
+ struct err_info *err);
+ int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct,
+ unsigned int cs_mode, int cs_mask_nr);
+ int (*hw_info_get)(struct amd64_pvt *pvt);
+ bool (*ecc_enabled)(struct amd64_pvt *pvt);
+ void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci);
+ void (*dump_misc_regs)(struct amd64_pvt *pvt);
+ void (*get_err_info)(struct mce *m, struct err_info *err);
};
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
deleted file mode 100644
index 7508aa416ddb..000000000000
--- a/drivers/edac/amd8111_edac.c
+++ /dev/null
@@ -1,597 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * amd8111_edac.c, AMD8111 Hyper Transport chip EDAC kernel module
- *
- * Copyright (c) 2008 Wind River Systems, Inc.
- *
- * Authors: Cao Qingtao <qingtao.cao@windriver.com>
- * Benjamin Walsh <benjamin.walsh@windriver.com>
- * Hu Yongqi <yongqi.hu@windriver.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/bitops.h>
-#include <linux/edac.h>
-#include <linux/pci_ids.h>
-#include <asm/io.h>
-
-#include "edac_module.h"
-#include "amd8111_edac.h"
-
-#define AMD8111_EDAC_REVISION " Ver: 1.0.0"
-#define AMD8111_EDAC_MOD_STR "amd8111_edac"
-
-#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460
-
-enum amd8111_edac_devs {
- LPC_BRIDGE = 0,
-};
-
-enum amd8111_edac_pcis {
- PCI_BRIDGE = 0,
-};
-
-/* Wrapper functions for accessing PCI configuration space */
-static int edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32)
-{
- int ret;
-
- ret = pci_read_config_dword(dev, reg, val32);
- if (ret != 0)
- printk(KERN_ERR AMD8111_EDAC_MOD_STR
- " PCI Access Read Error at 0x%x\n", reg);
-
- return ret;
-}
-
-static void edac_pci_read_byte(struct pci_dev *dev, int reg, u8 *val8)
-{
- int ret;
-
- ret = pci_read_config_byte(dev, reg, val8);
- if (ret != 0)
- printk(KERN_ERR AMD8111_EDAC_MOD_STR
- " PCI Access Read Error at 0x%x\n", reg);
-}
-
-static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32)
-{
- int ret;
-
- ret = pci_write_config_dword(dev, reg, val32);
- if (ret != 0)
- printk(KERN_ERR AMD8111_EDAC_MOD_STR
- " PCI Access Write Error at 0x%x\n", reg);
-}
-
-static void edac_pci_write_byte(struct pci_dev *dev, int reg, u8 val8)
-{
- int ret;
-
- ret = pci_write_config_byte(dev, reg, val8);
- if (ret != 0)
- printk(KERN_ERR AMD8111_EDAC_MOD_STR
- " PCI Access Write Error at 0x%x\n", reg);
-}
-
-/*
- * device-specific methods for amd8111 PCI Bridge Controller
- *
- * Error Reporting and Handling for amd8111 chipset could be found
- * in its datasheet 3.1.2 section, P37
- */
-static void amd8111_pci_bridge_init(struct amd8111_pci_info *pci_info)
-{
- u32 val32;
- struct pci_dev *dev = pci_info->dev;
-
- /* First clear error detection flags on the host interface */
-
- /* Clear SSE/SMA/STA flags in the global status register*/
- edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
- if (val32 & PCI_STSCMD_CLEAR_MASK)
- edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
-
- /* Clear CRC and Link Fail flags in HT Link Control reg */
- edac_pci_read_dword(dev, REG_HT_LINK, &val32);
- if (val32 & HT_LINK_CLEAR_MASK)
- edac_pci_write_dword(dev, REG_HT_LINK, val32);
-
- /* Second clear all fault on the secondary interface */
-
- /* Clear error flags in the memory-base limit reg. */
- edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
- if (val32 & MEM_LIMIT_CLEAR_MASK)
- edac_pci_write_dword(dev, REG_MEM_LIM, val32);
-
- /* Clear Discard Timer Expired flag in Interrupt/Bridge Control reg */
- edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
- if (val32 & PCI_INTBRG_CTRL_CLEAR_MASK)
- edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
-
- /* Last enable error detections */
- if (edac_op_state == EDAC_OPSTATE_POLL) {
- /* Enable System Error reporting in global status register */
- edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
- val32 |= PCI_STSCMD_SERREN;
- edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
-
- /* Enable CRC Sync flood packets to HyperTransport Link */
- edac_pci_read_dword(dev, REG_HT_LINK, &val32);
- val32 |= HT_LINK_CRCFEN;
- edac_pci_write_dword(dev, REG_HT_LINK, val32);
-
- /* Enable SSE reporting etc in Interrupt control reg */
- edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
- val32 |= PCI_INTBRG_CTRL_POLL_MASK;
- edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
- }
-}
-
-static void amd8111_pci_bridge_exit(struct amd8111_pci_info *pci_info)
-{
- u32 val32;
- struct pci_dev *dev = pci_info->dev;
-
- if (edac_op_state == EDAC_OPSTATE_POLL) {
- /* Disable System Error reporting */
- edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
- val32 &= ~PCI_STSCMD_SERREN;
- edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
-
- /* Disable CRC flood packets */
- edac_pci_read_dword(dev, REG_HT_LINK, &val32);
- val32 &= ~HT_LINK_CRCFEN;
- edac_pci_write_dword(dev, REG_HT_LINK, val32);
-
- /* Disable DTSERREN/MARSP/SERREN in Interrupt Control reg */
- edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
- val32 &= ~PCI_INTBRG_CTRL_POLL_MASK;
- edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
- }
-}
-
-static void amd8111_pci_bridge_check(struct edac_pci_ctl_info *edac_dev)
-{
- struct amd8111_pci_info *pci_info = edac_dev->pvt_info;
- struct pci_dev *dev = pci_info->dev;
- u32 val32;
-
- /* Check out PCI Bridge Status and Command Register */
- edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
- if (val32 & PCI_STSCMD_CLEAR_MASK) {
- printk(KERN_INFO "Error(s) in PCI bridge status and command"
- "register on device %s\n", pci_info->ctl_name);
- printk(KERN_INFO "SSE: %d, RMA: %d, RTA: %d\n",
- (val32 & PCI_STSCMD_SSE) != 0,
- (val32 & PCI_STSCMD_RMA) != 0,
- (val32 & PCI_STSCMD_RTA) != 0);
-
- val32 |= PCI_STSCMD_CLEAR_MASK;
- edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check out HyperTransport Link Control Register */
- edac_pci_read_dword(dev, REG_HT_LINK, &val32);
- if (val32 & HT_LINK_LKFAIL) {
- printk(KERN_INFO "Error(s) in hypertransport link control"
- "register on device %s\n", pci_info->ctl_name);
- printk(KERN_INFO "LKFAIL: %d\n",
- (val32 & HT_LINK_LKFAIL) != 0);
-
- val32 |= HT_LINK_LKFAIL;
- edac_pci_write_dword(dev, REG_HT_LINK, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check out PCI Interrupt and Bridge Control Register */
- edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
- if (val32 & PCI_INTBRG_CTRL_DTSTAT) {
- printk(KERN_INFO "Error(s) in PCI interrupt and bridge control"
- "register on device %s\n", pci_info->ctl_name);
- printk(KERN_INFO "DTSTAT: %d\n",
- (val32 & PCI_INTBRG_CTRL_DTSTAT) != 0);
-
- val32 |= PCI_INTBRG_CTRL_DTSTAT;
- edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check out PCI Bridge Memory Base-Limit Register */
- edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
- if (val32 & MEM_LIMIT_CLEAR_MASK) {
- printk(KERN_INFO
- "Error(s) in mem limit register on %s device\n",
- pci_info->ctl_name);
- printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n"
- "RTA: %d, STA: %d, MDPE: %d\n",
- (val32 & MEM_LIMIT_DPE) != 0,
- (val32 & MEM_LIMIT_RSE) != 0,
- (val32 & MEM_LIMIT_RMA) != 0,
- (val32 & MEM_LIMIT_RTA) != 0,
- (val32 & MEM_LIMIT_STA) != 0,
- (val32 & MEM_LIMIT_MDPE) != 0);
-
- val32 |= MEM_LIMIT_CLEAR_MASK;
- edac_pci_write_dword(dev, REG_MEM_LIM, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-}
-
-static struct resource *legacy_io_res;
-static int at_compat_reg_broken;
-#define LEGACY_NR_PORTS 1
-
-/* device-specific methods for amd8111 LPC Bridge device */
-static void amd8111_lpc_bridge_init(struct amd8111_dev_info *dev_info)
-{
- u8 val8;
- struct pci_dev *dev = dev_info->dev;
-
- /* First clear REG_AT_COMPAT[SERR, IOCHK] if necessary */
- legacy_io_res = request_region(REG_AT_COMPAT, LEGACY_NR_PORTS,
- AMD8111_EDAC_MOD_STR);
- if (!legacy_io_res)
- printk(KERN_INFO "%s: failed to request legacy I/O region "
- "start %d, len %d\n", __func__,
- REG_AT_COMPAT, LEGACY_NR_PORTS);
- else {
- val8 = __do_inb(REG_AT_COMPAT);
- if (val8 == 0xff) { /* buggy port */
- printk(KERN_INFO "%s: port %d is buggy, not supported"
- " by hardware?\n", __func__, REG_AT_COMPAT);
- at_compat_reg_broken = 1;
- release_region(REG_AT_COMPAT, LEGACY_NR_PORTS);
- legacy_io_res = NULL;
- } else {
- u8 out8 = 0;
- if (val8 & AT_COMPAT_SERR)
- out8 = AT_COMPAT_CLRSERR;
- if (val8 & AT_COMPAT_IOCHK)
- out8 |= AT_COMPAT_CLRIOCHK;
- if (out8 > 0)
- __do_outb(out8, REG_AT_COMPAT);
- }
- }
-
- /* Second clear error flags on LPC bridge */
- edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8);
- if (val8 & IO_CTRL_1_CLEAR_MASK)
- edac_pci_write_byte(dev, REG_IO_CTRL_1, val8);
-}
-
-static void amd8111_lpc_bridge_exit(struct amd8111_dev_info *dev_info)
-{
- if (legacy_io_res)
- release_region(REG_AT_COMPAT, LEGACY_NR_PORTS);
-}
-
-static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev)
-{
- struct amd8111_dev_info *dev_info = edac_dev->pvt_info;
- struct pci_dev *dev = dev_info->dev;
- u8 val8;
-
- edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8);
- if (val8 & IO_CTRL_1_CLEAR_MASK) {
- printk(KERN_INFO
- "Error(s) in IO control register on %s device\n",
- dev_info->ctl_name);
- printk(KERN_INFO "LPC ERR: %d, PW2LPC: %d\n",
- (val8 & IO_CTRL_1_LPC_ERR) != 0,
- (val8 & IO_CTRL_1_PW2LPC) != 0);
-
- val8 |= IO_CTRL_1_CLEAR_MASK;
- edac_pci_write_byte(dev, REG_IO_CTRL_1, val8);
-
- edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
- }
-
- if (at_compat_reg_broken == 0) {
- u8 out8 = 0;
- val8 = __do_inb(REG_AT_COMPAT);
- if (val8 & AT_COMPAT_SERR)
- out8 = AT_COMPAT_CLRSERR;
- if (val8 & AT_COMPAT_IOCHK)
- out8 |= AT_COMPAT_CLRIOCHK;
- if (out8 > 0) {
- __do_outb(out8, REG_AT_COMPAT);
- edac_device_handle_ue(edac_dev, 0, 0,
- edac_dev->ctl_name);
- }
- }
-}
-
-/* General devices represented by edac_device_ctl_info */
-static struct amd8111_dev_info amd8111_devices[] = {
- [LPC_BRIDGE] = {
- .err_dev = PCI_DEVICE_ID_AMD_8111_LPC,
- .ctl_name = "lpc",
- .init = amd8111_lpc_bridge_init,
- .exit = amd8111_lpc_bridge_exit,
- .check = amd8111_lpc_bridge_check,
- },
- {0},
-};
-
-/* PCI controllers represented by edac_pci_ctl_info */
-static struct amd8111_pci_info amd8111_pcis[] = {
- [PCI_BRIDGE] = {
- .err_dev = PCI_DEVICE_ID_AMD_8111_PCI,
- .ctl_name = "AMD8111_PCI_Controller",
- .init = amd8111_pci_bridge_init,
- .exit = amd8111_pci_bridge_exit,
- .check = amd8111_pci_bridge_check,
- },
- {0},
-};
-
-static int amd8111_dev_probe(struct pci_dev *dev,
- const struct pci_device_id *id)
-{
- struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data];
- int ret = -ENODEV;
-
- dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
- dev_info->err_dev, NULL);
-
- if (!dev_info->dev) {
- printk(KERN_ERR "EDAC device not found:"
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, dev_info->err_dev,
- dev_info->ctl_name);
- goto err;
- }
-
- if (pci_enable_device(dev_info->dev)) {
- printk(KERN_ERR "failed to enable:"
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, dev_info->err_dev,
- dev_info->ctl_name);
- goto err_dev_put;
- }
-
- /*
- * we do not allocate extra private structure for
- * edac_device_ctl_info, but make use of existing
- * one instead.
- */
- dev_info->edac_idx = edac_device_alloc_index();
- dev_info->edac_dev =
- edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1,
- NULL, 0, 0,
- NULL, 0, dev_info->edac_idx);
- if (!dev_info->edac_dev) {
- ret = -ENOMEM;
- goto err_dev_put;
- }
-
- dev_info->edac_dev->pvt_info = dev_info;
- dev_info->edac_dev->dev = &dev_info->dev->dev;
- dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
- dev_info->edac_dev->ctl_name = dev_info->ctl_name;
- dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
-
- if (edac_op_state == EDAC_OPSTATE_POLL)
- dev_info->edac_dev->edac_check = dev_info->check;
-
- if (dev_info->init)
- dev_info->init(dev_info);
-
- if (edac_device_add_device(dev_info->edac_dev) > 0) {
- printk(KERN_ERR "failed to add edac_dev for %s\n",
- dev_info->ctl_name);
- goto err_edac_free_ctl;
- }
-
- printk(KERN_INFO "added one edac_dev on AMD8111 "
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, dev_info->err_dev,
- dev_info->ctl_name);
-
- return 0;
-
-err_edac_free_ctl:
- edac_device_free_ctl_info(dev_info->edac_dev);
-err_dev_put:
- pci_dev_put(dev_info->dev);
-err:
- return ret;
-}
-
-static void amd8111_dev_remove(struct pci_dev *dev)
-{
- struct amd8111_dev_info *dev_info;
-
- for (dev_info = amd8111_devices; dev_info->err_dev; dev_info++)
- if (dev_info->dev->device == dev->device)
- break;
-
- if (!dev_info->err_dev) /* should never happen */
- return;
-
- if (dev_info->edac_dev) {
- edac_device_del_device(dev_info->edac_dev->dev);
- edac_device_free_ctl_info(dev_info->edac_dev);
- }
-
- if (dev_info->exit)
- dev_info->exit(dev_info);
-
- pci_dev_put(dev_info->dev);
-}
-
-static int amd8111_pci_probe(struct pci_dev *dev,
- const struct pci_device_id *id)
-{
- struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data];
- int ret = -ENODEV;
-
- pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
- pci_info->err_dev, NULL);
-
- if (!pci_info->dev) {
- printk(KERN_ERR "EDAC device not found:"
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, pci_info->err_dev,
- pci_info->ctl_name);
- goto err;
- }
-
- if (pci_enable_device(pci_info->dev)) {
- printk(KERN_ERR "failed to enable:"
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, pci_info->err_dev,
- pci_info->ctl_name);
- goto err_dev_put;
- }
-
- /*
- * we do not allocate extra private structure for
- * edac_pci_ctl_info, but make use of existing
- * one instead.
- */
- pci_info->edac_idx = edac_pci_alloc_index();
- pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name);
- if (!pci_info->edac_dev) {
- ret = -ENOMEM;
- goto err_dev_put;
- }
-
- pci_info->edac_dev->pvt_info = pci_info;
- pci_info->edac_dev->dev = &pci_info->dev->dev;
- pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
- pci_info->edac_dev->ctl_name = pci_info->ctl_name;
- pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev);
-
- if (edac_op_state == EDAC_OPSTATE_POLL)
- pci_info->edac_dev->edac_check = pci_info->check;
-
- if (pci_info->init)
- pci_info->init(pci_info);
-
- if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) {
- printk(KERN_ERR "failed to add edac_pci for %s\n",
- pci_info->ctl_name);
- goto err_edac_free_ctl;
- }
-
- printk(KERN_INFO "added one edac_pci on AMD8111 "
- "vendor %x, device %x, name %s\n",
- PCI_VENDOR_ID_AMD, pci_info->err_dev,
- pci_info->ctl_name);
-
- return 0;
-
-err_edac_free_ctl:
- edac_pci_free_ctl_info(pci_info->edac_dev);
-err_dev_put:
- pci_dev_put(pci_info->dev);
-err:
- return ret;
-}
-
-static void amd8111_pci_remove(struct pci_dev *dev)
-{
- struct amd8111_pci_info *pci_info;
-
- for (pci_info = amd8111_pcis; pci_info->err_dev; pci_info++)
- if (pci_info->dev->device == dev->device)
- break;
-
- if (!pci_info->err_dev) /* should never happen */
- return;
-
- if (pci_info->edac_dev) {
- edac_pci_del_device(pci_info->edac_dev->dev);
- edac_pci_free_ctl_info(pci_info->edac_dev);
- }
-
- if (pci_info->exit)
- pci_info->exit(pci_info);
-
- pci_dev_put(pci_info->dev);
-}
-
-/* PCI Device ID talbe for general EDAC device */
-static const struct pci_device_id amd8111_edac_dev_tbl[] = {
- {
- PCI_VEND_DEV(AMD, 8111_LPC),
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .class = 0,
- .class_mask = 0,
- .driver_data = LPC_BRIDGE,
- },
- {
- 0,
- } /* table is NULL-terminated */
-};
-MODULE_DEVICE_TABLE(pci, amd8111_edac_dev_tbl);
-
-static struct pci_driver amd8111_edac_dev_driver = {
- .name = "AMD8111_EDAC_DEV",
- .probe = amd8111_dev_probe,
- .remove = amd8111_dev_remove,
- .id_table = amd8111_edac_dev_tbl,
-};
-
-/* PCI Device ID table for EDAC PCI controller */
-static const struct pci_device_id amd8111_edac_pci_tbl[] = {
- {
- PCI_VEND_DEV(AMD, 8111_PCI),
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .class = 0,
- .class_mask = 0,
- .driver_data = PCI_BRIDGE,
- },
- {
- 0,
- } /* table is NULL-terminated */
-};
-MODULE_DEVICE_TABLE(pci, amd8111_edac_pci_tbl);
-
-static struct pci_driver amd8111_edac_pci_driver = {
- .name = "AMD8111_EDAC_PCI",
- .probe = amd8111_pci_probe,
- .remove = amd8111_pci_remove,
- .id_table = amd8111_edac_pci_tbl,
-};
-
-static int __init amd8111_edac_init(void)
-{
- int val;
-
- printk(KERN_INFO "AMD8111 EDAC driver " AMD8111_EDAC_REVISION "\n");
- printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n");
-
- /* Only POLL mode supported so far */
- edac_op_state = EDAC_OPSTATE_POLL;
-
- val = pci_register_driver(&amd8111_edac_dev_driver);
- val |= pci_register_driver(&amd8111_edac_pci_driver);
-
- return val;
-}
-
-static void __exit amd8111_edac_exit(void)
-{
- pci_unregister_driver(&amd8111_edac_pci_driver);
- pci_unregister_driver(&amd8111_edac_dev_driver);
-}
-
-
-module_init(amd8111_edac_init);
-module_exit(amd8111_edac_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n");
-MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module");
diff --git a/drivers/edac/amd8111_edac.h b/drivers/edac/amd8111_edac.h
deleted file mode 100644
index 200cab1b3e42..000000000000
--- a/drivers/edac/amd8111_edac.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * amd8111_edac.h, EDAC defs for AMD8111 hypertransport chip
- *
- * Copyright (c) 2008 Wind River Systems, Inc.
- *
- * Authors: Cao Qingtao <qingtao.cao@windriver.com>
- * Benjamin Walsh <benjamin.walsh@windriver.com>
- * Hu Yongqi <yongqi.hu@windriver.com>
- */
-
-#ifndef _AMD8111_EDAC_H_
-#define _AMD8111_EDAC_H_
-
-/************************************************************
- * PCI Bridge Status and Command Register, DevA:0x04
- ************************************************************/
-#define REG_PCI_STSCMD 0x04
-enum pci_stscmd_bits {
- PCI_STSCMD_SSE = BIT(30),
- PCI_STSCMD_RMA = BIT(29),
- PCI_STSCMD_RTA = BIT(28),
- PCI_STSCMD_SERREN = BIT(8),
- PCI_STSCMD_CLEAR_MASK = (PCI_STSCMD_SSE |
- PCI_STSCMD_RMA |
- PCI_STSCMD_RTA)
-};
-
-/************************************************************
- * PCI Bridge Memory Base-Limit Register, DevA:0x1c
- ************************************************************/
-#define REG_MEM_LIM 0x1c
-enum mem_limit_bits {
- MEM_LIMIT_DPE = BIT(31),
- MEM_LIMIT_RSE = BIT(30),
- MEM_LIMIT_RMA = BIT(29),
- MEM_LIMIT_RTA = BIT(28),
- MEM_LIMIT_STA = BIT(27),
- MEM_LIMIT_MDPE = BIT(24),
- MEM_LIMIT_CLEAR_MASK = (MEM_LIMIT_DPE |
- MEM_LIMIT_RSE |
- MEM_LIMIT_RMA |
- MEM_LIMIT_RTA |
- MEM_LIMIT_STA |
- MEM_LIMIT_MDPE)
-};
-
-/************************************************************
- * HyperTransport Link Control Register, DevA:0xc4
- ************************************************************/
-#define REG_HT_LINK 0xc4
-enum ht_link_bits {
- HT_LINK_LKFAIL = BIT(4),
- HT_LINK_CRCFEN = BIT(1),
- HT_LINK_CLEAR_MASK = (HT_LINK_LKFAIL)
-};
-
-/************************************************************
- * PCI Bridge Interrupt and Bridge Control, DevA:0x3c
- ************************************************************/
-#define REG_PCI_INTBRG_CTRL 0x3c
-enum pci_intbrg_ctrl_bits {
- PCI_INTBRG_CTRL_DTSERREN = BIT(27),
- PCI_INTBRG_CTRL_DTSTAT = BIT(26),
- PCI_INTBRG_CTRL_MARSP = BIT(21),
- PCI_INTBRG_CTRL_SERREN = BIT(17),
- PCI_INTBRG_CTRL_PEREN = BIT(16),
- PCI_INTBRG_CTRL_CLEAR_MASK = (PCI_INTBRG_CTRL_DTSTAT),
- PCI_INTBRG_CTRL_POLL_MASK = (PCI_INTBRG_CTRL_DTSERREN |
- PCI_INTBRG_CTRL_MARSP |
- PCI_INTBRG_CTRL_SERREN)
-};
-
-/************************************************************
- * I/O Control 1 Register, DevB:0x40
- ************************************************************/
-#define REG_IO_CTRL_1 0x40
-enum io_ctrl_1_bits {
- IO_CTRL_1_NMIONERR = BIT(7),
- IO_CTRL_1_LPC_ERR = BIT(6),
- IO_CTRL_1_PW2LPC = BIT(1),
- IO_CTRL_1_CLEAR_MASK = (IO_CTRL_1_LPC_ERR | IO_CTRL_1_PW2LPC)
-};
-
-/************************************************************
- * Legacy I/O Space Registers
- ************************************************************/
-#define REG_AT_COMPAT 0x61
-enum at_compat_bits {
- AT_COMPAT_SERR = BIT(7),
- AT_COMPAT_IOCHK = BIT(6),
- AT_COMPAT_CLRIOCHK = BIT(3),
- AT_COMPAT_CLRSERR = BIT(2),
-};
-
-struct amd8111_dev_info {
- u16 err_dev; /* PCI Device ID */
- struct pci_dev *dev;
- int edac_idx; /* device index */
- char *ctl_name;
- struct edac_device_ctl_info *edac_dev;
- void (*init)(struct amd8111_dev_info *dev_info);
- void (*exit)(struct amd8111_dev_info *dev_info);
- void (*check)(struct edac_device_ctl_info *edac_dev);
-};
-
-struct amd8111_pci_info {
- u16 err_dev; /* PCI Device ID */
- struct pci_dev *dev;
- int edac_idx; /* pci index */
- const char *ctl_name;
- struct edac_pci_ctl_info *edac_dev;
- void (*init)(struct amd8111_pci_info *dev_info);
- void (*exit)(struct amd8111_pci_info *dev_info);
- void (*check)(struct edac_pci_ctl_info *edac_dev);
-};
-
-#endif /* _AMD8111_EDAC_H_ */
diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c
deleted file mode 100644
index 169353710982..000000000000
--- a/drivers/edac/amd8131_edac.c
+++ /dev/null
@@ -1,358 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * amd8131_edac.c, AMD8131 hypertransport chip EDAC kernel module
- *
- * Copyright (c) 2008 Wind River Systems, Inc.
- *
- * Authors: Cao Qingtao <qingtao.cao@windriver.com>
- * Benjamin Walsh <benjamin.walsh@windriver.com>
- * Hu Yongqi <yongqi.hu@windriver.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/bitops.h>
-#include <linux/edac.h>
-#include <linux/pci_ids.h>
-
-#include "edac_module.h"
-#include "amd8131_edac.h"
-
-#define AMD8131_EDAC_REVISION " Ver: 1.0.0"
-#define AMD8131_EDAC_MOD_STR "amd8131_edac"
-
-/* Wrapper functions for accessing PCI configuration space */
-static void edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32)
-{
- int ret;
-
- ret = pci_read_config_dword(dev, reg, val32);
- if (ret != 0)
- printk(KERN_ERR AMD8131_EDAC_MOD_STR
- " PCI Access Read Error at 0x%x\n", reg);
-}
-
-static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32)
-{
- int ret;
-
- ret = pci_write_config_dword(dev, reg, val32);
- if (ret != 0)
- printk(KERN_ERR AMD8131_EDAC_MOD_STR
- " PCI Access Write Error at 0x%x\n", reg);
-}
-
-/* Support up to two AMD8131 chipsets on a platform */
-static struct amd8131_dev_info amd8131_devices[] = {
- {
- .inst = NORTH_A,
- .devfn = DEVFN_PCIX_BRIDGE_NORTH_A,
- .ctl_name = "AMD8131_PCIX_NORTH_A",
- },
- {
- .inst = NORTH_B,
- .devfn = DEVFN_PCIX_BRIDGE_NORTH_B,
- .ctl_name = "AMD8131_PCIX_NORTH_B",
- },
- {
- .inst = SOUTH_A,
- .devfn = DEVFN_PCIX_BRIDGE_SOUTH_A,
- .ctl_name = "AMD8131_PCIX_SOUTH_A",
- },
- {
- .inst = SOUTH_B,
- .devfn = DEVFN_PCIX_BRIDGE_SOUTH_B,
- .ctl_name = "AMD8131_PCIX_SOUTH_B",
- },
- {.inst = NO_BRIDGE,},
-};
-
-static void amd8131_pcix_init(struct amd8131_dev_info *dev_info)
-{
- u32 val32;
- struct pci_dev *dev = dev_info->dev;
-
- /* First clear error detection flags */
- edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
- if (val32 & MEM_LIMIT_MASK)
- edac_pci_write_dword(dev, REG_MEM_LIM, val32);
-
- /* Clear Discard Timer Timedout flag */
- edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
- if (val32 & INT_CTLR_DTS)
- edac_pci_write_dword(dev, REG_INT_CTLR, val32);
-
- /* Clear CRC Error flag on link side A */
- edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
- if (val32 & LNK_CTRL_CRCERR_A)
- edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
-
- /* Clear CRC Error flag on link side B */
- edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
- if (val32 & LNK_CTRL_CRCERR_B)
- edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
-
- /*
- * Then enable all error detections.
- *
- * Setup Discard Timer Sync Flood Enable,
- * System Error Enable and Parity Error Enable.
- */
- edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
- val32 |= INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE;
- edac_pci_write_dword(dev, REG_INT_CTLR, val32);
-
- /* Enable overall SERR Error detection */
- edac_pci_read_dword(dev, REG_STS_CMD, &val32);
- val32 |= STS_CMD_SERREN;
- edac_pci_write_dword(dev, REG_STS_CMD, val32);
-
- /* Setup CRC Flood Enable for link side A */
- edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
- val32 |= LNK_CTRL_CRCFEN;
- edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
-
- /* Setup CRC Flood Enable for link side B */
- edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
- val32 |= LNK_CTRL_CRCFEN;
- edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
-}
-
-static void amd8131_pcix_exit(struct amd8131_dev_info *dev_info)
-{
- u32 val32;
- struct pci_dev *dev = dev_info->dev;
-
- /* Disable SERR, PERR and DTSE Error detection */
- edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
- val32 &= ~(INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE);
- edac_pci_write_dword(dev, REG_INT_CTLR, val32);
-
- /* Disable overall System Error detection */
- edac_pci_read_dword(dev, REG_STS_CMD, &val32);
- val32 &= ~STS_CMD_SERREN;
- edac_pci_write_dword(dev, REG_STS_CMD, val32);
-
- /* Disable CRC Sync Flood on link side A */
- edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
- val32 &= ~LNK_CTRL_CRCFEN;
- edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
-
- /* Disable CRC Sync Flood on link side B */
- edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
- val32 &= ~LNK_CTRL_CRCFEN;
- edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
-}
-
-static void amd8131_pcix_check(struct edac_pci_ctl_info *edac_dev)
-{
- struct amd8131_dev_info *dev_info = edac_dev->pvt_info;
- struct pci_dev *dev = dev_info->dev;
- u32 val32;
-
- /* Check PCI-X Bridge Memory Base-Limit Register for errors */
- edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
- if (val32 & MEM_LIMIT_MASK) {
- printk(KERN_INFO "Error(s) in mem limit register "
- "on %s bridge\n", dev_info->ctl_name);
- printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n"
- "RTA: %d, STA: %d, MDPE: %d\n",
- val32 & MEM_LIMIT_DPE,
- val32 & MEM_LIMIT_RSE,
- val32 & MEM_LIMIT_RMA,
- val32 & MEM_LIMIT_RTA,
- val32 & MEM_LIMIT_STA,
- val32 & MEM_LIMIT_MDPE);
-
- val32 |= MEM_LIMIT_MASK;
- edac_pci_write_dword(dev, REG_MEM_LIM, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check if Discard Timer timed out */
- edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
- if (val32 & INT_CTLR_DTS) {
- printk(KERN_INFO "Error(s) in interrupt and control register "
- "on %s bridge\n", dev_info->ctl_name);
- printk(KERN_INFO "DTS: %d\n", val32 & INT_CTLR_DTS);
-
- val32 |= INT_CTLR_DTS;
- edac_pci_write_dword(dev, REG_INT_CTLR, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check if CRC error happens on link side A */
- edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
- if (val32 & LNK_CTRL_CRCERR_A) {
- printk(KERN_INFO "Error(s) in link conf and control register "
- "on %s bridge\n", dev_info->ctl_name);
- printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_A);
-
- val32 |= LNK_CTRL_CRCERR_A;
- edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-
- /* Check if CRC error happens on link side B */
- edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
- if (val32 & LNK_CTRL_CRCERR_B) {
- printk(KERN_INFO "Error(s) in link conf and control register "
- "on %s bridge\n", dev_info->ctl_name);
- printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_B);
-
- val32 |= LNK_CTRL_CRCERR_B;
- edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
-
- edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
- }
-}
-
-static struct amd8131_info amd8131_chipset = {
- .err_dev = PCI_DEVICE_ID_AMD_8131_APIC,
- .devices = amd8131_devices,
- .init = amd8131_pcix_init,
- .exit = amd8131_pcix_exit,
- .check = amd8131_pcix_check,
-};
-
-/*
- * There are 4 PCIX Bridges on ATCA-6101 that share the same PCI Device ID,
- * so amd8131_probe() would be called by kernel 4 times, with different
- * address of pci_dev for each of them each time.
- */
-static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
- struct amd8131_dev_info *dev_info;
-
- for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE;
- dev_info++)
- if (dev_info->devfn == dev->devfn)
- break;
-
- if (dev_info->inst == NO_BRIDGE) /* should never happen */
- return -ENODEV;
-
- /*
- * We can't call pci_get_device() as we are used to do because
- * there are 4 of them but pci_dev_get() instead.
- */
- dev_info->dev = pci_dev_get(dev);
-
- if (pci_enable_device(dev_info->dev)) {
- pci_dev_put(dev_info->dev);
- printk(KERN_ERR "failed to enable:"
- "vendor %x, device %x, devfn %x, name %s\n",
- PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev,
- dev_info->devfn, dev_info->ctl_name);
- return -ENODEV;
- }
-
- /*
- * we do not allocate extra private structure for
- * edac_pci_ctl_info, but make use of existing
- * one instead.
- */
- dev_info->edac_idx = edac_pci_alloc_index();
- dev_info->edac_dev = edac_pci_alloc_ctl_info(0, dev_info->ctl_name);
- if (!dev_info->edac_dev)
- return -ENOMEM;
-
- dev_info->edac_dev->pvt_info = dev_info;
- dev_info->edac_dev->dev = &dev_info->dev->dev;
- dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR;
- dev_info->edac_dev->ctl_name = dev_info->ctl_name;
- dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
-
- if (edac_op_state == EDAC_OPSTATE_POLL)
- dev_info->edac_dev->edac_check = amd8131_chipset.check;
-
- if (amd8131_chipset.init)
- amd8131_chipset.init(dev_info);
-
- if (edac_pci_add_device(dev_info->edac_dev, dev_info->edac_idx) > 0) {
- printk(KERN_ERR "failed edac_pci_add_device() for %s\n",
- dev_info->ctl_name);
- edac_pci_free_ctl_info(dev_info->edac_dev);
- return -ENODEV;
- }
-
- printk(KERN_INFO "added one device on AMD8131 "
- "vendor %x, device %x, devfn %x, name %s\n",
- PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev,
- dev_info->devfn, dev_info->ctl_name);
-
- return 0;
-}
-
-static void amd8131_remove(struct pci_dev *dev)
-{
- struct amd8131_dev_info *dev_info;
-
- for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE;
- dev_info++)
- if (dev_info->devfn == dev->devfn)
- break;
-
- if (dev_info->inst == NO_BRIDGE) /* should never happen */
- return;
-
- if (dev_info->edac_dev) {
- edac_pci_del_device(dev_info->edac_dev->dev);
- edac_pci_free_ctl_info(dev_info->edac_dev);
- }
-
- if (amd8131_chipset.exit)
- amd8131_chipset.exit(dev_info);
-
- pci_dev_put(dev_info->dev);
-}
-
-static const struct pci_device_id amd8131_edac_pci_tbl[] = {
- {
- PCI_VEND_DEV(AMD, 8131_BRIDGE),
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .class = 0,
- .class_mask = 0,
- .driver_data = 0,
- },
- {
- 0,
- } /* table is NULL-terminated */
-};
-MODULE_DEVICE_TABLE(pci, amd8131_edac_pci_tbl);
-
-static struct pci_driver amd8131_edac_driver = {
- .name = AMD8131_EDAC_MOD_STR,
- .probe = amd8131_probe,
- .remove = amd8131_remove,
- .id_table = amd8131_edac_pci_tbl,
-};
-
-static int __init amd8131_edac_init(void)
-{
- printk(KERN_INFO "AMD8131 EDAC driver " AMD8131_EDAC_REVISION "\n");
- printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n");
-
- /* Only POLL mode supported so far */
- edac_op_state = EDAC_OPSTATE_POLL;
-
- return pci_register_driver(&amd8131_edac_driver);
-}
-
-static void __exit amd8131_edac_exit(void)
-{
- pci_unregister_driver(&amd8131_edac_driver);
-}
-
-module_init(amd8131_edac_init);
-module_exit(amd8131_edac_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n");
-MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module");
diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h
deleted file mode 100644
index 5f362abdaf12..000000000000
--- a/drivers/edac/amd8131_edac.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * amd8131_edac.h, EDAC defs for AMD8131 hypertransport chip
- *
- * Copyright (c) 2008 Wind River Systems, Inc.
- *
- * Authors: Cao Qingtao <qingtao.cao@windriver.com>
- * Benjamin Walsh <benjamin.walsh@windriver.com>
- * Hu Yongqi <yongqi.hu@windriver.com>
- */
-
-#ifndef _AMD8131_EDAC_H_
-#define _AMD8131_EDAC_H_
-
-#define DEVFN_PCIX_BRIDGE_NORTH_A 8
-#define DEVFN_PCIX_BRIDGE_NORTH_B 16
-#define DEVFN_PCIX_BRIDGE_SOUTH_A 24
-#define DEVFN_PCIX_BRIDGE_SOUTH_B 32
-
-/************************************************************
- * PCI-X Bridge Status and Command Register, DevA:0x04
- ************************************************************/
-#define REG_STS_CMD 0x04
-enum sts_cmd_bits {
- STS_CMD_SSE = BIT(30),
- STS_CMD_SERREN = BIT(8)
-};
-
-/************************************************************
- * PCI-X Bridge Interrupt and Bridge Control Register,
- ************************************************************/
-#define REG_INT_CTLR 0x3c
-enum int_ctlr_bits {
- INT_CTLR_DTSE = BIT(27),
- INT_CTLR_DTS = BIT(26),
- INT_CTLR_SERR = BIT(17),
- INT_CTLR_PERR = BIT(16)
-};
-
-/************************************************************
- * PCI-X Bridge Memory Base-Limit Register, DevA:0x1C
- ************************************************************/
-#define REG_MEM_LIM 0x1c
-enum mem_limit_bits {
- MEM_LIMIT_DPE = BIT(31),
- MEM_LIMIT_RSE = BIT(30),
- MEM_LIMIT_RMA = BIT(29),
- MEM_LIMIT_RTA = BIT(28),
- MEM_LIMIT_STA = BIT(27),
- MEM_LIMIT_MDPE = BIT(24),
- MEM_LIMIT_MASK = MEM_LIMIT_DPE|MEM_LIMIT_RSE|MEM_LIMIT_RMA|
- MEM_LIMIT_RTA|MEM_LIMIT_STA|MEM_LIMIT_MDPE
-};
-
-/************************************************************
- * Link Configuration And Control Register, side A
- ************************************************************/
-#define REG_LNK_CTRL_A 0xc4
-
-/************************************************************
- * Link Configuration And Control Register, side B
- ************************************************************/
-#define REG_LNK_CTRL_B 0xc8
-
-enum lnk_ctrl_bits {
- LNK_CTRL_CRCERR_A = BIT(9),
- LNK_CTRL_CRCERR_B = BIT(8),
- LNK_CTRL_CRCFEN = BIT(1)
-};
-
-enum pcix_bridge_inst {
- NORTH_A = 0,
- NORTH_B = 1,
- SOUTH_A = 2,
- SOUTH_B = 3,
- NO_BRIDGE = 4
-};
-
-struct amd8131_dev_info {
- int devfn;
- enum pcix_bridge_inst inst;
- struct pci_dev *dev;
- int edac_idx; /* pci device index */
- char *ctl_name;
- struct edac_pci_ctl_info *edac_dev;
-};
-
-/*
- * AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC
- * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are
- * four PCIX Bridges on ATCA-6101 altogether.
- *
- * These PCIX Bridges share the same PCI Device ID and are all of
- * Function Zero, they could be discrimated by their pci_dev->devfn.
- * They share the same set of init/check/exit methods, and their
- * private structures are collected in the devices[] array.
- */
-struct amd8131_info {
- u16 err_dev; /* PCI Device ID for AMD8131 APIC*/
- struct amd8131_dev_info *devices;
- void (*init)(struct amd8131_dev_info *dev_info);
- void (*exit)(struct amd8131_dev_info *dev_info);
- void (*check)(struct edac_pci_ctl_info *edac_dev);
-};
-
-#endif /* _AMD8131_EDAC_H_ */
-
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
index c4bd2fb9c46b..d64248fcf4c0 100644
--- a/drivers/edac/armada_xp_edac.c
+++ b/drivers/edac/armada_xp_edac.c
@@ -5,7 +5,9 @@
#include <linux/kernel.h>
#include <linux/edac.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
#include <asm/hardware/cache-l2x0.h>
#include <asm/hardware/cache-aurora-l2.h>
@@ -351,15 +353,13 @@ static int axp_mc_probe(struct platform_device *pdev)
return 0;
}
-static int axp_mc_remove(struct platform_device *pdev)
+static void axp_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static struct platform_driver axp_mc_driver = {
@@ -523,7 +523,7 @@ static int aurora_l2_probe(struct platform_device *pdev)
dev_warn(&pdev->dev, "data ECC is not enabled\n");
dci = edac_device_alloc_ctl_info(sizeof(*drvdata),
- "cpu", 1, "L", 1, 2, NULL, 0, 0);
+ "cpu", 1, "L", 1, 2, 0);
if (!dci)
return -ENOMEM;
@@ -564,7 +564,7 @@ static int aurora_l2_probe(struct platform_device *pdev)
return 0;
}
-static int aurora_l2_remove(struct platform_device *pdev)
+static void aurora_l2_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
#ifdef CONFIG_EDAC_DEBUG
@@ -575,8 +575,6 @@ static int aurora_l2_remove(struct platform_device *pdev)
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(dci);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static struct platform_driver aurora_l2_driver = {
diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index 6bd5f8815919..dadb8acbee3d 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -357,7 +357,7 @@ probe_exit02:
}
-static int aspeed_remove(struct platform_device *pdev)
+static void aspeed_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
@@ -369,8 +369,6 @@ static int aspeed_remove(struct platform_device *pdev)
mci = edac_mc_del_mc(&pdev->dev);
if (mci)
edac_mc_free(mci);
-
- return 0;
}
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
index e4736eb37bfb..ae3bb7afa103 100644
--- a/drivers/edac/bluefield_edac.c
+++ b/drivers/edac/bluefield_edac.c
@@ -47,13 +47,22 @@
#define MLXBF_EDAC_MAX_DIMM_PER_MC 2
#define MLXBF_EDAC_ERROR_GRAIN 8
+#define MLXBF_WRITE_REG_32 (0x82000009)
+#define MLXBF_READ_REG_32 (0x8200000A)
+#define MLXBF_SIP_SVC_VERSION (0x8200ff03)
+
+#define MLXBF_SMCCC_ACCESS_VIOLATION (-4)
+
+#define MLXBF_SVC_REQ_MAJOR 0
+#define MLXBF_SVC_REQ_MINOR 3
+
/*
- * Request MLNX_SIP_GET_DIMM_INFO
+ * Request MLXBF_SIP_GET_DIMM_INFO
*
* Retrieve information about DIMM on a certain slot.
*
* Call register usage:
- * a0: MLNX_SIP_GET_DIMM_INFO
+ * a0: MLXBF_SIP_GET_DIMM_INFO
* a1: (Memory controller index) << 16 | (Dimm index in memory controller)
* a2-7: not used.
*
@@ -61,7 +70,7 @@
* a0: MLXBF_DIMM_INFO defined below describing the DIMM.
* a1-3: not used.
*/
-#define MLNX_SIP_GET_DIMM_INFO 0x82000008
+#define MLXBF_SIP_GET_DIMM_INFO 0x82000008
/* Format for the SMC response about the memory information */
#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
@@ -72,9 +81,15 @@
#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
struct bluefield_edac_priv {
+ /* pointer to device structure */
+ struct device *dev;
int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
void __iomem *emi_base;
int dimm_per_mc;
+ /* access to secure regs supported */
+ bool svc_sreg_support;
+ /* SMC table# for secure regs access */
+ u32 sreg_tbl;
};
static u64 smc_call1(u64 smc_op, u64 smc_arg)
@@ -86,6 +101,71 @@ static u64 smc_call1(u64 smc_op, u64 smc_arg)
return res.a0;
}
+static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)
+{
+ struct arm_smccc_res res;
+ int status;
+
+ arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
+ 0, 0, 0, 0, 0, &res);
+
+ status = res.a0;
+
+ if (status == SMCCC_RET_NOT_SUPPORTED ||
+ status == MLXBF_SMCCC_ACCESS_VIOLATION)
+ return -1;
+
+ *result = (u32)res.a1;
+ return 0;
+}
+
+static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
+{
+ struct arm_smccc_res res;
+ int status;
+
+ arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
+ 0, 0, 0, 0, &res);
+
+ status = res.a0;
+
+ if (status == SMCCC_RET_NOT_SUPPORTED ||
+ status == MLXBF_SMCCC_ACCESS_VIOLATION)
+ return -1;
+ else
+ return 0;
+}
+
+static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result)
+{
+ void __iomem *addr;
+ int err = 0;
+
+ addr = priv->emi_base + offset;
+
+ if (priv->svc_sreg_support)
+ err = secure_readl(addr, result, priv->sreg_tbl);
+ else
+ *result = readl(addr);
+
+ return err;
+}
+
+static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data)
+{
+ void __iomem *addr;
+ int err = 0;
+
+ addr = priv->emi_base + offset;
+
+ if (priv->svc_sreg_support)
+ err = secure_writel(addr, data, priv->sreg_tbl);
+ else
+ writel(data, addr);
+
+ return err;
+}
+
/*
* Gather the ECC information from the External Memory Interface registers
* and report it to the edac handler.
@@ -99,7 +179,7 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
enum hw_event_mc_err_type ecc_type;
u64 ecc_dimm_addr;
- int ecc_dimm;
+ int ecc_dimm, err;
ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
HW_EVENT_ERR_UNCORRECTED;
@@ -109,14 +189,21 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
* registers with information about the last ECC error occurrence.
*/
ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
- writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
+ err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select);
+ if (err)
+ dev_err(priv->dev, "ECC latch select write failed.\n");
/*
* Verify that the ECC reported info in the registers is of the
* same type as the one asked to report. If not, just report the
* error without the detailed information.
*/
- dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
+ err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
+ if (err) {
+ dev_err(priv->dev, "DRAM syndrom read failed.\n");
+ return;
+ }
+
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
@@ -127,13 +214,27 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
return;
}
- dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO);
+ err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
+ if (err) {
+ dev_err(priv->dev, "DRAM additional info read failed.\n");
+ return;
+ }
+
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
- edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0);
- edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1);
+ err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
+ if (err) {
+ dev_err(priv->dev, "Error addr 0 read failed.\n");
+ return;
+ }
+
+ err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
+ if (err) {
+ dev_err(priv->dev, "Error addr 1 read failed.\n");
+ return;
+ }
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
@@ -147,6 +248,7 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
{
struct bluefield_edac_priv *priv = mci->pvt_info;
u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
+ int err;
/*
* The memory controller might not be initialized by the firmware
@@ -155,7 +257,12 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
if (mci->edac_cap == EDAC_FLAG_NONE)
return;
- ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT);
+ err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
+ if (err) {
+ dev_err(priv->dev, "ECC count read failed.\n");
+ return;
+ }
+
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
@@ -172,15 +279,18 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
}
/* Write to clear reported errors. */
- if (ecc_count)
- writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR);
+ if (ecc_count) {
+ err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error);
+ if (err)
+ dev_err(priv->dev, "ECC Error write failed.\n");
+ }
}
/* Initialize the DIMMs information for the given memory controller. */
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
{
struct bluefield_edac_priv *priv = mci->pvt_info;
- int mem_ctrl_idx = mci->mc_idx;
+ u64 mem_ctrl_idx = mci->mc_idx;
struct dimm_info *dimm;
u64 smc_info, smc_arg;
int is_empty = 1, i;
@@ -189,7 +299,7 @@ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
dimm = mci->dimms[i];
smc_arg = mem_ctrl_idx << 16 | i;
- smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg);
+ smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg);
if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
dimm->mtype = MEM_EMPTY;
@@ -244,6 +354,7 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
struct bluefield_edac_priv *priv;
struct device *dev = &pdev->dev;
struct edac_mc_layer layers[1];
+ struct arm_smccc_res res;
struct mem_ctl_info *mci;
struct resource *emi_res;
unsigned int mc_idx, dimm_count;
@@ -279,13 +390,43 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
return -ENOMEM;
priv = mci->pvt_info;
+ priv->dev = dev;
+
+ /*
+ * The "sec_reg_block" property in the ACPI table determines the method
+ * the driver uses to access the EMI registers:
+ * a) property is not present - directly access registers via readl/writel
+ * b) property is present - indirectly access registers via SMC calls
+ * (assuming required Silicon Provider service version found)
+ */
+ if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) {
+ priv->svc_sreg_support = false;
+ } else {
+ /*
+ * Check for minimum required Arm Silicon Provider (SiP) service
+ * version, ensuring support of required SMC function IDs.
+ */
+ arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
+ if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
+ res.a1 >= MLXBF_SVC_REQ_MINOR) {
+ priv->svc_sreg_support = true;
+ } else {
+ dev_err(dev, "Required SMCs are not supported.\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ }
priv->dimm_per_mc = dimm_count;
- priv->emi_base = devm_ioremap_resource(dev, emi_res);
- if (IS_ERR(priv->emi_base)) {
- dev_err(dev, "failed to map EMI IO resource\n");
- ret = PTR_ERR(priv->emi_base);
- goto err;
+ if (!priv->svc_sreg_support) {
+ priv->emi_base = devm_ioremap_resource(dev, emi_res);
+ if (IS_ERR(priv->emi_base)) {
+ dev_err(dev, "failed to map EMI IO resource\n");
+ ret = PTR_ERR(priv->emi_base);
+ goto err;
+ }
+ } else {
+ priv->emi_base = (void __iomem *)emi_res->start;
}
mci->pdev = dev;
@@ -320,17 +461,14 @@ err:
edac_mc_free(mci);
return ret;
-
}
-static int bluefield_edac_mc_remove(struct platform_device *pdev)
+static void bluefield_edac_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
deleted file mode 100644
index bc1f3416400e..000000000000
--- a/drivers/edac/cell_edac.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Cell MIC driver for ECC counting
- *
- * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
- * <benh@kernel.crashing.org>
- *
- * This file may be distributed under the terms of the
- * GNU General Public License.
- */
-#undef DEBUG
-
-#include <linux/edac.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/stop_machine.h>
-#include <linux/io.h>
-#include <linux/of_address.h>
-#include <asm/machdep.h>
-#include <asm/cell-regs.h>
-
-#include "edac_module.h"
-
-struct cell_edac_priv
-{
- struct cbe_mic_tm_regs __iomem *regs;
- int node;
- int chanmask;
-#ifdef DEBUG
- u64 prev_fir;
-#endif
-};
-
-static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = mci->csrows[0];
- unsigned long address, pfn, offset, syndrome;
-
- dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n",
- priv->node, chan, ar);
-
- /* Address decoding is likely a bit bogus, to dbl check */
- address = (ar & 0xffffffffe0000000ul) >> 29;
- if (priv->chanmask == 0x3)
- address = (address << 1) | chan;
- pfn = address >> PAGE_SHIFT;
- offset = address & ~PAGE_MASK;
- syndrome = (ar & 0x000000001fe00000ul) >> 21;
-
- /* TODO: Decoding of the error address */
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- csrow->first_page + pfn, offset, syndrome,
- 0, chan, -1, "", "");
-}
-
-static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = mci->csrows[0];
- unsigned long address, pfn, offset;
-
- dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n",
- priv->node, chan, ar);
-
- /* Address decoding is likely a bit bogus, to dbl check */
- address = (ar & 0xffffffffe0000000ul) >> 29;
- if (priv->chanmask == 0x3)
- address = (address << 1) | chan;
- pfn = address >> PAGE_SHIFT;
- offset = address & ~PAGE_MASK;
-
- /* TODO: Decoding of the error address */
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- csrow->first_page + pfn, offset, 0,
- 0, chan, -1, "", "");
-}
-
-static void cell_edac_check(struct mem_ctl_info *mci)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- u64 fir, addreg, clear = 0;
-
- fir = in_be64(&priv->regs->mic_fir);
-#ifdef DEBUG
- if (fir != priv->prev_fir) {
- dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir);
- priv->prev_fir = fir;
- }
-#endif
- if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
- clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET;
- cell_edac_count_ce(mci, 0, addreg);
- }
- if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
- clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET;
- cell_edac_count_ce(mci, 1, addreg);
- }
- if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
- clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET;
- cell_edac_count_ue(mci, 0, addreg);
- }
- if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
- clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET;
- cell_edac_count_ue(mci, 1, addreg);
- }
-
- /* The procedure for clearing FIR bits is a bit ... weird */
- if (clear) {
- fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK);
- fir |= CBE_MIC_FIR_ECC_RESET_MASK;
- fir &= ~clear;
- out_be64(&priv->regs->mic_fir, fir);
- (void)in_be64(&priv->regs->mic_fir);
-
- mb(); /* sync up */
-#ifdef DEBUG
- fir = in_be64(&priv->regs->mic_fir);
- dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir);
-#endif
- }
-}
-
-static void cell_edac_init_csrows(struct mem_ctl_info *mci)
-{
- struct csrow_info *csrow = mci->csrows[0];
- struct dimm_info *dimm;
- struct cell_edac_priv *priv = mci->pvt_info;
- struct device_node *np;
- int j;
- u32 nr_pages;
-
- for_each_node_by_name(np, "memory") {
- struct resource r;
-
- /* We "know" that the Cell firmware only creates one entry
- * in the "memory" nodes. If that changes, this code will
- * need to be adapted.
- */
- if (of_address_to_resource(np, 0, &r))
- continue;
- if (of_node_to_nid(np) != priv->node)
- continue;
- csrow->first_page = r.start >> PAGE_SHIFT;
- nr_pages = resource_size(&r) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + nr_pages - 1;
-
- for (j = 0; j < csrow->nr_channels; j++) {
- dimm = csrow->channels[j]->dimm;
- dimm->mtype = MEM_XDR;
- dimm->edac_mode = EDAC_SECDED;
- dimm->nr_pages = nr_pages / csrow->nr_channels;
- }
- dev_dbg(mci->pdev,
- "Initialized on node %d, chanmask=0x%x,"
- " first_page=0x%lx, nr_pages=0x%x\n",
- priv->node, priv->chanmask,
- csrow->first_page, nr_pages);
- break;
- }
- of_node_put(np);
-}
-
-static int cell_edac_probe(struct platform_device *pdev)
-{
- struct cbe_mic_tm_regs __iomem *regs;
- struct mem_ctl_info *mci;
- struct edac_mc_layer layers[2];
- struct cell_edac_priv *priv;
- u64 reg;
- int rc, chanmask, num_chans;
-
- regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
- if (regs == NULL)
- return -ENODEV;
-
- edac_op_state = EDAC_OPSTATE_POLL;
-
- /* Get channel population */
- reg = in_be64(&regs->mic_mnt_cfg);
- dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016llx\n", reg);
- chanmask = 0;
- if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP)
- chanmask |= 0x1;
- if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP)
- chanmask |= 0x2;
- if (chanmask == 0) {
- dev_warn(&pdev->dev,
- "Yuck ! No channel populated ? Aborting !\n");
- return -ENODEV;
- }
- dev_dbg(&pdev->dev, "Initial FIR = 0x%016llx\n",
- in_be64(&regs->mic_fir));
-
- /* Allocate & init EDAC MC data structure */
- num_chans = chanmask == 3 ? 2 : 1;
-
- layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = 1;
- layers[0].is_virt_csrow = true;
- layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = num_chans;
- layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
- sizeof(struct cell_edac_priv));
- if (mci == NULL)
- return -ENOMEM;
- priv = mci->pvt_info;
- priv->regs = regs;
- priv->node = pdev->id;
- priv->chanmask = chanmask;
- mci->pdev = &pdev->dev;
- mci->mtype_cap = MEM_FLAG_XDR;
- mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
- mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
- mci->mod_name = "cell_edac";
- mci->ctl_name = "MIC";
- mci->dev_name = dev_name(&pdev->dev);
- mci->edac_check = cell_edac_check;
- cell_edac_init_csrows(mci);
-
- /* Register with EDAC core */
- rc = edac_mc_add_mc(mci);
- if (rc) {
- dev_err(&pdev->dev, "failed to register with EDAC core\n");
- edac_mc_free(mci);
- return rc;
- }
-
- return 0;
-}
-
-static int cell_edac_remove(struct platform_device *pdev)
-{
- struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
- if (mci)
- edac_mc_free(mci);
- return 0;
-}
-
-static struct platform_driver cell_edac_driver = {
- .driver = {
- .name = "cbe-mic",
- },
- .probe = cell_edac_probe,
- .remove = cell_edac_remove,
-};
-
-static int __init cell_edac_init(void)
-{
- /* Sanity check registers data structure */
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_ecc_address_0) != 0xf8);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_ecc_address_1) != 0x1b8);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_config) != 0x218);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_fir) != 0x230);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_mnt_cfg) != 0x210);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_exc) != 0x208);
-
- return platform_driver_register(&cell_edac_driver);
-}
-
-static void __exit cell_edac_exit(void)
-{
- platform_driver_unregister(&cell_edac_driver);
-}
-
-module_init(cell_edac_init);
-module_exit(cell_edac_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
-MODULE_DESCRIPTION("ECC counting for Cell MIC");
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 9797e6d60dde..9c9e4369c041 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -797,7 +797,7 @@ static void cpc925_add_edac_devices(void __iomem *vbase)
dev_info->edac_idx = edac_device_alloc_index();
dev_info->edac_dev =
edac_device_alloc_ctl_info(0, dev_info->ctl_name,
- 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx);
+ 1, NULL, 0, 0, dev_info->edac_idx);
if (!dev_info->edac_dev) {
cpc925_printk(KERN_ERR, "No memory for edac device\n");
goto err1;
@@ -1010,7 +1010,7 @@ out:
return res;
}
-static int cpc925_remove(struct platform_device *pdev)
+static void cpc925_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
@@ -1023,8 +1023,6 @@ static int cpc925_remove(struct platform_device *pdev)
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static struct platform_driver cpc925_edac_driver = {
diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c
index 4804332d9946..8195fc9c9354 100644
--- a/drivers/edac/debugfs.c
+++ b/drivers/edac/debugfs.c
@@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/string_choices.h>
+
#include "edac_module.h"
static struct dentry *edac_debugfs;
@@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file,
"Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n",
errcount,
(type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE",
- errcount > 1 ? "s" : "",
+ str_plural(errcount),
mci->fake_inject_layer[0],
mci->fake_inject_layer[1],
mci->fake_inject_layer[2]
diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c
index 1fa5ca57e9ec..64a4d0a07032 100644
--- a/drivers/edac/dmc520_edac.c
+++ b/drivers/edac/dmc520_edac.c
@@ -480,7 +480,6 @@ static int dmc520_edac_probe(struct platform_device *pdev)
struct mem_ctl_info *mci;
void __iomem *reg_base;
u32 irq_mask_all = 0;
- struct resource *res;
struct device *dev;
int ret, idx, irq;
u32 reg_val;
@@ -505,8 +504,7 @@ static int dmc520_edac_probe(struct platform_device *pdev)
}
/* Initialize dmc520 edac */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- reg_base = devm_ioremap_resource(dev, res);
+ reg_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(reg_base))
return PTR_ERR(reg_base);
@@ -602,7 +600,7 @@ err:
return ret;
}
-static int dmc520_edac_remove(struct platform_device *pdev)
+static void dmc520_edac_remove(struct platform_device *pdev)
{
u32 reg_val, idx, irq_mask_all = 0;
struct mem_ctl_info *mci;
@@ -626,8 +624,6 @@ static int dmc520_edac_remove(struct platform_device *pdev)
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static const struct of_device_id dmc520_edac_driver_id[] = {
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index ac7c9b42d4c7..7221b4bb6df2 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -1462,7 +1462,7 @@ module_init(e752x_init);
module_exit(e752x_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n");
+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman");
MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers");
module_param(force_function_unhide, int, 0444);
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 497e710fca3d..5852b95fa470 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -596,8 +596,7 @@ module_init(e7xxx_init);
module_exit(e7xxx_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
- "Based on.work by Dan Hollis et al");
+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al");
MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c
new file mode 100644
index 000000000000..51c451c7f0f0
--- /dev/null
+++ b/drivers/edac/ecs.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic ECS driver is designed to support control of on-die error
+ * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts
+ * the control of various ECS functionalities into a unified set of functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+#define EDAC_ECS_FRU_NAME "ecs_fru"
+
+enum edac_ecs_attributes {
+ ECS_LOG_ENTRY_TYPE,
+ ECS_MODE,
+ ECS_RESET,
+ ECS_THRESHOLD,
+ ECS_MAX_ATTRS
+};
+
+struct edac_ecs_dev_attr {
+ struct device_attribute dev_attr;
+ int fru_id;
+};
+
+struct edac_ecs_fru_context {
+ char name[EDAC_FEAT_NAME_LEN];
+ struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS];
+ struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1];
+ struct attribute_group group;
+};
+
+struct edac_ecs_context {
+ u16 num_media_frus;
+ struct edac_ecs_fru_context *fru_ctxs;
+};
+
+#define TO_ECS_DEV_ATTR(_dev_attr) \
+ container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr)
+
+#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \
+ dev_attr->fru_id, &data); \
+ if (ret) \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n")
+EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n")
+EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n")
+
+#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = conv_func(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \
+ dev_attr->fru_id, data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul)
+
+static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+ struct device *ras_feat_dev = kobj_to_dev(kobj);
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops;
+
+ switch (attr_id) {
+ case ECS_LOG_ENTRY_TYPE:
+ if (ops->get_log_entry_type) {
+ if (ops->set_log_entry_type)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case ECS_MODE:
+ if (ops->get_mode) {
+ if (ops->set_mode)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case ECS_RESET:
+ if (ops->reset)
+ return a->mode;
+ break;
+ case ECS_THRESHOLD:
+ if (ops->get_threshold) {
+ if (ops->set_threshold)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define EDAC_ECS_ATTR_RO(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \
+ .fru_id = _fru_id })
+
+#define EDAC_ECS_ATTR_WO(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \
+ .fru_id = _fru_id })
+
+#define EDAC_ECS_ATTR_RW(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \
+ .fru_id = _fru_id })
+
+static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group **attr_groups,
+ u16 num_media_frus)
+{
+ struct edac_ecs_context *ecs_ctx;
+ u32 fru;
+
+ ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL);
+ if (!ecs_ctx)
+ return -ENOMEM;
+
+ ecs_ctx->num_media_frus = num_media_frus;
+ ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus,
+ sizeof(*ecs_ctx->fru_ctxs),
+ GFP_KERNEL);
+ if (!ecs_ctx->fru_ctxs)
+ return -ENOMEM;
+
+ for (fru = 0; fru < num_media_frus; fru++) {
+ struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru];
+ struct attribute_group *group = &fru_ctx->group;
+ int i;
+
+ fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru);
+ fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru);
+ fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru);
+ fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru);
+
+ for (i = 0; i < ECS_MAX_ATTRS; i++) {
+ sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr);
+ fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr;
+ }
+
+ sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru);
+ group->name = fru_ctx->name;
+ group->attrs = fru_ctx->ecs_attrs;
+ group->is_visible = ecs_attr_visible;
+
+ attr_groups[fru] = group;
+ }
+
+ return 0;
+}
+
+/**
+ * edac_ecs_get_desc - get EDAC ECS descriptors
+ * @ecs_dev: client device, supports ECS feature
+ * @attr_groups: pointer to attribute group container
+ * @num_media_frus: number of media FRUs in the device
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_ecs_get_desc(struct device *ecs_dev,
+ const struct attribute_group **attr_groups, u16 num_media_frus)
+{
+ if (!ecs_dev || !attr_groups || !num_media_frus)
+ return -EINVAL;
+
+ return ecs_create_desc(ecs_dev, attr_groups, num_media_frus);
+}
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 0689e1510721..0734909b08a4 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -56,14 +56,12 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
struct edac_device_ctl_info *
edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances,
char *blk_name, unsigned nr_blocks, unsigned off_val,
- struct edac_dev_sysfs_block_attribute *attrib_spec,
- unsigned nr_attrib, int device_index)
+ int device_index)
{
- struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib;
struct edac_device_block *dev_blk, *blk_p, *blk;
struct edac_device_instance *dev_inst, *inst;
struct edac_device_ctl_info *dev_ctl;
- unsigned instance, block, attr;
+ unsigned instance, block;
void *pvt;
int err;
@@ -85,15 +83,6 @@ edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instance
dev_ctl->blocks = dev_blk;
- if (nr_attrib) {
- dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute),
- GFP_KERNEL);
- if (!dev_attrib)
- goto free;
-
- dev_ctl->attribs = dev_attrib;
- }
-
if (pvt_sz) {
pvt = kzalloc(pvt_sz, GFP_KERNEL);
if (!pvt)
@@ -132,44 +121,6 @@ edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instance
edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
instance, inst, block, blk, blk->name);
-
- /* if there are NO attributes OR no attribute pointer
- * then continue on to next block iteration
- */
- if ((nr_attrib == 0) || (attrib_spec == NULL))
- continue;
-
- /* setup the attribute array for this block */
- blk->nr_attribs = nr_attrib;
- attrib_p = &dev_attrib[block*nr_instances*nr_attrib];
- blk->block_attributes = attrib_p;
-
- edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n",
- blk->block_attributes);
-
- /* Initialize every user specified attribute in this
- * block with the data the caller passed in
- * Each block gets its own copy of pointers,
- * and its unique 'value'
- */
- for (attr = 0; attr < nr_attrib; attr++) {
- attrib = &attrib_p[attr];
-
- /* populate the unique per attrib
- * with the code pointers and info
- */
- attrib->attr = attrib_spec[attr].attr;
- attrib->show = attrib_spec[attr].show;
- attrib->store = attrib_spec[attr].store;
-
- attrib->block = blk; /* up link */
-
- edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n",
- attrib, attrib->attr.name,
- &attrib_spec[attr],
- attrib_spec[attr].attr.name
- );
- }
}
}
@@ -619,3 +570,188 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
block ? block->name : "N/A", count, msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
+
+static void edac_dev_release(struct device *dev)
+{
+ struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev);
+
+ kfree(ctx->mem_repair);
+ kfree(ctx->scrub);
+ kfree(ctx->dev.groups);
+ kfree(ctx);
+}
+
+static const struct device_type edac_dev_type = {
+ .name = "edac_dev",
+ .release = edac_dev_release,
+};
+
+static void edac_dev_unreg(void *data)
+{
+ device_unregister(data);
+}
+
+/**
+ * edac_dev_register - register device for RAS features with EDAC
+ * @parent: parent device.
+ * @name: name for the folder in the /sys/bus/edac/devices/,
+ * which is derived from the parent device.
+ * For e.g. /sys/bus/edac/devices/cxl_mem0/
+ * @private: parent driver's data to store in the context if any.
+ * @num_features: number of RAS features to register.
+ * @ras_features: list of RAS features to register.
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ *
+ */
+int edac_dev_register(struct device *parent, char *name,
+ void *private, int num_features,
+ const struct edac_dev_feature *ras_features)
+{
+ const struct attribute_group **ras_attr_groups;
+ struct edac_dev_data *dev_data;
+ struct edac_dev_feat_ctx *ctx;
+ int mem_repair_cnt = 0;
+ int attr_gcnt = 0;
+ int ret = -ENOMEM;
+ int scrub_cnt = 0;
+ int feat;
+
+ if (!parent || !name || !num_features || !ras_features)
+ return -EINVAL;
+
+ /* Double parse to make space for attributes */
+ for (feat = 0; feat < num_features; feat++) {
+ switch (ras_features[feat].ft_type) {
+ case RAS_FEAT_SCRUB:
+ attr_gcnt++;
+ scrub_cnt++;
+ break;
+ case RAS_FEAT_ECS:
+ attr_gcnt += ras_features[feat].ecs_info.num_media_frus;
+ break;
+ case RAS_FEAT_MEM_REPAIR:
+ attr_gcnt++;
+ mem_repair_cnt++;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL);
+ if (!ras_attr_groups)
+ goto ctx_free;
+
+ if (scrub_cnt) {
+ ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL);
+ if (!ctx->scrub)
+ goto groups_free;
+ }
+
+ if (mem_repair_cnt) {
+ ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL);
+ if (!ctx->mem_repair)
+ goto data_mem_free;
+ }
+
+ attr_gcnt = 0;
+ scrub_cnt = 0;
+ mem_repair_cnt = 0;
+ for (feat = 0; feat < num_features; feat++, ras_features++) {
+ switch (ras_features->ft_type) {
+ case RAS_FEAT_SCRUB:
+ if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->scrub[scrub_cnt];
+ dev_data->instance = scrub_cnt;
+ dev_data->scrub_ops = ras_features->scrub_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->instance);
+ if (ret)
+ goto data_mem_free;
+
+ scrub_cnt++;
+ attr_gcnt++;
+ break;
+ case RAS_FEAT_ECS:
+ if (!ras_features->ecs_ops) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->ecs;
+ dev_data->ecs_ops = ras_features->ecs_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->ecs_info.num_media_frus);
+ if (ret)
+ goto data_mem_free;
+
+ attr_gcnt += ras_features->ecs_info.num_media_frus;
+ break;
+ case RAS_FEAT_MEM_REPAIR:
+ if (!ras_features->mem_repair_ops ||
+ mem_repair_cnt != ras_features->instance) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->mem_repair[mem_repair_cnt];
+ dev_data->instance = mem_repair_cnt;
+ dev_data->mem_repair_ops = ras_features->mem_repair_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->instance);
+ if (ret)
+ goto data_mem_free;
+
+ mem_repair_cnt++;
+ attr_gcnt++;
+ break;
+ default:
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+ }
+
+ ctx->dev.parent = parent;
+ ctx->dev.bus = edac_get_sysfs_subsys();
+ ctx->dev.type = &edac_dev_type;
+ ctx->dev.groups = ras_attr_groups;
+ ctx->private = private;
+ dev_set_drvdata(&ctx->dev, ctx);
+
+ ret = dev_set_name(&ctx->dev, "%s", name);
+ if (ret)
+ goto data_mem_free;
+
+ ret = device_register(&ctx->dev);
+ if (ret) {
+ put_device(&ctx->dev);
+ return ret;
+ }
+
+ return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev);
+
+data_mem_free:
+ kfree(ctx->mem_repair);
+ kfree(ctx->scrub);
+groups_free:
+ kfree(ras_attr_groups);
+ctx_free:
+ kfree(ctx);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(edac_dev_register);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 3f44e6b9d387..034711d71ebf 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -22,7 +22,6 @@
#ifndef _EDAC_DEVICE_H_
#define _EDAC_DEVICE_H_
-#include <linux/completion.h>
#include <linux/device.h>
#include <linux/edac.h>
#include <linux/kobject.h>
@@ -95,22 +94,13 @@ struct edac_dev_sysfs_attribute {
*
* used in leaf 'block' nodes for adding controls/attributes
*
- * each block in each instance of the containing control structure
- * can have an array of the following. The show and store functions
- * will be filled in with the show/store function in the
- * low level driver.
- *
- * The 'value' field will be the actual value field used for
- * counting
+ * each block in each instance of the containing control structure can
+ * have an array of the following. The show function will be filled in
+ * with the show function in the low level driver.
*/
struct edac_dev_sysfs_block_attribute {
struct attribute attr;
ssize_t (*show)(struct kobject *, struct attribute *, char *);
- ssize_t (*store)(struct kobject *, struct attribute *,
- const char *, size_t);
- struct edac_device_block *block;
-
- unsigned int value;
};
/* device block control structure */
@@ -176,7 +166,7 @@ struct edac_device_ctl_info {
struct edac_dev_sysfs_attribute *sysfs_attributes;
/* pointer to main 'edac' subsys in sysfs */
- struct bus_type *edac_subsys;
+ const struct bus_type *edac_subsys;
/* the internal state of this controller instance */
int op_state;
@@ -200,8 +190,6 @@ struct edac_device_ctl_info {
unsigned long start_time; /* edac_device load start time (jiffies) */
- struct completion removal_complete;
-
/* sysfs top name under 'edac' directory
* and instance name:
* cpu/cpu0/...
@@ -217,7 +205,6 @@ struct edac_device_ctl_info {
u32 nr_instances;
struct edac_device_instance *instances;
struct edac_device_block *blocks;
- struct edac_dev_sysfs_block_attribute *attribs;
/* Event counters for the this whole EDAC Device */
struct edac_device_counter counters;
@@ -245,8 +232,6 @@ extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
char *edac_device_name, unsigned nr_instances,
char *edac_block_name, unsigned nr_blocks,
unsigned offset_value,
- struct edac_dev_sysfs_block_attribute *block_attributes,
- unsigned nr_attribs,
int device_index);
/* The offset value can be:
@@ -356,7 +341,6 @@ static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
{
if (ci) {
kfree(ci->pvt_info);
- kfree(ci->attribs);
kfree(ci->blocks);
kfree(ci->instances);
kfree(ci);
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index ac678b4a21fc..fcebc4ffea26 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -228,8 +228,9 @@ static struct kobj_type ktype_device_ctrl = {
*/
int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
{
- struct bus_type *edac_subsys;
- int err;
+ struct device *dev_root;
+ const struct bus_type *edac_subsys;
+ int err = -ENODEV;
edac_dbg(1, "\n");
@@ -247,15 +248,16 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
*/
edac_dev->owner = THIS_MODULE;
- if (!try_module_get(edac_dev->owner)) {
- err = -ENODEV;
+ if (!try_module_get(edac_dev->owner))
goto err_out;
- }
/* register */
- err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
- &edac_subsys->dev_root->kobj,
- "%s", edac_dev->name);
+ dev_root = bus_get_dev_root(edac_subsys);
+ if (dev_root) {
+ err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
+ &dev_root->kobj, "%s", edac_dev->name);
+ put_device(dev_root);
+ }
if (err) {
edac_dbg(1, "Failed to register '.../edac/%s'\n",
edac_dev->name);
@@ -455,35 +457,19 @@ static ssize_t edac_dev_block_show(struct kobject *kobj,
return -EIO;
}
-/* Function to 'store' fields into the edac_dev 'block' structure */
-static ssize_t edac_dev_block_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct edac_dev_sysfs_block_attribute *block_attr;
-
- block_attr = to_block_attr(attr);
-
- if (block_attr->store)
- return block_attr->store(kobj, attr, buffer, count);
- return -EIO;
-}
-
/* edac_dev file operations for a 'block' */
static const struct sysfs_ops device_block_ops = {
.show = edac_dev_block_show,
- .store = edac_dev_block_store
};
-#define BLOCK_ATTR(_name,_mode,_show,_store) \
+#define BLOCK_ATTR(_name,_mode,_show) \
static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
- .store = _store, \
};
-BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
-BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
+BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show);
+BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show);
/* list of edac_dev 'block' attributes */
static struct attribute *device_block_attrs[] = {
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 6faeb2ab3960..0959320fe51c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -166,6 +166,7 @@ const char * const edac_mem_types[] = {
[MEM_NVDIMM] = "Non-volatile-RAM",
[MEM_WIO2] = "Wide-IO-2",
[MEM_HBM2] = "High-bandwidth-memory-Gen2",
+ [MEM_HBM3] = "High-bandwidth-memory-Gen3",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
@@ -213,7 +214,7 @@ static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
unsigned int row, chn;
/*
- * Alocate and fill the csrow/channels structs
+ * Allocate and fill the csrow/channels structs
*/
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
if (!mci->csrows)
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 15f63452a9be..091cc6aae8a9 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -115,378 +115,6 @@ static const char * const edac_caps[] = {
[EDAC_S16ECD16ED] = "S16ECD16ED"
};
-#ifdef CONFIG_EDAC_LEGACY_SYSFS
-/*
- * EDAC sysfs CSROW data structures and methods
- */
-
-#define to_csrow(k) container_of(k, struct csrow_info, dev)
-
-/*
- * We need it to avoid namespace conflicts between the legacy API
- * and the per-dimm/per-rank one
- */
-#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \
- static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store)
-
-struct dev_ch_attribute {
- struct device_attribute attr;
- unsigned int channel;
-};
-
-#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
- static struct dev_ch_attribute dev_attr_legacy_##_name = \
- { __ATTR(_name, _mode, _show, _store), (_var) }
-
-#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel)
-
-/* Set of more default csrow<id> attribute show/store functions */
-static ssize_t csrow_ue_count_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
-
- return sprintf(data, "%u\n", csrow->ue_count);
-}
-
-static ssize_t csrow_ce_count_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
-
- return sprintf(data, "%u\n", csrow->ce_count);
-}
-
-static ssize_t csrow_size_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
- int i;
- u32 nr_pages = 0;
-
- for (i = 0; i < csrow->nr_channels; i++)
- nr_pages += csrow->channels[i]->dimm->nr_pages;
- return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
-}
-
-static ssize_t csrow_mem_type_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
-
- return sprintf(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]);
-}
-
-static ssize_t csrow_dev_type_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
-
- return sprintf(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]);
-}
-
-static ssize_t csrow_edac_mode_show(struct device *dev,
- struct device_attribute *mattr,
- char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
-
- return sprintf(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]);
-}
-
-/* show/store functions for DIMM Label attributes */
-static ssize_t channel_dimm_label_show(struct device *dev,
- struct device_attribute *mattr,
- char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
- unsigned int chan = to_channel(mattr);
- struct rank_info *rank = csrow->channels[chan];
-
- /* if field has not been initialized, there is nothing to send */
- if (!rank->dimm->label[0])
- return 0;
-
- return snprintf(data, sizeof(rank->dimm->label) + 1, "%s\n",
- rank->dimm->label);
-}
-
-static ssize_t channel_dimm_label_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct csrow_info *csrow = to_csrow(dev);
- unsigned int chan = to_channel(mattr);
- struct rank_info *rank = csrow->channels[chan];
- size_t copy_count = count;
-
- if (count == 0)
- return -EINVAL;
-
- if (data[count - 1] == '\0' || data[count - 1] == '\n')
- copy_count -= 1;
-
- if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label))
- return -EINVAL;
-
- strncpy(rank->dimm->label, data, copy_count);
- rank->dimm->label[copy_count] = '\0';
-
- return count;
-}
-
-/* show function for dynamic chX_ce_count attribute */
-static ssize_t channel_ce_count_show(struct device *dev,
- struct device_attribute *mattr, char *data)
-{
- struct csrow_info *csrow = to_csrow(dev);
- unsigned int chan = to_channel(mattr);
- struct rank_info *rank = csrow->channels[chan];
-
- return sprintf(data, "%u\n", rank->ce_count);
-}
-
-/* cwrow<id>/attribute files */
-DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL);
-DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL);
-DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL);
-DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL);
-DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL);
-DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL);
-
-/* default attributes of the CSROW<id> object */
-static struct attribute *csrow_attrs[] = {
- &dev_attr_legacy_dev_type.attr,
- &dev_attr_legacy_mem_type.attr,
- &dev_attr_legacy_edac_mode.attr,
- &dev_attr_legacy_size_mb.attr,
- &dev_attr_legacy_ue_count.attr,
- &dev_attr_legacy_ce_count.attr,
- NULL,
-};
-
-static const struct attribute_group csrow_attr_grp = {
- .attrs = csrow_attrs,
-};
-
-static const struct attribute_group *csrow_attr_groups[] = {
- &csrow_attr_grp,
- NULL
-};
-
-static const struct device_type csrow_attr_type = {
- .groups = csrow_attr_groups,
-};
-
-/*
- * possible dynamic channel DIMM Label attribute files
- *
- */
-DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 0);
-DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 1);
-DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 2);
-DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 3);
-DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 4);
-DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 5);
-DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 6);
-DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 7);
-DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 8);
-DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 9);
-DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 10);
-DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
- channel_dimm_label_show, channel_dimm_label_store, 11);
-
-/* Total possible dynamic DIMM Label attribute file table */
-static struct attribute *dynamic_csrow_dimm_attr[] = {
- &dev_attr_legacy_ch0_dimm_label.attr.attr,
- &dev_attr_legacy_ch1_dimm_label.attr.attr,
- &dev_attr_legacy_ch2_dimm_label.attr.attr,
- &dev_attr_legacy_ch3_dimm_label.attr.attr,
- &dev_attr_legacy_ch4_dimm_label.attr.attr,
- &dev_attr_legacy_ch5_dimm_label.attr.attr,
- &dev_attr_legacy_ch6_dimm_label.attr.attr,
- &dev_attr_legacy_ch7_dimm_label.attr.attr,
- &dev_attr_legacy_ch8_dimm_label.attr.attr,
- &dev_attr_legacy_ch9_dimm_label.attr.attr,
- &dev_attr_legacy_ch10_dimm_label.attr.attr,
- &dev_attr_legacy_ch11_dimm_label.attr.attr,
- NULL
-};
-
-/* possible dynamic channel ce_count attribute files */
-DEVICE_CHANNEL(ch0_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 0);
-DEVICE_CHANNEL(ch1_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 1);
-DEVICE_CHANNEL(ch2_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 2);
-DEVICE_CHANNEL(ch3_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 3);
-DEVICE_CHANNEL(ch4_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 4);
-DEVICE_CHANNEL(ch5_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 5);
-DEVICE_CHANNEL(ch6_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 6);
-DEVICE_CHANNEL(ch7_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 7);
-DEVICE_CHANNEL(ch8_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 8);
-DEVICE_CHANNEL(ch9_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 9);
-DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 10);
-DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
- channel_ce_count_show, NULL, 11);
-
-/* Total possible dynamic ce_count attribute file table */
-static struct attribute *dynamic_csrow_ce_count_attr[] = {
- &dev_attr_legacy_ch0_ce_count.attr.attr,
- &dev_attr_legacy_ch1_ce_count.attr.attr,
- &dev_attr_legacy_ch2_ce_count.attr.attr,
- &dev_attr_legacy_ch3_ce_count.attr.attr,
- &dev_attr_legacy_ch4_ce_count.attr.attr,
- &dev_attr_legacy_ch5_ce_count.attr.attr,
- &dev_attr_legacy_ch6_ce_count.attr.attr,
- &dev_attr_legacy_ch7_ce_count.attr.attr,
- &dev_attr_legacy_ch8_ce_count.attr.attr,
- &dev_attr_legacy_ch9_ce_count.attr.attr,
- &dev_attr_legacy_ch10_ce_count.attr.attr,
- &dev_attr_legacy_ch11_ce_count.attr.attr,
- NULL
-};
-
-static umode_t csrow_dev_is_visible(struct kobject *kobj,
- struct attribute *attr, int idx)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
-
- if (idx >= csrow->nr_channels)
- return 0;
-
- if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) {
- WARN_ONCE(1, "idx: %d\n", idx);
- return 0;
- }
-
- /* Only expose populated DIMMs */
- if (!csrow->channels[idx]->dimm->nr_pages)
- return 0;
-
- return attr->mode;
-}
-
-
-static const struct attribute_group csrow_dev_dimm_group = {
- .attrs = dynamic_csrow_dimm_attr,
- .is_visible = csrow_dev_is_visible,
-};
-
-static const struct attribute_group csrow_dev_ce_count_group = {
- .attrs = dynamic_csrow_ce_count_attr,
- .is_visible = csrow_dev_is_visible,
-};
-
-static const struct attribute_group *csrow_dev_groups[] = {
- &csrow_dev_dimm_group,
- &csrow_dev_ce_count_group,
- NULL
-};
-
-static void csrow_release(struct device *dev)
-{
- /*
- * Nothing to do, just unregister sysfs here. The mci
- * device owns the data and will also release it.
- */
-}
-
-static inline int nr_pages_per_csrow(struct csrow_info *csrow)
-{
- int chan, nr_pages = 0;
-
- for (chan = 0; chan < csrow->nr_channels; chan++)
- nr_pages += csrow->channels[chan]->dimm->nr_pages;
-
- return nr_pages;
-}
-
-/* Create a CSROW object under specifed edac_mc_device */
-static int edac_create_csrow_object(struct mem_ctl_info *mci,
- struct csrow_info *csrow, int index)
-{
- int err;
-
- csrow->dev.type = &csrow_attr_type;
- csrow->dev.groups = csrow_dev_groups;
- csrow->dev.release = csrow_release;
- device_initialize(&csrow->dev);
- csrow->dev.parent = &mci->dev;
- csrow->mci = mci;
- dev_set_name(&csrow->dev, "csrow%d", index);
- dev_set_drvdata(&csrow->dev, csrow);
-
- err = device_add(&csrow->dev);
- if (err) {
- edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev));
- put_device(&csrow->dev);
- return err;
- }
-
- edac_dbg(0, "device %s created\n", dev_name(&csrow->dev));
-
- return 0;
-}
-
-/* Create a CSROW object under specifed edac_mc_device */
-static int edac_create_csrow_objects(struct mem_ctl_info *mci)
-{
- int err, i;
- struct csrow_info *csrow;
-
- for (i = 0; i < mci->nr_csrows; i++) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- err = edac_create_csrow_object(mci, mci->csrows[i], i);
- if (err < 0)
- goto error;
- }
- return 0;
-
-error:
- for (--i; i >= 0; i--) {
- if (device_is_registered(&mci->csrows[i]->dev))
- device_unregister(&mci->csrows[i]->dev);
- }
-
- return err;
-}
-
-static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
-{
- int i;
-
- for (i = 0; i < mci->nr_csrows; i++) {
- if (device_is_registered(&mci->csrows[i]->dev))
- device_unregister(&mci->csrows[i]->dev);
- }
-}
-
-#endif
-
/*
* Per-dimm (or per-rank) devices
*/
@@ -515,7 +143,7 @@ static ssize_t dimmdev_label_show(struct device *dev,
if (!dimm->label[0])
return 0;
- return snprintf(data, sizeof(dimm->label) + 1, "%s\n", dimm->label);
+ return sysfs_emit(data, "%s\n", dimm->label);
}
static ssize_t dimmdev_label_store(struct device *dev,
@@ -535,7 +163,7 @@ static ssize_t dimmdev_label_store(struct device *dev,
if (copy_count == 0 || copy_count >= sizeof(dimm->label))
return -EINVAL;
- strncpy(dimm->label, data, copy_count);
+ memcpy(dimm->label, data, copy_count);
dimm->label[copy_count] = '\0';
return count;
@@ -546,7 +174,7 @@ static ssize_t dimmdev_size_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages));
+ return sysfs_emit(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages));
}
static ssize_t dimmdev_mem_type_show(struct device *dev,
@@ -554,7 +182,7 @@ static ssize_t dimmdev_mem_type_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%s\n", edac_mem_types[dimm->mtype]);
+ return sysfs_emit(data, "%s\n", edac_mem_types[dimm->mtype]);
}
static ssize_t dimmdev_dev_type_show(struct device *dev,
@@ -562,7 +190,7 @@ static ssize_t dimmdev_dev_type_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%s\n", dev_types[dimm->dtype]);
+ return sysfs_emit(data, "%s\n", dev_types[dimm->dtype]);
}
static ssize_t dimmdev_edac_mode_show(struct device *dev,
@@ -571,7 +199,7 @@ static ssize_t dimmdev_edac_mode_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%s\n", edac_caps[dimm->edac_mode]);
+ return sysfs_emit(data, "%s\n", edac_caps[dimm->edac_mode]);
}
static ssize_t dimmdev_ce_count_show(struct device *dev,
@@ -580,7 +208,7 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%u\n", dimm->ce_count);
+ return sysfs_emit(data, "%u\n", dimm->ce_count);
}
static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -589,7 +217,7 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
{
struct dimm_info *dimm = to_dimm(dev);
- return sprintf(data, "%u\n", dimm->ue_count);
+ return sysfs_emit(data, "%u\n", dimm->ue_count);
}
/* dimm/rank attribute files */
@@ -637,7 +265,7 @@ static void dimm_release(struct device *dev)
*/
}
-/* Create a DIMM object under specifed memory controller device */
+/* Create a DIMM object under specified memory controller device */
static int edac_create_dimm_object(struct mem_ctl_info *mci,
struct dimm_info *dimm)
{
@@ -758,7 +386,7 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
return bandwidth;
}
- return sprintf(data, "%d\n", bandwidth);
+ return sysfs_emit(data, "%d\n", bandwidth);
}
/* default attribute files for the MCI object */
@@ -768,7 +396,7 @@ static ssize_t mci_ue_count_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%u\n", mci->ue_mc);
+ return sysfs_emit(data, "%u\n", mci->ue_mc);
}
static ssize_t mci_ce_count_show(struct device *dev,
@@ -777,7 +405,7 @@ static ssize_t mci_ce_count_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%u\n", mci->ce_mc);
+ return sysfs_emit(data, "%u\n", mci->ce_mc);
}
static ssize_t mci_ce_noinfo_show(struct device *dev,
@@ -786,7 +414,7 @@ static ssize_t mci_ce_noinfo_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%u\n", mci->ce_noinfo_count);
+ return sysfs_emit(data, "%u\n", mci->ce_noinfo_count);
}
static ssize_t mci_ue_noinfo_show(struct device *dev,
@@ -795,7 +423,7 @@ static ssize_t mci_ue_noinfo_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%u\n", mci->ue_noinfo_count);
+ return sysfs_emit(data, "%u\n", mci->ue_noinfo_count);
}
static ssize_t mci_seconds_show(struct device *dev,
@@ -804,7 +432,7 @@ static ssize_t mci_seconds_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ);
+ return sysfs_emit(data, "%ld\n", (jiffies - mci->start_time) / HZ);
}
static ssize_t mci_ctl_name_show(struct device *dev,
@@ -813,7 +441,7 @@ static ssize_t mci_ctl_name_show(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
- return sprintf(data, "%s\n", mci->ctl_name);
+ return sysfs_emit(data, "%s\n", mci->ctl_name);
}
static ssize_t mci_size_mb_show(struct device *dev,
@@ -833,7 +461,7 @@ static ssize_t mci_size_mb_show(struct device *dev,
}
}
- return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages));
+ return sysfs_emit(data, "%u\n", PAGES_TO_MiB(total_pages));
}
static ssize_t mci_max_location_show(struct device *dev,
@@ -966,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
goto fail;
}
-#ifdef CONFIG_EDAC_LEGACY_SYSFS
- err = edac_create_csrow_objects(mci);
- if (err < 0)
- goto fail;
-#endif
-
edac_create_debugfs_nodes(mci);
return 0;
@@ -996,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
#ifdef CONFIG_EDAC_DEBUG
edac_debugfs_remove_recursive(mci->debugfs);
#endif
-#ifdef CONFIG_EDAC_LEGACY_SYSFS
- edac_delete_csrow_objects(mci);
-#endif
mci_for_each_dimm(mci, dimm) {
if (!device_is_registered(&dimm->dev))
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 32a931d0cb71..1c9f62382666 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -67,7 +67,7 @@ char *edac_op_state_to_string(int opstate)
* sysfs object: /sys/devices/system/edac
* need to export to other files
*/
-static struct bus_type edac_subsys = {
+static const struct bus_type edac_subsys = {
.name = "edac",
.dev_name = "edac",
};
@@ -90,7 +90,7 @@ static void edac_subsys_exit(void)
}
/* return pointer to the 'edac' node in sysfs */
-struct bus_type *edac_get_sysfs_subsys(void)
+const struct bus_type *edac_get_sysfs_subsys(void)
{
return &edac_subsys;
}
diff --git a/drivers/edac/edac_pci.h b/drivers/edac/edac_pci.h
index 5175f5724cfa..3f47cd9b2b03 100644
--- a/drivers/edac/edac_pci.h
+++ b/drivers/edac/edac_pci.h
@@ -22,7 +22,6 @@
#ifndef _EDAC_PCI_H_
#define _EDAC_PCI_H_
-#include <linux/completion.h>
#include <linux/device.h>
#include <linux/edac.h>
#include <linux/kobject.h>
@@ -48,8 +47,6 @@ struct edac_pci_ctl_info {
int pci_idx;
- struct bus_type *edac_subsys; /* pointer to subsystem */
-
/* the internal state of this controller instance */
int op_state;
/* work struct for this instance */
@@ -72,8 +69,6 @@ struct edac_pci_ctl_info {
unsigned long start_time; /* edac_pci load start time (jiffies) */
- struct completion complete;
-
/* sysfs top name under 'edac' directory
* and instance name:
* cpu/cpu0/...
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 888d5728ecef..7b44afcf48db 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -337,8 +337,9 @@ static struct kobj_type ktype_edac_pci_main_kobj = {
*/
static int edac_pci_main_kobj_setup(void)
{
- int err;
- struct bus_type *edac_subsys;
+ int err = -ENODEV;
+ const struct bus_type *edac_subsys;
+ struct device *dev_root;
edac_dbg(0, "\n");
@@ -357,7 +358,6 @@ static int edac_pci_main_kobj_setup(void)
*/
if (!try_module_get(THIS_MODULE)) {
edac_dbg(1, "try_module_get() failed\n");
- err = -ENODEV;
goto decrement_count_fail;
}
@@ -369,9 +369,13 @@ static int edac_pci_main_kobj_setup(void)
}
/* Instanstiate the pci object */
- err = kobject_init_and_add(edac_pci_top_main_kobj,
- &ktype_edac_pci_main_kobj,
- &edac_subsys->dev_root->kobj, "pci");
+ dev_root = bus_get_dev_root(edac_subsys);
+ if (dev_root) {
+ err = kobject_init_and_add(edac_pci_top_main_kobj,
+ &ktype_edac_pci_main_kobj,
+ &dev_root->kobj, "pci");
+ put_device(dev_root);
+ }
if (err) {
edac_dbg(1, "Failed to register '.../edac/pci'\n");
goto kobject_init_and_add_fail;
@@ -517,7 +521,7 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev)
/* read the device TYPE, looking for bridges */
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
- if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
+ if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE)
get_pci_parity_status(dev, 1);
}
@@ -579,7 +583,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n",
header_type, dev_name(&dev->dev));
- if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+ if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) {
/* On bridges, need to examine secondary status register */
status = get_pci_parity_status(dev, 1);
diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c
index ac2102b25706..e4eaec0aa81d 100644
--- a/drivers/edac/fsl_ddr_edac.c
+++ b/drivers/edac/fsl_ddr_edac.c
@@ -22,8 +22,7 @@
#include <linux/smp.h>
#include <linux/gfp.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include "edac_module.h"
#include "fsl_ddr_edac.h"
@@ -32,18 +31,30 @@
static int edac_mc_idx;
-static u32 orig_ddr_err_disable;
-static u32 orig_ddr_err_sbe;
-static bool little_endian;
+/*
+ * Translate a register offset into the MMIO address to access.
+ *
+ * On controllers flagged TYPE_IMX9 two offset ranges live behind
+ * inject_vbase instead of mc_vbase:
+ *  - FSL_MC_DATA_ERR_INJECT_HI..FSL_MC_ERR_SBE (inclusive) is rebased so
+ *    that FSL_MC_DATA_ERR_INJECT_HI lands at IMX9_MC_DATA_ERR_INJECT_OFF;
+ *  - offsets at or above IMX9_MC_ERR_EN are rebased so that
+ *    IMX9_MC_ERR_EN lands at the start of inject_vbase.
+ * Every other offset, and every offset on other controller types, is
+ * returned as mc_vbase + off.
+ */
+static inline void __iomem *ddr_reg_addr(struct fsl_mc_pdata *pdata, unsigned int off)
+{
+ if (pdata->flag == TYPE_IMX9 && off >= FSL_MC_DATA_ERR_INJECT_HI && off <= FSL_MC_ERR_SBE)
+ return pdata->inject_vbase + off - FSL_MC_DATA_ERR_INJECT_HI
+ + IMX9_MC_DATA_ERR_INJECT_OFF;
+
+ if (pdata->flag == TYPE_IMX9 && off >= IMX9_MC_ERR_EN)
+ return pdata->inject_vbase + off - IMX9_MC_ERR_EN;
-static inline u32 ddr_in32(void __iomem *addr)
+ return pdata->mc_vbase + off;
+}
+
+/*
+ * Read the 32-bit controller register at offset @off. The address comes
+ * from ddr_reg_addr(); pdata->little_endian selects ioread32() over the
+ * big-endian ioread32be() accessor.
+ */
+static inline u32 ddr_in32(struct fsl_mc_pdata *pdata, unsigned int off)
{
- return little_endian ? ioread32(addr) : ioread32be(addr);
+ void __iomem *addr = ddr_reg_addr(pdata, off);
+
+ return pdata->little_endian ? ioread32(addr) : ioread32be(addr);
}
-static inline void ddr_out32(void __iomem *addr, u32 value)
+static inline void ddr_out32(struct fsl_mc_pdata *pdata, unsigned int off, u32 value)
{
- if (little_endian)
+ void __iomem *addr = ddr_reg_addr(pdata, off);
+
+ if (pdata->little_endian)
iowrite32(value, addr);
else
iowrite32be(value, addr);
@@ -61,7 +72,7 @@ static ssize_t fsl_mc_inject_data_hi_show(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct fsl_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
- ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI));
+ ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_HI));
}
static ssize_t fsl_mc_inject_data_lo_show(struct device *dev,
@@ -71,7 +82,7 @@ static ssize_t fsl_mc_inject_data_lo_show(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct fsl_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
- ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO));
+ ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_LO));
}
static ssize_t fsl_mc_inject_ctrl_show(struct device *dev,
@@ -81,7 +92,7 @@ static ssize_t fsl_mc_inject_ctrl_show(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct fsl_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
- ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT));
+ ddr_in32(pdata, FSL_MC_ECC_ERR_INJECT));
}
static ssize_t fsl_mc_inject_data_hi_store(struct device *dev,
@@ -98,7 +109,7 @@ static ssize_t fsl_mc_inject_data_hi_store(struct device *dev,
if (rc)
return rc;
- ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val);
+ ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_HI, val);
return count;
}
return 0;
@@ -118,7 +129,7 @@ static ssize_t fsl_mc_inject_data_lo_store(struct device *dev,
if (rc)
return rc;
- ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val);
+ ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_LO, val);
return count;
}
return 0;
@@ -138,7 +149,7 @@ static ssize_t fsl_mc_inject_ctrl_store(struct device *dev,
if (rc)
return rc;
- ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val);
+ ddr_out32(pdata, FSL_MC_ECC_ERR_INJECT, val);
return count;
}
return 0;
@@ -287,7 +298,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci)
int bad_data_bit;
int bad_ecc_bit;
- err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT);
+ err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT);
if (!err_detect)
return;
@@ -296,14 +307,14 @@ static void fsl_mc_check(struct mem_ctl_info *mci)
/* no more processing if not ECC bit errors */
if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) {
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect);
+ ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect);
return;
}
- syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC);
+ syndrome = ddr_in32(pdata, FSL_MC_CAPTURE_ECC);
/* Mask off appropriate bits of syndrome based on bus width */
- bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) &
+ bus_width = (ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG) &
DSC_DBW_MASK) ? 32 : 64;
if (bus_width == 64)
syndrome &= 0xff;
@@ -311,8 +322,8 @@ static void fsl_mc_check(struct mem_ctl_info *mci)
syndrome &= 0xffff;
err_addr = make64(
- ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS),
- ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS));
+ ddr_in32(pdata, FSL_MC_CAPTURE_EXT_ADDRESS),
+ ddr_in32(pdata, FSL_MC_CAPTURE_ADDRESS));
pfn = err_addr >> PAGE_SHIFT;
for (row_index = 0; row_index < mci->nr_csrows; row_index++) {
@@ -321,29 +332,33 @@ static void fsl_mc_check(struct mem_ctl_info *mci)
break;
}
- cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI);
- cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO);
+ cap_high = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_HI);
+ cap_low = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_LO);
/*
* Analyze single-bit errors on 64-bit wide buses
* TODO: Add support for 32-bit wide buses
*/
if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) {
+ u64 cap = (u64)cap_high << 32 | cap_low;
+ u32 s = syndrome;
+
sbe_ecc_decode(cap_high, cap_low, syndrome,
&bad_data_bit, &bad_ecc_bit);
- if (bad_data_bit != -1)
- fsl_mc_printk(mci, KERN_ERR,
- "Faulty Data bit: %d\n", bad_data_bit);
- if (bad_ecc_bit != -1)
- fsl_mc_printk(mci, KERN_ERR,
- "Faulty ECC bit: %d\n", bad_ecc_bit);
+ if (bad_data_bit >= 0) {
+ fsl_mc_printk(mci, KERN_ERR, "Faulty Data bit: %d\n", bad_data_bit);
+ cap ^= 1ULL << bad_data_bit;
+ }
+
+ if (bad_ecc_bit >= 0) {
+ fsl_mc_printk(mci, KERN_ERR, "Faulty ECC bit: %d\n", bad_ecc_bit);
+ s ^= 1 << bad_ecc_bit;
+ }
fsl_mc_printk(mci, KERN_ERR,
"Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
- cap_high ^ (1 << (bad_data_bit - 32)),
- cap_low ^ (1 << bad_data_bit),
- syndrome ^ (1 << bad_ecc_bit));
+ upper_32_bits(cap), lower_32_bits(cap), s);
}
fsl_mc_printk(mci, KERN_ERR,
@@ -368,7 +383,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci)
row_index, 0, -1,
mci->ctl_name, "");
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect);
+ ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect);
}
static irqreturn_t fsl_mc_isr(int irq, void *dev_id)
@@ -377,7 +392,7 @@ static irqreturn_t fsl_mc_isr(int irq, void *dev_id)
struct fsl_mc_pdata *pdata = mci->pvt_info;
u32 err_detect;
- err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT);
+ err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT);
if (!err_detect)
return IRQ_NONE;
@@ -397,7 +412,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci)
u32 cs_bnds;
int index;
- sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG);
+ sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG);
sdtype = sdram_ctl & DSC_SDTYPE_MASK;
if (sdram_ctl & DSC_RD_EN) {
@@ -432,6 +447,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci)
case 0x05000000:
mtype = MEM_DDR4;
break;
+ case 0x04000000:
+ mtype = MEM_LPDDR4;
+ break;
default:
mtype = MEM_UNKNOWN;
break;
@@ -445,7 +463,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci)
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
- cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 +
+ cs_bnds = ddr_in32(pdata, FSL_MC_CS_BNDS_0 +
(index * FSL_MC_CS_BNDS_OFS));
start = (cs_bnds & 0xffff0000) >> 16;
@@ -465,7 +483,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci)
dimm->grain = 8;
dimm->mtype = mtype;
dimm->dtype = DEV_UNKNOWN;
- if (sdram_ctl & DSC_X32_EN)
+ if (pdata->flag == TYPE_IMX9)
+ dimm->dtype = DEV_X16;
+ else if (sdram_ctl & DSC_X32_EN)
dimm->dtype = DEV_X32;
dimm->edac_mode = EDAC_SECDED;
}
@@ -477,6 +497,7 @@ int fsl_mc_err_probe(struct platform_device *op)
struct edac_mc_layer layers[2];
struct fsl_mc_pdata *pdata;
struct resource r;
+ u32 ecc_en_mask;
u32 sdram_ctl;
int res;
@@ -504,11 +525,13 @@ int fsl_mc_err_probe(struct platform_device *op)
mci->ctl_name = pdata->name;
mci->dev_name = pdata->name;
+ pdata->flag = (unsigned long)device_get_match_data(&op->dev);
+
/*
* Get the endianness of DDR controller registers.
* Default is big endian.
*/
- little_endian = of_property_read_bool(op->dev.of_node, "little-endian");
+ pdata->little_endian = of_property_read_bool(op->dev.of_node, "little-endian");
res = of_address_to_resource(op->dev.of_node, 0, &r);
if (res) {
@@ -532,8 +555,23 @@ int fsl_mc_err_probe(struct platform_device *op)
goto err;
}
- sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG);
- if (!(sdram_ctl & DSC_ECC_EN)) {
+ if (pdata->flag == TYPE_IMX9) {
+ pdata->inject_vbase = devm_platform_ioremap_resource_byname(op, "inject");
+ if (IS_ERR(pdata->inject_vbase)) {
+ res = -ENOMEM;
+ goto err;
+ }
+ }
+
+ if (pdata->flag == TYPE_IMX9) {
+ sdram_ctl = ddr_in32(pdata, IMX9_MC_ERR_EN);
+ ecc_en_mask = ERR_ECC_EN | ERR_INLINE_ECC;
+ } else {
+ sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG);
+ ecc_en_mask = DSC_ECC_EN;
+ }
+
+ if ((sdram_ctl & ecc_en_mask) != ecc_en_mask) {
/* no ECC */
pr_warn("%s: No ECC DIMMs discovered\n", __func__);
res = -ENODEV;
@@ -544,7 +582,8 @@ int fsl_mc_err_probe(struct platform_device *op)
mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR |
MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 |
MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 |
- MEM_FLAG_DDR4 | MEM_FLAG_RDDR4;
+ MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
+ MEM_FLAG_LPDDR4;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
@@ -559,11 +598,11 @@ int fsl_mc_err_probe(struct platform_device *op)
fsl_ddr_init_csrows(mci);
/* store the original error disable bits */
- orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE);
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0);
+ pdata->orig_ddr_err_disable = ddr_in32(pdata, FSL_MC_ERR_DISABLE);
+ ddr_out32(pdata, FSL_MC_ERR_DISABLE, 0);
/* clear all error bits */
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0);
+ ddr_out32(pdata, FSL_MC_ERR_DETECT, ~0);
res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups);
if (res) {
@@ -572,15 +611,15 @@ int fsl_mc_err_probe(struct platform_device *op)
}
if (edac_op_state == EDAC_OPSTATE_INT) {
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN,
+ ddr_out32(pdata, FSL_MC_ERR_INT_EN,
DDR_EIE_MBEE | DDR_EIE_SBEE);
/* store the original error management threshold */
- orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase +
- FSL_MC_ERR_SBE) & 0xff0000;
+ pdata->orig_ddr_err_sbe = ddr_in32(pdata,
+ FSL_MC_ERR_SBE) & 0xff0000;
/* set threshold to 1 error per interrupt */
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000);
+ ddr_out32(pdata, FSL_MC_ERR_SBE, 0x10000);
/* register interrupts */
pdata->irq = platform_get_irq(op, 0);
@@ -613,7 +652,7 @@ err:
return res;
}
-int fsl_mc_err_remove(struct platform_device *op)
+void fsl_mc_err_remove(struct platform_device *op)
{
struct mem_ctl_info *mci = dev_get_drvdata(&op->dev);
struct fsl_mc_pdata *pdata = mci->pvt_info;
@@ -621,14 +660,14 @@ int fsl_mc_err_remove(struct platform_device *op)
edac_dbg(0, "\n");
if (edac_op_state == EDAC_OPSTATE_INT) {
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0);
+ ddr_out32(pdata, FSL_MC_ERR_INT_EN, 0);
}
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE,
- orig_ddr_err_disable);
- ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe);
+ ddr_out32(pdata, FSL_MC_ERR_DISABLE,
+ pdata->orig_ddr_err_disable);
+ ddr_out32(pdata, FSL_MC_ERR_SBE, pdata->orig_ddr_err_sbe);
+
edac_mc_del_mc(&op->dev);
edac_mc_free(mci);
- return 0;
}
diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h
index 332439d7b2d9..73618f79e587 100644
--- a/drivers/edac/fsl_ddr_edac.h
+++ b/drivers/edac/fsl_ddr_edac.h
@@ -39,6 +39,9 @@
#define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54
#define FSL_MC_ERR_SBE 0x0e58
+#define IMX9_MC_ERR_EN 0x1000
+#define IMX9_MC_DATA_ERR_INJECT_OFF 0x100
+
#define DSC_MEM_EN 0x80000000
#define DSC_ECC_EN 0x20000000
#define DSC_RD_EN 0x10000000
@@ -46,6 +49,9 @@
#define DSC_DBW_32 0x00080000
#define DSC_DBW_64 0x00000000
+#define ERR_ECC_EN 0x80000000
+#define ERR_INLINE_ECC 0x40000000
+
#define DSC_SDTYPE_MASK 0x07000000
#define DSC_X32_EN 0x00000020
@@ -65,12 +71,19 @@
#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */
#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */
+#define TYPE_IMX9 0x1 /* MC used by iMX9 having registers changed */
+
struct fsl_mc_pdata {
char *name;
int edac_idx;
void __iomem *mc_vbase;
+ void __iomem *inject_vbase; /* "inject" register window, mapped for TYPE_IMX9 only */
int irq;
+ u32 orig_ddr_err_disable; /* FSL_MC_ERR_DISABLE as found at probe, restored on remove */
+ u32 orig_ddr_err_sbe; /* FSL_MC_ERR_SBE & 0xff0000 as found at probe, restored on remove */
+ bool little_endian; /* set from the "little-endian" DT property; otherwise big endian */
+ unsigned long flag; /* device match data, e.g. TYPE_IMX9 */
};
int fsl_mc_err_probe(struct platform_device *op);
-int fsl_mc_err_remove(struct platform_device *op);
+void fsl_mc_err_remove(struct platform_device *op);
#endif
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index cf2b618c1ada..d80c88818691 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -15,6 +15,7 @@
#include "edac_module.h"
#include <ras/ras_event.h>
#include <linux/notifier.h>
+#include <linux/string.h>
#define OTHER_DETAIL_LEN 400
@@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb,
p = pvt->msg;
p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
} else {
- strcpy(pvt->msg, "unknown error");
+ strscpy(pvt->msg, "unknown error");
}
/* Error address */
@@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb,
dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
if (dimm) {
e->top_layer = dimm->idx;
- strcpy(e->label, dimm->label);
+ strscpy(e->label, dimm->label);
}
}
if (p > e->location)
*(p - 1) = '\0';
if (!*e->label)
- strcpy(e->label, "unknown memory");
+ strscpy(e->label, "unknown memory");
/* All other fields are mapped on e->other_detail */
p = pvt->other_detail;
@@ -547,7 +548,7 @@ static int __init ghes_edac_init(void)
return -ENODEV;
if (list_empty(ghes_devs)) {
- pr_info("GHES probing device list is empty");
+ pr_info("GHES probing device list is empty\n");
return -ENODEV;
}
diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c
index c4549cec788b..24f163ff323f 100644
--- a/drivers/edac/highbank_l2_edac.c
+++ b/drivers/edac/highbank_l2_edac.c
@@ -7,8 +7,9 @@
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
-#include <linux/of_platform.h>
#include "edac_module.h"
@@ -53,7 +54,7 @@ static int highbank_l2_err_probe(struct platform_device *pdev)
int res = 0;
dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu",
- 1, "L", 1, 2, NULL, 0, 0);
+ 1, "L", 1, 2, 0);
if (!dci)
return -ENOMEM;
@@ -117,13 +118,12 @@ err:
return res;
}
-static int highbank_l2_err_remove(struct platform_device *pdev)
+static void highbank_l2_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(dci);
- return 0;
}
static struct platform_driver highbank_l2_edac_driver = {
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
index 19fba258ae10..a8879d72d064 100644
--- a/drivers/edac/highbank_mc_edac.c
+++ b/drivers/edac/highbank_mc_edac.c
@@ -7,8 +7,9 @@
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
-#include <linux/of_platform.h>
#include <linux/uaccess.h>
#include "edac_module.h"
@@ -250,13 +251,12 @@ free:
return res;
}
-static int highbank_mc_remove(struct platform_device *pdev)
+static void highbank_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
- return 0;
}
static struct platform_driver highbank_mc_edac_driver = {
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 65aeea53e2df..89b3e8cc38b1 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -13,7 +13,7 @@
#include "edac_module.h"
#include "skx_common.h"
-#define I10NM_REVISION "v0.0.5"
+#define I10NM_REVISION "v0.0.6"
#define EDAC_MOD_STR "i10nm_edac"
/* Debug macros */
@@ -22,25 +22,30 @@
#define I10NM_GET_SCK_BAR(d, reg) \
pci_read_config_dword((d)->uracu, 0xd0, &(reg))
-#define I10NM_GET_IMC_BAR(d, i, reg) \
- pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_IMC_BAR(d, i, reg) \
+ pci_read_config_dword((d)->uracu, \
+ (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
#define I10NM_GET_SAD(d, offset, i, reg)\
- pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
+ pci_read_config_dword((d)->sad_all, (offset) + (i) * \
+ (res_cfg->type == GNR ? 12 : 8), &(reg))
#define I10NM_GET_HBM_IMC_BAR(d, reg) \
pci_read_config_dword((d)->uracu, 0xd4, &(reg))
#define I10NM_GET_CAPID3_CFG(d, reg) \
- pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
+ pci_read_config_dword((d)->pcu_cr3, \
+ res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
+#define I10NM_GET_CAPID5_CFG(d, reg) \
+ pci_read_config_dword((d)->pcu_cr3, \
+ res_cfg->type == GNR ? 0x298 : 0x98, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j) \
- readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
+ readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \
+ (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \
(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i) \
readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
(i) * (m)->chan_mmio_sz)
#define I10NM_GET_MCMTR(m, i) \
- readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
- (i) * (m)->chan_mmio_sz)
-#define I10NM_GET_AMAP(m, i) \
- readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
+ readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \
+ (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \
(i) * (m)->chan_mmio_sz)
#define I10NM_GET_REG32(m, i, offset) \
readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
@@ -56,7 +61,11 @@
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
+#define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000
+#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000
+#define I10NM_GNR_IMC_MMIO_SIZE 0x4000
#define I10NM_HBM_IMC_MMIO_SIZE 0x9000
+#define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24)
#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
@@ -64,12 +73,6 @@
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
-#define RETRY_RD_ERR_LOG_UC BIT(1)
-#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
-#define RETRY_RD_ERR_LOG_EN BIT(15)
-#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
-#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
-
static struct list_head *i10nm_edac_list;
static struct res_config *res_cfg;
@@ -77,215 +80,319 @@ static int retry_rd_err_log;
static int decoding_via_mca;
static bool mem_cfg_2lm;
-static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
-static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
-static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
-static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
-static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
-
-static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
- u32 *offsets_scrub, u32 *offsets_demand,
- u32 *offsets_demand2)
+/*
+ * RRL register layout with two sets: LRE_SCRUB and LRE_DEMAND. The two
+ * offset rows carry the same values as the removed offsets_scrub_icx[] and
+ * offsets_demand_icx[] arrays. Column 0 of each row is the control register
+ * that enable_rrl() programs using uc_mask, noover_mask, en_patspr_mask and
+ * en_mask; widths[0] is the access size in bytes used for that register.
+ */
+static struct reg_rrl icx_reg_rrl_ddr = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8},
+ {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0},
+ },
+ .widths = {4, 4, 4, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+/*
+ * RRL register layout with three sets: LRE_SCRUB, LRE_DEMAND and
+ * FRE_DEMAND. The rows carry the same values as the removed
+ * offsets_scrub_spr[], offsets_demand_spr[] and offsets_demand2_spr[]
+ * arrays, in that order. Unlike the ICX table, column 2 is 8 bytes wide.
+ */
+static struct reg_rrl spr_reg_rrl_ddr = {
+ .set_num = 3,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND},
+ .offsets = {
+ {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8},
+ {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0},
+ {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+/*
+ * RRL register layout with two sets (LRE_SCRUB, LRE_DEMAND) whose rows carry
+ * the same values as the removed offsets_scrub_spr_hbm0[] and
+ * offsets_demand_spr_hbm0[] arrays.
+ * NOTE(review): presumably the entry selected as reg_rrl_hbm[0]; the
+ * res_config assignment is not visible here.
+ */
+static struct reg_rrl spr_reg_rrl_hbm_pch0 = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8},
+ {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+/*
+ * RRL register layout with two sets (LRE_SCRUB, LRE_DEMAND) whose rows carry
+ * the same values as the removed offsets_scrub_spr_hbm1[] and
+ * offsets_demand_spr_hbm1[] arrays.
+ * NOTE(review): presumably the entry selected as reg_rrl_hbm[1]; the
+ * res_config assignment is not visible here.
+ */
+static struct reg_rrl spr_reg_rrl_hbm_pch1 = {
+ .set_num = 2,
+ .reg_num = 6,
+ .modes = {LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8},
+ {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(13),
+ .noover_mask = BIT(14),
+ .en_mask = BIT(15),
+
+ .cecnt_num = 4,
+ .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24},
+ .cecnt_widths = {4, 4, 4, 4},
+};
+
+/*
+ * RRL register layout with four sets: FRE_SCRUB, FRE_DEMAND, LRE_SCRUB and
+ * LRE_DEMAND. The control-register bit positions differ from the ICX/SPR
+ * tables in this file: en_mask is BIT(12), en_patspr_mask is BIT(14) and
+ * noover_mask is BIT(15). It also lists eight cecnt registers rather than
+ * four.
+ */
+static struct reg_rrl gnr_reg_rrl_ddr = {
+ .set_num = 4,
+ .reg_num = 6,
+ .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND},
+ .offsets = {
+ {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0},
+ {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8},
+ {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0},
+ {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8},
+ },
+ .widths = {4, 4, 8, 4, 4, 8},
+ .v_mask = BIT(0),
+ .uc_mask = BIT(1),
+ .over_mask = BIT(2),
+ .en_patspr_mask = BIT(14),
+ .noover_mask = BIT(15),
+ .en_mask = BIT(12),
+
+ .cecnt_num = 8,
+ .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c},
+ .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4},
+};
+
+/*
+ * Read one register of a memory-controller channel.
+ *
+ * @imc:    controller whose MMIO window is accessed
+ * @chan:   channel index within @imc
+ * @offset: register offset within the channel's MMIO range
+ * @width:  access size in bytes; 4 and 8 are supported
+ *
+ * Returns the register value widened to 64 bits. An unsupported @width is
+ * logged and reads back as 0.
+ */
+static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
+{
+ switch (width) {
+ case 4:
+ return I10NM_GET_REG32(imc, chan, offset);
+ case 8:
+ return I10NM_GET_REG64(imc, chan, offset);
+ default:
+ i10nm_printk(KERN_ERR, "Invalid read RRL 0x%x width %d\n", offset, width);
+ return 0;
+ }
+}
+
+/*
+ * Write one register of a memory-controller channel.
+ *
+ * @imc:    controller whose MMIO window is accessed
+ * @chan:   channel index within @imc
+ * @offset: register offset within the channel's MMIO range
+ * @width:  access size in bytes; only 4 is supported
+ * @val:    value to write, truncated to 32 bits for a 4-byte access
+ *
+ * Any other @width is logged and the write is dropped.
+ */
+static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val)
+{
+ switch (width) {
+ case 4:
+ /* Plain call + break: a void function must not "return <expr>". */
+ I10NM_SET_REG32(imc, chan, offset, (u32)val);
+ break;
+ default:
+ i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width);
+ break;
+ }
+}
+
+/*
+ * Program the control register of one RRL register set on one channel.
+ *
+ * @imc:     controller owning the channel
+ * @chan:    channel index
+ * @rrl:     register layout; offsets[rrl_set][0] is the control register
+ *           and widths[0] its access size
+ * @rrl_set: index of the set within @rrl
+ * @enable:  true to turn logging on, false to put the saved setup back
+ * @rrl_ctl: storage for this set's original control value; written when
+ *           enabling, consulted when disabling
+ *
+ * Enabling saves the current value in *rrl_ctl, clears uc_mask, sets
+ * noover_mask for the FRE_* modes and clears it for the LRE_* modes, sets
+ * en_patspr_mask for the *_SCRUB modes and clears it for the *_DEMAND
+ * modes, and finally sets en_mask.
+ *
+ * Disabling undoes exactly those changes: each bit that enabling may have
+ * cleared is set again if *rrl_ctl had it set, and each bit that enabling
+ * may have set is cleared again if *rrl_ctl had it clear.
+ */
+static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+ int rrl_set, bool enable, u32 *rrl_ctl)
{
- u32 s, d, d2;
+ enum rrl_mode mode = rrl->modes[rrl_set];
+ u32 offset = rrl->offsets[rrl_set][0], v;
+ u8 width = rrl->widths[0];
+ bool first, scrub;
+
+ /* First or last read error. */
+ first = (mode == FRE_SCRUB || mode == FRE_DEMAND);
+ /* Patrol scrub or on-demand read error. */
+ scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB);
- s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
- d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
- if (offsets_demand2)
- d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);
+ v = read_imc_reg(imc, chan, offset, width);
if (enable) {
- /* Save default configurations */
- imc->chan[chan].retry_rd_err_log_s = s;
- imc->chan[chan].retry_rd_err_log_d = d;
- if (offsets_demand2)
- imc->chan[chan].retry_rd_err_log_d2 = d2;
-
- s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
- s |= RETRY_RD_ERR_LOG_EN;
- d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
- d |= RETRY_RD_ERR_LOG_EN;
-
- if (offsets_demand2) {
- d2 &= ~RETRY_RD_ERR_LOG_UC;
- d2 |= RETRY_RD_ERR_LOG_NOOVER;
- d2 |= RETRY_RD_ERR_LOG_EN;
- }
+ /* Save default configurations. */
+ *rrl_ctl = v;
+ v &= ~rrl->uc_mask;
+
+ if (first)
+ v |= rrl->noover_mask;
+ else
+ v &= ~rrl->noover_mask;
+
+ if (scrub)
+ v |= rrl->en_patspr_mask;
+ else
+ v &= ~rrl->en_patspr_mask;
+
+ v |= rrl->en_mask;
} else {
- /* Restore default configurations */
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
- s |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
- s |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
- s &= ~RETRY_RD_ERR_LOG_EN;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
- d |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
- d |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
- d &= ~RETRY_RD_ERR_LOG_EN;
-
- if (offsets_demand2) {
- if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
- d2 |= RETRY_RD_ERR_LOG_UC;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
- d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
- d2 &= ~RETRY_RD_ERR_LOG_EN;
+ /* Restore default configurations. */
+ if (*rrl_ctl & rrl->uc_mask)
+ v |= rrl->uc_mask;
+
+ if (first) {
+ if (!(*rrl_ctl & rrl->noover_mask))
+ v &= ~rrl->noover_mask;
+ } else {
+ if (*rrl_ctl & rrl->noover_mask)
+ v |= rrl->noover_mask;
+ }
+
+ if (scrub) {
+ if (!(*rrl_ctl & rrl->en_patspr_mask))
+ v &= ~rrl->en_patspr_mask;
+ } else {
+ if (*rrl_ctl & rrl->en_patspr_mask)
+ v |= rrl->en_patspr_mask;
}
+
+ if (!(*rrl_ctl & rrl->en_mask))
+ v &= ~rrl->en_mask;
}
- I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
- I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
- if (offsets_demand2)
- I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
+ write_imc_reg(imc, chan, offset, width, v);
+}
+
+/*
+ * Run enable_rrl() over every register set described by @rrl, in index
+ * order. Set N keeps its saved control value in rrl_ctl[N].
+ */
+static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+ bool enable, u32 *rrl_ctl)
+{
+ int set = 0;
+
+ while (set < rrl->set_num) {
+ enable_rrl(imc, chan, rrl, set, enable, &rrl_ctl[set]);
+ set++;
+ }
+}
+
+/*
+ * Enable or restore the RRL sets on each of the res_cfg->ddr_chan_num
+ * channels of @imc, using the res_cfg->reg_rrl_ddr layout. Controllers
+ * without an MMIO mapping are left alone.
+ */
+static void enable_rrls_ddr(struct skx_imc *imc, bool enable)
+{
+ struct reg_rrl *layout = res_cfg->reg_rrl_ddr;
+ int nr_chan = res_cfg->ddr_chan_num;
+ int ch = 0;
+
+ if (!imc->mbase)
+ return;
+
+ while (ch < nr_chan) {
+ enable_rrls(imc, ch, layout, enable, imc->chan[ch].rrl_ctl[0]);
+ ch++;
+ }
+}
+
+/*
+ * Enable or restore the RRL sets on each of the res_cfg->hbm_chan_num
+ * channels of @imc. Every channel is programmed twice, once per entry of
+ * res_cfg->reg_rrl_hbm[], with the saved values kept in rrl_ctl[0] and
+ * rrl_ctl[1]. Nothing is done unless @imc is mapped, is an HBM controller
+ * and both layouts are present.
+ */
+static void enable_rrls_hbm(struct skx_imc *imc, bool enable)
+{
+ struct reg_rrl **layout = res_cfg->reg_rrl_hbm;
+ int nr_chan = res_cfg->hbm_chan_num;
+ int ch, idx;
+
+ if (!imc->mbase)
+ return;
+ if (!imc->hbm_mc)
+ return;
+ if (!layout[0] || !layout[1])
+ return;
+
+ for (ch = 0; ch < nr_chan; ch++)
+ for (idx = 0; idx < 2; idx++)
+ enable_rrls(imc, ch, layout[idx], enable,
+ imc->chan[ch].rrl_ctl[idx]);
}
static void enable_retry_rd_err_log(bool enable)
{
- struct skx_imc *imc;
struct skx_dev *d;
- int i, j;
+ int i, imc_num;
edac_dbg(2, "\n");
- list_for_each_entry(d, i10nm_edac_list, list)
- for (i = 0; i < I10NM_NUM_IMC; i++) {
- imc = &d->imc[i];
- if (!imc->mbase)
- continue;
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ imc_num = res_cfg->ddr_imc_num;
+ for (i = 0; i < imc_num; i++)
+ enable_rrls_ddr(&d->imc[i], enable);
- for (j = 0; j < I10NM_NUM_CHANNELS; j++) {
- if (imc->hbm_mc) {
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub_hbm0,
- res_cfg->offsets_demand_hbm0,
- NULL);
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub_hbm1,
- res_cfg->offsets_demand_hbm1,
- NULL);
- } else {
- __enable_retry_rd_err_log(imc, j, enable,
- res_cfg->offsets_scrub,
- res_cfg->offsets_demand,
- res_cfg->offsets_demand2);
- }
- }
+ imc_num += res_cfg->hbm_imc_num;
+ for (; i < imc_num; i++)
+ enable_rrls_hbm(&d->imc[i], enable);
}
}
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
int len, bool scrub_err)
{
+ int i, j, n, ch = res->channel, pch = res->cs & 1;
struct skx_imc *imc = &res->dev->imc[res->imc];
- u32 log0, log1, log2, log3, log4;
- u32 corr0, corr1, corr2, corr3;
- u32 lxg0, lxg1, lxg3, lxg4;
- u32 *xffsets = NULL;
- u64 log2a, log5;
- u64 lxg2a, lxg5;
- u32 *offsets;
- int n, pch;
+ u64 log, corr, status_mask;
+ struct reg_rrl *rrl;
+ bool scrub;
+ u32 offset;
+ u8 width;
if (!imc->mbase)
return;
- if (imc->hbm_mc) {
- pch = res->cs & 1;
+ rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr;
- if (pch)
- offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
- res_cfg->offsets_demand_hbm1;
- else
- offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
- res_cfg->offsets_demand_hbm0;
- } else {
- if (scrub_err) {
- offsets = res_cfg->offsets_scrub;
- } else {
- offsets = res_cfg->offsets_demand;
- xffsets = res_cfg->offsets_demand2;
- }
- }
+ if (!rrl)
+ return;
- log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
- log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
- log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
- log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
- log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
-
- if (xffsets) {
- lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
- lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
- lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
- lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
- lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
- }
+ status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask;
- if (res_cfg->type == SPR) {
- log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
- n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
- log0, log1, log2a, log3, log4, log5);
+ n = scnprintf(msg, len, " retry_rd_err_log[");
+ for (i = 0; i < rrl->set_num; i++) {
+ scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB);
+ if (scrub_err != scrub)
+ continue;
- if (len - n > 0) {
- if (xffsets) {
- lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
- n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
- lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
- } else {
- n += snprintf(msg + n, len - n, "]");
- }
- }
- } else {
- log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
- n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
- log0, log1, log2, log3, log4, log5);
- }
+ for (j = 0; j < rrl->reg_num && len - n > 0; j++) {
+ offset = rrl->offsets[i][j];
+ width = rrl->widths[j];
+ log = read_imc_reg(imc, ch, offset, width);
- if (imc->hbm_mc) {
- if (pch) {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
- } else {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
+ if (width == 4)
+ n += scnprintf(msg + n, len - n, "%.8llx ", log);
+ else
+ n += scnprintf(msg + n, len - n, "%.16llx ", log);
+
+ /* Clear RRL status if RRL is in Linux control mode. */
+ if (retry_rd_err_log == 2 && !j && (log & status_mask))
+ write_imc_reg(imc, ch, offset, width, log & ~status_mask);
}
- } else {
- corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
- corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
- corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
- corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
}
- if (len - n > 0)
- snprintf(msg + n, len - n,
- " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
- corr0 & 0xffff, corr0 >> 16,
- corr1 & 0xffff, corr1 >> 16,
- corr2 & 0xffff, corr2 >> 16,
- corr3 & 0xffff, corr3 >> 16);
-
- /* Clear status bits */
- if (retry_rd_err_log == 2) {
- if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
- log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
- I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
+ /* Move back one space. */
+ n--;
+ n += scnprintf(msg + n, len - n, "]");
+
+ if (len - n > 0) {
+ n += scnprintf(msg + n, len - n, " correrrcnt[");
+ for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) {
+ offset = rrl->cecnt_offsets[i];
+ width = rrl->cecnt_widths[i];
+ corr = read_imc_reg(imc, ch, offset, width);
+
+ /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */
+ if (res_cfg->type <= SPR) {
+ n += scnprintf(msg + n, len - n, "%.4llx %.4llx ",
+ corr & 0xffff, corr >> 16);
+ } else {
+ /* CPUs {GNR} encode one counter per CORRERRCNT register. */
+ if (width == 4)
+ n += scnprintf(msg + n, len - n, "%.8llx ", corr);
+ else
+ n += scnprintf(msg + n, len - n, "%.16llx ", corr);
+ }
}
- if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
- lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
- I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
- }
+ /* Move back one space. */
+ n--;
+ n += scnprintf(msg + n, len - n, "]");
}
}
@@ -311,6 +418,80 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
return pdev;
}
+/**
+ * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
+ *
+ * @cfg : The pointer to the structure of EDAC resource configurations.
+ *
+ * For Granite Rapids CPUs, the number of present DDR memory controllers read
+ * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
+ * For other CPUs, the number of present DDR memory controllers is statically
+ * configured in @cfg->ddr_imc_num.
+ *
+ * RETURNS : 0 on success, < 0 on failure.
+ */
+static int i10nm_get_imc_num(struct res_config *cfg)
+{
+ int n, imc_num, chan_num = 0;
+ struct skx_dev *d;
+ u32 reg;
+
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
+ res_cfg->pcu_cr3_bdf.dev,
+ res_cfg->pcu_cr3_bdf.fun);
+ if (!d->pcu_cr3)
+ continue;
+
+ if (I10NM_GET_CAPID5_CFG(d, reg))
+ continue;
+
+ n = I10NM_DDR_IMC_CH_CNT(reg);
+
+ if (!chan_num) {
+ chan_num = n;
+ edac_dbg(2, "Get DDR CH number: %d\n", chan_num);
+ } else if (chan_num != n) {
+ i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n);
+ }
+ }
+
+ switch (cfg->type) {
+ case GNR:
+ /*
+ * One channel per DDR memory controller for Granite Rapids CPUs.
+ */
+ imc_num = chan_num;
+
+ if (!imc_num) {
+ i10nm_printk(KERN_ERR, "Invalid DDR MC number\n");
+ return -ENODEV;
+ }
+
+ if (cfg->ddr_imc_num != imc_num) {
+ /*
+ * Update the configuration data to reflect the number of
+ * present DDR memory controllers.
+ */
+ cfg->ddr_imc_num = imc_num;
+ edac_dbg(2, "Set DDR MC number: %d", imc_num);
+
+ /* Release and reallocate skx_dev list with the updated number. */
+ skx_remove();
+ if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0)
+ return -ENODEV;
+ }
+
+ return 0;
+ default:
+ /*
+ * For other CPUs, the number of present DDR memory controllers
+ * is statically pre-configured in cfg->ddr_imc_num.
+ */
+ return 0;
+ }
+}
+
static bool i10nm_check_2lm(struct res_config *cfg)
{
struct skx_dev *d;
@@ -318,9 +499,9 @@ static bool i10nm_check_2lm(struct res_config *cfg)
int i;
list_for_each_entry(d, i10nm_edac_list, list) {
- d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
- PCI_SLOT(cfg->sad_all_devfn),
- PCI_FUNC(cfg->sad_all_devfn));
+ d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus],
+ res_cfg->sad_all_bdf.dev,
+ res_cfg->sad_all_bdf.fun);
if (!d->sad_all)
continue;
@@ -337,20 +518,39 @@ static bool i10nm_check_2lm(struct res_config *cfg)
}
/*
- * Check whether the error comes from DDRT by ICX/Tremont model specific error code.
- * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS.
+ * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code.
+ * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
*/
static bool i10nm_mscod_is_ddrt(u32 mscod)
{
- switch (mscod) {
- case 0x0106: case 0x0107:
- case 0x0800: case 0x0804:
- case 0x0806 ... 0x0808:
- case 0x080a ... 0x080e:
- case 0x0810: case 0x0811:
- case 0x0816: case 0x081e:
- case 0x081f:
- return true;
+ switch (res_cfg->type) {
+ case I10NM:
+ switch (mscod) {
+ case 0x0106: case 0x0107:
+ case 0x0800: case 0x0804:
+ case 0x0806 ... 0x0808:
+ case 0x080a ... 0x080e:
+ case 0x0810: case 0x0811:
+ case 0x0816: case 0x081e:
+ case 0x081f:
+ return true;
+ }
+
+ break;
+ case SPR:
+ switch (mscod) {
+ case 0x0800: case 0x0804:
+ case 0x0806 ... 0x0808:
+ case 0x080a ... 0x080e:
+ case 0x0810: case 0x0811:
+ case 0x0816: case 0x081e:
+ case 0x081f:
+ return true;
+ }
+
+ break;
+ default:
+ return false;
}
return false;
@@ -358,6 +558,7 @@ static bool i10nm_mscod_is_ddrt(u32 mscod)
static bool i10nm_mc_decode_available(struct mce *mce)
{
+#define ICX_IMCx_CHy 0x06666000
u8 bank;
if (!decoding_via_mca || mem_cfg_2lm)
@@ -371,21 +572,26 @@ static bool i10nm_mc_decode_available(struct mce *mce)
switch (res_cfg->type) {
case I10NM:
- if (bank < 13 || bank > 26)
+ /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */
+ if (!(ICX_IMCx_CHy & (1 << bank)))
return false;
-
- /* DDRT errors can't be decoded from MCA bank registers */
- if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
- return false;
-
- if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
+ break;
+ case SPR:
+ if (bank < 13 || bank > 20)
return false;
-
- /* Check whether one of {13,14,17,18,21,22,25,26} */
- return ((bank - 13) & BIT(1)) == 0;
+ break;
default:
return false;
}
+
+ /* DDRT errors can't be decoded from MCA bank registers */
+ if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
+ return false;
+
+ if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
+ return false;
+
+ return true;
}
static bool i10nm_mc_decode(struct decoded_addr *res)
@@ -407,9 +613,29 @@ static bool i10nm_mc_decode(struct decoded_addr *res)
switch (res_cfg->type) {
case I10NM:
- bank = m->bank - 13;
- res->imc = bank / 4;
- res->channel = bank % 2;
+ bank = m->bank - 13;
+ res->imc = bank / 4;
+ res->channel = bank % 2;
+ res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
+ res->row = GET_BITFIELD(m->misc, 19, 39);
+ res->bank_group = GET_BITFIELD(m->misc, 40, 41);
+ res->bank_address = GET_BITFIELD(m->misc, 42, 43);
+ res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2;
+ res->rank = GET_BITFIELD(m->misc, 56, 58);
+ res->dimm = res->rank >> 2;
+ res->rank = res->rank % 4;
+ break;
+ case SPR:
+ bank = m->bank - 13;
+ res->imc = bank / 2;
+ res->channel = bank % 2;
+ res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
+ res->row = GET_BITFIELD(m->misc, 19, 36);
+ res->bank_group = GET_BITFIELD(m->misc, 37, 38);
+ res->bank_address = GET_BITFIELD(m->misc, 39, 40);
+ res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2;
+ res->rank = GET_BITFIELD(m->misc, 57, 57);
+ res->dimm = GET_BITFIELD(m->misc, 58, 58);
break;
default:
return false;
@@ -421,34 +647,165 @@ static bool i10nm_mc_decode(struct decoded_addr *res)
return false;
}
- res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
- res->row = GET_BITFIELD(m->misc, 19, 39);
- res->bank_group = GET_BITFIELD(m->misc, 40, 41);
- res->bank_address = GET_BITFIELD(m->misc, 42, 43);
- res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2;
- res->rank = GET_BITFIELD(m->misc, 56, 58);
- res->dimm = res->rank >> 2;
- res->rank = res->rank % 4;
-
return true;
}
+/**
+ * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
+ *
+ * @d : The pointer to the structure of CPU socket EDAC device.
+ * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1).
+ * @physical_idx : To store the corresponding physical index of @logical_idx.
+ *
+ * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
+ */
+static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
+{
+#define GNR_MAX_IMC_PCI_CNT 28
+
+ struct pci_dev *mdev;
+ int i, logical = 0;
+
+ /*
+ * Detect present memory controllers from { PCI devices 5-8, functions 1-7 }
+ */
+ for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
+ mdev = pci_get_dev_wrapper(d->seg,
+ d->bus[res_cfg->ddr_mdev_bdf.bus],
+ res_cfg->ddr_mdev_bdf.dev + i / 7,
+ res_cfg->ddr_mdev_bdf.fun + i % 7);
+
+ if (mdev) {
+ if (logical == logical_idx) {
+ *physical_idx = i;
+ return mdev;
+ }
+
+ pci_dev_put(mdev);
+ logical++;
+ }
+ }
+
+ return NULL;
+}
+
+static u32 get_gnr_imc_mmio_offset(void)
+{
+ if (boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D)
+ return I10NM_GNR_D_IMC_MMIO_OFFSET;
+
+ return I10NM_GNR_IMC_MMIO_OFFSET;
+}
+
+/**
+ * get_ddr_munit() - Get the resource of the i-th DDR memory controller.
+ *
+ * @d : The pointer to the structure of CPU socket EDAC device.
+ * @i : The index of the CPU socket relative DDR memory controller.
+ * @offset : To store the MMIO offset of the i-th DDR memory controller.
+ * @size : To store the MMIO size of the i-th DDR memory controller.
+ *
+ * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
+ */
+static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size)
+{
+ struct pci_dev *mdev;
+ int physical_idx;
+ u32 reg;
+
+ switch (res_cfg->type) {
+ case GNR:
+ if (I10NM_GET_IMC_BAR(d, 0, reg)) {
+ i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n");
+ return NULL;
+ }
+
+ mdev = get_gnr_mdev(d, i, &physical_idx);
+ if (!mdev)
+ return NULL;
+
+ *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) +
+ get_gnr_imc_mmio_offset() +
+ physical_idx * I10NM_GNR_IMC_MMIO_SIZE;
+ *size = I10NM_GNR_IMC_MMIO_SIZE;
+
+ break;
+ default:
+ if (I10NM_GET_IMC_BAR(d, i, reg)) {
+ i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i);
+ return NULL;
+ }
+
+ mdev = pci_get_dev_wrapper(d->seg,
+ d->bus[res_cfg->ddr_mdev_bdf.bus],
+ res_cfg->ddr_mdev_bdf.dev + i,
+ res_cfg->ddr_mdev_bdf.fun);
+ if (!mdev)
+ return NULL;
+
+ *offset = I10NM_GET_IMC_MMIO_OFFSET(reg);
+ *size = I10NM_GET_IMC_MMIO_SIZE(reg);
+ }
+
+ return mdev;
+}
+
+/**
+ * i10nm_imc_absent() - Check whether the memory controller @imc is absent
+ *
+ * @imc : The pointer to the structure of memory controller EDAC device.
+ *
+ * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
+ */
+static bool i10nm_imc_absent(struct skx_imc *imc)
+{
+ u32 mcmtr;
+ int i;
+
+ switch (res_cfg->type) {
+ case SPR:
+ for (i = 0; i < res_cfg->ddr_chan_num; i++) {
+ mcmtr = I10NM_GET_MCMTR(imc, i);
+ edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
+ if (mcmtr != ~0)
+ return false;
+ }
+
+ /*
+ * Some workstations' absent memory controllers still
+ * appear as PCIe devices, misleading the EDAC driver.
+ * The MMIO registers of these absent memory controllers
+ * have been observed to consistently hold the value of ~0.
+ *
+ * We identify a memory controller as absent by checking
+ * if its MMIO register "mcmtr" == ~0 in all its channels.
+ */
+ return true;
+ default:
+ return false;
+ }
+}
+
static int i10nm_get_ddr_munits(void)
{
struct pci_dev *mdev;
void __iomem *mbase;
unsigned long size;
struct skx_dev *d;
- int i, j = 0;
+ int i, lmc, j = 0;
u32 reg, off;
u64 base;
list_for_each_entry(d, i10nm_edac_list, list) {
- d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
+ d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus],
+ res_cfg->util_all_bdf.dev,
+ res_cfg->util_all_bdf.fun);
if (!d->util_all)
return -ENODEV;
- d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
+ d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus],
+ res_cfg->uracu_bdf.dev,
+ res_cfg->uracu_bdf.fun);
if (!d->uracu)
return -ENODEV;
@@ -461,9 +818,9 @@ static int i10nm_get_ddr_munits(void)
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
j++, base, reg);
- for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
- mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
- 12 + i, 0);
+ for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
+ mdev = get_ddr_munit(d, i, &off, &size);
+
if (i == 0 && !mdev) {
i10nm_printk(KERN_ERR, "No IMC found\n");
return -ENODEV;
@@ -471,15 +828,6 @@ static int i10nm_get_ddr_munits(void)
if (!mdev)
continue;
- d->imc[i].mdev = mdev;
-
- if (I10NM_GET_IMC_BAR(d, i, reg)) {
- i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
- return -ENODEV;
- }
-
- off = I10NM_GET_IMC_MMIO_OFFSET(reg);
- size = I10NM_GET_IMC_MMIO_SIZE(reg);
edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
i, base + off, size, reg);
@@ -490,7 +838,19 @@ static int i10nm_get_ddr_munits(void)
return -ENODEV;
}
- d->imc[i].mbase = mbase;
+ d->imc[lmc].mbase = mbase;
+ if (i10nm_imc_absent(&d->imc[lmc])) {
+ pci_dev_put(mdev);
+ iounmap(mbase);
+ d->imc[lmc].mbase = NULL;
+ edac_dbg(2, "Skip absent mc%d\n", i);
+ continue;
+ } else {
+ d->imc[lmc].mdev = mdev;
+ if (res_cfg->type == SPR)
+ skx_set_mc_mapping(d, i, lmc);
+ lmc++;
+ }
}
}
@@ -519,7 +879,6 @@ static int i10nm_get_hbm_munits(void)
u64 base;
list_for_each_entry(d, i10nm_edac_list, list) {
- d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
if (!d->pcu_cr3)
return -ENODEV;
@@ -540,11 +899,13 @@ static int i10nm_get_hbm_munits(void)
}
base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
- lmc = I10NM_NUM_DDR_IMC;
+ lmc = res_cfg->ddr_imc_num;
+
+ for (i = 0; i < res_cfg->hbm_imc_num; i++) {
+ mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
+ res_cfg->hbm_mdev_bdf.dev + i / 4,
+ res_cfg->hbm_mdev_bdf.fun + i % 4);
- for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
- mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
- 12 + i / 4, 1 + i % 4);
if (i == 0 && !mdev) {
i10nm_printk(KERN_ERR, "No hbm mc found\n");
return -ENODEV;
@@ -594,49 +955,95 @@ static struct res_config i10nm_cfg0 = {
.type = I10NM,
.decs_did = 0x3452,
.busno_cfg_offset = 0xcc,
+ .ddr_imc_num = 4,
+ .ddr_chan_num = 2,
+ .ddr_dimm_num = 2,
.ddr_chan_mmio_sz = 0x4000,
- .sad_all_devfn = PCI_DEVFN(29, 0),
+ .sad_all_bdf = {1, 29, 0},
+ .pcu_cr3_bdf = {1, 30, 3},
+ .util_all_bdf = {1, 29, 1},
+ .uracu_bdf = {0, 0, 1},
+ .ddr_mdev_bdf = {0, 12, 0},
+ .hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .offsets_scrub = offsets_scrub_icx,
- .offsets_demand = offsets_demand_icx,
+ .reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config i10nm_cfg1 = {
.type = I10NM,
.decs_did = 0x3452,
.busno_cfg_offset = 0xd0,
+ .ddr_imc_num = 4,
+ .ddr_chan_num = 2,
+ .ddr_dimm_num = 2,
.ddr_chan_mmio_sz = 0x4000,
- .sad_all_devfn = PCI_DEVFN(29, 0),
+ .sad_all_bdf = {1, 29, 0},
+ .pcu_cr3_bdf = {1, 30, 3},
+ .util_all_bdf = {1, 29, 1},
+ .uracu_bdf = {0, 0, 1},
+ .ddr_mdev_bdf = {0, 12, 0},
+ .hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .offsets_scrub = offsets_scrub_icx,
- .offsets_demand = offsets_demand_icx,
+ .reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config spr_cfg = {
.type = SPR,
.decs_did = 0x3252,
.busno_cfg_offset = 0xd0,
+ .ddr_imc_num = 4,
+ .ddr_chan_num = 2,
+ .ddr_dimm_num = 2,
+ .hbm_imc_num = 16,
+ .hbm_chan_num = 2,
+ .hbm_dimm_num = 1,
.ddr_chan_mmio_sz = 0x8000,
.hbm_chan_mmio_sz = 0x4000,
.support_ddr5 = true,
- .sad_all_devfn = PCI_DEVFN(10, 0),
+ .sad_all_bdf = {1, 10, 0},
+ .pcu_cr3_bdf = {1, 30, 3},
+ .util_all_bdf = {1, 29, 1},
+ .uracu_bdf = {0, 0, 1},
+ .ddr_mdev_bdf = {0, 12, 0},
+ .hbm_mdev_bdf = {0, 12, 1},
+ .sad_all_offset = 0x300,
+ .reg_rrl_ddr = &spr_reg_rrl_ddr,
+ .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0,
+ .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1,
+};
+
+static struct res_config gnr_cfg = {
+ .type = GNR,
+ .decs_did = 0x3252,
+ .busno_cfg_offset = 0xd0,
+ .ddr_imc_num = 12,
+ .ddr_chan_num = 1,
+ .ddr_dimm_num = 2,
+ .ddr_chan_mmio_sz = 0x4000,
+ .support_ddr5 = true,
+ .sad_all_bdf = {0, 13, 0},
+ .pcu_cr3_bdf = {0, 5, 0},
+ .util_all_bdf = {0, 13, 1},
+ .uracu_bdf = {0, 0, 1},
+ .ddr_mdev_bdf = {0, 5, 1},
.sad_all_offset = 0x300,
- .offsets_scrub = offsets_scrub_spr,
- .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
- .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
- .offsets_demand = offsets_demand_spr,
- .offsets_demand2 = offsets_demand2_spr,
- .offsets_demand_hbm0 = offsets_demand_spr_hbm0,
- .offsets_demand_hbm1 = offsets_demand_spr_hbm1,
+ .reg_rrl_ddr = &gnr_reg_rrl_ddr,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
+ X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0),
+ X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1),
+ X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0),
+ X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1),
+ X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1),
+
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_cfg),
{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -651,12 +1058,21 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
return !!GET_BITFIELD(mcmtr, 2, 2);
}
+static bool i10nm_channel_disabled(struct skx_imc *imc, int chan)
+{
+ u32 mcmtr = I10NM_GET_MCMTR(imc, chan);
+
+ edac_dbg(1, "mc%d ch%d mcmtr reg %x\n", imc->mc, chan, mcmtr);
+
+ return (mcmtr == ~0 || GET_BITFIELD(mcmtr, 18, 18));
+}
+
static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
struct res_config *cfg)
{
struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc;
- u32 mtr, amap, mcddrtcfg;
+ u32 mtr, mcddrtcfg = 0;
struct dimm_info *dimm;
int i, j, ndimms;
@@ -664,9 +1080,16 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
if (!imc->mbase)
continue;
+ if (i10nm_channel_disabled(imc, i)) {
+ edac_dbg(1, "mc%d ch%d is disabled.\n", imc->mc, i);
+ continue;
+ }
+
ndimms = 0;
- amap = I10NM_GET_AMAP(imc, i);
- mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
+
+ if (res_cfg->type != GNR)
+ mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
+
for (j = 0; j < imc->num_dimms; j++) {
dimm = edac_get_dimm(mci, i, j, 0);
mtr = I10NM_GET_DIMMMTR(imc, i, j);
@@ -674,7 +1097,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
mtr, mcddrtcfg, imc->mc, i, j);
if (IS_DIMM_PRESENT(mtr))
- ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
+ ndimms += skx_get_dimm_info(mtr, 0, 0, dimm,
imc, i, j, cfg);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
@@ -695,63 +1118,16 @@ static struct notifier_block i10nm_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
-#ifdef CONFIG_EDAC_DEBUG
-/*
- * Debug feature.
- * Exercise the address decode logic by writing an address to
- * /sys/kernel/debug/edac/i10nm_test/addr.
- */
-static struct dentry *i10nm_test;
-
-static int debugfs_u64_set(void *data, u64 val)
-{
- struct mce m;
-
- pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
-
- memset(&m, 0, sizeof(m));
- /* ADDRV + MemRd + Unknown channel */
- m.status = MCI_STATUS_ADDRV + 0x90;
- /* One corrected error */
- m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
- m.addr = val;
- skx_mce_check_error(NULL, 0, &m);
-
- return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
-
-static void setup_i10nm_debug(void)
-{
- i10nm_test = edac_debugfs_create_dir("i10nm_test");
- if (!i10nm_test)
- return;
-
- if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
- NULL, &fops_u64_wo)) {
- debugfs_remove(i10nm_test);
- i10nm_test = NULL;
- }
-}
-
-static void teardown_i10nm_debug(void)
-{
- debugfs_remove_recursive(i10nm_test);
-}
-#else
-static inline void setup_i10nm_debug(void) {}
-static inline void teardown_i10nm_debug(void) {}
-#endif /*CONFIG_EDAC_DEBUG*/
-
static int __init i10nm_init(void)
{
- u8 mc = 0, src_id = 0, node_id = 0;
+ u8 mc = 0, src_id = 0;
const struct x86_cpu_id *id;
struct res_config *cfg;
const char *owner;
struct skx_dev *d;
int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
u64 tolm, tohm;
+ int imc_num;
edac_dbg(2, "\n");
@@ -770,6 +1146,7 @@ static int __init i10nm_init(void)
return -ENODEV;
cfg = (struct res_config *)id->driver_data;
+ skx_set_res_cfg(cfg);
res_cfg = cfg;
rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
@@ -784,6 +1161,10 @@ static int __init i10nm_init(void)
return -ENODEV;
}
+ rc = i10nm_get_imc_num(cfg);
+ if (rc < 0)
+ goto fail;
+
mem_cfg_2lm = i10nm_check_2lm(cfg);
skx_set_mem_cfg(mem_cfg_2lm);
@@ -792,35 +1173,33 @@ static int __init i10nm_init(void)
if (i10nm_get_hbm_munits() && rc)
goto fail;
+ imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;
+
list_for_each_entry(d, i10nm_edac_list, list) {
rc = skx_get_src_id(d, 0xf8, &src_id);
if (rc < 0)
goto fail;
- rc = skx_get_node_id(d, &node_id);
- if (rc < 0)
- goto fail;
-
- edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
- for (i = 0; i < I10NM_NUM_IMC; i++) {
+ edac_dbg(2, "src_id = %d\n", src_id);
+ for (i = 0; i < imc_num; i++) {
if (!d->imc[i].mdev)
continue;
d->imc[i].mc = mc++;
d->imc[i].lmc = i;
- d->imc[i].src_id = src_id;
- d->imc[i].node_id = node_id;
+ d->imc[i].src_id = src_id;
if (d->imc[i].hbm_mc) {
d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
- d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
- d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS;
+ d->imc[i].num_channels = cfg->hbm_chan_num;
+ d->imc[i].num_dimms = cfg->hbm_dimm_num;
} else {
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
- d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
- d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS;
+ d->imc[i].num_channels = cfg->ddr_chan_num;
+ d->imc[i].num_dimms = cfg->ddr_dimm_num;
}
- rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
+ rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev,
+ pci_name(d->imc[i].mdev),
"Intel_10nm Socket", EDAC_MOD_STR,
i10nm_get_dimm_config, cfg);
if (rc < 0)
@@ -834,9 +1213,9 @@ static int __init i10nm_init(void)
opstate_init();
mce_register_decode_chain(&i10nm_mce_dec);
- setup_i10nm_debug();
+ skx_setup_debug("i10nm_test");
- if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(true);
@@ -856,13 +1235,13 @@ static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
- if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(NULL, NULL);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(false);
}
- teardown_i10nm_debug();
+ skx_teardown_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();
skx_remove();
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index ba46057d4220..4a1bebc1ff14 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -338,11 +338,11 @@ struct i5000_pvt {
u16 mir0, mir1, mir2;
- u16 b0_mtr[NUM_MTRS]; /* Memory Technlogy Reg */
+ u16 b0_mtr[NUM_MTRS]; /* Memory Technology Reg */
u16 b0_ambpresent0; /* Branch 0, Channel 0 */
- u16 b0_ambpresent1; /* Brnach 0, Channel 1 */
+ u16 b0_ambpresent1; /* Branch 0, Channel 1 */
- u16 b1_mtr[NUM_MTRS]; /* Memory Technlogy Reg */
+ u16 b1_mtr[NUM_MTRS]; /* Memory Technology Reg */
u16 b1_ambpresent0; /* Branch 1, Channel 8 */
u16 b1_ambpresent1; /* Branch 1, Channel 1 */
@@ -1210,7 +1210,7 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
&pvt->b0_ambpresent1);
edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
- /* Only if we have 2 branchs (4 channels) */
+ /* Only if we have 2 branches (4 channels) */
if (pvt->maxch < CHANNELS_PER_BRANCH) {
pvt->b1_ambpresent0 = 0;
pvt->b1_ambpresent1 = 0;
@@ -1573,13 +1573,10 @@ module_init(i5000_init);
module_exit(i5000_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR
- ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>");
-MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - "
- I5000_REVISION);
+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION);
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
module_param(misc_messages, int, 0444);
MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages");
-
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index f5d82518c15e..d470afe65001 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -909,7 +909,7 @@ static void i5100_do_inject(struct mem_ctl_info *mci)
*
* The injection code don't work without setting this register.
* The register needs to be flipped off then on else the hardware
- * will only preform the first injection.
+ * will only perform the first injection.
*
* Stop condition bits 7:4
* 1010 - Stop after one injection
@@ -1220,6 +1220,5 @@ module_init(i5100_init);
module_exit(i5100_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR
- ("Arthur Jones <ajones@riverbed.com>");
+MODULE_AUTHOR("Arthur Jones <ajones@riverbed.com>");
MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 49b4499269fb..b5cf25905b05 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr)
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+ str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 61adaa872ba7..69068f8d0cad 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+ str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",
@@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
IS_MIRRORED(pvt->mc_settings) ? "" : "non-");
edac_dbg(0, "Error detection is %s\n",
- IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings)));
edac_dbg(0, "Retry is %s\n",
- IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings)));
/* Get Memory Interleave Range registers */
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 23d25724bae4..91e0a88ef904 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -376,7 +376,7 @@ static const struct pci_id_table pci_dev_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
- {0,} /* 0 terminated list. */
+ { NULL, }
};
/*
@@ -385,7 +385,7 @@ static const struct pci_id_table pci_dev_table[] = {
static const struct pci_device_id i7core_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
- {0,} /* 0 terminated list. */
+ { 0, }
};
/****************************************************************************
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index fbec90d00f1e..b8a497f0de28 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -355,8 +355,7 @@ module_init(i82860_init);
module_exit(i82860_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
- "Ben Woodard <woodard@redhat.com>");
+MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) Ben Woodard <woodard@redhat.com>");
MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
module_param(edac_op_state, int, 0444);
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 9ef13570f2e5..eaab6af143e1 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -19,7 +19,8 @@
* 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
* 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
- * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
+ * 590f: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
+ * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
* 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers
* 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers
* 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers
@@ -43,6 +44,7 @@
* but lo_hi_readq() ensures that we are safe across all e3-1200 processors.
*/
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
@@ -50,6 +52,8 @@
#include <linux/edac.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
#include "edac_module.h"
#define EDAC_MOD_STR "ie31200_edac"
@@ -67,7 +71,8 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F
#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918
#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F
-#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x590f
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_12 0x5918
/* Coffee Lake-S */
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00
@@ -82,43 +87,46 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca
-/* Test if HB is for Skylake or later. */
-#define DEVICE_ID_SKYLAKE_OR_LATER(did) \
- (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \
- (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \
- PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK))
-
-#define IE31200_DIMMS 4
-#define IE31200_RANKS 8
-#define IE31200_RANKS_PER_CHANNEL 4
+/* Raptor Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 /* 8P+8E, e.g. i7-13700 */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 /* 6P+8E, e.g. i5-13500, i5-13600, i5-14500 */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 /* 4P+0E, e.g. i3-13100E */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700 /* 8P+16E, e.g. i9-13900, i9-14900 */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5 0xa740 /* 8P+12E, e.g. i7-14700 */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6 0xa704 /* 6P+8E, e.g. i5-14600 */
+
+/* Raptor Lake-HX */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1 0xa702 /* 8P+16E, e.g. i9-13950HX */
+
+/* Alder Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660
+#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2 0x4668 /* 8P+4E, e.g. i7-12700K */
+#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3 0x4648 /* 6P+4E, e.g. i5-12600K */
+
+/* Bartlett Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1 0x4639
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2 0x463c
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3 0x4642
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4 0x4643
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5 0xa731
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6 0xa732
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7 0xa733
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8 0xa741
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9 0xa744
+#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10 0xa745
+
+#define IE31200_RANKS_PER_CHANNEL 8
#define IE31200_DIMMS_PER_CHANNEL 2
#define IE31200_CHANNELS 2
+#define IE31200_IMC_NUM 2
/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */
#define IE31200_MCHBAR_LOW 0x48
#define IE31200_MCHBAR_HIGH 0x4c
-#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15)
-#define IE31200_MMR_WINDOW_SIZE BIT(15)
/*
* Error Status Register (16b)
*
- * 15 reserved
- * 14 Isochronous TBWRR Run Behind FIFO Full
- * (ITCV)
- * 13 Isochronous TBWRR Run Behind FIFO Put
- * (ITSTV)
- * 12 reserved
- * 11 MCH Thermal Sensor Event
- * for SMI/SCI/SERR (GTSE)
- * 10 reserved
- * 9 LOCK to non-DRAM Memory Flag (LCKF)
- * 8 reserved
- * 7 DRAM Throttle Flag (DTF)
- * 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
@@ -127,68 +135,57 @@
#define IE31200_ERRSTS_CE BIT(0)
#define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE)
-/*
- * Channel 0 ECC Error Log (64b)
- *
- * 63:48 Error Column Address (ERRCOL)
- * 47:32 Error Row Address (ERRROW)
- * 31:29 Error Bank Address (ERRBANK)
- * 28:27 Error Rank Address (ERRRANK)
- * 26:24 reserved
- * 23:16 Error Syndrome (ERRSYND)
- * 15: 2 reserved
- * 1 Multiple Bit Error Status (MERRSTS)
- * 0 Correctable Error Status (CERRSTS)
- */
-
-#define IE31200_C0ECCERRLOG 0x40c8
-#define IE31200_C1ECCERRLOG 0x44c8
-#define IE31200_C0ECCERRLOG_SKL 0x4048
-#define IE31200_C1ECCERRLOG_SKL 0x4448
-#define IE31200_ECCERRLOG_CE BIT(0)
-#define IE31200_ECCERRLOG_UE BIT(1)
-#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
-#define IE31200_ECCERRLOG_RANK_SHIFT 27
-#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16)
-#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16
-
-#define IE31200_ECCERRLOG_SYNDROME(log) \
- ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \
- IE31200_ECCERRLOG_SYNDROME_SHIFT)
-
#define IE31200_CAPID0 0xe4
#define IE31200_CAPID0_PDCD BIT(4)
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
-#define IE31200_MAD_DIMM_0_OFFSET 0x5004
-#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C
-#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK BIT(17)
-#define IE31200_MAD_DIMM_A_RANK_SHIFT 17
-#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10)
-#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10
-#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
-#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19
-#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8)
-#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8
-
-/* Skylake reports 1GB increments, everything else is 256MB */
-#define IE31200_PAGES(n, skl) \
- (n << (28 + (2 * skl) - PAGE_SHIFT))
-
static int nr_channels;
static struct pci_dev *mci_pdev;
static int ie31200_registered = 1;
+struct res_config {
+ enum mem_type mtype;
+ bool cmci;
+ int imc_num;
+ /* Host MMIO configuration register */
+ u64 reg_mchbar_mask;
+ u64 reg_mchbar_window_size;
+ /* ECC error log register */
+ u64 reg_eccerrlog_offset[IE31200_CHANNELS];
+ u64 reg_eccerrlog_ce_mask;
+ u64 reg_eccerrlog_ce_ovfl_mask;
+ u64 reg_eccerrlog_ue_mask;
+ u64 reg_eccerrlog_ue_ovfl_mask;
+ u64 reg_eccerrlog_rank_mask;
+ u64 reg_eccerrlog_syndrome_mask;
+ /* MSR to clear ECC error log register */
+ u32 msr_clear_eccerrlog_offset;
+ /* DIMM characteristics register */
+ u64 reg_mad_dimm_size_granularity;
+ u64 reg_mad_dimm_offset[IE31200_CHANNELS];
+ u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL];
+ u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL];
+ u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL];
+};
+
struct ie31200_priv {
void __iomem *window;
void __iomem *c0errlog;
void __iomem *c1errlog;
+ struct res_config *cfg;
+ struct mem_ctl_info *mci;
+ struct pci_dev *pdev;
+ struct device dev;
};
+static struct ie31200_pvt {
+ struct ie31200_priv *priv[IE31200_IMC_NUM];
+} ie31200_pvt;
+
enum ie31200_chips {
IE31200 = 0,
+ IE31200_1 = 1,
};
struct ie31200_dev_info {
@@ -199,18 +196,22 @@ struct ie31200_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[IE31200_CHANNELS];
+ u64 erraddr;
};
static const struct ie31200_dev_info ie31200_devs[] = {
[IE31200] = {
.ctl_name = "IE31200"
},
+ [IE31200_1] = {
+ .ctl_name = "IE31200_1"
+ },
};
struct dimm_data {
- u8 size; /* in multiples of 256MB, except Skylake is 1GB */
- u8 dual_rank : 1,
- x16_width : 2; /* 0 means x8 width */
+ u64 size; /* in bytes */
+ u8 ranks;
+ enum dev_type dtype;
};
static int how_many_channels(struct pci_dev *pdev)
@@ -248,29 +249,54 @@ static bool ecc_capable(struct pci_dev *pdev)
return true;
}
-static int eccerrlog_row(u64 log)
-{
- return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
- IE31200_ECCERRLOG_RANK_SHIFT);
-}
+#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev)
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
{
+ struct ie31200_priv *priv = mci->pvt_info;
+ struct res_config *cfg = priv->cfg;
+
+ /*
+ * The PCI ERRSTS register is deprecated. Write the MSR to clear
+ * the ECC error log registers in all memory controllers.
+ */
+ if (cfg->msr_clear_eccerrlog_offset) {
+ if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset,
+ cfg->reg_eccerrlog_ce_mask |
+ cfg->reg_eccerrlog_ce_ovfl_mask |
+ cfg->reg_eccerrlog_ue_mask |
+ cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0)
+ ie31200_printk(KERN_ERR, "Failed to wrmsr.\n");
+
+ return;
+ }
+
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
- pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS,
+ pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS,
IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS);
}
static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = mci_to_pci_dev(mci);
struct ie31200_priv *priv = mci->pvt_info;
- pdev = to_pci_dev(mci->pdev);
+ /*
+ * The PCI ERRSTS register is deprecated, directly read the
+ * MMIO-mapped ECC error log registers.
+ */
+ if (priv->cfg->msr_clear_eccerrlog_offset) {
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
+
+ ie31200_clear_error_info(mci);
+ return;
+ }
/*
* This is a mess because there is no atomic way to read all the
@@ -306,46 +332,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
static void ie31200_process_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
+ struct ie31200_priv *priv = mci->pvt_info;
+ struct res_config *cfg = priv->cfg;
int channel;
u64 log;
- if (!(info->errsts & IE31200_ERRSTS_BITS))
- return;
+ if (!cfg->msr_clear_eccerrlog_offset) {
+ if (!(info->errsts & IE31200_ERRSTS_BITS))
+ return;
- if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
- -1, -1, -1, "UE overwrote CE", "");
- info->errsts = info->errsts2;
+ if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "");
+ info->errsts = info->errsts2;
+ }
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
- if (log & IE31200_ECCERRLOG_UE) {
+ if (log & cfg->reg_eccerrlog_ue_mask) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- 0, 0, 0,
- eccerrlog_row(log),
+ info->erraddr >> PAGE_SHIFT, 0, 0,
+ field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 UE", "");
- } else if (log & IE31200_ECCERRLOG_CE) {
+ } else if (log & cfg->reg_eccerrlog_ce_mask) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- 0, 0,
- IE31200_ECCERRLOG_SYNDROME(log),
- eccerrlog_row(log),
+ info->erraddr >> PAGE_SHIFT, 0,
+ field_get(cfg->reg_eccerrlog_syndrome_mask, log),
+ field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 CE", "");
}
}
}
-static void ie31200_check(struct mem_ctl_info *mci)
+static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce)
{
struct ie31200_error_info info;
+ info.erraddr = mce ? mce->addr : 0;
ie31200_get_and_clear_error_info(mci, &info);
ie31200_process_error_info(mci, &info);
}
-static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
+static void ie31200_check(struct mem_ctl_info *mci)
+{
+ __ie31200_check(mci, NULL);
+}
+
+static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
union {
u64 mchbar;
@@ -358,7 +394,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low);
pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high);
- u.mchbar &= IE31200_MCHBAR_MASK;
+ u.mchbar &= cfg->reg_mchbar_mask;
+ u.mchbar += cfg->reg_mchbar_window_size * mc;
if (u.mchbar != (resource_size_t)u.mchbar) {
ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n",
@@ -366,7 +403,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return NULL;
}
- window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE);
+ window = ioremap(u.mchbar, cfg->reg_mchbar_window_size);
if (!window)
ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
@@ -374,155 +411,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return window;
}
-static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
- int chan)
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm,
+ struct res_config *cfg)
{
- dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
- dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
- dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
- (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+ dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity;
+ dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1;
+ dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8;
}
-static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
- int chan)
+static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window,
+ struct res_config *cfg, int mc)
{
- dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
- dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
- dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
-}
+ struct dimm_data dimm_info;
+ struct dimm_info *dimm;
+ unsigned long nr_pages;
+ u32 addr_decode;
+ int i, j, k;
-static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
- bool skl)
-{
- if (skl)
- __skl_populate_dimm_info(dd, addr_decode, chan);
- else
- __populate_dimm_info(dd, addr_decode, chan);
-}
+ for (i = 0; i < IE31200_CHANNELS; i++) {
+ addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]);
+ edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
+
+ for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
+ populate_dimm_info(&dimm_info, addr_decode, j, cfg);
+ edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n",
+ mc, i, j, dimm_info.size >> 20,
+ dimm_info.ranks,
+ dimm_info.dtype);
+
+ nr_pages = MiB_TO_PAGES(dimm_info.size >> 20);
+ if (nr_pages == 0)
+ continue;
+ nr_pages = nr_pages / dimm_info.ranks;
+ for (k = 0; k < dimm_info.ranks; k++) {
+ dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0);
+ dimm->nr_pages = nr_pages;
+ edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
+ dimm->grain = 8; /* just a guess */
+ dimm->mtype = cfg->mtype;
+ dimm->dtype = dimm_info.dtype;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
+ }
+ }
+}
-static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
+static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
- int i, j, ret;
- struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
- struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
- void __iomem *window;
struct ie31200_priv *priv;
- u32 addr_decode, mad_offset;
-
- /*
- * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit
- * this logic when adding new CPU support.
- */
- bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device);
-
- edac_dbg(0, "MC:\n");
-
- if (!ecc_capable(pdev)) {
- ie31200_printk(KERN_INFO, "No ECC support\n");
- return -ENODEV;
- }
+ struct mem_ctl_info *mci;
+ void __iomem *window;
+ int ret;
nr_channels = how_many_channels(pdev);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = IE31200_DIMMS;
+ layers[0].size = IE31200_RANKS_PER_CHANNEL;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers,
sizeof(struct ie31200_priv));
if (!mci)
return -ENOMEM;
- window = ie31200_map_mchbar(pdev);
+ window = ie31200_map_mchbar(pdev, cfg, mc);
if (!window) {
ret = -ENODEV;
goto fail_free;
}
edac_dbg(3, "MC: init mci\n");
- mci->pdev = &pdev->dev;
- if (skl)
- mci->mtype_cap = MEM_FLAG_DDR4;
- else
- mci->mtype_cap = MEM_FLAG_DDR3;
+ mci->mtype_cap = BIT(cfg->mtype);
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = ie31200_devs[dev_idx].ctl_name;
+ mci->ctl_name = ie31200_devs[mc].ctl_name;
mci->dev_name = pci_name(pdev);
- mci->edac_check = ie31200_check;
+ mci->edac_check = cfg->cmci ? NULL : ie31200_check;
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
- if (skl) {
- priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
- priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
- mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
- } else {
- priv->c0errlog = window + IE31200_C0ECCERRLOG;
- priv->c1errlog = window + IE31200_C1ECCERRLOG;
- mad_offset = IE31200_MAD_DIMM_0_OFFSET;
- }
-
- /* populate DIMM info */
- for (i = 0; i < IE31200_CHANNELS; i++) {
- addr_decode = readl(window + mad_offset +
- (i * 4));
- edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
- for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
- populate_dimm_info(&dimm_info[i][j], addr_decode, j,
- skl);
- edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
- dimm_info[i][j].size,
- dimm_info[i][j].dual_rank,
- dimm_info[i][j].x16_width);
- }
- }
-
+ priv->c0errlog = window + cfg->reg_eccerrlog_offset[0];
+ priv->c1errlog = window + cfg->reg_eccerrlog_offset[1];
+ priv->cfg = cfg;
+ priv->mci = mci;
+ priv->pdev = pdev;
+ device_initialize(&priv->dev);
/*
- * The dram rank boundary (DRB) reg values are boundary addresses
- * for each DRAM rank with a granularity of 64MB. DRB regs are
- * cumulative; the last one will contain the total memory
- * contained in all ranks.
+ * The EDAC core uses mci->pdev (pointer to the structure device)
+ * as the memory controller ID. The SoCs attach one or more memory
+ * controllers to a single pci_dev (a single pci_dev->dev can
+ * correspond to multiple memory controllers).
+ *
+ * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
+ * for the first memory controller and assign a unique priv->dev
+ * to mci->pdev for each additional memory controller.
*/
- for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) {
- for (j = 0; j < IE31200_CHANNELS; j++) {
- struct dimm_info *dimm;
- unsigned long nr_pages;
-
- nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
- if (nr_pages == 0)
- continue;
-
- if (dimm_info[j][i].dual_rank) {
- nr_pages = nr_pages / 2;
- dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0);
- dimm->nr_pages = nr_pages;
- edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
- dimm->grain = 8; /* just a guess */
- if (skl)
- dimm->mtype = MEM_DDR4;
- else
- dimm->mtype = MEM_DDR3;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_UNKNOWN;
- }
- dimm = edac_get_dimm(mci, i * 2, j, 0);
- dimm->nr_pages = nr_pages;
- edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
- dimm->grain = 8; /* same guess */
- if (skl)
- dimm->mtype = MEM_DDR4;
- else
- dimm->mtype = MEM_DDR3;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_UNKNOWN;
- }
- }
+ mci->pdev = mc ? &priv->dev : &pdev->dev;
+ ie31200_get_dimm_config(mci, window, cfg, mc);
ie31200_clear_error_info(mci);
if (edac_mc_add_mc(mci)) {
@@ -531,16 +521,117 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
goto fail_unmap;
}
- /* get this far and it's successful */
- edac_dbg(3, "MC: success\n");
+ ie31200_pvt.priv[mc] = priv;
return 0;
-
fail_unmap:
+ put_device(&priv->dev);
iounmap(window);
-
fail_free:
edac_mc_free(mci);
+ return ret;
+}
+
+static void mce_check(struct mce *mce)
+{
+ struct ie31200_priv *priv;
+ int i;
+
+ for (i = 0; i < IE31200_IMC_NUM; i++) {
+ priv = ie31200_pvt.priv[i];
+ if (!priv)
+ continue;
+ __ie31200_check(priv->mci, mce);
+ }
+}
+
+static int mce_handler(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ char *type;
+
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
+ /*
+ * Ignore unless this is a memory related error.
+ * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs.
+ */
+ if ((mce->status & 0xefff) >> 7 != 1)
+ return NOTIFY_DONE;
+
+ type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event";
+
+ edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+ mce->extcpu, type, mce->mcgstatus,
+ mce->bank, mce->status);
+ edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+ edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+ edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+ edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+ mce->cpuvendor, mce->cpuid, mce->time,
+ mce->socketid, mce->apicid);
+
+ mce_check(mce);
+ mce->kflags |= MCE_HANDLED_EDAC;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ie31200_mce_dec = {
+ .notifier_call = mce_handler,
+ .priority = MCE_PRIO_EDAC,
+};
+
+static void ie31200_unregister_mcis(void)
+{
+ struct ie31200_priv *priv;
+ int i;
+
+ for (i = 0; i < IE31200_IMC_NUM; i++) {
+ priv = ie31200_pvt.priv[i];
+ if (!priv)
+ continue;
+
+ /* priv is mci->pvt_info and goes away with mci: drop the stale slot. */
+ ie31200_pvt.priv[i] = NULL;
+ edac_mc_del_mc(priv->mci->pdev);
+ iounmap(priv->window);
+ put_device(&priv->dev);
+ edac_mc_free(priv->mci);
+ }
+}
+
+static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
+{
+ int i, ret;
+
+ edac_dbg(0, "MC:\n");
+
+ if (!ecc_capable(pdev)) {
+ ie31200_printk(KERN_INFO, "No ECC support\n");
+ return -ENODEV;
+ }
+
+ for (i = 0; i < cfg->imc_num; i++) {
+ ret = ie31200_register_mci(pdev, cfg, i);
+ if (ret)
+ goto fail_register;
+ }
+
+ if (cfg->cmci) {
+ mce_register_decode_chain(&ie31200_mce_dec);
+ edac_op_state = EDAC_OPSTATE_INT;
+ } else {
+ edac_op_state = EDAC_OPSTATE_POLL;
+ }
+
+ /* get this far and it's successful. */
+ edac_dbg(3, "MC: success\n");
+ return 0;
+
+fail_register:
+ ie31200_unregister_mcis();
return ret;
}
@@ -552,7 +643,7 @@ static int ie31200_init_one(struct pci_dev *pdev,
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
- rc = ie31200_probe1(pdev, ent->driver_data);
+ rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data);
if (rc == 0 && !mci_pdev)
mci_pdev = pci_dev_get(pdev);
@@ -561,42 +652,129 @@ static int ie31200_init_one(struct pci_dev *pdev,
static void ie31200_remove_one(struct pci_dev *pdev)
{
- struct mem_ctl_info *mci;
- struct ie31200_priv *priv;
+ struct ie31200_priv *priv = ie31200_pvt.priv[0];
edac_dbg(0, "\n");
pci_dev_put(mci_pdev);
mci_pdev = NULL;
- mci = edac_mc_del_mc(&pdev->dev);
- if (!mci)
- return;
- priv = mci->pvt_info;
- iounmap(priv->window);
- edac_mc_free(mci);
+ if (priv->cfg->cmci)
+ mce_unregister_decode_chain(&ie31200_mce_dec);
+ ie31200_unregister_mcis();
}
+static struct res_config snb_cfg = {
+ .mtype = MEM_DDR3,
+ .imc_num = 1,
+ .reg_mchbar_mask = GENMASK_ULL(38, 15),
+ .reg_mchbar_window_size = BIT_ULL(15),
+ .reg_eccerrlog_offset[0] = 0x40c8,
+ .reg_eccerrlog_offset[1] = 0x44c8,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ue_mask = BIT_ULL(1),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .reg_mad_dimm_size_granularity = BIT_ULL(28),
+ .reg_mad_dimm_offset[0] = 0x5004,
+ .reg_mad_dimm_offset[1] = 0x5008,
+ .reg_mad_dimm_size_mask[0] = GENMASK(7, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(15, 8),
+ .reg_mad_dimm_rank_mask[0] = BIT(17),
+ .reg_mad_dimm_rank_mask[1] = BIT(18),
+ .reg_mad_dimm_width_mask[0] = BIT(19),
+ .reg_mad_dimm_width_mask[1] = BIT(20),
+};
+
+static struct res_config skl_cfg = {
+ .mtype = MEM_DDR4,
+ .imc_num = 1,
+ .reg_mchbar_mask = GENMASK_ULL(38, 15),
+ .reg_mchbar_window_size = BIT_ULL(15),
+ .reg_eccerrlog_offset[0] = 0x4048,
+ .reg_eccerrlog_offset[1] = 0x4448,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ue_mask = BIT_ULL(1),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .reg_mad_dimm_size_granularity = BIT_ULL(30),
+ .reg_mad_dimm_offset[0] = 0x500c,
+ .reg_mad_dimm_offset[1] = 0x5010,
+ .reg_mad_dimm_size_mask[0] = GENMASK(5, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(21, 16),
+ .reg_mad_dimm_rank_mask[0] = BIT(10),
+ .reg_mad_dimm_rank_mask[1] = BIT(26),
+ .reg_mad_dimm_width_mask[0] = GENMASK(9, 8),
+ .reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
+static struct res_config rpl_s_cfg = {
+ .mtype = MEM_DDR5,
+ .cmci = true, /* report via the MCE decode chain; no polling callback */
+ .imc_num = 2, /* one mem_ctl_info is registered per controller */
+ .reg_mchbar_mask = GENMASK_ULL(41, 17),
+ .reg_mchbar_window_size = BIT_ULL(16),
+ .reg_eccerrlog_offset[0] = 0xe048,
+ .reg_eccerrlog_offset[1] = 0xe848,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1),
+ .reg_eccerrlog_ue_mask = BIT_ULL(2),
+ .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .msr_clear_eccerrlog_offset = 0x791, /* non-zero: logs are cleared via this MSR, not PCI ERRSTS */
+ .reg_mad_dimm_offset[0] = 0xd80c,
+ .reg_mad_dimm_offset[1] = 0xd810,
+ .reg_mad_dimm_size_granularity = BIT_ULL(29),
+ .reg_mad_dimm_size_mask[0] = GENMASK(6, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(22, 16),
+ .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9),
+ .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26),
+ .reg_mad_dimm_width_mask[0] = GENMASK(8, 7),
+ .reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
static const struct pci_device_id ie31200_pci_tbl[] = {
- { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10), (kernel_ulong_t)&rpl_s_cfg},
{ 0, } /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
@@ -613,12 +791,10 @@ static int __init ie31200_init(void)
int pci_rc, i;
edac_dbg(3, "MC:\n");
- /* Ensure that the OPSTATE is set correctly for POLL or NMI */
- opstate_init();
pci_rc = pci_register_driver(&ie31200_driver);
if (pci_rc < 0)
- goto fail0;
+ return pci_rc;
if (!mci_pdev) {
ie31200_registered = 0;
@@ -629,11 +805,13 @@ static int __init ie31200_init(void)
if (mci_pdev)
break;
}
+
if (!mci_pdev) {
edac_dbg(0, "ie31200 pci_get_device fail\n");
pci_rc = -ENODEV;
- goto fail1;
+ goto fail0;
}
+
pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]);
if (pci_rc < 0) {
edac_dbg(0, "ie31200 init fail\n");
@@ -641,12 +819,12 @@ static int __init ie31200_init(void)
goto fail1;
}
}
- return 0;
+ return 0;
fail1:
- pci_unregister_driver(&ie31200_driver);
-fail0:
pci_dev_put(mci_pdev);
+fail0:
+ pci_unregister_driver(&ie31200_driver);
return pci_rc;
}
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 544dd19072ea..553c31a2d922 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -27,7 +27,7 @@
#include "edac_mc.h"
#include "edac_module.h"
-#define IGEN6_REVISION "v2.5"
+#define IGEN6_REVISION "v2.5.1"
#define EDAC_MOD_STR "igen6_edac"
#define IGEN6_NMI_NAME "igen6_ibecc"
@@ -58,6 +58,7 @@
/* Capability register E */
#define CAPID_E_OFFSET 0xf0
#define CAPID_E_IBECC BIT(12)
+#define CAPID_E_IBECC_BIT18 BIT(18)
/* Error Status */
#define ERRSTS_OFFSET 0xc8
@@ -80,6 +81,7 @@
#define ECC_ERROR_LOG_UE BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT 5
#define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38)
+#define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61)
/* Host MMIO base address */
@@ -125,6 +127,7 @@
static struct res_config {
bool machine_check;
+ /* The number of present memory controllers. */
int num_imc;
u32 imc_base;
u32 cmf_base;
@@ -133,6 +136,8 @@ static struct res_config {
u32 ibecc_base;
u32 ibecc_error_log_offset;
bool (*ibecc_available)(struct pci_dev *pdev);
+ /* Extract error address logged in IBECC */
+ u64 (*err_addr)(u64 ecclog);
/* Convert error address logged in IBECC to system physical address */
u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
/* Convert error address logged in IBECC to integrated memory controller address */
@@ -222,6 +227,87 @@ static struct work_struct ecclog_work;
#define DID_ADL_SKU3 0x4621
#define DID_ADL_SKU4 0x4641
+/* Compute die IDs for Alder Lake-N with IBECC */
+#define DID_ADL_N_SKU1 0x4614
+#define DID_ADL_N_SKU2 0x4617
+#define DID_ADL_N_SKU3 0x461b
+#define DID_ADL_N_SKU4 0x461c
+#define DID_ADL_N_SKU5 0x4673
+#define DID_ADL_N_SKU6 0x4674
+#define DID_ADL_N_SKU7 0x4675
+#define DID_ADL_N_SKU8 0x4677
+#define DID_ADL_N_SKU9 0x4678
+#define DID_ADL_N_SKU10 0x4679
+#define DID_ADL_N_SKU11 0x467c
+#define DID_ADL_N_SKU12 0x4632
+
+/* Compute die IDs for Arizona Beach with IBECC */
+#define DID_AZB_SKU1 0x4676
+
+/* Compute die IDs for Amston Lake with IBECC */
+#define DID_ASL_SKU1 0x464a
+
+/* Compute die IDs for Raptor Lake-P with IBECC */
+#define DID_RPL_P_SKU1 0xa706
+#define DID_RPL_P_SKU2 0xa707
+#define DID_RPL_P_SKU3 0xa708
+#define DID_RPL_P_SKU4 0xa716
+#define DID_RPL_P_SKU5 0xa718
+
+/* Compute die IDs for Meteor Lake-PS with IBECC */
+#define DID_MTL_PS_SKU1 0x7d21
+#define DID_MTL_PS_SKU2 0x7d22
+#define DID_MTL_PS_SKU3 0x7d23
+#define DID_MTL_PS_SKU4 0x7d24
+
+/* Compute die IDs for Meteor Lake-P with IBECC */
+#define DID_MTL_P_SKU1 0x7d01
+#define DID_MTL_P_SKU2 0x7d02
+#define DID_MTL_P_SKU3 0x7d14
+
+/* Compute die IDs for Arrow Lake-UH with IBECC */
+#define DID_ARL_UH_SKU1 0x7d06
+#define DID_ARL_UH_SKU2 0x7d20
+#define DID_ARL_UH_SKU3 0x7d30
+
+/* Compute die IDs for Panther Lake-H with IBECC */
+#define DID_PTL_H_SKU1 0xb000
+#define DID_PTL_H_SKU2 0xb001
+#define DID_PTL_H_SKU3 0xb002
+
+/* Compute die IDs for Wildcat Lake with IBECC */
+#define DID_WCL_SKU1 0xfd00
+
+/*
+ * Read the 64-bit MCHBAR register from the PCI config space of @pdev and
+ * hand back the MMIO base it encodes through @mchbar.
+ *
+ * Returns 0 on success, or -ENODEV if either config read fails or the
+ * MCHBAR_EN bit is clear.
+ */
+static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
+{
+	/* The two 32-bit config reads fill the halves of one 64-bit value. */
+	union {
+		u64 raw;
+		struct {
+			u32 lo;
+			u32 hi;
+		};
+	} bar;
+
+	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &bar.lo)) {
+		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
+		return -ENODEV;
+	}
+
+	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &bar.hi)) {
+		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
+		return -ENODEV;
+	}
+
+	if ((bar.raw & MCHBAR_EN) == 0) {
+		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
+		return -ENODEV;
+	}
+
+	*mchbar = MCHBAR_BASE(bar.raw);
+
+	return 0;
+}
+
static bool ehl_ibecc_available(struct pci_dev *pdev)
{
u32 v;
@@ -245,7 +331,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
if (igen6_tom <= _4GB)
return eaddr + igen6_tolud - _4GB;
- if (eaddr < _4GB)
+ if (eaddr >= igen6_tom)
return eaddr + igen6_tolud - igen6_tom;
return eaddr;
@@ -272,6 +358,39 @@ static bool tgl_ibecc_available(struct pci_dev *pdev)
return !(CAPID_E_IBECC & v);
}
+/*
+ * IBECC availability check based on bit 18 of capability register E:
+ * IBECC is reported available when that bit is clear. A failed config
+ * read is treated as "not available".
+ */
+static bool mtl_p_ibecc_available(struct pci_dev *pdev)
+{
+	u32 capid_e;
+
+	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &capid_e) != 0)
+		return false;
+
+	return (capid_e & CAPID_E_IBECC_BIT18) == 0;
+}
+
+/*
+ * IBECC availability check based on an MMIO register inside the MCHBAR
+ * window: the window is mapped temporarily, the register at offset
+ * MCHBAR_MEMSS_IBECCDIS is read once and the mapping is dropped again.
+ *
+ * Returns false if MCHBAR can't be obtained, the mapping fails, or the
+ * disable bit is set.
+ */
+static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
+{
+#define MCHBAR_MEMSS_IBECCDIS	0x13c00
+	void __iomem *regs;
+	u32 ibeccdis;
+	u64 base;
+
+	if (get_mchbar(pdev, &base))
+		return false;
+
+	regs = ioremap(base, MCHBAR_SIZE * 2);
+	if (!regs) {
+		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base);
+		return false;
+	}
+
+	ibeccdis = readl(regs + MCHBAR_MEMSS_IBECCDIS);
+	iounmap(regs);
+
+	/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
+	return GET_BITFIELD(ibeccdis, 6, 6) == 0;
+}
+
static u64 mem_addr_to_sys_addr(u64 maddr)
{
if (maddr < igen6_tolud)
@@ -358,6 +477,11 @@ static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
return imc_addr;
}
+/* Extract the error address field from an IBECC error log value via ECC_ERROR_LOG_ADDR45(). */
+static u64 rpl_p_err_addr(u64 ecclog)
+{
+	u64 eaddr = ECC_ERROR_LOG_ADDR45(ecclog);
+
+	return eaddr;
+}
+
static struct res_config ehl_cfg = {
.num_imc = 1,
.imc_base = 0x5000,
@@ -403,7 +527,63 @@ static struct res_config adl_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct pci_device_id igen6_pci_tbl[] = {
+static struct res_config adl_n_cfg = {
+ .machine_check = true,
+ .num_imc = 1,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x68,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config rpl_p_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x68,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr = rpl_p_err_addr,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config mtl_ps_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = mtl_ps_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config mtl_p_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = mtl_p_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config wcl_cfg = {
+ .machine_check = true,
+ .num_imc = 1,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = mtl_p_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
@@ -424,6 +604,39 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -596,13 +809,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
- if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
- /* Clear CE/UE bits by writing 1s */
- writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
- return ecclog;
- }
+ /*
+ * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+ * the invalid value ~0. This will result in a flood of invalid
+ * error reports in polling mode. Skip it.
+ */
+ if (ecclog == ~0)
+ return 0;
- return 0;
+ /* Neither a CE nor a UE. Skip it.*/
+ if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+ return 0;
+
+ /* Clear CE/UE bits by writing 1s */
+ writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+ return ecclog;
}
static void errsts_clear(struct igen6_imc *imc)
@@ -627,7 +849,7 @@ static int errcmd_enable_error_reporting(bool enable)
rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
if (rc)
- return rc;
+ return pcibios_err_to_errno(rc);
if (enable)
errcmd |= ERRCMD_CE | ERRSTS_UE;
@@ -636,7 +858,7 @@ static int errcmd_enable_error_reporting(bool enable)
rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
if (rc)
- return rc;
+ return pcibios_err_to_errno(rc);
return 0;
}
@@ -679,8 +901,11 @@ static void ecclog_work_cb(struct work_struct *work)
llist_for_each_entry_safe(node, tmp, head, llnode) {
memset(&res, 0, sizeof(res));
- eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
- ECC_ERROR_LOG_ADDR_SHIFT;
+ if (res_cfg->err_addr)
+ eaddr = res_cfg->err_addr(node->ecclog);
+ else
+ eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
+ ECC_ERROR_LOG_ADDR_SHIFT;
res.mc = node->mc;
res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
@@ -969,22 +1194,8 @@ static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
igen6_tom = u.v & GENMASK_ULL(38, 20);
- if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
- igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
- goto fail;
- }
-
- if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
- igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
+ if (get_mchbar(pdev, mchbar))
goto fail;
- }
-
- if (!(u.v & MCHBAR_EN)) {
- igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
- goto fail;
- }
-
- *mchbar = MCHBAR_BASE(u.v);
#ifdef CONFIG_EDAC_DEBUG
if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
@@ -1000,23 +1211,35 @@ fail:
return -ENODEV;
}
-static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
+/*
+ * EDAC check callback: read and clear the error log of the memory
+ * controller behind @mci and, if an error was logged and could be stored
+ * in the ecclog pool, queue the IRQ work that processes it.
+ */
+static void igen6_check(struct mem_ctl_info *mci)
+{
+	struct igen6_imc *imc = mci->pvt_info;
+	u64 log;
+
+	/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
+	log = ecclog_read_and_clear(imc);
+
+	if (log && !ecclog_gen_pool_add(imc->mc, log))
+		irq_work_queue(&ecclog_irq_work);
+}
+
+/*
+ * Check whether the memory controller is absent: an all-ones read of the
+ * MAD_INTER_CHANNEL register through @window is taken as "absent".
+ */
+static bool igen6_imc_absent(void __iomem *window)
+{
+	u32 mad_inter_channel = readl(window + MAD_INTER_CHANNEL_OFFSET);
+
+	return mad_inter_channel == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct igen6_imc *imc;
- void __iomem *window;
int rc;
edac_dbg(2, "\n");
- mchbar += mc * MCHBAR_SIZE;
- window = ioremap(mchbar, MCHBAR_SIZE);
- if (!window) {
- igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
- return -ENODEV;
- }
-
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
@@ -1041,6 +1264,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->dev_name = pci_name(pdev);
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ mci->edac_check = igen6_check;
mci->pvt_info = &igen6_pvt->imc[mc];
imc = mci->pvt_info;
@@ -1075,11 +1300,12 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
imc->mci = mci;
return 0;
fail3:
+ put_device(&imc->dev);
+ mci->pvt_info = NULL;
kfree(mci->ctl_name);
fail2:
edac_mc_free(mci);
fail:
- iounmap(window);
return rc;
}
@@ -1099,11 +1325,65 @@ static void igen6_unregister_mcis(void)
edac_mc_del_mc(mci->pdev);
kfree(mci->ctl_name);
+ mci->pvt_info = NULL;
edac_mc_free(mci);
+ put_device(&imc->dev);
iounmap(imc->window);
}
}
+/*
+ * Detect and register the present memory controllers.
+ *
+ * Walks the NUM_IMC candidate MMIO windows starting at @mchbar (one window
+ * per MCHBAR_SIZE), skips windows that report an absent controller and
+ * registers the others, numbering them consecutively from 0. The walk stops
+ * early once res_cfg->num_imc controllers are registered; if fewer are
+ * found, res_cfg->num_imc is lowered to the detected count.
+ *
+ * Returns 0 on success, -ENOMEM if a window can't be mapped, -ENODEV if no
+ * controller is present, or the error returned by igen6_register_mci().
+ * On the -ENOMEM and igen6_register_mci() failures, controllers that were
+ * already registered are unregistered again before returning.
+ */
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+	void __iomem *window;
+	int lmc, pmc, rc;
+	u64 base;
+
+	/* lmc: index of the next controller to register; pmc: window being probed. */
+	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+		base = mchbar + pmc * MCHBAR_SIZE;
+		window = ioremap(base, MCHBAR_SIZE);
+		if (!window) {
+			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+			rc = -ENOMEM;
+			goto out_unregister_mcis;
+		}
+
+		if (igen6_imc_absent(window)) {
+			iounmap(window);
+			edac_dbg(2, "Skip absent mc%d\n", pmc);
+			continue;
+		}
+
+		rc = igen6_register_mci(lmc, window, pdev);
+		if (rc)
+			goto out_iounmap;
+
+		/* Done, if all present MCs are detected and registered. */
+		if (++lmc >= res_cfg->num_imc)
+			break;
+	}
+
+	if (!lmc) {
+		igen6_printk(KERN_ERR, "No mc found.\n");
+		return -ENODEV;
+	}
+
+	if (lmc < res_cfg->num_imc) {
+		/* Terminate the message so it isn't merged with the next log line. */
+		igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.\n",
+			     res_cfg->num_imc, lmc);
+		res_cfg->num_imc = lmc;
+	}
+
+	return 0;
+
+out_iounmap:
+	/* The window of the controller that failed to register is still ours. */
+	iounmap(window);
+
+out_unregister_mcis:
+	igen6_unregister_mcis();
+
+	return rc;
+}
+
static int igen6_mem_slice_setup(u64 mchbar)
{
struct igen6_imc *imc = &igen6_pvt->imc[0];
@@ -1178,10 +1458,29 @@ static void unregister_err_handler(void)
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}
+/*
+ * Select the EDAC error reporting mode (edac_op_state) for the probed
+ * device @ent, using the configuration data @cfg.
+ */
+static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
+{
+	/*
+	 * Quirk: Certain SoCs' error reporting interrupts don't work.
+	 * Force polling mode for them to ensure that memory error
+	 * events can be handled.
+	 *
+	 * Otherwise, set the mode according to the configuration data.
+	 */
+	if (ent->device == DID_ADL_N_SKU4)
+		edac_op_state = EDAC_OPSTATE_POLL;
+	else if (cfg->machine_check)
+		edac_op_state = EDAC_OPSTATE_INT;
+	else
+		edac_op_state = EDAC_OPSTATE_NMI;
+}
+
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
- int i, rc;
+ int rc;
edac_dbg(2, "\n");
@@ -1195,11 +1494,11 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto fail;
- for (i = 0; i < res_cfg->num_imc; i++) {
- rc = igen6_register_mci(i, mchbar, pdev);
- if (rc)
- goto fail2;
- }
+ opstate_set(res_cfg, ent);
+
+ rc = igen6_register_mcis(pdev, mchbar);
+ if (rc)
+ goto fail;
if (res_cfg->num_imc > 1) {
rc = igen6_mem_slice_setup(mchbar);
@@ -1216,9 +1515,6 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&ecclog_work, ecclog_work_cb);
init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
- /* Check if any pending errors before registering the NMI handler */
- ecclog_handler();
-
rc = register_err_handler();
if (rc)
goto fail3;
@@ -1230,6 +1526,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto fail4;
}
+ /* Check if any pending errors before/during the registration of the error handler */
+ ecclog_handler();
+
igen6_debug_setup();
return 0;
fail4:
@@ -1278,8 +1577,6 @@ static int __init igen6_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
- edac_op_state = EDAC_OPSTATE_NMI;
-
rc = pci_register_driver(&igen6_driver);
if (rc)
return rc;
diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c
new file mode 100644
index 000000000000..4348b3883b45
--- /dev/null
+++ b/drivers/edac/imh_base.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller.
+ * Copyright (c) 2025, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/mce.h>
+#include <asm/cpu.h>
+#include "edac_module.h"
+#include "skx_common.h"
+
+#define IMH_REVISION "v0.0.1"
+#define EDAC_MOD_STR "imh_edac"
+
+/* Debug macros */
+#define imh_printk(level, fmt, arg...) \
+ edac_printk(level, "imh", fmt, ##arg)
+
+/* Configuration Agent(Ubox) */
+#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23)
+#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3)
+
+/* PUNIT */
+#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30)
+
+/* Memory Controller */
+#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2)
+#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15)
+
+/* System Cache Agent(SCA) */
+#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16)
+#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16)
+
+/* Home Agent (HA) */
+#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8)
+
+/**
+ * struct local_reg - A register as described in the local package view.
+ *
+ * @pkg: (input) The package where the register is located.
+ * @pbase: (input) The IP MMIO base physical address in the local package view.
+ * @size: (input) The IP MMIO size.
+ * @offset: (input) The register offset from the IP MMIO base @pbase.
+ * @width: (input) The register width in byte.
+ * @vbase: (internal) The IP MMIO base virtual address.
+ * @val: (output) The register value.
+ */
+struct local_reg {
+ int pkg;
+ u64 pbase;
+ u32 size;
+ u32 offset;
+ u8 width;
+ void __iomem *vbase;
+ u64 val;
+};
+
+#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \
+ struct local_reg name = { \
+ .pkg = package, \
+ .pbase = (north ? (cfg)->mmio_base_l_north : \
+ (cfg)->mmio_base_l_south) + \
+ (cfg)->ip_name##_base + \
+ (cfg)->ip_name##_size * (ip_idx), \
+ .size = (cfg)->ip_name##_size, \
+ .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \
+ .width = (cfg)->ip_name##_reg_##reg_name##_width, \
+ }
+
+/*
+ * Read an MMIO register of @width bytes (1, 2, 4 or 8) at @addr.
+ * Any other width is logged as an error and reads as 0.
+ */
+static u64 readx(void __iomem *addr, u8 width)
+{
+	u64 val = 0;
+
+	switch (width) {
+	case 1:
+		val = readb(addr);
+		break;
+	case 2:
+		val = readw(addr);
+		break;
+	case 4:
+		val = readl(addr);
+		break;
+	case 8:
+		val = readq(addr);
+		break;
+	default:
+		imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width);
+		break;
+	}
+
+	return val;
+}
+
+/*
+ * Cross-CPU callback: @reg points to a struct local_reg whose ->vbase is
+ * already mapped; the register value is stored in ->val.
+ */
+static void __read_local_reg(void *reg)
+{
+	struct local_reg *lreg = reg;
+	void __iomem *addr = lreg->vbase + lreg->offset;
+
+	lreg->val = readx(addr, lreg->width);
+}
+
+/*
+ * Read a local-view register.
+ *
+ * The register is mapped temporarily and read on an online CPU that
+ * belongs to package @reg->pkg; the value is returned in @reg->val.
+ *
+ * Returns true on success. Returns false if the package has no online
+ * CPU, the MMIO mapping fails, or the cross-CPU call can't be executed.
+ */
+static bool read_local_reg(struct local_reg *reg)
+{
+	int cpu, rc;
+
+	/* Get the target CPU in the package @reg->pkg. */
+	for_each_online_cpu(cpu) {
+		if (reg->pkg == topology_physical_package_id(cpu))
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids)
+		return false;
+
+	reg->vbase = ioremap(reg->pbase, reg->size);
+	if (!reg->vbase) {
+		imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase);
+		return false;
+	}
+
+	/*
+	 * Get the target CPU to read the register. The call fails if the
+	 * CPU is no longer online; @reg->val was never written in that case,
+	 * so it must not be reported as a successful read.
+	 */
+	rc = smp_call_function_single(cpu, __read_local_reg, reg, 1);
+	iounmap(reg->vbase);
+	if (rc) {
+		imh_printk(KERN_ERR, "Failed to read reg on CPU%d (rc %d)\n", cpu, rc);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Get the bitmap of memory controller instances in package @pkg, read
+ * from the capid3 register of the {north,south} PCU. Returns 0 if the
+ * register can't be read.
+ */
+static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north)
+{
+	DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3);
+	const char *side = north ? "north" : "south";
+	u64 bitmap;
+
+	if (!read_local_reg(&reg))
+		return 0;
+
+	bitmap = DDR_IMC_BITMAP(reg.val);
+	edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n",
+		 pkg, side, bitmap, reg.val);
+
+	return bitmap;
+}
+
+/* Device release callback: log the release, then free the struct device itself. */
+static void imc_release(struct device *dev)
+{
+	const char *name = dev_name(dev);
+
+	edac_dbg(2, "imc device %s released\n", name);
+	kfree(dev);
+}
+
+/*
+ * Set up the DDR memory controllers of the {north,south} IMH of @d.
+ *
+ * For every controller set in the instance bitmap of @d->pkg: map its
+ * MMIO region, create its device instance and record the mapping from the
+ * physical controller index to the logical one.
+ *
+ * @lmc is the first free logical index in @d->imc[].
+ *
+ * Returns the next free logical index on success or a negative errno.
+ * Mappings and devices already stored in @d->imc[] are not undone here on
+ * failure; presumably the caller's failure path (skx_remove()) releases
+ * them - TODO confirm.
+ */
+static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d,
+			    bool north, int lmc)
+{
+	unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num;
+	unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north);
+	void __iomem *mbase;
+	struct device *dev;
+	int i, rc, pmc;
+	u64 base;
+
+	for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) {
+		/*
+		 * @d->imc[] holds @d->num_imc entries, a count that is not
+		 * derived from this package's bitmap. Refuse to index past
+		 * it if this package reports more controllers.
+		 */
+		if (lmc >= d->num_imc) {
+			imh_printk(KERN_ERR, "Pkg%d has more than %d mcs\n",
+				   d->pkg, d->num_imc);
+			return -ENODEV;
+		}
+
+		base = north ? d->mmio_base_h_north : d->mmio_base_h_south;
+		base += cfg->ddr_imc_base + size * i;
+
+		edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n",
+			 d->pkg, lmc, base, size);
+
+		/* Set up the imc MMIO. */
+		mbase = ioremap(base, size);
+		if (!mbase) {
+			imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base);
+			return -ENOMEM;
+		}
+
+		d->imc[lmc].mbase = mbase;
+		d->imc[lmc].lmc = lmc;
+
+		/* Create the imc device instance. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev)
+			return -ENOMEM;
+
+		dev->release = imc_release;
+		device_initialize(dev);
+		rc = dev_set_name(dev, "0x%llx", base);
+		if (rc) {
+			imh_printk(KERN_ERR, "Failed to set dev name\n");
+			/* Drops the initial reference; imc_release() frees @dev. */
+			put_device(dev);
+			return rc;
+		}
+
+		d->imc[lmc].dev = dev;
+
+		/*
+		 * Set up the imc index mapping. South controllers are
+		 * numbered after the north ones, starting at 8.
+		 */
+		pmc = north ? i : 8 + i;
+		skx_set_mc_mapping(d, pmc, lmc);
+
+		lmc++;
+	}
+
+	return lmc;
+}
+
+/*
+ * Set up the north, then the south DDR memory controllers of @d.
+ * Returns true only if no step failed and at least one controller exists.
+ */
+static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d)
+{
+	int next;
+
+	next = __get_ddr_munits(cfg, d, true, 0);
+	if (next < 0)
+		return false;
+
+	/* The south controllers continue the logical numbering. */
+	next = __get_ddr_munits(cfg, d, false, next);
+
+	return next > 0;
+}
+
+/*
+ * Read the socket id of package @d->pkg from the north ubox and store it
+ * as src_id in the first cfg->ddr_imc_num memory controllers of @d.
+ * Returns false if the register can't be read.
+ */
+static bool get_socket_id(struct res_config *cfg, struct skx_dev *d)
+{
+	DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id);
+	struct skx_imc *imc;
+	u8 src_id;
+
+	if (!read_local_reg(&reg))
+		return false;
+
+	src_id = SOCKET_ID(reg.val);
+	edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val);
+
+	for (imc = d->imc; imc < d->imc + cfg->ddr_imc_num; imc++)
+		imc->src_id = src_id;
+
+	return true;
+}
+
+/*
+ * Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters
+ * from the north SCA of package 0. Returns false as soon as one of the
+ * two registers can't be read.
+ */
+static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm)
+{
+	DEFINE_LOCAL_REG(tolm_reg, cfg, 0, true, sca, 0, tolm);
+	DEFINE_LOCAL_REG(tohm_reg, cfg, 0, true, sca, 0, tohm);
+
+	if (!read_local_reg(&tolm_reg))
+		return false;
+
+	*tolm = TOLM(tolm_reg.val);
+	edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, tolm_reg.val);
+
+	if (!read_local_reg(&tohm_reg))
+		return false;
+
+	*tohm = TOHM(tohm_reg.val);
+	edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, tohm_reg.val);
+
+	return true;
+}
+
+/*
+ * Get the system-view MMIO_BASE_H for {north,south}-IMH.
+ *
+ * Allocates one struct skx_dev per package (topology_max_packages() of
+ * them), each with room for ddr_imc_num + hbm_imc_num memory controllers,
+ * fills in the north/south MMIO_BASE_H values and appends the skx_dev to
+ * @edac_list.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or -ENODEV if a
+ * package's north mmio_base register can't be read or reads as zero.
+ * Entries appended to @edac_list before a failure are left on the list.
+ */
+static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list)
+{
+ int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
+ struct skx_dev *d;
+
+ for (i = 0; i < n; i++) {
+ d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base);
+
+ /*
+  * Get MMIO_BASE_H for the north-IMH. It is mandatory: @d is not
+  * on the list yet, so it is freed here before bailing out.
+  */
+ if (!read_local_reg(&reg) || !reg.val) {
+ kfree(d);
+ imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i);
+ return -ENODEV;
+ }
+
+ d->mmio_base_h_north = MMIO_BASE_H(reg.val);
+ edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n",
+ i, d->mmio_base_h_north, reg.val);
+
+ /*
+  * Get MMIO_BASE_H for the south-IMH (optional): a failed read
+  * leaves mmio_base_h_south at the zero set by kzalloc().
+  */
+ DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base);
+
+ if (read_local_reg(&reg2)) {
+ d->mmio_base_h_south = MMIO_BASE_H(reg2.val);
+ edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n",
+ i, d->mmio_base_h_south, reg2.val);
+ }
+
+ d->pkg = i;
+ d->num_imc = imc_num;
+ skx_init_mc_mapping(d);
+ list_add_tail(&d->list, edac_list);
+ }
+
+ return 0;
+}
+
+/*
+ * Get the number of per-package memory controllers.
+ *
+ * The count is taken from the north and south instance bitmaps of
+ * package 0 and written back to cfg->ddr_imc_num when it differs.
+ * Returns 0 on success or -ENODEV if no controller is reported.
+ */
+static int imh_get_imc_num(struct res_config *cfg)
+{
+	u32 north = get_imc_bitmap(cfg, 0, true);
+	u32 south = get_imc_bitmap(cfg, 0, false);
+	int imc_num = hweight32(north) + hweight32(south);
+
+	if (!imc_num) {
+		imh_printk(KERN_ERR, "Invalid mc number\n");
+		return -ENODEV;
+	}
+
+	if (cfg->ddr_imc_num == imc_num)
+		return 0;
+
+	/*
+	 * Update the configuration data to reflect the number of
+	 * present DDR memory controllers.
+	 */
+	cfg->ddr_imc_num = imc_num;
+	edac_dbg(2, "Set ddr mc number %d\n", imc_num);
+
+	return 0;
+}
+
+/*
+ * Get all memory controllers' parameters.
+ *
+ * For every package on @edac_list: set up its DDR memory controllers and
+ * socket id, then fill in the channel/DIMM geometry of each mapped
+ * controller and give it a system-wide index, counted from 0 across all
+ * packages. Returns 0 on success or -ENODEV.
+ */
+static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list)
+{
+	struct skx_dev *d;
+	u8 next_mc = 0;
+	int idx;
+
+	list_for_each_entry(d, edac_list, list) {
+		if (!get_ddr_munits(cfg, d)) {
+			imh_printk(KERN_ERR, "No mc found\n");
+			return -ENODEV;
+		}
+
+		if (!get_socket_id(cfg, d)) {
+			imh_printk(KERN_ERR, "Failed to get socket id\n");
+			return -ENODEV;
+		}
+
+		for (idx = 0; idx < cfg->ddr_imc_num; idx++) {
+			struct skx_imc *imc = &d->imc[idx];
+
+			/* Only controllers with a mapped MMIO region are set up. */
+			if (imc->mbase) {
+				imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+				imc->num_channels = cfg->ddr_chan_num;
+				imc->num_dimms = cfg->ddr_dimm_num;
+				imc->mc = next_mc++;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Check the mode register of home agent @ha_idx in package @d->pkg.
+ * Returns true when the register is readable and its NMCACHING bit is set.
+ */
+static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx)
+{
+	DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode);
+
+	if (!read_local_reg(&reg) || !NMCACHING(reg.val))
+		return false;
+
+	edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx);
+
+	return true;
+}
+
+/*
+ * Check whether the system has a 2-level memory configuration, i.e.
+ * whether any of the first cfg->ddr_imc_num home agents of any package
+ * on @head reports it.
+ */
+static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head)
+{
+	struct skx_dev *d;
+	int ha_idx;
+
+	list_for_each_entry(d, head, list) {
+		for (ha_idx = 0; ha_idx < cfg->ddr_imc_num; ha_idx++) {
+			if (check_2lm_enabled(cfg, d, ha_idx))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Helpers to read memory controller registers.
+ *
+ * read_imc_reg() reads the @width-byte register at @offset within the
+ * MMIO region of channel @chan of @imc.
+ */
+static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
+{
+	void __iomem *chan_base = imc->mbase + imc->chan_mmio_sz * chan;
+
+	return readx(chan_base + offset, width);
+}
+
+/* Read the mcmtr register of channel @chan of @imc, truncated to 32 bits. */
+static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan)
+{
+	u64 mcmtr = read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset,
+				 cfg->ddr_reg_mcmtr_width);
+
+	return (u32)mcmtr;
+}
+
+/*
+ * Read the dimmmtr register of DIMM @dimm on channel @chan of @imc,
+ * truncated to 32 bits. The per-DIMM registers are laid out back to back,
+ * ddr_reg_dimmmtr_width bytes apart.
+ */
+static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm)
+{
+	u32 offset = cfg->ddr_reg_dimmmtr_offset + cfg->ddr_reg_dimmmtr_width * dimm;
+	u64 dimmmtr = read_imc_reg(imc, chan, offset, cfg->ddr_reg_dimmmtr_width);
+
+	return (u32)dimmmtr;
+}
+
+/* True when the ECC_ENABLED field of @mcmtr is set. */
+static bool ecc_enabled(u32 mcmtr)
+{
+	return ECC_ENABLED(mcmtr) != 0;
+}
+
+/* True when the DIMM_POPULATED field of @dimmmtr is set. */
+static bool dimm_populated(u32 dimmmtr)
+{
+	return DIMM_POPULATED(dimmmtr) != 0;
+}
+
+/*
+ * Get each DIMM's configurations of the memory controller @mci.
+ *
+ * Returns 0 on success, or -ENODEV if a channel has at least one DIMM
+ * accepted by skx_get_dimm_info() but ECC is disabled on that channel.
+ */
+static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
+{
+	struct skx_pvt *pvt = mci->pvt_info;
+	struct skx_imc *imc = pvt->imc;
+	int chan, slot, ndimms;
+	u32 mcmtr, dimmmtr;
+
+	/* Nothing can be read from a controller without an MMIO mapping. */
+	if (!imc->mbase)
+		return 0;
+
+	for (chan = 0; chan < imc->num_channels; chan++) {
+		mcmtr = read_imc_mcmtr(cfg, imc, chan);
+		ndimms = 0;
+
+		for (slot = 0; slot < imc->num_dimms; slot++) {
+			dimmmtr = read_imc_dimmmtr(cfg, imc, chan, slot);
+			edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n",
+				 mcmtr, dimmmtr, imc->mc, chan, slot);
+
+			if (dimm_populated(dimmmtr)) {
+				struct dimm_info *dimm = edac_get_dimm(mci, chan, slot, 0);
+
+				ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm,
+							    imc, chan, slot, cfg);
+			}
+		}
+
+		if (ndimms && !ecc_enabled(mcmtr)) {
+			imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n",
+				   imc->mc, chan);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Register all memory controllers to the EDAC core.
+ *
+ * Controllers without an MMIO mapping are skipped. Stops at, and returns,
+ * the first error from skx_register_mci(); returns 0 otherwise.
+ */
+static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list)
+{
+	struct skx_dev *d;
+	int idx, rc;
+
+	list_for_each_entry(d, edac_list, list) {
+		for (idx = 0; idx < cfg->ddr_imc_num; idx++) {
+			struct skx_imc *imc = &d->imc[idx];
+
+			if (!imc->mbase)
+				continue;
+
+			rc = skx_register_mci(imc, imc->dev, dev_name(imc->dev),
+					      "Intel IMH-based Socket", EDAC_MOD_STR,
+					      imh_get_dimm_config, cfg);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static struct res_config dmr_cfg = {
+ .type = DMR,
+ .support_ddr5 = true,
+ .mmio_base_l_north = 0xf6800000,
+ .mmio_base_l_south = 0xf6000000,
+ .ddr_chan_num = 1,
+ .ddr_dimm_num = 2,
+ .ddr_imc_base = 0x39b000,
+ .ddr_chan_mmio_sz = 0x8000,
+ .ddr_reg_mcmtr_offset = 0x360,
+ .ddr_reg_mcmtr_width = 4,
+ .ddr_reg_dimmmtr_offset = 0x370,
+ .ddr_reg_dimmmtr_width = 4,
+ .ubox_base = 0x0,
+ .ubox_size = 0x2000,
+ .ubox_reg_mmio_base_offset = 0x580,
+ .ubox_reg_mmio_base_width = 4,
+ .ubox_reg_socket_id_offset = 0x1080,
+ .ubox_reg_socket_id_width = 4,
+ .pcu_base = 0x3000,
+ .pcu_size = 0x10000,
+ .pcu_reg_capid3_offset = 0x290,
+ .pcu_reg_capid3_width = 4,
+ .sca_base = 0x24c000,
+ .sca_size = 0x2500,
+ .sca_reg_tolm_offset = 0x2100,
+ .sca_reg_tolm_width = 8,
+ .sca_reg_tohm_offset = 0x2108,
+ .sca_reg_tohm_width = 8,
+ .ha_base = 0x3eb000,
+ .ha_size = 0x1000,
+ .ha_reg_mode_offset = 0x4a0,
+ .ha_reg_mode_width = 4,
+};
+
+static const struct x86_cpu_id imh_cpuids[] = {
+ X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, imh_cpuids);
+
+static struct notifier_block imh_mce_dec = {
+ .notifier_call = skx_mce_check_error,
+ .priority = MCE_PRIO_EDAC,
+};
+
+/*
+ * Module init: match the CPU, discover the memory controllers of every
+ * package and register them with the EDAC core.
+ *
+ * Returns 0 on success, -EBUSY when ghes_get_devices() returns non-NULL or
+ * edac_get_owner() names a different module, -ENODEV when running under a
+ * hypervisor, on an unmatched CPU or when TOLM/TOHM can't be read, or the
+ * error of the failing setup step. Every failure after the TOLM/TOHM read
+ * goes through skx_remove().
+ */
+static int __init imh_init(void)
+{
+ const struct x86_cpu_id *id;
+ struct list_head *edac_list;
+ struct res_config *cfg;
+ const char *owner;
+ u64 tolm, tohm;
+ int rc;
+
+ edac_dbg(2, "\n");
+
+ if (ghes_get_devices())
+ return -EBUSY;
+
+ owner = edac_get_owner();
+ if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
+ return -EBUSY;
+
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
+ /* The matched table entry carries the per-CPU-model configuration. */
+ id = x86_match_cpu(imh_cpuids);
+ if (!id)
+ return -ENODEV;
+ cfg = (struct res_config *)id->driver_data;
+ skx_set_res_cfg(cfg);
+
+ if (!imh_get_tolm_tohm(cfg, &tolm, &tohm))
+ return -ENODEV;
+
+ skx_set_hi_lo(tolm, tohm);
+
+ /* Must run before the per-package allocation, which uses cfg->ddr_imc_num. */
+ rc = imh_get_imc_num(cfg);
+ if (rc < 0)
+ goto fail;
+
+ edac_list = skx_get_edac_list();
+
+ rc = imh_get_all_mmio_base_h(cfg, edac_list);
+ if (rc)
+ goto fail;
+
+ rc = imh_get_munits(cfg, edac_list);
+ if (rc)
+ goto fail;
+
+ skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list));
+
+ rc = imh_register_mci(cfg, edac_list);
+ if (rc)
+ goto fail;
+
+ rc = skx_adxl_get();
+ if (rc)
+ goto fail;
+
+ opstate_init();
+ mce_register_decode_chain(&imh_mce_dec);
+ skx_setup_debug("imh_test");
+
+ imh_printk(KERN_INFO, "%s\n", IMH_REVISION);
+
+ return 0;
+fail:
+ skx_remove();
+ return rc;
+}
+
+/*
+ * Module exit: undo imh_init() in reverse order - debug interface, MCE
+ * decode chain, ADXL, then everything released by skx_remove().
+ */
+static void __exit imh_exit(void)
+{
+ edac_dbg(2, "\n");
+
+ skx_teardown_debug();
+ mce_unregister_decode_chain(&imh_mce_dec);
+ skx_adxl_put();
+ skx_remove();
+}
+
+module_init(imh_init);
+module_exit(imh_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Qiuxu Zhuo");
+MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller");
diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c
index 35ceaca578e1..a2caa7fc5412 100644
--- a/drivers/edac/layerscape_edac.c
+++ b/drivers/edac/layerscape_edac.c
@@ -21,6 +21,7 @@
static const struct of_device_id fsl_ddr_mc_err_of_match[] = {
{ .compatible = "fsl,qoriq-memory-controller", },
+ { .compatible = "nxp,imx9-memory-controller", .data = (void *)TYPE_IMX9, },
{},
};
MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match);
@@ -69,8 +70,8 @@ static void __exit fsl_ddr_mc_exit(void)
module_exit(fsl_ddr_mc_exit);
+MODULE_DESCRIPTION("Freescale Layerscape EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("NXP Semiconductor");
module_param(edac_op_state, int, 0444);
-MODULE_PARM_DESC(edac_op_state,
- "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
new file mode 100644
index 000000000000..38745800ed01
--- /dev/null
+++ b/drivers/edac/loongson_edac.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/acpi.h>
+#include <linux/edac.h>
+#include <linux/init.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include "edac_module.h"
+
+#define ECC_CS_COUNT_REG 0x18
+
+ struct loongson_edac_pvt {
+ void __iomem *ecc_base;
+
+ /*
+ * The ECC register in this controller records the number of errors
+ * encountered since reset and cannot be zeroed. To report only the
+ * errors seen since the previous check, the register value read at
+ * that check is cached here and subtracted from the next reading.
+ */
+ int last_ce_count;
+ };
+
+ /*
+  * Read ECC_CS_COUNT_REG and return the sum of its four low byte-wide
+  * counters (cs0 through cs3).
+  */
+ static int read_ecc(struct mem_ctl_info *mci)
+ {
+ 	struct loongson_edac_pvt *priv = mci->pvt_info;
+ 	u64 reg = readq(priv->ecc_base + ECC_CS_COUNT_REG);
+ 	int total = 0;
+ 	int shift;
+
+ 	/* cs0 -- cs3: one 8-bit field each, lowest byte first */
+ 	for (shift = 0; shift < 32; shift += 8)
+ 		total += (reg >> shift) & 0xff;
+
+ 	return total;
+ }
+
+ /*
+  * Polling callback: compare the current error count with the one cached at
+  * the previous check and report the increase as corrected errors. The cache
+  * is refreshed on every call, whether or not anything is reported.
+  */
+ static void edac_check(struct mem_ctl_info *mci)
+ {
+ 	struct loongson_edac_pvt *priv = mci->pvt_info;
+ 	int count = read_ecc(mci);
+ 	int delta = count - priv->last_ce_count;
+
+ 	priv->last_ce_count = count;
+
+ 	/* Nothing new (or the count went backwards): stay silent. */
+ 	if (delta > 0)
+ 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, delta,
+ 				     0, 0, 0, 0, 0, -1, "error", "");
+ }
+
+ /*
+  * Fill in the single DIMM (layer position 0/0/0) of this controller: a
+  * placeholder page count, the error grain and a label built from mc_idx.
+  */
+ static void dimm_config_init(struct mem_ctl_info *mci)
+ {
+ 	struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
+ 	u32 size = -1;	/* size not used */
+ 	u32 npages = MiB_TO_PAGES(size);
+
+ 	dimm->nr_pages = npages;
+ 	dimm->grain = 8;
+ 	snprintf(dimm->label, sizeof(dimm->label),
+ 		 "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0);
+ }
+
+ /*
+  * Initialise the driver-private data: record the mapped register base and
+  * take the current error count as the baseline for later checks.
+  */
+ static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase)
+ {
+ 	struct loongson_edac_pvt *priv = mci->pvt_info;
+
+ 	/* The base must be stored first: read_ecc() reads through it. */
+ 	priv->ecc_base = vbase;
+ 	priv->last_ce_count = read_ecc(mci);
+ }
+
+ /*
+  * Probe: map the controller's first memory resource, allocate a memory
+  * controller instance with one channel and one slot, fill in its static
+  * description and register it with the EDAC core in polling mode.
+  *
+  * Returns 0 on success, the ioremap error, -ENOMEM if the controller cannot
+  * be allocated, or the error returned by edac_mc_add_mc().
+  */
+ static int edac_probe(struct platform_device *pdev)
+ {
+ 	struct edac_mc_layer layers[2];
+ 	struct mem_ctl_info *mci;
+ 	void __iomem *regs;
+ 	int err;
+
+ 	regs = devm_platform_ioremap_resource(pdev, 0);
+ 	if (IS_ERR(regs))
+ 		return PTR_ERR(regs);
+
+ 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ 	layers[0].size = 1;
+ 	layers[0].is_virt_csrow = false;
+
+ 	layers[1].type = EDAC_MC_LAYER_SLOT;
+ 	layers[1].size = 1;
+ 	layers[1].is_virt_csrow = true;
+
+ 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ 			    sizeof(struct loongson_edac_pvt));
+ 	if (!mci)
+ 		return -ENOMEM;
+
+ 	mci->mc_idx = edac_device_alloc_index();
+ 	mci->mtype_cap = MEM_FLAG_RDDR4;
+ 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+ 	mci->edac_cap = EDAC_FLAG_NONE;
+ 	mci->mod_name = "loongson_edac.c";
+ 	mci->ctl_name = "loongson_edac_ctl";
+ 	mci->dev_name = "loongson_edac_dev";
+ 	mci->ctl_page_to_phys = NULL;
+ 	mci->pdev = &pdev->dev;
+ 	mci->error_desc.grain = 8;
+ 	mci->edac_check = edac_check;
+
+ 	pvt_init(mci, regs);
+ 	dimm_config_init(mci);
+
+ 	err = edac_mc_add_mc(mci);
+ 	if (!err) {
+ 		edac_op_state = EDAC_OPSTATE_POLL;
+ 		return 0;
+ 	}
+
+ 	/* Registration failed: release the controller allocated above. */
+ 	edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+ 	edac_mc_free(mci);
+
+ 	return err;
+ }
+
+ /*
+  * Remove: unregister the memory controller bound to this device and free it
+  * if one was found.
+  */
+ static void edac_remove(struct platform_device *pdev)
+ {
+ 	struct mem_ctl_info *mci;
+
+ 	mci = edac_mc_del_mc(&pdev->dev);
+ 	if (!mci)
+ 		return;
+
+ 	edac_mc_free(mci);
+ }
+
+ /* ACPI IDs this driver binds to; the empty entry terminates the table. */
+ static const struct acpi_device_id loongson_edac_acpi_match[] = {
+ {"LOON0010", 0},
+ {}
+ };
+ MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match);
+
+ /*
+  * Platform driver glue: devices are matched through
+  * loongson_edac_acpi_match and handled by edac_probe()/edac_remove().
+  */
+ static struct platform_driver loongson_edac_driver = {
+ .probe = edac_probe,
+ .remove = edac_remove,
+ .driver = {
+ .name = "loongson-mc-edac",
+ .acpi_match_table = loongson_edac_acpi_match,
+ },
+ };
+ module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63feb26a..af3c12284a1e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/cpu.h>
+#include <asm/msr.h>
#include "mce_amd.h"
@@ -143,482 +144,6 @@ static const char * const mc6_mce_desc[] = {
"Status Register File",
};
-/* Scalable MCA error strings */
-static const char * const smca_ls_mce_desc[] = {
- "Load queue parity error",
- "Store queue parity error",
- "Miss address buffer payload parity error",
- "Level 1 TLB parity error",
- "DC Tag error type 5",
- "DC Tag error type 6",
- "DC Tag error type 1",
- "Internal error type 1",
- "Internal error type 2",
- "System Read Data Error Thread 0",
- "System Read Data Error Thread 1",
- "DC Tag error type 2",
- "DC Data error type 1 and poison consumption",
- "DC Data error type 2",
- "DC Data error type 3",
- "DC Tag error type 4",
- "Level 2 TLB parity error",
- "PDC parity error",
- "DC Tag error type 3",
- "DC Tag error type 5",
- "L2 Fill Data error",
-};
-
-static const char * const smca_ls2_mce_desc[] = {
- "An ECC error was detected on a data cache read by a probe or victimization",
- "An ECC error or L2 poison was detected on a data cache read by a load",
- "An ECC error was detected on a data cache read-modify-write by a store",
- "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
- "An ECC error or poison bit mismatch was detected on a tag read by a load",
- "An ECC error or poison bit mismatch was detected on a tag read by a store",
- "An ECC error was detected on an EMEM read by a load",
- "An ECC error was detected on an EMEM read-modify-write by a store",
- "A parity error was detected in an L1 TLB entry by any access",
- "A parity error was detected in an L2 TLB entry by any access",
- "A parity error was detected in a PWC entry by any access",
- "A parity error was detected in an STQ entry by any access",
- "A parity error was detected in an LDQ entry by any access",
- "A parity error was detected in a MAB entry by any access",
- "A parity error was detected in an SCB entry state field by any access",
- "A parity error was detected in an SCB entry address field by any access",
- "A parity error was detected in an SCB entry data field by any access",
- "A parity error was detected in a WCB entry by any access",
- "A poisoned line was detected in an SCB entry by any access",
- "A SystemReadDataError error was reported on read data returned from L2 for a load",
- "A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
- "A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
- "A hardware assertion error was reported",
- "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
-};
-
-static const char * const smca_if_mce_desc[] = {
- "Op Cache Microtag Probe Port Parity Error",
- "IC Microtag or Full Tag Multi-hit Error",
- "IC Full Tag Parity Error",
- "IC Data Array Parity Error",
- "Decoupling Queue PhysAddr Parity Error",
- "L0 ITLB Parity Error",
- "L1 ITLB Parity Error",
- "L2 ITLB Parity Error",
- "BPQ Thread 0 Snoop Parity Error",
- "BPQ Thread 1 Snoop Parity Error",
- "L1 BTB Multi-Match Error",
- "L2 BTB Multi-Match Error",
- "L2 Cache Response Poison Error",
- "System Read Data Error",
- "Hardware Assertion Error",
- "L1-TLB Multi-Hit",
- "L2-TLB Multi-Hit",
- "BSR Parity Error",
- "CT MCE",
-};
-
-static const char * const smca_l2_mce_desc[] = {
- "L2M Tag Multiple-Way-Hit error",
- "L2M Tag or State Array ECC Error",
- "L2M Data Array ECC Error",
- "Hardware Assert Error",
-};
-
-static const char * const smca_de_mce_desc[] = {
- "Micro-op cache tag parity error",
- "Micro-op cache data parity error",
- "Instruction buffer parity error",
- "Micro-op queue parity error",
- "Instruction dispatch queue parity error",
- "Fetch address FIFO parity error",
- "Patch RAM data parity error",
- "Patch RAM sequencer parity error",
- "Micro-op buffer parity error",
- "Hardware Assertion MCA Error",
-};
-
-static const char * const smca_ex_mce_desc[] = {
- "Watchdog Timeout error",
- "Physical register file parity error",
- "Flag register file parity error",
- "Immediate displacement register file parity error",
- "Address generator payload parity error",
- "EX payload parity error",
- "Checkpoint queue parity error",
- "Retire dispatch queue parity error",
- "Retire status queue parity error",
- "Scheduling queue parity error",
- "Branch buffer queue parity error",
- "Hardware Assertion error",
- "Spec Map parity error",
- "Retire Map parity error",
-};
-
-static const char * const smca_fp_mce_desc[] = {
- "Physical register file (PRF) parity error",
- "Freelist (FL) parity error",
- "Schedule queue parity error",
- "NSQ parity error",
- "Retire queue (RQ) parity error",
- "Status register file (SRF) parity error",
- "Hardware assertion",
-};
-
-static const char * const smca_l3_mce_desc[] = {
- "Shadow Tag Macro ECC Error",
- "Shadow Tag Macro Multi-way-hit Error",
- "L3M Tag ECC Error",
- "L3M Tag Multi-way-hit Error",
- "L3M Data ECC Error",
- "SDP Parity Error or SystemReadDataError from XI",
- "L3 Victim Queue Parity Error",
- "L3 Hardware Assertion",
-};
-
-static const char * const smca_cs_mce_desc[] = {
- "Illegal Request",
- "Address Violation",
- "Security Violation",
- "Illegal Response",
- "Unexpected Response",
- "Request or Probe Parity Error",
- "Read Response Parity Error",
- "Atomic Request Parity Error",
- "Probe Filter ECC Error",
-};
-
-static const char * const smca_cs2_mce_desc[] = {
- "Illegal Request",
- "Address Violation",
- "Security Violation",
- "Illegal Response",
- "Unexpected Response",
- "Request or Probe Parity Error",
- "Read Response Parity Error",
- "Atomic Request Parity Error",
- "SDP read response had no match in the CS queue",
- "Probe Filter Protocol Error",
- "Probe Filter ECC Error",
- "SDP read response had an unexpected RETRY error",
- "Counter overflow error",
- "Counter underflow error",
-};
-
-static const char * const smca_pie_mce_desc[] = {
- "Hardware Assert",
- "Register security violation",
- "Link Error",
- "Poison data consumption",
- "A deferred error was detected in the DF"
-};
-
-static const char * const smca_umc_mce_desc[] = {
- "DRAM ECC error",
- "Data poison error",
- "SDP parity error",
- "Advanced peripheral bus error",
- "Address/Command parity error",
- "Write data CRC error",
- "DCQ SRAM ECC error",
- "AES SRAM ECC error",
-};
-
-static const char * const smca_umc2_mce_desc[] = {
- "DRAM ECC error",
- "Data poison error",
- "SDP parity error",
- "Reserved",
- "Address/Command parity error",
- "Write data parity error",
- "DCQ SRAM ECC error",
- "Reserved",
- "Read data parity error",
- "Rdb SRAM ECC error",
- "RdRsp SRAM ECC error",
- "LM32 MP errors",
-};
-
-static const char * const smca_pb_mce_desc[] = {
- "An ECC error in the Parameter Block RAM array",
-};
-
-static const char * const smca_psp_mce_desc[] = {
- "An ECC or parity error in a PSP RAM instance",
-};
-
-static const char * const smca_psp2_mce_desc[] = {
- "High SRAM ECC or parity error",
- "Low SRAM ECC or parity error",
- "Instruction Cache Bank 0 ECC or parity error",
- "Instruction Cache Bank 1 ECC or parity error",
- "Instruction Tag Ram 0 parity error",
- "Instruction Tag Ram 1 parity error",
- "Data Cache Bank 0 ECC or parity error",
- "Data Cache Bank 1 ECC or parity error",
- "Data Cache Bank 2 ECC or parity error",
- "Data Cache Bank 3 ECC or parity error",
- "Data Tag Bank 0 parity error",
- "Data Tag Bank 1 parity error",
- "Data Tag Bank 2 parity error",
- "Data Tag Bank 3 parity error",
- "Dirty Data Ram parity error",
- "TLB Bank 0 parity error",
- "TLB Bank 1 parity error",
- "System Hub Read Buffer ECC or parity error",
-};
-
-static const char * const smca_smu_mce_desc[] = {
- "An ECC or parity error in an SMU RAM instance",
-};
-
-static const char * const smca_smu2_mce_desc[] = {
- "High SRAM ECC or parity error",
- "Low SRAM ECC or parity error",
- "Data Cache Bank A ECC or parity error",
- "Data Cache Bank B ECC or parity error",
- "Data Tag Cache Bank A ECC or parity error",
- "Data Tag Cache Bank B ECC or parity error",
- "Instruction Cache Bank A ECC or parity error",
- "Instruction Cache Bank B ECC or parity error",
- "Instruction Tag Cache Bank A ECC or parity error",
- "Instruction Tag Cache Bank B ECC or parity error",
- "System Hub Read Buffer ECC or parity error",
- "PHY RAM ECC error",
-};
-
-static const char * const smca_mp5_mce_desc[] = {
- "High SRAM ECC or parity error",
- "Low SRAM ECC or parity error",
- "Data Cache Bank A ECC or parity error",
- "Data Cache Bank B ECC or parity error",
- "Data Tag Cache Bank A ECC or parity error",
- "Data Tag Cache Bank B ECC or parity error",
- "Instruction Cache Bank A ECC or parity error",
- "Instruction Cache Bank B ECC or parity error",
- "Instruction Tag Cache Bank A ECC or parity error",
- "Instruction Tag Cache Bank B ECC or parity error",
-};
-
-static const char * const smca_mpdma_mce_desc[] = {
- "Main SRAM [31:0] bank ECC or parity error",
- "Main SRAM [63:32] bank ECC or parity error",
- "Main SRAM [95:64] bank ECC or parity error",
- "Main SRAM [127:96] bank ECC or parity error",
- "Data Cache Bank A ECC or parity error",
- "Data Cache Bank B ECC or parity error",
- "Data Tag Cache Bank A ECC or parity error",
- "Data Tag Cache Bank B ECC or parity error",
- "Instruction Cache Bank A ECC or parity error",
- "Instruction Cache Bank B ECC or parity error",
- "Instruction Tag Cache Bank A ECC or parity error",
- "Instruction Tag Cache Bank B ECC or parity error",
- "Data Cache Bank A ECC or parity error",
- "Data Cache Bank B ECC or parity error",
- "Data Tag Cache Bank A ECC or parity error",
- "Data Tag Cache Bank B ECC or parity error",
- "Instruction Cache Bank A ECC or parity error",
- "Instruction Cache Bank B ECC or parity error",
- "Instruction Tag Cache Bank A ECC or parity error",
- "Instruction Tag Cache Bank B ECC or parity error",
- "Data Cache Bank A ECC or parity error",
- "Data Cache Bank B ECC or parity error",
- "Data Tag Cache Bank A ECC or parity error",
- "Data Tag Cache Bank B ECC or parity error",
- "Instruction Cache Bank A ECC or parity error",
- "Instruction Cache Bank B ECC or parity error",
- "Instruction Tag Cache Bank A ECC or parity error",
- "Instruction Tag Cache Bank B ECC or parity error",
- "System Hub Read Buffer ECC or parity error",
- "MPDMA TVF DVSEC Memory ECC or parity error",
- "MPDMA TVF MMIO Mailbox0 ECC or parity error",
- "MPDMA TVF MMIO Mailbox1 ECC or parity error",
- "MPDMA TVF Doorbell Memory ECC or parity error",
- "MPDMA TVF SDP Slave Memory 0 ECC or parity error",
- "MPDMA TVF SDP Slave Memory 1 ECC or parity error",
- "MPDMA TVF SDP Slave Memory 2 ECC or parity error",
- "MPDMA TVF SDP Master Memory 0 ECC or parity error",
- "MPDMA TVF SDP Master Memory 1 ECC or parity error",
- "MPDMA TVF SDP Master Memory 2 ECC or parity error",
- "MPDMA TVF SDP Master Memory 3 ECC or parity error",
- "MPDMA TVF SDP Master Memory 4 ECC or parity error",
- "MPDMA TVF SDP Master Memory 5 ECC or parity error",
- "MPDMA TVF SDP Master Memory 6 ECC or parity error",
- "MPDMA PTE Command FIFO ECC or parity error",
- "MPDMA PTE Hub Data FIFO ECC or parity error",
- "MPDMA PTE Internal Data FIFO ECC or parity error",
- "MPDMA PTE Command Memory DMA ECC or parity error",
- "MPDMA PTE Command Memory Internal ECC or parity error",
- "MPDMA PTE DMA Completion FIFO ECC or parity error",
- "MPDMA PTE Tablewalk Completion FIFO ECC or parity error",
- "MPDMA PTE Descriptor Completion FIFO ECC or parity error",
- "MPDMA PTE ReadOnly Completion FIFO ECC or parity error",
- "MPDMA PTE DirectWrite Completion FIFO ECC or parity error",
- "SDP Watchdog Timer expired",
-};
-
-static const char * const smca_nbio_mce_desc[] = {
- "ECC or Parity error",
- "PCIE error",
- "SDP ErrEvent error",
- "SDP Egress Poison Error",
- "IOHC Internal Poison Error",
-};
-
-static const char * const smca_pcie_mce_desc[] = {
- "CCIX PER Message logging",
- "CCIX Read Response with Status: Non-Data Error",
- "CCIX Write Response with Status: Non-Data Error",
- "CCIX Read Response with Status: Data Error",
- "CCIX Non-okay write response with data error",
-};
-
-static const char * const smca_pcie2_mce_desc[] = {
- "SDP Parity Error logging",
-};
-
-static const char * const smca_xgmipcs_mce_desc[] = {
- "Data Loss Error",
- "Training Error",
- "Flow Control Acknowledge Error",
- "Rx Fifo Underflow Error",
- "Rx Fifo Overflow Error",
- "CRC Error",
- "BER Exceeded Error",
- "Tx Vcid Data Error",
- "Replay Buffer Parity Error",
- "Data Parity Error",
- "Replay Fifo Overflow Error",
- "Replay Fifo Underflow Error",
- "Elastic Fifo Overflow Error",
- "Deskew Error",
- "Flow Control CRC Error",
- "Data Startup Limit Error",
- "FC Init Timeout Error",
- "Recovery Timeout Error",
- "Ready Serial Timeout Error",
- "Ready Serial Attempt Error",
- "Recovery Attempt Error",
- "Recovery Relock Attempt Error",
- "Replay Attempt Error",
- "Sync Header Error",
- "Tx Replay Timeout Error",
- "Rx Replay Timeout Error",
- "LinkSub Tx Timeout Error",
- "LinkSub Rx Timeout Error",
- "Rx CMD Packet Error",
-};
-
-static const char * const smca_xgmiphy_mce_desc[] = {
- "RAM ECC Error",
- "ARC instruction buffer parity error",
- "ARC data buffer parity error",
- "PHY APB error",
-};
-
-static const char * const smca_nbif_mce_desc[] = {
- "Timeout error from GMI",
- "SRAM ECC error",
- "NTB Error Event",
- "SDP Parity error",
-};
-
-static const char * const smca_sata_mce_desc[] = {
- "Parity error for port 0",
- "Parity error for port 1",
- "Parity error for port 2",
- "Parity error for port 3",
- "Parity error for port 4",
- "Parity error for port 5",
- "Parity error for port 6",
- "Parity error for port 7",
-};
-
-static const char * const smca_usb_mce_desc[] = {
- "Parity error or ECC error for S0 RAM0",
- "Parity error or ECC error for S0 RAM1",
- "Parity error or ECC error for S0 RAM2",
- "Parity error for PHY RAM0",
- "Parity error for PHY RAM1",
- "AXI Slave Response error",
-};
-
-static const char * const smca_gmipcs_mce_desc[] = {
- "Data Loss Error",
- "Training Error",
- "Replay Parity Error",
- "Rx Fifo Underflow Error",
- "Rx Fifo Overflow Error",
- "CRC Error",
- "BER Exceeded Error",
- "Tx Fifo Underflow Error",
- "Replay Buffer Parity Error",
- "Tx Overflow Error",
- "Replay Fifo Overflow Error",
- "Replay Fifo Underflow Error",
- "Elastic Fifo Overflow Error",
- "Deskew Error",
- "Offline Error",
- "Data Startup Limit Error",
- "FC Init Timeout Error",
- "Recovery Timeout Error",
- "Ready Serial Timeout Error",
- "Ready Serial Attempt Error",
- "Recovery Attempt Error",
- "Recovery Relock Attempt Error",
- "Deskew Abort Error",
- "Rx Buffer Error",
- "Rx LFDS Fifo Overflow Error",
- "Rx LFDS Fifo Underflow Error",
- "LinkSub Tx Timeout Error",
- "LinkSub Rx Timeout Error",
- "Rx CMD Packet Error",
- "LFDS Training Timeout Error",
- "LFDS FC Init Timeout Error",
- "Data Loss Error",
-};
-
-struct smca_mce_desc {
- const char * const *descs;
- unsigned int num_descs;
-};
-
-static struct smca_mce_desc smca_mce_descs[] = {
- [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
- [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
- [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
- [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
- [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
- [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
- [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
- [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
- [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
- [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
- [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
- [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
- [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
- [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
- [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
- [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
- [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
- [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
- [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
- [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) },
- [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
- [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
- [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
- [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
- /* NBIF and SHUB have the same error descriptions, for now. */
- [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
- [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
- [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) },
- [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) },
- [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) },
- /* All the PHY bank types have the same error descriptions, for now. */
- [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
- [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
- [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
-};
-
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@@ -1060,7 +585,7 @@ static void decode_mc3_mce(struct mce *m)
static void decode_mc4_mce(struct mce *m)
{
unsigned int fam = x86_family(m->cpuid);
- int node_id = topology_die_id(m->extcpu);
+ int node_id = topology_amd_node_id(m->extcpu);
u16 ec = EC(m->status);
u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;
@@ -1163,11 +688,51 @@ static void decode_mc6_mce(struct mce *m)
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
+ /*
+  * Human-readable unit names indexed by SMCA bank type, as returned by
+  * smca_get_long_name(). A range designator gives every bank type in that
+  * range the same name.
+  */
+ static const char * const smca_long_names[] = {
+ [SMCA_LS ... SMCA_LS_V2] = "Load Store Unit",
+ [SMCA_IF] = "Instruction Fetch Unit",
+ [SMCA_L2_CACHE] = "L2 Cache",
+ [SMCA_DE] = "Decode Unit",
+ [SMCA_RESERVED] = "Reserved",
+ [SMCA_EX] = "Execution Unit",
+ [SMCA_FP] = "Floating Point Unit",
+ [SMCA_L3_CACHE] = "L3 Cache",
+ [SMCA_CS ... SMCA_CS_V2] = "Coherent Slave",
+ [SMCA_PIE] = "Power, Interrupts, etc.",
+
+ /* UMC v2 is separate because both of them can exist in a single system. */
+ [SMCA_UMC] = "Unified Memory Controller",
+ [SMCA_UMC_V2] = "Unified Memory Controller v2",
+ [SMCA_PB] = "Parameter Block",
+ [SMCA_PSP ... SMCA_PSP_V2] = "Platform Security Processor",
+ [SMCA_SMU ... SMCA_SMU_V2] = "System Management Unit",
+ [SMCA_MP5] = "Microprocessor 5 Unit",
+ [SMCA_MPDMA] = "MPDMA Unit",
+ [SMCA_NBIO] = "Northbridge IO Unit",
+ [SMCA_PCIE ... SMCA_PCIE_V2] = "PCI Express Unit",
+ [SMCA_XGMI_PCS] = "Ext Global Memory Interconnect PCS Unit",
+ [SMCA_NBIF] = "NBIF Unit",
+ [SMCA_SHUB] = "System Hub Unit",
+ [SMCA_SATA] = "SATA Unit",
+ [SMCA_USB] = "USB Unit",
+ [SMCA_GMI_PCS] = "Global Memory Interconnect PCS Unit",
+ [SMCA_XGMI_PHY] = "Ext Global Memory Interconnect PHY Unit",
+ [SMCA_WAFL_PHY] = "WAFL PHY Unit",
+ [SMCA_GMI_PHY] = "Global Memory Interconnect PHY Unit",
+ };
+
+ /*
+  * Look up the long name for an SMCA bank type. Returns NULL when @t is not
+  * below N_SMCA_BANK_TYPES; otherwise returns the smca_long_names[] entry.
+  */
+ static const char *smca_get_long_name(enum smca_bank_types t)
+ {
+ 	return (t < N_SMCA_BANK_TYPES) ? smca_long_names[t] : NULL;
+ }
+
/* Decode errors according to Scalable MCA specification */
static void decode_smca_error(struct mce *m)
{
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
- const char *ip_name;
u8 xec = XEC(m->status, xec_mask);
if (bank_type >= N_SMCA_BANK_TYPES)
@@ -1178,16 +743,11 @@ static void decode_smca_error(struct mce *m)
return;
}
- ip_name = smca_get_long_name(bank_type);
-
- pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
+ pr_emerg(HW_ERR "%s Ext. Error Code: %d", smca_get_long_name(bank_type), xec);
- /* Only print the decode of valid error codes */
- if (xec < smca_mce_descs[bank_type].num_descs)
- pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
-
- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
- decode_dram_ecc(topology_die_id(m->extcpu), m);
+ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
+ xec == 0 && decode_dram_ecc)
+ decode_dram_ecc(topology_amd_node_id(m->extcpu), m);
}
static inline void amd_decode_err_code(u16 ec)
@@ -1234,7 +794,9 @@ static int
amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = to_mce_hw_err(m);
unsigned int fam = x86_family(m->cpuid);
+ u32 mca_config_lo = 0, dummy;
int ecc;
if (m->kflags & MCE_HANDLED_CEC)
@@ -1254,11 +816,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
if (boot_cpu_has(X86_FEATURE_SMCA)) {
- u32 low, high;
- u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(m->bank), &mca_config_lo, &dummy);
- if (!rdmsr_safe(addr, &low, &high) &&
- (low & MCI_CONFIG_MCAX))
+ if (mca_config_lo & MCI_CONFIG_MCAX)
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
@@ -1291,8 +851,18 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (boot_cpu_has(X86_FEATURE_SMCA)) {
pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
- if (m->status & MCI_STATUS_SYNDV)
- pr_cont(", Syndrome: 0x%016llx", m->synd);
+ if (m->status & MCI_STATUS_SYNDV) {
+ pr_cont(", Syndrome: 0x%016llx\n", m->synd);
+ if (mca_config_lo & MCI_CONFIG_FRUTEXT) {
+ char frutext[17];
+
+ frutext[16] = '\0';
+ memcpy(&frutext[0], &err->vendor.amd.synd1, 8);
+ memcpy(&frutext[8], &err->vendor.amd.synd2, 8);
+
+ pr_emerg(HW_ERR "FRU Text: %s", frutext);
+ }
+ }
pr_cont("\n");
diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
new file mode 100644
index 000000000000..108d69209146
--- /dev/null
+++ b/drivers/edac/mem_repair.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic EDAC memory repair driver is designed to control the memory
+ * devices with memory repair features, such as Post Package Repair (PPR),
+ * memory sparing etc. The common sysfs memory repair interface abstracts
+ * the control of various arbitrary memory repair functionalities into a
+ * unified set of functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+enum edac_mem_repair_attributes { /* index of each sysfs attribute in the mem_repair_dev_attr[] template and the per-instance arrays */
+ MR_TYPE,
+ MR_PERSIST_MODE,
+ MR_SAFE_IN_USE,
+ MR_HPA,
+ MR_MIN_HPA,
+ MR_MAX_HPA,
+ MR_DPA,
+ MR_MIN_DPA,
+ MR_MAX_DPA,
+ MR_NIBBLE_MASK,
+ MR_BANK_GROUP,
+ MR_BANK,
+ MR_RANK,
+ MR_ROW,
+ MR_COLUMN,
+ MR_CHANNEL,
+ MR_SUB_CHANNEL,
+ MEM_DO_REPAIR, /* the write-only "repair" trigger attribute */
+ MR_MAX_ATTRS /* attribute count; sizes the arrays in struct edac_mem_repair_context */
+};
+
+struct edac_mem_repair_dev_attr { /* a device attribute tagged with the repair instance it serves */
+ struct device_attribute dev_attr; /* embedded attribute; TO_MR_DEV_ATTR() maps it back to this wrapper */
+ u8 instance; /* index into ctx->mem_repair[] used by the show/store handlers */
+};
+
+struct edac_mem_repair_context { /* per-instance storage backing one sysfs attribute group */
+ char name[EDAC_FEAT_NAME_LEN]; /* group name, "mem_repair" followed by the instance number */
+ struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS]; /* private copies of the attribute templates */
+ struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1]; /* pointers into the copies; the extra slot stays NULL as terminator */
+ struct attribute_group group; /* group handed back to the caller through attr_groups[0] */
+};
+
+const char * const edac_repair_type[] = { /* repair type names, indexed by the EDAC_REPAIR_* constants */
+ [EDAC_REPAIR_PPR] = "ppr",
+ [EDAC_REPAIR_CACHELINE_SPARING] = "cacheline-sparing",
+ [EDAC_REPAIR_ROW_SPARING] = "row-sparing",
+ [EDAC_REPAIR_BANK_SPARING] = "bank-sparing",
+ [EDAC_REPAIR_RANK_SPARING] = "rank-sparing",
+};
+EXPORT_SYMBOL_GPL(edac_repair_type); /* made available to other GPL modules */
+
+#define TO_MR_DEV_ATTR(_dev_attr) /* recover the edac_mem_repair_dev_attr that embeds the given device_attribute */ \
+ container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr)
+
+#define MR_ATTR_SHOW(attrib, cb, type, format) /* defines attrib##_show(): fetch one value through ops->cb and print it with format */ \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ u8 inst = TO_MR_DEV_ATTR(attr)->instance; /* repair instance this attribute belongs to */ \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_mem_repair_ops *ops = \
+ ctx->mem_repair[inst].mem_repair_ops; \
+ type data; /* filled in by the callback */ \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \
+ &data); \
+ if (ret) /* hand the callback's error back to the reader */ \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n")
+MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n")
+MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n")
+MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n")
+MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n")
+MR_ATTR_SHOW(bank, get_bank, u32, "%u\n")
+MR_ATTR_SHOW(rank, get_rank, u32, "%u\n")
+MR_ATTR_SHOW(row, get_row, u32, "0x%x\n")
+MR_ATTR_SHOW(column, get_column, u32, "%u\n")
+MR_ATTR_SHOW(channel, get_channel, u32, "%u\n")
+MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n")
+
+/*
+ * MR_ATTR_STORE() defines attrib##_store(): parse the user's string with
+ * conv_func into a value of the given type and pass it to ops->cb for the
+ * repair instance the attribute belongs to. Parse errors and callback
+ * errors are returned to the writer; on success the whole write is consumed.
+ *
+ * The 32-bit attributes are parsed with kstrtou32() so that a value which
+ * does not fit is rejected with -ERANGE instead of being silently truncated
+ * on its way to the driver (their getters above report u32 values).
+ * NOTE(review): assumes the matching setters take u32 - confirm in edac.h.
+ */
+#define MR_ATTR_STORE(attrib, cb, type, conv_func)			\
+static ssize_t attrib##_store(struct device *ras_feat_dev,		\
+			      struct device_attribute *attr,		\
+			      const char *buf, size_t len)		\
+{									\
+	u8 inst = TO_MR_DEV_ATTR(attr)->instance;			\
+	struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);	\
+	const struct edac_mem_repair_ops *ops =				\
+		ctx->mem_repair[inst].mem_repair_ops;			\
+	type data;							\
+	int ret;							\
+									\
+	ret = conv_func(buf, 0, &data);					\
+	if (ret < 0)							\
+		return ret;						\
+									\
+	ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \
+		      data);						\
+	if (ret)							\
+		return ret;						\
+									\
+	return len;							\
+}
+
+MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul)
+MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64)
+MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64)
+MR_ATTR_STORE(nibble_mask, set_nibble_mask, u32, kstrtou32)
+MR_ATTR_STORE(bank_group, set_bank_group, u32, kstrtou32)
+MR_ATTR_STORE(bank, set_bank, u32, kstrtou32)
+MR_ATTR_STORE(rank, set_rank, u32, kstrtou32)
+MR_ATTR_STORE(row, set_row, u32, kstrtou32)
+MR_ATTR_STORE(column, set_column, u32, kstrtou32)
+MR_ATTR_STORE(channel, set_channel, u32, kstrtou32)
+MR_ATTR_STORE(sub_channel, set_sub_channel, u32, kstrtou32)
+
+/*
+ * MR_DO_OP() defines attrib##_store() for a write-only trigger attribute:
+ * the written number is parsed and handed to ops->cb for the repair
+ * instance the attribute belongs to.
+ *
+ * The value is parsed with kstrtou32() so that an over-wide input fails
+ * with -ERANGE rather than being truncated before it reaches the driver.
+ * NOTE(review): assumes ops->do_repair takes a u32 - confirm in edac.h.
+ */
+#define MR_DO_OP(attrib, cb)						\
+static ssize_t attrib##_store(struct device *ras_feat_dev,		\
+			      struct device_attribute *attr,		\
+			      const char *buf, size_t len)		\
+{									\
+	u8 inst = TO_MR_DEV_ATTR(attr)->instance;			\
+	struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);	\
+	const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \
+	u32 data;							\
+	int ret;							\
+									\
+	ret = kstrtou32(buf, 0, &data);					\
+	if (ret < 0)							\
+		return ret;						\
+									\
+	ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \
+	if (ret)							\
+		return ret;						\
+									\
+	return len;							\
+}
+
+MR_DO_OP(repair, do_repair)
+
+/* Mode of an attribute that needs a single callback: its own mode, or hidden. */
+static umode_t mr_mode_if_present(const struct attribute *a, bool present)
+{
+	return present ? a->mode : 0;
+}
+
+/*
+ * Mode of a get/set attribute: hidden without a getter, read-only (0444)
+ * with only a getter, the attribute's own mode when both exist.
+ */
+static umode_t mr_mode_get_set(const struct attribute *a, bool has_get,
+			       bool has_set)
+{
+	if (!has_get)
+		return 0;
+
+	return has_set ? a->mode : 0444;
+}
+
+/*
+ * sysfs is_visible callback for a memory repair attribute group.
+ *
+ * Looks up the ops of the repair instance the attribute was created for and
+ * returns the mode the attribute should get; 0 hides it. attr_id is the
+ * attribute's position in the group, i.e. an enum edac_mem_repair_attributes
+ * value.
+ */
+static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+	struct device_attribute *da = container_of(a, struct device_attribute, attr);
+	struct edac_dev_feat_ctx *feat_ctx = dev_get_drvdata(kobj_to_dev(kobj));
+	const struct edac_mem_repair_ops *ops;
+
+	ops = feat_ctx->mem_repair[TO_MR_DEV_ATTR(da)->instance].mem_repair_ops;
+
+	switch (attr_id) {
+	case MR_TYPE:
+		return mr_mode_if_present(a, !!ops->get_repair_type);
+	case MR_PERSIST_MODE:
+		return mr_mode_get_set(a, !!ops->get_persist_mode,
+				       !!ops->set_persist_mode);
+	case MR_SAFE_IN_USE:
+		return mr_mode_if_present(a, !!ops->get_repair_safe_when_in_use);
+	case MR_HPA:
+		return mr_mode_get_set(a, !!ops->get_hpa, !!ops->set_hpa);
+	case MR_MIN_HPA:
+		return mr_mode_if_present(a, !!ops->get_min_hpa);
+	case MR_MAX_HPA:
+		return mr_mode_if_present(a, !!ops->get_max_hpa);
+	case MR_DPA:
+		return mr_mode_get_set(a, !!ops->get_dpa, !!ops->set_dpa);
+	case MR_MIN_DPA:
+		return mr_mode_if_present(a, !!ops->get_min_dpa);
+	case MR_MAX_DPA:
+		return mr_mode_if_present(a, !!ops->get_max_dpa);
+	case MR_NIBBLE_MASK:
+		return mr_mode_get_set(a, !!ops->get_nibble_mask,
+				       !!ops->set_nibble_mask);
+	case MR_BANK_GROUP:
+		return mr_mode_get_set(a, !!ops->get_bank_group,
+				       !!ops->set_bank_group);
+	case MR_BANK:
+		return mr_mode_get_set(a, !!ops->get_bank, !!ops->set_bank);
+	case MR_RANK:
+		return mr_mode_get_set(a, !!ops->get_rank, !!ops->set_rank);
+	case MR_ROW:
+		return mr_mode_get_set(a, !!ops->get_row, !!ops->set_row);
+	case MR_COLUMN:
+		return mr_mode_get_set(a, !!ops->get_column, !!ops->set_column);
+	case MR_CHANNEL:
+		return mr_mode_get_set(a, !!ops->get_channel, !!ops->set_channel);
+	case MR_SUB_CHANNEL:
+		return mr_mode_get_set(a, !!ops->get_sub_channel,
+				       !!ops->set_sub_channel);
+	case MEM_DO_REPAIR:
+		return mr_mode_if_present(a, !!ops->do_repair);
+	default:
+		return 0;
+	}
+}
+
+static const struct device_attribute mem_repair_dev_attr[] = { /* attribute templates, copied per instance by mem_repair_create_desc() */
+ [MR_TYPE] = __ATTR_RO(repair_type),
+ [MR_PERSIST_MODE] = __ATTR_RW(persist_mode), /* RW entries are reduced to 0444 by mem_repair_attr_visible() when the setter is missing */
+ [MR_SAFE_IN_USE] = __ATTR_RO(repair_safe_when_in_use),
+ [MR_HPA] = __ATTR_RW(hpa),
+ [MR_MIN_HPA] = __ATTR_RO(min_hpa),
+ [MR_MAX_HPA] = __ATTR_RO(max_hpa),
+ [MR_DPA] = __ATTR_RW(dpa),
+ [MR_MIN_DPA] = __ATTR_RO(min_dpa),
+ [MR_MAX_DPA] = __ATTR_RO(max_dpa),
+ [MR_NIBBLE_MASK] = __ATTR_RW(nibble_mask),
+ [MR_BANK_GROUP] = __ATTR_RW(bank_group),
+ [MR_BANK] = __ATTR_RW(bank),
+ [MR_RANK] = __ATTR_RW(rank),
+ [MR_ROW] = __ATTR_RW(row),
+ [MR_COLUMN] = __ATTR_RW(column),
+ [MR_CHANNEL] = __ATTR_RW(channel),
+ [MR_SUB_CHANNEL] = __ATTR_RW(sub_channel),
+ [MEM_DO_REPAIR] = __ATTR_WO(repair) /* write-only trigger, handled by repair_store() */
+};
+
+/*
+ * Build the sysfs attribute group for one memory repair instance.
+ *
+ * Allocates a device-managed context, copies every attribute template into
+ * it tagged with @instance, names the group "mem_repair<instance>" and
+ * stores the group in attr_groups[0].
+ *
+ * Returns 0 on success or -ENOMEM if the context cannot be allocated.
+ */
+static int mem_repair_create_desc(struct device *dev,
+				  const struct attribute_group **attr_groups,
+				  u8 instance)
+{
+	struct edac_mem_repair_context *ctx;
+	struct attribute_group *group;
+	int i;
+
+	/* zeroed allocation: the last mem_repair_attrs[] slot stays NULL */
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	for (i = 0; i < MR_MAX_ATTRS; i++) {
+		ctx->mem_repair_dev_attr[i].dev_attr = mem_repair_dev_attr[i];
+		ctx->mem_repair_dev_attr[i].instance = instance;
+		sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr);
+		ctx->mem_repair_attrs[i] =
+			&ctx->mem_repair_dev_attr[i].dev_attr.attr;
+	}
+
+	/* bound the write by the buffer itself rather than trusting its size */
+	snprintf(ctx->name, sizeof(ctx->name), "%s%d", "mem_repair", instance);
+	group = &ctx->group;
+	group->name = ctx->name;
+	group->attrs = ctx->mem_repair_attrs;
+	group->is_visible = mem_repair_attr_visible;
+	attr_groups[0] = group;
+
+	return 0;
+}
+
+/**
+ * edac_mem_repair_get_desc - build the memory repair sysfs descriptor
+ * @dev: client device that provides the memory repair feature
+ * @attr_groups: container that receives the attribute group in slot 0
+ * @instance: memory repair instance number of the device
+ *
+ * Thin argument-checking wrapper around mem_repair_create_desc().
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - @dev or @attr_groups is NULL.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_mem_repair_get_desc(struct device *dev,
+			     const struct attribute_group **attr_groups, u8 instance)
+{
+	if (dev && attr_groups)
+		return mem_repair_create_desc(dev, attr_groups, instance);
+
+	return -EINVAL;
+}
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index e50d7928bf8f..a45dc6b35ede 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -22,8 +22,7 @@
#include <linux/gfp.h>
#include <linux/fsl/edac.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include "edac_module.h"
@@ -301,7 +300,7 @@ err:
return res;
}
-static int mpc85xx_pci_err_remove(struct platform_device *op)
+static void mpc85xx_pci_err_remove(struct platform_device *op)
{
struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
@@ -313,8 +312,6 @@ static int mpc85xx_pci_err_remove(struct platform_device *op)
edac_pci_del_device(&op->dev);
edac_pci_free_ctl_info(pci);
-
- return 0;
}
static const struct platform_device_id mpc85xx_pci_err_match[] = {
@@ -499,7 +496,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op)
return -ENOMEM;
edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
- "cpu", 1, "L", 1, 2, NULL, 0,
+ "cpu", 1, "L", 1, 2,
edac_dev_idx);
if (!edac_dev) {
devres_release_group(&op->dev, mpc85xx_l2_err_probe);
@@ -592,7 +589,7 @@ err:
return res;
}
-static int mpc85xx_l2_err_remove(struct platform_device *op)
+static void mpc85xx_l2_err_remove(struct platform_device *op)
{
struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&op->dev);
struct mpc85xx_l2_pdata *pdata = edac_dev->pvt_info;
@@ -607,7 +604,6 @@ static int mpc85xx_l2_err_remove(struct platform_device *op)
out_be32(pdata->l2_vbase + MPC85XX_L2_ERRDIS, orig_l2_err_disable);
edac_device_del_device(&op->dev);
edac_device_free_ctl_info(edac_dev);
- return 0;
}
static const struct of_device_id mpc85xx_l2_err_of_match[] = {
@@ -708,8 +704,8 @@ static void __exit mpc85xx_mc_exit(void)
module_exit(mpc85xx_mc_exit);
+MODULE_DESCRIPTION("Freescale MPC85xx Memory Controller EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Montavista Software, Inc.");
module_param(edac_op_state, int, 0444);
-MODULE_PARM_DESC(edac_op_state,
- "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/npcm_edac.c b/drivers/edac/npcm_edac.c
new file mode 100644
index 000000000000..e60a99eb8cfb
--- /dev/null
+++ b/drivers/edac/npcm_edac.c
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2022 Nuvoton Technology Corporation
+
+#include <linux/debugfs.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include "edac_module.h"
+
+#define EDAC_MOD_NAME "npcm-edac"
+#define EDAC_MSG_SIZE 256 /* size of priv_data.message */
+
+/* chip series */
+#define NPCM7XX_CHIP BIT(0)
+#define NPCM8XX_CHIP BIT(1)
+
+/* syndrome values */
+#define UE_SYNDROME 0x03 /* written to XOR_CHECK_BITS when injecting an uncorrectable error */
+
+/* error injection */
+#define ERROR_TYPE_CORRECTABLE 0
+#define ERROR_TYPE_UNCORRECTABLE 1
+#define ERROR_LOCATION_DATA 0
+#define ERROR_LOCATION_CHECKCODE 1
+#define ERROR_BIT_DATA_MAX 63 /* highest bit index accepted when the location is data */
+#define ERROR_BIT_CHECKCODE_MAX 7 /* highest bit index accepted when the location is checkcode */
+
+/*
+ * Syndrome written to XOR_CHECK_BITS to inject a correctable error into a
+ * data bit; indexed by the bit number (0..ERROR_BIT_DATA_MAX).
+ *
+ * Declared as u8 rather than plain char: the values exceed 0x7f, so with a
+ * signed char they would sign-extend when widened to u32. const because the
+ * table is never modified.
+ */
+static const u8 data_synd[] = {
+	0xf4, 0xf1, 0xec, 0xea, 0xe9, 0xe6, 0xe5, 0xe3,
+	0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd3, 0xce, 0xcb,
+	0xb5, 0xb0, 0xad, 0xab, 0xa8, 0xa7, 0xa4, 0xa2,
+	0x9d, 0x9b, 0x98, 0x97, 0x94, 0x92, 0x8f, 0x8a,
+	0x75, 0x70, 0x6d, 0x6b, 0x68, 0x67, 0x64, 0x62,
+	0x5e, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x4f, 0x4a,
+	0x34, 0x31, 0x2c, 0x2a, 0x29, 0x26, 0x25, 0x23,
+	0x1c, 0x1a, 0x19, 0x16, 0x15, 0x13, 0x0e, 0x0b
+};
+
+static struct regmap *npcm_regmap; /* single file-wide regmap, assigned in edac_probe() and used by every handler */
+
+struct npcm_platform_data { /* per-chip register layout, selected through the OF match table */
+ /* chip series: NPCM7XX_CHIP or NPCM8XX_CHIP */
+ int chip;
+
+ /* memory controller registers (offsets passed to regmap_read()/regmap_write()) */
+ u32 ctl_ecc_en;
+ u32 ctl_int_status;
+ u32 ctl_int_ack;
+ u32 ctl_int_mask_master;
+ u32 ctl_int_mask_ecc; /* only written when chip is NPCM8XX_CHIP */
+ u32 ctl_ce_addr_l;
+ u32 ctl_ce_addr_h; /* only read when chip is NPCM8XX_CHIP */
+ u32 ctl_ce_data_l;
+ u32 ctl_ce_data_h; /* only read when chip is NPCM8XX_CHIP */
+ u32 ctl_ce_synd;
+ u32 ctl_ue_addr_l;
+ u32 ctl_ue_addr_h; /* only read when chip is NPCM8XX_CHIP */
+ u32 ctl_ue_data_l;
+ u32 ctl_ue_data_h; /* only read when chip is NPCM8XX_CHIP */
+ u32 ctl_ue_synd;
+ u32 ctl_source_id;
+ u32 ctl_controller_busy; /* polled by force_ecc_error() before injecting */
+ u32 ctl_xor_check_bits; /* error injection register */
+
+ /* masks and shifts */
+ u32 ecc_en_mask;
+ u32 int_status_ce_mask;
+ u32 int_status_ue_mask;
+ u32 int_ack_ce_mask;
+ u32 int_ack_ue_mask;
+ u32 int_mask_master_non_ecc_mask;
+ u32 int_mask_master_global_mask;
+ u32 int_mask_ecc_non_event_mask;
+ u32 ce_addr_h_mask;
+ u32 ce_synd_mask;
+ u32 ce_synd_shift;
+ u32 ue_addr_h_mask;
+ u32 ue_synd_mask;
+ u32 ue_synd_shift;
+ u32 source_id_ce_mask;
+ u32 source_id_ce_shift;
+ u32 source_id_ue_mask;
+ u32 source_id_ue_shift;
+ u32 controller_busy_mask;
+ u32 xor_check_bits_mask;
+ u32 xor_check_bits_shift;
+ u32 writeback_en_mask; /* OR-ed into XOR_CHECK_BITS for correctable injections only */
+ u32 fwc_mask; /* "force write check" bit set last by force_ecc_error() */
+};
+
+struct priv_data { /* driver-private state stored in mci->pvt_info */
+ void __iomem *reg; /* MMIO base mapped in edac_probe() */
+ char message[EDAC_MSG_SIZE]; /* scratch text passed to edac_mc_handle_error() */
+ const struct npcm_platform_data *pdata; /* register layout of the matched chip */
+
+ /* error injection (values are set through the debugfs nodes) */
+ struct dentry *debugfs;
+ u8 error_type;
+ u8 location;
+ u8 bit;
+};
+
+/*
+ * Report one correctable error to the EDAC core.
+ *
+ * Reads the faulting address, data word, source id and syndrome from the
+ * CE registers; the high address/data words exist only on NPCM8XX.
+ */
+static void handle_ce(struct mem_ctl_info *mci)
+{
+	struct priv_data *pvt = mci->pvt_info;
+	const struct npcm_platform_data *cfg = pvt->pdata;
+	bool has_high_words = cfg->chip == NPCM8XX_CHIP;
+	u32 hi = 0, lo, src_id, syndrome;
+	u64 err_addr, err_data;
+
+	/* faulting address */
+	regmap_read(npcm_regmap, cfg->ctl_ce_addr_l, &lo);
+	if (has_high_words) {
+		regmap_read(npcm_regmap, cfg->ctl_ce_addr_h, &hi);
+		hi &= cfg->ce_addr_h_mask;
+	}
+	err_addr = ((u64)hi << 32) | lo;
+
+	/* data word that was read */
+	regmap_read(npcm_regmap, cfg->ctl_ce_data_l, &lo);
+	if (has_high_words)
+		regmap_read(npcm_regmap, cfg->ctl_ce_data_h, &hi);
+	err_data = ((u64)hi << 32) | lo;
+
+	regmap_read(npcm_regmap, cfg->ctl_source_id, &src_id);
+	src_id &= cfg->source_id_ce_mask;
+	src_id >>= cfg->source_id_ce_shift;
+
+	regmap_read(npcm_regmap, cfg->ctl_ce_synd, &syndrome);
+	syndrome &= cfg->ce_synd_mask;
+	syndrome >>= cfg->ce_synd_shift;
+
+	snprintf(pvt->message, EDAC_MSG_SIZE,
+		 "addr = 0x%llx, data = 0x%llx, id = 0x%x",
+		 err_addr, err_data, src_id);
+
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+			     err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK,
+			     syndrome, 0, 0, -1, pvt->message, "");
+}
+
+/*
+ * Report one uncorrectable error to the EDAC core.
+ *
+ * Reads the faulting address, data word, source id and syndrome from the
+ * UE registers; the high address/data words exist only on NPCM8XX.
+ */
+static void handle_ue(struct mem_ctl_info *mci)
+{
+	struct priv_data *pvt = mci->pvt_info;
+	const struct npcm_platform_data *cfg = pvt->pdata;
+	bool has_high_words = cfg->chip == NPCM8XX_CHIP;
+	u32 hi = 0, lo, src_id, syndrome;
+	u64 err_addr, err_data;
+
+	/* faulting address */
+	regmap_read(npcm_regmap, cfg->ctl_ue_addr_l, &lo);
+	if (has_high_words) {
+		regmap_read(npcm_regmap, cfg->ctl_ue_addr_h, &hi);
+		hi &= cfg->ue_addr_h_mask;
+	}
+	err_addr = ((u64)hi << 32) | lo;
+
+	/* data word that was read */
+	regmap_read(npcm_regmap, cfg->ctl_ue_data_l, &lo);
+	if (has_high_words)
+		regmap_read(npcm_regmap, cfg->ctl_ue_data_h, &hi);
+	err_data = ((u64)hi << 32) | lo;
+
+	regmap_read(npcm_regmap, cfg->ctl_source_id, &src_id);
+	src_id &= cfg->source_id_ue_mask;
+	src_id >>= cfg->source_id_ue_shift;
+
+	regmap_read(npcm_regmap, cfg->ctl_ue_synd, &syndrome);
+	syndrome &= cfg->ue_synd_mask;
+	syndrome >>= cfg->ue_synd_shift;
+
+	snprintf(pvt->message, EDAC_MSG_SIZE,
+		 "addr = 0x%llx, data = 0x%llx, id = 0x%x",
+		 err_addr, err_data, src_id);
+
+	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+			     err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK,
+			     syndrome, 0, 0, -1, pvt->message, "");
+}
+
+/*
+ * ECC interrupt handler; dev_id is the mem_ctl_info registered in setup_irq().
+ *
+ * Handles a pending correctable error in preference to an uncorrectable
+ * one and acknowledges only the kind it handled. An interrupt with neither
+ * status bit set triggers a one-time warning and is reported as not ours.
+ */
+static irqreturn_t edac_ecc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct priv_data *pvt = mci->pvt_info;
+	const struct npcm_platform_data *cfg = pvt->pdata;
+	u32 pending, ack;
+
+	regmap_read(npcm_regmap, cfg->ctl_int_status, &pending);
+
+	if (pending & cfg->int_status_ce_mask) {
+		handle_ce(mci);
+		ack = cfg->int_ack_ce_mask;
+	} else if (pending & cfg->int_status_ue_mask) {
+		handle_ue(mci);
+		ack = cfg->int_ack_ue_mask;
+	} else {
+		WARN_ON_ONCE(1);
+		return IRQ_NONE;
+	}
+
+	/* acknowledge the interrupt that was just handled */
+	regmap_write(npcm_regmap, cfg->ctl_int_ack, ack);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * debugfs write handler of the "force_ecc_error" node: inject an ECC error.
+ *
+ * The written bytes are ignored; what gets injected is selected by the
+ * error_type, location and bit values stored in the private data. The
+ * function always returns @count, also when the request is rejected (the
+ * reason is logged), which matches how the node has always behaved.
+ *
+ * error_type and location are free-form bytes set through debugfs, so any
+ * value other than the defined ones is rejected here. Previously a location
+ * above 1 skipped both range checks and reached "1 << bit" with bit up to
+ * 255 (undefined shift), and an unknown error_type still set the
+ * force-write-check bit.
+ */
+static ssize_t force_ecc_error(struct file *file, const char __user *data,
+			       size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata;
+	u32 val, syndrome;
+	int ret;
+
+	pdata = priv->pdata;
+	edac_printk(KERN_INFO, EDAC_MOD_NAME,
+		    "force an ECC error, type = %d, location = %d, bit = %d\n",
+		    priv->error_type, priv->location, priv->bit);
+
+	/* ensure no pending writes */
+	ret = regmap_read_poll_timeout(npcm_regmap, pdata->ctl_controller_busy,
+				       val, !(val & pdata->controller_busy_mask),
+				       1000, 10000);
+	if (ret) {
+		edac_printk(KERN_INFO, EDAC_MOD_NAME,
+			    "wait pending writes timeout\n");
+		return count;
+	}
+
+	regmap_read(npcm_regmap, pdata->ctl_xor_check_bits, &val);
+	val &= ~pdata->xor_check_bits_mask;
+
+	/* write syndrome to XOR_CHECK_BITS */
+	if (priv->error_type == ERROR_TYPE_CORRECTABLE) {
+		switch (priv->location) {
+		case ERROR_LOCATION_DATA:
+			if (priv->bit > ERROR_BIT_DATA_MAX) {
+				edac_printk(KERN_INFO, EDAC_MOD_NAME,
+					    "data bit should not exceed %d (%d)\n",
+					    ERROR_BIT_DATA_MAX, priv->bit);
+				return count;
+			}
+			/* the cast keeps table values above 0x7f from sign-extending */
+			syndrome = (u8)data_synd[priv->bit];
+			break;
+		case ERROR_LOCATION_CHECKCODE:
+			if (priv->bit > ERROR_BIT_CHECKCODE_MAX) {
+				edac_printk(KERN_INFO, EDAC_MOD_NAME,
+					    "checkcode bit should not exceed %d (%d)\n",
+					    ERROR_BIT_CHECKCODE_MAX, priv->bit);
+				return count;
+			}
+			syndrome = 1U << priv->bit;
+			break;
+		default:
+			edac_printk(KERN_INFO, EDAC_MOD_NAME,
+				    "unknown location %d\n", priv->location);
+			return count;
+		}
+
+		regmap_write(npcm_regmap, pdata->ctl_xor_check_bits,
+			     val | (syndrome << pdata->xor_check_bits_shift) |
+			     pdata->writeback_en_mask);
+	} else if (priv->error_type == ERROR_TYPE_UNCORRECTABLE) {
+		regmap_write(npcm_regmap, pdata->ctl_xor_check_bits,
+			     val | (UE_SYNDROME << pdata->xor_check_bits_shift));
+	} else {
+		edac_printk(KERN_INFO, EDAC_MOD_NAME,
+			    "unknown error type %d\n", priv->error_type);
+		return count;
+	}
+
+	/* force write check */
+	regmap_update_bits(npcm_regmap, pdata->ctl_xor_check_bits,
+			   pdata->fwc_mask, pdata->fwc_mask);
+
+	return count;
+}
+
+static const struct file_operations force_ecc_error_fops = { /* file operations of the debugfs "force_ecc_error" trigger; no .read is provided */
+ .open = simple_open,
+ .write = force_ecc_error,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Create the debugfs nodes used for error injection.
+ *
+ * Nodes (in a directory named after mci->mod_name):
+ *   error_type      - 0: CE, 1: UE
+ *   location        - 0: data, 1: checkcode
+ *   bit             - 0 ~ 63 for data and 0 ~ 7 for checkcode
+ *   force_ecc_error - trigger
+ *
+ * Examples:
+ *   1. Inject a correctable error (CE) at checkcode bit 7.
+ *      ~# echo 0 > /sys/kernel/debug/edac/npcm-edac/error_type
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/location
+ *      ~# echo 7 > /sys/kernel/debug/edac/npcm-edac/bit
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error
+ *
+ *   2. Inject an uncorrectable error (UE).
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/error_type
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error
+ *
+ * Nothing is created if the directory itself cannot be created.
+ */
+static void setup_debugfs(struct mem_ctl_info *mci)
+{
+	struct priv_data *pvt = mci->pvt_info;
+	struct dentry *dir = edac_debugfs_create_dir(mci->mod_name);
+
+	pvt->debugfs = dir;
+	if (!dir)
+		return;
+
+	/* injection parameters */
+	edac_debugfs_create_x8("error_type", 0644, dir, &pvt->error_type);
+	edac_debugfs_create_x8("location", 0644, dir, &pvt->location);
+	edac_debugfs_create_x8("bit", 0644, dir, &pvt->bit);
+
+	/* trigger */
+	edac_debugfs_create_file("force_ecc_error", 0200, dir, &mci->dev,
+				 &force_ecc_error_fops);
+}
+
+/*
+ * Request the controller's interrupt and unmask the ECC interrupt group.
+ *
+ * The handler is registered device-managed with @mci as its dev_id.
+ * Returns 0 on success or the negative error from looking up or requesting
+ * the IRQ.
+ */
+static int setup_irq(struct mem_ctl_info *mci, struct platform_device *pdev)
+{
+	const struct priv_data *pvt = mci->pvt_info;
+	const struct npcm_platform_data *cfg = pvt->pdata;
+	int irq_num, err;
+
+	irq_num = platform_get_irq(pdev, 0);
+	if (irq_num < 0) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "IRQ not defined in DTS\n");
+		return irq_num;
+	}
+
+	err = devm_request_irq(&pdev->dev, irq_num, edac_ecc_isr, 0,
+			       dev_name(&pdev->dev), mci);
+	if (err < 0) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "failed to request IRQ\n");
+		return err;
+	}
+
+	/* enable the functional group of ECC and mask the others */
+	regmap_write(npcm_regmap, cfg->ctl_int_mask_master,
+		     cfg->int_mask_master_non_ecc_mask);
+
+	/* only NPCM8XX has the separate ECC interrupt mask register */
+	if (cfg->chip == NPCM8XX_CHIP)
+		regmap_write(npcm_regmap, cfg->ctl_int_mask_ecc,
+			     cfg->int_mask_ecc_non_event_mask);
+
+	return 0;
+}
+
+static const struct regmap_config npcm_regmap_cfg = { /* MMIO regmap layout used by edac_probe(): 32-bit registers at a 4-byte stride */
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+};
+
+/*
+ * Probe one NPCM memory controller.
+ *
+ * Maps the registers, refuses to bind when ECC is not enabled in hardware,
+ * then allocates and registers a single-layer memory controller and hooks
+ * up the ECC interrupt. On NPCM8XX with CONFIG_EDAC_DEBUG the error
+ * injection debugfs nodes are created as well.
+ *
+ * The interrupt is set up only after edac_mc_add_mc() succeeded: the
+ * handler receives @mci as dev_id, so requesting and unmasking it earlier
+ * would leave a live handler pointing at freed memory if registration
+ * failed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int edac_probe(struct platform_device *pdev)
+{
+	const struct npcm_platform_data *pdata;
+	struct device *dev = &pdev->dev;
+	struct edac_mc_layer layers[1];
+	struct mem_ctl_info *mci;
+	struct priv_data *priv;
+	void __iomem *reg;
+	u32 val;
+	int rc;
+
+	reg = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	npcm_regmap = devm_regmap_init_mmio(dev, reg, &npcm_regmap_cfg);
+	if (IS_ERR(npcm_regmap))
+		return PTR_ERR(npcm_regmap);
+
+	pdata = of_device_get_match_data(dev);
+	if (!pdata)
+		return -EINVAL;
+
+	/* bail out if ECC is not enabled */
+	regmap_read(npcm_regmap, pdata->ctl_ecc_en, &val);
+	if (!(val & pdata->ecc_en_mask)) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "ECC is not enabled\n");
+		return -EPERM;
+	}
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+	layers[0].size = 1;
+
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct priv_data));
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	priv = mci->pvt_info;
+	priv->reg = reg;
+	priv->pdata = pdata;
+	platform_set_drvdata(pdev, mci);
+
+	mci->mtype_cap = MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+	mci->scrub_mode = SCRUB_HW_SRC;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->ctl_name = "npcm_ddr_controller";
+	mci->dev_name = dev_name(&pdev->dev);
+	mci->mod_name = EDAC_MOD_NAME;
+	mci->ctl_page_to_phys = NULL;
+
+	rc = edac_mc_add_mc(mci);
+	if (rc)
+		goto free_edac_mc;
+
+	/* last step that can fail, so no handler ever outlives mci */
+	rc = setup_irq(mci, pdev);
+	if (rc)
+		goto del_edac_mc;
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP)
+		setup_debugfs(mci);
+
+	return 0;
+
+del_edac_mc:
+	edac_mc_del_mc(&pdev->dev);
+free_edac_mc:
+	edac_mc_free(mci);
+	return rc;
+}
+
+/*
+ * Unbind the driver from one memory controller.
+ *
+ * All controller interrupts are masked before the memory controller is
+ * unregistered and freed: the interrupt handler is device-managed and stays
+ * registered until after this function returns, and it dereferences @mci,
+ * so leaving ECC interrupts enabled across edac_mc_free() would let it run
+ * on freed memory. The ECC enable bits are cleared last, as before.
+ */
+static void edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata;
+
+	pdata = priv->pdata;
+
+	/* quiesce the hardware first so the ISR cannot fire during teardown */
+	regmap_write(npcm_regmap, pdata->ctl_int_mask_master,
+		     pdata->int_mask_master_global_mask);
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP)
+		edac_debugfs_remove_recursive(priv->debugfs);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	/* pdata points at static match data, so it is still valid here */
+	regmap_update_bits(npcm_regmap, pdata->ctl_ecc_en, pdata->ecc_en_mask, 0);
+}
+
+static const struct npcm_platform_data npcm750_edac = { /* layout for "nuvoton,npcm750-memory-controller"; high-word and injection registers are left unset (zero) */
+ .chip = NPCM7XX_CHIP,
+
+ /* memory controller registers */
+ .ctl_ecc_en = 0x174,
+ .ctl_int_status = 0x1d0,
+ .ctl_int_ack = 0x1d4,
+ .ctl_int_mask_master = 0x1d8,
+ .ctl_ce_addr_l = 0x188,
+ .ctl_ce_data_l = 0x190,
+ .ctl_ce_synd = 0x18c,
+ .ctl_ue_addr_l = 0x17c,
+ .ctl_ue_data_l = 0x184,
+ .ctl_ue_synd = 0x180,
+ .ctl_source_id = 0x194,
+
+ /* masks and shifts */
+ .ecc_en_mask = BIT(24),
+ .int_status_ce_mask = GENMASK(4, 3),
+ .int_status_ue_mask = GENMASK(6, 5),
+ .int_ack_ce_mask = GENMASK(4, 3),
+ .int_ack_ue_mask = GENMASK(6, 5),
+ .int_mask_master_non_ecc_mask = GENMASK(30, 7) | GENMASK(2, 0), /* every bit except the CE/UE bits 6..3 and the global bit 31 */
+ .int_mask_master_global_mask = BIT(31),
+ .ce_synd_mask = GENMASK(6, 0),
+ .ce_synd_shift = 0,
+ .ue_synd_mask = GENMASK(6, 0),
+ .ue_synd_shift = 0,
+ .source_id_ce_mask = GENMASK(29, 16),
+ .source_id_ce_shift = 16,
+ .source_id_ue_mask = GENMASK(13, 0),
+ .source_id_ue_shift = 0,
+};
+
+static const struct npcm_platform_data npcm845_edac = { /* layout for "nuvoton,npcm845-memory-controller", including the error-injection registers */
+ .chip = NPCM8XX_CHIP,
+
+ /* memory controller registers */
+ .ctl_ecc_en = 0x16c,
+ .ctl_int_status = 0x228,
+ .ctl_int_ack = 0x244,
+ .ctl_int_mask_master = 0x220,
+ .ctl_int_mask_ecc = 0x260,
+ .ctl_ce_addr_l = 0x18c,
+ .ctl_ce_addr_h = 0x190,
+ .ctl_ce_data_l = 0x194,
+ .ctl_ce_data_h = 0x198,
+ .ctl_ce_synd = 0x190, /* same offset as ctl_ce_addr_h; the fields are separated by ce_synd_mask and ce_addr_h_mask */
+ .ctl_ue_addr_l = 0x17c,
+ .ctl_ue_addr_h = 0x180,
+ .ctl_ue_data_l = 0x184,
+ .ctl_ue_data_h = 0x188,
+ .ctl_ue_synd = 0x180, /* same offset as ctl_ue_addr_h; the fields are separated by ue_synd_mask and ue_addr_h_mask */
+ .ctl_source_id = 0x19c,
+ .ctl_controller_busy = 0x20c,
+ .ctl_xor_check_bits = 0x174,
+
+ /* masks and shifts */
+ .ecc_en_mask = GENMASK(17, 16),
+ .int_status_ce_mask = GENMASK(1, 0),
+ .int_status_ue_mask = GENMASK(3, 2),
+ .int_ack_ce_mask = GENMASK(1, 0),
+ .int_ack_ue_mask = GENMASK(3, 2),
+ .int_mask_master_non_ecc_mask = GENMASK(30, 3) | GENMASK(1, 0),
+ .int_mask_master_global_mask = BIT(31),
+ .int_mask_ecc_non_event_mask = GENMASK(8, 4),
+ .ce_addr_h_mask = GENMASK(1, 0),
+ .ce_synd_mask = GENMASK(15, 8),
+ .ce_synd_shift = 8,
+ .ue_addr_h_mask = GENMASK(1, 0),
+ .ue_synd_mask = GENMASK(15, 8),
+ .ue_synd_shift = 8,
+ .source_id_ce_mask = GENMASK(29, 16),
+ .source_id_ce_shift = 16,
+ .source_id_ue_mask = GENMASK(13, 0),
+ .source_id_ue_shift = 0,
+ .controller_busy_mask = BIT(0),
+ .xor_check_bits_mask = GENMASK(23, 16),
+ .xor_check_bits_shift = 16,
+ .writeback_en_mask = BIT(24),
+ .fwc_mask = BIT(8),
+};
+
+static const struct of_device_id npcm_edac_of_match[] = { /* maps each compatible string to its register layout, fetched in edac_probe() */
+ {
+ .compatible = "nuvoton,npcm750-memory-controller",
+ .data = &npcm750_edac
+ },
+ {
+ .compatible = "nuvoton,npcm845-memory-controller",
+ .data = &npcm845_edac
+ },
+ {}, /* sentinel */
+};
+
+MODULE_DEVICE_TABLE(of, npcm_edac_of_match);
+
+static struct platform_driver npcm_edac_driver = { /* platform driver glue: binds through npcm_edac_of_match */
+ .driver = {
+ .name = "npcm-edac",
+ .of_match_table = npcm_edac_of_match,
+ },
+ .probe = edac_probe,
+ .remove = edac_remove,
+};
+
+module_platform_driver(npcm_edac_driver);
+
+MODULE_AUTHOR("Medad CChien <medadyoung@gmail.com>");
+MODULE_AUTHOR("Marvin Lin <kflin@nuvoton.com>");
+MODULE_DESCRIPTION("Nuvoton NPCM EDAC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c
index c33059e9b0be..e6b1595a3cb5 100644
--- a/drivers/edac/octeon_edac-l2c.c
+++ b/drivers/edac/octeon_edac-l2c.c
@@ -138,7 +138,7 @@ static int octeon_l2c_probe(struct platform_device *pdev)
/* 'Tags' are block 0, 'Data' is block 1*/
l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0,
- NULL, 0, edac_device_alloc_index());
+ edac_device_alloc_index());
if (!l2c)
return -ENOMEM;
@@ -184,14 +184,12 @@ err:
return -ENXIO;
}
-static int octeon_l2c_remove(struct platform_device *pdev)
+static void octeon_l2c_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(l2c);
-
- return 0;
}
static struct platform_driver octeon_l2c_driver = {
@@ -203,5 +201,6 @@ static struct platform_driver octeon_l2c_driver = {
};
module_platform_driver(octeon_l2c_driver);
+MODULE_DESCRIPTION("Cavium Octeon Secondary Caches (L2C) EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c
index aeb222ca3ed1..f7176b95b4fe 100644
--- a/drivers/edac/octeon_edac-lmc.c
+++ b/drivers/edac/octeon_edac-lmc.c
@@ -302,13 +302,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev)
return 0;
}
-static int octeon_lmc_edac_remove(struct platform_device *pdev)
+static void octeon_lmc_edac_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
- return 0;
}
static struct platform_driver octeon_lmc_edac_driver = {
@@ -320,5 +319,6 @@ static struct platform_driver octeon_lmc_edac_driver = {
};
module_platform_driver(octeon_lmc_edac_driver);
+MODULE_DESCRIPTION("Cavium Octeon DRAM Memory Controller (LMC) EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
index 754eced59c32..aa1219db0b17 100644
--- a/drivers/edac/octeon_edac-pc.c
+++ b/drivers/edac/octeon_edac-pc.c
@@ -92,7 +92,7 @@ static int co_cache_error_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, p);
p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
- "cache", 2, 0, NULL, 0,
+ "cache", 2, 0,
edac_device_alloc_index());
if (!p->ed)
goto err;
@@ -119,14 +119,13 @@ err:
return -ENXIO;
}
-static int co_cache_error_remove(struct platform_device *pdev)
+static void co_cache_error_remove(struct platform_device *pdev)
{
struct co_cache_error *p = platform_get_drvdata(pdev);
unregister_co_cache_error_notifier(&p->notifier);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(p->ed);
- return 0;
}
static struct platform_driver co_cache_error_driver = {
@@ -138,5 +137,6 @@ static struct platform_driver co_cache_error_driver = {
};
module_platform_driver(co_cache_error_driver);
+MODULE_DESCRIPTION("Cavium Octeon Primary Caches EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c
index 28b238eecefc..c4f3bc33a971 100644
--- a/drivers/edac/octeon_edac-pci.c
+++ b/drivers/edac/octeon_edac-pci.c
@@ -87,14 +87,12 @@ err:
return res;
}
-static int octeon_pci_remove(struct platform_device *pdev)
+static void octeon_pci_remove(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
edac_pci_del_device(&pdev->dev);
edac_pci_free_ctl_info(pci);
-
- return 0;
}
static struct platform_driver octeon_pci_driver = {
@@ -106,5 +104,6 @@ static struct platform_driver octeon_pci_driver = {
};
module_platform_driver(octeon_pci_driver);
+MODULE_DESCRIPTION("Cavium Octeon PCI Controller EDAC driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 2b306f2cc605..af14c8a3279f 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -16,18 +16,20 @@
* rank, bank, row and column using the appropriate "dunit_ops" functions/parameters.
*/
-#include <linux/module.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/mmzone.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
+#include <linux/sizes.h>
#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/edac.h>
-#include <linux/mmzone.h>
#include <linux/smp.h>
-#include <linux/bitmap.h>
-#include <linux/math64.h>
-#include <linux/mod_devicetable.h>
+
#include <linux/platform_data/x86/p2sb.h>
#include <asm/cpu_device_id.h>
@@ -109,7 +111,6 @@ static struct mem_ctl_info *pnd2_mci;
#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12
#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13
#define SELECTOR_DISABLED (-1)
-#define _4GB (1ul << 32)
#define PMI_ADDRESS_WIDTH 31
#define PND_MAX_PHYS_BIT 39
@@ -183,7 +184,7 @@ static int _apl_rd_reg(int port, int off, int op, u32 *data)
}
P2SB_READ(dword, P2SB_DATA_OFF, data);
- ret = (status >> 1) & 0x3;
+ ret = (status >> 1) & GENMASK(1, 0);
out:
/* Hide the P2SB device, if it was hidden before */
if (hidden)
@@ -307,7 +308,7 @@ static bool two_channels; /* Both PMI channels in one slice enabled */
static u8 sym_chan_mask;
static u8 asym_chan_mask;
-static u8 chan_mask;
+static unsigned long chan_mask;
static int slice_selector = -1;
static int chan_selector = -1;
@@ -329,7 +330,7 @@ static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask)
return;
}
if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) {
- pr_info(FW_BUG "MOT mask not power of two\n");
+ pr_info(FW_BUG "MOT mask is invalid\n");
return;
}
if (base & ~mask) {
@@ -371,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p,
struct b_cr_asym_mem_region1_mchbar *as1,
struct b_cr_asym_2way_mem_region_mchbar *as2way)
{
- const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
+ static const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
int mask = 0;
if (as2way->asym_2way_interleave_enable)
@@ -488,7 +489,7 @@ static int dnv_get_registers(void)
*/
static int get_registers(void)
{
- const int intlv[] = { 10, 11, 12, 12 };
+ static const int intlv[] = { 10, 11, 12, 12 };
if (RD_REG(&tolud, b_cr_tolud_pci) ||
RD_REG(&touud_lo, b_cr_touud_lo_pci) ||
@@ -587,7 +588,7 @@ static int get_registers(void)
/* Get a contiguous memory address (remove the MMIO gap) */
static u64 remove_mmio_gap(u64 sys)
{
- return (sys < _4GB) ? sys : sys - (_4GB - top_lm);
+ return (sys < SZ_4G) ? sys : sys - (SZ_4G - top_lm);
}
/* Squeeze out one address bit, shift upper part down to fill gap */
@@ -598,7 +599,7 @@ static void remove_addr_bit(u64 *addr, int bitidx)
if (bitidx == -1)
return;
- mask = (1ull << bitidx) - 1;
+ mask = BIT_ULL(bitidx) - 1;
*addr = ((*addr >> 1) & ~mask) | (*addr & mask);
}
@@ -642,8 +643,8 @@ static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg)
int sym_chan_shift = sym_channels >> 1;
/* Give up if address is out of range, or in MMIO gap */
- if (addr >= (1ul << PND_MAX_PHYS_BIT) ||
- (addr >= top_lm && addr < _4GB) || addr >= top_hm) {
+ if (addr >= BIT(PND_MAX_PHYS_BIT) ||
+ (addr >= top_lm && addr < SZ_4G) || addr >= top_hm) {
snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr);
return -EINVAL;
}
@@ -727,10 +728,10 @@ static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg)
}
/* Translate PMI address to memory (rank, row, bank, column) */
-#define C(n) (0x10 | (n)) /* column */
-#define B(n) (0x20 | (n)) /* bank */
-#define R(n) (0x40 | (n)) /* row */
-#define RS (0x80) /* rank */
+#define C(n) (BIT(4) | (n)) /* column */
+#define B(n) (BIT(5) | (n)) /* bank */
+#define R(n) (BIT(6) | (n)) /* row */
+#define RS (BIT(7)) /* rank */
/* addrdec values */
#define AMAP_1KB 0
@@ -1064,9 +1065,9 @@ static int apl_check_ecc_active(void)
int i, ret = 0;
/* Check dramtype and ECC mode for each present DIMM */
- for (i = 0; i < APL_NUM_CHANNELS; i++)
- if (chan_mask & BIT(i))
- ret += check_channel(i);
+ for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS)
+ ret += check_channel(i);
+
return ret ? -EINVAL : 0;
}
@@ -1205,10 +1206,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci)
u64 capacity;
int i, g;
- for (i = 0; i < APL_NUM_CHANNELS; i++) {
- if (!(chan_mask & BIT(i)))
- continue;
-
+ for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS) {
dimm = edac_get_dimm(mci, i, 0, 0);
if (!dimm) {
edac_dbg(0, "No allocated DIMM for channel %d\n", i);
@@ -1228,8 +1226,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci)
}
pvt->dimm_geom[i] = g;
- capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) *
- (1ul << dimms[g].colbits);
+ capacity = (d->rken0 + d->rken1) * 8 * BIT(dimms[g].rowbits + dimms[g].colbits);
edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3));
dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
dimm->grain = 32;
@@ -1295,7 +1292,7 @@ static void dnv_get_dimm_config(struct mem_ctl_info *mci)
continue;
}
- capacity = ranks_of_dimm[j] * banks * (1ul << rowbits) * (1ul << colbits);
+ capacity = ranks_of_dimm[j] * banks * BIT(rowbits + colbits);
edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3));
dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
dimm->grain = 32;
@@ -1514,8 +1511,8 @@ static struct dunit_ops dnv_ops = {
};
static const struct x86_cpu_id pnd2_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &apl_ops),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &dnv_ops),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &apl_ops),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &dnv_ops),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
deleted file mode 100644
index 046969b4e82e..000000000000
--- a/drivers/edac/ppc4xx_edac.c
+++ /dev/null
@@ -1,1428 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2008 Nuovation System Designs, LLC
- * Grant Erickson <gerickson@nuovations.com>
- */
-
-#include <linux/edac.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#include <linux/types.h>
-
-#include <asm/dcr.h>
-
-#include "edac_module.h"
-#include "ppc4xx_edac.h"
-
-/*
- * This file implements a driver for monitoring and handling events
- * associated with the IMB DDR2 ECC controller found in the AMCC/IBM
- * 405EX[r], 440SP, 440SPe, 460EX, 460GT and 460SX.
- *
- * As realized in the 405EX[r], this controller features:
- *
- * - Support for registered- and non-registered DDR1 and DDR2 memory.
- * - 32-bit or 16-bit memory interface with optional ECC.
- *
- * o ECC support includes:
- *
- * - 4-bit SEC/DED
- * - Aligned-nibble error detect
- * - Bypass mode
- *
- * - Two (2) memory banks/ranks.
- * - Up to 1 GiB per bank/rank in 32-bit mode and up to 512 MiB per
- * bank/rank in 16-bit mode.
- *
- * As realized in the 440SP and 440SPe, this controller changes/adds:
- *
- * - 64-bit or 32-bit memory interface with optional ECC.
- *
- * o ECC support includes:
- *
- * - 8-bit SEC/DED
- * - Aligned-nibble error detect
- * - Bypass mode
- *
- * - Up to 4 GiB per bank/rank in 64-bit mode and up to 2 GiB
- * per bank/rank in 32-bit mode.
- *
- * As realized in the 460EX and 460GT, this controller changes/adds:
- *
- * - 64-bit or 32-bit memory interface with optional ECC.
- *
- * o ECC support includes:
- *
- * - 8-bit SEC/DED
- * - Aligned-nibble error detect
- * - Bypass mode
- *
- * - Four (4) memory banks/ranks.
- * - Up to 16 GiB per bank/rank in 64-bit mode and up to 8 GiB
- * per bank/rank in 32-bit mode.
- *
- * At present, this driver has ONLY been tested against the controller
- * realization in the 405EX[r] on the AMCC Kilauea and Haleakala
- * boards (256 MiB w/o ECC memory soldered onto the board) and a
- * proprietary board based on those designs (128 MiB ECC memory, also
- * soldered onto the board).
- *
- * Dynamic feature detection and handling needs to be added for the
- * other realizations of this controller listed above.
- *
- * Eventually, this driver will likely be adapted to the above variant
- * realizations of this controller as well as broken apart to handle
- * the other known ECC-capable controllers prevalent in other 4xx
- * processors:
- *
- * - IBM SDRAM (405GP, 405CR and 405EP) "ibm,sdram-4xx"
- * - IBM DDR1 (440GP, 440GX, 440EP and 440GR) "ibm,sdram-4xx-ddr"
- * - Denali DDR1/DDR2 (440EPX and 440GRX) "denali,sdram-4xx-ddr2"
- *
- * For this controller, unfortunately, correctable errors report
- * nothing more than the beat/cycle and byte/lane the correction
- * occurred on and the check bit group that covered the error.
- *
- * In contrast, uncorrectable errors also report the failing address,
- * the bus master and the transaction direction (i.e. read or write)
- *
- * Regardless of whether the error is a CE or a UE, we report the
- * following pieces of information in the driver-unique message to the
- * EDAC subsystem:
- *
- * - Device tree path
- * - Bank(s)
- * - Check bit error group
- * - Beat(s)/lane(s)
- */
-
-/* Preprocessor Definitions */
-
-#define EDAC_OPSTATE_INT_STR "interrupt"
-#define EDAC_OPSTATE_POLL_STR "polled"
-#define EDAC_OPSTATE_UNKNOWN_STR "unknown"
-
-#define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac"
-#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0"
-
-#define PPC4XX_EDAC_MESSAGE_SIZE 256
-
-/*
- * Kernel logging without an EDAC instance
- */
-#define ppc4xx_edac_printk(level, fmt, arg...) \
- edac_printk(level, "PPC4xx MC", fmt, ##arg)
-
-/*
- * Kernel logging with an EDAC instance
- */
-#define ppc4xx_edac_mc_printk(level, mci, fmt, arg...) \
- edac_mc_chipset_printk(mci, level, "PPC4xx", fmt, ##arg)
-
-/*
- * Macros to convert bank configuration size enumerations into MiB and
- * page values.
- */
-#define SDRAM_MBCF_SZ_MiB_MIN 4
-#define SDRAM_MBCF_SZ_TO_MiB(n) (SDRAM_MBCF_SZ_MiB_MIN \
- << (SDRAM_MBCF_SZ_DECODE(n)))
-#define SDRAM_MBCF_SZ_TO_PAGES(n) (SDRAM_MBCF_SZ_MiB_MIN \
- << (20 - PAGE_SHIFT + \
- SDRAM_MBCF_SZ_DECODE(n)))
-
-/*
- * The ibm,sdram-4xx-ddr2 Device Control Registers (DCRs) are
- * indirectly accessed and have a base and length defined by the
- * device tree. The base can be anything; however, we expect the
- * length to be precisely two registers, the first for the address
- * window and the second for the data window.
- */
-#define SDRAM_DCR_RESOURCE_LEN 2
-#define SDRAM_DCR_ADDR_OFFSET 0
-#define SDRAM_DCR_DATA_OFFSET 1
-
-/*
- * Device tree interrupt indices
- */
-#define INTMAP_ECCDED_INDEX 0 /* Double-bit Error Detect */
-#define INTMAP_ECCSEC_INDEX 1 /* Single-bit Error Correct */
-
-/* Type Definitions */
-
-/*
- * PPC4xx SDRAM memory controller private instance data
- */
-struct ppc4xx_edac_pdata {
- dcr_host_t dcr_host; /* Indirect DCR address/data window mapping */
- struct {
- int sec; /* Single-bit correctable error IRQ assigned */
- int ded; /* Double-bit detectable error IRQ assigned */
- } irqs;
-};
-
-/*
- * Various status data gathered and manipulated when checking and
- * reporting ECC status.
- */
-struct ppc4xx_ecc_status {
- u32 ecces;
- u32 besr;
- u32 bearh;
- u32 bearl;
- u32 wmirq;
-};
-
-/* Global Variables */
-
-/*
- * Device tree node type and compatible tuples this driver can match
- * on.
- */
-static const struct of_device_id ppc4xx_edac_match[] = {
- {
- .compatible = "ibm,sdram-4xx-ddr2"
- },
- { }
-};
-MODULE_DEVICE_TABLE(of, ppc4xx_edac_match);
-
-/*
- * TODO: The row and channel parameters likely need to be dynamically
- * set based on the aforementioned variant controller realizations.
- */
-static const unsigned ppc4xx_edac_nr_csrows = 2;
-static const unsigned ppc4xx_edac_nr_chans = 1;
-
-/*
- * Strings associated with PLB master IDs capable of being posted in
- * SDRAM_BESR or SDRAM_WMIRQ on uncorrectable ECC errors.
- */
-static const char * const ppc4xx_plb_masters[9] = {
- [SDRAM_PLB_M0ID_ICU] = "ICU",
- [SDRAM_PLB_M0ID_PCIE0] = "PCI-E 0",
- [SDRAM_PLB_M0ID_PCIE1] = "PCI-E 1",
- [SDRAM_PLB_M0ID_DMA] = "DMA",
- [SDRAM_PLB_M0ID_DCU] = "DCU",
- [SDRAM_PLB_M0ID_OPB] = "OPB",
- [SDRAM_PLB_M0ID_MAL] = "MAL",
- [SDRAM_PLB_M0ID_SEC] = "SEC",
- [SDRAM_PLB_M0ID_AHB] = "AHB"
-};
-
-/**
- * mfsdram - read and return controller register data
- * @dcr_host: A pointer to the DCR mapping.
- * @idcr_n: The indirect DCR register to read.
- *
- * This routine reads and returns the data associated with the
- * controller's specified indirect DCR register.
- *
- * Returns the read data.
- */
-static inline u32
-mfsdram(const dcr_host_t *dcr_host, unsigned int idcr_n)
-{
- return __mfdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET,
- dcr_host->base + SDRAM_DCR_DATA_OFFSET,
- idcr_n);
-}
-
-/**
- * mtsdram - write controller register data
- * @dcr_host: A pointer to the DCR mapping.
- * @idcr_n: The indirect DCR register to write.
- * @value: The data to write.
- *
- * This routine writes the provided data to the controller's specified
- * indirect DCR register.
- */
-static inline void
-mtsdram(const dcr_host_t *dcr_host, unsigned int idcr_n, u32 value)
-{
- return __mtdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET,
- dcr_host->base + SDRAM_DCR_DATA_OFFSET,
- idcr_n,
- value);
-}
-
-/**
- * ppc4xx_edac_check_bank_error - check a bank for an ECC bank error
- * @status: A pointer to the ECC status structure to check for an
- * ECC bank error.
- * @bank: The bank to check for an ECC error.
- *
- * This routine determines whether the specified bank has an ECC
- * error.
- *
- * Returns true if the specified bank has an ECC error; otherwise,
- * false.
- */
-static bool
-ppc4xx_edac_check_bank_error(const struct ppc4xx_ecc_status *status,
- unsigned int bank)
-{
- switch (bank) {
- case 0:
- return status->ecces & SDRAM_ECCES_BK0ER;
- case 1:
- return status->ecces & SDRAM_ECCES_BK1ER;
- default:
- return false;
- }
-}
-
-/**
- * ppc4xx_edac_generate_bank_message - generate interpretted bank status message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the bank message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the portion of the
- * driver-unique report message associated with the ECCESS[BKNER]
- * field of the specified ECC status.
- *
- * Returns the number of characters generated on success; otherwise, <
- * 0 on error.
- */
-static int
-ppc4xx_edac_generate_bank_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- int n, total = 0;
- unsigned int row, rows;
-
- n = snprintf(buffer, size, "%s: Banks: ", mci->dev_name);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- for (rows = 0, row = 0; row < mci->nr_csrows; row++) {
- if (ppc4xx_edac_check_bank_error(status, row)) {
- n = snprintf(buffer, size, "%s%u",
- (rows++ ? ", " : ""), row);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
- }
- }
-
- n = snprintf(buffer, size, "%s; ", rows ? "" : "None");
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- fail:
- return total;
-}
-
-/**
- * ppc4xx_edac_generate_checkbit_message - generate interpretted checkbit message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the checkbit message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the portion of the
- * driver-unique report message associated with the ECCESS[CKBER]
- * field of the specified ECC status.
- *
- * Returns the number of characters generated on success; otherwise, <
- * 0 on error.
- */
-static int
-ppc4xx_edac_generate_checkbit_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
- const char *ckber = NULL;
-
- switch (status->ecces & SDRAM_ECCES_CKBER_MASK) {
- case SDRAM_ECCES_CKBER_NONE:
- ckber = "None";
- break;
- case SDRAM_ECCES_CKBER_32_ECC_0_3:
- ckber = "ECC0:3";
- break;
- case SDRAM_ECCES_CKBER_32_ECC_4_8:
- switch (mfsdram(&pdata->dcr_host, SDRAM_MCOPT1) &
- SDRAM_MCOPT1_WDTH_MASK) {
- case SDRAM_MCOPT1_WDTH_16:
- ckber = "ECC0:3";
- break;
- case SDRAM_MCOPT1_WDTH_32:
- ckber = "ECC4:8";
- break;
- default:
- ckber = "Unknown";
- break;
- }
- break;
- case SDRAM_ECCES_CKBER_32_ECC_0_8:
- ckber = "ECC0:8";
- break;
- default:
- ckber = "Unknown";
- break;
- }
-
- return snprintf(buffer, size, "Checkbit Error: %s", ckber);
-}
-
-/**
- * ppc4xx_edac_generate_lane_message - generate interpretted byte lane message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the byte lane message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the portion of the
- * driver-unique report message associated with the ECCESS[BNCE]
- * field of the specified ECC status.
- *
- * Returns the number of characters generated on success; otherwise, <
- * 0 on error.
- */
-static int
-ppc4xx_edac_generate_lane_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- int n, total = 0;
- unsigned int lane, lanes;
- const unsigned int first_lane = 0;
- const unsigned int lane_count = 16;
-
- n = snprintf(buffer, size, "; Byte Lane Errors: ");
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- for (lanes = 0, lane = first_lane; lane < lane_count; lane++) {
- if ((status->ecces & SDRAM_ECCES_BNCE_ENCODE(lane)) != 0) {
- n = snprintf(buffer, size,
- "%s%u",
- (lanes++ ? ", " : ""), lane);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
- }
- }
-
- n = snprintf(buffer, size, "%s; ", lanes ? "" : "None");
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- fail:
- return total;
-}
-
-/**
- * ppc4xx_edac_generate_ecc_message - generate interpretted ECC status message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the ECCES message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the portion of the
- * driver-unique report message associated with the ECCESS register of
- * the specified ECC status.
- *
- * Returns the number of characters generated on success; otherwise, <
- * 0 on error.
- */
-static int
-ppc4xx_edac_generate_ecc_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- int n, total = 0;
-
- n = ppc4xx_edac_generate_bank_message(mci, status, buffer, size);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- n = ppc4xx_edac_generate_checkbit_message(mci, status, buffer, size);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- n = ppc4xx_edac_generate_lane_message(mci, status, buffer, size);
-
- if (n < 0 || n >= size)
- goto fail;
-
- buffer += n;
- size -= n;
- total += n;
-
- fail:
- return total;
-}
-
-/**
- * ppc4xx_edac_generate_plb_message - generate interpretted PLB status message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the PLB message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the portion of the
- * driver-unique report message associated with the PLB-related BESR
- * and/or WMIRQ registers of the specified ECC status.
- *
- * Returns the number of characters generated on success; otherwise, <
- * 0 on error.
- */
-static int
-ppc4xx_edac_generate_plb_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- unsigned int master;
- bool read;
-
- if ((status->besr & SDRAM_BESR_MASK) == 0)
- return 0;
-
- if ((status->besr & SDRAM_BESR_M0ET_MASK) == SDRAM_BESR_M0ET_NONE)
- return 0;
-
- read = ((status->besr & SDRAM_BESR_M0RW_MASK) == SDRAM_BESR_M0RW_READ);
-
- master = SDRAM_BESR_M0ID_DECODE(status->besr);
-
- return snprintf(buffer, size,
- "%s error w/ PLB master %u \"%s\"; ",
- (read ? "Read" : "Write"),
- master,
- (((master >= SDRAM_PLB_M0ID_FIRST) &&
- (master <= SDRAM_PLB_M0ID_LAST)) ?
- ppc4xx_plb_masters[master] : "UNKNOWN"));
-}
-
-/**
- * ppc4xx_edac_generate_message - generate interpretted status message
- * @mci: A pointer to the EDAC memory controller instance associated
- * with the driver-unique message being generated.
- * @status: A pointer to the ECC status structure to generate the
- * message from.
- * @buffer: A pointer to the buffer in which to generate the
- * message.
- * @size: The size, in bytes, of space available in buffer.
- *
- * This routine generates to the provided buffer the driver-unique
- * EDAC report message from the specified ECC status.
- */
-static void
-ppc4xx_edac_generate_message(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status,
- char *buffer,
- size_t size)
-{
- int n;
-
- if (buffer == NULL || size == 0)
- return;
-
- n = ppc4xx_edac_generate_ecc_message(mci, status, buffer, size);
-
- if (n < 0 || n >= size)
- return;
-
- buffer += n;
- size -= n;
-
- ppc4xx_edac_generate_plb_message(mci, status, buffer, size);
-}
-
-#ifdef DEBUG
-/**
- * ppc4xx_ecc_dump_status - dump controller ECC status registers
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the status being dumped.
- * @status: A pointer to the ECC status structure to generate the
- * dump from.
- *
- * This routine dumps to the kernel log buffer the raw and
- * interpretted specified ECC status.
- */
-static void
-ppc4xx_ecc_dump_status(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status)
-{
- char message[PPC4XX_EDAC_MESSAGE_SIZE];
-
- ppc4xx_edac_generate_message(mci, status, message, sizeof(message));
-
- ppc4xx_edac_mc_printk(KERN_INFO, mci,
- "\n"
- "\tECCES: 0x%08x\n"
- "\tWMIRQ: 0x%08x\n"
- "\tBESR: 0x%08x\n"
- "\tBEAR: 0x%08x%08x\n"
- "\t%s\n",
- status->ecces,
- status->wmirq,
- status->besr,
- status->bearh,
- status->bearl,
- message);
-}
-#endif /* DEBUG */
-
-/**
- * ppc4xx_ecc_get_status - get controller ECC status
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the status being retrieved.
- * @status: A pointer to the ECC status structure to populate the
- * ECC status with.
- *
- * This routine reads and masks, as appropriate, all the relevant
- * status registers that deal with ibm,sdram-4xx-ddr2 ECC errors.
- * While we read all of them, for correctable errors, we only expect
- * to deal with ECCES. For uncorrectable errors, we expect to deal
- * with all of them.
- */
-static void
-ppc4xx_ecc_get_status(const struct mem_ctl_info *mci,
- struct ppc4xx_ecc_status *status)
-{
- const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
- const dcr_host_t *dcr_host = &pdata->dcr_host;
-
- status->ecces = mfsdram(dcr_host, SDRAM_ECCES) & SDRAM_ECCES_MASK;
- status->wmirq = mfsdram(dcr_host, SDRAM_WMIRQ) & SDRAM_WMIRQ_MASK;
- status->besr = mfsdram(dcr_host, SDRAM_BESR) & SDRAM_BESR_MASK;
- status->bearl = mfsdram(dcr_host, SDRAM_BEARL);
- status->bearh = mfsdram(dcr_host, SDRAM_BEARH);
-}
-
-/**
- * ppc4xx_ecc_clear_status - clear controller ECC status
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the status being cleared.
- * @status: A pointer to the ECC status structure containing the
- * values to write to clear the ECC status.
- *
- * This routine clears--by writing the masked (as appropriate) status
- * values back to--the status registers that deal with
- * ibm,sdram-4xx-ddr2 ECC errors.
- */
-static void
-ppc4xx_ecc_clear_status(const struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status)
-{
- const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
- const dcr_host_t *dcr_host = &pdata->dcr_host;
-
- mtsdram(dcr_host, SDRAM_ECCES, status->ecces & SDRAM_ECCES_MASK);
- mtsdram(dcr_host, SDRAM_WMIRQ, status->wmirq & SDRAM_WMIRQ_MASK);
- mtsdram(dcr_host, SDRAM_BESR, status->besr & SDRAM_BESR_MASK);
- mtsdram(dcr_host, SDRAM_BEARL, 0);
- mtsdram(dcr_host, SDRAM_BEARH, 0);
-}
-
-/**
- * ppc4xx_edac_handle_ce - handle controller correctable ECC error (CE)
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the correctable error being handled and reported.
- * @status: A pointer to the ECC status structure associated with
- * the correctable error being handled and reported.
- *
- * This routine handles an ibm,sdram-4xx-ddr2 controller ECC
- * correctable error. Per the aforementioned discussion, there's not
- * enough status available to use the full EDAC correctable error
- * interface, so we just pass driver-unique message to the "no info"
- * interface.
- */
-static void
-ppc4xx_edac_handle_ce(struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status)
-{
- int row;
- char message[PPC4XX_EDAC_MESSAGE_SIZE];
-
- ppc4xx_edac_generate_message(mci, status, message, sizeof(message));
-
- for (row = 0; row < mci->nr_csrows; row++)
- if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- 0, 0, 0,
- row, 0, -1,
- message, "");
-}
-
-/**
- * ppc4xx_edac_handle_ue - handle controller uncorrectable ECC error (UE)
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the uncorrectable error being handled and
- * reported.
- * @status: A pointer to the ECC status structure associated with
- * the uncorrectable error being handled and reported.
- *
- * This routine handles an ibm,sdram-4xx-ddr2 controller ECC
- * uncorrectable error.
- */
-static void
-ppc4xx_edac_handle_ue(struct mem_ctl_info *mci,
- const struct ppc4xx_ecc_status *status)
-{
- const u64 bear = ((u64)status->bearh << 32 | status->bearl);
- const unsigned long page = bear >> PAGE_SHIFT;
- const unsigned long offset = bear & ~PAGE_MASK;
- int row;
- char message[PPC4XX_EDAC_MESSAGE_SIZE];
-
- ppc4xx_edac_generate_message(mci, status, message, sizeof(message));
-
- for (row = 0; row < mci->nr_csrows; row++)
- if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- page, offset, 0,
- row, 0, -1,
- message, "");
-}
-
-/**
- * ppc4xx_edac_check - check controller for ECC errors
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the ibm,sdram-4xx-ddr2 controller being
- * checked.
- *
- * This routine is used to check and post ECC errors and is called by
- * both the EDAC polling thread and this driver's CE and UE interrupt
- * handler.
- */
-static void
-ppc4xx_edac_check(struct mem_ctl_info *mci)
-{
-#ifdef DEBUG
- static unsigned int count;
-#endif
- struct ppc4xx_ecc_status status;
-
- ppc4xx_ecc_get_status(mci, &status);
-
-#ifdef DEBUG
- if (count++ % 30 == 0)
- ppc4xx_ecc_dump_status(mci, &status);
-#endif
-
- if (status.ecces & SDRAM_ECCES_UE)
- ppc4xx_edac_handle_ue(mci, &status);
-
- if (status.ecces & SDRAM_ECCES_CE)
- ppc4xx_edac_handle_ce(mci, &status);
-
- ppc4xx_ecc_clear_status(mci, &status);
-}
-
-/**
- * ppc4xx_edac_isr - SEC (CE) and DED (UE) interrupt service routine
- * @irq: The virtual interrupt number being serviced.
- * @dev_id: A pointer to the EDAC memory controller instance
- * associated with the interrupt being handled.
- *
- * This routine implements the interrupt handler for both correctable
- * (CE) and uncorrectable (UE) ECC errors for the ibm,sdram-4xx-ddr2
- * controller. It simply calls through to the same routine used during
- * polling to check, report and clear the ECC status.
- *
- * Unconditionally returns IRQ_HANDLED.
- */
-static irqreturn_t
-ppc4xx_edac_isr(int irq, void *dev_id)
-{
- struct mem_ctl_info *mci = dev_id;
-
- ppc4xx_edac_check(mci);
-
- return IRQ_HANDLED;
-}
-
-/**
- * ppc4xx_edac_get_dtype - return the controller memory width
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- * currently set for the controller, from which the width
- * is derived.
- *
- * This routine returns the EDAC device type width appropriate for the
- * current controller configuration.
- *
- * TODO: This needs to be conditioned dynamically through feature
- * flags or some such when other controller variants are supported as
- * the 405EX[r] is 16-/32-bit and the others are 32-/64-bit with the
- * 16- and 64-bit field definition/value/enumeration (b1) overloaded
- * among them.
- *
- * Returns a device type width enumeration.
- */
-static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1)
-{
- switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) {
- case SDRAM_MCOPT1_WDTH_16:
- return DEV_X2;
- case SDRAM_MCOPT1_WDTH_32:
- return DEV_X4;
- default:
- return DEV_UNKNOWN;
- }
-}
-
-/**
- * ppc4xx_edac_get_mtype - return controller memory type
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- * currently set for the controller, from which the memory type
- * is derived.
- *
- * This routine returns the EDAC memory type appropriate for the
- * current controller configuration.
- *
- * Returns a memory type enumeration.
- */
-static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1)
-{
- bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN);
-
- switch (mcopt1 & SDRAM_MCOPT1_DDR_TYPE_MASK) {
- case SDRAM_MCOPT1_DDR2_TYPE:
- return rden ? MEM_RDDR2 : MEM_DDR2;
- case SDRAM_MCOPT1_DDR1_TYPE:
- return rden ? MEM_RDDR : MEM_DDR;
- default:
- return MEM_UNKNOWN;
- }
-}
-
-/**
- * ppc4xx_edac_init_csrows - initialize driver instance rows
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the ibm,sdram-4xx-ddr2 controller for which
- * the csrows (i.e. banks/ranks) are being initialized.
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- * currently set for the controller, from which bank width
- * and memory typ information is derived.
- *
- * This routine initializes the virtual "chip select rows" associated
- * with the EDAC memory controller instance. An ibm,sdram-4xx-ddr2
- * controller bank/rank is mapped to a row.
- *
- * Returns 0 if OK; otherwise, -EINVAL if the memory bank size
- * configuration cannot be determined.
- */
-static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
-{
- const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
- int status = 0;
- enum mem_type mtype;
- enum dev_type dtype;
- enum edac_type edac_mode;
- int row, j;
- u32 mbxcf, size, nr_pages;
-
- /* Establish the memory type and width */
-
- mtype = ppc4xx_edac_get_mtype(mcopt1);
- dtype = ppc4xx_edac_get_dtype(mcopt1);
-
- /* Establish EDAC mode */
-
- if (mci->edac_cap & EDAC_FLAG_SECDED)
- edac_mode = EDAC_SECDED;
- else if (mci->edac_cap & EDAC_FLAG_EC)
- edac_mode = EDAC_EC;
- else
- edac_mode = EDAC_NONE;
-
- /*
- * Initialize each chip select row structure which correspond
- * 1:1 with a controller bank/rank.
- */
-
- for (row = 0; row < mci->nr_csrows; row++) {
- struct csrow_info *csi = mci->csrows[row];
-
- /*
- * Get the configuration settings for this
- * row/bank/rank and skip disabled banks.
- */
-
- mbxcf = mfsdram(&pdata->dcr_host, SDRAM_MBXCF(row));
-
- if ((mbxcf & SDRAM_MBCF_BE_MASK) != SDRAM_MBCF_BE_ENABLE)
- continue;
-
- /* Map the bank configuration size setting to pages. */
-
- size = mbxcf & SDRAM_MBCF_SZ_MASK;
-
- switch (size) {
- case SDRAM_MBCF_SZ_4MB:
- case SDRAM_MBCF_SZ_8MB:
- case SDRAM_MBCF_SZ_16MB:
- case SDRAM_MBCF_SZ_32MB:
- case SDRAM_MBCF_SZ_64MB:
- case SDRAM_MBCF_SZ_128MB:
- case SDRAM_MBCF_SZ_256MB:
- case SDRAM_MBCF_SZ_512MB:
- case SDRAM_MBCF_SZ_1GB:
- case SDRAM_MBCF_SZ_2GB:
- case SDRAM_MBCF_SZ_4GB:
- case SDRAM_MBCF_SZ_8GB:
- nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
- break;
- default:
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Unrecognized memory bank %d "
- "size 0x%08x\n",
- row, SDRAM_MBCF_SZ_DECODE(size));
- status = -EINVAL;
- goto done;
- }
-
- /*
- * It's unclear exactly what grain should be set to
- * here. The SDRAM_ECCES register allows resolution of
- * an error down to a nibble which would potentially
- * argue for a grain of '1' byte, even though we only
- * know the associated address for uncorrectable
- * errors. This value is not used at present for
- * anything other than error reporting so getting it
- * wrong should be of little consequence. Other
- * possible values would be the PLB width (16), the
- * page size (PAGE_SIZE) or the memory width (2 or 4).
- */
- for (j = 0; j < csi->nr_channels; j++) {
- struct dimm_info *dimm = csi->channels[j]->dimm;
-
- dimm->nr_pages = nr_pages / csi->nr_channels;
- dimm->grain = 1;
-
- dimm->mtype = mtype;
- dimm->dtype = dtype;
-
- dimm->edac_mode = edac_mode;
- }
- }
-
- done:
- return status;
-}
-
-/**
- * ppc4xx_edac_mc_init - initialize driver instance
- * @mci: A pointer to the EDAC memory controller instance being
- * initialized.
- * @op: A pointer to the OpenFirmware device tree node associated
- * with the controller this EDAC instance is bound to.
- * @dcr_host: A pointer to the DCR data containing the DCR mapping
- * for this controller instance.
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- * currently set for the controller, from which ECC capabilities
- * and scrub mode are derived.
- *
- * This routine performs initialization of the EDAC memory controller
- * instance and related driver-private data associated with the
- * ibm,sdram-4xx-ddr2 memory controller the instance is bound to.
- *
- * Returns 0 if OK; otherwise, < 0 on error.
- */
-static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
- struct platform_device *op,
- const dcr_host_t *dcr_host, u32 mcopt1)
-{
- int status = 0;
- const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK);
- struct ppc4xx_edac_pdata *pdata = NULL;
- const struct device_node *np = op->dev.of_node;
-
- if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
- return -EINVAL;
-
- /* Initial driver pointers and private data */
-
- mci->pdev = &op->dev;
-
- dev_set_drvdata(mci->pdev, mci);
-
- pdata = mci->pvt_info;
-
- pdata->dcr_host = *dcr_host;
-
- /* Initialize controller capabilities and configuration */
-
- mci->mtype_cap = (MEM_FLAG_DDR | MEM_FLAG_RDDR |
- MEM_FLAG_DDR2 | MEM_FLAG_RDDR2);
-
- mci->edac_ctl_cap = (EDAC_FLAG_NONE |
- EDAC_FLAG_EC |
- EDAC_FLAG_SECDED);
-
- mci->scrub_cap = SCRUB_NONE;
- mci->scrub_mode = SCRUB_NONE;
-
- /*
- * Update the actual capabilites based on the MCOPT1[MCHK]
- * settings. Scrubbing is only useful if reporting is enabled.
- */
-
- switch (memcheck) {
- case SDRAM_MCOPT1_MCHK_CHK:
- mci->edac_cap = EDAC_FLAG_EC;
- break;
- case SDRAM_MCOPT1_MCHK_CHK_REP:
- mci->edac_cap = (EDAC_FLAG_EC | EDAC_FLAG_SECDED);
- mci->scrub_mode = SCRUB_SW_SRC;
- break;
- default:
- mci->edac_cap = EDAC_FLAG_NONE;
- break;
- }
-
- /* Initialize strings */
-
- mci->mod_name = PPC4XX_EDAC_MODULE_NAME;
- mci->ctl_name = ppc4xx_edac_match->compatible;
- mci->dev_name = np->full_name;
-
- /* Initialize callbacks */
-
- mci->edac_check = ppc4xx_edac_check;
- mci->ctl_page_to_phys = NULL;
-
- /* Initialize chip select rows */
-
- status = ppc4xx_edac_init_csrows(mci, mcopt1);
-
- if (status)
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Failed to initialize rows!\n");
-
- return status;
-}
-
-/**
- * ppc4xx_edac_register_irq - setup and register controller interrupts
- * @op: A pointer to the OpenFirmware device tree node associated
- * with the controller this EDAC instance is bound to.
- * @mci: A pointer to the EDAC memory controller instance
- * associated with the ibm,sdram-4xx-ddr2 controller for which
- * interrupts are being registered.
- *
- * This routine parses the correctable (CE) and uncorrectable error (UE)
- * interrupts from the device tree node and maps and assigns them to
- * the associated EDAC memory controller instance.
- *
- * Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be
- * mapped and assigned.
- */
-static int ppc4xx_edac_register_irq(struct platform_device *op,
- struct mem_ctl_info *mci)
-{
- int status = 0;
- int ded_irq, sec_irq;
- struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
- struct device_node *np = op->dev.of_node;
-
- ded_irq = irq_of_parse_and_map(np, INTMAP_ECCDED_INDEX);
- sec_irq = irq_of_parse_and_map(np, INTMAP_ECCSEC_INDEX);
-
- if (!ded_irq || !sec_irq) {
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Unable to map interrupts.\n");
- status = -ENODEV;
- goto fail;
- }
-
- status = request_irq(ded_irq,
- ppc4xx_edac_isr,
- 0,
- "[EDAC] MC ECCDED",
- mci);
-
- if (status < 0) {
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Unable to request irq %d for ECC DED",
- ded_irq);
- status = -ENODEV;
- goto fail1;
- }
-
- status = request_irq(sec_irq,
- ppc4xx_edac_isr,
- 0,
- "[EDAC] MC ECCSEC",
- mci);
-
- if (status < 0) {
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Unable to request irq %d for ECC SEC",
- sec_irq);
- status = -ENODEV;
- goto fail2;
- }
-
- ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCDED irq is %d\n", ded_irq);
- ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCSEC irq is %d\n", sec_irq);
-
- pdata->irqs.ded = ded_irq;
- pdata->irqs.sec = sec_irq;
-
- return 0;
-
- fail2:
- free_irq(sec_irq, mci);
-
- fail1:
- free_irq(ded_irq, mci);
-
- fail:
- return status;
-}
-
-/**
- * ppc4xx_edac_map_dcrs - locate and map controller registers
- * @np: A pointer to the device tree node containing the DCR
- * resources to map.
- * @dcr_host: A pointer to the DCR data to populate with the
- * DCR mapping.
- *
- * This routine attempts to locate in the device tree and map the DCR
- * register resources associated with the controller's indirect DCR
- * address and data windows.
- *
- * Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on
- * error.
- */
-static int ppc4xx_edac_map_dcrs(const struct device_node *np,
- dcr_host_t *dcr_host)
-{
- unsigned int dcr_base, dcr_len;
-
- if (np == NULL || dcr_host == NULL)
- return -EINVAL;
-
- /* Get the DCR resource extent and sanity check the values. */
-
- dcr_base = dcr_resource_start(np, 0);
- dcr_len = dcr_resource_len(np, 0);
-
- if (dcr_base == 0 || dcr_len == 0) {
- ppc4xx_edac_printk(KERN_ERR,
- "Failed to obtain DCR property.\n");
- return -ENODEV;
- }
-
- if (dcr_len != SDRAM_DCR_RESOURCE_LEN) {
- ppc4xx_edac_printk(KERN_ERR,
- "Unexpected DCR length %d, expected %d.\n",
- dcr_len, SDRAM_DCR_RESOURCE_LEN);
- return -ENODEV;
- }
-
- /* Attempt to map the DCR extent. */
-
- *dcr_host = dcr_map(np, dcr_base, dcr_len);
-
- if (!DCR_MAP_OK(*dcr_host)) {
- ppc4xx_edac_printk(KERN_INFO, "Failed to map DCRs.\n");
- return -ENODEV;
- }
-
- return 0;
-}
-
-/**
- * ppc4xx_edac_probe - check controller and bind driver
- * @op: A pointer to the OpenFirmware device tree node associated
- * with the controller being probed for driver binding.
- *
- * This routine probes a specific ibm,sdram-4xx-ddr2 controller
- * instance for binding with the driver.
- *
- * Returns 0 if the controller instance was successfully bound to the
- * driver; otherwise, < 0 on error.
- */
-static int ppc4xx_edac_probe(struct platform_device *op)
-{
- int status = 0;
- u32 mcopt1, memcheck;
- dcr_host_t dcr_host;
- const struct device_node *np = op->dev.of_node;
- struct mem_ctl_info *mci = NULL;
- struct edac_mc_layer layers[2];
- static int ppc4xx_edac_instance;
-
- /*
- * At this point, we only support the controller realized on
- * the AMCC PPC 405EX[r]. Reject anything else.
- */
-
- if (!of_device_is_compatible(np, "ibm,sdram-405ex") &&
- !of_device_is_compatible(np, "ibm,sdram-405exr")) {
- ppc4xx_edac_printk(KERN_NOTICE,
- "Only the PPC405EX[r] is supported.\n");
- return -ENODEV;
- }
-
- /*
- * Next, get the DCR property and attempt to map it so that we
- * can probe the controller.
- */
-
- status = ppc4xx_edac_map_dcrs(np, &dcr_host);
-
- if (status)
- return status;
-
- /*
- * First determine whether ECC is enabled at all. If not,
- * there is no useful checking or monitoring that can be done
- * for this controller.
- */
-
- mcopt1 = mfsdram(&dcr_host, SDRAM_MCOPT1);
- memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK);
-
- if (memcheck == SDRAM_MCOPT1_MCHK_NON) {
- ppc4xx_edac_printk(KERN_INFO, "%pOF: No ECC memory detected or "
- "ECC is disabled.\n", np);
- status = -ENODEV;
- goto done;
- }
-
- /*
- * At this point, we know ECC is enabled, allocate an EDAC
- * controller instance and perform the appropriate
- * initialization.
- */
- layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = ppc4xx_edac_nr_csrows;
- layers[0].is_virt_csrow = true;
- layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = ppc4xx_edac_nr_chans;
- layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers,
- sizeof(struct ppc4xx_edac_pdata));
- if (mci == NULL) {
- ppc4xx_edac_printk(KERN_ERR, "%pOF: "
- "Failed to allocate EDAC MC instance!\n",
- np);
- status = -ENOMEM;
- goto done;
- }
-
- status = ppc4xx_edac_mc_init(mci, op, &dcr_host, mcopt1);
-
- if (status) {
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Failed to initialize instance!\n");
- goto fail;
- }
-
- /*
- * We have a valid, initialized EDAC instance bound to the
- * controller. Attempt to register it with the EDAC subsystem
- * and, if necessary, register interrupts.
- */
-
- if (edac_mc_add_mc(mci)) {
- ppc4xx_edac_mc_printk(KERN_ERR, mci,
- "Failed to add instance!\n");
- status = -ENODEV;
- goto fail;
- }
-
- if (edac_op_state == EDAC_OPSTATE_INT) {
- status = ppc4xx_edac_register_irq(op, mci);
-
- if (status)
- goto fail1;
- }
-
- ppc4xx_edac_instance++;
-
- return 0;
-
- fail1:
- edac_mc_del_mc(mci->pdev);
-
- fail:
- edac_mc_free(mci);
-
- done:
- return status;
-}
-
-/**
- * ppc4xx_edac_remove - unbind driver from controller
- * @op: A pointer to the OpenFirmware device tree node associated
- * with the controller this EDAC instance is to be unbound/removed
- * from.
- *
- * This routine unbinds the EDAC memory controller instance associated
- * with the specified ibm,sdram-4xx-ddr2 controller described by the
- * OpenFirmware device tree node passed as a parameter.
- *
- * Unconditionally returns 0.
- */
-static int
-ppc4xx_edac_remove(struct platform_device *op)
-{
- struct mem_ctl_info *mci = dev_get_drvdata(&op->dev);
- struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
-
- if (edac_op_state == EDAC_OPSTATE_INT) {
- free_irq(pdata->irqs.sec, mci);
- free_irq(pdata->irqs.ded, mci);
- }
-
- dcr_unmap(pdata->dcr_host, SDRAM_DCR_RESOURCE_LEN);
-
- edac_mc_del_mc(mci->pdev);
- edac_mc_free(mci);
-
- return 0;
-}
-
-/**
- * ppc4xx_edac_opstate_init - initialize EDAC reporting method
- *
- * This routine ensures that the EDAC memory controller reporting
- * method is mapped to a sane value as the EDAC core defines the value
- * to EDAC_OPSTATE_INVAL by default. We don't call the global
- * opstate_init as that defaults to polling and we want interrupt as
- * the default.
- */
-static inline void __init
-ppc4xx_edac_opstate_init(void)
-{
- switch (edac_op_state) {
- case EDAC_OPSTATE_POLL:
- case EDAC_OPSTATE_INT:
- break;
- default:
- edac_op_state = EDAC_OPSTATE_INT;
- break;
- }
-
- ppc4xx_edac_printk(KERN_INFO, "Reporting type: %s\n",
- ((edac_op_state == EDAC_OPSTATE_POLL) ?
- EDAC_OPSTATE_POLL_STR :
- ((edac_op_state == EDAC_OPSTATE_INT) ?
- EDAC_OPSTATE_INT_STR :
- EDAC_OPSTATE_UNKNOWN_STR)));
-}
-
-static struct platform_driver ppc4xx_edac_driver = {
- .probe = ppc4xx_edac_probe,
- .remove = ppc4xx_edac_remove,
- .driver = {
- .name = PPC4XX_EDAC_MODULE_NAME,
- .of_match_table = ppc4xx_edac_match,
- },
-};
-
-/**
- * ppc4xx_edac_init - driver/module insertion entry point
- *
- * This routine is the driver/module insertion entry point. It
- * initializes the EDAC memory controller reporting state and
- * registers the driver as an OpenFirmware device tree platform
- * driver.
- */
-static int __init
-ppc4xx_edac_init(void)
-{
- ppc4xx_edac_printk(KERN_INFO, PPC4XX_EDAC_MODULE_REVISION "\n");
-
- ppc4xx_edac_opstate_init();
-
- return platform_driver_register(&ppc4xx_edac_driver);
-}
-
-/**
- * ppc4xx_edac_exit - driver/module removal entry point
- *
- * This routine is the driver/module removal entry point. It
- * unregisters the driver as an OpenFirmware device tree platform
- * driver.
- */
-static void __exit
-ppc4xx_edac_exit(void)
-{
- platform_driver_unregister(&ppc4xx_edac_driver);
-}
-
-module_init(ppc4xx_edac_init);
-module_exit(ppc4xx_edac_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Grant Erickson <gerickson@nuovations.com>");
-MODULE_DESCRIPTION("EDAC MC Driver for the PPC4xx IBM DDR2 Memory Controller");
-module_param(edac_op_state, int, 0444);
-MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting State: "
- "0=" EDAC_OPSTATE_POLL_STR ", 2=" EDAC_OPSTATE_INT_STR);
diff --git a/drivers/edac/ppc4xx_edac.h b/drivers/edac/ppc4xx_edac.h
deleted file mode 100644
index b38459aa58ee..000000000000
--- a/drivers/edac/ppc4xx_edac.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2008 Nuovation System Designs, LLC
- * Grant Erickson <gerickson@nuovations.com>
- *
- * This file defines processor mnemonics for accessing and managing
- * the IBM DDR1/DDR2 ECC controller found in the 405EX[r], 440SP,
- * 440SPe, 460EX, 460GT and 460SX.
- */
-
-#ifndef __PPC4XX_EDAC_H
-#define __PPC4XX_EDAC_H
-
-#include <linux/types.h>
-
-/*
- * Macro for generating register field mnemonics
- */
-#define PPC_REG_BITS 32
-#define PPC_REG_VAL(bit, val) ((val) << ((PPC_REG_BITS - 1) - (bit)))
-#define PPC_REG_DECODE(bit, val) ((val) >> ((PPC_REG_BITS - 1) - (bit)))
-
-/*
- * IBM 4xx DDR1/DDR2 SDRAM memory controller registers (at least those
- * relevant to ECC)
- */
-#define SDRAM_BESR 0x00 /* Error status (read/clear) */
-#define SDRAM_BESRT 0x01 /* Error statuss (test/set) */
-#define SDRAM_BEARL 0x02 /* Error address low */
-#define SDRAM_BEARH 0x03 /* Error address high */
-#define SDRAM_WMIRQ 0x06 /* Write master (read/clear) */
-#define SDRAM_WMIRQT 0x07 /* Write master (test/set) */
-#define SDRAM_MCOPT1 0x20 /* Controller options 1 */
-#define SDRAM_MBXCF_BASE 0x40 /* Bank n configuration base */
-#define SDRAM_MBXCF(n) (SDRAM_MBXCF_BASE + (4 * (n)))
-#define SDRAM_MB0CF SDRAM_MBXCF(0)
-#define SDRAM_MB1CF SDRAM_MBXCF(1)
-#define SDRAM_MB2CF SDRAM_MBXCF(2)
-#define SDRAM_MB3CF SDRAM_MBXCF(3)
-#define SDRAM_ECCCR 0x98 /* ECC error status */
-#define SDRAM_ECCES SDRAM_ECCCR
-
-/*
- * PLB Master IDs
- */
-#define SDRAM_PLB_M0ID_FIRST 0
-#define SDRAM_PLB_M0ID_ICU SDRAM_PLB_M0ID_FIRST
-#define SDRAM_PLB_M0ID_PCIE0 1
-#define SDRAM_PLB_M0ID_PCIE1 2
-#define SDRAM_PLB_M0ID_DMA 3
-#define SDRAM_PLB_M0ID_DCU 4
-#define SDRAM_PLB_M0ID_OPB 5
-#define SDRAM_PLB_M0ID_MAL 6
-#define SDRAM_PLB_M0ID_SEC 7
-#define SDRAM_PLB_M0ID_AHB 8
-#define SDRAM_PLB_M0ID_LAST SDRAM_PLB_M0ID_AHB
-#define SDRAM_PLB_M0ID_COUNT (SDRAM_PLB_M0ID_LAST - \
- SDRAM_PLB_M0ID_FIRST + 1)
-
-/*
- * Memory Controller Bus Error Status Register
- */
-#define SDRAM_BESR_MASK PPC_REG_VAL(7, 0xFF)
-#define SDRAM_BESR_M0ID_MASK PPC_REG_VAL(3, 0xF)
-#define SDRAM_BESR_M0ID_DECODE(n) PPC_REG_DECODE(3, n)
-#define SDRAM_BESR_M0ID_ICU PPC_REG_VAL(3, SDRAM_PLB_M0ID_ICU)
-#define SDRAM_BESR_M0ID_PCIE0 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE0)
-#define SDRAM_BESR_M0ID_PCIE1 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE1)
-#define SDRAM_BESR_M0ID_DMA PPC_REG_VAL(3, SDRAM_PLB_M0ID_DMA)
-#define SDRAM_BESR_M0ID_DCU PPC_REG_VAL(3, SDRAM_PLB_M0ID_DCU)
-#define SDRAM_BESR_M0ID_OPB PPC_REG_VAL(3, SDRAM_PLB_M0ID_OPB)
-#define SDRAM_BESR_M0ID_MAL PPC_REG_VAL(3, SDRAM_PLB_M0ID_MAL)
-#define SDRAM_BESR_M0ID_SEC PPC_REG_VAL(3, SDRAM_PLB_M0ID_SEC)
-#define SDRAM_BESR_M0ID_AHB PPC_REG_VAL(3, SDRAM_PLB_M0ID_AHB)
-#define SDRAM_BESR_M0ET_MASK PPC_REG_VAL(6, 0x7)
-#define SDRAM_BESR_M0ET_NONE PPC_REG_VAL(6, 0)
-#define SDRAM_BESR_M0ET_ECC PPC_REG_VAL(6, 1)
-#define SDRAM_BESR_M0RW_MASK PPC_REG_VAL(7, 1)
-#define SDRAM_BESR_M0RW_WRITE PPC_REG_VAL(7, 0)
-#define SDRAM_BESR_M0RW_READ PPC_REG_VAL(7, 1)
-
-/*
- * Memory Controller PLB Write Master Interrupt Register
- */
-#define SDRAM_WMIRQ_MASK PPC_REG_VAL(8, 0x1FF)
-#define SDRAM_WMIRQ_ENCODE(id) PPC_REG_VAL((id % \
- SDRAM_PLB_M0ID_COUNT), 1)
-#define SDRAM_WMIRQ_ICU PPC_REG_VAL(SDRAM_PLB_M0ID_ICU, 1)
-#define SDRAM_WMIRQ_PCIE0 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE0, 1)
-#define SDRAM_WMIRQ_PCIE1 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE1, 1)
-#define SDRAM_WMIRQ_DMA PPC_REG_VAL(SDRAM_PLB_M0ID_DMA, 1)
-#define SDRAM_WMIRQ_DCU PPC_REG_VAL(SDRAM_PLB_M0ID_DCU, 1)
-#define SDRAM_WMIRQ_OPB PPC_REG_VAL(SDRAM_PLB_M0ID_OPB, 1)
-#define SDRAM_WMIRQ_MAL PPC_REG_VAL(SDRAM_PLB_M0ID_MAL, 1)
-#define SDRAM_WMIRQ_SEC PPC_REG_VAL(SDRAM_PLB_M0ID_SEC, 1)
-#define SDRAM_WMIRQ_AHB PPC_REG_VAL(SDRAM_PLB_M0ID_AHB, 1)
-
-/*
- * Memory Controller Options 1 Register
- */
-#define SDRAM_MCOPT1_MCHK_MASK PPC_REG_VAL(3, 0x3) /* ECC mask */
-#define SDRAM_MCOPT1_MCHK_NON PPC_REG_VAL(3, 0x0) /* No ECC gen */
-#define SDRAM_MCOPT1_MCHK_GEN PPC_REG_VAL(3, 0x2) /* ECC gen */
-#define SDRAM_MCOPT1_MCHK_CHK PPC_REG_VAL(3, 0x1) /* ECC gen and chk */
-#define SDRAM_MCOPT1_MCHK_CHK_REP PPC_REG_VAL(3, 0x3) /* ECC gen/chk/rpt */
-#define SDRAM_MCOPT1_MCHK_DECODE(n) ((((u32)(n)) >> 28) & 0x3)
-#define SDRAM_MCOPT1_RDEN_MASK PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM mask */
-#define SDRAM_MCOPT1_RDEN PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM enbl */
-#define SDRAM_MCOPT1_WDTH_MASK PPC_REG_VAL(7, 0x1) /* Width mask */
-#define SDRAM_MCOPT1_WDTH_32 PPC_REG_VAL(7, 0x0) /* 32 bits */
-#define SDRAM_MCOPT1_WDTH_16 PPC_REG_VAL(7, 0x1) /* 16 bits */
-#define SDRAM_MCOPT1_DDR_TYPE_MASK PPC_REG_VAL(11, 0x1) /* DDR type mask */
-#define SDRAM_MCOPT1_DDR1_TYPE PPC_REG_VAL(11, 0x0) /* DDR1 type */
-#define SDRAM_MCOPT1_DDR2_TYPE PPC_REG_VAL(11, 0x1) /* DDR2 type */
-
-/*
- * Memory Bank 0 - n Configuration Register
- */
-#define SDRAM_MBCF_BA_MASK PPC_REG_VAL(12, 0x1FFF)
-#define SDRAM_MBCF_SZ_MASK PPC_REG_VAL(19, 0xF)
-#define SDRAM_MBCF_SZ_DECODE(mbxcf) PPC_REG_DECODE(19, mbxcf)
-#define SDRAM_MBCF_SZ_4MB PPC_REG_VAL(19, 0x0)
-#define SDRAM_MBCF_SZ_8MB PPC_REG_VAL(19, 0x1)
-#define SDRAM_MBCF_SZ_16MB PPC_REG_VAL(19, 0x2)
-#define SDRAM_MBCF_SZ_32MB PPC_REG_VAL(19, 0x3)
-#define SDRAM_MBCF_SZ_64MB PPC_REG_VAL(19, 0x4)
-#define SDRAM_MBCF_SZ_128MB PPC_REG_VAL(19, 0x5)
-#define SDRAM_MBCF_SZ_256MB PPC_REG_VAL(19, 0x6)
-#define SDRAM_MBCF_SZ_512MB PPC_REG_VAL(19, 0x7)
-#define SDRAM_MBCF_SZ_1GB PPC_REG_VAL(19, 0x8)
-#define SDRAM_MBCF_SZ_2GB PPC_REG_VAL(19, 0x9)
-#define SDRAM_MBCF_SZ_4GB PPC_REG_VAL(19, 0xA)
-#define SDRAM_MBCF_SZ_8GB PPC_REG_VAL(19, 0xB)
-#define SDRAM_MBCF_AM_MASK PPC_REG_VAL(23, 0xF)
-#define SDRAM_MBCF_AM_MODE0 PPC_REG_VAL(23, 0x0)
-#define SDRAM_MBCF_AM_MODE1 PPC_REG_VAL(23, 0x1)
-#define SDRAM_MBCF_AM_MODE2 PPC_REG_VAL(23, 0x2)
-#define SDRAM_MBCF_AM_MODE3 PPC_REG_VAL(23, 0x3)
-#define SDRAM_MBCF_AM_MODE4 PPC_REG_VAL(23, 0x4)
-#define SDRAM_MBCF_AM_MODE5 PPC_REG_VAL(23, 0x5)
-#define SDRAM_MBCF_AM_MODE6 PPC_REG_VAL(23, 0x6)
-#define SDRAM_MBCF_AM_MODE7 PPC_REG_VAL(23, 0x7)
-#define SDRAM_MBCF_AM_MODE8 PPC_REG_VAL(23, 0x8)
-#define SDRAM_MBCF_AM_MODE9 PPC_REG_VAL(23, 0x9)
-#define SDRAM_MBCF_BE_MASK PPC_REG_VAL(31, 0x1)
-#define SDRAM_MBCF_BE_DISABLE PPC_REG_VAL(31, 0x0)
-#define SDRAM_MBCF_BE_ENABLE PPC_REG_VAL(31, 0x1)
-
-/*
- * ECC Error Status
- */
-#define SDRAM_ECCES_MASK PPC_REG_VAL(21, 0x3FFFFF)
-#define SDRAM_ECCES_BNCE_MASK PPC_REG_VAL(15, 0xFFFF)
-#define SDRAM_ECCES_BNCE_ENCODE(lane) PPC_REG_VAL(((lane) & 0xF), 1)
-#define SDRAM_ECCES_CKBER_MASK PPC_REG_VAL(17, 0x3)
-#define SDRAM_ECCES_CKBER_NONE PPC_REG_VAL(17, 0)
-#define SDRAM_ECCES_CKBER_16_ECC_0_3 PPC_REG_VAL(17, 2)
-#define SDRAM_ECCES_CKBER_32_ECC_0_3 PPC_REG_VAL(17, 1)
-#define SDRAM_ECCES_CKBER_32_ECC_4_8 PPC_REG_VAL(17, 2)
-#define SDRAM_ECCES_CKBER_32_ECC_0_8 PPC_REG_VAL(17, 3)
-#define SDRAM_ECCES_CE PPC_REG_VAL(18, 1)
-#define SDRAM_ECCES_UE PPC_REG_VAL(19, 1)
-#define SDRAM_ECCES_BKNER_MASK PPC_REG_VAL(21, 0x3)
-#define SDRAM_ECCES_BK0ER PPC_REG_VAL(20, 1)
-#define SDRAM_ECCES_BK1ER PPC_REG_VAL(21, 1)
-
-#endif /* __PPC4XX_EDAC_H */
diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
index c45519f59dc1..f3da9385ca0d 100644
--- a/drivers/edac/qcom_edac.c
+++ b/drivers/edac/qcom_edac.c
@@ -21,30 +21,9 @@
#define TRP_SYN_REG_CNT 6
#define DRP_SYN_REG_CNT 8
-#define LLCC_COMMON_STATUS0 0x0003000c
#define LLCC_LB_CNT_MASK GENMASK(31, 28)
#define LLCC_LB_CNT_SHIFT 28
-/* Single & double bit syndrome register offsets */
-#define TRP_ECC_SB_ERR_SYN0 0x0002304c
-#define TRP_ECC_DB_ERR_SYN0 0x00020370
-#define DRP_ECC_SB_ERR_SYN0 0x0004204c
-#define DRP_ECC_DB_ERR_SYN0 0x00042070
-
-/* Error register offsets */
-#define TRP_ECC_ERROR_STATUS1 0x00020348
-#define TRP_ECC_ERROR_STATUS0 0x00020344
-#define DRP_ECC_ERROR_STATUS1 0x00042048
-#define DRP_ECC_ERROR_STATUS0 0x00042044
-
-/* TRP, DRP interrupt register offsets */
-#define DRP_INTERRUPT_STATUS 0x00041000
-#define TRP_INTERRUPT_0_STATUS 0x00020480
-#define DRP_INTERRUPT_CLEAR 0x00041008
-#define DRP_ECC_ERROR_CNTR_CLEAR 0x00040004
-#define TRP_INTERRUPT_0_CLEAR 0x00020484
-#define TRP_ECC_ERROR_CNTR_CLEAR 0x00020440
-
/* Mask and shift macros */
#define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0)
#define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16)
@@ -60,15 +39,6 @@
#define DRP_TRP_INT_CLEAR GENMASK(1, 0)
#define DRP_TRP_CNT_CLEAR GENMASK(1, 0)
-/* Config registers offsets*/
-#define DRP_ECC_ERROR_CFG 0x00040000
-
-/* Tag RAM, Data RAM interrupt register offsets */
-#define CMN_INTERRUPT_0_ENABLE 0x0003001c
-#define CMN_INTERRUPT_2_ENABLE 0x0003003c
-#define TRP_INTERRUPT_0_ENABLE 0x00020488
-#define DRP_INTERRUPT_ENABLE 0x0004100c
-
#define SB_ERROR_THRESHOLD 0x1
#define SB_ERROR_THRESHOLD_SHIFT 24
#define SB_DB_TRP_INTERRUPT_ENABLE 0x3
@@ -76,6 +46,8 @@
#define DRP0_INTERRUPT_ENABLE BIT(6)
#define SB_DB_DRP_INTERRUPT_ENABLE 0x3
+#define ECC_POLL_MSEC 5000
+
enum {
LLCC_DRAM_CE = 0,
LLCC_DRAM_UE,
@@ -86,9 +58,6 @@ enum {
static const struct llcc_edac_reg_data edac_reg_data[] = {
[LLCC_DRAM_CE] = {
.name = "DRAM Single-bit",
- .synd_reg = DRP_ECC_SB_ERR_SYN0,
- .count_status_reg = DRP_ECC_ERROR_STATUS1,
- .ways_status_reg = DRP_ECC_ERROR_STATUS0,
.reg_cnt = DRP_SYN_REG_CNT,
.count_mask = ECC_SB_ERR_COUNT_MASK,
.ways_mask = ECC_SB_ERR_WAYS_MASK,
@@ -96,9 +65,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_DRAM_UE] = {
.name = "DRAM Double-bit",
- .synd_reg = DRP_ECC_DB_ERR_SYN0,
- .count_status_reg = DRP_ECC_ERROR_STATUS1,
- .ways_status_reg = DRP_ECC_ERROR_STATUS0,
.reg_cnt = DRP_SYN_REG_CNT,
.count_mask = ECC_DB_ERR_COUNT_MASK,
.ways_mask = ECC_DB_ERR_WAYS_MASK,
@@ -106,9 +72,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_TRAM_CE] = {
.name = "TRAM Single-bit",
- .synd_reg = TRP_ECC_SB_ERR_SYN0,
- .count_status_reg = TRP_ECC_ERROR_STATUS1,
- .ways_status_reg = TRP_ECC_ERROR_STATUS0,
.reg_cnt = TRP_SYN_REG_CNT,
.count_mask = ECC_SB_ERR_COUNT_MASK,
.ways_mask = ECC_SB_ERR_WAYS_MASK,
@@ -116,9 +79,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_TRAM_UE] = {
.name = "TRAM Double-bit",
- .synd_reg = TRP_ECC_DB_ERR_SYN0,
- .count_status_reg = TRP_ECC_ERROR_STATUS1,
- .ways_status_reg = TRP_ECC_ERROR_STATUS0,
.reg_cnt = TRP_SYN_REG_CNT,
.count_mask = ECC_DB_ERR_COUNT_MASK,
.ways_mask = ECC_DB_ERR_WAYS_MASK,
@@ -126,7 +86,7 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
};
-static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
+static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap)
{
u32 sb_err_threshold;
int ret;
@@ -135,31 +95,31 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
* Configure interrupt enable registers such that Tag, Data RAM related
* interrupts are propagated to interrupt controller for servicing
*/
- ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable,
TRP0_INTERRUPT_ENABLE,
TRP0_INTERRUPT_ENABLE);
if (ret)
return ret;
- ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable,
SB_DB_TRP_INTERRUPT_ENABLE,
SB_DB_TRP_INTERRUPT_ENABLE);
if (ret)
return ret;
sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
- ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
+ ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg,
sb_err_threshold);
if (ret)
return ret;
- ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable,
DRP0_INTERRUPT_ENABLE,
DRP0_INTERRUPT_ENABLE);
if (ret)
return ret;
- ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
+ ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable,
SB_DB_DRP_INTERRUPT_ENABLE);
return ret;
}
@@ -168,29 +128,33 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
static int
qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
{
- int ret = 0;
+ int ret;
switch (err_type) {
case LLCC_DRAM_CE:
case LLCC_DRAM_UE:
- ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->drp_interrupt_clear,
DRP_TRP_INT_CLEAR);
if (ret)
return ret;
- ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->drp_ecc_error_cntr_clear,
DRP_TRP_CNT_CLEAR);
if (ret)
return ret;
break;
case LLCC_TRAM_CE:
case LLCC_TRAM_UE:
- ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->trp_interrupt_0_clear,
DRP_TRP_INT_CLEAR);
if (ret)
return ret;
- ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->trp_ecc_error_cntr_clear,
DRP_TRP_CNT_CLEAR);
if (ret)
return ret;
@@ -203,17 +167,55 @@ qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
return ret;
}
+struct qcom_llcc_syn_regs {
+ u32 synd_reg;
+ u32 count_status_reg;
+ u32 ways_status_reg;
+};
+
+static void get_reg_offsets(struct llcc_drv_data *drv, int err_type,
+ struct qcom_llcc_syn_regs *syn_regs)
+{
+ const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset;
+
+ switch (err_type) {
+ case LLCC_DRAM_CE:
+ syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
+ break;
+ case LLCC_DRAM_UE:
+ syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
+ break;
+ case LLCC_TRAM_CE:
+ syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
+ break;
+ case LLCC_TRAM_UE:
+ syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
+ break;
+ }
+}
+
/* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
static int
dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
{
struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
+ struct qcom_llcc_syn_regs regs = { };
int err_cnt, err_ways, ret, i;
u32 synd_reg, synd_val;
+ get_reg_offsets(drv, err_type, &regs);
+
for (i = 0; i < reg_data.reg_cnt; i++) {
- synd_reg = reg_data.synd_reg + (i * 4);
- ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg,
+ synd_reg = regs.synd_reg + (i * 4);
+ ret = regmap_read(drv->regmaps[bank], synd_reg,
&synd_val);
if (ret)
goto clear;
@@ -222,8 +224,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
reg_data.name, i, synd_val);
}
- ret = regmap_read(drv->regmap,
- drv->offsets[bank] + reg_data.count_status_reg,
+ ret = regmap_read(drv->regmaps[bank], regs.count_status_reg,
&err_cnt);
if (ret)
goto clear;
@@ -233,8 +234,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
reg_data.name, err_cnt);
- ret = regmap_read(drv->regmap,
- drv->offsets[bank] + reg_data.ways_status_reg,
+ ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg,
&err_ways);
if (ret)
goto clear;
@@ -285,8 +285,7 @@ dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
return ret;
}
-static irqreturn_t
-llcc_ecc_irq_handler(int irq, void *edev_ctl)
+static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
{
struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
@@ -296,8 +295,7 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl)
/* Iterate over the banks and look for Tag RAM or Data RAM errors */
for (i = 0; i < drv->num_banks; i++) {
- ret = regmap_read(drv->regmap,
- drv->offsets[i] + DRP_INTERRUPT_STATUS,
+ ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status,
&drp_error);
if (!ret && (drp_error & SB_ECC_ERROR)) {
@@ -312,8 +310,7 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl)
if (!ret)
irq_rc = IRQ_HANDLED;
- ret = regmap_read(drv->regmap,
- drv->offsets[i] + TRP_INTERRUPT_0_STATUS,
+ ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status,
&trp_error);
if (!ret && (trp_error & SB_ECC_ERROR)) {
@@ -332,6 +329,11 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl)
return irq_rc;
}
+static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl)
+{
+ llcc_ecc_irq_handler(0, edev_ctl);
+}
+
static int qcom_llcc_edac_probe(struct platform_device *pdev)
{
struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
@@ -340,14 +342,15 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
int ecc_irq;
int rc;
- rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
- if (rc)
- return rc;
+ if (!llcc_driv_data->ecc_irq_configured) {
+ rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap);
+ if (rc)
+ return rc;
+ }
/* Allocate edac control info */
edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
llcc_driv_data->num_banks, 1,
- NULL, 0,
edac_device_alloc_index());
if (!edev_ctl)
@@ -359,49 +362,56 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
edev_ctl->ctl_name = "llcc";
edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
- rc = edac_device_add_device(edev_ctl);
- if (rc)
- goto out_mem;
-
- platform_set_drvdata(pdev, edev_ctl);
-
- /* Request for ecc irq */
+ /* Check if LLCC driver has passed ECC IRQ */
ecc_irq = llcc_driv_data->ecc_irq;
- if (ecc_irq < 0) {
- rc = -ENODEV;
- goto out_dev;
- }
- rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
+ if (ecc_irq > 0) {
+ /* Use interrupt mode if IRQ is available */
+ rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
- if (rc)
- goto out_dev;
+ if (!rc) {
+ edac_op_state = EDAC_OPSTATE_INT;
+ goto irq_done;
+ }
+ }
- return rc;
+ /* Fall back to polling mode otherwise */
+ edev_ctl->poll_msec = ECC_POLL_MSEC;
+ edev_ctl->edac_check = llcc_ecc_check;
+ edac_op_state = EDAC_OPSTATE_POLL;
-out_dev:
- edac_device_del_device(edev_ctl->dev);
-out_mem:
- edac_device_free_ctl_info(edev_ctl);
+irq_done:
+ rc = edac_device_add_device(edev_ctl);
+ if (rc) {
+ edac_device_free_ctl_info(edev_ctl);
+ return rc;
+ }
+
+ platform_set_drvdata(pdev, edev_ctl);
return rc;
}
-static int qcom_llcc_edac_remove(struct platform_device *pdev)
+static void qcom_llcc_edac_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
edac_device_del_device(edev_ctl->dev);
edac_device_free_ctl_info(edev_ctl);
-
- return 0;
}
+static const struct platform_device_id qcom_llcc_edac_id_table[] = {
+ { .name = "qcom_llcc_edac" },
+ {}
+};
+MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table);
+
static struct platform_driver qcom_llcc_edac_driver = {
.probe = qcom_llcc_edac_probe,
.remove = qcom_llcc_edac_remove,
.driver = {
.name = "qcom_llcc_edac",
},
+ .id_table = qcom_llcc_edac_id_table,
};
module_platform_driver(qcom_llcc_edac_driver);
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index d0aef83dca2a..61e979d5437a 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -415,8 +415,7 @@ module_init(r82600_init);
module_exit(r82600_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. "
- "on behalf of EADS Astrium");
+MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. on behalf of EADS Astrium");
MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers");
module_param(disable_hardware_scrub, bool, 0644);
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 0c779a0326b6..d5f12219598a 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -29,6 +29,8 @@
/* Static vars */
static LIST_HEAD(sbridge_edac_list);
+static char sb_msg[256];
+static char sb_msg_full[512];
/*
* Alter this version for the module when modifications are made
@@ -109,8 +111,8 @@ static const u32 knl_interleave_list[] = {
0x104, 0x10c, 0x114, 0x11c, /* 20-23 */
};
#define MAX_INTERLEAVE \
- (max_t(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \
- max_t(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \
+ (MAX_T(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \
+ MAX_T(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \
ARRAY_SIZE(knl_interleave_list))))
struct interleave_pkg {
@@ -439,7 +441,7 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = {
static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE),
- {0,} /* 0 terminated list. */
+ { NULL, }
};
/* This changes depending if 1HA or 2HA:
@@ -505,7 +507,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = {
static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE),
- {0,} /* 0 terminated list. */
+ { NULL, }
};
/* Haswell support */
@@ -576,7 +578,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = {
static const struct pci_id_table pci_dev_descr_haswell_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL),
- {0,} /* 0 terminated list. */
+ { NULL, }
};
/* Knight's Landing Support */
@@ -620,7 +622,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = {
static const struct pci_id_table pci_dev_descr_knl_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING),
- {0,}
+ { NULL, }
};
/*
@@ -686,7 +688,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = {
static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL),
- {0,} /* 0 terminated list. */
+ { NULL, }
};
@@ -3079,7 +3081,6 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
enum hw_event_mc_err_type tp_event;
- char *optype, msg[256], msg_full[512];
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -3095,10 +3096,10 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
* aligned address reported by patrol scrubber.
*/
u32 lsb = GET_BITFIELD(m->misc, 0, 5);
+ char *optype, *area_type = "DRAM";
long channel_mask, first_channel;
u8 rank = 0xff, socket, ha;
int rc, dimm;
- char *area_type = "DRAM";
if (pvt->info.type != SANDY_BRIDGE)
recoverable = true;
@@ -3168,32 +3169,32 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
channel = knl_channel_remap(m->bank == 16, channel);
channel_mask = 1 << channel;
- snprintf(msg, sizeof(msg),
- "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
- overflow ? " OVERFLOW" : "",
- (uncorrected_error && recoverable)
- ? " recoverable" : " ",
- mscod, errcode, channel, A + channel);
+ snprintf(sb_msg, sizeof(sb_msg),
+ "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable)
+ ? " recoverable" : " ",
+ mscod, errcode, channel, A + channel);
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
channel, 0, -1,
- optype, msg);
+ optype, sb_msg);
}
return;
} else if (lsb < 12) {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
&channel_mask, &rank,
- &area_type, msg);
+ &area_type, sb_msg);
} else {
rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
- &channel_mask, msg);
+ &channel_mask, sb_msg);
}
if (rc < 0)
goto err_parsing;
new_mci = get_mci_for_node_id(socket, ha);
if (!new_mci) {
- strcpy(msg, "Error: socket got corrupted!");
+ strscpy(sb_msg, "Error: socket got corrupted!");
goto err_parsing;
}
mci = new_mci;
@@ -3218,7 +3219,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
*/
if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg)
channel = first_channel;
- snprintf(msg_full, sizeof(msg_full),
+ snprintf(sb_msg_full, sizeof(sb_msg_full),
"%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
@@ -3226,9 +3227,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
mscod, errcode,
socket, ha,
channel_mask,
- rank, msg);
+ rank, sb_msg);
- edac_dbg(0, "%s\n", msg_full);
+ edac_dbg(0, "%s\n", sb_msg_full);
/* FIXME: need support for channel mask */
@@ -3239,12 +3240,12 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
channel, dimm, -1,
- optype, msg_full);
+ optype, sb_msg_full);
return;
err_parsing:
edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
-1, -1, -1,
- msg, "");
+ sb_msg, "");
}
@@ -3546,13 +3547,13 @@ fail0:
}
static const struct x86_cpu_id sbridge_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &pci_dev_descr_sbridge_table),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &pci_dev_descr_ibridge_table),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &pci_dev_descr_haswell_table),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &pci_dev_descr_broadwell_table),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &pci_dev_descr_broadwell_table),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &pci_dev_descr_knl_table),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &pci_dev_descr_knl_table),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &pci_dev_descr_sbridge_table),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &pci_dev_descr_ibridge_table),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &pci_dev_descr_haswell_table),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &pci_dev_descr_broadwell_table),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &pci_dev_descr_broadwell_table),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &pci_dev_descr_knl_table),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &pci_dev_descr_knl_table),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c
new file mode 100644
index 000000000000..f9d02af2fc3a
--- /dev/null
+++ b/drivers/edac/scrub.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic EDAC scrub driver controls the memory scrubbers in the
+ * system. The common sysfs scrub interface abstracts the control of
+ * various arbitrary scrubbing functionalities into a unified set of
+ * functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+enum edac_scrub_attributes {
+ SCRUB_ADDRESS,
+ SCRUB_SIZE,
+ SCRUB_ENABLE_BACKGROUND,
+ SCRUB_MIN_CYCLE_DURATION,
+ SCRUB_MAX_CYCLE_DURATION,
+ SCRUB_CUR_CYCLE_DURATION,
+ SCRUB_MAX_ATTRS
+};
+
+struct edac_scrub_dev_attr {
+ struct device_attribute dev_attr;
+ u8 instance;
+};
+
+struct edac_scrub_context {
+ char name[EDAC_FEAT_NAME_LEN];
+ struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS];
+ struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1];
+ struct attribute_group group;
+};
+
+#define TO_SCRUB_DEV_ATTR(_dev_attr) \
+ container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr)
+
+#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \
+ if (ret) \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n")
+EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n")
+EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n")
+
+#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = conv_func(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64)
+EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64)
+EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul)
+EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul)
+
+static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+ struct device *ras_feat_dev = kobj_to_dev(kobj);
+ struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr);
+ u8 inst = TO_SCRUB_DEV_ATTR(dev_attr)->instance;
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops;
+
+ switch (attr_id) {
+ case SCRUB_ADDRESS:
+ if (ops->read_addr) {
+ if (ops->write_addr)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_SIZE:
+ if (ops->read_size) {
+ if (ops->write_size)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_ENABLE_BACKGROUND:
+ if (ops->get_enabled_bg) {
+ if (ops->set_enabled_bg)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_MIN_CYCLE_DURATION:
+ if (ops->get_min_cycle)
+ return a->mode;
+ break;
+ case SCRUB_MAX_CYCLE_DURATION:
+ if (ops->get_max_cycle)
+ return a->mode;
+ break;
+ case SCRUB_CUR_CYCLE_DURATION:
+ if (ops->get_cycle_duration) {
+ if (ops->set_cycle_duration)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define EDAC_SCRUB_ATTR_RO(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \
+ .instance = _instance })
+
+#define EDAC_SCRUB_ATTR_WO(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \
+ .instance = _instance })
+
+#define EDAC_SCRUB_ATTR_RW(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \
+ .instance = _instance })
+
+static int scrub_create_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups, u8 instance)
+{
+ struct edac_scrub_context *scrub_ctx;
+ struct attribute_group *group;
+ int i;
+ struct edac_scrub_dev_attr dev_attr[] = {
+ [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance),
+ [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance),
+ [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance),
+ [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance),
+ [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance),
+ [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance)
+ };
+
+ scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL);
+ if (!scrub_ctx)
+ return -ENOMEM;
+
+ group = &scrub_ctx->group;
+ for (i = 0; i < SCRUB_MAX_ATTRS; i++) {
+ memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i]));
+ sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr);
+ scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr;
+ }
+ sprintf(scrub_ctx->name, "%s%d", "scrub", instance);
+ group->name = scrub_ctx->name;
+ group->attrs = scrub_ctx->scrub_attrs;
+ group->is_visible = scrub_attr_visible;
+
+ attr_groups[0] = group;
+
+ return 0;
+}
+
+/**
+ * edac_scrub_get_desc - get EDAC scrub descriptors
+ * @scrub_dev: client device, with scrub support
+ * @attr_groups: pointer to attribute group container
+ * @instance: device's scrub instance number.
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_scrub_get_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups, u8 instance)
+{
+ if (!scrub_dev || !attr_groups)
+ return -EINVAL;
+
+ return scrub_create_desc(scrub_dev, attr_groups, instance);
+}
diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c
index b844e2626fd5..a2b193dc6604 100644
--- a/drivers/edac/sifive_edac.c
+++ b/drivers/edac/sifive_edac.c
@@ -52,8 +52,7 @@ static int ecc_register(struct platform_device *pdev)
platform_set_drvdata(pdev, p);
p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc",
- 1, 1, NULL, 0,
- edac_device_alloc_index());
+ 1, 1, edac_device_alloc_index());
if (!p->dci)
return -ENOMEM;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 9397abb42c49..aa6593ccda2d 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -33,6 +33,15 @@ static unsigned int nvdimm_count;
#define MASK26 0x3FFFFFF /* Mask for 2^26 */
#define MASK29 0x1FFFFFFF /* Mask for 2^29 */
+static struct res_config skx_cfg = {
+ .type = SKX,
+ .decs_did = 0x2016,
+ .busno_cfg_offset = 0xcc,
+ .ddr_imc_num = 2,
+ .ddr_chan_num = 3,
+ .ddr_dimm_num = 2,
+};
+
static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
{
struct skx_dev *d;
@@ -52,7 +61,7 @@ enum munittype {
struct munit {
u16 did;
- u16 devfn[SKX_NUM_IMC];
+ u16 devfn[2];
u8 busidx;
u8 per_socket;
enum munittype mtype;
@@ -89,11 +98,11 @@ static int get_all_munits(const struct munit *m)
if (!pdev)
break;
ndev++;
- if (m->per_socket == SKX_NUM_IMC) {
- for (i = 0; i < SKX_NUM_IMC; i++)
+ if (m->per_socket == skx_cfg.ddr_imc_num) {
+ for (i = 0; i < skx_cfg.ddr_imc_num; i++)
if (m->devfn[i] == pdev->devfn)
break;
- if (i == SKX_NUM_IMC)
+ if (i == skx_cfg.ddr_imc_num)
goto fail;
}
d = get_skx_dev(pdev->bus, m->busidx);
@@ -157,14 +166,8 @@ fail:
return -ENODEV;
}
-static struct res_config skx_cfg = {
- .type = SKX,
- .decs_did = 0x2016,
- .busno_cfg_offset = 0xcc,
-};
-
static const struct x86_cpu_id skx_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x0, 0xf), &skx_cfg),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
@@ -186,11 +189,11 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
/* Only the mcmtr on the first channel is effective */
pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr);
- for (i = 0; i < SKX_NUM_CHANNELS; i++) {
+ for (i = 0; i < cfg->ddr_chan_num; i++) {
ndimms = 0;
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
- for (j = 0; j < SKX_NUM_DIMMS; j++) {
+ for (j = 0; j < cfg->ddr_dimm_num; j++) {
dimm = edac_get_dimm(mci, i, j, 0);
pci_read_config_dword(imc->chan[i].cdev,
0x80 + 4 * j, &mtr);
@@ -510,7 +513,7 @@ rir_found:
}
static u8 skx_close_row[] = {
- 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
+ 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34
};
static u8 skx_close_column[] = {
@@ -518,7 +521,7 @@ static u8 skx_close_column[] = {
};
static u8 skx_open_row[] = {
- 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
+ 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34
};
static u8 skx_open_column[] = {
@@ -587,54 +590,6 @@ static struct notifier_block skx_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
-#ifdef CONFIG_EDAC_DEBUG
-/*
- * Debug feature.
- * Exercise the address decode logic by writing an address to
- * /sys/kernel/debug/edac/skx_test/addr.
- */
-static struct dentry *skx_test;
-
-static int debugfs_u64_set(void *data, u64 val)
-{
- struct mce m;
-
- pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
-
- memset(&m, 0, sizeof(m));
- /* ADDRV + MemRd + Unknown channel */
- m.status = MCI_STATUS_ADDRV + 0x90;
- /* One corrected error */
- m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
- m.addr = val;
- skx_mce_check_error(NULL, 0, &m);
-
- return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
-
-static void setup_skx_debug(void)
-{
- skx_test = edac_debugfs_create_dir("skx_test");
- if (!skx_test)
- return;
-
- if (!edac_debugfs_create_file("addr", 0200, skx_test,
- NULL, &fops_u64_wo)) {
- debugfs_remove(skx_test);
- skx_test = NULL;
- }
-}
-
-static void teardown_skx_debug(void)
-{
- debugfs_remove_recursive(skx_test);
-}
-#else
-static inline void setup_skx_debug(void) {}
-static inline void teardown_skx_debug(void) {}
-#endif /*CONFIG_EDAC_DEBUG*/
-
/*
* skx_init:
* make sure we are running on the correct cpu model
@@ -648,7 +603,7 @@ static int __init skx_init(void)
const struct munit *m;
const char *owner;
int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
- u8 mc = 0, src_id, node_id;
+ u8 mc = 0, src_id;
struct skx_dev *d;
edac_dbg(2, "\n");
@@ -668,6 +623,7 @@ static int __init skx_init(void)
return -ENODEV;
cfg = (struct res_config *)id->driver_data;
+ skx_set_res_cfg(cfg);
rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
if (rc)
@@ -698,16 +654,16 @@ static int __init skx_init(void)
rc = skx_get_src_id(d, 0xf0, &src_id);
if (rc < 0)
goto fail;
- rc = skx_get_node_id(d, &node_id);
- if (rc < 0)
- goto fail;
- edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
- for (i = 0; i < SKX_NUM_IMC; i++) {
+
+ edac_dbg(2, "src_id = %d\n", src_id);
+ for (i = 0; i < cfg->ddr_imc_num; i++) {
d->imc[i].mc = mc++;
d->imc[i].lmc = i;
d->imc[i].src_id = src_id;
- d->imc[i].node_id = node_id;
- rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
+ d->imc[i].num_channels = cfg->ddr_chan_num;
+ d->imc[i].num_dimms = cfg->ddr_dimm_num;
+ rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev,
+ pci_name(d->imc[i].chan[0].cdev),
"Skylake Socket", EDAC_MOD_STR,
skx_get_dimm_config, cfg);
if (rc < 0)
@@ -728,7 +684,7 @@ static int __init skx_init(void)
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
- setup_skx_debug();
+ skx_setup_debug("skx_test");
mce_register_decode_chain(&skx_mce_dec);
@@ -742,7 +698,7 @@ static void __exit skx_exit(void)
{
edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec);
- teardown_skx_debug();
+ skx_teardown_debug();
if (nvdimm_count)
skx_adxl_put();
skx_remove();
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index f0f8e98f6efb..3276afe43922 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -14,11 +14,14 @@
* Copyright (c) 2018, Intel Corporation.
*/
+#include <linux/topology.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/adxl.h>
+#include <linux/overflow.h>
#include <acpi/nfit.h>
#include <asm/mce.h>
+#include <asm/uv/uv.h>
#include "edac_module.h"
#include "skx_common.h"
@@ -47,8 +50,9 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
static bool skx_mem_cfg_2lm;
+static struct res_config *skx_res_cfg;
-int __init skx_adxl_get(void)
+int skx_adxl_get(void)
{
const char * const *names;
int i, j;
@@ -110,17 +114,56 @@ err:
return -ENODEV;
}
+EXPORT_SYMBOL_GPL(skx_adxl_get);
-void __exit skx_adxl_put(void)
+void skx_adxl_put(void)
{
+ adxl_component_count = 0;
kfree(adxl_values);
kfree(adxl_msg);
}
+EXPORT_SYMBOL_GPL(skx_adxl_put);
-static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
+void skx_init_mc_mapping(struct skx_dev *d)
{
+ /*
+ * By default, the BIOS presents all memory controllers within each
+ * socket to the EDAC driver. The physical indices are the same as
+ * the logical indices of the memory controllers enumerated by the
+ * EDAC driver.
+ */
+ for (int i = 0; i < d->num_imc; i++)
+ d->imc[i].mc_mapping = i;
+}
+EXPORT_SYMBOL_GPL(skx_init_mc_mapping);
+
+void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
+{
+ edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, lmc);
+
+ d->imc[lmc].mc_mapping = pmc;
+}
+EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
+
+static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
+{
+ for (int lmc = 0; lmc < d->num_imc; lmc++) {
+ if (d->imc[lmc].mc_mapping == pmc) {
+ edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, lmc);
+
+ return lmc;
+ }
+ }
+
+ return -1;
+}
+
+static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
+{
+ int i, lmc, len = 0;
struct skx_dev *d;
- int i, len = 0;
if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
res->addr < BIT_ULL(32))) {
@@ -133,8 +176,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
return false;
}
+ /*
+ * GNR with a Flat2LM memory configuration may mistakenly classify
+ * a near-memory error(DDR5) as a far-memory error(CXL), resulting
+ * in the incorrect selection of decoded ADXL components.
+ * To address this, prefetch the decoded far-memory controller ID
+ * and adjust the error source to near-memory if the far-memory
+ * controller ID is invalid.
+ */
+ if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) {
+ res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+ if (res->imc == -1) {
+ err_src = ERR_SRC_2LM_NM;
+ edac_dbg(0, "Adjust the error source to near-memory.\n");
+ }
+ }
+
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
- if (error_in_1st_level_mem) {
+ if (err_src == ERR_SRC_2LM_NM) {
res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
(int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
@@ -150,7 +209,7 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
res->cs = (int)adxl_values[component_indices[INDEX_CS]];
}
- if (res->imc > NUM_IMC - 1 || res->imc < 0) {
+ if (res->imc < 0) {
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
return false;
}
@@ -168,6 +227,14 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
return false;
}
+ lmc = skx_get_mc_mapping(d, res->imc);
+ if (lmc < 0) {
+ skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc);
+ return false;
+ }
+
+ res->imc = lmc;
+
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
@@ -187,38 +254,66 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
{
skx_mem_cfg_2lm = mem_cfg_2lm;
}
+EXPORT_SYMBOL_GPL(skx_set_mem_cfg);
+
+void skx_set_res_cfg(struct res_config *cfg)
+{
+ skx_res_cfg = cfg;
+}
+EXPORT_SYMBOL_GPL(skx_set_res_cfg);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
driver_decode = decode;
skx_show_retry_rd_err_log = show_retry_log;
}
+EXPORT_SYMBOL_GPL(skx_set_decode);
-int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
+static int skx_get_pkg_id(struct skx_dev *d, u8 *id)
{
- u32 reg;
+ int node;
+ int cpu;
- if (pci_read_config_dword(d->util_all, off, &reg)) {
- skx_printk(KERN_ERR, "Failed to read src id\n");
- return -ENODEV;
+ node = pcibus_to_node(d->util_all->bus);
+ if (numa_valid_node(node)) {
+ for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && cpu_to_node(cpu) == node) {
+ *id = topology_physical_package_id(cpu);
+ return 0;
+ }
+ }
}
- *id = GET_BITFIELD(reg, 12, 14);
- return 0;
+ skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n");
+ return -ENODEV;
}
-int skx_get_node_id(struct skx_dev *d, u8 *id)
+int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
{
u32 reg;
- if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
- skx_printk(KERN_ERR, "Failed to read node id\n");
+ /*
+ * The 3-bit source IDs in PCI configuration space registers are limited
+ * to 8 unique IDs, and each ID is local to a UPI/QPI domain.
+ *
+ * Source IDs cannot be used to map devices to sockets on UV systems
+ * because they can exceed 8 sockets and have multiple UPI/QPI domains
+ * with identical, repeating source IDs.
+ */
+ if (is_uv_system())
+ return skx_get_pkg_id(d, id);
+
+ if (pci_read_config_dword(d->util_all, off, &reg)) {
+ skx_printk(KERN_ERR, "Failed to read src id\n");
return -ENODEV;
}
- *id = GET_BITFIELD(reg, 0, 2);
+ *id = GET_BITFIELD(reg, 12, 14);
return 0;
}
+EXPORT_SYMBOL_GPL(skx_get_src_id);
static int get_width(u32 mtr)
{
@@ -240,10 +335,10 @@ static int get_width(u32 mtr)
*/
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
{
+ int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
struct pci_dev *pdev, *prev;
struct skx_dev *d;
u32 reg;
- int ndev = 0;
prev = NULL;
for (;;) {
@@ -251,7 +346,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
if (!pdev)
break;
ndev++;
- d = kzalloc(sizeof(*d), GFP_KERNEL);
+ d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
if (!d) {
pci_dev_put(pdev);
return -ENOMEM;
@@ -274,16 +369,27 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
d->seg = GET_BITFIELD(reg, 16, 23);
}
- edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
- d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+ d->num_imc = imc_num;
+
+ edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n",
+ d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num);
list_add_tail(&d->list, &dev_edac_list);
prev = pdev;
+
+ skx_init_mc_mapping(d);
}
if (list)
*list = &dev_edac_list;
return ndev;
}
+EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings);
+
+struct list_head *skx_get_edac_list(void)
+{
+ return &dev_edac_list;
+}
+EXPORT_SYMBOL_GPL(skx_get_edac_list);
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
@@ -323,6 +429,14 @@ fail:
pci_dev_put(pdev);
return -ENODEV;
}
+EXPORT_SYMBOL_GPL(skx_get_hi_lo);
+
+void skx_set_hi_lo(u64 tolm, u64 tohm)
+{
+ skx_tolm = tolm;
+ skx_tohm = tohm;
+}
+EXPORT_SYMBOL_GPL(skx_set_hi_lo);
static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
int minval, int maxval, const char *name)
@@ -337,7 +451,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
}
#define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
-#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
+#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows")
#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
@@ -355,7 +469,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
if (imc->hbm_mc) {
banks = 32;
mtype = MEM_HBM2;
- } else if (cfg->support_ddr5 && (amap & 0x8)) {
+ } else if (cfg->support_ddr5) {
banks = 32;
mtype = MEM_DDR5;
} else {
@@ -394,6 +508,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
return 1;
}
+EXPORT_SYMBOL_GPL(skx_get_dimm_info);
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno, const char *mod_str)
@@ -442,10 +557,11 @@ unknown_size:
return (size == 0 || size == ~0ull) ? 0 : 1;
}
+EXPORT_SYMBOL_GPL(skx_get_nvdimm_info);
-int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
- const char *ctl_name, const char *mod_str,
- get_dimm_config_f get_dimm_config,
+int skx_register_mci(struct skx_imc *imc, struct device *dev,
+ const char *dev_name, const char *ctl_name,
+ const char *mod_str, get_dimm_config_f get_dimm_config,
struct res_config *cfg)
{
struct mem_ctl_info *mci;
@@ -455,10 +571,10 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
/* Allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
- layers[0].size = NUM_CHANNELS;
+ layers[0].size = imc->num_channels;
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
- layers[1].size = NUM_DIMMS;
+ layers[1].size = imc->num_dimms;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
sizeof(struct skx_pvt));
@@ -474,7 +590,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
- imc->node_id, imc->lmc);
+ imc->src_id, imc->lmc);
if (!mci->ctl_name) {
rc = -ENOMEM;
goto fail0;
@@ -486,7 +602,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = mod_str;
- mci->dev_name = pci_name(pdev);
+ mci->dev_name = dev_name;
mci->ctl_page_to_phys = NULL;
rc = get_dimm_config(mci, cfg);
@@ -494,7 +610,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
goto fail;
/* Record ptr to the generic device */
- mci->pdev = &pdev->dev;
+ mci->pdev = dev;
/* Add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
@@ -512,6 +628,7 @@ fail0:
imc->mci = NULL;
return rc;
}
+EXPORT_SYMBOL_GPL(skx_register_mci);
static void skx_unregister_mci(struct skx_imc *imc)
{
@@ -560,51 +677,35 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
tp_event = HW_EVENT_ERR_CORRECTED;
}
- /*
- * According to Intel Architecture spec vol 3B,
- * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
- * memory errors should fit one of these masks:
- * 000f 0000 1mmm cccc (binary)
- * 000f 0010 1mmm cccc (binary) [RAM used as cache]
- * where:
- * f = Correction Report Filtering Bit. If 1, subsequent errors
- * won't be shown
- * mmm = error type
- * cccc = channel
- * If the mask doesn't match, report an error to the parsing logic
- */
- if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
- optype = "Can't parse: it is not a mem";
- } else {
- switch (optypenum) {
- case 0:
- optype = "generic undef request error";
- break;
- case 1:
- optype = "memory read error";
- break;
- case 2:
- optype = "memory write error";
- break;
- case 3:
- optype = "addr/cmd error";
- break;
- case 4:
- optype = "memory scrubbing error";
- scrub_err = true;
- break;
- default:
- optype = "reserved";
- break;
- }
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ scrub_err = true;
+ break;
+ default:
+ optype = "reserved";
+ break;
}
+
if (res->decoded_by_adxl) {
- len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
+ len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
- len = snprintf(skx_msg, MSG_SIZE,
+ len = scnprintf(skx_msg, MSG_SIZE,
"%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
@@ -625,25 +726,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
optype, skx_msg);
}
-static bool skx_error_in_1st_level_mem(const struct mce *m)
+static enum error_source skx_error_source(const struct mce *m)
{
- u32 errcode;
+ u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
- if (!skx_mem_cfg_2lm)
- return false;
+ if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR)
+ return ERR_SRC_NOT_MEMORY;
- errcode = GET_BITFIELD(m->status, 0, 15);
+ if (!skx_mem_cfg_2lm)
+ return ERR_SRC_1LM;
- if ((errcode & 0xef80) != 0x280)
- return false;
+ if (errcode == MCACOD_EXT_MEM_ERR)
+ return ERR_SRC_2LM_NM;
- return true;
+ return ERR_SRC_2LM_FM;
}
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
+ enum error_source err_src;
struct decoded_addr res;
struct mem_ctl_info *mci;
char *type;
@@ -651,18 +754,24 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
- /* ignore unless this is memory related with an address */
- if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+ err_src = skx_error_source(mce);
+
+ /* Ignore unless this is memory related with an address */
+ if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.mce = mce;
- res.addr = mce->addr;
+ res.addr = mce->addr & MCI_ADDR_PHYSADDR;
+ if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) {
+ pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank);
+ return NOTIFY_DONE;
+ }
/* Try driver decoder first */
if (!(driver_decode && driver_decode(&res))) {
/* Then try firmware decoder (ACPI DSM methods) */
- if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
+ if (!(adxl_component_count && skx_adxl_decode(&res, err_src)))
return NOTIFY_DONE;
}
@@ -694,6 +803,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_DONE;
}
+EXPORT_SYMBOL_GPL(skx_mce_check_error);
void skx_remove(void)
{
@@ -704,7 +814,7 @@ void skx_remove(void)
list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
list_del(&d->list);
- for (i = 0; i < NUM_IMC; i++) {
+ for (i = 0; i < d->num_imc; i++) {
if (d->imc[i].mci)
skx_unregister_mci(&d->imc[i]);
@@ -714,7 +824,10 @@ void skx_remove(void)
if (d->imc[i].mbase)
iounmap(d->imc[i].mbase);
- for (j = 0; j < NUM_CHANNELS; j++) {
+ if (d->imc[i].dev)
+ put_device(d->imc[i].dev);
+
+ for (j = 0; j < d->imc[i].num_channels; j++) {
if (d->imc[i].chan[j].cdev)
pci_dev_put(d->imc[i].chan[j].cdev);
}
@@ -731,3 +844,55 @@ void skx_remove(void)
kfree(d);
}
}
+EXPORT_SYMBOL_GPL(skx_remove);
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature.
+ * Exercise the address decode logic by writing an address to
+ * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr.
+ */
+static struct dentry *skx_test;
+
+static int debugfs_u64_set(void *data, u64 val) /* setter behind fops_u64_wo; 'data' is not used */
+{
+ struct mce m;
+
+ pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
+
+ memset(&m, 0, sizeof(m));
+ /*
+ * ADDRV + MemRd. 0x90 is binary 1001 0000, so in the "1mmm cccc"
+ * compound error code layout mmm = 001 and the channel field is 0000.
+ * NOTE(review): this comment used to say "Unknown channel"; confirm
+ * whether channel 0 or an unspecified-channel encoding is intended.
+ */
+ m.status = MCI_STATUS_ADDRV + 0x90;
+ /* One corrected error */
+ m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
+ m.addr = val; /* the address written by the user, taken as-is */
+ skx_mce_check_error(NULL, 0, &m); /* feed the fake record to the regular handler; its result is ignored */
+
+ return 0; /* always reports success to the writer */
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+/*
+ * Create the debugfs test directory @name with a write-only "addr" file
+ * whose writes are handled by debugfs_u64_set().
+ *
+ * The EDAC debugfs helpers are checked for both NULL and ERR_PTR results:
+ * a plain NULL test would let an ERR_PTR through and leave it in skx_test.
+ * On any failure skx_test ends up NULL, so skx_teardown_debug() has
+ * nothing to remove.
+ */
+void skx_setup_debug(const char *name)
+{
+	struct dentry *addr_file;
+
+	skx_test = edac_debugfs_create_dir(name);
+	if (IS_ERR_OR_NULL(skx_test)) {
+		skx_test = NULL;
+		return;
+	}
+
+	addr_file = edac_debugfs_create_file("addr", 0200, skx_test,
+					     NULL, &fops_u64_wo);
+	if (IS_ERR_OR_NULL(addr_file)) {
+		/* Do not keep an empty test directory around. */
+		debugfs_remove(skx_test);
+		skx_test = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(skx_setup_debug);
+
+void skx_teardown_debug(void) /* counterpart of skx_setup_debug() */
+{
+ debugfs_remove_recursive(skx_test); /* removes whatever dentry skx_setup_debug() stored in skx_test */
+}
+EXPORT_SYMBOL_GPL(skx_teardown_debug);
+#endif /*CONFIG_EDAC_DEBUG*/
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel server processors");
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 0cbadd3d2cd3..f88038e5b18c 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -29,24 +29,18 @@
#define GET_BITFIELD(v, lo, hi) \
(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
-#define SKX_NUM_IMC 2 /* Memory controllers per socket */
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
-#define I10NM_NUM_DDR_IMC 4
#define I10NM_NUM_DDR_CHANNELS 2
#define I10NM_NUM_DDR_DIMMS 2
-#define I10NM_NUM_HBM_IMC 16
#define I10NM_NUM_HBM_CHANNELS 2
#define I10NM_NUM_HBM_DIMMS 1
-#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
@@ -57,36 +51,127 @@
#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */
/*
+ * According to Intel Architecture spec vol 3B,
+ * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
+ * memory errors should fit one of these masks:
+ * 000f 0000 1mmm cccc (binary)
+ * 000f 0010 1mmm cccc (binary) [RAM used as cache]
+ * where:
+ * f = Correction Report Filtering Bit. If 1, subsequent errors
+ * won't be shown
+ * mmm = error type
+ * cccc = channel
+ */
+#define MCACOD_MEM_ERR_MASK 0xef80
+/*
+ * Errors from either the memory of the 1-level memory system or the
+ * 2nd level memory (the slow "far" memory) of the 2-level memory system.
+ */
+#define MCACOD_MEM_CTL_ERR 0x80
+/*
+ * Errors from the 1st level memory (the fast "near" memory as cache)
+ * of the 2-level memory system.
+ */
+#define MCACOD_EXT_MEM_ERR 0x280
+
+/* Max RRL register sets per {,sub-,pseudo-}channel. */
+#define NUM_RRL_SET 4
+/* Max RRL registers per set. */
+#define NUM_RRL_REG 6
+/* Max correctable error count registers. */
+#define NUM_CECNT_REG 8
+
+/*
+ * Modes of an RRL register set. Each mode pairs which read error is meant
+ * (LRE = last, FRE = first) with where it came from (patrol scrub or
+ * demand). One mode is stored per register set in struct reg_rrl.
+ */
+enum rrl_mode {
+ /* Last read error from patrol scrub. */
+ LRE_SCRUB,
+ /* Last read error from demand. */
+ LRE_DEMAND,
+ /* First read error from patrol scrub. */
+ FRE_SCRUB,
+ /* First read error from demand. */
+ FRE_DEMAND,
+};
+
+/* RRL registers per {,sub-,pseudo-}channel. */
+struct reg_rrl {
+ /* RRL register parts. */
+ int set_num, reg_num; /* presumably the counts actually used, bounded by NUM_RRL_SET / NUM_RRL_REG - TODO confirm */
+ enum rrl_mode modes[NUM_RRL_SET]; /* one mode per register set */
+ u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; /* indexed [set][register] */
+ /* Widths in bytes of the RRL registers of a set (one entry per register). */
+ u8 widths[NUM_RRL_REG];
+ /* RRL control bits of the first register per set. */
+ u32 v_mask;
+ u32 uc_mask;
+ u32 over_mask;
+ u32 en_patspr_mask;
+ u32 noover_mask;
+ u32 en_mask;
+
+ /* CORRERRCNT register parts. */
+ int cecnt_num; /* presumably the number of entries used below, at most NUM_CECNT_REG - TODO confirm */
+ u32 cecnt_offsets[NUM_CECNT_REG]; /* one offset per correctable error count register */
+ u8 cecnt_widths[NUM_CECNT_REG]; /* one width per correctable error count register; unit presumably bytes like widths[] - TODO confirm */
+};
+
+/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
* memory controllers on the die.
*/
struct skx_dev {
- struct list_head list;
+ /* {skx,i10nm}_edac */
u8 bus[4];
int seg;
struct pci_dev *sad_all;
struct pci_dev *util_all;
- struct pci_dev *uracu; /* for i10nm CPU */
- struct pci_dev *pcu_cr3; /* for HBM memory detection */
+ struct pci_dev *uracu;
+ struct pci_dev *pcu_cr3;
u32 mcroute;
+
+ /* imh_edac */
+ /* System-view MMIO base physical addresses. */
+ u64 mmio_base_h_north;
+ u64 mmio_base_h_south;
+ int pkg;
+
+ int num_imc;
+ struct list_head list;
struct skx_imc {
+ /* i10nm_edac */
+ struct pci_dev *mdev;
+
+ /* imh_edac */
+ struct device *dev;
+
struct mem_ctl_info *mci;
- struct pci_dev *mdev; /* for i10nm CPU */
- void __iomem *mbase; /* for i10nm CPU */
- int chan_mmio_sz; /* for i10nm CPU */
+ void __iomem *mbase;
+ int chan_mmio_sz;
int num_channels; /* channels per memory controller */
int num_dimms; /* dimms per channel */
bool hbm_mc;
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
- u8 src_id, node_id;
+ u8 src_id;
+ /*
+ * Some server BIOS may hide certain memory controllers, and the
+ * EDAC driver skips those hidden memory controllers. However, the
+ * ADXL still decodes memory error address using physical memory
+ * controller indices. The mapping table is used to convert the
+ * physical indices (reported by ADXL) to the logical indices
+ * (used by the EDAC driver) of present memory controllers during the
+ * error handling process.
+ */
+ u8 mc_mapping;
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
- u32 retry_rd_err_log_s;
- u32 retry_rd_err_log_d;
- u32 retry_rd_err_log_d2;
+ /*
+ * Two groups of RRL control registers per channel to save default RRL
+ * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
+ */
+ u32 rrl_ctl[2][NUM_RRL_SET];
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
@@ -95,7 +180,7 @@ struct skx_dev {
u8 colbits;
} dimms[NUM_DIMMS];
} chan[NUM_CHANNELS];
- } imc[NUM_IMC];
+ } imc[];
};
struct skx_pvt {
@@ -105,7 +190,9 @@ struct skx_pvt {
enum type {
SKX,
I10NM,
- SPR
+ SPR,
+ GNR,
+ DMR,
};
enum {
@@ -122,6 +209,13 @@ enum {
INDEX_MAX
};
+enum error_source { /* classification returned by skx_error_source() */
+ ERR_SRC_1LM, /* memory error while the 2LM configuration flag (skx_mem_cfg_2lm) is not set */
+ ERR_SRC_2LM_NM, /* 2LM and MCACOD_EXT_MEM_ERR: the fast "near" memory used as cache */
+ ERR_SRC_2LM_FM, /* 2LM and MCACOD_MEM_CTL_ERR: the slow "far" memory */
+ ERR_SRC_NOT_MEMORY, /* masked MCACOD is neither MCACOD_MEM_CTL_ERR nor MCACOD_EXT_MEM_ERR */
+};
+
#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
@@ -149,28 +243,82 @@ struct decoded_addr {
bool decoded_by_adxl;
};
+struct pci_bdf { /* PCI bus/device/function triple packed into bit-fields */
+ u32 bus : 8; /* bus number, 8 bits */
+ u32 dev : 5; /* device number, 5 bits */
+ u32 fun : 3; /* function number, 3 bits */
+};
+
struct res_config {
enum type type;
- /* Configuration agent device ID */
- unsigned int decs_did;
- /* Default bus number configuration register offset */
- int busno_cfg_offset;
+ /* DDR memory controllers per socket */
+ int ddr_imc_num;
+ /* DDR channels per DDR memory controller */
+ int ddr_chan_num;
+ /* DDR DIMMs per DDR memory channel */
+ int ddr_dimm_num;
/* Per DDR channel memory-mapped I/O size */
int ddr_chan_mmio_sz;
+ /* HBM memory controllers per socket */
+ int hbm_imc_num;
+ /* HBM channels per HBM memory controller */
+ int hbm_chan_num;
+ /* HBM DIMMs per HBM memory channel */
+ int hbm_dimm_num;
/* Per HBM channel memory-mapped I/O size */
int hbm_chan_mmio_sz;
bool support_ddr5;
- /* SAD device number and function number */
- unsigned int sad_all_devfn;
- int sad_all_offset;
- /* Offsets of retry_rd_err_log registers */
- u32 *offsets_scrub;
- u32 *offsets_scrub_hbm0;
- u32 *offsets_scrub_hbm1;
- u32 *offsets_demand;
- u32 *offsets_demand2;
- u32 *offsets_demand_hbm0;
- u32 *offsets_demand_hbm1;
+ /* RRL register sets per DDR channel */
+ struct reg_rrl *reg_rrl_ddr;
+ /* RRL register sets per HBM channel */
+ struct reg_rrl *reg_rrl_hbm[2];
+ union {
+ /* {skx,i10nm}_edac */
+ struct {
+ /* Configuration agent device ID */
+ unsigned int decs_did;
+ /* Default bus number configuration register offset */
+ int busno_cfg_offset;
+ struct pci_bdf sad_all_bdf;
+ struct pci_bdf pcu_cr3_bdf;
+ struct pci_bdf util_all_bdf;
+ struct pci_bdf uracu_bdf;
+ struct pci_bdf ddr_mdev_bdf;
+ struct pci_bdf hbm_mdev_bdf;
+ int sad_all_offset;
+ };
+ /* imh_edac */
+ struct {
+ /* MMIO base physical address in local package view */
+ u64 mmio_base_l_north;
+ u64 mmio_base_l_south;
+ u64 ddr_imc_base;
+ u64 ddr_reg_mcmtr_offset;
+ u8 ddr_reg_mcmtr_width;
+ u64 ddr_reg_dimmmtr_offset;
+ u8 ddr_reg_dimmmtr_width;
+ u64 ubox_base;
+ u32 ubox_size;
+ u32 ubox_reg_mmio_base_offset;
+ u8 ubox_reg_mmio_base_width;
+ u32 ubox_reg_socket_id_offset;
+ u8 ubox_reg_socket_id_width;
+ u64 pcu_base;
+ u32 pcu_size;
+ u32 pcu_reg_capid3_offset;
+ u8 pcu_reg_capid3_width;
+ u64 sca_base;
+ u32 sca_size;
+ u32 sca_reg_tolm_offset;
+ u8 sca_reg_tolm_width;
+ u32 sca_reg_tohm_offset;
+ u8 sca_reg_tohm_width;
+ u64 ha_base;
+ u32 ha_size;
+ u32 ha_reg_mode_offset;
+ u8 ha_reg_mode_width;
+ };
+ };
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
@@ -178,17 +326,22 @@ typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
-int __init skx_adxl_get(void);
-void __exit skx_adxl_put(void);
+int skx_adxl_get(void);
+void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
+void skx_set_res_cfg(struct res_config *cfg);
+void skx_init_mc_mapping(struct skx_dev *d);
+void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
-int skx_get_node_id(struct skx_dev *d, u8 *id);
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
+struct list_head *skx_get_edac_list(void);
+
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
+void skx_set_hi_lo(u64 tolm, u64 tohm);
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno,
@@ -197,7 +350,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno, const char *mod_str);
-int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
+int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name,
const char *ctl_name, const char *mod_str,
get_dimm_config_f get_dimm_config,
struct res_config *cfg);
@@ -207,4 +360,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void skx_remove(void);
+#ifdef CONFIG_EDAC_DEBUG
+void skx_setup_debug(const char *name);
+void skx_teardown_debug(void);
+#else
+static inline void skx_setup_debug(const char *name) {}
+static inline void skx_teardown_debug(void) {}
+#endif
+
#endif /* _SKX_COMM_EDAC_H */
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index f7d37c282819..51143b3257de 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -9,9 +9,10 @@
#include <linux/edac.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/sizes.h>
#include <linux/interrupt.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include "edac_module.h"
@@ -300,6 +301,7 @@ struct synps_ecc_status {
/**
* struct synps_edac_priv - DDR memory controller private instance data.
* @baseaddr: Base address of the DDR controller.
+ * @reglock: Concurrent CSRs access lock.
* @message: Buffer for framing the event specific info.
* @stat: ECC status information.
* @p_data: Platform data.
@@ -314,6 +316,7 @@ struct synps_ecc_status {
*/
struct synps_edac_priv {
void __iomem *baseaddr;
+ spinlock_t reglock;
char message[SYNPS_EDAC_MSG_SIZE];
struct synps_ecc_status stat;
const struct synps_platform_data *p_data;
@@ -329,19 +332,29 @@ struct synps_edac_priv {
#endif
};
+enum synps_platform_type { /* stored in synps_platform_data.platform */
+ ZYNQ, /* used by zynq_edac_def; get_ecc_state() takes its zynq_get_dtype()/SCRUB_OFST branch for it */
+ ZYNQMP, /* used by zynqmp_edac_def; handled by the non-ZYNQ branch of get_ecc_state() */
+ SYNPS, /* used by synopsys_edac_def; handled by the non-ZYNQ branch of get_ecc_state() */
+};
+
/**
* struct synps_platform_data - synps platform data structure.
+ * @platform: Identifies the target hardware platform
* @get_error_info: Get EDAC error info.
* @get_mtype: Get mtype.
* @get_dtype: Get dtype.
- * @get_ecc_state: Get ECC state.
+ * @get_mem_info: Get EDAC memory info
* @quirks: To differentiate IPs.
*/
struct synps_platform_data {
+ enum synps_platform_type platform;
int (*get_error_info)(struct synps_edac_priv *priv);
enum mem_type (*get_mtype)(const void __iomem *base);
enum dev_type (*get_dtype)(const void __iomem *base);
- bool (*get_ecc_state)(void __iomem *base);
+#ifdef CONFIG_EDAC_DEBUG
+ u64 (*get_mem_info)(struct synps_edac_priv *priv);
+#endif
int quirks;
};
@@ -400,6 +413,25 @@ out:
return 0;
}
+#ifdef CONFIG_EDAC_DEBUG
+/**
+ * zynqmp_get_mem_info - Convert the poison address to a host interface address.
+ * @priv:	DDR memory controller private instance data.
+ *
+ * Starts from priv->poison_addr. An address at or above SZ_32G is rebased so
+ * that SZ_32G lands on SZ_2G; the result is then shifted right by three bits.
+ *
+ * Return: host interface address.
+ */
+static u64 zynqmp_get_mem_info(struct synps_edac_priv *priv)
+{
+	u64 addr = priv->poison_addr;
+
+	if (addr >= SZ_32G)
+		addr = addr - SZ_32G + SZ_2G;
+
+	return addr >> 3;
+}
+#endif
+
/**
* zynqmp_get_error_info - Get the current ECC error info.
* @priv: DDR memory controller private instance data.
@@ -409,7 +441,8 @@ out:
static int zynqmp_get_error_info(struct synps_edac_priv *priv)
{
struct synps_ecc_status *p;
- u32 regval, clearval = 0;
+ u32 regval, clearval;
+ unsigned long flags;
void __iomem *base;
base = priv->baseaddr;
@@ -453,10 +486,14 @@ ue_err:
p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
out:
- clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT;
- clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
+ spin_lock_irqsave(&priv->reglock, flags);
+
+ clearval = readl(base + ECC_CLR_OFST) |
+ ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT |
+ ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
writel(clearval, base + ECC_CLR_OFST);
- writel(0x0, base + ECC_CLR_OFST);
+
+ spin_unlock_irqrestore(&priv->reglock, flags);
return 0;
}
@@ -516,24 +553,41 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
static void enable_intr(struct synps_edac_priv *priv)
{
+ unsigned long flags;
+
/* Enable UE/CE Interrupts */
- if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
- writel(DDR_UE_MASK | DDR_CE_MASK,
- priv->baseaddr + ECC_CLR_OFST);
- else
+ if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) {
writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+ return;
+ }
+
+ spin_lock_irqsave(&priv->reglock, flags);
+
+ writel(DDR_UE_MASK | DDR_CE_MASK,
+ priv->baseaddr + ECC_CLR_OFST);
+
+ spin_unlock_irqrestore(&priv->reglock, flags);
}
static void disable_intr(struct synps_edac_priv *priv)
{
+ unsigned long flags;
+
/* Disable UE/CE Interrupts */
- if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
- writel(0x0, priv->baseaddr + ECC_CLR_OFST);
- else
+ if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) {
writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+
+ return;
+ }
+
+ spin_lock_irqsave(&priv->reglock, flags);
+
+ writel(0, priv->baseaddr + ECC_CLR_OFST);
+
+ spin_unlock_irqrestore(&priv->reglock, flags);
}
/**
@@ -577,8 +631,6 @@ static irqreturn_t intr_handler(int irq, void *dev_id)
/* v3.0 of the controller does not have this register */
if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
- else
- enable_intr(priv);
return IRQ_HANDLED;
}
@@ -674,51 +726,38 @@ static enum dev_type zynqmp_get_dtype(const void __iomem *base)
return dt;
}
-/**
- * zynq_get_ecc_state - Return the controller ECC enable/disable status.
- * @base: DDR memory controller base address.
- *
- * Get the ECC enable/disable status of the controller.
- *
- * Return: true if enabled, otherwise false.
- */
-static bool zynq_get_ecc_state(void __iomem *base)
-{
- enum dev_type dt;
- u32 ecctype;
-
- dt = zynq_get_dtype(base);
- if (dt == DEV_UNKNOWN)
- return false;
-
- ecctype = readl(base + SCRUB_OFST) & SCRUB_MODE_MASK;
- if ((ecctype == SCRUB_MODE_SECDED) && (dt == DEV_X2))
- return true;
-
- return false;
-}
-
-/**
- * zynqmp_get_ecc_state - Return the controller ECC enable/disable status.
- * @base: DDR memory controller base address.
- *
- * Get the ECC enable/disable status for the controller.
- *
- * Return: a ECC status boolean i.e true/false - enabled/disabled.
- */
-static bool zynqmp_get_ecc_state(void __iomem *base)
+static bool get_ecc_state(struct synps_edac_priv *priv)
{
+ u32 ecctype, clearval;
enum dev_type dt;
- u32 ecctype;
-
- dt = zynqmp_get_dtype(base);
- if (dt == DEV_UNKNOWN)
- return false;
- ecctype = readl(base + ECC_CFG0_OFST) & SCRUB_MODE_MASK;
- if ((ecctype == SCRUB_MODE_SECDED) &&
- ((dt == DEV_X2) || (dt == DEV_X4) || (dt == DEV_X8)))
- return true;
+ if (priv->p_data->platform == ZYNQ) {
+ dt = zynq_get_dtype(priv->baseaddr);
+ if (dt == DEV_UNKNOWN)
+ return false;
+
+ ecctype = readl(priv->baseaddr + SCRUB_OFST) & SCRUB_MODE_MASK;
+ if (ecctype == SCRUB_MODE_SECDED && dt == DEV_X2) {
+ clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_UE_ERR;
+ writel(clearval, priv->baseaddr + ECC_CTRL_OFST);
+ writel(0x0, priv->baseaddr + ECC_CTRL_OFST);
+ return true;
+ }
+ } else {
+ dt = zynqmp_get_dtype(priv->baseaddr);
+ if (dt == DEV_UNKNOWN)
+ return false;
+
+ ecctype = readl(priv->baseaddr + ECC_CFG0_OFST) & SCRUB_MODE_MASK;
+ if (ecctype == SCRUB_MODE_SECDED &&
+ (dt == DEV_X2 || dt == DEV_X4 || dt == DEV_X8)) {
+ clearval = readl(priv->baseaddr + ECC_CLR_OFST) |
+ ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT |
+ ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
+ writel(clearval, priv->baseaddr + ECC_CLR_OFST);
+ return true;
+ }
+ }
return false;
}
@@ -888,18 +927,21 @@ static int setup_irq(struct mem_ctl_info *mci,
}
static const struct synps_platform_data zynq_edac_def = {
+ .platform = ZYNQ,
.get_error_info = zynq_get_error_info,
.get_mtype = zynq_get_mtype,
.get_dtype = zynq_get_dtype,
- .get_ecc_state = zynq_get_ecc_state,
.quirks = 0,
};
static const struct synps_platform_data zynqmp_edac_def = {
+ .platform = ZYNQMP,
.get_error_info = zynqmp_get_error_info,
.get_mtype = zynqmp_get_mtype,
.get_dtype = zynqmp_get_dtype,
- .get_ecc_state = zynqmp_get_ecc_state,
+#ifdef CONFIG_EDAC_DEBUG
+ .get_mem_info = zynqmp_get_mem_info,
+#endif
.quirks = (DDR_ECC_INTR_SUPPORT
#ifdef CONFIG_EDAC_DEBUG
| DDR_ECC_DATA_POISON_SUPPORT
@@ -908,10 +950,10 @@ static const struct synps_platform_data zynqmp_edac_def = {
};
static const struct synps_platform_data synopsys_edac_def = {
+ .platform = SYNPS,
.get_error_info = zynqmp_get_error_info,
.get_mtype = zynqmp_get_mtype,
.get_dtype = zynqmp_get_dtype,
- .get_ecc_state = zynqmp_get_ecc_state,
.quirks = (DDR_ECC_INTR_SUPPORT | DDR_ECC_INTR_SELF_CLEAR
#ifdef CONFIG_EDAC_DEBUG
| DDR_ECC_DATA_POISON_SUPPORT
@@ -953,10 +995,16 @@ MODULE_DEVICE_TABLE(of, synps_edac_match);
static void ddr_poison_setup(struct synps_edac_priv *priv)
{
int col = 0, row = 0, bank = 0, bankgrp = 0, rank = 0, regval;
+ const struct synps_platform_data *p_data;
int index;
ulong hif_addr = 0;
- hif_addr = priv->poison_addr >> 3;
+ p_data = priv->p_data;
+
+ if (p_data->get_mem_info)
+ hif_addr = p_data->get_mem_info(priv);
+ else
+ hif_addr = priv->poison_addr >> 3;
for (index = 0; index < DDR_MAX_ROW_SHIFT; index++) {
if (priv->row_shift[index])
@@ -1325,11 +1373,9 @@ static int mc_probe(struct platform_device *pdev)
struct synps_edac_priv *priv;
struct mem_ctl_info *mci;
void __iomem *baseaddr;
- struct resource *res;
int rc;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- baseaddr = devm_ioremap_resource(&pdev->dev, res);
+ baseaddr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(baseaddr))
return PTR_ERR(baseaddr);
@@ -1337,10 +1383,6 @@ static int mc_probe(struct platform_device *pdev)
if (!p_data)
return -ENODEV;
- if (!p_data->get_ecc_state(baseaddr)) {
- edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n");
- return -ENXIO;
- }
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = SYNPS_EDAC_NR_CSROWS;
@@ -1360,6 +1402,13 @@ static int mc_probe(struct platform_device *pdev)
priv = mci->pvt_info;
priv->baseaddr = baseaddr;
priv->p_data = p_data;
+ if (!get_ecc_state(priv)) {
+ edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n");
+ rc = -ENODEV;
+ goto free_edac_mc;
+ }
+
+ spin_lock_init(&priv->reglock);
mc_init(mci, pdev);
@@ -1411,7 +1460,7 @@ free_edac_mc:
*
* Return: Unconditionally 0
*/
-static int mc_remove(struct platform_device *pdev)
+static void mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
struct synps_edac_priv *priv = mci->pvt_info;
@@ -1426,8 +1475,6 @@ static int mc_remove(struct platform_device *pdev)
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static struct platform_driver synps_edac_mc_driver = {
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 0bcd9f02c84a..75c04dfc3962 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -35,12 +35,6 @@ enum {
ERR_UNKNOWN = 3,
};
-#define MAX_SYNDROME_REGS 4
-
-struct error_syndrome {
- u64 reg[MAX_SYNDROME_REGS];
-};
-
struct error_descr {
int type;
u64 mask;
@@ -481,7 +475,7 @@ static int thunderx_create_debugfs_nodes(struct dentry *parent,
ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
parent, data, &attrs[i]->fops);
- if (!ent)
+ if (IS_ERR(ent))
break;
}
@@ -1133,7 +1127,7 @@ static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
decode_register(other, OCX_OTHER_SIZE,
ocx_com_errors, ctx->reg_com_int);
- strncat(msg, other, OCX_MESSAGE_SIZE);
+ strlcat(msg, other, OCX_MESSAGE_SIZE);
for (lane = 0; lane < OCX_RX_LANES; lane++)
if (ctx->reg_com_int & BIT(lane)) {
@@ -1142,12 +1136,12 @@ static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
lane, ctx->reg_lane_int[lane],
lane, ctx->reg_lane_stat11[lane]);
- strncat(msg, other, OCX_MESSAGE_SIZE);
+ strlcat(msg, other, OCX_MESSAGE_SIZE);
decode_register(other, OCX_OTHER_SIZE,
ocx_lane_errors,
ctx->reg_lane_int[lane]);
- strncat(msg, other, OCX_MESSAGE_SIZE);
+ strlcat(msg, other, OCX_MESSAGE_SIZE);
}
if (ctx->reg_com_int & OCX_COM_INT_CE)
@@ -1217,7 +1211,7 @@ static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
decode_register(other, OCX_OTHER_SIZE,
ocx_com_link_errors, ctx->reg_com_link_int);
- strncat(msg, other, OCX_MESSAGE_SIZE);
+ strlcat(msg, other, OCX_MESSAGE_SIZE);
if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
@@ -1365,8 +1359,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
idx = edac_device_alloc_index();
snprintf(name, sizeof(name), "OCX%d", idx);
edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
- name, 1, "CCPI", 1,
- 0, NULL, 0, idx);
+ name, 1, "CCPI", 1, 0, idx);
if (!edac_dev) {
dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
return -ENOMEM;
@@ -1896,7 +1889,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
- strncat(msg, other, L2C_MESSAGE_SIZE);
+ strlcat(msg, other, L2C_MESSAGE_SIZE);
if (ctx->reg_int & mask_ue)
edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
@@ -2004,8 +1997,7 @@ static int thunderx_l2c_probe(struct pci_dev *pdev,
snprintf(name, sizeof(name), fmt, idx);
edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
- name, 1, "L2C", 1, 0,
- NULL, 0, idx);
+ name, 1, "L2C", 1, 0, idx);
if (!edac_dev) {
dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
return -ENOMEM;
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 6971ded598de..39cc2ef9cac4 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -312,14 +312,12 @@ err:
return ret;
}
-static int ti_edac_remove(struct platform_device *pdev)
+static void ti_edac_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static struct platform_driver ti_edac_driver = {
diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c
new file mode 100644
index 000000000000..5a43b5d43ca2
--- /dev/null
+++ b/drivers/edac/versal_edac.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Versal memory controller driver
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ */
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/firmware/xlnx-event-manager.h>
+
+#include "edac_module.h"
+
+/* Granularity of reported error in bytes */
+#define XDDR_EDAC_ERR_GRAIN 1
+
+#define XDDR_EDAC_MSG_SIZE 256
+#define EVENT 2
+
+#define XDDR_PCSR_OFFSET 0xC
+#define XDDR_ISR_OFFSET 0x14
+#define XDDR_IRQ_EN_OFFSET 0x20
+#define XDDR_IRQ1_EN_OFFSET 0x2C
+#define XDDR_IRQ_DIS_OFFSET 0x24
+#define XDDR_IRQ_CE_MASK GENMASK(18, 15)
+#define XDDR_IRQ_UE_MASK GENMASK(14, 11)
+
+#define XDDR_REG_CONFIG0_OFFSET 0x258
+#define XDDR_REG_CONFIG0_BUS_WIDTH_MASK GENMASK(19, 18)
+#define XDDR_REG_CONFIG0_NUM_CHANS_MASK BIT(17)
+#define XDDR_REG_CONFIG0_NUM_RANKS_MASK GENMASK(15, 14)
+#define XDDR_REG_CONFIG0_SIZE_MASK GENMASK(10, 8)
+
+#define XDDR_REG_PINOUT_OFFSET 0x25C
+#define XDDR_REG_PINOUT_ECC_EN_MASK GENMASK(7, 5)
+
+#define ECCW0_FLIP_CTRL 0x109C
+#define ECCW0_FLIP0_OFFSET 0x10A0
+#define ECCW0_FLIP0_BITS 31
+#define ECCW0_FLIP1_OFFSET 0x10A4
+#define ECCW1_FLIP_CTRL 0x10AC
+#define ECCW1_FLIP0_OFFSET 0x10B0
+#define ECCW1_FLIP1_OFFSET 0x10B4
+#define ECCR0_CERR_STAT_OFFSET 0x10BC
+#define ECCR0_CE_ADDR_LO_OFFSET 0x10C0
+#define ECCR0_CE_ADDR_HI_OFFSET 0x10C4
+#define ECCR0_CE_DATA_LO_OFFSET 0x10C8
+#define ECCR0_CE_DATA_HI_OFFSET 0x10CC
+#define ECCR0_CE_DATA_PAR_OFFSET 0x10D0
+
+#define ECCR0_UERR_STAT_OFFSET 0x10D4
+#define ECCR0_UE_ADDR_LO_OFFSET 0x10D8
+#define ECCR0_UE_ADDR_HI_OFFSET 0x10DC
+#define ECCR0_UE_DATA_LO_OFFSET 0x10E0
+#define ECCR0_UE_DATA_HI_OFFSET 0x10E4
+#define ECCR0_UE_DATA_PAR_OFFSET 0x10E8
+
+/*
+ * ECCR1 correctable error log registers. They use the same 4-byte stride
+ * as the ECCR0 set: DATA_HI sits between DATA_LO (0x1100) and DATA_PAR
+ * (0x1108) and must not alias ECCR1_UERR_STAT_OFFSET (0x110C).
+ */
+#define ECCR1_CERR_STAT_OFFSET 0x10F4
+#define ECCR1_CE_ADDR_LO_OFFSET 0x10F8
+#define ECCR1_CE_ADDR_HI_OFFSET 0x10FC
+#define ECCR1_CE_DATA_LO_OFFSET 0x1100
+#define ECCR1_CE_DATA_HI_OFFSET 0x1104
+#define ECCR1_CE_DATA_PAR_OFFSET 0x1108
+
+#define ECCR1_UERR_STAT_OFFSET 0x110C
+#define ECCR1_UE_ADDR_LO_OFFSET 0x1110
+#define ECCR1_UE_ADDR_HI_OFFSET 0x1114
+#define ECCR1_UE_DATA_LO_OFFSET 0x1118
+#define ECCR1_UE_DATA_HI_OFFSET 0x111C
+#define ECCR1_UE_DATA_PAR_OFFSET 0x1120
+
+#define XDDR_NOC_REG_ADEC4_OFFSET 0x44
+#define RANK_1_MASK GENMASK(11, 6)
+#define LRANK_0_MASK GENMASK(17, 12)
+#define LRANK_1_MASK GENMASK(23, 18)
+#define MASK_24 GENMASK(29, 24)
+
+#define XDDR_NOC_REG_ADEC5_OFFSET 0x48
+#define XDDR_NOC_REG_ADEC6_OFFSET 0x4C
+#define XDDR_NOC_REG_ADEC7_OFFSET 0x50
+#define XDDR_NOC_REG_ADEC8_OFFSET 0x54
+#define XDDR_NOC_REG_ADEC9_OFFSET 0x58
+#define XDDR_NOC_REG_ADEC10_OFFSET 0x5C
+
+#define XDDR_NOC_REG_ADEC11_OFFSET 0x60
+#define MASK_0 GENMASK(5, 0)
+#define GRP_0_MASK GENMASK(11, 6)
+#define GRP_1_MASK GENMASK(17, 12)
+#define CH_0_MASK GENMASK(23, 18)
+
+#define XDDR_NOC_REG_ADEC12_OFFSET 0x71C
+#define XDDR_NOC_REG_ADEC13_OFFSET 0x720
+
+#define XDDR_NOC_REG_ADEC14_OFFSET 0x724
+#define XDDR_NOC_ROW_MATCH_MASK GENMASK(17, 0)
+#define XDDR_NOC_COL_MATCH_MASK GENMASK(27, 18)
+#define XDDR_NOC_BANK_MATCH_MASK GENMASK(29, 28)
+#define XDDR_NOC_GRP_MATCH_MASK GENMASK(31, 30)
+
+#define XDDR_NOC_REG_ADEC15_OFFSET 0x728
+#define XDDR_NOC_RANK_MATCH_MASK GENMASK(1, 0)
+#define XDDR_NOC_LRANK_MATCH_MASK GENMASK(4, 2)
+#define XDDR_NOC_CH_MATCH_MASK BIT(5)
+#define XDDR_NOC_MOD_SEL_MASK BIT(6)
+#define XDDR_NOC_MATCH_EN_MASK BIT(8)
+
+#define ECCR_UE_CE_ADDR_HI_ROW_MASK GENMASK(7, 0)
+
+#define XDDR_EDAC_NR_CSROWS 1
+#define XDDR_EDAC_NR_CHANS 1
+
+#define XDDR_BUS_WIDTH_64 0
+#define XDDR_BUS_WIDTH_32 1
+#define XDDR_BUS_WIDTH_16 2
+
+#define XDDR_MAX_ROW_CNT 18
+#define XDDR_MAX_COL_CNT 10
+#define XDDR_MAX_RANK_CNT 2
+#define XDDR_MAX_LRANK_CNT 3
+#define XDDR_MAX_BANK_CNT 2
+#define XDDR_MAX_GRP_CNT 2
+
+/*
+ * Config and system registers are usually locked. This is the
+ * code which unlocks them in order to accept writes. See
+ *
+ * https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register
+ */
+#define PCSR_UNLOCK_VAL 0xF9E8D7C6
+#define PCSR_LOCK_VAL 1
+#define XDDR_ERR_TYPE_CE 0
+#define XDDR_ERR_TYPE_UE 1
+
+#define XILINX_DRAM_SIZE_4G 0
+#define XILINX_DRAM_SIZE_6G 1
+#define XILINX_DRAM_SIZE_8G 2
+#define XILINX_DRAM_SIZE_12G 3
+#define XILINX_DRAM_SIZE_16G 4
+#define XILINX_DRAM_SIZE_32G 5
+#define NUM_UE_BITPOS 2
+
+/**
+ * union ecc_error_info - ECC error log information.
+ * @burstpos: Burst position.
+ * @lrank: Logical Rank number.
+ * @rank: Rank number.
+ * @group: Group number.
+ * @bank: Bank number.
+ * @col: Column number.
+ * @row: Row number (lower 10 bits).
+ * @rowhi: Row number higher bits.
+ * @i: ECC error info as one raw 64-bit value.
+ *
+ * The bit-fields add up to 32 bits and are followed by the 32-bit @rowhi.
+ * The error log readers store "address low | address high << 32" through
+ * @i and the decoded fields are then read back through the bit-fields.
+ */
+union ecc_error_info {
+ struct {
+ u32 burstpos:3;
+ u32 lrank:3;
+ u32 rank:2;
+ u32 group:2;
+ u32 bank:2;
+ u32 col:10;
+ u32 row:10;
+ u32 rowhi;
+ };
+ u64 i;
+} __packed;
+
+union edac_info { /* one 32-bit ADEC register value split into 6-bit fields */
+ struct { /* view used when filling priv->row_bit[] */
+ u32 row0:6;
+ u32 row1:6;
+ u32 row2:6;
+ u32 row3:6;
+ u32 row4:6;
+ u32 reserved:2;
+ };
+ struct { /* same layout, view used when filling priv->col_bit[] */
+ u32 col1:6;
+ u32 col2:6;
+ u32 col3:6;
+ u32 col4:6;
+ u32 col5:6;
+ u32 reservedcol:2;
+ };
+ u32 i; /* raw register value as returned by readl() */
+} __packed;
+
+/**
+ * struct ecc_status - ECC status information to report.
+ * @ceinfo: Correctable error log information; entry 0 is filled from the
+ *          ECCR0 registers and entry 1 from the ECCR1 registers.
+ * @ueinfo: Uncorrectable error log information, indexed like @ceinfo.
+ * @channel: Channel number; selects which @ceinfo/@ueinfo entry is reported.
+ * @error_type: Error type information (XDDR_ERR_TYPE_CE or XDDR_ERR_TYPE_UE).
+ */
+struct ecc_status {
+ union ecc_error_info ceinfo[2];
+ union ecc_error_info ueinfo[2];
+ u8 channel;
+ u8 error_type;
+};
+
+/**
+ * struct edac_priv - DDR memory controller private instance data.
+ * @ddrmc_baseaddr: Base address of the DDR controller.
+ * @ddrmc_noc_baseaddr: Base address of the DDRMC NOC.
+ * @message: Buffer for framing the event specific info.
+ * @mc_id: Memory controller ID.
+ * @ce_cnt: Correctable error count.
+ * @ue_cnt: Uncorrectable error count.
+ * @stat: ECC status information.
+ * @lrank_bit: Physical address bit position of each logical rank bit.
+ * @rank_bit: Physical address bit position of each rank bit.
+ * @row_bit: Physical address bit position of each row bit.
+ * @col_bit: Physical address bit position of each column bit.
+ * @bank_bit: Physical address bit position of each bank bit.
+ * @grp_bit: Physical address bit position of each group bit.
+ * @ch_bit: Physical address bit position of the channel bit.
+ * @err_inject_addr: Data poison address (CONFIG_EDAC_DEBUG builds only).
+ * @debugfs: Debugfs handle (CONFIG_EDAC_DEBUG builds only).
+ */
+struct edac_priv {
+ void __iomem *ddrmc_baseaddr;
+ void __iomem *ddrmc_noc_baseaddr;
+ char message[XDDR_EDAC_MSG_SIZE];
+ u32 mc_id;
+ u32 ce_cnt;
+ u32 ue_cnt;
+ struct ecc_status stat;
+ u32 lrank_bit[3];
+ u32 rank_bit[2];
+ u32 row_bit[18];
+ u32 col_bit[10];
+ u32 bank_bit[2];
+ u32 grp_bit[2];
+ u32 ch_bit;
+#ifdef CONFIG_EDAC_DEBUG
+ u64 err_inject_addr;
+ struct dentry *debugfs;
+#endif
+};
+
+/**
+ * get_ce_error_info - Latch the correctable error logs of both ECC readers.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Marks the pending status as a correctable error and stores the ECCR0 and
+ * ECCR1 address logs in priv->stat.ceinfo[0] and [1]. The low address word
+ * lands in the lower 32 bits and the high address word in the upper 32 bits
+ * of each entry.
+ */
+static void get_ce_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base = priv->ddrmc_baseaddr;
+	struct ecc_status *p = &priv->stat;
+	u32 regval;
+	u64 reghi;
+
+	p->error_type = XDDR_ERR_TYPE_CE;
+
+	regval = readl(ddrmc_base + ECCR0_CE_ADDR_LO_OFFSET);
+	/*
+	 * The upper row bits come from the ADDR_HI register, as for every
+	 * other log in this driver; they must not be derived from ADDR_LO.
+	 */
+	reghi = readl(ddrmc_base + ECCR0_CE_ADDR_HI_OFFSET) &
+		ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ceinfo[0].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR0_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_PAR_OFFSET));
+
+	regval = readl(ddrmc_base + ECCR1_CE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR1_CE_ADDR_HI_OFFSET);
+	p->ceinfo[1].i = regval | reghi << 32;
+	/*
+	 * NOTE(review): ADDR_HI is read a second time and the value is unused;
+	 * the access is kept in case the hardware depends on it - confirm.
+	 */
+	regval = readl(ddrmc_base + ECCR1_CE_ADDR_HI_OFFSET);
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR1_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_PAR_OFFSET));
+}
+
+/*
+ * Latch the uncorrectable error logs of both ECC readers into priv->stat
+ * and mark the pending status as an uncorrectable error.
+ */
+static void get_ue_error_info(struct edac_priv *priv)
+{
+	void __iomem *base = priv->ddrmc_baseaddr;
+	struct ecc_status *stat = &priv->stat;
+	u32 lo;
+	u64 hi;
+
+	stat->error_type = XDDR_ERR_TYPE_UE;
+
+	/* Reader 0: low word in bits 31:0, high word in bits 63:32. */
+	lo = readl(base + ECCR0_UE_ADDR_LO_OFFSET);
+	hi = readl(base + ECCR0_UE_ADDR_HI_OFFSET);
+	stat->ueinfo[0].i = hi << 32 | lo;
+	/* ADDR_HI is read once more here; the value itself is not used. */
+	lo = readl(base + ECCR0_UE_ADDR_HI_OFFSET);
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(base + ECCR0_UE_DATA_LO_OFFSET),
+		 readl(base + ECCR0_UE_DATA_HI_OFFSET),
+		 readl(base + ECCR0_UE_DATA_PAR_OFFSET));
+
+	/* Reader 1: same packing. */
+	lo = readl(base + ECCR1_UE_ADDR_LO_OFFSET);
+	hi = readl(base + ECCR1_UE_ADDR_HI_OFFSET);
+	stat->ueinfo[1].i = hi << 32 | lo;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(base + ECCR1_UE_DATA_LO_OFFSET),
+		 readl(base + ECCR1_UE_DATA_HI_OFFSET),
+		 readl(base + ECCR1_UE_DATA_PAR_OFFSET));
+}
+
+/**
+ * get_error_info - Collect pending ECC error logs and clear the status.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Reads the CE/UE status registers of both ECC readers, latches the logs of
+ * whichever kind is flagged into priv->stat and then clears all four status
+ * registers.
+ *
+ * Return: true when no error is flagged (nothing to handle), false when
+ * error information has been stored in priv->stat.
+ */
+static bool get_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base = priv->ddrmc_baseaddr;
+	struct ecc_status *p = &priv->stat;
+	u32 eccr0_ceval, eccr1_ceval, eccr0_ueval, eccr1_ueval;
+
+	eccr0_ceval = readl(ddrmc_base + ECCR0_CERR_STAT_OFFSET);
+	eccr1_ceval = readl(ddrmc_base + ECCR1_CERR_STAT_OFFSET);
+	eccr0_ueval = readl(ddrmc_base + ECCR0_UERR_STAT_OFFSET);
+	eccr1_ueval = readl(ddrmc_base + ECCR1_UERR_STAT_OFFSET);
+
+	if (!eccr0_ceval && !eccr1_ceval && !eccr0_ueval && !eccr1_ueval)
+		return true;
+
+	/* Channel 1 is reported only when reader 0 has no CE status set. */
+	p->channel = eccr0_ceval ? 0 : 1;
+
+	if (eccr0_ceval || eccr1_ceval)
+		get_ce_error_info(priv);
+
+	if (eccr0_ueval || eccr1_ueval) {
+		/* A pending UE overrides the channel chosen for the CE. */
+		p->channel = eccr0_ueval ? 0 : 1;
+		get_ue_error_info(priv);
+	}
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, ddrmc_base + XDDR_PCSR_OFFSET);
+
+	writel(0, ddrmc_base + ECCR0_CERR_STAT_OFFSET);
+	writel(0, ddrmc_base + ECCR1_CERR_STAT_OFFSET);
+	writel(0, ddrmc_base + ECCR0_UERR_STAT_OFFSET);
+	writel(0, ddrmc_base + ECCR1_UERR_STAT_OFFSET);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, ddrmc_base + XDDR_PCSR_OFFSET);
+
+	return false;
+}
+
+/* Place bit n of @value at address bit @bit_pos[n], for n in [0, @count). */
+static unsigned long xddr_scatter_bits(u32 value, const u32 *bit_pos, u32 count)
+{
+	unsigned long addr = 0;
+	u32 n;
+
+	for (n = 0; n < count; n++)
+		addr |= ((value >> n) & BIT(0)) << bit_pos[n];
+
+	return addr;
+}
+
+/**
+ * convert_to_physical - Convert to physical address.
+ * @priv: DDR memory controller private instance data.
+ * @pinf: ECC error info structure.
+ *
+ * Each decoded field (row, column, bank, group, rank, logical rank and the
+ * channel) is spread over the address bits recorded in the per-field
+ * bit-position tables of @priv.
+ *
+ * Return: Physical address of the DDR memory.
+ */
+static unsigned long convert_to_physical(struct edac_priv *priv, union ecc_error_info pinf)
+{
+	u32 full_row = pinf.rowhi << 10 | pinf.row;
+	unsigned long phys;
+
+	phys  = xddr_scatter_bits(full_row, priv->row_bit, XDDR_MAX_ROW_CNT);
+	phys |= xddr_scatter_bits(pinf.col, priv->col_bit, XDDR_MAX_COL_CNT);
+	phys |= xddr_scatter_bits(pinf.bank, priv->bank_bit, XDDR_MAX_BANK_CNT);
+	phys |= xddr_scatter_bits(pinf.group, priv->grp_bit, XDDR_MAX_GRP_CNT);
+	phys |= xddr_scatter_bits(pinf.rank, priv->rank_bit, XDDR_MAX_RANK_CNT);
+	phys |= xddr_scatter_bits(pinf.lrank, priv->lrank_bit, XDDR_MAX_LRANK_CNT);
+	phys |= (priv->stat.channel & BIT(0)) << priv->ch_bit;
+
+	return phys;
+}
+
+/**
+ * handle_error - Handle Correctable and Uncorrectable errors.
+ * @mci: EDAC memory controller instance.
+ * @stat: ECC status structure.
+ *
+ * Bumps the matching error counter, formats the event message for the
+ * channel recorded in @stat, reports it to the EDAC core and finally wipes
+ * @stat. An unknown error type only wipes @stat.
+ */
+static void handle_error(struct mem_ctl_info *mci, struct ecc_status *stat)
+{
+	struct edac_priv *priv = mci->pvt_info;
+	enum hw_event_mc_err_type event;
+	union ecc_error_info pinf;
+	const char *label;
+
+	switch (stat->error_type) {
+	case XDDR_ERR_TYPE_CE:
+		priv->ce_cnt++;
+		pinf = stat->ceinfo[stat->channel];
+		event = HW_EVENT_ERR_CORRECTED;
+		label = "CE";
+		break;
+	case XDDR_ERR_TYPE_UE:
+		priv->ue_cnt++;
+		pinf = stat->ueinfo[stat->channel];
+		event = HW_EVENT_ERR_UNCORRECTED;
+		label = "UE";
+		break;
+	default:
+		goto wipe;
+	}
+
+	snprintf(priv->message, XDDR_EDAC_MSG_SIZE,
+		 "Error type:%s MC ID: %d Addr at %lx Burst Pos: %d\n",
+		 label, priv->mc_id,
+		 convert_to_physical(priv, pinf), pinf.burstpos);
+
+	edac_mc_handle_error(event, mci, 1, 0, 0, 0, 0, 0, -1,
+			     priv->message, "");
+
+wipe:
+	memset(stat, 0, sizeof(*stat));
+}
+
+/**
+ * err_callback - Handle Correctable and Uncorrectable errors.
+ * @payload: payload data.
+ * @data: mci controller data.
+ *
+ * Event callback registered through xlnx_register_event(). It snapshots the
+ * interrupt status, collects and reports the pending ECC error, then writes
+ * the snapshot back to the ISR to clear it. When no error status is pending
+ * it returns early without touching the ISR.
+ */
+static void err_callback(const u32 *payload, void *data)
+{
+	struct mem_ctl_info *mci = data;
+	struct edac_priv *priv = mci->pvt_info;
+	struct ecc_status *p = &priv->stat;
+	u32 regval;	/* raw 32-bit register image, so keep it unsigned */
+
+	regval = readl(priv->ddrmc_baseaddr + XDDR_ISR_OFFSET);
+
+	if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_CR)
+		p->error_type = XDDR_ERR_TYPE_CE;
+	if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_NCR)
+		p->error_type = XDDR_ERR_TYPE_UE;
+
+	if (get_error_info(priv))
+		return;
+
+	handle_error(mci, &priv->stat);
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+
+	/* Clear the ISR */
+	writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	edac_dbg(3, "Total error count CE %d UE %d\n",
+		 priv->ce_cnt, priv->ue_cnt);
+}
+
+/**
+ * get_dwidth - Return the controller memory width.
+ * @base: DDR memory controller base address.
+ *
+ * Decodes the bus width field of the CONFIG0 register into the matching
+ * EDAC device type.
+ *
+ * Return: a device type width enumeration, DEV_UNKNOWN for an unrecognised
+ * bus width encoding.
+ */
+static enum dev_type get_dwidth(const void __iomem *base)
+{
+	u32 config0 = readl(base + XDDR_REG_CONFIG0_OFFSET);
+
+	switch (FIELD_GET(XDDR_REG_CONFIG0_BUS_WIDTH_MASK, config0)) {
+	case XDDR_BUS_WIDTH_64:
+		return DEV_X8;
+	case XDDR_BUS_WIDTH_32:
+		return DEV_X4;
+	case XDDR_BUS_WIDTH_16:
+		return DEV_X2;
+	default:
+		return DEV_UNKNOWN;
+	}
+}
+
+/**
+ * get_ecc_state - Return the controller ECC enable/disable status.
+ * @base: DDR memory controller base address.
+ *
+ * ECC is reported as disabled when the bus width cannot be decoded;
+ * otherwise the ECC enable bits of the PINOUT register decide.
+ *
+ * Return: true when ECC is enabled, false when it is disabled.
+ */
+static bool get_ecc_state(void __iomem *base)
+{
+	u32 pinout;
+
+	if (get_dwidth(base) == DEV_UNKNOWN)
+		return false;
+
+	pinout = readl(base + XDDR_REG_PINOUT_OFFSET);
+
+	return (pinout & XDDR_REG_PINOUT_ECC_EN_MASK) != 0;
+}
+
+/**
+ * get_memsize - Get the size of the attached memory device.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Return: the memory size in bytes, 0 for an invalid size encoding.
+ */
+static u64 get_memsize(struct edac_priv *priv)
+{
+	/* Size in GiB for every valid encoding of the CONFIG0 size field. */
+	static const u8 size_gib[] = {
+		[XILINX_DRAM_SIZE_4G]  = 4,
+		[XILINX_DRAM_SIZE_6G]  = 6,
+		[XILINX_DRAM_SIZE_8G]  = 8,
+		[XILINX_DRAM_SIZE_12G] = 12,
+		[XILINX_DRAM_SIZE_16G] = 16,
+		[XILINX_DRAM_SIZE_32G] = 32,
+	};
+	u32 code;
+
+	code = readl(priv->ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET);
+	code = FIELD_GET(XDDR_REG_CONFIG0_SIZE_MASK, code);
+
+	/* Invalid configuration */
+	if (code >= ARRAY_SIZE(size_gib))
+		return 0;
+
+	return (u64)size_gib[code] * SZ_1G;
+}
+
+/**
+ * init_csrows - Initialize the csrow data.
+ * @mci: EDAC memory controller instance.
+ *
+ * Fills in the DIMM description of every channel of every chip select row
+ * that belongs to @mci. The memory size is split evenly over the channels
+ * of a row.
+ */
+static void init_csrows(struct mem_ctl_info *mci)
+{
+	struct edac_priv *priv = mci->pvt_info;
+	unsigned long size = get_memsize(priv);
+	u32 row;
+
+	for (row = 0; row < mci->nr_csrows; row++) {
+		struct csrow_info *csi = mci->csrows[row];
+		int ch;
+
+		for (ch = 0; ch < csi->nr_channels; ch++) {
+			struct dimm_info *dimm = csi->channels[ch]->dimm;
+
+			dimm->edac_mode = EDAC_SECDED;
+			dimm->mtype = MEM_DDR4;
+			dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels;
+			dimm->grain = XDDR_EDAC_ERR_GRAIN;
+			dimm->dtype = get_dwidth(priv->ddrmc_baseaddr);
+		}
+	}
+}
+
+/**
+ * mc_init - Initialize one driver instance.
+ * @mci: EDAC memory controller instance.
+ * @pdev: platform device.
+ *
+ * Ties @mci to @pdev, fills in the controller names and capabilities,
+ * selects interrupt operation and initializes the csrow data.
+ */
+static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	mci->pdev = dev;
+	platform_set_drvdata(pdev, mci);
+
+	/* Identification strings */
+	mci->ctl_name = "xlnx_ddr_controller";
+	mci->dev_name = dev_name(dev);
+	mci->mod_name = "xlnx_edac";
+
+	/* Controller capabilities and configuration */
+	mci->mtype_cap = MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_HW_SRC;
+	mci->scrub_mode = SCRUB_NONE;
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	init_csrows(mci);
+}
+
+/* Enable the CE and UE interrupts; the PCSR lock is opened only for the writes. */
+static void enable_intr(struct edac_priv *priv)
+{
+	void __iomem *base = priv->ddrmc_baseaddr;
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, base + XDDR_PCSR_OFFSET);
+
+	/* Enable UE and CE Interrupts to support the interrupt case */
+	writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, base + XDDR_IRQ_EN_OFFSET);
+	writel(XDDR_IRQ_UE_MASK, base + XDDR_IRQ1_EN_OFFSET);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, base + XDDR_PCSR_OFFSET);
+}
+
+/* Disable the CE and UE interrupts; the PCSR lock is opened only for the write. */
+static void disable_intr(struct edac_priv *priv)
+{
+	void __iomem *base = priv->ddrmc_baseaddr;
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, base + XDDR_PCSR_OFFSET);
+
+	/* Disable UE/CE Interrupts */
+	writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, base + XDDR_IRQ_DIS_OFFSET);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, base + XDDR_PCSR_OFFSET);
+}
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+#ifdef CONFIG_EDAC_DEBUG
+/* Pack the address bits found at @bit_pos[n] into bit n of the result. */
+static u32 xddr_gather_bits(u64 addr, const u32 *bit_pos, u32 count)
+{
+	u32 field = 0;
+	u32 n;
+
+	for (n = 0; n < count; n++)
+		field |= ((addr >> bit_pos[n]) & BIT(0)) << n;
+
+	return field;
+}
+
+/**
+ * poison_setup - Update poison registers.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Splits the injection address into row, column, bank, group, rank,
+ * logical rank and channel using the DDR address mapping, arms the flip
+ * control of the selected channel and programs the NOC match registers
+ * with the decoded location.
+ * Return: none.
+ */
+static void poison_setup(struct edac_priv *priv)
+{
+	void __iomem *noc = priv->ddrmc_noc_baseaddr;
+	u64 addr = priv->err_inject_addr;
+	u32 row, col, bank, grp, rank, lrank, ch;
+	u32 match;
+
+	row = xddr_gather_bits(addr, priv->row_bit, XDDR_MAX_ROW_CNT);
+	col = xddr_gather_bits(addr, priv->col_bit, XDDR_MAX_COL_CNT);
+	bank = xddr_gather_bits(addr, priv->bank_bit, XDDR_MAX_BANK_CNT);
+	grp = xddr_gather_bits(addr, priv->grp_bit, XDDR_MAX_GRP_CNT);
+	rank = xddr_gather_bits(addr, priv->rank_bit, XDDR_MAX_RANK_CNT);
+	lrank = xddr_gather_bits(addr, priv->lrank_bit, XDDR_MAX_LRANK_CNT);
+	ch = (addr >> priv->ch_bit) & BIT(0);
+
+	/* Arm the flip control of the channel the address maps to. */
+	writel(0xFF, priv->ddrmc_baseaddr +
+	       (ch ? ECCW1_FLIP_CTRL : ECCW0_FLIP_CTRL));
+
+	/* ADEC12/13 are written with 0 before the match values are set. */
+	writel(0, noc + XDDR_NOC_REG_ADEC12_OFFSET);
+	writel(0, noc + XDDR_NOC_REG_ADEC13_OFFSET);
+
+	match = row & XDDR_NOC_ROW_MATCH_MASK;
+	match |= FIELD_PREP(XDDR_NOC_COL_MATCH_MASK, col);
+	match |= FIELD_PREP(XDDR_NOC_BANK_MATCH_MASK, bank);
+	match |= FIELD_PREP(XDDR_NOC_GRP_MATCH_MASK, grp);
+	writel(match, noc + XDDR_NOC_REG_ADEC14_OFFSET);
+
+	match = rank & XDDR_NOC_RANK_MATCH_MASK;
+	match |= FIELD_PREP(XDDR_NOC_LRANK_MATCH_MASK, lrank);
+	match |= FIELD_PREP(XDDR_NOC_CH_MATCH_MASK, ch);
+	match |= XDDR_NOC_MOD_SEL_MASK | XDDR_NOC_MATCH_EN_MASK;
+	writel(match, noc + XDDR_NOC_REG_ADEC15_OFFSET);
+}
+
+/*
+ * Program the ECC flip registers for a single-bit error: positions below
+ * ECCW0_FLIP0_BITS select a bit in the FLIP0 registers, higher positions
+ * select a bit in the FLIP1 registers. Both ECC writers get the same value.
+ */
+static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos)
+{
+	struct edac_priv *priv = mci->pvt_info;
+	void __iomem *base = priv->ddrmc_baseaddr;
+	u32 flip0 = 0, flip1 = 0;
+
+	if (ce_bitpos >= ECCW0_FLIP0_BITS)
+		flip1 = BIT(ce_bitpos - ECCW0_FLIP0_BITS);
+	else
+		flip0 = BIT(ce_bitpos);
+
+	writel(flip0, base + ECCW0_FLIP0_OFFSET);
+	writel(flip0, base + ECCW1_FLIP0_OFFSET);
+	writel(flip1, base + ECCW0_FLIP1_OFFSET);
+	writel(flip1, base + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject a correctable error, the following steps are needed:
+ *
+ * - Write the address to poison to the "address" debugfs file that sits
+ *   next to inject_ce:
+ *	echo <address> > /sys/kernel/debug/edac/<controller instance>/address
+ *
+ * - Write the correctable error bit position value:
+ *	echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ce
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default
+ * configuration is there and no addresses are masked.
+ *
+ * The row, column, bank, group and rank registers are written to the
+ * match ADEC bit to generate errors at the particular address. ADEC14
+ * and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ce_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit position given by the user.
+ *
+ * Upon doing a read to the address the errors are injected.
+ *
+ * Return: @count on success, -EINVAL for a bit position that does not fit
+ * the two 32-bit flip registers, or the kstrtou8_from_user() error.
+ */
+static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct edac_priv *priv = mci->pvt_info;
+	u8 ce_bitpos;
+	int ret;
+
+	ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
+	if (ret)
+		return ret;
+
+	/*
+	 * Positions at or above ECCW0_FLIP0_BITS are rebased into the FLIP1
+	 * register; anything past its 32 bits would shift out of range.
+	 */
+	if (ce_bitpos >= ECCW0_FLIP0_BITS + BITS_PER_TYPE(u32))
+		return -EINVAL;
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	poison_setup(priv);
+
+	xddr_inject_data_ce_store(mci, ce_bitpos);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	return count;
+}
+
+static const struct file_operations xddr_inject_ce_fops = { /* backs the "inject_ce" debugfs file; only a write handler is provided */
+ .open = simple_open,
+ .write = inject_data_ce_store,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Program the ECC flip registers for a double-bit error. @val0 holds the
+ * bits that belong to the FLIP0 registers and @val1 the bits that belong
+ * to the FLIP1 registers; as in the correctable error path, both ECC
+ * writers (ECCW0 and ECCW1) receive the same pair of values.
+ *
+ * NOTE(review): the previous code wrote ECCW1_FLIP1 twice, never wrote
+ * ECCW1_FLIP0 and put @val0 into ECCW0_FLIP1. The mapping below mirrors
+ * xddr_inject_data_ce_store() - confirm against the register reference.
+ */
+static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
+{
+	struct edac_priv *priv = mci->pvt_info;
+
+	writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+	writel(val0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject an uncorrectable error, the following steps are needed:
+ *
+ * - Write the address to poison to the "address" debugfs file that sits
+ *   next to inject_ue.
+ *
+ * - Write the two bit positions to corrupt, separated by a comma:
+ *	echo <bit_pos0>,<bit_pos1> > /sys/kernel/debug/edac/<controller instance>/inject_ue
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
+ * addresses are masked. The row, column, bank, group and rank registers
+ * are written to the match ADEC bit to generate errors at the
+ * particular address. ADEC14 and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ue_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit positions given by the user. For
+ * uncorrectable errors 2 bit errors are injected.
+ *
+ * Upon doing a read to the address the errors are injected.
+ *
+ * Return: @count on success; -EINVAL when the input is too long or a bit
+ * position does not fit the two 32-bit flip registers; -EFAULT when the
+ * user buffer cannot be read or fewer than two values were given; or the
+ * kstrtou8() error for a malformed number.
+ */
+static ssize_t inject_data_ue_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct edac_priv *priv = mci->pvt_info;
+	char buf[16], *pbuf, *token[NUM_UE_BITPOS];
+	u32 val0 = 0, val1 = 0;
+	u8 ue0, ue1;
+	int i, ret;
+
+	/*
+	 * One byte stays reserved for the terminating NUL, so the store to
+	 * buf[count] below can never land outside the buffer. Over-long
+	 * input is rejected instead of being silently cut in the middle of
+	 * a number.
+	 */
+	if (count >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, data, count))
+		return -EFAULT;
+
+	buf[count] = '\0';
+	pbuf = &buf[0];
+	for (i = 0; i < NUM_UE_BITPOS; i++)
+		token[i] = strsep(&pbuf, ",");
+
+	if (!token[0] || !token[1])
+		return -EFAULT;
+
+	ret = kstrtou8(token[0], 0, &ue0);
+	if (ret)
+		return ret;
+
+	ret = kstrtou8(token[1], 0, &ue1);
+	if (ret)
+		return ret;
+
+	/*
+	 * Positions at or above ECCW0_FLIP0_BITS are rebased into the second
+	 * flip value; anything past its 32 bits would shift out of range.
+	 */
+	if (ue0 >= ECCW0_FLIP0_BITS + BITS_PER_TYPE(u32) ||
+	    ue1 >= ECCW0_FLIP0_BITS + BITS_PER_TYPE(u32))
+		return -EINVAL;
+
+	if (ue0 < ECCW0_FLIP0_BITS)
+		val0 = BIT(ue0);
+	else
+		val1 = BIT(ue0 - ECCW0_FLIP0_BITS);
+
+	if (ue1 < ECCW0_FLIP0_BITS)
+		val0 |= BIT(ue1);
+	else
+		val1 |= BIT(ue1 - ECCW0_FLIP0_BITS);
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	poison_setup(priv);
+
+	xddr_inject_data_ue_store(mci, val0, val1);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	return count;
+}
+
+static const struct file_operations xddr_inject_ue_fops = { /* backs the "inject_ue" debugfs file; only a write handler is provided */
+ .open = simple_open,
+ .write = inject_data_ue_store,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Create the per-controller debugfs directory with the "inject_ce",
+ * "inject_ue" and "address" entries used for error injection. On failure
+ * nothing is left behind and priv->debugfs is cleared.
+ */
+static void create_debugfs_attributes(struct mem_ctl_info *mci)
+{
+	struct edac_priv *priv = mci->pvt_info;
+
+	priv->debugfs = edac_debugfs_create_dir(mci->dev_name);
+	if (!priv->debugfs)
+		return;
+
+	if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ce_fops))
+		goto err_remove;
+
+	if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ue_fops))
+		goto err_remove;
+
+	debugfs_create_x64("address", 0600, priv->debugfs,
+			   &priv->err_inject_addr);
+	mci->debugfs = priv->debugfs;
+	return;
+
+err_remove:
+	debugfs_remove_recursive(priv->debugfs);
+	/*
+	 * mc_remove() passes priv->debugfs to debugfs_remove_recursive()
+	 * again, so do not leave a pointer to the removed directory behind.
+	 */
+	priv->debugfs = NULL;
+}
+
+/*
+ * Unpack the five 6-bit row fields of one ADEC register value into
+ * priv->row_bit[start] .. priv->row_bit[start + 4].
+ */
+static inline void process_bit(struct edac_priv *priv, unsigned int start, u32 regval)
+{
+	union edac_info fields;
+	u32 *dst = &priv->row_bit[start];
+
+	fields.i = regval;
+	dst[0] = fields.row0;
+	dst[1] = fields.row1;
+	dst[2] = fields.row2;
+	dst[3] = fields.row3;
+	dst[4] = fields.row4;
+}
+
+/*
+ * Fill priv->row_bit[]: ADEC5, ADEC6 and ADEC7 provide five row fields
+ * each, ADEC8 provides the last three.
+ */
+static void setup_row_address_map(struct edac_priv *priv)
+{
+	void __iomem *noc = priv->ddrmc_noc_baseaddr;
+	union edac_info last;
+
+	process_bit(priv, 0, readl(noc + XDDR_NOC_REG_ADEC5_OFFSET));
+	process_bit(priv, 5, readl(noc + XDDR_NOC_REG_ADEC6_OFFSET));
+	process_bit(priv, 10, readl(noc + XDDR_NOC_REG_ADEC7_OFFSET));
+
+	last.i = readl(noc + XDDR_NOC_REG_ADEC8_OFFSET);
+	priv->row_bit[15] = last.row0;
+	priv->row_bit[16] = last.row1;
+	priv->row_bit[17] = last.row2;
+}
+
+/*
+ * Fill priv->col_bit[]: column 0 comes from bits 29:24 of ADEC8, columns
+ * 1-5 from ADEC9 and columns 6-9 from the first four fields of ADEC10.
+ */
+static void setup_column_address_map(struct edac_priv *priv)
+{
+	void __iomem *noc = priv->ddrmc_noc_baseaddr;
+	union edac_info fields;
+
+	priv->col_bit[0] = FIELD_GET(MASK_24,
+				     readl(noc + XDDR_NOC_REG_ADEC8_OFFSET));
+
+	fields.i = readl(noc + XDDR_NOC_REG_ADEC9_OFFSET);
+	priv->col_bit[1] = fields.col1;
+	priv->col_bit[2] = fields.col2;
+	priv->col_bit[3] = fields.col3;
+	priv->col_bit[4] = fields.col4;
+	priv->col_bit[5] = fields.col5;
+
+	fields.i = readl(noc + XDDR_NOC_REG_ADEC10_OFFSET);
+	priv->col_bit[6] = fields.col1;
+	priv->col_bit[7] = fields.col2;
+	priv->col_bit[8] = fields.col3;
+	priv->col_bit[9] = fields.col4;
+}
+
+/*
+ * Fill the bank, group and channel bit positions: bank 0 comes from bits
+ * 29:24 of ADEC10, everything else from ADEC11.
+ */
+static void setup_bank_grp_ch_address_map(struct edac_priv *priv)
+{
+	void __iomem *noc = priv->ddrmc_noc_baseaddr;
+	u32 adec10 = readl(noc + XDDR_NOC_REG_ADEC10_OFFSET);
+	u32 adec11;
+
+	priv->bank_bit[0] = FIELD_GET(MASK_24, adec10);
+
+	adec11 = readl(noc + XDDR_NOC_REG_ADEC11_OFFSET);
+	priv->bank_bit[1] = FIELD_GET(MASK_0, adec11);
+	priv->grp_bit[0] = FIELD_GET(GRP_0_MASK, adec11);
+	priv->grp_bit[1] = FIELD_GET(GRP_1_MASK, adec11);
+	priv->ch_bit = FIELD_GET(CH_0_MASK, adec11);
+}
+
+/* Fill the rank and logical rank bit positions from the fields of ADEC4. */
+static void setup_rank_lrank_address_map(struct edac_priv *priv)
+{
+	u32 adec4 = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC4_OFFSET);
+
+	priv->rank_bit[0] = FIELD_GET(MASK_0, adec4);
+	priv->rank_bit[1] = FIELD_GET(RANK_1_MASK, adec4);
+
+	priv->lrank_bit[0] = FIELD_GET(LRANK_0_MASK, adec4);
+	priv->lrank_bit[1] = FIELD_GET(LRANK_1_MASK, adec4);
+	priv->lrank_bit[2] = FIELD_GET(MASK_24, adec4);
+}
+
+/**
+ * setup_address_map - Set Address Map by querying ADDRMAP registers.
+ * @priv: DDR memory controller private instance data.
+ *
+ * Reads the NOC ADEC registers and records, for every row, column, bank,
+ * group, channel, rank and logical rank bit, the bit position it is
+ * mapped to.
+ *
+ * Return: none.
+ */
+static void setup_address_map(struct edac_priv *priv)
+{
+	setup_row_address_map(priv);
+	setup_column_address_map(priv);
+	setup_bank_grp_ch_address_map(priv);
+	setup_rank_lrank_address_map(priv);
+}
+#endif /* CONFIG_EDAC_DEBUG */
+
+static const struct of_device_id xlnx_edac_match[] = { /* device tree nodes handled by this driver; also walked by emif_get_id() */
+ { .compatible = "xlnx,versal-ddrmc", },
+ {
+ /* end of table */
+ }
+};
+
+MODULE_DEVICE_TABLE(of, xlnx_edac_match);
+/*
+ * Derive a controller ID for @node: the number of other nodes matching
+ * xlnx_edac_match whose translated address (truncated to 32 bits) is
+ * lower than the one of @node.
+ */
+static u32 emif_get_id(struct device_node *node)
+{
+	struct device_node *peer;
+	u32 own_addr, peer_addr;
+	u32 id = 0;
+
+	own_addr = (u32)of_translate_address(node,
+					     of_get_address(node, 0, NULL, NULL));
+
+	for_each_matching_node(peer, xlnx_edac_match) {
+		const __be32 *reg;
+
+		if (peer == node)
+			continue;
+
+		reg = of_get_address(peer, 0, NULL, NULL);
+		peer_addr = (u32)of_translate_address(peer, reg);
+
+		edac_printk(KERN_INFO, EDAC_MC,
+			    "addr=%x, my_addr=%x\n",
+			    peer_addr, own_addr);
+
+		if (peer_addr < own_addr)
+			id++;
+	}
+
+	return id;
+}
+
+/*
+ * Probe one DDRMC instance: map the "base" and "noc" register ranges, bail
+ * out with -ENXIO when ECC is not enabled, size the EDAC layers from the
+ * CONFIG0 register, register the controller with the EDAC core, hook up
+ * the error event callback and finally enable the ECC interrupts.
+ *
+ * NOTE(review): setup_address_map() only runs with CONFIG_EDAC_DEBUG, yet
+ * convert_to_physical() uses the bit-position tables it fills in every
+ * build - confirm whether the call should be unconditional.
+ */
+static int mc_probe(struct platform_device *pdev)
+{
+	void __iomem *base, *noc;
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct edac_priv *priv;
+	u8 num_chans, num_csrows;
+	u32 mc_id, config0;
+	int rc;
+
+	base = devm_platform_ioremap_resource_byname(pdev, "base");
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	noc = devm_platform_ioremap_resource_byname(pdev, "noc");
+	if (IS_ERR(noc))
+		return PTR_ERR(noc);
+
+	if (!get_ecc_state(base))
+		return -ENXIO;
+
+	/* Allocate ID number for the EMIF controller */
+	mc_id = emif_get_id(pdev->dev.of_node);
+
+	config0 = readl(base + XDDR_REG_CONFIG0_OFFSET);
+	num_chans = FIELD_GET(XDDR_REG_CONFIG0_NUM_CHANS_MASK, config0) + 1;
+
+	/* Twice the rank field, but never less than one chip select row. */
+	num_csrows = FIELD_GET(XDDR_REG_CONFIG0_NUM_RANKS_MASK, config0) * 2;
+	if (num_csrows == 0)
+		num_csrows = 1;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = num_csrows;
+	layers[0].is_virt_csrow = true;
+
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = num_chans;
+	layers[1].is_virt_csrow = false;
+
+	mci = edac_mc_alloc(mc_id, ARRAY_SIZE(layers), layers,
+			    sizeof(struct edac_priv));
+	if (!mci) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Failed memory allocation for mc instance\n");
+		return -ENOMEM;
+	}
+
+	priv = mci->pvt_info;
+	priv->ddrmc_baseaddr = base;
+	priv->ddrmc_noc_baseaddr = noc;
+	priv->mc_id = mc_id;
+	priv->ce_cnt = 0;
+	priv->ue_cnt = 0;
+
+	mc_init(mci, pdev);
+
+	rc = edac_mc_add_mc(mci);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Failed to register with EDAC core\n");
+		goto free_edac_mc;
+	}
+
+	rc = xlnx_register_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1,
+				 XPM_EVENT_ERROR_MASK_DDRMC_CR | XPM_EVENT_ERROR_MASK_DDRMC_NCR,
+				 false, err_callback, mci);
+	if (rc) {
+		/* -EACCES is turned into a deferred probe request. */
+		if (rc == -EACCES)
+			rc = -EPROBE_DEFER;
+
+		goto del_mc;
+	}
+
+#ifdef CONFIG_EDAC_DEBUG
+	create_debugfs_attributes(mci);
+	setup_address_map(priv);
+#endif
+	enable_intr(priv);
+	return 0;
+
+del_mc:
+	edac_mc_del_mc(&pdev->dev);
+free_edac_mc:
+	edac_mc_free(mci);
+
+	return rc;
+}
+
+/*
+ * Tear down one DDRMC instance in this order: disable the ECC interrupts,
+ * drop the debugfs entries, unregister the error event callback and
+ * release the EDAC controller.
+ */
+static void mc_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+	struct edac_priv *pvt = mci->pvt_info;
+
+	disable_intr(pvt);
+
+#ifdef CONFIG_EDAC_DEBUG
+	debugfs_remove_recursive(pvt->debugfs);
+#endif
+
+	xlnx_unregister_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1,
+			      XPM_EVENT_ERROR_MASK_DDRMC_CR |
+			      XPM_EVENT_ERROR_MASK_DDRMC_NCR,
+			      err_callback, mci);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+}
+
+static struct platform_driver xilinx_ddr_edac_mc_driver = { /* platform driver glue: mc_probe()/mc_remove() for nodes in xlnx_edac_match */
+ .driver = {
+ .name = "xilinx-ddrmc-edac",
+ .of_match_table = xlnx_edac_match,
+ },
+ .probe = mc_probe,
+ .remove = mc_remove,
+};
+
+module_platform_driver(xilinx_ddr_edac_mc_driver);
+
+MODULE_AUTHOR("AMD Inc");
+MODULE_DESCRIPTION("Xilinx DDRMC ECC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c
new file mode 100644
index 000000000000..1a1092793092
--- /dev/null
+++ b/drivers/edac/versalnet_edac.c
@@ -0,0 +1,962 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Versal NET memory controller driver
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/cdx/edac_cdx_pcol.h>
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/ras.h>
+#include <linux/remoteproc.h>
+#include <linux/rpmsg.h>
+#include <linux/sizes.h>
+#include <ras/ras_event.h>
+
+#include "edac_module.h"
+
+/* Granularity of reported error in bytes */
+#define MC5_ERR_GRAIN 1
+/* Length handed to MCDI_DECLARE_BUF() for the GET_DDR_CONFIG request buffer. */
+#define MC_GET_DDR_CONFIG_IN_LEN 4
+
+/* ISR bits that get_ddr_info() tests to detect correctable (CE) and uncorrectable (UE) errors. */
+#define MC5_IRQ_CE_MASK GENMASK(18, 15)
+#define MC5_IRQ_UE_MASK GENMASK(14, 11)
+
+/*
+ * 6-bit fields inside the ADEC register words. convert_to_physical() uses the
+ * value of each field as the shift amount for one bit of the rank, logical
+ * rank, bank or group number.
+ */
+#define MC5_RANK_1_MASK GENMASK(11, 6)
+#define MASK_24 GENMASK(29, 24)
+#define MASK_0 GENMASK(5, 0)
+
+#define MC5_LRANK_1_MASK GENMASK(11, 6)
+#define MC5_LRANK_2_MASK GENMASK(17, 12)
+#define MC5_BANK1_MASK GENMASK(11, 6)
+#define MC5_GRP_0_MASK GENMASK(17, 12)
+#define MC5_GRP_1_MASK GENMASK(23, 18)
+
+/* Shift applied to ecc_error_info.rowhi before it is OR-ed with the 7-bit row field. */
+#define MC5_REGHI_ROW 7
+/* Shift step used when consuming a value one bit at a time. */
+#define MC5_EACHBIT 1
+/* Values stored in ecc_status.error_type. */
+#define MC5_ERR_TYPE_CE 0
+#define MC5_ERR_TYPE_UE 1
+/* Fields and constants used by convert_to_physical() for the memory-region arithmetic. */
+#define MC5_HIGH_MEM_EN BIT(20)
+#define MC5_MEM_MASK GENMASK(19, 0)
+#define MC5_X16_BASE 256
+#define MC5_X16_ECC 32
+#define MC5_X16_SIZE (MC5_X16_BASE + MC5_X16_ECC)
+#define MC5_X32_SIZE 576
+#define MC5_HIMEM_BASE (256 * SZ_1M)
+#define MC5_ILC_HIMEM_EN BIT(28)
+#define MC5_ILC_MEM GENMASK(27, 0)
+#define MC5_INTERLEAVE_SEL GENMASK(3, 0)
+/* Fields of the per-controller CONF word decoded in init_versalnet(). */
+#define MC5_BUS_WIDTH_MASK GENMASK(19, 18)
+#define MC5_NUM_CHANS_MASK BIT(17)
+#define MC5_RANK_MASK GENMASK(15, 14)
+
+/* 32-bit word indices into an rpmsg error message, as read by rpmsg_cb(). */
+#define ERROR_LEVEL 2
+#define ERROR_ID 3
+#define TOTAL_ERR_LENGTH 5
+#define MSG_ERR_OFFSET 8
+#define MSG_ERR_LENGTH 9
+#define ERROR_DATA 10
+/* First payload byte that marks an rpmsg message as an MCDI response. */
+#define MCDI_RESPONSE 0xFF
+
+/* REG_MAX == NUM_CONTROLLERS * REGS_PER_CONTROLLER, ADEC_MAX == NUM_CONTROLLERS * ADEC_NUM. */
+#define REG_MAX 152
+#define ADEC_MAX 152
+#define NUM_CONTROLLERS 8
+#define REGS_PER_CONTROLLER 19
+#define ADEC_NUM 19
+/* Length handed to MCDI_DECLARE_BUF() for the GET_DDR_CONFIG response buffer. */
+#define BUFFER_SZ 80
+
+/* Encodings of the MC5_BUS_WIDTH_MASK field of the CONF word. */
+#define XDDR5_BUS_WIDTH_64 0
+#define XDDR5_BUS_WIDTH_32 1
+#define XDDR5_BUS_WIDTH_16 2
+
+/**
+ * union ecc_error_info - ECC error log information.
+ * @burstpos: Burst position.
+ * @lrank: Logical Rank number.
+ * @rank: Rank number.
+ * @group: Group number.
+ * @bank: Bank number.
+ * @col: Column number.
+ * @row: Row number.
+ * @rowhi: Row number higher bits.
+ * @i: Combined ECC error vector containing encoded values of burst position,
+ * rank, bank, column, and row information.
+ *
+ * The bit-fields ahead of @rowhi add up to 32 bits (3 + 4 + 2 + 3 + 2 + 11 + 7).
+ * get_ddr_info() fills @i as ADDR_LO | (u64)ADDR_HI << 32 and the individual
+ * fields are then read back in convert_to_physical().
+ */
+union ecc_error_info {
+ struct {
+ u32 burstpos:3;
+ u32 lrank:4;
+ u32 rank:2;
+ u32 group:3;
+ u32 bank:2;
+ u32 col:11;
+ u32 row:7;
+ u32 rowhi;
+ };
+ u64 i;
+} __packed;
+
+/*
+ * Row and column bit positions in the address decoder (ADEC) registers.
+ *
+ * One 32-bit ADEC word (@i) viewed as five 6-bit fields plus two reserved
+ * bits. The same layout is exposed under two sets of names: row0..row4 and
+ * col1..col5. convert_to_physical() uses each field as the shift amount for
+ * one row or column bit.
+ */
+union row_col_mapping {
+ struct {
+ u32 row0:6;
+ u32 row1:6;
+ u32 row2:6;
+ u32 row3:6;
+ u32 row4:6;
+ u32 reserved:2;
+ };
+ struct {
+ u32 col1:6;
+ u32 col2:6;
+ u32 col3:6;
+ u32 col4:6;
+ u32 col5:6;
+ u32 reservedcol:2;
+ };
+ u32 i;
+} __packed;
+
+/**
+ * struct ecc_status - ECC status information to report.
+ * @ceinfo: Correctable errors, one entry per ECCR register set (ECCR0, ECCR1);
+ * handle_error() indexes it with @channel.
+ * @ueinfo: Uncorrected errors, indexed the same way as @ceinfo.
+ * @channel: Channel number; get_ddr_info() sets it to 0 when the ECCR0 error
+ * status is non-zero and to 1 otherwise.
+ * @error_type: Error type (MC5_ERR_TYPE_CE or MC5_ERR_TYPE_UE).
+ */
+struct ecc_status {
+ union ecc_error_info ceinfo[2];
+ union ecc_error_info ueinfo[2];
+ u8 channel;
+ u8 error_type;
+};
+
+/**
+ * struct mc_priv - DDR memory controller private instance data.
+ * @message: Buffer for framing the event specific info.
+ * @stat: ECC status information.
+ * @error_id: The error id.
+ * @error_level: The error level.
+ * @dwidth: Width of data bus excluding ECC bits.
+ * @part_len: Number of 32-bit words received so far for an error report that
+ * arrives split over several rpmsg messages; reset to 0 once the
+ * full report has been assembled.
+ * @regs: The registers sent on the rpmsg (REGS_PER_CONTROLLER words per
+ * controller).
+ * @adec: Address decode registers (ADEC_NUM words per controller).
+ * @mci: Memory controller interface, one slot per controller.
+ * @ept: rpmsg endpoint.
+ * @mcdi: The mcdi handle.
+ */
+struct mc_priv {
+ char message[256];
+ struct ecc_status stat;
+ u32 error_id;
+ u32 error_level;
+ u32 dwidth;
+ u32 part_len;
+ u32 regs[REG_MAX];
+ u32 adec[ADEC_MAX];
+ struct mem_ctl_info *mci[NUM_CONTROLLERS];
+ struct rpmsg_endpoint *ept;
+ struct cdx_mcdi *mcdi;
+};
+
+/*
+ * Address decoder (ADEC) registers to match the order in which the register
+ * information is received from the firmware.
+ *
+ * The enumerators are word indices into one controller's slice of
+ * mc_priv.adec; the list has ADEC_NUM (19) entries.
+ */
+enum adec_info {
+ CONF = 0,
+ ADEC0,
+ ADEC1,
+ ADEC2,
+ ADEC3,
+ ADEC4,
+ ADEC5,
+ ADEC6,
+ ADEC7,
+ ADEC8,
+ ADEC9,
+ ADEC10,
+ ADEC11,
+ ADEC12,
+ ADEC13,
+ ADEC14,
+ ADEC15,
+ ADEC16,
+ ADECILC,
+};
+
+/*
+ * Word indices into one controller's slice of mc_priv.regs, as consumed by
+ * get_ddr_info(); the list has REGS_PER_CONTROLLER (19) entries.
+ *
+ * NOTE(review): XMPU_ERR_ADDR_L0 is spelled with a digit zero, unlike the
+ * other *_LO names; it is not referenced in the code visible here.
+ */
+enum reg_info {
+ ISR = 0,
+ IMR,
+ ECCR0_ERR_STATUS,
+ ECCR0_ADDR_LO,
+ ECCR0_ADDR_HI,
+ ECCR0_DATA_LO,
+ ECCR0_DATA_HI,
+ ECCR0_PAR,
+ ECCR1_ERR_STATUS,
+ ECCR1_ADDR_LO,
+ ECCR1_ADDR_HI,
+ ECCR1_DATA_LO,
+ ECCR1_DATA_HI,
+ ECCR1_PAR,
+ XMPU_ERR,
+ XMPU_ERR_ADDR_L0,
+ XMPU_ERR_ADDR_HI,
+ XMPU_ERR_AXI_ID,
+ ADEC_CHK_ERR_LOG,
+};
+
+/*
+ * get_ddr_info - Pull the ECC error vectors for one controller out of @error_data.
+ * @error_data: that controller's register words, indexed by enum reg_info.
+ * @priv: instance data; priv->stat receives the channel and error vectors.
+ *
+ * Return: true when the ISR word flags a CE or UE and at least one of the two
+ * ECCR error status words is non-zero, false otherwise.
+ */
+static bool get_ddr_info(u32 *error_data, struct mc_priv *priv)
+{
+	const u32 irq_status = error_data[ISR];
+	struct ecc_status *stat = &priv->stat;
+	union ecc_error_info *slot;
+	u32 status0, status1;
+	u32 lo, hi;
+
+	if (!(irq_status & (MC5_IRQ_UE_MASK | MC5_IRQ_CE_MASK)))
+		return false;
+
+	status0 = error_data[ECCR0_ERR_STATUS];
+	status1 = error_data[ECCR1_ERR_STATUS];
+	if (!(status0 || status1))
+		return false;
+
+	stat->channel = status0 ? 0 : 1;
+
+	/*
+	 * At least one of the CE/UE bits is set at this point; CE wins when
+	 * both are set, otherwise the vectors go to the UE slots.
+	 */
+	slot = (irq_status & MC5_IRQ_CE_MASK) ? stat->ceinfo : stat->ueinfo;
+
+	lo = error_data[ECCR0_ADDR_LO];
+	hi = error_data[ECCR0_ADDR_HI];
+	slot[0].i = lo | (u64)hi << 32;
+	edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n",
+		 hi, lo, error_data[ECCR0_PAR]);
+
+	lo = error_data[ECCR1_ADDR_LO];
+	hi = error_data[ECCR1_ADDR_HI];
+	slot[1].i = lo | (u64)hi << 32;
+	edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n",
+		 hi, lo, error_data[ECCR1_PAR]);
+
+	return true;
+}
+
+/**
+ * convert_to_physical - Convert @error_data to a physical address.
+ * @priv: DDR memory controller private instance data.
+ * @pinf: ECC error info structure (passed by value; consumed bit by bit).
+ * @controller: Controller number of the MC5
+ * @error_data: the DDRMC5 ADEC address decoder register data of @controller,
+ * indexed by enum adec_info.
+ *
+ * Every bit of the row, column, bank, group, rank and logical rank number is
+ * placed at the bit position named by the matching 6-bit ADEC field. The
+ * resulting controller-local address is then scaled by the interleave
+ * settings and rebased with the low/high memory offsets.
+ *
+ * Return: physical address of the DDR memory.
+ */
+static unsigned long convert_to_physical(struct mc_priv *priv,
+					 union ecc_error_info pinf,
+					 int controller, int *error_data)
+{
+	u32 row, blk, rsh_req_addr, interleave, ilc_base_ctrl_add, ilc_himem_en, reg, offset;
+	u64 high_mem_base, high_mem_offset, low_mem_offset, ilcmem_base;
+	unsigned long err_addr = 0, addr;
+	union row_col_mapping cols;
+	union row_col_mapping rows;
+	u32 col_bit_0;
+
+	row = pinf.rowhi << MC5_REGHI_ROW | pinf.row;
+	offset = controller * ADEC_NUM;
+
+	/* Row bits: five per register in ADEC6..ADEC8, three more in ADEC9. */
+	reg = error_data[ADEC6];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	row >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC7];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	row >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC8];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	/*
+	 * NOTE(review): unlike after ADEC6/ADEC7, row is not shifted here, so
+	 * the same row bit is also placed through ADEC9.row0 below. Confirm
+	 * against the DDRMC5 register specification whether a shift is missing.
+	 */
+
+	reg = error_data[ADEC9];
+	rows.i = reg;
+
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+
+	/*
+	 * Column bits. BIT(0) (unsigned long) is used instead of a plain 1 so
+	 * that shifting by a 6-bit field value never shifts an int past its
+	 * width.
+	 *
+	 * NOTE(review): the column is shifted once before its first use, and
+	 * the same bit is then placed through both col_bit_0 and ADEC10.col1;
+	 * confirm this is the intended mapping.
+	 */
+	col_bit_0 = FIELD_GET(MASK_24, error_data[ADEC9]);
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << col_bit_0;
+
+	cols.i = error_data[ADEC10];
+	err_addr |= (pinf.col & BIT(0)) << cols.col1;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col2;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col3;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col4;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col5;
+	pinf.col >>= 1;
+
+	cols.i = error_data[ADEC11];
+	err_addr |= (pinf.col & BIT(0)) << cols.col1;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col2;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col3;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col4;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & BIT(0)) << cols.col5;
+	pinf.col >>= 1;
+
+	/* Bank bits (two), then bank-group bits (three), all from ADEC12. */
+	reg = error_data[ADEC12];
+	err_addr |= (pinf.bank & BIT(0)) << (reg & MASK_0);
+	pinf.bank >>= MC5_EACHBIT;
+	err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_BANK1_MASK, reg);
+	pinf.bank >>= MC5_EACHBIT;
+
+	/*
+	 * The group bits must come from pinf.group. Reading pinf.bank here
+	 * (already shifted down to zero) would drop the bank group from the
+	 * address entirely.
+	 */
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MC5_GRP_0_MASK, reg);
+	pinf.group >>= MC5_EACHBIT;
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MC5_GRP_1_MASK, reg);
+	pinf.group >>= MC5_EACHBIT;
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MASK_24, reg);
+	pinf.group >>= MC5_EACHBIT;
+
+	/* Rank bits from ADEC4. */
+	reg = error_data[ADEC4];
+	err_addr |= (pinf.rank & BIT(0)) << (reg & MASK_0);
+	pinf.rank >>= MC5_EACHBIT;
+	err_addr |= (pinf.rank & BIT(0)) << FIELD_GET(MC5_RANK_1_MASK, reg);
+	pinf.rank >>= MC5_EACHBIT;
+
+	/* Logical rank bits from ADEC5. */
+	reg = error_data[ADEC5];
+	err_addr |= (pinf.lrank & BIT(0)) << (reg & MASK_0);
+	pinf.lrank >>= MC5_EACHBIT;
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_1_MASK, reg);
+	pinf.lrank >>= MC5_EACHBIT;
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_2_MASK, reg);
+	pinf.lrank >>= MC5_EACHBIT;
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MASK_24, reg);
+	pinf.lrank >>= MC5_EACHBIT;
+
+	/* Region bases/offsets and interleave settings of this controller. */
+	high_mem_base = (priv->adec[ADEC2 + offset] & MC5_MEM_MASK) * MC5_HIMEM_BASE;
+	interleave = priv->adec[ADEC13 + offset] & MC5_INTERLEAVE_SEL;
+
+	high_mem_offset = priv->adec[ADEC3 + offset] & MC5_MEM_MASK;
+	low_mem_offset = priv->adec[ADEC1 + offset] & MC5_MEM_MASK;
+	reg = priv->adec[ADEC14 + offset];
+	ilc_himem_en = !!(reg & MC5_ILC_HIMEM_EN);
+	ilcmem_base = (reg & MC5_ILC_MEM) * SZ_1M;
+	if (ilc_himem_en)
+		ilc_base_ctrl_add = ilcmem_base - high_mem_offset;
+	else
+		ilc_base_ctrl_add = ilcmem_base - low_mem_offset;
+
+	if (priv->dwidth == DEV_X16) {
+		blk = err_addr / MC5_X16_SIZE;
+		rsh_req_addr = (blk << 8) + ilc_base_ctrl_add;
+		err_addr = rsh_req_addr * interleave * 2;
+	} else {
+		blk = err_addr / MC5_X32_SIZE;
+		rsh_req_addr = (blk << 9) + ilc_base_ctrl_add;
+		err_addr = rsh_req_addr * interleave * 2;
+	}
+
+	if ((priv->adec[ADEC2 + offset] & MC5_HIGH_MEM_EN) && err_addr >= high_mem_base)
+		addr = err_addr - high_mem_offset;
+	else
+		addr = err_addr - low_mem_offset;
+
+	return addr;
+}
+
+/**
+ * handle_error - Handle errors.
+ * @priv: DDR memory controller private instance data.
+ * @stat: ECC status structure.
+ * @ctl_num: Controller number of the MC5
+ * @error_data: the MC5 ADEC address decoder register data
+ *
+ * Handles ECC correctable and uncorrectable errors: the error is reported to
+ * the EDAC core and, for uncorrectable errors with CONFIG_MEMORY_FAILURE
+ * enabled, the affected page is handed to memory_failure().
+ *
+ * Reports for a controller that has no registered mem_ctl_info are dropped.
+ */
+static void handle_error(struct mc_priv *priv, struct ecc_status *stat,
+			 int ctl_num, int *error_data)
+{
+	union ecc_error_info pinf;
+	struct mem_ctl_info *mci;
+	unsigned long pa;
+	phys_addr_t pfn;
+	int err;
+
+	if (WARN_ON_ONCE(ctl_num < 0 || ctl_num >= NUM_CONTROLLERS))
+		return;
+
+	/*
+	 * Controllers skipped by init_versalnet() have no mci; passing NULL
+	 * on to edac_mc_handle_error() would crash.
+	 */
+	mci = priv->mci[ctl_num];
+	if (!mci) {
+		edac_dbg(2, "Error reported for unregistered controller %d\n", ctl_num);
+		return;
+	}
+
+	if (stat->error_type == MC5_ERR_TYPE_CE) {
+		pinf = stat->ceinfo[stat->channel];
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		snprintf(priv->message, sizeof(priv->message),
+			 "Error type:%s Controller %d Addr at %lx\n",
+			 "CE", ctl_num, pa);
+
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				     1, 0, 0, 0, 0, 0, -1,
+				     priv->message, "");
+	}
+
+	if (stat->error_type == MC5_ERR_TYPE_UE) {
+		pinf = stat->ueinfo[stat->channel];
+		/* Decode once and reuse for both the message and the PFN. */
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		snprintf(priv->message, sizeof(priv->message),
+			 "Error type:%s controller %d Addr at %lx\n",
+			 "UE", ctl_num, pa);
+
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				     1, 0, 0, 0, 0, 0, -1,
+				     priv->message, "");
+		pfn = PHYS_PFN(pa);
+
+		if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) {
+			err = memory_failure(pfn, MF_ACTION_REQUIRED);
+			if (err)
+				edac_dbg(2, "memory_failure() error: %d\n", err);
+			else
+				edac_dbg(2, "Poison page at PA 0x%lx\n", pa);
+		}
+	}
+}
+
+/*
+ * mc_init - Fill in the static attributes of a freshly allocated controller.
+ * @mci: controller to initialise; its pvt_info supplies the device width.
+ * @dev: device whose name is reported as the controller's dev_name.
+ *
+ * Also switches the global EDAC operating state to interrupt mode and marks
+ * every DIMM of every chip-select row as DDR5 with SECDED protection.
+ */
+static void mc_init(struct mem_ctl_info *mci, struct device *dev)
+{
+	struct mc_priv *pvt = mci->pvt_info;
+	u32 cs;
+	int chan;
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	/* Identification strings */
+	mci->ctl_name = "VersalNET DDR5";
+	mci->mod_name = "versalnet_edac";
+	mci->dev_name = dev_name(dev);
+
+	/* Capabilities and current configuration */
+	mci->mtype_cap = MEM_FLAG_DDR5;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_HW_SRC;
+	mci->scrub_mode = SCRUB_NONE;
+
+	for (cs = 0; cs < mci->nr_csrows; cs++) {
+		struct csrow_info *csrow = mci->csrows[cs];
+
+		for (chan = 0; chan < csrow->nr_channels; chan++) {
+			struct dimm_info *d = csrow->channels[chan]->dimm;
+
+			d->mtype = MEM_DDR5;
+			d->edac_mode = EDAC_SECDED;
+			d->dtype = pvt->dwidth;
+			d->grain = MC5_ERR_GRAIN;
+		}
+	}
+}
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+/* cdx_mcdi_ops.mcdi_rpc_timeout hook: every command gets the same MCDI_RPC_TIMEOUT; @cdx and @cmd are unused. */
+static unsigned int mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd)
+{
+ return MCDI_RPC_TIMEOUT;
+}
+
+/*
+ * mcdi_request - cdx_mcdi_ops.mcdi_request hook.
+ * @cdx: MCDI handle; its rpmsg endpoint carries the request.
+ * @hdr: request header, @hdr_len bytes.
+ * @sdu: request payload, @sdu_len bytes.
+ *
+ * Header and payload are copied back to back into one temporary buffer which
+ * is sent with rpmsg_send(). The hook has no return value: an allocation
+ * failure drops the request silently, a send failure is only logged.
+ */
+static void mcdi_request(struct cdx_mcdi *cdx,
+			 const struct cdx_dword *hdr, size_t hdr_len,
+			 const struct cdx_dword *sdu, size_t sdu_len)
+{
+	const size_t frame_len = hdr_len + sdu_len;
+	void *frame = kzalloc(frame_len, GFP_KERNEL);
+	int rc;
+
+	if (!frame)
+		return;
+
+	memcpy(frame, hdr, hdr_len);
+	memcpy(frame + hdr_len, sdu, sdu_len);
+
+	rc = rpmsg_send(cdx->ept, frame, frame_len);
+	if (rc)
+		dev_err(&cdx->rpdev->dev, "Failed to send rpmsg data: %d\n", rc);
+
+	kfree(frame);
+}
+
+/* MCDI transport hooks installed on the cdx_mcdi handle in setup_mcdi(). */
+static const struct cdx_mcdi_ops mcdi_ops = {
+ .mcdi_rpc_timeout = mcdi_rpc_timeout,
+ .mcdi_request = mcdi_request,
+};
+
+/*
+ * get_ddr_config - Fetch the address decoder words of one controller.
+ * @index: controller index placed in the request.
+ * @buffer: destination for ADEC_NUM 32-bit words.
+ * @amd_mcdi: MCDI handle used for the RPC.
+ *
+ * @buffer is written only when the RPC succeeds; on failure it is left as is.
+ */
+static void get_ddr_config(u32 index, u32 *buffer, struct cdx_mcdi *amd_mcdi)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_GET_DDR_CONFIG_IN_LEN);
+	MCDI_DECLARE_BUF(outbuf, BUFFER_SZ);
+	size_t resp_len;
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX, index);
+
+	rc = cdx_mcdi_rpc(amd_mcdi, MC_CMD_EDAC_GET_DDR_CONFIG,
+			  inbuf, sizeof(inbuf),
+			  outbuf, sizeof(outbuf), &resp_len);
+	if (rc)
+		return;
+
+	memcpy(buffer, MCDI_PTR(outbuf, GET_DDR_CONFIG), (ADEC_NUM * 4));
+}
+
+/*
+ * setup_mcdi - Create the MCDI handle and read every controller's ADEC words.
+ * @mc_priv: instance data; receives the handle in ->mcdi and the decoder
+ * words in ->adec.
+ *
+ * Return: 0 on success, -ENOMEM if the handle cannot be allocated, or the
+ * error returned by cdx_mcdi_init().
+ */
+static int setup_mcdi(struct mc_priv *mc_priv)
+{
+	struct cdx_mcdi *handle;
+	int ctrl, rc;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->mcdi_ops = &mcdi_ops;
+
+	rc = cdx_mcdi_init(handle);
+	if (rc) {
+		kfree(handle);
+		return rc;
+	}
+
+	handle->ept = mc_priv->ept;
+	mc_priv->mcdi = handle;
+
+	/* One RPC per controller, each filling that controller's ADEC slice. */
+	for (ctrl = 0; ctrl < NUM_CONTROLLERS; ctrl++)
+		get_ddr_config(ctrl, &mc_priv->adec[ADEC_NUM * ctrl], handle);
+
+	return 0;
+}
+
+static const guid_t amd_versalnet_guid = GUID_INIT(0x82678888, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+/*
+ * rpmsg_cb - Receive callback for the error reporting channel.
+ * @rpdev: rpmsg device; its driver data is the struct mc_priv.
+ * @data: message payload.
+ * @len: payload length in bytes.
+ * @priv: unused.
+ * @src: unused.
+ *
+ * A payload whose first byte is MCDI_RESPONSE is passed to the MCDI layer.
+ * Anything else is an error report: a header of ERROR_DATA 32-bit words
+ * followed by register words that are copied into mc_priv->regs at the offset
+ * named in the header. A report may be split over several messages; it is
+ * processed once all of its words have arrived. DDRMC ECC errors (ids 18 and
+ * 19) are decoded per controller, every other id is logged as a non-standard
+ * event.
+ *
+ * The offset and length fields come from the remote processor and are
+ * validated against both the message size and the size of the regs array
+ * before anything is copied.
+ *
+ * Return: 0 when the message was consumed, -EINVAL when it is malformed or
+ * arrives before the driver data is available.
+ */
+static int rpmsg_cb(struct rpmsg_device *rpdev, void *data,
+		    int len, void *priv, u32 src)
+{
+	struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev);
+	const guid_t *sec_type = &guid_null;
+	u32 length, offset, error_id, total, i;
+	u32 *result = (u32 *)data;
+	struct ecc_status *p;
+	const char *err_str;
+	int ctl, sec_sev;
+	u32 *adec_data;
+
+	if (!mc_priv || !data || len < 1)
+		return -EINVAL;
+
+	if (*(u8 *)data == MCDI_RESPONSE) {
+		cdx_mcdi_process_cmd(mc_priv->mcdi, (struct cdx_dword *)data, len);
+		return 0;
+	}
+
+	/* The header fields below live in the first ERROR_DATA words. */
+	if ((size_t)len < ERROR_DATA * sizeof(u32))
+		return -EINVAL;
+
+	sec_sev = result[ERROR_LEVEL];
+	error_id = result[ERROR_ID];
+	total = result[TOTAL_ERR_LENGTH];
+	length = result[MSG_ERR_LENGTH];
+	offset = result[MSG_ERR_OFFSET];
+
+	/* Reject anything that would overrun regs[] or read past the payload. */
+	if (length > REG_MAX || offset > REG_MAX - length ||
+	    (size_t)len < (ERROR_DATA + (size_t)length) * sizeof(u32)) {
+		dev_err_ratelimited(&rpdev->dev,
+				    "Malformed error message: offset %u, length %u, size %d\n",
+				    offset, length, len);
+		return -EINVAL;
+	}
+
+	/*
+	 * The data can come in two stretches. Construct the regs from two
+	 * messages. The offset indicates the offset from which the data is to
+	 * be taken.
+	 */
+	for (i = 0; i < length; i++)
+		mc_priv->regs[offset + i] = result[ERROR_DATA + i];
+
+	if (total > length) {
+		mc_priv->part_len += length;
+
+		/* Wait for the remaining part(s) of this report. */
+		if (mc_priv->part_len < total)
+			return 0;
+		mc_priv->part_len = 0;
+	}
+
+	mc_priv->error_id = error_id;
+	mc_priv->error_level = result[ERROR_LEVEL];
+
+	switch (error_id) {
+	case 5: err_str = "General Software Non-Correctable error"; break;
+	case 6: err_str = "CFU error"; break;
+	case 7: err_str = "CFRAME error"; break;
+	case 10: err_str = "DDRMC Microblaze Correctable ECC error"; break;
+	case 11: err_str = "DDRMC Microblaze Non-Correctable ECC error"; break;
+	case 15: err_str = "MMCM error"; break;
+	case 16: err_str = "HNICX Correctable error"; break;
+	case 17: err_str = "HNICX Non-Correctable error"; break;
+
+	case 18:
+	case 19:
+		/* DDRMC ECC error: 18 is correctable, 19 is uncorrectable. */
+		p = &mc_priv->stat;
+		memset(p, 0, sizeof(*p));
+		p->error_type = (error_id == 18) ? MC5_ERR_TYPE_CE : MC5_ERR_TYPE_UE;
+		for (ctl = 0; ctl < NUM_CONTROLLERS; ctl++) {
+			if (get_ddr_info(&mc_priv->regs[ctl * REGS_PER_CONTROLLER], mc_priv)) {
+				adec_data = mc_priv->adec + ADEC_NUM * ctl;
+				handle_error(mc_priv, &mc_priv->stat, ctl, adec_data);
+			}
+		}
+		return 0;
+
+	case 21: err_str = "GT Non-Correctable error"; break;
+	case 22: err_str = "PL Sysmon Correctable error"; break;
+	case 23: err_str = "PL Sysmon Non-Correctable error"; break;
+	case 111: err_str = "LPX unexpected dfx activation error"; break;
+	case 114: err_str = "INT_LPD Non-Correctable error"; break;
+	case 116: err_str = "INT_OCM Non-Correctable error"; break;
+	case 117: err_str = "INT_FPD Correctable error"; break;
+	case 118: err_str = "INT_FPD Non-Correctable error"; break;
+	case 120: err_str = "INT_IOU Non-Correctable error"; break;
+	case 123: err_str = "err_int_irq from APU GIC Distributor"; break;
+	case 124: err_str = "fault_int_irq from APU GIC Distribute"; break;
+	case 132 ... 139: err_str = "FPX SPLITTER error"; break;
+	case 140: err_str = "APU Cluster 0 error"; break;
+	case 141: err_str = "APU Cluster 1 error"; break;
+	case 142: err_str = "APU Cluster 2 error"; break;
+	case 143: err_str = "APU Cluster 3 error"; break;
+	case 145: err_str = "WWDT1 LPX error"; break;
+	case 147: err_str = "IPI error"; break;
+	case 152 ... 153: err_str = "AFIFS error"; break;
+	case 154 ... 155: err_str = "LPX glitch error"; break;
+	case 185 ... 186: err_str = "FPX AFIFS error"; break;
+	case 195 ... 199: err_str = "AFIFM error"; break;
+	case 108: err_str = "PSM Correctable error"; break;
+	case 59: err_str = "PMC correctable error"; break;
+	case 60: err_str = "PMC Un correctable error"; break;
+	case 43 ... 47: err_str = "PMC Sysmon error"; break;
+	case 163 ... 184: err_str = "RPU error"; break;
+	case 148: err_str = "OCM0 correctable error"; break;
+	case 149: err_str = "OCM1 correctable error"; break;
+	case 150: err_str = "OCM0 Un-correctable error"; break;
+	case 151: err_str = "OCM1 Un-correctable error"; break;
+	case 189: err_str = "PSX_CMN_3 PD block consolidated error"; break;
+	case 191: err_str = "FPD_INT_WRAP PD block consolidated error"; break;
+	case 232: err_str = "CRAM Un-Correctable error"; break;
+	/* err_str is printed with %s below, so it must not hold a conversion. */
+	default: err_str = "Unknown error"; break;
+	}
+
+	snprintf(mc_priv->message,
+		 sizeof(mc_priv->message),
+		 "[VERSAL_EDAC_ERR_ID: %d] Error type: %s", error_id, err_str);
+
+	/* Convert to bytes, never exposing more than regs[] actually holds. */
+	length = min_t(u32, total, REG_MAX) * 4;
+	log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message,
+			       sec_sev, (void *)&mc_priv->regs, length);
+
+	return 0;
+}
+
+/*
+ * rpmsg channel handled by this driver. The table is not const because
+ * mc_probe() stores the struct mc_priv pointer in entry 0's driver_data and
+ * rpmsg_probe() reads it back from there.
+ */
+static struct rpmsg_device_id amd_rpmsg_id_table[] = {
+ { .name = "error_ipc" },
+ { },
+};
+MODULE_DEVICE_TABLE(rpmsg, amd_rpmsg_id_table);
+
+/*
+ * rpmsg_probe - Bind to the error reporting channel.
+ * @rpdev: rpmsg device announced for the channel.
+ *
+ * Picks up the struct mc_priv that mc_probe() stored in the id table, makes
+ * it the device's driver data and creates the endpoint on which rpmsg_cb()
+ * receives messages.
+ *
+ * Return: 0 on success, -ENODEV if no instance data has been published yet,
+ * -ENXIO if the endpoint cannot be created.
+ */
+static int rpmsg_probe(struct rpmsg_device *rpdev)
+{
+	struct rpmsg_channel_info chinfo = {};
+	struct mc_priv *pg;
+
+	pg = (struct mc_priv *)amd_rpmsg_id_table[0].driver_data;
+	if (!pg)
+		return -ENODEV;
+
+	chinfo.src = RPMSG_ADDR_ANY;
+	chinfo.dst = rpdev->dst;
+	/*
+	 * The size argument of strscpy() is the destination size. Passing
+	 * strlen() of the source would cut off the last character of the name.
+	 */
+	strscpy(chinfo.name, amd_rpmsg_id_table[0].name, sizeof(chinfo.name));
+
+	/*
+	 * rpmsg_cb() fetches the instance data through dev_get_drvdata(), so
+	 * it has to be in place before the endpoint can deliver a message.
+	 */
+	dev_set_drvdata(&rpdev->dev, pg);
+
+	pg->ept = rpmsg_create_ept(rpdev, rpmsg_cb, NULL, chinfo);
+	if (!pg->ept) {
+		dev_set_drvdata(&rpdev->dev, NULL);
+		return dev_err_probe(&rpdev->dev, -ENXIO, "Failed to create ept for channel %s\n",
+				     chinfo.name);
+	}
+
+	return 0;
+}
+
+/* rpmsg remove hook: destroys the endpoint created in rpmsg_probe() and clears the driver data. */
+static void rpmsg_remove(struct rpmsg_device *rpdev)
+{
+ struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev);
+
+ rpmsg_destroy_ept(mc_priv->ept);
+ dev_set_drvdata(&rpdev->dev, NULL);
+}
+
+/*
+ * rpmsg driver for the channels listed in amd_rpmsg_id_table. It is
+ * registered from mc_probe() and unregistered from mc_remove().
+ */
+static struct rpmsg_driver amd_rpmsg_driver = {
+ .drv.name = KBUILD_MODNAME,
+ .probe = rpmsg_probe,
+ .remove = rpmsg_remove,
+ .callback = rpmsg_cb,
+ .id_table = amd_rpmsg_id_table,
+};
+
+/*
+ * Release hook of the per-controller struct device allocated in
+ * init_versalnet(): frees the device structure itself.
+ */
+static void versal_edac_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+/*
+ * init_versalnet - Register one EDAC memory controller per enabled DDRMC5.
+ * @priv: instance data holding the ADEC words read by setup_mcdi().
+ * @pdev: platform device; @priv becomes its driver data.
+ *
+ * For each controller the CONF word is decoded. Controllers whose bus width
+ * field does not name a known width are skipped and keep a NULL priv->mci[]
+ * slot. Every other controller gets a mem_ctl_info plus a private struct
+ * device that acts as its parent, and is registered with the EDAC core.
+ *
+ * On failure everything created so far, including the partially set up
+ * controller, is torn down again.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+static int init_versalnet(struct mc_priv *priv, struct platform_device *pdev)
+{
+	u32 num_chans, rank, dwidth, config;
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct mc_priv *mci_priv;
+	struct device *dev;
+	enum dev_type dt;
+	int rc, i;
+
+	/*
+	 * Publish the instance data up front: mc_remove() relies on it even
+	 * when no controller turns out to be enabled.
+	 */
+	platform_set_drvdata(pdev, priv);
+
+	for (i = 0; i < NUM_CONTROLLERS; i++) {
+		config = priv->adec[CONF + i * ADEC_NUM];
+		/*
+		 * NOTE(review): MC5_NUM_CHANS_MASK is a single bit, so
+		 * num_chans is 0 or 1 and a channel layer of size 0 is
+		 * possible. Confirm the field encoding against the hardware
+		 * documentation.
+		 */
+		num_chans = FIELD_GET(MC5_NUM_CHANS_MASK, config);
+		rank = 1 << FIELD_GET(MC5_RANK_MASK, config);
+		dwidth = FIELD_GET(MC5_BUS_WIDTH_MASK, config);
+
+		switch (dwidth) {
+		case XDDR5_BUS_WIDTH_16:
+			dt = DEV_X16;
+			break;
+		case XDDR5_BUS_WIDTH_32:
+			dt = DEV_X32;
+			break;
+		case XDDR5_BUS_WIDTH_64:
+			dt = DEV_X64;
+			break;
+		default:
+			dt = DEV_UNKNOWN;
+			break;
+		}
+
+		if (dt == DEV_UNKNOWN)
+			continue;
+
+		layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+		layers[0].size = rank;
+		layers[0].is_virt_csrow = true;
+		layers[1].type = EDAC_MC_LAYER_CHANNEL;
+		layers[1].size = num_chans;
+		layers[1].is_virt_csrow = false;
+
+		mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers,
+				    sizeof(struct mc_priv));
+		if (!mci) {
+			edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for MC%d\n", i);
+			rc = -ENOMEM;
+			goto err_unwind;
+		}
+
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev) {
+			rc = -ENOMEM;
+			goto err_free_mci;
+		}
+
+		/*
+		 * device_initialize() + dev_set_name() + device_add() is what
+		 * device_register() does, minus the need for a separately
+		 * allocated init_name buffer. Once the device is initialised
+		 * it must be dropped with put_device(), which ends up in
+		 * versal_edac_release().
+		 */
+		dev->release = versal_edac_release;
+		device_initialize(dev);
+		rc = dev_set_name(dev, "versal-net-ddrmc5-edac-%d", i);
+		if (!rc)
+			rc = device_add(dev);
+		if (rc) {
+			put_device(dev);
+			goto err_free_mci;
+		}
+
+		mci->pdev = dev;
+
+		/*
+		 * mc_init() takes the device width from the controller's own
+		 * private area (mci->pvt_info), not from @priv.
+		 */
+		mci_priv = mci->pvt_info;
+		mci_priv->dwidth = dt;
+		priv->dwidth = dt;
+
+		mc_init(mci, dev);
+		rc = edac_mc_add_mc(mci);
+		if (rc) {
+			edac_printk(KERN_ERR, EDAC_MC, "Failed to register MC%d with EDAC core\n", i);
+			device_unregister(dev);
+			goto err_free_mci;
+		}
+
+		/* Only fully registered controllers become visible. */
+		priv->mci[i] = mci;
+	}
+	return 0;
+
+err_free_mci:
+	edac_mc_free(mci);
+err_unwind:
+	/* Undo the controllers registered before the failing one. */
+	while (i--) {
+		mci = priv->mci[i];
+		if (!mci)
+			continue;
+
+		/* Child (EDAC controller) first, then its parent device. */
+		edac_mc_del_mc(mci->pdev);
+		device_unregister(mci->pdev);
+		edac_mc_free(mci);
+		priv->mci[i] = NULL;
+	}
+
+	return rc;
+}
+
+/*
+ * remove_versalnet - Unregister and free every registered controller.
+ * @priv: instance data whose mci[] slots are walked.
+ *
+ * Slots of controllers that init_versalnet() skipped are NULL and are passed
+ * over. A problem with one controller does not stop the others from being
+ * cleaned up.
+ */
+static void remove_versalnet(struct mc_priv *priv)
+{
+	struct mem_ctl_info *mci;
+	struct device *dev;
+	int i;
+
+	for (i = 0; i < NUM_CONTROLLERS; i++) {
+		mci = priv->mci[i];
+		if (!mci)
+			continue;
+
+		dev = mci->pdev;
+
+		/* Child (EDAC controller) first, then its parent device. */
+		edac_mc_del_mc(dev);
+		device_unregister(dev);
+		edac_mc_free(mci);
+		priv->mci[i] = NULL;
+	}
+}
+
+/*
+ * mc_probe - Bring up the Versal NET EDAC driver.
+ * @pdev: platform device matched through amd_edac_match.
+ *
+ * Boots the remote processor named by the "amd,rproc" phandle, registers the
+ * rpmsg driver that receives its error reports, sets up MCDI to read the DDR
+ * configuration and finally registers the memory controllers.
+ *
+ * Return: 0 on success, -EPROBE_DEFER while the remote processor is not yet
+ * available, another negative errno on failure.
+ */
+static int mc_probe(struct platform_device *pdev)
+{
+	struct device_node *r5_core_node;
+	struct mc_priv *priv;
+	struct rproc *rp;
+	int rc;
+
+	r5_core_node = of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0);
+	if (!r5_core_node) {
+		dev_err(&pdev->dev, "amd,rproc: invalid phandle\n");
+		return -EINVAL;
+	}
+
+	rp = rproc_get_by_phandle(r5_core_node->phandle);
+	/* The node was only needed for the lookup; drop its reference. */
+	of_node_put(r5_core_node);
+	if (!rp)
+		return -EPROBE_DEFER;
+
+	rc = rproc_boot(rp);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to attach to remote processor\n");
+		goto err_rproc_put;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		rc = -ENOMEM;
+		goto err_shutdown;
+	}
+
+	/* rpmsg_probe() picks the instance data up from the id table. */
+	amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)priv;
+
+	rc = register_rpmsg_driver(&amd_rpmsg_driver);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_MC, "Failed to register RPMsg driver: %d\n", rc);
+		goto err_shutdown;
+	}
+
+	rc = setup_mcdi(priv);
+	if (rc)
+		goto err_unreg;
+
+	priv->mcdi->r5_rproc = rp;
+
+	rc = init_versalnet(priv, pdev);
+	if (rc)
+		goto err_init;
+
+	return 0;
+
+err_init:
+	/*
+	 * Stop message delivery before the MCDI handle goes away, then free
+	 * the handle allocated by setup_mcdi().
+	 */
+	unregister_rpmsg_driver(&amd_rpmsg_driver);
+	cdx_mcdi_finish(priv->mcdi);
+	kfree(priv->mcdi);
+	priv->mcdi = NULL;
+	goto err_shutdown;
+
+err_unreg:
+	unregister_rpmsg_driver(&amd_rpmsg_driver);
+
+err_shutdown:
+	rproc_shutdown(rp);
+
+err_rproc_put:
+	rproc_put(rp);
+
+	return rc;
+}
+
+/*
+ * mc_remove - Undo mc_probe().
+ * @pdev: platform device whose driver data is the struct mc_priv.
+ *
+ * Message delivery is stopped first, then the controllers are removed, the
+ * remote processor is shut down and the MCDI handle is finished and freed.
+ * The remote processor reference taken in mc_probe() is dropped last.
+ */
+static void mc_remove(struct platform_device *pdev)
+{
+	struct mc_priv *priv = platform_get_drvdata(pdev);
+	struct rproc *rp = priv->mcdi->r5_rproc;
+
+	unregister_rpmsg_driver(&amd_rpmsg_driver);
+	remove_versalnet(priv);
+	rproc_shutdown(rp);
+
+	cdx_mcdi_finish(priv->mcdi);
+	/* The handle itself was allocated by setup_mcdi(). */
+	kfree(priv->mcdi);
+	priv->mcdi = NULL;
+
+	/* Balances rproc_get_by_phandle() in mc_probe(). */
+	rproc_put(rp);
+}
+
+/* Device-tree compatible strings handled by this driver. */
+static const struct of_device_id amd_edac_match[] = {
+ { .compatible = "xlnx,versal-net-ddrmc5", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, amd_edac_match);
+
+/*
+ * Platform driver descriptor: devices matched through amd_edac_match are
+ * handled by mc_probe() and torn down by mc_remove().
+ */
+static struct platform_driver amd_ddr_edac_mc_driver = {
+ .driver = {
+ .name = "versal-net-edac",
+ .of_match_table = amd_edac_match,
+ },
+ .probe = mc_probe,
+ .remove = mc_remove,
+};
+
+module_platform_driver(amd_ddr_edac_mc_driver);
+
+MODULE_AUTHOR("AMD Inc");
+MODULE_DESCRIPTION("Versal NET EDAC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index c52b9dd9154c..9955396c9a52 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -913,8 +914,8 @@ static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
- edac_name, 1, "l2c", 1, 2, NULL,
- 0, edac_device_alloc_index());
+ edac_name, 1, "l2c", 1, 2,
+ edac_device_alloc_index());
if (!edac_dev) {
rc = -ENOMEM;
goto err_group;
@@ -1208,8 +1209,7 @@ static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
edac_idx = edac_device_alloc_index();
edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
- "l3c", 1, "l3c", 1, 0, NULL, 0,
- edac_idx);
+ "l3c", 1, "l3c", 1, 0, edac_idx);
if (!edac_dev) {
rc = -ENOMEM;
goto err_release_group;
@@ -1408,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
- info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
+ str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info),
info);
writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
@@ -1490,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
if (reg & AGENT_OFFLINE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to offline agent error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & UNIMPL_RBPAGE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to unimplemented page error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & WORD_ALIGNED_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s word aligned access error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & PAGE_ACCESS_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s to page out of range access error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
return;
if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
@@ -1561,7 +1561,7 @@ rb_skip:
err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
- REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_F2_RD(err_addr_hi)),
ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
if (reg & WRERR_RESP_MASK)
dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
@@ -1612,7 +1612,7 @@ chk_iob_axi0:
dev_err(edac_dev->dev,
"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
- REQTYPE_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
@@ -1626,7 +1626,7 @@ chk_iob_axi1:
dev_err(edac_dev->dev,
"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
- REQTYPE_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}
@@ -1748,8 +1748,7 @@ static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
edac_idx = edac_device_alloc_index();
edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
- "SOC", 1, "SOC", 1, 2, NULL, 0,
- edac_idx);
+ "SOC", 1, "SOC", 1, 2, edac_idx);
if (!edac_dev) {
rc = -ENOMEM;
goto err_release_group;
@@ -1960,7 +1959,7 @@ out_err:
return rc;
}
-static int xgene_edac_remove(struct platform_device *pdev)
+static void xgene_edac_remove(struct platform_device *pdev)
{
struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
struct xgene_edac_mc_ctx *mcu;
@@ -1981,8 +1980,6 @@ static int xgene_edac_remove(struct platform_device *pdev)
list_for_each_entry_safe(node, temp_node, &edac->socs, next)
xgene_edac_soc_remove(node);
-
- return 0;
}
static const struct of_device_id xgene_edac_of_match[] = {
diff --git a/drivers/edac/zynqmp_edac.c b/drivers/edac/zynqmp_edac.c
new file mode 100644
index 000000000000..cdffc9e4194d
--- /dev/null
+++ b/drivers/edac/zynqmp_edac.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP OCM ECC Driver
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+#define ZYNQMP_OCM_EDAC_MSG_SIZE 256
+
+#define ZYNQMP_OCM_EDAC_STRING "zynqmp_ocm"
+
+/* Error/Interrupt registers (offsets added to the ioremapped OCM base) */
+#define ERR_CTRL_OFST 0x0
+#define OCM_ISR_OFST 0x04
+#define OCM_IMR_OFST 0x08
+#define OCM_IEN_OFST 0x0C
+#define OCM_IDS_OFST 0x10
+
+/* ECC control register; get_eccstate() tests OCM_ECC_ENABLE_MASK in it */
+#define ECC_CTRL_OFST 0x14
+
+/*
+ * Correctable error info registers.
+ * get_error_info() reads CE_FFA (address) and CE_FFD0/CE_FFD1 (data words).
+ */
+#define CE_FFA_OFST 0x1C
+#define CE_FFD0_OFST 0x20
+#define CE_FFD1_OFST 0x24
+#define CE_FFD2_OFST 0x28
+#define CE_FFD3_OFST 0x2C
+#define CE_FFE_OFST 0x30
+
+/*
+ * Uncorrectable error info registers.
+ * get_error_info() reads UE_FFA (address) and UE_FFD0/UE_FFD1 (data words).
+ */
+#define UE_FFA_OFST 0x34
+#define UE_FFD0_OFST 0x38
+#define UE_FFD1_OFST 0x3C
+#define UE_FFD2_OFST 0x40
+#define UE_FFD3_OFST 0x44
+#define UE_FFE_OFST 0x48
+
+/*
+ * ECC control register bit field definitions.
+ * get_error_info() writes these values to OCM_ISR_OFST after logging an
+ * error; they are numerically equal to OCM_CEINTR_MASK/OCM_UEINTR_MASK.
+ */
+#define ECC_CTRL_CLR_CE_ERR 0x40
+#define ECC_CTRL_CLR_UE_ERR 0x80
+
+/*
+ * Fault injection data and count registers.
+ * The debugfs injection handlers write a mask for bit positions 0..31 to
+ * OCM_FID0 and for positions 32..63 to OCM_FID1; the UE_*_BITPOS_* limits
+ * below bound those positions for both CE and UE injection.
+ */
+#define OCM_FID0_OFST 0x4C
+#define OCM_FID1_OFST 0x50
+#define OCM_FID2_OFST 0x54
+#define OCM_FID3_OFST 0x58
+#define OCM_FIC_OFST 0x74
+
+#define UE_MAX_BITPOS_LOWER 31
+#define UE_MIN_BITPOS_UPPER 32
+#define UE_MAX_BITPOS_UPPER 63
+
+/*
+ * Interrupt masks.
+ * The CE/UE bits are tested in OCM_ISR and written to OCM_IEN/OCM_IDS;
+ * OCM_ECC_ENABLE_MASK is tested against ECC_CTRL_OFST instead.
+ */
+#define OCM_CEINTR_MASK BIT(6)
+#define OCM_UEINTR_MASK BIT(7)
+#define OCM_ECC_ENABLE_MASK BIT(0)
+
+#define OCM_FICOUNT_MASK GENMASK(23, 0)
+#define OCM_NUM_UE_BITPOS 2
+#define OCM_BASEVAL 0xFFFC0000
+#define EDAC_DEVICE "ZynqMP-OCM"
+
+/**
+ * struct ecc_error_info - ECC error log information
+ * @addr: Fault generated at this address; get_error_info() stores
+ *        OCM_BASEVAL OR'ed with the CE_FFA/UE_FFA register value here
+ * @fault_lo: Generated fault data (lower 32-bit), read from the FFD0 register
+ * @fault_hi: Generated fault data (upper 32-bit), read from the FFD1 register
+ */
+struct ecc_error_info {
+ u32 addr;
+ u32 fault_lo;
+ u32 fault_hi;
+};
+
+/**
+ * struct ecc_status - ECC status information to report
+ * @ce_cnt: Correctable error count logged since handle_error() last ran
+ * @ue_cnt: Uncorrectable error count logged since handle_error() last ran
+ * @ceinfo: Correctable error log information
+ * @ueinfo: Uncorrectable error log information
+ *
+ * Filled in by get_error_info() and zeroed as a whole by handle_error()
+ * once the pending errors have been reported.
+ */
+struct ecc_status {
+ u32 ce_cnt;
+ u32 ue_cnt;
+ struct ecc_error_info ceinfo;
+ struct ecc_error_info ueinfo;
+};
+
+/**
+ * struct edac_priv - OCM private instance data
+ * @baseaddr: Base address of the OCM
+ * @message: Buffer for framing the event specific info
+ *           (ZYNQMP_OCM_EDAC_MSG_SIZE bytes, formatted by handle_error())
+ * @stat: ECC status information for the error currently being handled
+ * @ce_cnt: Correctable Error count, accumulated by intr_handler()
+ * @ue_cnt: Uncorrectable Error count, accumulated by intr_handler()
+ * @debugfs_dir: Directory entry for debugfs
+ * @ce_bitpos: Bit position for Correctable Error
+ * @ue_bitpos: Array to store UnCorrectable Error bit positions
+ * @fault_injection_cnt: Fault Injection Counter value
+ *
+ * The last four members exist only when CONFIG_EDAC_DEBUG is set.
+ */
+struct edac_priv {
+ void __iomem *baseaddr;
+ char message[ZYNQMP_OCM_EDAC_MSG_SIZE];
+ struct ecc_status stat;
+ u32 ce_cnt;
+ u32 ue_cnt;
+#ifdef CONFIG_EDAC_DEBUG
+ struct dentry *debugfs_dir;
+ u8 ce_bitpos;
+ u8 ue_bitpos[OCM_NUM_UE_BITPOS];
+ u32 fault_injection_cnt;
+#endif
+};
+
+/**
+ * get_error_info - Log one pending ECC error
+ * @base: Pointer to the base address of the OCM
+ * @p: Pointer to the OCM ECC status structure
+ * @mask: Status register mask value
+ *
+ * Bumps the matching counter in @p, captures the two fault data words and
+ * the fault address, then writes the matching clear value to the interrupt
+ * status register. A correctable error takes precedence: the uncorrectable
+ * path is only taken when the CE bit is clear in @mask. Nothing is done if
+ * neither bit is set.
+ */
+static void get_error_info(void __iomem *base, struct ecc_status *p, int mask)
+{
+	struct ecc_error_info *log;
+	u32 data0_ofst, data1_ofst, addr_ofst, clear_val;
+
+	/* Pick the register set for the error type being logged */
+	if (mask & OCM_CEINTR_MASK) {
+		p->ce_cnt++;
+		log = &p->ceinfo;
+		data0_ofst = CE_FFD0_OFST;
+		data1_ofst = CE_FFD1_OFST;
+		addr_ofst = CE_FFA_OFST;
+		clear_val = ECC_CTRL_CLR_CE_ERR;
+	} else if (mask & OCM_UEINTR_MASK) {
+		p->ue_cnt++;
+		log = &p->ueinfo;
+		data0_ofst = UE_FFD0_OFST;
+		data1_ofst = UE_FFD1_OFST;
+		addr_ofst = UE_FFA_OFST;
+		clear_val = ECC_CTRL_CLR_UE_ERR;
+	} else {
+		return;
+	}
+
+	log->fault_lo = readl(base + data0_ofst);
+	log->fault_hi = readl(base + data1_ofst);
+	log->addr = OCM_BASEVAL | readl(base + addr_ofst);
+	writel(clear_val, base + OCM_ISR_OFST);
+}
+
+/**
+ * handle_error - Report logged CE and UE errors to the EDAC core
+ * @dci: Pointer to the EDAC device instance
+ * @p: Pointer to the OCM ECC status structure
+ *
+ * Formats a message for each error type with a non-zero count in @p, hands
+ * it to the EDAC core, and finally zeroes @p so it is ready for the next
+ * interrupt.
+ */
+static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p)
+{
+	struct edac_priv *priv = dci->pvt_info;
+	struct ecc_error_info *err;
+
+	if (p->ce_cnt != 0) {
+		err = &p->ceinfo;
+		snprintf(priv->message, sizeof(priv->message),
+			 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
+			 "CE", err->addr, err->fault_hi, err->fault_lo);
+		edac_device_handle_ce(dci, 0, 0, priv->message);
+	}
+
+	if (p->ue_cnt != 0) {
+		err = &p->ueinfo;
+		snprintf(priv->message, sizeof(priv->message),
+			 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
+			 "UE", err->addr, err->fault_hi, err->fault_lo);
+		edac_device_handle_ue(dci, 0, 0, priv->message);
+	}
+
+	/* Start from a clean slate for the next error */
+	memset(p, 0, sizeof(*p));
+}
+
+/**
+ * intr_handler - OCM ECC interrupt service routine
+ * @irq: irq number
+ * @dev_id: device id pointer (the EDAC device control structure)
+ *
+ * Reads the interrupt status register, logs the pending error, adds it to
+ * the running totals and reports it.
+ *
+ * Return: IRQ_NONE, if CE/UE interrupt not set or IRQ_HANDLED otherwise
+ */
+static irqreturn_t intr_handler(int irq, void *dev_id)
+{
+	struct edac_device_ctl_info *dci = dev_id;
+	struct edac_priv *priv = dci->pvt_info;
+	struct ecc_status *stat = &priv->stat;
+	int isr;
+
+	isr = readl(priv->baseaddr + OCM_ISR_OFST);
+	if (isr & (OCM_CEINTR_MASK | OCM_UEINTR_MASK)) {
+		get_error_info(priv->baseaddr, stat, isr);
+
+		/* Accumulate before handle_error() zeroes the status */
+		priv->ce_cnt += stat->ce_cnt;
+		priv->ue_cnt += stat->ue_cnt;
+		handle_error(dci, stat);
+
+		return IRQ_HANDLED;
+	}
+
+	WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, isr);
+	return IRQ_NONE;
+}
+
+/**
+ * get_eccstate - Report whether ECC is enabled
+ * @base: Pointer to the OCM base address
+ *
+ * Reads the ECC control register and tests its enable bit.
+ *
+ * Return: true if ECC is enabled, false otherwise.
+ */
+static bool get_eccstate(void __iomem *base)
+{
+	u32 ecc_ctrl = readl(base + ECC_CTRL_OFST);
+
+	return (ecc_ctrl & OCM_ECC_ENABLE_MASK) != 0;
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/**
+ * write_fault_count - Program the fault injection count register
+ * @priv: Pointer to the EDAC private struct
+ *
+ * Writes the user-supplied fault injection count to the hardware. Values
+ * that do not fit in OCM_FICOUNT_MASK are truncated to it and the truncation
+ * is logged. Once the counter reaches zero, it injects errors.
+ */
+static void write_fault_count(struct edac_priv *priv)
+{
+	u32 requested = priv->fault_injection_cnt;
+	u32 ficount = requested & OCM_FICOUNT_MASK;
+
+	if (ficount != requested)
+		edac_printk(KERN_INFO, EDAC_DEVICE,
+			    "Fault injection count value truncated to %d\n", ficount);
+
+	writel(ficount, priv->baseaddr + OCM_FIC_OFST);
+}
+
+/*
+ * Correctable Error injection via debugfs:
+ * - Optionally set the Fault Injection Count first:
+ *      echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
+ * - Then write the bit position to corrupt:
+ *      echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos
+ *
+ * Positions 0..31 are programmed into the FID0 register and 32..63 into
+ * FID1; the other register is cleared. Returns the number of bytes consumed
+ * or a negative errno.
+ */
+static ssize_t inject_ce_write(struct file *file, const char __user *data,
+			       size_t count, loff_t *ppos)
+{
+	struct edac_device_ctl_info *edac_dev = file->private_data;
+	struct edac_priv *priv = edac_dev->pvt_info;
+	void __iomem *base = priv->baseaddr;
+	u8 bitpos;
+	int ret;
+
+	if (!data)
+		return -EFAULT;
+
+	ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos);
+	if (ret)
+		return ret;
+
+	bitpos = priv->ce_bitpos;
+	if (bitpos > UE_MAX_BITPOS_UPPER)
+		return -EINVAL;
+
+	if (bitpos > UE_MAX_BITPOS_LOWER) {
+		/* Upper data word: rebase the position to bit 0 of FID1 */
+		writel(BIT(bitpos - UE_MIN_BITPOS_UPPER), base + OCM_FID1_OFST);
+		writel(0, base + OCM_FID0_OFST);
+	} else {
+		writel(BIT(bitpos), base + OCM_FID0_OFST);
+		writel(0, base + OCM_FID1_OFST);
+	}
+
+	write_fault_count(priv);
+
+	return count;
+}
+
+/* debugfs file operations for the write-only "inject_ce_bitpos" entry */
+static const struct file_operations inject_ce_fops = {
+	.llseek	= generic_file_llseek,
+	.write	= inject_ce_write,
+	.open	= simple_open,
+};
+
+/*
+ * To get the Uncorrectable Error injected, the following steps are needed:
+ * - Setup the optional Fault Injection Count:
+ *      echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
+ * - Write the Uncorrectable Error bit position values:
+ *      echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos
+ *
+ * Both positions must lie in 0..UE_MAX_BITPOS_UPPER and must differ.
+ * Returns the number of bytes consumed on success, -EFAULT if the user
+ * buffer cannot be read, -EINVAL if the second position is missing, a
+ * position is out of range or both are equal, or the kstrtou8() error for
+ * unparsable numbers.
+ */
+static ssize_t inject_ue_write(struct file *file, const char __user *data,
+			       size_t count, loff_t *ppos)
+{
+	struct edac_device_ctl_info *edac_dev = file->private_data;
+	struct edac_priv *priv = edac_dev->pvt_info;
+	/* Large enough for "0x3f,0x3f\n" plus the terminating NUL */
+	char buf[16], *pbuf, *token[OCM_NUM_UE_BITPOS];
+	u64 ue_bitpos;
+	size_t len;
+	int i, ret;
+
+	if (!data)
+		return -EFAULT;
+
+	/* Reserve one byte for the terminator so buf[len] stays in bounds */
+	len = min_t(size_t, count, sizeof(buf) - 1);
+	if (copy_from_user(buf, data, len))
+		return -EFAULT;
+
+	buf[len] = '\0';
+	pbuf = &buf[0];
+	for (i = 0; i < OCM_NUM_UE_BITPOS; i++)
+		token[i] = strsep(&pbuf, ",");
+
+	/* strsep() returns NULL once the input is exhausted: no ',' given */
+	if (!token[1])
+		return -EINVAL;
+
+	ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]);
+	if (ret)
+		return ret;
+
+	ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]);
+	if (ret)
+		return ret;
+
+	if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER ||
+	    priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER)
+		return -EINVAL;
+
+	if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n");
+		return -EINVAL;
+	}
+
+	/* Positions go up to 63, so the mask must be built in 64 bits */
+	ue_bitpos = BIT_ULL(priv->ue_bitpos[0]) | BIT_ULL(priv->ue_bitpos[1]);
+
+	writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST);
+	writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST);
+
+	write_fault_count(priv);
+
+	return count;
+}
+
+/* debugfs file operations for the write-only "inject_ue_bitpos" entry */
+static const struct file_operations inject_ue_fops = {
+	.llseek	= generic_file_llseek,
+	.write	= inject_ue_write,
+	.open	= simple_open,
+};
+
+/*
+ * Create the "ocm" debugfs directory and its three fault injection entries.
+ * Nothing else is created when the directory itself cannot be set up.
+ */
+static void setup_debugfs(struct edac_device_ctl_info *edac_dev)
+{
+	struct edac_priv *priv = edac_dev->pvt_info;
+	struct dentry *dir;
+
+	dir = edac_debugfs_create_dir("ocm");
+	priv->debugfs_dir = dir;
+	if (dir) {
+		edac_debugfs_create_x32("inject_fault_count", 0644, dir,
+					&priv->fault_injection_cnt);
+		edac_debugfs_create_file("inject_ue_bitpos", 0644, dir,
+					 edac_dev, &inject_ue_fops);
+		edac_debugfs_create_file("inject_ce_bitpos", 0644, dir,
+					 edac_dev, &inject_ce_fops);
+	}
+}
+#endif
+
+/**
+ * edac_probe - Bind the driver to an OCM controller instance
+ * @pdev: Platform device describing the controller
+ *
+ * Maps the controller registers, bails out if ECC is disabled, allocates
+ * and registers the EDAC device, installs the interrupt handler and, as the
+ * very last step, enables the CE/UE interrupts. Nothing can fail once the
+ * interrupt handler is installed, so the control structure is never freed
+ * while the handler still refers to it.
+ *
+ * Return: 0 on success, -ENXIO if ECC is not enabled, -ENOMEM if the EDAC
+ * control structure cannot be allocated, or the error code of the helper
+ * that failed.
+ */
+static int edac_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *dci;
+	struct edac_priv *priv;
+	void __iomem *baseaddr;
+	struct resource *res;
+	int irq, ret;
+
+	baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(baseaddr))
+		return PTR_ERR(baseaddr);
+
+	if (!get_eccstate(baseaddr)) {
+		edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n");
+		return -ENXIO;
+	}
+
+	dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING,
+					 1, ZYNQMP_OCM_EDAC_STRING, 1, 0,
+					 edac_device_alloc_index());
+	if (!dci)
+		return -ENOMEM;
+
+	priv = dci->pvt_info;
+	platform_set_drvdata(pdev, dci);
+	dci->dev = &pdev->dev;
+	priv->baseaddr = baseaddr;
+	dci->mod_name = pdev->dev.driver->name;
+	dci->ctl_name = ZYNQMP_OCM_EDAC_STRING;
+	dci->dev_name = dev_name(&pdev->dev);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto free_dev_ctl;
+	}
+
+	/* Register with the EDAC core before the handler can report to it */
+	ret = edac_device_add_device(dci);
+	if (ret)
+		goto free_dev_ctl;
+
+	ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0,
+			       dev_name(&pdev->dev), dci);
+	if (ret) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request Irq\n");
+		goto del_dev;
+	}
+
+#ifdef CONFIG_EDAC_DEBUG
+	setup_debugfs(dci);
+#endif
+
+	/* Enable UE, CE interrupts; done last so no error path follows it */
+	writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST);
+
+	return 0;
+
+del_dev:
+	edac_device_del_device(&pdev->dev);
+free_dev_ctl:
+	edac_device_free_ctl_info(dci);
+
+	return ret;
+}
+
+/*
+ * Undo edac_probe(): mask the CE/UE interrupts, drop the debugfs entries,
+ * unregister the EDAC device and release its control structure.
+ *
+ * NOTE(review): the IRQ is requested with devm_request_irq() in edac_probe(),
+ * so it is presumably released only after this function returns, i.e. after
+ * the control structure has been freed - confirm that masking the interrupts
+ * here is sufficient.
+ */
+static void edac_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+	struct edac_priv *priv = dci->pvt_info;
+	void __iomem *ids_reg = priv->baseaddr + OCM_IDS_OFST;
+
+	/* Disable UE, CE interrupts */
+	writel(OCM_CEINTR_MASK | OCM_UEINTR_MASK, ids_reg);
+
+#ifdef CONFIG_EDAC_DEBUG
+	debugfs_remove_recursive(priv->debugfs_dir);
+#endif
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(dci);
+}
+
+/* Device tree compatible strings this driver binds to */
+static const struct of_device_id zynqmp_ocm_edac_match[] = {
+	{
+		.compatible = "xlnx,zynqmp-ocmc-1.0",
+	},
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match);
+
+/* Platform driver glue: probe/remove callbacks and the OF match table */
+static struct platform_driver zynqmp_ocm_edac_driver = {
+	.probe	= edac_probe,
+	.remove	= edac_remove,
+	.driver	= {
+		.name		= "zynqmp-ocm-edac",
+		.of_match_table	= zynqmp_ocm_edac_match,
+	},
+};
+
+module_platform_driver(zynqmp_ocm_edac_driver);
+
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver");
+MODULE_LICENSE("GPL");